├── .github └── workflows │ ├── 02-manual-trigger-job.yml │ └── 04-code-checks.yml ├── .gitignore ├── _build.yml ├── _config.yml ├── documentation ├── 00-script.md ├── 01-aml-job.md ├── 02-github-actions.md ├── 03-trigger-workflow.md ├── 04-unit-test-linting.md ├── 05-environments.md ├── 06-deploy-model.md └── media │ └── 00-01-github-secret.png ├── experimentation ├── data │ └── diabetes-dev.csv └── train-classification-model.ipynb ├── index.md ├── production └── data │ └── diabetes-prod.csv ├── pytest.ini ├── requirements.txt ├── src ├── job.yml └── model │ └── train.py └── tests ├── .flake8 ├── __init__.py ├── datasets ├── first.csv ├── foo.py └── second.csv └── test_train.py /.github/workflows/02-manual-trigger-job.yml: -------------------------------------------------------------------------------- 1 | name: Manually trigger an Azure Machine Learning job 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | jobs: 7 | train: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: Check out repo 11 | uses: actions/checkout@main 12 | - name: Install az ml extension 13 | run: az extension add -n ml -y 14 | - name: Azure login 15 | uses: azure/login@v1 16 | with: 17 | creds: ${{secrets.AZURE_CREDENTIALS}} 18 | 19 | 20 | -------------------------------------------------------------------------------- /.github/workflows/04-code-checks.yml: -------------------------------------------------------------------------------- 1 | name: Code checks 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | jobs: 7 | job1: 8 | name: linting 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: Check out repo 12 | uses: actions/checkout@main 13 | - name: Use Python version 3.8 14 | uses: actions/setup-python@v3 15 | with: 16 | python-version: '3.8' 17 | - name: Install Flake8 18 | run: | 19 | python -m pip install flake8 20 | - name: Run linting tests 21 | run: | 22 | flake8 src/model/ 23 | 24 | 25 | -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /_build.yml: -------------------------------------------------------------------------------- 1 | name: '$(Date:yyyyMMdd)$(Rev:.rr)' 2 | jobs: 3 | - job: build_markdown_content 4 | displayName: 'Build Markdown Content' 5 | workspace: 6 | clean: all 7 | pool: 8 | vmImage: 'Ubuntu 16.04' 9 | container: 10 | image: 'microsoftlearning/markdown-build:latest' 11 | steps: 12 | - task: Bash@3 13 | displayName: 'Build Content' 14 | inputs: 15 | targetType: inline 16 | script: | 17 | cp /{attribution.md,template.docx,package.json,package.js} . 
18 | npm install 19 | node package.js --version $(Build.BuildNumber) 20 | - task: GitHubRelease@0 21 | displayName: 'Create GitHub Release' 22 | inputs: 23 | gitHubConnection: 'github-microsoftlearning-organization' 24 | repositoryName: '$(Build.Repository.Name)' 25 | tagSource: manual 26 | tag: 'v$(Build.BuildNumber)' 27 | title: 'Version $(Build.BuildNumber)' 28 | releaseNotesSource: input 29 | releaseNotes: '# Version $(Build.BuildNumber) Release' 30 | assets: '$(Build.SourcesDirectory)/out/*.zip' 31 | assetUploadMode: replace 32 | - task: PublishBuildArtifacts@1 33 | displayName: 'Publish Output Files' 34 | inputs: 35 | pathtoPublish: '$(Build.SourcesDirectory)/out/' 36 | artifactName: 'Lab Files' 37 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | remote_theme: MicrosoftLearning/Jekyll-Theme 2 | exclude: 3 | - readme.md 4 | - .github/ 5 | header_pages: 6 | - index.html 7 | author: Microsoft Learning 8 | twitter_username: mslearning 9 | github_username: MicrosoftLearning 10 | plugins: 11 | - jekyll-sitemap 12 | - jekyll-mentions 13 | - jemoji 14 | markdown: kramdown 15 | kramdown: 16 | syntax_highlighter_opts: 17 | disable : true 18 | -------------------------------------------------------------------------------- /documentation/00-script.md: -------------------------------------------------------------------------------- 1 | --- 2 | challenge: 3 | module: Convert a notebook to production code 4 | challenge: '0: Convert a notebook to production code' 5 | --- 6 | 7 | 16 | 17 | # Challenge 0: Convert a notebook to production code 18 | 19 | 20 | 21 | ## Challenge scenario 22 | 23 | The first step to automate machine learning workflows is to convert a Jupyter notebook to production-ready code. When you store your code as scripts, it's easier to automate the code execution. 
You can parameterize scripts to easily reuse the code for retraining. 24 | 25 | ## Prerequisites 26 | 27 | To complete this challenge, you'll need: 28 | 29 | - Access to an Azure subscription. 30 | - A GitHub account. 31 | 32 | ## Objectives 33 | 34 | By completing this challenge, you'll learn how to: 35 | 36 | - Clean nonessential code. 37 | - Convert your code to Python scripts. 38 | - Use functions in your scripts. 39 | - Use parameters in your scripts. 40 | 41 | > **Important!** 42 | > Each challenge is designed to allow you to explore how to implement DevOps principles when working with machine learning models. Some instructions may be intentionally vague, inviting you to think about your own preferred approach. If for example, the instructions ask you to create an Azure Machine Learning workspace, it's up to you to explore and decide how you want to create it. To make it the best learning experience for you, it's up to you to make it as simple or as challenging as you want. 43 | 44 | ## Challenge Duration 45 | 46 | - **Estimated Time**: 30 minutes 47 | 48 | ## Instructions 49 | 50 | To work through the challenges, you need **your own public repo** which includes the challenge files. Create a new public repo by navigating to [https://github.com/MicrosoftLearning/mslearn-mlops](https://github.com/MicrosoftLearning/mslearn-mlops) and selecting the **Use this template** button to create your own repo. 51 | 52 | In the **experimentation** folder, you'll find a Jupyter notebook that trains a classification model. The data used by the notebook is in the **experimentation/data** folder and contains a CSV file. 53 | 54 | In the **src/model** folder you'll find a `train.py` script which already includes code converted from part of the notebook. It's up to you to complete it. 55 | 56 | - Go through the notebook to understand what the code does. 57 | - Convert the code under the **Split data** header and include it in the `train.py` script as a `split_data` function. 
Remember to: 58 | - Remove nonessential code. 59 | - Include the necessary code as a function. 60 | - Include any necessary libraries at the top of the script. 61 | 62 |
63 | Hint 64 |
65 | The split_data function is already included in the main function. You only need to add the function itself with the required inputs and outputs underneath the comment TO DO: add function to split data. 66 |
67 | 68 | - Add logging so that every time you run the script, all parameters and metrics are tracked. Use the autologging feature of MLflow to also ensure the necessary model files are stored with the job run to easily deploy the model in the future. 69 | 70 |
71 | Hint 72 |
73 | MLflow is an open source library for tracking and managing machine learning models. You can use it to track custom metrics. However, since the current model is trained with the common Scikit-learn library, you can also use autologging. By enabling autologging with mlflow.autolog(), all parameters, metrics, and model files will automatically be stored with your job run. Enable autologging in the main function under TO DO: enable autologging. 74 |
75 | 76 | ## Success criteria 77 | 78 | To complete this challenge successfully, you should be able to show: 79 | 80 | - A training script which includes a function to split the data and autologging using MLflow. 81 | 82 | > **Note:** 83 | > If you've used a compute instance for experimentation, remember to stop the compute instance when you're done. 84 | 85 | ## Useful resources 86 | 87 | - [Tutorial: Convert ML experiments to production Python code](https://docs.microsoft.com/azure/machine-learning/tutorial-convert-ml-experiment-to-production) 88 | - [Logging MLflow models in Azure Machine Learning](https://docs.microsoft.com/azure/machine-learning/how-to-log-mlflow-models) 89 | - [MLflow documentation](https://www.mlflow.org/docs/latest/python_api/mlflow.html) 90 | 91 | 92 | -------------------------------------------------------------------------------- /documentation/01-aml-job.md: -------------------------------------------------------------------------------- 1 | --- 2 | challenge: 3 | module: Use an Azure Machine Learning job for automation 4 | challenge: '1: Create an Azure Machine Learning job' 5 | --- 6 | 7 | 16 | 17 | # Challenge 1: Create an Azure Machine Learning job 18 | 19 | 20 | 21 | ## Challenge scenario 22 | 23 | To automate machine learning workflows, you can define machine learning tasks in scripts. To execute any workflow consisting of Python scripts, use Azure Machine Learning jobs. Azure Machine Learning jobs store all metadata of a workflow, including input parameters and output metrics. By running scripts as jobs, it's easier to track and manage your machine learning models. 24 | 25 | ## Prerequisites 26 | 27 | If you haven't, complete the [previous challenge](00-script.md) before you continue. 28 | 29 | ## Objectives 30 | 31 | By completing this challenge, you'll learn how to: 32 | 33 | - Define an Azure Machine Learning job in YAML. 34 | - Run an Azure Machine Learning job with the CLI v2. 
35 | 36 | > **Important!** 37 | > Each challenge is designed to allow you to explore how to implement DevOps principles when working with machine learning models. Some instructions may be intentionally vague, inviting you to think about your own preferred approach. If for example, the instructions ask you to create an Azure Machine Learning workspace, it's up to you to explore and decide how you want to create it. To make it the best learning experience for you, it's up to you to make it as simple or as challenging as you want. 38 | 39 | ## Challenge Duration 40 | 41 | - **Estimated Time**: 30 minutes 42 | 43 | ## Instructions 44 | 45 | In the **src/model** folder, you'll find a Python script which reads CSV files from a folder and uses the data to train a classification model. In the **src** folder, you'll find a YAML file to define a job. There are values missing in the YAML file. It's up to you to complete it. 46 | 47 | - Create an Azure Machine Learning workspace and a compute instance. 48 | - Use the CLI (v2) to create a registered data asset with the following configuration: 49 | - **Name**: *diabetes-dev-folder* 50 | - **Path**: The **data** folder in the **experimentation** folder which contains the CSV file to train the model. The path should point to the folder, not to the specific file. 51 | 52 |
53 | Hint 54 |
55 | Using the CLI (v2) you can create a data asset by defining the configuration in a YAML file or by specifying the configuration in the CLI command. 56 |
57 | 58 | - Complete the `job.yml` file to define the Azure Machine Learning job to run the `train.py` script, with the registered data asset as input. 59 | - Use the CLI (v2) to run the job. 60 | 61 | > **Tip:** 62 | > Whether you're working from the Cloud Shell, compute instance or a local terminal, make sure to update the Azure Machine Learning extension for the CLI to the latest version. 63 | 64 | ## Success criteria 65 | 66 | To complete this challenge successfully, you should be able to show: 67 | 68 | - A successfully completed job in the Azure Machine Learning workspace. The job should contain all input parameters and output metrics for the model you trained. 69 | 70 | > **Note:** 71 | > If you've used a compute instance for experimentation, remember to stop the compute instance when you're done. 72 | 73 | ## Useful resources 74 | 75 | - [Learning path on how to use the CLI v2 with Azure Machine Learning.](https://docs.microsoft.com/learn/paths/train-models-azure-machine-learning-cli-v2/) 76 | - [CLI reference for managing Azure Machine Learning workspaces](https://docs.microsoft.com/cli/azure/ml/workspace?view=azure-cli-latest) 77 | - [CLI reference for managing Azure ML compute resources](https://docs.microsoft.com/cli/azure/ml/compute?view=azure-cli-latest) 78 | - [CLI reference for managing Azure ML data assets](https://docs.microsoft.com/cli/azure/ml/data?view=azure-cli-latest) 79 | - [CLI reference for jobs.](https://docs.microsoft.com/cli/azure/ml/job?view=azure-cli-latest) 80 | - [YAML reference for command jobs.](https://docs.microsoft.com/azure/machine-learning/reference-yaml-job-command) 81 | - [Example job YAML files.](https://github.com/Azure/azureml-examples/tree/main/cli/jobs/basics) 82 | 83 | 84 | -------------------------------------------------------------------------------- /documentation/02-github-actions.md: -------------------------------------------------------------------------------- 1 | --- 2 | challenge: 3 | module: 'Trigger Azure 
Machine Learning jobs with GitHub Actions' 4 | challenge: '2: Trigger the Azure Machine Learning job with GitHub Actions' 5 | --- 6 | 7 | 16 | 17 | # Challenge 2: Trigger the Azure Machine Learning job with GitHub Actions 18 | 19 | 20 | 21 | ## Challenge scenario 22 | 23 | The benefit of using the CLI (v2) to run an Azure Machine Learning job is that you can submit the job from anywhere. Using a platform like GitHub will allow you to automate Azure Machine Learning jobs. To trigger the job to run, you can use GitHub Actions. 24 | 25 | ## Prerequisites 26 | 27 | If you haven't, complete the [previous challenge](01-aml-job.md) before you continue. 28 | 29 | To complete the challenge, you need to have the authorization to create a service principal. 30 | 31 | ## Objectives 32 | 33 | By completing this challenge, you'll learn how to: 34 | 35 | - Create a service principal and use it to create a GitHub secret for authentication. 36 | - Run the Azure Machine Learning job with GitHub Actions. 37 | 38 | > **Important!** 39 | > Each challenge is designed to allow you to explore how to implement DevOps principles when working with machine learning models. Some instructions may be intentionally vague, inviting you to think about your own preferred approach. If for example, the instructions ask you to create an Azure Machine Learning workspace, it's up to you to explore and decide how you want to create it. To make it the best learning experience for you, it's up to you to make it as simple or as challenging as you want. 40 | 41 | ## Challenge Duration 42 | 43 | - **Estimated Time**: 45 minutes 44 | 45 | ## Instructions 46 | 47 | In the **.github/workflows** folder, you'll find the `02-manual-trigger-job.yml` file. The file defines a GitHub Action which can be manually triggered. The workflow checks out the repo onto the runner, installs the Azure Machine Learning extension for the CLI (v2), and logs in to Azure using the `AZURE_CREDENTIALS` secret.
48 | 49 | - Create a service principal, using the Cloud Shell in the Azure portal, which has contributor access to your resource group. 50 | 51 | **Save the output**, you'll *also* need it for later challenges. Update the `<service-principal-name>` (should be unique), `<subscription-id>`, and `<resource-group-name>` before using the following command: 52 | ```azurecli 53 | az ad sp create-for-rbac --name "<service-principal-name>" --role contributor \ 54 | --scopes /subscriptions/<subscription-id>/resourceGroups/<resource-group-name> \ 55 | --sdk-auth 56 | ``` 57 | - Create a GitHub secret in your repository. Name it `AZURE_CREDENTIALS` and copy and paste the output of the service principal to the **Value** field of the secret. 58 | 
60 | Hint 61 |
62 | The output of the service principal which you need to paste into the Value field of the secret should be a JSON with the following structure: 63 |
 64 | {
 65 | "clientId": "your-client-id",
 66 | "clientSecret": "your-client-secret",
 67 | "subscriptionId": "your-subscription-id",
 68 | "tenantId": "your-tenant-id",
 69 | "activeDirectoryEndpointUrl": "https://login.microsoftonline.com",
 70 | "resourceManagerEndpointUrl": "https://management.azure.com/",
 71 | "activeDirectoryGraphResourceId": "https://graph.windows.net/",
 72 | "sqlManagementEndpointUrl": "https://management.core.windows.net:8443/",
 73 | "galleryEndpointUrl": "https://gallery.azure.com/",
 74 | "managementEndpointUrl": "https://management.core.windows.net/"
 75 | }
 76 | 
77 |
78 | 79 | - Edit the `02-manual-trigger-job.yml` workflow to trigger the Azure Machine Learning job you defined in challenge 1. 80 | 81 | 
82 | Hint 83 |
84 | GitHub is authenticated to use your Azure Machine Learning workspace with a service principal. The service principal is only allowed to submit jobs that use a compute cluster, not a compute instance. 85 |
86 | 87 | ## Success criteria 88 | 89 | To complete this challenge successfully, you should be able to show: 90 | 91 | - A successfully completed Action in your GitHub repo, triggered manually in GitHub. 92 | - A step in the Action should have submitted a job to the Azure Machine Learning workspace. 93 | - A successfully completed Azure Machine Learning job, shown in the Azure Machine Learning workspace. 94 | 95 | ## Useful resources 96 | 97 | - The introduction to DevOps principles for machine learning module covers [how to integrate Azure Machine Learning with DevOps tools.](https://docs.microsoft.com/learn/paths/introduction-machine-learn-operations/) 98 | - [Use GitHub Actions with Azure Machine Learning.](https://docs.microsoft.com/azure/machine-learning/how-to-github-actions-machine-learning) 99 | - Learn more about [service principal objects in Azure Active Directory.](https://docs.microsoft.com/azure/active-directory/develop/app-objects-and-service-principals#service-principal-object) 100 | - Learn more about encrypted secrets in GitHub, like [how to name and how to create a secret in a GitHub repo.](https://docs.github.com/actions/security-guides/encrypted-secrets) 101 | - [Manually running a workflow in GitHub Actions.](https://docs.github.com/actions/managing-workflow-runs/manually-running-a-workflow) 102 | - [Re-running workflows and jobs in GitHub Actions.](https://docs.github.com/actions/managing-workflow-runs/re-running-workflows-and-jobs) 103 | - [General documentation for GitHub Actions.](https://docs.github.com/actions/guides) 104 | 105 | -------------------------------------------------------------------------------- /documentation/03-trigger-workflow.md: -------------------------------------------------------------------------------- 1 | --- 2 | challenge: 3 | module: 'Trigger GitHub Actions with feature-based development' 4 | challenge: '3: Trigger GitHub Actions with feature-based development' 5 | --- 6 | 7 | 16 | 17 | # Challenge 3: Trigger 
GitHub Actions with feature-based development 18 | 19 | 20 | 21 | ## Challenge scenario 22 | 23 | Triggering a workflow by pushing directly to the repo is **not** considered a best practice. Preferably, you'll want to review any changes before you build them with GitHub Actions. 24 | 25 | ## Prerequisites 26 | 27 | If you haven't, complete the [previous challenge](02-github-actions.md) before you continue. 28 | 29 | ## Objectives 30 | 31 | By completing this challenge, you'll learn how to: 32 | 33 | - Work with feature-based development. 34 | - Protect the main branch. 35 | - Trigger a GitHub Actions workflow by creating a pull request. 36 | 37 | > **Important!** 38 | > Each challenge is designed to allow you to explore how to implement DevOps principles when working with machine learning models. Some instructions may be intentionally vague, inviting you to think about your own preferred approach. If for example, the instructions ask you to create an Azure Machine Learning workspace, it's up to you to explore and decide how you want to create it. To make it the best learning experience for you, it's up to you to make it as simple or as challenging as you want. 39 | 40 | ## Challenge Duration 41 | 42 | - **Estimated Time**: 45 minutes 43 | 44 | ## Instructions 45 | 46 | Use feature-based development to better govern changes made to the repo and the triggering of GitHub Actions. 47 | 48 | - Create a GitHub Actions workflow which is triggered by the creation of a pull request. 49 | 50 | The workflow will be used for code verification in the next challenge. For now, you can include whatever step you want. For example, use the `echo` command: 51 | 52 | ```yml 53 | - name: Placeholder 54 | run: | 55 | echo "Will add code checks here in next challenge" 56 | ``` 57 | 58 | - Create a **branch protection rule** to block any direct pushes to the **main** branch. 59 | 60 | > **Note:** 61 | > By default, branch protection rules do not apply to administrators. 
If you're the administrator of the repo you're working with, you'll still be allowed to push directly to the repo. 62 | 63 | To trigger the workflow, do the following: 64 | 65 | - Create a branch in the repo. 66 | - Make a change and push it. For example, change the hyperparameter value. 67 | - Create a pull request to merge the new branch with the main branch. 68 | 69 | ## Success criteria 70 | 71 | To complete this challenge successfully, you should be able to show: 72 | 73 | - The branch protection rule for the main branch. 74 | - A successfully completed Action in your GitHub repo which is triggered by a new pull request. 75 | 76 | ## Useful resources 77 | 78 | - Learn more about source control for machine learning projects and [how to work with feature-based development and GitHub repos.](https://docs.microsoft.com/learn/modules/source-control-for-machine-learning-projects/) 79 | - [General documentation for GitHub Actions.](https://docs.github.com/actions/guides) 80 | - [Triggering a GitHub Actions workflow.](https://docs.github.com/actions/using-workflows/triggering-a-workflow) 81 | - [Events that trigger workflows.](https://docs.github.com/actions/using-workflows/events-that-trigger-workflows) 82 | - [Workflow syntax for GitHub Actions.](https://docs.github.com/actions/using-workflows/workflow-syntax-for-github-actions) 83 | 84 | -------------------------------------------------------------------------------- /documentation/04-unit-test-linting.md: -------------------------------------------------------------------------------- 1 | --- 2 | challenge: 3 | module: 'Work with linting and unit testing in GitHub Actions' 4 | challenge: '4: Work with linting and unit testing' 5 | --- 6 | 7 | 16 | 17 | # Challenge 4: Work with linting and unit testing 18 | 19 | 20 | 21 | ## Challenge scenario 22 | 23 | Code quality can be assessed in two ways: linting and unit testing. Use linting to check for any stylistic errors and unit testing to verify your functions. 
24 | 25 | ## Prerequisites 26 | 27 | If you haven't, complete the [previous challenge](03-trigger-workflow.md) before you continue. 28 | 29 | You'll complete the workflow created in the previous challenge. 30 | 31 | ## Objectives 32 | 33 | By completing this challenge, you'll learn how to: 34 | 35 | - Run linters and unit tests with GitHub Actions. 36 | - Troubleshoot errors to improve your code. 37 | 38 | > **Important!** 39 | > Each challenge is designed to allow you to explore how to implement DevOps principles when working with machine learning models. Some instructions may be intentionally vague, inviting you to think about your own preferred approach. If for example, the instructions ask you to create an Azure Machine Learning workspace, it's up to you to explore and decide how you want to create it. To make it the best learning experience for you, it's up to you to make it as simple or as challenging as you want. 40 | 41 | ## Challenge Duration 42 | 43 | - **Estimated Time**: 45 minutes 44 | 45 | ## Instructions 46 | 47 | In the **tests** folder, you'll find files that will perform linting and unit testing on your code. The `.flake8` file configures Flake8, which lints your code to check for stylistic errors. The `test_train.py` file performs unit tests on your code to check whether the functions behave as expected. 48 | 49 | - Go to the **Actions** tab in your GitHub repo and trigger the **Code checks** workflow manually. Inspect the output and fix your code where necessary. 50 | 51 | 
52 | Hint 53 |
54 | Whenever the linter finds an error, the GitHub Actions step will fail with exit code 1. Inspect the output of the workflow to see the specific error codes for the linter. Next to the error code, the output will also list the source file with the line number and column number to help you find the cause of the error. 55 |
56 | 57 | - Add linting and unit tests jobs to the workflow you created in the previous challenge. The workflow should be triggered by the creation of a new pull request. The workflow should run the Flake8 linter *and* run the Pytest unit tests. 58 | 59 |
60 | Hint 61 |
62 | To include unit testing in your workflow, install Pytest (using the requirements.txt), and run the tests with pytest tests/. By default, Pytest uses test files that are prefixed with test. 63 |
64 | 65 | - Create (or edit) a **branch protection rule** to require the two code checks to be successful before merging a pull request to the **main** branch. 66 | 67 |
68 | Hint 69 |
70 | To configure checks to be required to pass before merging, you can enable status checks in a branch protection rule. To find the checks, your jobs need to have a name. To ensure the checks run whenever a pull request is created, your checks should be part of a GitHub Actions workflow triggered by a pull_request event. 71 |
72 | 73 | To trigger the workflow, do the following: 74 | 75 | - Make a change and push it. For example, change the hyperparameter value. 76 | - Create a pull request, showing the integrated code checks. 77 | 78 | ## Success criteria 79 | 80 | To complete this challenge successfully, you should be able to show: 81 | 82 | - Both the **Linting** and **Unit tests** checks are completed successfully without any errors. The successful checks should be shown in a newly created pull request. 83 | 84 | ## Useful resources 85 | 86 | - [Flake8 documentation](https://flake8.pycqa.org/latest/user/index.html), including [error codes and their descriptions.](https://flake8.pycqa.org/en/latest/user/error-codes.html) 87 | - [A beginner's guide to Python testing.](https://miguelgfierro.com/blog/2018/a-beginners-guide-to-python-testing) 88 | - Learn more about [test infrastructure using Azure ML and how to create tests.](https://github.com/microsoft/recommenders/tree/main/tests) 89 | - Learn more about [testing with Pytest.](https://docs.microsoft.com/learn/modules/test-python-with-pytest/) 90 | 91 | In this challenge, all testing is executed with GitHub Actions. Optionally, you can learn how to [verify your code locally with Visual Studio Code](https://docs.microsoft.com/learn/modules/source-control-for-machine-learning-projects/5-verify-your-code-locally). Running linters and unit tests locally is not required for this challenge. 92 | 93 | -------------------------------------------------------------------------------- /documentation/05-environments.md: -------------------------------------------------------------------------------- 1 | --- 2 | challenge: 3 | module: 'Work with environments in GitHub Actions' 4 | challenge: '5: Work with environments' 5 | --- 6 | 7 | 16 | 17 | # Challenge 5: Work with environments 18 | 19 | 20 | 21 | ## Challenge scenario 22 | 23 | There are many advantages to using environments in machine learning projects. 
When you have separate environments for development, staging, and production, you can more easily control access to resources. 24 | 25 | Use environments to isolate workloads and control the deployment of the model. 26 | 27 | ## Prerequisites 28 | 29 | If you haven't, complete the [previous challenge](04-unit-test-linting.md) before you continue. 30 | 31 | **Your repo should be set to public**. If you're using a private repo without GitHub Enterprise Cloud, you'll not be able to create environments. [Change the visibility of your repo to public](https://docs.github.com/repositories/managing-your-repositorys-settings-and-features/managing-repository-settings/setting-repository-visibility) if your repo is set to private. 32 | 33 | You'll re-use the workflow you created for [challenge 2: trigger the Azure Machine Learning job with GitHub Actions](02-github-actions.md). 34 | 35 | ## Objectives 36 | 37 | By completing this challenge, you'll learn how to: 38 | 39 | - Set up a development and production environment. 40 | - Add a required reviewer. 41 | - Add environments to a GitHub Actions workflow. 42 | 43 | > **Important!** 44 | > Each challenge is designed to allow you to explore how to implement DevOps principles when working with machine learning models. Some instructions may be intentionally vague, inviting you to think about your own preferred approach. If for example, the instructions ask you to create an Azure Machine Learning workspace, it's up to you to explore and decide how you want to create it. To make it the best learning experience for you, it's up to you to make it as simple or as challenging as you want. 45 | 46 | ## Challenge Duration 47 | 48 | - **Estimated Time**: 60 minutes 49 | 50 | ## Instructions 51 | 52 | Initially, data scientists will train the model in an Azure Machine Learning workspace which is configured for experimentation. Ideally, we don't want to make the production data available in the experimentation or development environment. 
Instead, data scientists will only have access to a small dataset which should behave similarly to the production dataset. 53 | 54 | By reusing the training script created by the data scientists, you can train the model in the production environment using the production data, simply by changing the data input. 55 | 56 | > **Note:** 57 | > Though it's a best practice to associate a separate Azure Machine Learning workspace to each separate environment, you can use one workspace for both the development and production environment for this challenge (to avoid extra costs). 58 | 59 | - Within your GitHub repo, create a development and production environment. 60 | - Add an approval check for the production environment. 61 | - Remove the global repo **AZURE_CREDENTIALS** secret, so that each environment will only be able to use its own secret. 62 | - For each environment, add the **AZURE_CREDENTIALS** secret that contains the service principal output. 63 | 64 | > **Note:** 65 | > If you don't have the service principal output anymore from [challenge 2](02-github-actions.md), go back to the Azure portal and create it again. You can only get the necessary output at the time of creation. 66 | 67 | - Create a new data asset in the workspace with the following configuration: 68 | - **Name**: *diabetes-prod-folder* 69 | - **Path**: The **data** folder in the **production** folder which contains a larger CSV file to train the model. The path should point to the folder, not to the specific file. 70 | - Create one GitHub Actions workflow, triggered by changes being pushed to the main branch, with two jobs: 71 | - The **experiment** job that trains the model using the *diabetes-dev-folder* dataset in the **development environment**. 72 | - The **production** job that trains the model in the **production environment**, using the production data (the *diabetes-prod-folder* data asset as input). 
73 | - Add a condition that the **production** job is only allowed to run when the **experiment** job ran *successfully*. Success means that the Azure Machine Learning job ran successfully too. 74 | 75 |
76 | Hint 77 |
 78 | You'll need to do two things to ensure the production job only runs when the experiment job is successful: add `needs` to the workflow and add `--stream` to the CLI command to trigger the Azure Machine Learning job. 79 |
80 | 81 | ## Success criteria 82 | 83 | To complete this challenge successfully, you should be able to show: 84 | 85 | - Show the environment secrets in the settings. 86 | - A successfully completed Actions workflow that contains two jobs. The production job needs the experimentation job to be successful to run. 87 | - Show that the workflow required an approval before running the production workload. 88 | - Show two successful Azure Machine Learning jobs, one trained with the *diabetes-dev-folder* as input and the other with the *diabetes-prod-folder* as input. 89 | 90 | ## Useful resources 91 | 92 | - Learn more about [continuous deployment for machine learning.](https://docs.microsoft.com/learn/modules/continuous-deployment-for-machine-learning/) 93 | - [Workflow syntax for GitHub Actions.](https://docs.github.com/actions/using-workflows/workflow-syntax-for-github-actions) 94 | - [Using environments for deployment in GitHub.](https://docs.github.com/actions/deployment/targeting-different-environments/using-environments-for-deployment) 95 | - [How to create a secret in a GitHub repo.](https://docs.github.com/actions/security-guides/encrypted-secrets) 96 | - [CLI reference for jobs.](https://docs.microsoft.com/cli/azure/ml/job?view=azure-cli-latest) 97 | 98 | -------------------------------------------------------------------------------- /documentation/06-deploy-model.md: -------------------------------------------------------------------------------- 1 | --- 2 | challenge: 3 | module: 'Deploy a model with GitHub Actions' 4 | challenge: '6: Deploy and test the model' 5 | --- 6 | 7 | 16 | 17 | # Challenge 6: Deploy and test the model 18 | 19 | 20 | 21 | ## Challenge scenario 22 | 23 | To get value from a model, you'll want to deploy it. You can deploy a model to a managed online or batch endpoint. 24 | 25 | ## Prerequisites 26 | 27 | If you haven't, complete the [previous challenge](05-environments.md) before you continue. 
28 | 29 | ## Objectives 30 | 31 | By completing this challenge, you'll learn how to: 32 | 33 | - Register the model with GitHub Actions. 34 | - Deploy the model to an online endpoint with GitHub Actions. 35 | - Test the deployed model. 36 | 37 | > **Important!** 38 | > Each challenge is designed to allow you to explore how to implement DevOps principles when working with machine learning models. Some instructions may be intentionally vague, inviting you to think about your own preferred approach. If for example, the instructions ask you to create an Azure Machine Learning workspace, it's up to you to explore and decide how you want to create it. To make it the best learning experience for you, it's up to you to make it as simple or as challenging as you want. 39 | 40 | ## Challenge Duration 41 | 42 | - **Estimated Time**: 45 minutes 43 | 44 | ## Instructions 45 | 46 | When a model is trained and logged by using MLflow, you can easily register and deploy the model with Azure Machine Learning. After training the model, you want to deploy the model to a real-time endpoint so that it can be consumed by a web app. 47 | 48 | - Register the model from the production job output in the Azure Machine Learning Studio. 49 | - Create a GitHub Actions workflow which deploys the latest version of the registered model. 50 | - The workflow should create an endpoint and deploy your model to the endpoint using the CLI (v2). 51 | 52 |
53 | Hint 54 |
55 | The model's output was automatically generated by the MLflow auto log function in the training script. When you register the model as an MLflow type model, you don't need to provide a scoring script or environment to deploy the model. 56 |
57 | 58 | - Test whether the deployed model returns predictions as expected. 59 | 60 |
61 | Hint 62 |
63 | You can test the endpoint in the Studio, using the CLI, or by calling the endpoint from an app like Postman. 64 |
65 | 66 | Here's some sample data to test your endpoint with: 67 | ``` 68 | Pregnancies,PlasmaGlucose,DiastolicBloodPressure,TricepsThickness,SerumInsulin,BMI,DiabetesPedigree,Age 69 | 9,104,51,7,24,27.36983156,1.350472047,43 70 | 6,73,61,35,24,18.74367404,1.074147566,75 71 | 4,115,50,29,243,34.69215364,0.741159926,59 72 | ``` 73 | 74 | ## Success criteria 75 | 76 | To complete this challenge successfully, you should be able to show: 77 | 78 | - A model registered in the Azure Machine Learning workspace. 79 | - A successfully completed Action in your GitHub repo that deploys the model to a managed online endpoint. 80 | 81 | ## Useful resources 82 | 83 | - [Work with models in Azure Machine Learning.](https://docs.microsoft.com/azure/machine-learning/how-to-manage-models) 84 | - [Deploy an Azure Machine Learning model to a managed endpoint with CLI (v2).](https://docs.microsoft.com/learn/modules/deploy-azure-machine-learning-model-managed-endpoint-cli-v2/) 85 | - [Deploy MLflow models.](https://docs.microsoft.com/azure/machine-learning/how-to-deploy-mlflow-models) 86 | - [YAML reference to create an online endpoint.](https://docs.microsoft.com/azure/machine-learning/reference-yaml-endpoint-online) 87 | - [YAML reference to create a managed online deployment.](https://docs.microsoft.com/azure/machine-learning/reference-yaml-deployment-managed-online) 88 | - [CLI (v2) documentation for managing Azure ML online endpoints.](https://docs.microsoft.com/cli/azure/ml/online-endpoint?view=azure-cli-latest) 89 | - [CLI (v2) documentation for managing Azure ML online deployments.](https://docs.microsoft.com/cli/azure/ml/online-deployment?view=azure-cli-latest) 90 | - [GitHub Actions.](https://docs.github.com/actions/guides) -------------------------------------------------------------------------------- /documentation/media/00-01-github-secret.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MicrosoftLearning/mslearn-mlops/a103a1bcdc53849e30c8e1952cb1321db97b7248/documentation/media/00-01-github-secret.png -------------------------------------------------------------------------------- /experimentation/train-classification-model.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Train diabetes classification model\n", 8 | "\n", 9 | "This notebook reads a CSV file and trains a model to predict diabetes in patients. The data is already preprocessed and requires no feature engineering.\n", 10 | "\n", 11 | "The evaluation methods were used during experimentation to decide whether the model was accurate enough. Moving forward, there's a preference to use the autolog feature of MLflow to more easily deploy the model later on." 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "## Read data from local file\n", 19 | "\n" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 50, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "import pandas as pd" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 51, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "df = pd.read_csv('data/diabetes.csv')" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 52, 43 | "metadata": {}, 44 | "outputs": [ 45 | { 46 | "data": { 47 | "text/html": [ 48 | "
\n", 49 | "\n", 62 | "\n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " 
\n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | "
PatientIDPregnanciesPlasmaGlucoseDiastolicBloodPressureTricepsThicknessSerumInsulinBMIDiabetesPedigreeAgeDiabetic
01354778017180342343.5097261.213191210
1114743889293473621.2405760.158365230
21640031711547523541.5115230.079019230
318833509103782530429.5821921.282870431
4142411918559273542.6045360.549542220
.................................
99951469198695853726718.4975420.660240310
999614327360555175021.8653410.086589340
9997141096259959476730.7740182.301594431
99981958653014567302118.8118610.789572260
999913329381010054342738.8409430.175465230
\n", 224 | "

10000 rows × 10 columns

\n", 225 | "
" 226 | ], 227 | "text/plain": [ 228 | " PatientID Pregnancies PlasmaGlucose DiastolicBloodPressure \\\n", 229 | "0 1354778 0 171 80 \n", 230 | "1 1147438 8 92 93 \n", 231 | "2 1640031 7 115 47 \n", 232 | "3 1883350 9 103 78 \n", 233 | "4 1424119 1 85 59 \n", 234 | "... ... ... ... ... \n", 235 | "9995 1469198 6 95 85 \n", 236 | "9996 1432736 0 55 51 \n", 237 | "9997 1410962 5 99 59 \n", 238 | "9998 1958653 0 145 67 \n", 239 | "9999 1332938 10 100 54 \n", 240 | "\n", 241 | " TricepsThickness SerumInsulin BMI DiabetesPedigree Age \\\n", 242 | "0 34 23 43.509726 1.213191 21 \n", 243 | "1 47 36 21.240576 0.158365 23 \n", 244 | "2 52 35 41.511523 0.079019 23 \n", 245 | "3 25 304 29.582192 1.282870 43 \n", 246 | "4 27 35 42.604536 0.549542 22 \n", 247 | "... ... ... ... ... ... \n", 248 | "9995 37 267 18.497542 0.660240 31 \n", 249 | "9996 7 50 21.865341 0.086589 34 \n", 250 | "9997 47 67 30.774018 2.301594 43 \n", 251 | "9998 30 21 18.811861 0.789572 26 \n", 252 | "9999 34 27 38.840943 0.175465 23 \n", 253 | "\n", 254 | " Diabetic \n", 255 | "0 0 \n", 256 | "1 0 \n", 257 | "2 0 \n", 258 | "3 1 \n", 259 | "4 0 \n", 260 | "... ... 
\n", 261 | "9995 0 \n", 262 | "9996 0 \n", 263 | "9997 1 \n", 264 | "9998 0 \n", 265 | "9999 0 \n", 266 | "\n", 267 | "[10000 rows x 10 columns]" 268 | ] 269 | }, 270 | "execution_count": 52, 271 | "metadata": {}, 272 | "output_type": "execute_result" 273 | } 274 | ], 275 | "source": [ 276 | "df" 277 | ] 278 | }, 279 | { 280 | "cell_type": "markdown", 281 | "metadata": {}, 282 | "source": [ 283 | "## Split data" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": 53, 289 | "metadata": {}, 290 | "outputs": [], 291 | "source": [ 292 | "X, y = df[['Pregnancies','PlasmaGlucose','DiastolicBloodPressure','TricepsThickness','SerumInsulin','BMI','DiabetesPedigree','Age']].values, df['Diabetic'].values" 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": 54, 298 | "metadata": {}, 299 | "outputs": [ 300 | { 301 | "data": { 302 | "text/plain": [ 303 | "10000" 304 | ] 305 | }, 306 | "execution_count": 54, 307 | "metadata": {}, 308 | "output_type": "execute_result" 309 | } 310 | ], 311 | "source": [ 312 | "len(X)" 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "execution_count": 55, 318 | "metadata": {}, 319 | "outputs": [], 320 | "source": [ 321 | "import numpy as np" 322 | ] 323 | }, 324 | { 325 | "cell_type": "code", 326 | "execution_count": 56, 327 | "metadata": {}, 328 | "outputs": [ 329 | { 330 | "name": "stdout", 331 | "output_type": "stream", 332 | "text": [ 333 | "(array([0, 1], dtype=int64), array([6656, 3344], dtype=int64))\n" 334 | ] 335 | } 336 | ], 337 | "source": [ 338 | "print(np.unique(y, return_counts=True))" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": 57, 344 | "metadata": {}, 345 | "outputs": [], 346 | "source": [ 347 | "from sklearn.model_selection import train_test_split" 348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": 58, 353 | "metadata": {}, 354 | "outputs": [], 355 | "source": [ 356 | "X_train, X_test, y_train, y_test = 
train_test_split(X, y, test_size=0.30, random_state=0)" 357 | ] 358 | }, 359 | { 360 | "cell_type": "markdown", 361 | "metadata": {}, 362 | "source": [ 363 | "## Train model" 364 | ] 365 | }, 366 | { 367 | "cell_type": "code", 368 | "execution_count": 59, 369 | "metadata": {}, 370 | "outputs": [], 371 | "source": [ 372 | "from sklearn.linear_model import LogisticRegression" 373 | ] 374 | }, 375 | { 376 | "cell_type": "code", 377 | "execution_count": 60, 378 | "metadata": {}, 379 | "outputs": [], 380 | "source": [ 381 | "model = LogisticRegression(C=1/0.1, solver=\"liblinear\").fit(X_train, y_train)" 382 | ] 383 | }, 384 | { 385 | "cell_type": "markdown", 386 | "metadata": {}, 387 | "source": [ 388 | "## Evaluate model" 389 | ] 390 | }, 391 | { 392 | "cell_type": "code", 393 | "execution_count": 61, 394 | "metadata": {}, 395 | "outputs": [], 396 | "source": [ 397 | "import numpy as np" 398 | ] 399 | }, 400 | { 401 | "cell_type": "code", 402 | "execution_count": 62, 403 | "metadata": {}, 404 | "outputs": [], 405 | "source": [ 406 | "y_hat = model.predict(X_test)\n", 407 | "acc = np.average(y_hat == y_test)" 408 | ] 409 | }, 410 | { 411 | "cell_type": "code", 412 | "execution_count": 63, 413 | "metadata": {}, 414 | "outputs": [ 415 | { 416 | "data": { 417 | "text/plain": [ 418 | "0.7736666666666666" 419 | ] 420 | }, 421 | "execution_count": 63, 422 | "metadata": {}, 423 | "output_type": "execute_result" 424 | } 425 | ], 426 | "source": [ 427 | "acc" 428 | ] 429 | }, 430 | { 431 | "cell_type": "code", 432 | "execution_count": 64, 433 | "metadata": {}, 434 | "outputs": [], 435 | "source": [ 436 | "from sklearn.metrics import roc_auc_score" 437 | ] 438 | }, 439 | { 440 | "cell_type": "code", 441 | "execution_count": 65, 442 | "metadata": {}, 443 | "outputs": [], 444 | "source": [ 445 | "y_scores = model.predict_proba(X_test)\n", 446 | "auc = roc_auc_score(y_test,y_scores[:,1])" 447 | ] 448 | }, 449 | { 450 | "cell_type": "code", 451 | "execution_count": 66, 452 | 
"metadata": {}, 453 | "outputs": [ 454 | { 455 | "data": { 456 | "text/plain": [ 457 | "0.848386486889895" 458 | ] 459 | }, 460 | "execution_count": 66, 461 | "metadata": {}, 462 | "output_type": "execute_result" 463 | } 464 | ], 465 | "source": [ 466 | "auc" 467 | ] 468 | }, 469 | { 470 | "cell_type": "code", 471 | "execution_count": 67, 472 | "metadata": {}, 473 | "outputs": [], 474 | "source": [ 475 | "\n", 476 | "from sklearn.metrics import roc_curve\n", 477 | "import matplotlib.pyplot as plt" 478 | ] 479 | }, 480 | { 481 | "cell_type": "code", 482 | "execution_count": 68, 483 | "metadata": {}, 484 | "outputs": [ 485 | { 486 | "data": { 487 | "text/plain": [ 488 | "Text(0.5, 1.0, 'ROC Curve')" 489 | ] 490 | }, 491 | "execution_count": 68, 492 | "metadata": {}, 493 | "output_type": "execute_result" 494 | }, 495 | { 496 | "data": { 497 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEWCAYAAABrDZDcAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAvPElEQVR4nO3de5xN9f748de7mUoXpwu6yP0+Q5ImQkjkEpJKiZSaSNJNuirJkdyJ3CVSSimlk5PT6ZzSrwvJnVLTuEcuX5QumPH+/bHWOLtpLnvMrL323uv9fDz2Y9ba+7P3eq8x1nt/LuvzEVXFGGNMcJ3gdwDGGGP8ZYnAGGMCzhKBMcYEnCUCY4wJOEsExhgTcJYIjDEm4CwRGGNMwFkiMHFFRDaJyO8iclBEdorITBE5PVuZhiLyHxH5RUQOiMh7IpKcrczfRGSsiGxxP+sHd79kLscVEblPRNaKyK8isk1E3hSRC708X2OKgiUCE4/aq+rpQB3gYuDxrBdEpAHwL+BdoDRQEVgFfCYildwyJwEfATWB1sDfgAbAXqBeLsd8HrgfuA84G6gGvAO0LWjwIpJY0PcYUxhidxabeCIim4A7VfXf7v5woKaqtnX3PwXWqGrvbO/7J7BbVW8VkTuBZ4HKqnowjGNWBb4FGqjq0lzKfAy8oqrT3f3ubpyXu/sK9AEeABKBD4BfVbVfyGe8C3yiqqNFpDQwHmgCHATGqOq4/H9DxvyV1QhM3BKRMkAbIM3dPxVoCLyZQ/E3gKvc7RbAB+EkAVdzYFtuSaAArgXqA8nAa8BNIiIAInIW0BJ4XUROAN7Dqclc4B7/ARFpVcjjm4CyRGDi0Tsi8guwFdgFPO0+fzbO3/yOHN6zA8hq/y+RS5ncFLR8bp5T1f9T1d+BTwEFGruv3QB8oao/ApcCpVR1kKoeVtV0YBrQuQhiMAFkicDEo2tVtThwBVCD/13g9wFHgfNzeM/5wB53e28uZXJT0PK52Zq1oU6b7evAze5TXYBX3e3yQGkR2Z/1AJ4Azi2CGEwAWSIwcUtVPwFmAiPd/V+BL4BOORS/EaeDGODfQCsROS3MQ30ElBGRlDzK/AqcGrJ/Xk4hZ9t/DbhBRMrjNBm95T6/FdioqmeGPI
qr6tVhxmvMn1giMPFuLHCViFzk7j8G3OYO9SwuImeJyGCcUUHPuGVm41xs3xKRGiJygoiUEJEnROQvF1tV/R6YCLwmIleIyEkiUkxEOovIY26xlcB1InKqiFQBUvMLXFVX4NRSpgOLVHW/+9JS4BcReVREThGRBBGpJSKXFvi3YwyWCEycU9XdwMvAAHf//wGtgOtw2vU34wwxvdy9oKOqh3A6jL8FPgR+xrn4lgSW5HKo+4AXgAnAfuAHoCNOpy7AGOAw8BMwi/818+RnjhvLnJBzygTa4QyP3cj/ksUZYX6mMX9iw0eNMSbgrEZgjDEBZ4nAGGMCzhKBMcYEnCUCY4wJuJib3KpkyZJaoUIFv8MwxpiY8vXXX+9R1VI5vRZziaBChQosW7bM7zCMMSamiMjm3F6zpiFjjAk4SwTGGBNwlgiMMSbgLBEYY0zAWSIwxpiA8ywRiMgMEdklImtzeV1EZJyIpInIahGp61UsxhhjcudljWAmzsLfuWkDVHUfPYFJHsZijDEmF57dR6Cqi0WkQh5FOgAvuysxfSkiZ4rI+apaFEv+GWNMxMxZsoV3V2737POPHs3k8OEj1K10Dk+3r1nkn+/nDWUXELI0H7DNfe4viUBEeuLUGihXrlxEgjPGRA+vL7SFtWTj/wFQv+LZRf7Z+/fvZ8OGDSQmJnJxxRxvDC60mLizWFWnAlMBUlJSbAEFY2JQYS7mXl5oi0L9imfToc4FdKlfdF9U9+/fz8MPP8wb06dTpUoVpk+fTtOmtYrs80P5mQi2A2VD9su4zxljfOLlN+/CXMy9uNBGs8zMTBo2bMiGDRt45JFHGDhwIKeccopnx/MzESwA+ojI6zgLcx+w/gFjil5BLu5efvMO2sX8eOzdu5ezzz6bhIQEnn32WcqWLUtKSornx/UsEYjIa8AVQEkR2QY8DZwIoKqTgYXA1UAa8Btwu1exGBPLCvstvSAXd7tY+0NVefXVV7n//vsZOnQoPXr0oGPHjhE7vpejhm7O53UF7vHq+MbEquwX/sJ+S7eLe3TbunUrvXr1YuHChVx22WU0atQo4jHERGexMfEinG/32S/8diGPX6+99hp33XUXmZmZjB07lj59+pCQkBDxOCwRGOOh4/l2bxf+4DjrrLOoX78+U6dOpWLFir7FIU4LTexISUlRW5jGRKOcvu3ndOG3i3xwZWRkMGbMGA4fPkz//v0Bp39ARDw/toh8rao59jxbjcCYAsiraSeni759uzdZVq1aRWpqKl9//TU33njjsQQQiSSQH0sExuQi3G/4Weyib3Jy6NAhBg8ezNChQzn77LN58803uf7666MiAWSxRGCMK5z2fLvYm4L6/vvvGTZsGF26dGH06NGUKFHC75D+whKBCbTQi7+N1jFF5eDBg7z77rt07dqVWrVq8e2331KpUiW/w8qVJQITWHOWbOGJ+WsA56JvF35TFD788EN69uzJ5s2bqVu3LklJSVGdBMASgQmo0CQwpOOFdvE3hbZv3z769evHjBkzqFatGp988glJSUl+hxUWSwQmULKagrKagSwJmKKQmZlJo0aN+O6773j88ccZMGAAxYoV8zussFkiMHEpt2Geof0A1gxkCmvPnj3HJokbMmQI5cqVo27d2Ft11xKBiSvZv/FnH+ZpCcAUBVVl9uzZPPDAAwwdOpSePXty7bXX+h3WcbNEYOJCTgnALvjGC5s3b+auu+5i0aJFNGzYkCZNmvgdUqFZIjAxL/voH0sAxiuvvPIKd999N6rK+PHj6d27NyeccILfYRWaJQITs6zj10RaqVKlaNSoEVOmTKF8+fJ+h1NkLBGYmJHXnb9WCzBeOHLkCKNGjeLIkSM89dRTtGrVipYtW0bV9BBFwRKBiXq5dQBbAjBeWrFiBampqaxYsYLOnTtH1SRxRc0SgYlq1v5vIu2PP/5g0KBBDB8+nJIlS/LWW29x3XXX+R2WpywRmKhld/8aP6SlpTFy5EhuvfVWRo
0axVlnneV3SJ6zRGCikiUBE0kHDx5k/vz5dOvWjVq1arFhwwZfVwyLNEsEJqrYSCATaYsWLaJnz55s3bqVlJQUkpKSApUEwBKBiRJ2Q5iJtL1799K3b19efvllatSowaeffhozk8QVNUsExnfWIWwiLWuSuLS0NPr378+TTz4ZU5PEFTVLBMY31gxkIm337t2UKFGChIQEhg0bRvny5alTp47fYfku9u+NNjFnzpIt3DTlC56Yv4YlG/+P+hXPtiRgPKWqvPTSS1SrVo1p06YB0KFDB0sCLqsRmIixfgDjh02bNtGzZ08+/PBDGjduTLNmzfwOKepYIjARYf0Axg+zZ8/m7rvvRkSYOHEid911V1xMElfULBEYT1k/gPHTueeeS5MmTZg8eTLlytnfXW4sEZgik9OqYNYMZCLpyJEjDB8+nMzMTAYMGEDLli1p2bKl32FFPUsEpkhkb/rJYgnARMry5cu54447WLVqFV26dDk2SZzJnyUCc9xCawDW9GP88vvvv/PMM88wcuRISpUqxfz582N62Ug/eJoIRKQ18DyQAExX1aHZXi8HzALOdMs8pqoLvYzJFE5OF//6Fc+2b/7GN+np6YwePZru3bszYsSIQEwSV9Q8SwQikgBMAK4CtgFficgCVV0fUuxJ4A1VnSQiycBCoIJXMZnCyd78Yxd/45eff/6Zt99+m+7du1OzZk2+//77uFoxLNK8rBHUA9JUNR1ARF4HOgChiUCBv7nbZwA/ehiPOU428sdEk4ULF9KrVy+2b99O/fr1SUpKsiRQSF4OqL0A2Bqyv819LtRA4BYR2YZTG7g3pw8SkZ4iskxElu3evduLWE0usmoBdgew8duePXvo1q0bbdu2pXjx4nz22WeBnSSuqPndWXwzMFNVR4lIA2C2iNRS1aOhhVR1KjAVICUlRX2IM5BsTQATLbImiUtPT2fAgAE88cQTnHzyyX6HFTe8TATbgbIh+2Xc50KlAq0BVPULESkGlAR2eRiXCYMlARMNfvrpJ0qVKkVCQgIjR46kfPny1K5d2++w4o6XTUNfAVVFpKKInAR0BhZkK7MFaA4gIklAMcDafnxmScD4TVV58cUXqV69OlOnTgWgffv2lgQ84lkiUNUMoA+wCPgGZ3TQOhEZJCLXuMUeAnqIyCrgNaC7qlrTj48sCRi/paen06JFC+68807q1KlDixYt/A4p7nnaR+DeE7Aw23MDQrbXA428jMGEx0YGmWgwa9YsevfuTUJCApMnT6ZHjx42SVwE+N1ZbKKAzQxqokXp0qW58sormTRpEmXKlPE7nMCwRBBw1hRk/HT48GGGDh3K0aNHGThwIFdddRVXXXWV32EFjtW5AsySgPHTV199xSWXXMLTTz9Neno61j3oH0sEAZY1Z5AlARNJv/32G/369eOyyy5j3759LFiwgJdfftlmCvWRJYKAmrNky7G7hS0JmEjauHEj48ePp0ePHqxbt4727dv7HVLgWR9BAIU2CXWok33WD2OK3oEDB3j77be5/fbbqVmzJmlpaZQtWzb/N5qIsBpBAFmTkImk999/n5o1a3LnnXfy7bffAlgSiDKWCALGmoRMpOzevZuuXbvSrl07zjrrLL744gtq1Kjhd1gmB9Y0FBDZbxizJiHjpczMTC6//HI2btzIM888w2OPPcZJJ53kd1gmF5YIAsBuGDORsnPnTs455xwSEhIYNWoUFSpUoFatWn6HZfIRdtOQiJzqZSDGG9nvFZh7VwNLAqbIHT16lClTplCtWjWmTJkCQLt27SwJxIh8E4GINBSR9cC37v5FIjLR88hModkNYyYS0tLSaN68Ob169eLSSy+lVatWfodkCiicGsEYoBWwF0BVVwFNvAzKFA0bHWS89tJLL3HhhReyfPlypk2bxr///W8qVarkd1imgMLqI1DVrdnu+sv0JhxTFLI6htfv+NlGBxlPlStXjlatWjFhwgQuuMAGIMSqcBLBVhFpCKiInAjcj7
O+gIlCOXUMG1NUDh06xHPPPcfRo0cZNGgQzZs3p3nz5n6HZQopnETQC3geZ+H57cC/gN5eBmWOj/UJGC8tWbKE1NRU1q1bx2233Yaq2vxAcSKcPoLqqtpVVc9V1XNU9RYgyevATMFYEjBe+fXXX+nbty8NGjTgwIED/OMf/2DmzJmWBOJIOIlgfJjPGZ9YEjBe2rx5MxMnTqRXr16sW7eOtm3b+h2SKWK5Ng2JSAOgIVBKRPqGvPQ3IMHrwEx4LAkYL+zfv5958+Zx5513kpycTFpamq0YFsfyqhGcBJyOkyyKhzx+Bm7wPjQTDhsiaorau+++S3JyMr169To2SZwlgfiWa41AVT8BPhGRmaq6OYIxmTDZBHKmKO3atYv77ruPuXPnUrt2bRYsWGCTxAVEOKOGfhOREUBNoFjWk6p6pWdRmbBk1QZsiKgprMzMTBo1asSWLVsYPHgwjzzyCCeeeKLfYZkICScRvArMBdrhDCW9DdjtZVAmfFYbMIXx448/ct5555GQkMDzzz9PhQoVSE5O9jssE2HhjBoqoaovAkdU9RNVvQOw2oCP5izZwk1TvmD9jp/9DsXEqKNHjzJp0iRq1KjB5MmTAbj66qstCQRUODWCI+7PHSLSFvgRONu7kExe7M5hU1jfffcdPXr0YPHixbRo0YI2bdr4HZLxWTiJYLCInAE8hHP/wN+AB7wMyvxZ1txBwLGFZWyUkDkeL774In369KFYsWLMmDGD7t27241hJv9EoKr/cDcPAM0ARKSRl0GZ/8leA7CFZUxhVKhQgTZt2jBhwgTOP/98v8MxUSKvG8oSgBtx5hj6QFXXikg74AngFODiyIQYXHazmCmsQ4cO8fe//x2AwYMH2yRxJkd51QheBMoCS4FxIvIjkAI8pqrvRCC2QLMkYArr888/JzU1lW+//ZY77rjDJokzucorEaQAtVX1qIgUA3YClVV1b2RCCza7Y9gcr4MHD9K/f3/Gjx9P2bJl+eCDD2zVMJOnvIaPHlbVowCq+geQXtAkICKtRWSDiKSJyGO5lLlRRNaLyDoRmVOQz493do+AOR5btmxhypQp3HPPPaxdu9aSgMlXXjWCGiKy2t0WoLK7L4Cqau28PtjtY5gAXAVsA74SkQWquj6kTFXgcaCRqu4TkXMKcS5xI3TqCGPCsW/fPt5880169uxJcnIy6enplC5d2u+wTIzIKxEUds2BekCaqqYDiMjrQAdgfUiZHsAEVd0HoKq7CnnMmBfaN2D3CJhwzJ8/n969e7N7926aNm1K9erVLQmYAsm1aUhVN+f1COOzLwC2huxvc58LVQ2oJiKficiXItI6pw8SkZ4iskxElu3eHd+zW1jfgAnXzp076dSpE9dddx3nnXceS5cupXr16n6HZWJQWIvXe3z8qsAVQBlgsYhcqKr7Qwup6lRgKkBKSopGOMaIsdlETbgyMzNp3LgxW7duZciQIfTr188miTPHzctEsB1n+GmWMu5zobYBS1T1CLBRRL7DSQxfeRhXVLImIROObdu2Ubp0aRISEhg3bhwVK1a0qaJNoYUz6RwicoqIFLTO+RVQVUQqishJQGdgQbYy7+DUBhCRkjhNRekFPE7Ms3sGTH6OHj3K+PHjqVGjBpMmTQKgTZs2lgRMkcg3EYhIe2Al8IG7X0dEsl/Q/0JVM4A+wCLgG+ANVV0nIoNE5Bq32CJgr4isB/4LPBy0+xQsCZj8fPvttzRp0oT77ruPyy+/nHbt2vkdkokz4TQNDcQZAfQxgKquFJGK4Xy4qi4EFmZ7bkDItgJ93UcgWeewycv06dPp06cPp556KrNmzaJbt252d7ApcmFNQ62qB7L98cVth20kWeewyU/lypVp3749L7zwAueee67f4Zg4FU4iWCciXYAE9waw+4DPvQ0r/lnnsMnJH3/8waBBgwAYMmQIzZo1o1mzZj5HZeJdOJ3F9+KsV3wImIMzHfUDHsYU17JWF7N+AZPdZ599Rp06dX
juuefYvXs3TsupMd4Lp0ZQQ1X7A/29Dibe5bS6mCUB88svv/DEE08wYcIEypcvz6JFi2jZsqXfYZkACScRjBKR84B5wFxVXetxTHHLOoZNTrZt28b06dO59957efbZZzn99NP9DskETL5NQ6raDGdlst3AFBFZIyJPeh5ZnLKOYQOwd+/eY/cDJCUlkZ6ezvPPP29JwPgirBvKVHWnqo4DeuHcUzAg73eY7LJGCJlgU1XmzZtHcnIy9913Hxs2bACwZSONr8K5oSxJRAaKyBqcxes/x5kuwoTJRggZgB07dnD99dfTqVMnypYty7Jly2ySOBMVwukjmAHMBVqp6o8exxN37M5hA/+bJG779u0MHz6cBx98kMREv+d8NMaR71+iqjaIRCDxyJKA2bp1KxdccAEJCQlMmDCBihUrUq1aNb/DMuZPcm0aEpE33J9rRGR1yGNNyMplJg82Sii4MjMzGTdu3J8miWvVqpUlAROV8qoR3O/+tBmujoNNHxFc33zzDampqXzxxRe0adOG9u3b+x2SMXnKa4WyHe5m7xxWJ+sdmfBiV1ZtwDqHg2Xq1KnUqVOH7777jtmzZ/P+++9Trpx9ETDRLZzho1fl8Fybog4kHlltIHiqVq1Kx44dWb9+PbfccovNFGpiQq5NQyJyN843/0rZ+gSKA595HVgsC20WMvHt999/Z+DAgYgIQ4cOtUniTEzKq0YwB2iPs6pY+5DHJap6SwRii1nWLBQMixcv5qKLLmL48OEcOHDAJokzMSuvRKCqugm4B/gl5IGI2FfdXFgncfz7+eef6d27N02bNiUzM5OPPvqISZMmWTOQiVl5jRqagzNi6GuchWhC/8oVqORhXDHJ7iAOhh9//JGZM2fSt29fBg0axGmnneZ3SMYUSq6JQFXbuT/DWpbS2H0D8WzPnj288cYb9O7dmxo1arBx40ZbMczEjXDmGmokIqe527eIyGgRsatcLqxJKL6oKnPnziU5OZkHHniA7777DsCSgIkr4QwfnQT8JiIXAQ8BPwCzPY0qBtnsovHnxx9/5Nprr6Vz586UL1+er7/+2u4MNnEpnFmvMlRVRaQD8IKqvigiqV4HFmtspFB8yczMpEmTJmzfvp2RI0dy//332yRxJm6F85f9i4g8DnQDGovICcCJ3oYVW2ykUPzYvHkzZcqUISEhgYkTJ1KpUiWqVKnid1jGeCqcpqGbcBauv0NVd+KsRTDC06hiiI0Uig+ZmZmMHj2apKSkY5PEtWzZ0pKACYRwlqrcCbwKnCEi7YA/VPVlzyOLETZSKPatXbuWhg0b8tBDD9G8eXOuvfZav0MyJqLCGTV0I7AU6ATcCCwRkRu8DiwWWJNQ7Js8eTJ169YlPT2dOXPmsGDBAsqUsQX4TLCE00fQH7hUVXcBiEgp4N/APC8Di3bWJBTbVBURISkpiU6dOjF27FhKlSrld1jG+CKcRHBCVhJw7SXMRe/jmTUJxabffvuNAQMGkJCQwLBhw2jatClNmzb1OyxjfBXOBf0DEVkkIt1FpDvwPrDQ27CimzUJxaaPP/6Y2rVrM2rUKA4ePGiTxBnjCqez+GFgClDbfUxV1Ue9DixaWZNQ7Dlw4AB33XXXsemh//Of/zBhwgSbJM4YV17rEVQFRgKVgTVAP1XdHqnAopU1CcWeHTt28Morr9CvXz+eeeYZTj31VL9DMiaq5FUjmAH8A7geZwbS8QX9cBFpLSIbRCRNRB7Lo9z1IqIiklLQY0SSNQnFjt27dzN+vPMnW6NGDTZt2sSIESMsCRiTg7wSQXFVnaaqG1R1JFChIB8sIgnABJxlLZOBm0UkOYdyxYH7gSUF+Xw/2DQS0U9VmTNnDklJSTz00EPHJomzEUHG5C6vRFBMRC4WkboiUhc4Jdt+fuoBaaqarqqHgdeBDjmU+zswDPijwNFHkNUGot/WrVtp3749Xbt2pUqVKqxYscImiTMmDHkNH90BjA7Z3xmyr8CV+Xz2BcDWkP
1tQP3QAm5CKauq74vIw7l9kIj0BHoClCsX+YuwdRBHv4yMDK644gp27tzJmDFjuPfee0lISPA7LGNiQl4L03i6Arc7ed1ooHt+ZVV1KjAVICUlJeJj/qyDOHpt2rSJsmXLkpiYyJQpU6hUqRKVKtniecYUhJc3hm0Hyobsl3Gfy1IcqAV8LCKbgMuABdHaYWxNQtElIyODkSNHkpSUxMSJEwFo0aKFJQFjjoOXE6x/BVQVkYo4CaAz0CXrRVU9AJTM2heRj3GGqC7zMCYTB1avXk1qairLli2jQ4cOXH/99X6HZExM86xGoKoZQB9gEfAN8IaqrhORQSJyjVfHLWq28lh0mThxIpdccgmbN29m7ty5zJ8/n9KlS/sdljExLd8agTi3X3YFKqnqIHe94vNUdWl+71XVhWSbjkJVB+RS9oqwIo4wGzIaHbImiatVqxadO3dmzJgxlCxZMv83GmPyFU7T0ETgKM4ooUHAL8BbwKUexhUVbMio/3799VeefPJJEhMTGTFiBE2aNKFJkyZ+h2VMXAmnaai+qt6DO85fVfcBJ3kaVZSw2oC/PvroIy688ELGjh3LoUOHbJI4YzwSTiI44t4lrHBsPYKjnkYVBaw24J/9+/dz55130qJFCxITE1m8eDHjxo2zSeKM8Ug4iWAcMB84R0SeBf4fMMTTqKKA1Qb889NPP/H666/z6KOPsmrVKho3bux3SMbEtXz7CFT1VRH5GmgOCHCtqn7jeWRRwGoDkZN18b///vupXr06mzZtss5gYyIknDWLywG/Ae8BC4Bf3eeMKTRV5ZVXXiE5OZlHHnmE77//HsCSgDERFE7T0Ps401G/D3wEpAP/9DIov9m9A5GxZcsW2rZtS7du3ahevTorV66katWqfodlTOCE0zR0Yei+O1Fcb88iigLWP+C9rEnidu3axbhx4+jdu7dNEmeMTwo8xYSqLheR+vmXjG3WP+CN9PR0ypcvT2JiItOmTaNy5cpUqFDB77CMCbRw+gj6hjz6icgc4McIxOYLaxbyRkZGBsOGDSM5OZkJEyYA0Lx5c0sCxkSBcGoExUO2M3D6Ct7yJhz/WbNQ0Vu5ciWpqaksX76cjh070qlTJ79DMsaEyDMRuDeSFVfVfhGKJypYs1DReeGFF3jwwQcpUaIE8+bNs5lCjYlCuTYNiUiiqmYCjSIYj4kTWdNB1K5dm65du7J+/XpLAsZEqbxqBEuBusBKEVkAvAn8mvWiqr7tcWwmBh08eJD+/ftz4oknMnLkSJskzpgYEM59BMWAvTizj7YD2rs/jfmTf/3rX9SqVYvx48dz5MgRmyTOmBiRV43gHBHpC6zFmXAudMYv+x9ujtm3bx99+/Zl5syZVK9encWLF3P55Zf7HZYxJkx51QgSgNPdR/GQ7axH3LGho8dn165dzJs3j8cff5yVK1daEjAmxuRVI9ihqoMiFkkUsKGj4du5cyevvfYaDz744LFJ4kqUKOF3WMaY45BXjSBQk7/b+gPhUVVmzZpFcnIyjz/++LFJ4iwJGBO78koEzSMWhc/mLNnCE/PXAFYbyMumTZto3bo13bt3Jzk52SaJMyZO5No0pKqBaSzPahIa0vFCqw3kIiMjg2bNmrFnzx4mTJhAr169OOGEcAadGWOiXYEnnYtX1iSUs7S0NCpWrEhiYiIzZsygUqVKlC9f3u+wjDFFyL7SmRwdOXKEIUOGULNmzWOTxDVr1sySgDFxyGoE5i+WL19OamoqK1eupFOnTtx0001+h2SM8ZDVCMyfjBs3jnr16rFz507efvtt3njjDc4991y/wzLGeCjwicBuInNkTQdx8cUXc+utt7J+/Xo6duzoc1TGmEgIfNNQ0G8i++WXX3j88cc5+eSTGTVqFI0bN6Zx48Z+h2WMiaDA1wgguCOGPvjgA2rVqsXEiRNRVZskzpiACnQiCGqz0N69e7ntttto06YNp512Gp999hmjR49GJFA3kxtjXIFOBEFtFtq7dy
/z58/nqaeeYsWKFTRo0MDvkIwxPvI0EYhIaxHZICJpIvJYDq/3FZH1IrJaRD4SkYgPUg9Ks9COHTsYOXIkqkq1atXYvHkzgwYN4uSTT/Y7NGOMzzxLBO56xxOANkAycLOIJGcrtgJIUdXawDxguFfxBJWqMmPGDJKSknjqqadIS0sD4KyzzvI5MmNMtPCyRlAPSFPVdFU9DLwOdAgtoKr/VdXf3N0vgTIexhM4GzdupGXLlqSmpnLRRRexatUqmyTOGPMXXg4fvQDYGrK/DaifR/lU4J85vSAiPYGeAOXKxX8zTlHIyMjgyiuvZO/evUyaNImePXvaJHHGmBxFxX0EInILkAI0zel1VZ0KTAVISUkpkjGOoesPxJPvv/+eSpUqkZiYyEsvvUTlypUpW7as32EZY6KYl18RtwOhV6Ay7nN/IiItgP7ANap6yMN4/iTeRgwdOXKEwYMHU6tWLV544QUArrjiCksCxph8eVkj+AqoKiIVcRJAZ6BLaAERuRiYArRW1V0expKjeBkxtGzZMlJTU1m9ejWdO3fm5ptv9jskY0wM8axGoKoZQB9gEfAN8IaqrhORQSJyjVtsBHA68KaIrBSRBV7FE6+ef/556tevz549e3j33Xd57bXXOOecc/wOyxgTQzztI1DVhcDCbM8NCNlu4eXx45mqIiKkpKSQmprK8OHDOfPMM/0OyxgTg6Kis9iE7+eff+bRRx+lWLFijBkzhkaNGtGoUSO/wzLGxDAbTxhDFi5cSM2aNZk6dSqJiYk2SZwxpkgEMhHE2mRze/bs4ZZbbqFt27acccYZfP7554wYMcImiTPGFIlAJoJYGzq6b98+3nvvPZ5++mmWL19O/fp53ZdnjDEFE9g+gmgfOrp9+3ZeffVVHn74YapWrcrmzZutM9gY44lA1giimaoybdo0kpOTGThwID/88AOAJQFjjGcsEUSRH374gebNm9OzZ0/q1q3L6tWrqVKlit9hGWPiXOASQbR2FGdkZNC8eXOWLVvGlClT+OijjywJGGMiInB9BNHWUbxhwwYqV65MYmIis2bNonLlypQpY7NxG2MiJ3A1AoiOjuLDhw/zzDPPcOGFFzJhwgQAmjZtaknAGBNxgasRRIOlS5eSmprK2rVr6dKlC127dvU7JGNMgAWyRuCnsWPH0qBBg2P3Brz66quULFnS77CMMQFmiSBCsqaDqFevHj169GDdunW0a9fO56iMMcaahjx34MABHnnkEU455RTGjh1Lw4YNadiwod9hGWPMMVYj8NB7771HcnIy06dP5+STT7ZJ4owxUSlQiSBS9xDs3r2bLl26cM0111CiRAm+/PJLhg0bZpPEGWOiUqASQaTuIThw4AALFy7kmWeeYdmyZVx66aWeHs8YYwojcH0EXt1DsHXrVl555RUee+wxqlSpwubNmznjjDOK/DjGGFPUAlUj8MLRo0eZPHkyNWvWZPDgwccmibMkYIyJFZYICuH777/nyiuv5O6776ZevXqsWbPG5gcyxsScwDUNFZWMjAyuuuoq9u/fz4svvsjtt99uncHGmJhkiaCAvvnmG6pWrUpiYiKzZ8+mcuXKlC5d2u+wjDHmuFnTUJgOHTrE008/Te3atXnhhRcAaNy4sSUBY0zMsxpBGL788ktSU1NZv3493bp1o1u3bn6HZIwxRcZqBPkYNWoUDRs25JdffmHhwoW8/PLLlChRwu+wjDGmyFgiyMXRo0cBaNCgAb169WLt2rW0adPG56iMMaboWdNQNvv37+ehhx7i1FNPZfz48TZJnDEm7gWmRhDOPEPvvPMOycnJzJo1i+LFi9skccaYQAhMIshrnqFdu3Zx44030rFjR84991yWLl3KkCFD7L4AY0wgBCYRQO7zDP388898+OGHPPvssyxdupS6dev6EJ0xxvgjsH0EW7ZsYfbs2TzxxBNUqVKFLVu2ULx4cb/DMsaYiPO0RiAirUVkg4ikichjObx+sojMdV9fIiIVvI
wHnNFAEydOpGbNmgwZMuTYJHGWBIwxQeVZIhCRBGAC0AZIBm4WkeRsxVKBfapaBRgDDPMqHoDff/+NK664gnvuuYcGDRqwbt06myTOGBN4XtYI6gFpqpquqoeB14EO2cp0AGa52/OA5uJRD62qsnr1atasWcNLL73EokWLqFChgheHMsaYmOJlH8EFwNaQ/W1A/dzKqGqGiBwASgB7QguJSE+gJ0C5cse3qEzNC87grPq1GPjses4///zj+gxjjIlHMdFZrKpTgakAKSkpxzW4/+n2NYGaRRmWMcbEBS+bhrYDZUP2y7jP5VhGRBKBM4C9HsZkjDEmGy8TwVdAVRGpKCInAZ2BBdnKLABuc7dvAP6jdjuvMcZElGdNQ26bfx9gEZAAzFDVdSIyCFimqguAF4HZIpIG/B9OsjDGGBNBnvYRqOpCYGG25waEbP8BdPIyBmOMMXkL1BQTxhhj/soSgTHGBJwlAmOMCThLBMYYE3ASa6M1RWQ3sPk4316SbHctB4CdczDYOQdDYc65vKqWyumFmEsEhSEiy1Q1xe84IsnOORjsnIPBq3O2piFjjAk4SwTGGBNwQUsEU/0OwAd2zsFg5xwMnpxzoPoIjDHG/FXQagTGGGOysURgjDEBF5eJQERai8gGEUkTkcdyeP1kEZnrvr5ERCr4EGaRCuOc+4rIehFZLSIfiUh5P+IsSvmdc0i560VERSTmhxqGc84icqP7b71OROZEOsaiFsbfdjkR+a+IrHD/vq/2I86iIiIzRGSXiKzN5XURkXHu72O1iNQt9EFVNa4eOFNe/wBUAk4CVgHJ2cr0Bia7252BuX7HHYFzbgac6m7fHYRzdssVBxYDXwIpfscdgX/nqsAK4Cx3/xy/447AOU8F7na3k4FNfsddyHNuAtQF1uby+tXAPwEBLgOWFPaY8VgjqAekqWq6qh4GXgc6ZCvTAZjlbs8DmouIRDDGopbvOavqf1X1N3f3S5wV42JZOP/OAH8HhgF/RDI4j4Rzzj2ACaq6D0BVd0U4xqIWzjkr8Dd3+wzgxwjGV+RUdTHO+iy56QC8rI4vgTNFpFALscdjIrgA2Bqyv819LscyqpoBHABKRCQ6b4RzzqFScb5RxLJ8z9mtMpdV1fcjGZiHwvl3rgZUE5HPRORLEWkdsei8Ec45DwRuEZFtOOuf3BuZ0HxT0P/v+YqJxetN0RGRW4AUoKnfsXhJRE4ARgPdfQ4l0hJxmoeuwKn1LRaRC1V1v59BeexmYKaqjhKRBjirHtZS1aN+BxYr4rFGsB0oG7Jfxn0uxzIikohTndwbkei8Ec45IyItgP7ANap6KEKxeSW/cy4O1AI+FpFNOG2pC2K8wzicf+dtwAJVPaKqG4HvcBJDrArnnFOBNwBU9QugGM7kbPEqrP/vBRGPieAroKqIVBSRk3A6gxdkK7MAuM3dvgH4j7q9MDEq33MWkYuBKThJINbbjSGfc1bVA6paUlUrqGoFnH6Ra1R1mT/hFolw/rbfwakNICIlcZqK0iMYY1EL55y3AM0BRCQJJxHsjmiUkbUAuNUdPXQZcEBVdxTmA+OuaUhVM0SkD7AIZ8TBDFVdJyKDgGWqugB4Eaf6mIbTKdPZv4gLL8xzHgGcDrzp9otvUdVrfAu6kMI857gS5jkvAlqKyHogE3hYVWO2thvmOT8ETBORB3E6jrvH8hc7EXkNJ5mXdPs9ngZOBFDVyTj9IFcDacBvwO2FPmYM/76MMcYUgXhsGjLGGFMAlgiMMSbgLBEYY0zAWSIwxpiAs0RgjDEBZ4nARCURyRSRlSGPCnmUPVgEx5spIhvdYy1371At6GdMF5Fkd/uJbK99XtgY3c/J+r2sFZH3ROTMfMrXifXZOI33bPioiUoiclBVTy/qsnl8xkzgH6o6T0RaAiNVtXYhPq/QMeX3uSIyC/hOVZ/No3x3nFlX+xR1LCZ+WI3AxAQROd1dR2G5iKwRkb/MNCoi54vI4pBvzI3d51uKyB
fue98Ukfwu0IuBKu57+7qftVZEHnCfO01E3heRVe7zN7nPfywiKSIyFDjFjeNV97WD7s/XRaRtSMwzReQGEUkQkREi8pU7x/xdYfxavsCdbExE6rnnuEJEPheR6u6duIOAm9xYbnJjnyEiS92yOc3YaoLG77m37WGPnB44d8WudB/zce6C/5v7WkmcuyqzarQH3Z8PAf3d7QSc+YZK4lzYT3OffxQYkMPxZgI3uNudgCXAJcAa4DScu7LXARcD1wPTQt57hvvzY9w1D7JiCimTFWNHYJa7fRLOLJKnAD2BJ93nTwaWARVziPNgyPm9CbR29/8GJLrbLYC33O3uwAsh7x8C3OJun4kzF9Fpfv9728PfR9xNMWHixu+qWidrR0ROBIaISBPgKM434XOBnSHv+QqY4ZZ9R1VXikhTnMVKPnOn1jgJ55t0TkaIyJM489Sk4sxfM19Vf3VjeBtoDHwAjBKRYTjNSZ8W4Lz+CTwvIicDrYHFqvq72xxVW0RucMudgTNZ3MZs7z9FRFa65/8N8GFI+VkiUhVnmoUTczl+S+AaEenn7hcDyrmfZQLKEoGJFV2BUsAlqnpEnBlFi4UWUNXFbqJoC8wUkdHAPuBDVb05jGM8rKrzsnZEpHlOhVT1O3HWOrgaGCwiH6nqoHBOQlX/EJGPgVbATTgLrYCz2tS9qroon4/4XVXriMipOPPv3AOMw1mA57+q2tHtWP84l/cLcL2qbggnXhMM1kdgYsUZwC43CTQD/rLmsjjrMP+kqtOA6TjL/X0JNBKRrDb/00SkWpjH/BS4VkROFZHTcJp1PhWR0sBvqvoKzmR+Oa0Ze8StmeRkLs5EYVm1C3Au6ndnvUdEqrnHzJE6q83dBzwk/5tKPWsq4u4hRX/BaSLLsgi4V9zqkTiz0pqAs0RgYsWrQIqIrAFuBb7NocwVwCoRWYHzbft5Vd2Nc2F8TURW4zQL1QjngKq6HKfvYClOn8F0VV0BXAgsdZtongYG5/D2qcDqrM7ibP6FszDQv9VZfhGcxLUeWC7OouVTyKfG7sayGmdhluHAc+65h77vv0ByVmcxTs3hRDe2de6+CTgbPmqMMQFnNQJjjAk4SwTGGBNwlgiMMSbgLBEYY0zAWSIwxpiAs0RgjDEBZ4nAGGMC7v8DPMojSB0aO1YAAAAASUVORK5CYII=", 498 | "text/plain": [ 499 | "
" 500 | ] 501 | }, 502 | "metadata": { 503 | "needs_background": "light" 504 | }, 505 | "output_type": "display_data" 506 | } 507 | ], 508 | "source": [ 509 | "# plot ROC curve\n", 510 | "fpr, tpr, thresholds = roc_curve(y_test, y_scores[:,1])\n", 511 | "fig = plt.figure(figsize=(6, 4))\n", 512 | "# Plot the diagonal 50% line\n", 513 | "plt.plot([0, 1], [0, 1], 'k--')\n", 514 | "# Plot the FPR and TPR achieved by our model\n", 515 | "plt.plot(fpr, tpr)\n", 516 | "plt.xlabel('False Positive Rate')\n", 517 | "plt.ylabel('True Positive Rate')\n", 518 | "plt.title('ROC Curve')" 519 | ] 520 | } 521 | ], 522 | "metadata": { 523 | "interpreter": { 524 | "hash": "f2b2cd046deda8eabef1e765a11d0ec9aa9bd1d31d56ce79c815a38c323e14ec" 525 | }, 526 | "kernel_info": { 527 | "name": "python38-azureml" 528 | }, 529 | "kernelspec": { 530 | "display_name": "Python 3.9.5 ('base')", 531 | "language": "python", 532 | "name": "python3" 533 | }, 534 | "language_info": { 535 | "codemirror_mode": { 536 | "name": "ipython", 537 | "version": 3 538 | }, 539 | "file_extension": ".py", 540 | "mimetype": "text/x-python", 541 | "name": "python", 542 | "nbconvert_exporter": "python", 543 | "pygments_lexer": "ipython3", 544 | "version": "3.9.5" 545 | }, 546 | "nteract": { 547 | "version": "nteract-front-end@1.0.0" 548 | } 549 | }, 550 | "nbformat": 4, 551 | "nbformat_minor": 0 552 | } 553 | -------------------------------------------------------------------------------- /index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Online Hosted Instructions 3 | permalink: index.html 4 | layout: home 5 | --- 6 | 7 | # MLOps Challenges 8 | 9 | This repository contains hands-on challenges for end-to-end machine learning operations (MLOps) with Azure Machine Learning. 10 | 11 | To complete these exercises, you’ll need a Microsoft Azure subscription. 
If your instructor has not provided you with one, you can sign up for a free trial at [https://azure.microsoft.com](https://azure.microsoft.com/). 12 | 13 | ## Challenges 14 | 15 | {% assign challenge = site.pages | where_exp:"page", "page.url contains '/documentation'" %} 16 | | Module | Challenge | 17 | | --- | --- | 18 | {% for activity in challenge %}| {{ activity.challenge.module }} | [{{ activity.challenge.challenge }}{% if activity.challenge.type %} - {{ activity.challenge.type }}{% endif %}]({{ site.github.url }}{{ activity.url }}) | 19 | {% endfor %} -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | pythonpath = src 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pytest==7.1.2 2 | mlflow==1.27.0 3 | pandas==1.4.3 4 | sklearn==0.0 5 | scikit-learn==1.1.1 6 | -------------------------------------------------------------------------------- /src/job.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json 2 | code: model 3 | command: >- 4 | python 5 | -- 6 | -- 7 | inputs: 8 | training_data: 9 | type: uri_folder 10 | path: 11 | reg_rate: 0.01 12 | environment: azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu@latest 13 | compute: 14 | experiment_name: 15 | description: -------------------------------------------------------------------------------- /src/model/train.py: -------------------------------------------------------------------------------- 1 | # Import libraries 2 | 3 | import argparse 4 | import glob 5 | import os 6 | 7 | import pandas as pd 8 | 9 | from sklearn.linear_model import LogisticRegression 10 | 11 | 12 | # define functions 13 | def main(args): 14 | # TO 
DO: enable autologging 15 | 16 | 17 | # read data 18 | df = get_csvs_df(args.training_data) 19 | 20 | # split data 21 | X_train, X_test, y_train, y_test = split_data(df) 22 | 23 | # train model 24 | train_model(args.reg_rate, X_train, X_test, y_train, y_test) 25 | 26 | 27 | def get_csvs_df(path): 28 | if not os.path.exists(path): 29 | raise RuntimeError(f"Cannot use non-existent path provided: {path}") 30 | csv_files = glob.glob(f"{path}/*.csv") 31 | if not csv_files: 32 | raise RuntimeError(f"No CSV files found in provided data path: {path}") 33 | return pd.concat((pd.read_csv(f) for f in csv_files), sort=False) 34 | 35 | 36 | # TO DO: add function to split data 37 | 38 | 39 | def train_model(reg_rate, X_train, X_test, y_train, y_test): 40 | # train model 41 | LogisticRegression(C=1/reg_rate, solver="liblinear").fit(X_train, y_train) 42 | 43 | 44 | def parse_args(): 45 | # setup arg parser 46 | parser = argparse.ArgumentParser() 47 | 48 | # add arguments 49 | parser.add_argument("--training_data", dest='training_data', 50 | type=str) 51 | parser.add_argument("--reg_rate", dest='reg_rate', 52 | type=float, default=0.01) 53 | 54 | # parse args 55 | args = parser.parse_args() 56 | 57 | # return args 58 | return args 59 | 60 | # run script 61 | if __name__ == "__main__": 62 | # add space in logs 63 | print("\n\n") 64 | print("*" * 60) 65 | 66 | # parse args 67 | args = parse_args() 68 | 69 | # run main function 70 | main(args) 71 | 72 | # add space in logs 73 | print("*" * 60) 74 | print("\n\n") 75 | -------------------------------------------------------------------------------- /tests/.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = 3 | W504, 4 | C901, 5 | E41, 6 | E722, 7 | W, 8 | D, 9 | F, 10 | N, 11 | C, 12 | I 13 | max-line-length = 79 14 | exclude = 15 | .tox, 16 | .git, 17 | __pycache__, 18 | *.pyc, 19 | *.egg-info, 20 | .cache, 21 | .eggs, 22 | develop 23 | per-file-ignores = 24 | 
src/__init__.py:D104 25 | src/*/__init__.py:D104 26 | max-complexity = 10 27 | import-order-style = pep8 -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MicrosoftLearning/mslearn-mlops/a103a1bcdc53849e30c8e1952cb1321db97b7248/tests/__init__.py -------------------------------------------------------------------------------- /tests/datasets/first.csv: -------------------------------------------------------------------------------- 1 | index,first,last 2 | 0,Glenn,Hernandez 3 | 1,Sarah,Pedersen 4 | 2,Jill,Tracy 5 | 3,Melissa,Nelson 6 | 4,Hugh,Soto 7 | 5,Frank,Dees 8 | 6,Vita,Singleton 9 | 7,James,Papenfuss 10 | 8,Mary,Smithson 11 | 9,Bonnie,Begor 12 | -------------------------------------------------------------------------------- /tests/datasets/foo.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | current_directory = os.path.dirname(os.path.abspath(__file__)) 4 | 5 | print(current_directory) 6 | -------------------------------------------------------------------------------- /tests/datasets/second.csv: -------------------------------------------------------------------------------- 1 | index,first,last 2 | 0,Tina,Holloway 3 | 1,Katherine,Logan 4 | 2,Juan,Duncan 5 | 3,Doyle,Clyne 6 | 4,Jacob,Kazin 7 | 5,Kimberly,Tomes 8 | 6,Lisa,Cochrane 9 | 7,Troy,Hall 10 | 8,Erin,Johnson 11 | 9,Joan,Laborde 12 | -------------------------------------------------------------------------------- /tests/test_train.py: -------------------------------------------------------------------------------- 1 | from model.train import get_csvs_df 2 | import os 3 | import pytest 4 | 5 | 6 | def test_csvs_no_files(): 7 | with pytest.raises(RuntimeError) as error: 8 | get_csvs_df("./") 9 | assert error.match("No CSV files found in provided data") 10 | 11 | 12 | def 
test_csvs_no_files_invalid_path(): 13 | with pytest.raises(RuntimeError) as error: 14 | get_csvs_df("/invalid/path/does/not/exist/") 15 | assert error.match("Cannot use non-existent path provided") 16 | 17 | 18 | def test_csvs_creates_dataframe(): 19 | current_directory = os.path.dirname(os.path.abspath(__file__)) 20 | datasets_directory = os.path.join(current_directory, 'datasets') 21 | result = get_csvs_df(datasets_directory) 22 | assert len(result) == 20 23 | --------------------------------------------------------------------------------