├── .ci ├── azure-pipelines.yml └── vars │ └── ai-utilities.yml ├── .dependabot └── config.yml ├── .developer ├── pycharm_code_style.xml └── pycharm_inspections.xml ├── .github └── workflows │ └── pythonpublish.yml ├── .gitignore ├── .gitmodules ├── .pycrunch-config.yaml ├── .pylintrc ├── CODE_OF_CONDUCT.md ├── LICENSE ├── README.md ├── SECURITY.md ├── _config.yml ├── azure_utils ├── __init__.py ├── azureml_tools │ ├── __init__.py │ ├── config.py │ ├── experiment.py │ ├── resource_group.py │ ├── storage.py │ ├── subscription.py │ └── workspace.py ├── configuration │ ├── __init__.py │ ├── configuration_ui.py │ ├── configuration_validation.py │ ├── notebook_config.py │ ├── project.yml │ └── project_configuration.py ├── dev_ops │ ├── __init__.py │ └── testing_utilities.py ├── logger │ ├── README.md │ ├── __init__.py │ ├── ai_logger.py │ ├── blob_storage.py │ ├── key_vault.py │ ├── storageutils.py │ └── tests │ │ ├── __init__.py │ │ └── statsCollectionTest.py ├── machine_learning │ ├── __init__.py │ ├── contexts │ │ ├── __init__.py │ │ ├── model_management_context.py │ │ ├── realtime_score_context.py │ │ └── workspace_contexts.py │ ├── datasets │ │ ├── __init__.py │ │ └── stack_overflow_data.py │ ├── deep │ │ ├── __init__.py │ │ └── create_deep_model.py │ ├── duplicate_model.py │ ├── factories │ │ ├── __init__.py │ │ └── realtime_factory.py │ ├── item_selector.py │ ├── label_rank.py │ ├── realtime │ │ ├── __init__.py │ │ ├── image.py │ │ └── kubernetes.py │ ├── register_datastores.py │ ├── templates │ │ └── webtest.json │ ├── train_local.py │ ├── training_arg_parsers.py │ └── utils.py ├── notebook_widgets │ ├── __init__.py │ ├── notebook_configuration_widget.py │ └── workspace_widget.py ├── rts_estimator.py ├── samples │ ├── __init__.py │ └── deep_rts_samples.py └── utilities.py ├── docs ├── Configuration_ReadMe.md ├── DEVELOPMENT_README.md ├── app_insights_1.png ├── app_insights_availability.png ├── app_insights_perf.png ├── app_insights_perf_dash.png ├── conda_ui.png 
├── images │ └── pycharm_import_code_style.png ├── kubernetes.png ├── studio.png └── tkinter_ui.png ├── environment.yml ├── environment_r.yml ├── notebooks ├── AzureMachineLearningConfig.ipynb ├── R │ └── get_or_create_workspace.r ├── __init__.py ├── ai-deep-realtime-score.ipynb ├── ai-deep-realtime-score_no_files.ipynb ├── ai-ml-realtime-score.ipynb └── exampleconfiguration.ipynb ├── project_sample.yml ├── pytest.ini ├── sample_workspace_conf.yml ├── scripts ├── add_ssh_ip.py ├── add_webtest.sh ├── create_deep_model.py ├── create_model.py ├── deploy_app_insights_k8s.sh ├── deploy_function.sh ├── version.sh ├── webtest.json └── webtest.parameters.json ├── setup.py ├── source └── score.py ├── tests ├── __init__.py ├── configuration │ ├── __init__.py │ ├── test_config.py │ └── test_validation.py ├── conftest.py ├── machine_learning │ ├── __init__.py │ ├── contexts │ │ ├── __init__.py │ │ ├── test_fpga_deploy.py │ │ ├── test_realtime_contexts.py │ │ ├── test_realtime_contexts_integration.py │ │ └── test_realtime_contexts_mock.py │ ├── script │ │ └── create_model.py │ ├── test_deep_rts_samples.py │ ├── test_realtime.py │ ├── test_register_datastores.py │ └── test_utils.py ├── mocks │ ├── __init__.py │ └── azureml │ │ ├── __init__.py │ │ └── azureml_mocks.py └── test_notebooks.py └── workspace_conf.yml /.ci/azure-pipelines.yml: -------------------------------------------------------------------------------- 1 | # AI Utilities Sample 2 | # 3 | # A Github Service Connection must also be created with the name "AIArchitecturesAndPractices-GitHub" 4 | # https://docs.microsoft.com/en-us/azure/devops/pipelines/process/demands?view=azure-devops&tabs=yaml 5 | # 6 | # An Agent_Name Variable must be creating in the Azure DevOps UI. 7 | # https://docs.microsoft.com/en-us/azure/devops/pipelines/process/variables?view=azure-devops&tabs=yaml%2Cbatch#secret-variables 8 | # 9 | # This must point to an Agent Pool, with a Self-Hosted Linux VM with a DOcker. 
10 | # https://docs.microsoft.com/en-us/azure/devops/pipelines/agents/v2-linux?view=azure-devops 11 | 12 | resources: 13 | repositories: 14 | - repository: aitemplates 15 | type: github 16 | name: microsoft/AI 17 | endpoint: AIArchitecturesAndPractices-GitHub 18 | 19 | trigger: 20 | batch: true 21 | branches: 22 | include: 23 | - master 24 | 25 | pr: 26 | autoCancel: true 27 | branches: 28 | include: 29 | - master 30 | 31 | variables: 32 | - template: ./vars/ai-utilities.yml 33 | 34 | stages: 35 | - template: .ci/stages/deploy_notebooks_stages_v5.yml@aitemplates 36 | parameters: 37 | Agent: $(Agent_Name) 38 | jobDisplayName: ai-utilities 39 | TridentWorkloadTypeShort: ${{ variables.TridentWorkloadTypeShort }} 40 | DeployLocation: ${{ variables.DeployLocation }} 41 | ProjectLocation: ${{ variables.ProjectLocation }} 42 | conda: ${{ variables.conda }} 43 | post_cleanup: false 44 | 45 | multi_region_1: false 46 | multi_region_2: false 47 | 48 | flighting_release: false 49 | flighting_preview: false 50 | flighting_master: false 51 | 52 | sql_server_name: $(sql_server_name) 53 | sql_database_name: $(sql_database_name) 54 | sql_username: $(sql_username) 55 | sql_password: $(sql_password) 56 | 57 | container_name: $(container_name) 58 | account_name: $(account_name) 59 | account_key: $(account_key) 60 | datastore_rg: $(datastore_rg) 61 | -------------------------------------------------------------------------------- /.ci/vars/ai-utilities.yml: -------------------------------------------------------------------------------- 1 | variables: 2 | TridentWorkloadTypeShort: aiutilities 3 | DeployLocation: eastus 4 | ProjectLocation: "." 
5 | conda: ai-utilities 6 | -------------------------------------------------------------------------------- /.dependabot/config.yml: -------------------------------------------------------------------------------- 1 | version: 1 2 | update_configs: 3 | - package_manager: "python" 4 | directory: "/" 5 | update_schedule: "live" 6 | allowed_updates: 7 | - match: 8 | # Only includes indirect (aka transient/sub-dependencies) for 9 | # supported package managers: ruby:bundler, python, php:composer, rust:cargo 10 | update_type: "all" 11 | -------------------------------------------------------------------------------- /.developer/pycharm_code_style.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | -------------------------------------------------------------------------------- /.developer/pycharm_inspections.xml: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /.github/workflows/pythonpublish.yml: -------------------------------------------------------------------------------- 1 | name: Upload Python Package 2 | 3 | on: 4 | release: 5 | types: [created] 6 | 7 | jobs: 8 | deploy: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v2 12 | - name: Set up Python 13 | uses: actions/setup-python@v1 14 | with: 15 | python-version: '3.x' 16 | - name: Install dependencies 17 | run: | 18 | python -m pip install --upgrade pip 19 | pip install setuptools wheel twine 20 | - name: Build and publish 21 | env: 22 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} 23 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 24 | run: | 25 | python setup.py sdist bdist_wheel 26 | twine upload dist/* 27 | - name: Update version for next release 28 | run: | 29 | var=$(sed -ne "s/version=['\"]\([^'\"]*\)['\"] *,.*/\1/p" ./setup.py) 30 | IFS='.' 
read -r -a array <<<"$var" 31 | 32 | major="${array[1]}" 33 | minor="${array[2]}" 34 | 35 | if [ "${array[2]}" -eq 9 ]; then 36 | echo $major 37 | major=$((major + 1)) 38 | echo $major 39 | else 40 | minor=$((minor + 1)) 41 | fi 42 | 43 | version=0.$major.$minor 44 | 45 | sed -i "s/version=['\"]\([^'\"]*\)['\"] *,.*/version=\"$version\",/" ./setup.py 46 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | workspace_conf.yml 2 | project.yml 3 | data_folder 4 | model.pkl 5 | test-timing-output.xml 6 | *project.yml 7 | *.output_ipynb 8 | *lynx*jpg 9 | 10 | *env.yml 11 | my_env.yml 12 | lgbmenv.yml 13 | 14 | # Byte-compiled / optimized / DLL files 15 | __pycache__/ 16 | *.py[cod] 17 | *$py.class 18 | 19 | # Jetbrains 20 | .idea 21 | 22 | # AzureML 23 | .azureml 24 | 25 | # C extensions 26 | *.so 27 | 28 | # Distribution / packaging 29 | .Python 30 | build/ 31 | develop-eggs/ 32 | dist/ 33 | downloads/ 34 | eggs/ 35 | .eggs/ 36 | lib/ 37 | lib64/ 38 | parts/ 39 | sdist/ 40 | var/ 41 | wheels/ 42 | *.egg-info/ 43 | .installed.cfg 44 | *.egg 45 | MANIFEST 46 | 47 | # PyInstaller 48 | # Usually these files are written by a python script from a template 49 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
50 | *.manifest 51 | *.spec 52 | 53 | # Installer logs 54 | pip-log.txt 55 | pip-delete-this-directory.txt 56 | 57 | # Unit test / coverage reports 58 | htmlcov/ 59 | .tox/ 60 | .coverage 61 | .coverage.* 62 | .cache 63 | nosetests.xml 64 | coverage.xml 65 | *.cover 66 | .hypothesis/ 67 | .pytest_cache/ 68 | 69 | # Translations 70 | *.mo 71 | *.pot 72 | 73 | # Django stuff: 74 | *.log 75 | local_settings.py 76 | db.sqlite3 77 | 78 | # Flask stuff: 79 | instance/ 80 | .webassets-cache 81 | 82 | # Scrapy stuff: 83 | .scrapy 84 | 85 | # Sphinx documentation 86 | docs/_build/ 87 | 88 | # PyBuilder 89 | target/ 90 | 91 | # Jupyter Notebook 92 | .ipynb_checkpoints 93 | 94 | # pyenv 95 | .python-version 96 | 97 | # celery beat schedule file 98 | celerybeat-schedule 99 | 100 | # SageMath parsed files 101 | *.sage.py 102 | 103 | # Environments 104 | .env 105 | .venv 106 | env/ 107 | venv/ 108 | ENV/ 109 | env.bak/ 110 | venv.bak/ 111 | 112 | # Spyder project settings 113 | .spyderproject 114 | .spyproject 115 | 116 | # Rope project settings 117 | .ropeproject 118 | 119 | # mkdocs documentation 120 | /site 121 | 122 | # mypy 123 | .mypy_cache/ 124 | 125 | config/__pycahce__/ 126 | /.vscode/ 127 | /aml_config/ 128 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/ai-utilities/b6c097dddff00ae4f7321da52653d9d6d8a94884/.gitmodules -------------------------------------------------------------------------------- /.pycrunch-config.yaml: -------------------------------------------------------------------------------- 1 | engine: 2 | runtime: pytest 3 | # maximum number of concurrent test runners 4 | cpu-cores: 2 5 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft 
Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Azure Notebooks](https://notebooks.azure.com/launch.png)](https://notebooks.azure.com/import/gh/microsoft/AI-Utilities) 2 | Code style: black 3 | License: MIT 4 | # AI Utilities Sample Project 5 | This project is a sample showcasing how to create a common repository for utilities that can be used by other AI projects. 6 | 7 | # Working with Project 8 | ## Build Project 9 | 10 | ```bash 11 | pip install -e .\AI-Utilities\ 12 | ``` 13 | 14 | ## Run Unit Tests with Conda 15 | ```bash 16 | conda env create -f environment.yml 17 | conda activate ai-utilities 18 | python -m ipykernel install --prefix=[anaconda-root-dir]\envs\ai-utilities --name ai-utilities 19 | pytest tests 20 | ``` 21 | 22 | Example Anaconda Root Dir 23 | C:\Users\dcibo\Anaconda3 24 | 25 | # Contributing 26 | 27 | This project welcomes contributions and suggestions. Most contributions require you to agree to a 28 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us 29 | the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com. 30 | 31 | When you submit a pull request, a CLA bot will automatically determine whether you need to provide 32 | a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions 33 | provided by the bot. You will only need to do this once across all repos using our CLA. 34 | 35 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 
36 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or 37 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 38 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets Microsoft's [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)) of a security vulnerability, please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. 
Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd). 40 | 41 | 42 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-hacker -------------------------------------------------------------------------------- /azure_utils/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | azure_utils - __init__.py 3 | 4 | Copyright (c) Microsoft Corporation. All rights reserved. 5 | Licensed under the MIT License. 
6 | """ 7 | import os 8 | 9 | directory = os.path.dirname(os.path.realpath(__file__)) 10 | notebook_directory = directory.replace("azure_utils", "notebooks") 11 | -------------------------------------------------------------------------------- /azure_utils/azureml_tools/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | AI-Utilities - azureml_tools/__init__.py 3 | 4 | Copyright (c) Microsoft Corporation. All rights reserved. 5 | Licensed under the MIT License. 6 | """ 7 | -------------------------------------------------------------------------------- /azure_utils/azureml_tools/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | AI-Utilities - azureml_tools/config.py 3 | 4 | Copyright (c) Microsoft Corporation. All rights reserved. 5 | Licensed under the MIT License. 6 | """ 7 | 8 | import ast 9 | import logging 10 | 11 | from dotenv import dotenv_values, find_dotenv, set_key 12 | 13 | defaults = { 14 | "CLUSTER_NAME": "gpucluster24rv3", 15 | "CLUSTER_VM_SIZE": "Standard_NC24rs_v3", 16 | "CLUSTER_MIN_NODES": 0, 17 | "CLUSTER_MAX_NODES": 2, 18 | "WORKSPACE": "workspace", 19 | "RESOURCE_GROUP": "amlccrg", 20 | "REGION": "eastus", 21 | "DATASTORE_NAME": "datastore", 22 | "CONTAINER_NAME": "container", 23 | "ACCOUNT_NAME": "premiumstorage", 24 | "SUBSCRIPTION_ID": None, 25 | } 26 | 27 | 28 | def load_config(dot_env_path: find_dotenv(raise_error_if_not_found=True)): 29 | """ Load the variables from the .env file 30 | Returns: 31 | .env variables(dict) 32 | """ 33 | logger = logging.getLogger(__name__) 34 | logger.info(f"Found config in {dot_env_path}") 35 | return dotenv_values(dot_env_path) 36 | 37 | 38 | def _convert(value): 39 | try: 40 | return ast.literal_eval(value) 41 | except (ValueError, SyntaxError): 42 | return value 43 | 44 | 45 | class AzureMLConfig: 46 | """Creates AzureMLConfig object 47 | 48 | Stores all the configuration options and syncs them with 
the .env file 49 | """ 50 | 51 | _reserved = ("_dot_env_path",) 52 | 53 | def __init__(self): 54 | self._dot_env_path = find_dotenv(raise_error_if_not_found=True) 55 | 56 | for key, value in load_config(dot_env_path=self._dot_env_path).items(): 57 | self.__dict__[key] = _convert(value) 58 | 59 | for key, value in defaults.items(): 60 | if key not in self.__dict__: 61 | setattr(self, key, value) 62 | 63 | def __setattr__(self, name, value): 64 | if name not in self._reserved: 65 | if not isinstance(value, str): 66 | value = str(value) 67 | set_key(self._dot_env_path, name, value) 68 | self.__dict__[name] = value 69 | 70 | 71 | experiment_config = AzureMLConfig() 72 | -------------------------------------------------------------------------------- /azure_utils/azureml_tools/resource_group.py: -------------------------------------------------------------------------------- 1 | """ 2 | AI-Utilities - resource_group.py 3 | 4 | Copyright (c) Microsoft Corporation. All rights reserved. 5 | Licensed under the MIT License. 
6 | """ 7 | 8 | import logging 9 | 10 | from azure.common.credentials import get_cli_profile 11 | from azure.mgmt.resource import ResourceManagementClient 12 | from azure.mgmt.resource.resources.models import ResourceGroup 13 | 14 | 15 | def _get_resource_group_client(profile_credentials, subscription_id): 16 | return ResourceManagementClient(profile_credentials, subscription_id) 17 | 18 | 19 | def resource_group_exists(resource_group_name, resource_group_client=None): 20 | """ 21 | 22 | :param resource_group_name: 23 | :param resource_group_client: 24 | :return: 25 | """ 26 | return resource_group_client.resource_groups.check_existence(resource_group_name) 27 | 28 | 29 | class ResourceGroupException(Exception): 30 | """Except when checking for Resource Group""" 31 | 32 | pass 33 | 34 | 35 | def create_resource_group( 36 | profile_credentials, subscription_id: str, location: str, resource_group_name: str 37 | ) -> ResourceGroup: 38 | """Creates resource group if it doesn't exist 39 | 40 | Args: 41 | profile_credentials : credentials from Azure login 42 | subscription_id (str): subscription you wish to use 43 | location (str): location you wish the strage to be created in 44 | resource_group_name (str): the name of the resource group you want the storage to be created under 45 | 46 | Raises: 47 | ResourceGroupException: Exception if the resource group could not be created 48 | 49 | Returns: 50 | ResourceGroup: an Azure resource group object 51 | 52 | Examples: 53 | >>> profile = get_cli_profile() 54 | >>> profile.set_active_subscription("YOUR-SUBSCRIPTION") 55 | >>> cred, subscription_id, _ = profile.get_login_credentials() 56 | >>> rg = create_resource_group(cred, subscription_id, "eastus", "testrg2") 57 | """ 58 | logger = logging.getLogger(__name__) 59 | resource_group_client = _get_resource_group_client( 60 | profile_credentials, subscription_id 61 | ) 62 | if resource_group_exists( 63 | resource_group_name, resource_group_client=resource_group_client 64 | ): 65 
| logger.debug(f"Found resource group {resource_group_name}") 66 | resource_group = resource_group_client.resource_groups.get(resource_group_name) 67 | else: 68 | logger.debug(f"Creating resource group {resource_group_name} in {location}") 69 | resource_group_params = {"location": location} 70 | resource_group = resource_group_client.resource_groups.create_or_update( 71 | resource_group_name, resource_group_params 72 | ) 73 | 74 | if "Succeeded" not in resource_group.properties.provisioning_state: 75 | raise ResourceGroupException( 76 | f"Resource group not created successfully | State {resource_group.properties.provisioning_state}" 77 | ) 78 | 79 | return resource_group 80 | -------------------------------------------------------------------------------- /azure_utils/azureml_tools/storage.py: -------------------------------------------------------------------------------- 1 | """ 2 | AI-Utilities - storage.py 3 | 4 | Copyright (c) Microsoft Corporation. All rights reserved. 5 | Licensed under the MIT License. 
6 | """ 7 | from typing import Any, Tuple 8 | 9 | from azure.mgmt.storage import StorageManagementClient 10 | from azure.mgmt.storage.models import Kind, Sku, SkuName, StorageAccountCreateParameters 11 | 12 | from azure_utils.azureml_tools.resource_group import create_resource_group 13 | 14 | 15 | class StorageAccountCreateFailure(Exception): 16 | """Storage Account Create Failure Exception""" 17 | 18 | pass 19 | 20 | 21 | def create_premium_storage( 22 | profile_credentials: object, 23 | subscription_id: str, 24 | location: str, 25 | resource_group_name: str, 26 | storage_name: str, 27 | ) -> Tuple[Any, dict]: 28 | """Create premium blob storage 29 | 30 | Args: 31 | profile_credentials : credentials from Azure login (see example below for details) 32 | subscription_id (str): subscription you wish to use 33 | location (str): location you wish the strage to be created in 34 | resource_group_name (str): the name of the resource group you want the storage to be created under 35 | storage_name (str): the name of the storage account 36 | 37 | Raises: 38 | Exception: [description] 39 | 40 | Returns: 41 | [type]: [description] 42 | 43 | Example: 44 | >>> from azure.common.credentials import get_cli_profile 45 | >>> profile = get_cli_profile() 46 | >>> profile.set_active_subscription("YOUR-ACCOUNT") 47 | >>> cred, subscription_id, _ = profile.get_login_credentials() 48 | >>> storage = create_premium_storage(cred, subscription_id, "eastus", "testrg", "teststr", wait=False) 49 | """ 50 | storage_client = StorageManagementClient(profile_credentials, subscription_id) 51 | create_resource_group( 52 | profile_credentials, subscription_id, location, resource_group_name 53 | ) 54 | if not storage_client.storage_accounts.check_name_availability( 55 | storage_name 56 | ).name_available: 57 | storage_account = storage_client.storage_accounts.get_properties( 58 | resource_group_name, storage_name 59 | ) 60 | else: 61 | storage_async_operation = storage_client.storage_accounts.create( 
62 | resource_group_name, 63 | storage_name, 64 | StorageAccountCreateParameters( 65 | sku=Sku(name=SkuName.premium_lrs), 66 | kind=Kind.block_blob_storage, 67 | location="eastus", 68 | ), 69 | ) 70 | storage_account = storage_async_operation.result() 71 | 72 | if "Succeeded" not in storage_account.provisioning_state: 73 | raise StorageAccountCreateFailure( 74 | f"Storage account not created successfully | State {storage_account.provisioning_state}" 75 | ) 76 | 77 | storage_keys = storage_client.storage_accounts.list_keys( 78 | resource_group_name, storage_name 79 | ) 80 | storage_keys = {v.key_name: v.value for v in storage_keys.keys} 81 | 82 | return storage_account, storage_keys 83 | -------------------------------------------------------------------------------- /azure_utils/azureml_tools/subscription.py: -------------------------------------------------------------------------------- 1 | """ 2 | AI-Utilities - storageutils.py 3 | 4 | Copyright (c) Microsoft Corporation. All rights reserved. 5 | Licensed under the MIT License. 
6 | """ 7 | import logging 8 | import subprocess 9 | import sys 10 | 11 | # noinspection PyProtectedMember 12 | from azure.cli.core._profile import Profile 13 | from azure.common.credentials import get_cli_profile 14 | from azure.mgmt.resource import SubscriptionClient 15 | from knack.util import CLIError 16 | from prompt_toolkit import prompt 17 | from tabulate import tabulate 18 | from toolz import pipe 19 | 20 | _GREEN = "\033[0;32m" 21 | _BOLD = "\033[;1m" 22 | 23 | 24 | def run_az_cli_login() -> None: 25 | """ 26 | Run az login in shell 27 | """ 28 | process = subprocess.Popen( 29 | ["az", "login"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT 30 | ) 31 | for c in iter(lambda: process.stdout.read(1), b""): 32 | sys.stdout.write(_GREEN + _BOLD + c.decode(sys.stdout.encoding)) 33 | 34 | 35 | def list_subscriptions(profile: Profile = None) -> list: 36 | """Lists the subscriptions associated with the profile 37 | 38 | If you don't supply a profile it will try to get the profile from your Azure CLI login 39 | 40 | Args: 41 | profile (azure.cli.core._profile.Profile, optional): Profile you wish to use. Defaults to None. 
# --- azure_utils/azureml_tools/subscription.py (tail) ---


def subscription_profile() -> Profile:
    """Return the Azure CLI profile, running ``az login`` first when needed.

    Returns:
        azure.cli.core._profile.Profile: Azure profile
    """
    logger = logging.getLogger(__name__)
    try:
        return get_cli_profile()
    except CLIError:
        # No cached credentials: trigger an interactive login, then retry.
        logger.info("Not logged in, running az login")
        run_az_cli_login()
        return get_cli_profile()


def _prompt_sub_id_selection(profile: Profile) -> str:
    """Print the account's subscriptions as a table and ask the user to pick one.

    Returns the subscription id of the chosen entry.
    """
    subscriptions = list_subscriptions(profile=profile)
    print(tabulate(subscriptions))
    prompt_result = prompt("Please type in index of subscription you want to use: ")
    chosen = subscriptions[int(prompt_result)]
    print(
        f"You selected index {prompt_result} sub id {chosen['id']} name {chosen['Name']}"
    )
    return chosen["id"]


def select_subscription(profile: Profile = None, sub_name_or_id: str = None) -> Profile:
    """Sets active subscription

    If you don't supply a profile it will try to get the profile from your Azure CLI login.
    If you don't supply a subscription name or id it will list ones from your account and
    ask you to select one.

    Args:
        profile (azure.cli.core._profile.Profile, optional): Profile you wish to use. Defaults to None.
        sub_name_or_id (str, optional): The subscription name or id to use. Defaults to None.

    Returns:
        azure.cli.core._profile.Profile: Azure profile

    Example:
        >>> profile = select_subscription()
    """
    if profile is None:
        profile = subscription_profile()
    if sub_name_or_id is None:
        sub_name_or_id = _prompt_sub_id_selection(profile)
    profile.set_active_subscription(sub_name_or_id)
    return profile


# --- azure_utils/azureml_tools/workspace.py ---
"""
AI-Utilities - workspace

Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT License.
"""

import logging
import os
from pathlib import Path

import azureml
from azureml.core import Workspace
from azureml.core.authentication import (
    AbstractAuthentication,
    AuthenticationException,
    AzureCliAuthentication,
    InteractiveLoginAuthentication,
    ServicePrincipalAuthentication,
)

_DEFAULT_AML_PATH = "aml_config/azml_config.json"


def _get_auth() -> AbstractAuthentication:
    """Return an authentication object for the Azure ML workspace.

    Preference order: service principal (when AML_SP_PASSWORD is set),
    then Azure CLI credentials, then interactive login.
    """
    logger = logging.getLogger(__name__)
    sp_password = os.environ.get("AML_SP_PASSWORD", None)
    if sp_password:
        logger.debug("Trying to authenticate with Service Principal")
        # NOTE(review): the env var name "AML_SP_TENNANT_ID" (sic) is kept
        # as-is for backward compatibility with existing deployments.
        return ServicePrincipalAuthentication(
            os.environ.get("AML_SP_TENNANT_ID"),
            os.environ.get("AML_SP_USERNAME"),
            sp_password,
        )

    logger.debug("Trying to authenticate with CLI Authentication")
    try:
        cli_auth = AzureCliAuthentication()
        # Force a token fetch so a stale CLI login fails here, not later.
        cli_auth.get_authentication_header()
        return cli_auth
    except AuthenticationException:
        logger.debug("Trying to authenticate with Interactive login")
        return InteractiveLoginAuthentication()


def create_workspace(
    workspace_name: str,
    resource_group: str,
    subscription_id: str,
    workspace_region: str,
    filename: str = "azml_config.json",
) -> Workspace:
    """Create (or get, via exist_ok) an AML workspace and write its config file."""
    logger = logging.getLogger(__name__)

    # noinspection PyTypeChecker
    workspace = azureml.core.Workspace.create(
        name=workspace_name,
        subscription_id=subscription_id,
        resource_group=resource_group,
        location=workspace_region,
        exist_ok=True,
        auth=_get_auth(),
    )

    logger.info(workspace.get_details())
    workspace.write_config(file_name=filename)
    return workspace


def load_workspace(path: str) -> Workspace:
    """Load an Azure Machine Learning workspace from the config file at *path*."""
    # noinspection PyTypeChecker
    workspace = azureml.core.Workspace.from_config(auth=_get_auth(), path=path)
    summary = [
        "Workspace name: " + str(workspace.name),
        "Azure region: " + str(workspace.location),
        "Subscription id: " + str(workspace.subscription_id),
        "Resource group: " + str(workspace.resource_group),
    ]
    logging.getLogger(__name__).info("\n".join(summary))
    return workspace


def workspace_for_user(
    workspace_name: str,
    resource_group: str,
    subscription_id: str,
    workspace_region: str,
    config_path: str = _DEFAULT_AML_PATH,
) -> Workspace:
    """Load the workspace from *config_path* when present, otherwise create it."""
    if os.path.isfile(config_path):
        return load_workspace(config_path)

    return create_workspace(
        workspace_name,
        resource_group,
        subscription_id=subscription_id,
        workspace_region=workspace_region,
        filename=Path(config_path).name,
    )
# --- azure_utils/configuration/configuration_ui.py ---
"""
- configuration_ui.py

Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT License.
"""

from tkinter import Button, END, Frame, Label, TRUE, Text, messagebox

# NOTE(review): at runtime this class also needs Validation, ValidationResult
# (azure_utils.configuration.configuration_validation) and ProjectConfiguration
# (azure_utils.configuration.project_configuration); they are referenced only
# inside method bodies.


class SettingsUpdate(Frame):
    """
    UI wrapper for project configuration settings.

    Provide a configuration file as described in configuration.ProjectConfiguration.

    The window is a grid where each row is:
        setting description | Text control showing/accepting the value
    with Save and Cancel buttons on the final row. Save writes any edited
    values back to the configuration file.
    """

    def __init__(self, project_configuration, master):
        Frame.__init__(self, master=master)

        # self.configuration: ProjectConfiguration instance being edited
        # self.master_win: the Tk root window
        # self.settings: maps setting name -> Text widget holding its value
        self.configuration = project_configuration
        self.master_win = master
        self.settings = {}

        # Validator used when the user opts in to field validation on save.
        self.validator = Validation()

        self.master_win.title(self.configuration.project_name())
        self.master_win.resizable(width=TRUE, height=TRUE)
        self.master_win.configure(padx=10, pady=10)

        # One grid row per setting, then the save/cancel buttons.
        row = 0
        for setting in self.configuration.get_settings():
            if not isinstance(setting, dict):
                print("Found setting does not match pattern...")
                continue

            # Each setting dict is a singleton: {name: [description, value]}.
            if len(setting.keys()) != 1:
                continue
            for setting_name, details in setting.items():
                description = details[0][ProjectConfiguration.setting_description]
                value = details[1][ProjectConfiguration.setting_value]

                label = Label(self.master_win, text=description)
                label.grid(row=row, column=0, columnspan=1, sticky="nwse")
                entry = Text(self.master_win, height=1, width=40, wrap="none")
                entry.grid(row=row, column=1, columnspan=2, sticky="nwse", pady=10)
                entry.insert(END, value)

                self.settings[setting_name] = entry
                row += 1

        save_button = Button(self.master_win, text="Save", command=self.save_setting)
        save_button.grid(row=row, column=1, columnspan=1, sticky="nwse")
        close_button = Button(self.master_win, text="Cancel", command=self.cancel)
        close_button.grid(row=row, column=2, columnspan=1, sticky="nwse")

    def cancel(self):
        """Cancel clicked: close the window without saving."""
        self.master_win.destroy()

    def save_setting(self):
        """
        Save clicked:
        - collect and clean each field's user input,
        - optionally validate it,
        - write all values into the configuration,
        - persist and close when validation passes (or is accepted).
        """
        run_validation = self.prompt_field_validation()
        field_responses = []

        for setting_name, widget in self.settings.items():
            user_entered = widget.get("1.0", END).strip().replace("\n", "")

            if run_validation:
                result = self.validator.validate_input(setting_name, user_entered)
                field_responses.append(result)
                Validation.dump_validation_result(result)
            else:
                print("Updating {} with '{}'".format(setting_name, user_entered))

            self.configuration.set_value(setting_name, user_entered)

        if self.validate_responses(field_responses):
            print("Writing out new configuration options...")
            self.configuration.save_configuration()
            self.cancel()

    @staticmethod
    def validate_responses(validation_responses) -> bool:
        """
        Determine whether any failures or warnings occurred; when they did,
        let the user decide to continue anyway or stay and fix them.

        :param validation_responses: Response to validate
        :return: `bool` validation outcome
        """
        if not validation_responses:
            return True

        failed = [
            resp for resp in validation_responses
            if resp.status == ValidationResult.failure
        ]
        warned = [
            resp for resp in validation_responses
            if resp.status == ValidationResult.warning
        ]

        error_count, message = SettingsUpdate.get_failed_message(failed)
        error_count, message = SettingsUpdate.get_warning_message(
            warned, error_count, message
        )
        return SettingsUpdate.print_if_errors(error_count, message)

    @staticmethod
    def print_if_errors(error_count, message):
        """
        Ask the user whether to proceed when validation issues were found.

        :param error_count: number of failures/warnings counted
        :param message: human-readable description of the issues
        :return: True to proceed with saving, False to stay on the screen
        """
        if error_count <= 0:
            return True

        user_prefix = "The following fields either failed validation or produced a warning :\n\n"
        user_postfix = "Click Yes to continue with these validation issues or No to correct them."
        return messagebox.askyesno(
            "Validate Errors", "{}{}{}".format(user_prefix, message, user_postfix)
        )

    @staticmethod
    def get_warning_message(warn, error_count=0, message=""):
        """
        Append a WARNINGS section for *warn* to *message*.

        Only warnings other than "field not recognized" bump *error_count*,
        though every warning is listed in the message.

        :param warn: warning validation results
        :param error_count: running count to add to
        :param message: message text to append to
        :return: (updated error_count, updated message)
        """
        if warn:
            message += "WARNINGS:\n"
            for resp in warn:
                if resp.reason != Validation.FIELD_NOT_RECOGNIZED:
                    error_count += 1
                message += " {}:\n{}\n\n".format(resp.type, resp.reason)
            message += "\n"
        return error_count, message

    @staticmethod
    def get_failed_message(failed, error_count=0, message=""):
        """
        Append an ERRORS section for *failed* to *message*, counting each one.

        :param failed: failed validation results
        :param error_count: running count to add to
        :param message: message text to append to
        :return: (updated error_count, updated message)
        """
        if failed:
            message += "ERRORS:\n"
            for resp in failed:
                error_count += 1
                message += " {}\n".format(resp.type)
            message += "\n"
        return error_count, message

    def prompt_field_validation(self) -> bool:
        """
        List the validatable fields and ask whether to validate them on save.

        :return: `bool` based on the user's response
        """
        valid_fields = "\n"
        for setting_name in self.settings:
            if self.validator.is_field_valid(setting_name):
                valid_fields += "{}\n".format(setting_name)

        user_prefix = "The following fields can be validated :\n\n"
        user_postfix = "\nValidation will add several seconds to the save, would you like to validate these settings?"
        return messagebox.askyesno(
            "Validate Inputs", "{}{}{}".format(user_prefix, valid_fields, user_postfix)
        )
# --- azure_utils/configuration/notebook_config.py ---
"""
- notebook_config.py

Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT License.

Uses:
- tkinter: Python GUI library
- configuration.ProjectConfiguration: reads/writes the configuration YAML file
- configuration_ui.SettingsUpdate: tkinter UI that loads a configuration file
  and lets the user edit its settings

To exercise this file directly, run it from this directory; it will use the
project.yml file as the configuration file under test.
"""

from tkinter import Tk

from azure_utils.configuration.configuration_ui import SettingsUpdate
from azure_utils.configuration.project_configuration import ProjectConfiguration

project_configuration_file = "project.yml"
train_py_default = "train.py"
score_py_default = "score.py"


def get_or_configure_settings(configuration_yaml: str = project_configuration_file):
    """
    Return the project settings, launching the configuration UI only when the
    subscription id is still the "<>" placeholder. This keeps automation
    working when a fully populated configuration file is supplied.

    :param configuration_yaml: Location of configuration yaml
    """
    placeholder = get_settings(configuration_yaml).get_value("subscription_id") == "<>"
    if placeholder:
        configure_settings(configuration_yaml)
    return get_settings(configuration_yaml)


def configure_settings(configuration_yaml: str = project_configuration_file):
    """
    Launch a tkinter UI to edit the settings in *configuration_yaml*.

    ProjectConfiguration opens an existing YAML file or creates a new one, so
    projects should ship a simple configuration file listing all settings and
    let the user fill it in through this UI. When no path is given the default
    ./project.yml is used.

    :param configuration_yaml: Location of configuration yaml
    """
    window = Tk()
    app = SettingsUpdate(ProjectConfiguration(configuration_yaml), window)
    app.mainloop()


def get_settings(
    configuration_yaml: str = project_configuration_file,
) -> ProjectConfiguration:
    """
    Load the project settings from *configuration_yaml*; when the file does
    not exist an empty configuration is created. Defaults to ./project.yml.

    :param configuration_yaml: Project configuration yml
    :return: loaded ProjectConfiguration object
    """
    return ProjectConfiguration(configuration_yaml)
96 | 97 | ProjectConfiguration will open an existing YAML file or create a new one. It is 98 | suggested that your project simply create a simple configuration file containing 99 | all of you settings so that the user simply need to modify it with the UI. 100 | 101 | In this instance, we assume that the default configuration file is called project.yml. 102 | This will be used if the user passes nothing else in. 103 | 104 | :param configuration_yaml: Project configuration yml 105 | :return: loaded ProjectConfiguration object 106 | """ 107 | return ProjectConfiguration(configuration_yaml) 108 | 109 | 110 | if __name__ == "__main__": 111 | configure_settings() 112 | -------------------------------------------------------------------------------- /azure_utils/configuration/project.yml: -------------------------------------------------------------------------------- 1 | project_name: AI Default Project 2 | settings: 3 | - subscription_id: 4 | - description: Azure Subscription Id 5 | - value: abcd1234 6 | - resource_group: 7 | - description: Azure Resource Group Name 8 | - value: <> 9 | - workspace_name: 10 | - description: Azure ML Workspace Name 11 | - value: <> 12 | - workspace_region: 13 | - description: Azure ML Workspace Region 14 | - value: <> 15 | - image_name: 16 | - description: Docker Container Image Name 17 | - value: <> 18 | - aks_service_name: 19 | - description: AKS Service Name 20 | - value: <> 21 | - aks_name: 22 | - description: AKS Cluster Name 23 | - value: <> 24 | - storage_account: 25 | - description: Azure Storage Account 26 | - value: my_account-name* 27 | -------------------------------------------------------------------------------- /azure_utils/dev_ops/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | AI-Utilities - __init__.py 3 | 4 | Copyright (c) Microsoft Corporation. All rights reserved. 5 | Licensed under the MIT License. 
# --- azure_utils/dev_ops/testing_utilities.py ---
"""
AI-Utilities - testing_utilities.py

Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT License.
"""
import json
import os
import re
import sys

import nbformat
import papermill as pm
from junit_xml import TestCase, TestSuite, to_xml_report_file
from nbconvert import MarkdownExporter, RSTExporter

notebook_output_ext = ".output_ipynb"


def run_notebook(
    input_notebook,
    add_nunit_attachment,
    parameters=None,
    kernel_name="ai-architecture-template",
    root=".",
):
    """
    Execute a notebook (in the correct directory) and record its results.

    Parameters
    ----------
    :param input_notebook: Name of Notebook to Test
    :param add_nunit_attachment: callback used to attach the exported output
    :param parameters: parameters handed to papermill
    :param kernel_name: Jupyter Kernal
    :param root: directory containing the notebook
    """
    output_notebook = input_notebook.replace(".ipynb", notebook_output_ext)
    output_path = os.path.join(root, output_notebook)
    try:
        results = pm.execute_notebook(
            os.path.join(root, input_notebook),
            output_path,
            parameters=parameters,
            kernel_name=kernel_name,
        )

        for cell in results.cells:
            if cell.cell_type == "code":
                assert not cell.metadata.papermill.exception, "Error in Python Notebook"
    finally:
        # Always export whatever output papermill wrote, even on failure.
        with open(output_path) as json_file:
            jupyter_output = nbformat.reads(
                json.dumps(json.load(json_file)), as_version=nbformat.NO_CONVERT
            )

        export_md(
            jupyter_output,
            output_notebook,
            add_nunit_attachment,
            file_ext=".txt",
            root=root,
        )

        # Scrape deployment timings printed by the notebook into a JUnit report.
        timing_pattern = r"Deployed (.*) with name (.*). Took (.*) seconds."
        with open(output_path) as file:
            data = file.read()

        test_cases = [
            TestCase(
                name=match[0] + " creation",
                classname=input_notebook,
                elapsed_sec=float(match[2]),
                status="Success",
            )
            for match in re.findall(timing_pattern, data)
        ]
        test_suite = TestSuite("my test suite", test_cases)

        with open("test-timing-output.xml", "w") as test_file:
            to_xml_report_file(test_file, [test_suite], prettyprint=False)


def export_notebook(
    exporter, jupyter_output, output_notebook, add_nunit_attachment, file_ext, root="."
):
    """
    Render *jupyter_output* with *exporter* and write it next to the notebook.

    :param exporter: nbconvert exporter instance
    :param jupyter_output: parsed notebook node to export
    :param output_notebook: output notebook filename (extension is replaced)
    :param add_nunit_attachment: optional callback(path, name) for attachments
    :param file_ext: extension for the exported file
    :param root: directory to write into
    """
    body, _ = exporter.from_notebook_node(jupyter_output)
    path = os.path.join(root, output_notebook.replace(notebook_output_ext, file_ext))
    with open(path, "w") as text_file:
        # Echo to stderr so CI logs capture the rendered output too.
        sys.stderr.write(body)
        text_file.write(body)

    if add_nunit_attachment is not None:
        add_nunit_attachment(path, output_notebook)


def export_md(
    jupyter_output, output_notebook, add_nunit_attachment, file_ext=".md", root="."
):
    """
    Export Jupyter output to a Markdown file.

    :param jupyter_output: parsed notebook node to export
    :param output_notebook: output notebook filename
    :param add_nunit_attachment: optional attachment callback
    :param file_ext: extension for the exported file
    :param root: directory to write into
    """
    export_notebook(
        MarkdownExporter(),
        jupyter_output,
        output_notebook,
        add_nunit_attachment,
        file_ext,
        root=root,
    )


def export_rst(
    jupyter_output, output_notebook, add_nunit_attachment, file_ext=".rst", root="."
):
    """
    Export Jupyter output to an RST file.

    :param jupyter_output: parsed notebook node to export
    :param output_notebook: output notebook filename
    :param add_nunit_attachment: optional attachment callback
    :param file_ext: extension for the exported file
    :param root: directory to write into
    """
    export_notebook(
        RSTExporter(),
        jupyter_output,
        output_notebook,
        add_nunit_attachment,
        file_ext,
        root=root,
    )
136 | ): 137 | """ 138 | Export Jupyter Output to RST File 139 | 140 | :param jupyter_output: 141 | :param output_notebook: 142 | :param add_nunit_attachment: 143 | :param file_ext: 144 | :param root: 145 | """ 146 | rst_exporter = RSTExporter() 147 | export_notebook( 148 | rst_exporter, 149 | jupyter_output, 150 | output_notebook, 151 | add_nunit_attachment, 152 | file_ext, 153 | root=root, 154 | ) 155 | -------------------------------------------------------------------------------- /azure_utils/logger/README.md: -------------------------------------------------------------------------------- 1 | # Data Tracker 2 | Dan Grecoe - A Microsoft Employee 3 | 4 | When running Python projects through Azure Dev Ops (https://dev.azure.com) there is a need to collect certain statistics 5 | such as deployment time, or to pass out information related to a deployment as the agent the build runs on will be torn 6 | down once the build os complete. 7 | 8 | Of course, there are several option for doing so and this repository contains one option. 9 | 10 | The code in this repository enables saving these data to an Azure Storage account for consumption at a later time. 11 | 12 | Descriptions of the class and how it performs can be found in the MetricsUtils/hpStatisticsCollection.py file. 13 | 14 | An example on how to use the code for various tasks can be found in the statsCollectionTest.py file. 15 | 16 | ## Pre-requisites 17 | To use this example, you must pip install the following into your environment: 18 | - azure-cli-core 19 | - azure-storage-blob 20 | 21 | These should be installed with the azml libraries, but if they don't work that is why. 22 | 23 | ## Use in a notebook with AZML 24 | First you need to include the following 25 | 26 | ``` 27 | from MetricsUtils.hpStatisticsCollection import statisticsCollector, CollectionEntry 28 | from MetricsUtils.storageutils import storageConnection 29 | ``` 30 | 31 | This gives you access to the code. 
This assumes that you have installed either as a submodule or manually, the files in 32 | a folder called MetricsUtils in the same directory as the notebooks themselves. 33 | 34 | ### First notebook 35 | In the first notebook, you can certainly make use of the tracker to collect stats before the workspace is created, for 36 | example: 37 | 38 | ``` 39 | statisticsCollector.startTask(CollectionEntry.AML_WORKSPACE_CREATION) 40 | 41 | ws = Workspace.create( 42 | name=workspace_name, 43 | subscription_id=subscription_id, 44 | resource_group=resource_group, 45 | location=workspace_region, 46 | create_resource_group=True, 47 | auth=get_auth(env_path), 48 | exist_ok=True, 49 | ) 50 | 51 | statisticsCollector.endTask(CollectionEntry.AML_WORKSPACE_CREATION) 52 | ``` 53 | 54 | In fact, you are going to need to create this workspace to get the storage account name. So, in that first notebook, you 55 | will likely want to save off the storage connection string into the environment or .env file. 56 | 57 | The storage account name can be found with this code: 58 | ``` 59 | stgAcctName = ws.get_details()['storageAccount'].split('/')[-1] 60 | ``` 61 | 62 | Once you have the storage account name, you save the statistics to storage using the following at or near the bottom of 63 | your notebook. If you believe there may be failures along the way, you can perform the upload multiple times, it will 64 | just overwrite what is there. 65 | 66 | Also note that this assumes the user is logged in to the same subscription as the storage account. 67 | ``` 68 | storageConnString = storageConnection.getConnectionStringWithAzCredentials(resource_group, stgAcct) 69 | statisticsCollector.uploadContent(storageConnString) 70 | ``` 71 | 72 | ### Follow on notebooks 73 | The difference in a follow up notebook is that settings have likely already been saved. 
Since we have the storage 74 | account name now in the environment, we just need to pull the information from storage into the tracking class such as: 75 | 76 | ``` 77 | storageConnString = storageConnection.getConnectionStringWithAzCredentials(resource_group, stgAcct) 78 | statisticsCollector.hydrateFromStorage(storageConnString) 79 | ``` 80 | 81 | Then continue to use the object as you did in the first notebook being sure to call teh uploadContent() method to save 82 | whatever changes you want to storage. 83 | 84 | # Contributing 85 | 86 | This project welcomes contributions and suggestions. Most contributions require you to agree to a 87 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us 88 | the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com. 89 | 90 | When you submit a pull request, a CLA bot will automatically determine whether you need to provide 91 | a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions 92 | provided by the bot. You will only need to do this once across all repos using our CLA. 93 | 94 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 95 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or 96 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 97 | -------------------------------------------------------------------------------- /azure_utils/logger/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | AI-Utilities - logger/__init__.py 3 | 4 | Copyright (c) Microsoft Corporation. All rights reserved. 5 | Licensed under the MIT License. 
# --- azure_utils/logger/ai_logger.py ---
"""
AI-Utilities - MetricUtils/blobStorage.py

Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT License.

CollectionEntry enumerates the tasks tracked by StatisticsCollector. The enum
values are shared by both the producer (IPYNB path) and consumer (E2E path)
and key every startTask()/endTask()/addEntry()/getEntry() call.
"""

import json
from datetime import datetime, timezone
from enum import Enum


class CollectionEntry(Enum):
    """ Deploy Steps Enums"""

    AKS_CLUSTER_CREATION = "akscreate"
    AML_COMPUTE_CREATION = "amlcompute"
    AML_WORKSPACE_CREATION = "amlworkspace"


class StatisticsCollector:
    """
    Keeps track of tasks during the execution of a path; data can be archived
    to and retrieved from Azure Storage.

    - start_task()/end_task(): time a task, recorded in milliseconds.
    - add_entry()/get_entry(): store/read any other data point.
    - get_collection(): the internal collection as a JSON string.
    - upload_content()/retrieve_content(): push/pull the JSON to/from a
      pre-defined container/blob in the given storage account.
    - hydrate_from_storage(): reset the internal collection from storage.

    All state is class-level; no instance is needed to use the collector.
    """

    __metrics__ = {}  # finished data points, keyed by CollectionEntry.value
    __running_tasks__ = {}  # start timestamps of in-flight tasks
    __statscontainer__ = "pathmetrics"
    __statsblob__ = "statistics.json"

    def __init__(self, path_name: str):
        # Kept for backward compatibility: all functionality is static and
        # does not read this instance state.
        self.path_name = path_name

    @staticmethod
    def start_task(collection_entry):
        """
        Starts a task using one of the enumerators and records its start time.
        Nothing is written to __metrics__ until endTask() is called.

        :param collection_entry: an instance of a CollectionEntry enum
        """
        # Timezone-aware now() replaces deprecated utcnow(); both sides of the
        # subtraction in end_task() use the same clock.
        StatisticsCollector.__running_tasks__[collection_entry.value] = datetime.now(
            timezone.utc
        )

    @staticmethod
    def end_task(collection_entry):
        """
        Ends a task using one of the enumerators. If the start time was
        previously recorded with startTask(), the elapsed time in milliseconds
        is added to the __metrics__ collection; otherwise this is a no-op.

        :param collection_entry: an instance of a CollectionEntry enum
        """
        started = StatisticsCollector.__running_tasks__.get(collection_entry.value)
        if started is not None:
            time_diff = datetime.now(timezone.utc) - started
            StatisticsCollector.__metrics__[collection_entry.value] = (
                time_diff.total_seconds() * 1000
            )

    @staticmethod
    def add_entry(collection_entry, data_point):
        """
        Single call to add an entry to the __metrics__ collection, used when
        timers run in external code or for any other valid data point.

        :param collection_entry: an instance of a CollectionEntry enum
        :param data_point: Any valid python data type (string, int, etc)
        """
        StatisticsCollector.__metrics__[collection_entry.value] = data_point

    @staticmethod
    def get_entry(collection_entry):
        """
        Retrieve an entry from the internal collection.

        :param collection_entry: an instance of a CollectionEntry enum
        :return: the stored data point, or None if the entry is not present
        """
        return StatisticsCollector.__metrics__.get(collection_entry.value)

    @staticmethod
    def get_collection():
        """
        :return: string representation of the __metrics__ collection in JSON
        """
        return json.dumps(StatisticsCollector.__metrics__)

    @staticmethod
    def upload_content(connection_string):
        """
        Upload the JSON representation of __metrics__ to the pre-defined
        container/blob in the given storage account, creating the container
        when it does not exist yet.

        :param connection_string: A complete connection string to an Azure Storage account
        """
        containers, storage_account = StatisticsCollector._get_containers(
            connection_string
        )
        if StatisticsCollector.__statscontainer__ not in containers:
            storage_account.create_container(StatisticsCollector.__statscontainer__)
        storage_account.upload_blob(
            StatisticsCollector.__statscontainer__,
            StatisticsCollector.__statsblob__,
            StatisticsCollector.get_collection(),
        )

    @staticmethod
    def retrieve_content(connection_string):
        """
        Download the stored JSON content as a string, for pushing downstream.
        Does not affect the internal collection.

        :param connection_string: A complete connection string to an Azure Storage account
        :return: the uploaded collection, or None if not present
        """
        containers, storage_account = StatisticsCollector._get_containers(
            connection_string
        )
        if StatisticsCollector.__statscontainer__ in containers:
            return storage_account.download_blob(
                StatisticsCollector.__statscontainer__,
                StatisticsCollector.__statsblob__,
            )
        return None

    @staticmethod
    def _get_containers(connection_string):
        """
        Build the storage helpers and list existing container names.

        Imports are local so the collector can be used for in-memory metrics
        without the Azure storage packages installed.
        """
        from azure_utils.logger.blob_storage import BlobStorageAccount
        from azure_utils.logger.storageutils import StorageConnection

        connection_object = StorageConnection(connection_string)
        storage_account = BlobStorageAccount(connection_object)
        # Fixed: BlobStorageAccount defines get_containers(); the previous
        # getContainers() call raised AttributeError at runtime.
        containers = storage_account.get_containers()
        return containers, storage_account

    @staticmethod
    def hydrate_from_storage(connection_string):
        """
        Replace the in-memory __metrics__ dictionary with the content found in
        storage, dropping any existing information. Useful between IPYNB
        runs/stages in DevOps.

        :param connection_string: A complete connection string to an Azure Storage account
        """
        return_content = StatisticsCollector.retrieve_content(connection_string)
        if return_content is not None:
            StatisticsCollector.__metrics__ = json.loads(return_content)
        else:
            print("There was no data in storage")

Class that performs work against an Azure Storage account with containers and blobs.

To use, this library must be installed:

pip install azure-storage-blob
"""

from datetime import datetime, timedelta

from azure.storage.blob import BlobPermissions, BlockBlobService, PublicAccess


class BlobStorageAccount:
    """
    Constructor that receives a logger.storageutils.storageConnection instance
    """

    def __init__(self, storage_connection):
        # NOTE(review): BlockBlobService is the azure-storage-blob 2.x API —
        # confirm the pinned package version before upgrading.
        self.connection = storage_connection
        self.service = BlockBlobService(
            self.connection.AccountName, self.connection.AccountKey
        )

    # Creates a new storage container in the Azure Storage account

    def create_container(self, container_name):
        """
        Create a container and make its blobs publicly readable.

        :param container_name: name of the container to create
        """
        if self.connection and self.service:
            self.service.create_container(container_name)
            self.service.set_container_acl(
                container_name, public_access=PublicAccess.Blob
            )

    # Retrieve a blob SAS token on a specific blob

    def get_blob_sas_token(self, container_name, blob_name):
        """
        Generate a read-only SAS token for one blob, valid for one hour.

        :param container_name: container holding the blob
        :param blob_name: blob to grant access to
        :return: SAS token string, or None when not connected
        """
        return_token = None
        if self.connection and self.service:
            # noinspection PyUnresolvedReferences,PyTypeChecker
            return_token = self.service.generate_blob_shared_access_signature(
                container_name,
                blob_name,
                BlobPermissions.READ,
                datetime.utcnow() + timedelta(hours=1),
            )

        return return_token

    # Retrieves a list of storage container names in the specific storage account pointed to by
    # the storageConnection object

    def get_containers(self):
        """
        List the names of all containers in the storage account.

        :return: list of container names (empty when not connected)
        """
        return_list = []
        if self.connection and self.service:
            containers = self.service.list_containers()
            for container in containers:
                return_list.append(container.name)

        return return_list

    # Retrieves a list of storage blob names in a container in the specific storage account pointed to by
    # the storageConnection object

    def get_blobs(self, container_name):
        """
        List the names of all blobs in the given container.

        :param container_name: container to enumerate
        :return: list of blob names (empty when not connected)
        """
        return_list = []
        if self.connection and self.service:
            blobs = self.service.list_blobs(container_name)
            for blob in blobs:
                return_list.append(blob.name)
        return return_list

    # Upload text to a blob (fileContent is a simple string)

    def upload_blob(self, container_name, blob_name, file_content):
        """
        Write a text blob, replacing any existing blob with the same name.

        :param container_name: destination container
        :param blob_name: destination blob name
        :param file_content: string content to store
        """
        if self.connection and self.service:
            self.service.create_blob_from_text(container_name, blob_name, file_content)

    # Download the blob as a string.

    def download_blob(self, container_name, blob_name):
        """
        Read a text blob and return its content.

        :param container_name: container holding the blob
        :param blob_name: blob to read
        :return: blob content string, or None when not connected
        """
        return_content = None
        if self.connection and self.service:
            blob = self.service.get_blob_to_text(container_name, blob_name)
            return_content = blob.content
        return return_content
--------------------------------------------------------------------------------
/azure_utils/logger/key_vault.py:
--------------------------------------------------------------------------------
"""
AI-Utilities - azureml_tools/key_vault.py

Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT License.
"""
from azure.common.client_factory import get_client_from_cli_profile
from azure.keyvault import KeyVaultClient
from azure.mgmt.keyvault import KeyVaultManagementClient


class KeyVaultInstance:
    """
    Constructor taking the connection string to parse.
15 | 16 | EX: 17 | DefaultEndpointsProtocol=https;AccountName=STGACCT_NAME;AccountKey=STGACCT_KEY;EndpointSuffix=core.windows.net 18 | 19 | """ 20 | 21 | def __init__(self): 22 | self._vault_client = get_client_from_cli_profile(KeyVaultClient) 23 | self._kvmgmt_client = get_client_from_cli_profile(KeyVaultManagementClient) 24 | self.__setattr__("Dan", "test") 25 | 26 | def get_client(self): 27 | """ 28 | 29 | :return: 30 | """ 31 | return self._kvmgmt_client 32 | 33 | def get_vault_names(self): 34 | """ 35 | 36 | :return: 37 | """ 38 | vault_name = [] 39 | if self._kvmgmt_client is not None: 40 | for vlt in self._kvmgmt_client.vaults.list(): 41 | vault_name.append(vlt.name) 42 | 43 | return vault_name 44 | 45 | def get_key_vlt_client(self): 46 | """ 47 | 48 | :return: 49 | """ 50 | return self._vault_client 51 | 52 | def get_vault_secrets(self, vault_name): 53 | """ 54 | 55 | :param vault_name: 56 | :return: 57 | """ 58 | # https://thevault.vault.azure.net/ 59 | return_secrets = [] 60 | vault_address = "https://{}.vault.azure.net/".format(vault_name) 61 | if self._vault_client is not None: 62 | for sc in self._vault_client.get_secrets(vault_address): 63 | scname = sc.id.split("/")[-1] 64 | scbundle = self._vault_client.get_secret(vault_address, scname, "") 65 | scversion = scbundle.id.split("/")[-1] 66 | scvalue = scbundle.value 67 | return_secrets.append((scname, scversion, scvalue)) 68 | 69 | return return_secrets 70 | 71 | def set_vault_secret(self, vault_name, secret_name, secret_value): 72 | """ 73 | 74 | :param vault_name: 75 | :param secret_name: 76 | :param secret_value: 77 | """ 78 | # https://thevault.vault.azure.net/ 79 | vault_address = "https://{}.vault.azure.net/".format(vault_name) 80 | if self._vault_client is not None: 81 | self._vault_client.set_secret(vault_address, secret_name, secret_value) 82 | -------------------------------------------------------------------------------- /azure_utils/logger/storageutils.py: 
--------------------------------------------------------------------------------
"""
AI-Utilities - storageutils.py

Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT License.

ImportError: You need to install 'azure-cli-core' to load CLI active Cloud

REQUIREMENT : pip install azure-cli-core

Class that parses out a true connection string from Azure Storage account in the form:

DefaultEndpointsProtocol=https;AccountName=ACCT_NAME;AccountKey=ACCT_KEY;EndpointSuffix=core.windows.net

Ends up with 4 attributes :
DefaultEndpointsProtocol
AccountName
AccountKey
EndpointSuffix
"""
from azure.common.client_factory import get_client_from_cli_profile
from azure.mgmt.storage import StorageManagementClient


class StorageConnection:
    """
    Constructor taking the connection string to parse.

    EX:
    DefaultEndpointsProtocol=https;AccountName=STGACCT_NAME;AccountKey=STGACCT_KEY;EndpointSuffix=core.windows.net

    """

    def __init__(self, connection_string):
        # Each parsed key (AccountName, AccountKey, ...) becomes an
        # instance attribute of the same name.
        parsed_connection_string = self._parse_connection_string(connection_string)
        for key, value in parsed_connection_string.items():
            self.__setattr__(key, value)

    """
    Expects the full connection string from the Azure site and spits it into four components.
41 | 42 | EX: 43 | DefaultEndpointsProtocol=https;AccountName=STGACCT_NAME;AccountKey=STGACCT_KEY;EndpointSuffix=core.windows.net 44 | """ 45 | 46 | @staticmethod 47 | def _parse_connection_string(connection_string): 48 | return_value = {} 49 | if connection_string: 50 | segments = connection_string.split(";") 51 | for segment in segments: 52 | split_index = segment.index("=") 53 | second_part = (len(segment) - split_index - 1) * -1 54 | return_value[segment[:split_index]] = segment[second_part:] 55 | 56 | return return_value 57 | 58 | # Method to return the full connection string to an Azure Storage account give the resource group name and 59 | # storage account 60 | # name. 61 | # 62 | # Method expects that the environment has been logged into Azure and the subscription has been set to match the 63 | # incoming 64 | # resource group and storage account. 65 | 66 | @staticmethod 67 | def get_connection_string_with_az_credentials( 68 | resource_group_name, storage_account_name 69 | ): 70 | """ 71 | 72 | :param resource_group_name: 73 | :param storage_account_name: 74 | :return: 75 | """ 76 | connection_string_template = ( 77 | "DefaultEndpointsProtocol=https;AccountName={};AccountKey={" 78 | "};EndpointSuffix=core.windows.net" 79 | ) 80 | return_value = None 81 | 82 | client = get_client_from_cli_profile(StorageManagementClient) 83 | keys = client.storage_accounts.list_keys( 84 | resource_group_name, storage_account_name 85 | ) 86 | key_value = None 87 | for key in keys.keys: 88 | key_value = key.value 89 | break 90 | 91 | if key_value is not None: 92 | return_value = connection_string_template.format( 93 | storage_account_name, key_value 94 | ) 95 | 96 | return return_value 97 | -------------------------------------------------------------------------------- /azure_utils/logger/tests/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | azure_utils - logger/tests/__init__.py 3 | 4 | Copyright (c) Microsoft 
Corporation. All rights reserved.
Licensed under the MIT License.
"""
--------------------------------------------------------------------------------
/azure_utils/logger/tests/statsCollectionTest.py:
--------------------------------------------------------------------------------
"""
AI-Utilities - statsCollectionTest.py

Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT License.
"""
import time

from azure_utils.logger.ai_logger import CollectionEntry, StatisticsCollector
from azure_utils.logger.key_vault import KeyVaultInstance
from azure_utils.logger.storageutils import StorageConnection

DATA_IN_STORAGE_ = "Current data in storage ->"

kvInst = KeyVaultInstance()

sct = kvInst.get_vault_secrets("dangtestvault")
print(sct)
kvInst.set_vault_secret("dangtestvault", "secret2", "asecretvalue")
sct = kvInst.get_vault_secrets("dangtestvault")
print(sct)

# NOTE(review): this exit(1) makes everything below unreachable — it looks
# like a leftover gate from debugging the key-vault calls above; remove it
# to exercise the StatisticsCollector checks.
exit(1)

# You will need a connection string to access the storage account. This can be done by either supplying the
# connection string
# in total, or, assuming your az cli has logged in to the appropriate subscription, you can generate one using the
# storage client.


storageConnString = None
storageResourceGroup = "dangtest"
storageAccountName = "hpstatstest"

if storageConnString is None:
    storageConnString = StorageConnection.get_connection_string_with_az_credentials(storageResourceGroup,
                                                                                    storageAccountName)

'''
The StatisticsCollector is used in any python path you want. It's used for timing specific tasks and saving
the results to blob storage, then pulling that data from blob storage.

Timings can be collected in two ways
1. By starting and stopping a task
2. By simply putting in the time it took to run.

Now, you can use a single instance to collect data across a single execution, or you can use it to get data and
append to it
between execution runs (think IPYNB separate executions)
'''

'''
Tests with putting in time indirectly
'''
StatisticsCollector.start_task(CollectionEntry.AML_COMPUTE_CREATION)
time.sleep(1.5)
StatisticsCollector.end_task(CollectionEntry.AML_COMPUTE_CREATION)

# Upload the content to storage
StatisticsCollector.upload_content(storageConnString)

# Retrieve the content from storage
content = StatisticsCollector.retrieve_content(storageConnString)
print(DATA_IN_STORAGE_)
print(content)
print("")

'''
Tests with putting in time directly
'''

StatisticsCollector.add_entry(CollectionEntry.AKS_CLUSTER_CREATION, 200)
StatisticsCollector.add_entry(CollectionEntry.AML_COMPUTE_CREATION, 200)
StatisticsCollector.add_entry(CollectionEntry.AML_WORKSPACE_CREATION, 200)

# Upload the content to storage
StatisticsCollector.upload_content(storageConnString)

# Retrieve the content from storage
content = StatisticsCollector.retrieve_content(storageConnString)
print(DATA_IN_STORAGE_)
print(content)
print("")

'''
Work with the data in storage and append to it..

First change a bunch of data so we know it's not cached....
'''
StatisticsCollector.add_entry(CollectionEntry.AKS_CLUSTER_CREATION, 0)
StatisticsCollector.add_entry(CollectionEntry.AML_COMPUTE_CREATION, 0)

StatisticsCollector.hydrate_from_storage(storageConnString)

# Now change a 200 to 300
StatisticsCollector.add_entry(CollectionEntry.AML_WORKSPACE_CREATION, 300)

# Upload the content to storage
StatisticsCollector.upload_content(storageConnString)

# Retrieve the content from storage
content = StatisticsCollector.retrieve_content(storageConnString)
print(DATA_IN_STORAGE_)
print(content)
print("")
--------------------------------------------------------------------------------
/azure_utils/machine_learning/__init__.py:
--------------------------------------------------------------------------------
"""
ado-ml-batch-train - machine_learning/__init__.py

Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT License.
"""
--------------------------------------------------------------------------------
/azure_utils/machine_learning/contexts/__init__.py:
--------------------------------------------------------------------------------
"""
AI-Utilities - __init__.py

Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT License.
"""
--------------------------------------------------------------------------------
/azure_utils/machine_learning/contexts/model_management_context.py:
--------------------------------------------------------------------------------
"""
AI-Utilities - model_management_context.py

Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT License.
6 | """ 7 | import os 8 | from abc import ABC 9 | 10 | from azureml.core import Experiment, Model, ScriptRunConfig, Run 11 | from azureml.exceptions import ActivityFailedException 12 | 13 | from azure_utils.configuration.notebook_config import ( 14 | project_configuration_file, 15 | train_py_default, 16 | ) 17 | from azure_utils.machine_learning.contexts.workspace_contexts import WorkspaceContext 18 | from azure_utils.machine_learning.train_local import get_local_run_configuration 19 | 20 | 21 | class ModelManagementContext(WorkspaceContext): 22 | """ 23 | Interface for Contexts that require Model Management 24 | """ 25 | 26 | def __init__( 27 | self, 28 | subscription_id, 29 | resource_group, 30 | workspace_name, 31 | run_configuration, 32 | configuration_file: str = project_configuration_file, 33 | train_py=train_py_default, 34 | ): 35 | super().__init__( 36 | subscription_id, 37 | resource_group, 38 | workspace_name, 39 | configuration_file=configuration_file, 40 | train_py=train_py, 41 | ) 42 | self.configuration_file = configuration_file 43 | self.run_configuration = run_configuration 44 | self.model_name = None 45 | self.wait_for_completion = True 46 | self.model_path = None 47 | 48 | def get_or_create_model(self) -> Model: 49 | """ 50 | Get or Create Model 51 | 52 | :return: Model from Workspace 53 | """ 54 | assert self.model_name 55 | 56 | print("Check if Model exists.") 57 | if self.model_name in self.models: 58 | print("Model does exists.") 59 | # if get_model(self.model_name).tags['train_py_hash'] == self.get_file_md5( 60 | # self.source_directory + "/" + self.script): 61 | model = Model(self, name=self.model_name) 62 | if not os.path.isdir("outputs"): 63 | model.download("outputs", exist_ok=True) 64 | return model 65 | print("Model does not exists.") 66 | model = self.train_model() 67 | 68 | assert model 69 | if self.show_output: 70 | print(model.name, model.version, model.url, sep="\n") 71 | return model 72 | 73 | def train_model(self) -> Model: 74 | 
""" 75 | Train Model with Experiment Run 76 | 77 | :return: registered model from Experiment run. 78 | """ 79 | run = self.submit_experiment_run(wait_for_completion=self.wait_for_completion) 80 | model = run.register_model( 81 | model_name=self.model_name, model_path=self.model_path 82 | ) 83 | return model 84 | 85 | def submit_experiment_run(self, wait_for_completion: bool = True): 86 | """ 87 | Submit run to experiment context 88 | 89 | :param wait_for_completion: should program wait till success before returning 90 | """ 91 | raise NotImplementedError 92 | 93 | 94 | class ModelTrainingContext(ModelManagementContext, ABC): 95 | """ 96 | Interface for Model Management Contexts that Handle Model Training 97 | """ 98 | 99 | 100 | class LocalTrainingContext(ModelTrainingContext): 101 | """ 102 | Model Training Context used to run training locally. 103 | """ 104 | 105 | def __init__( 106 | self, 107 | subscription_id, 108 | resource_group, 109 | workspace_name, configuration_file: str = project_configuration_file, 110 | train_py=train_py_default, 111 | ): 112 | super().__init__( 113 | subscription_id=subscription_id, 114 | resource_group=resource_group, 115 | workspace_name=workspace_name, 116 | run_configuration=get_local_run_configuration(), 117 | configuration_file=configuration_file, 118 | train_py=train_py, 119 | ) 120 | self.args = None 121 | 122 | def submit_experiment_run(self, wait_for_completion=True) -> Run: 123 | """ 124 | 125 | :param wait_for_completion: 126 | :return: 127 | """ 128 | assert self.source_directory 129 | assert self.train_py 130 | assert self.run_configuration 131 | assert self.experiment_name 132 | assert os.path.isfile(self.source_directory + "/" + self.train_py), ( 133 | f"The file {self.train_py} could not be found at " 134 | f"{self.source_directory}" 135 | ) 136 | 137 | src = ScriptRunConfig( 138 | source_directory=self.source_directory, 139 | script=self.train_py, 140 | arguments=self.args, 141 | 
            run_config=get_local_run_configuration(),
        )
        # tag the run with the training script's hash so stale models can be detected
        self.image_tags["train_py_hash"] = self._get_file_md5(
            self.source_directory + "/" + self.train_py
        )
        exp = Experiment(workspace=self, name=self.experiment_name)
        run = exp.submit(src)
        if wait_for_completion:
            try:
                run.wait_for_completion(show_output=self.show_output)
            except ActivityFailedException as e:
                # surface run diagnostics before re-raising the failure
                print(run.get_details())
                raise e
        return run
--------------------------------------------------------------------------------
/azure_utils/machine_learning/contexts/workspace_contexts.py:
--------------------------------------------------------------------------------
"""
AI-Utilities - ai_workspace.py

Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT License.
"""
import hashlib
import warnings

from azureml._base_sdk_common.common import check_valid_resource_name
from azureml.core import Workspace
from azureml.exceptions import UserErrorException

from azure_utils.configuration.notebook_config import (
    project_configuration_file,
    score_py_default,
    train_py_default,
)
from azure_utils.configuration.project_configuration import ProjectConfiguration


class WorkspaceContext(Workspace):
    """
    AzureML Workspace Context - Base Framework Interface
    """

    def __init__(
        self,
        subscription_id: str,
        resource_group: str,
        workspace_name: str, configuration_file: str = project_configuration_file, project_configuration=None,
        train_py: str = train_py_default,
        score_py: str = score_py_default,
        **kwargs
    ):
        """
        Interface Constructor for Workspace Context

        :param subscription_id: Azure subscription id
        :param resource_group: Azure Resource Group name
        :param workspace_name: Azure Machine Learning Workspace
        :param configuration_file: path to
project configuration file. default: project.yml 43 | :param train_py: python source file for training 44 | :param score_py: python source file for scoring 45 | """ 46 | super().__init__(subscription_id, resource_group, workspace_name, **kwargs) 47 | if not project_configuration: 48 | self.project_configuration = ProjectConfiguration(configuration_file) 49 | 50 | self.image_tags = None 51 | self.args = None 52 | self.train_py = train_py 53 | self.score_py = score_py 54 | self.show_output = True 55 | self.source_directory = "./script" 56 | self.experiment_name = None 57 | self.model_name = None 58 | self.wait_for_completion = True 59 | self.model_path = None 60 | 61 | @classmethod 62 | def get_or_create_workspace( 63 | cls, 64 | configuration_file: str = project_configuration_file, 65 | project_configuration: ProjectConfiguration = None, 66 | **kwargs 67 | ): 68 | """ Get or create a workspace if it doesn't exist. 69 | 70 | :param configuration_file: 71 | :param project_configuration: ProjectConfiguration 72 | """ 73 | if not project_configuration: 74 | project_configuration = ProjectConfiguration(configuration_file) 75 | assert project_configuration.has_value("subscription_id") 76 | assert project_configuration.has_value("resource_group") 77 | assert project_configuration.has_value("workspace_name") 78 | assert project_configuration.has_value("workspace_region") 79 | 80 | try: 81 | check_valid_resource_name( 82 | project_configuration.get_value("workspace_name"), "Workspace" 83 | ) 84 | except UserErrorException: 85 | print(project_configuration.get_value("workspace_name")) 86 | raise 87 | 88 | cls.create( 89 | subscription_id=project_configuration.get_value("subscription_id"), 90 | resource_group=project_configuration.get_value("resource_group"), 91 | name=project_configuration.get_value("workspace_name"), 92 | location=project_configuration.get_value("workspace_region"), 93 | exist_ok=True, 94 | ) 95 | 96 | ws = cls( 97 | 
            subscription_id=project_configuration.get_value("subscription_id"),
            resource_group=project_configuration.get_value("resource_group"),
            workspace_name=project_configuration.get_value("workspace_name"),
            project_configuration=project_configuration,
            **kwargs
        )
        return ws

    @staticmethod
    def _get_file_md5(file_name: str) -> str:
        # MD5 here is a change-detection fingerprint, not a security hash.
        hasher = hashlib.md5()
        with open(file_name, "rb") as afile:
            buf = afile.read()
            hasher.update(buf)
        file_hash = hasher.hexdigest()
        return file_hash

    def assert_and_get_value(self, setting_name: str) -> str:
        """
        Fetch a required setting from the project configuration.

        :param setting_name: configuration key that must be present
        :return: the configured value
        """
        assert self.project_configuration.has_value(setting_name)
        return self.project_configuration.get_value(setting_name)
--------------------------------------------------------------------------------
/azure_utils/machine_learning/datasets/__init__.py:
--------------------------------------------------------------------------------
"""
AI-Utilities - __init__.py

Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT License.
"""
--------------------------------------------------------------------------------
/azure_utils/machine_learning/deep/__init__.py:
--------------------------------------------------------------------------------
"""
AI-Utilities - __init__.py

Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT License.
"""
--------------------------------------------------------------------------------
/azure_utils/machine_learning/deep/create_deep_model.py:
--------------------------------------------------------------------------------
"""
AI-Utilities - create_deep_model.py

Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT License.
6 | """ 7 | 8 | import numpy as np 9 | import wget 10 | from PIL import Image, ImageOps 11 | from azureml.core.conda_dependencies import CondaDependencies 12 | from azureml.core.image import ContainerImage 13 | 14 | from azure_utils.configuration.notebook_config import project_configuration_file 15 | from azure_utils.machine_learning.realtime.image import get_or_create_image 16 | 17 | 18 | def download_test_image(): 19 | """ 20 | 21 | :return: 22 | """ 23 | wget.download("https://bostondata.blob.core.windows.net/aksdeploymenttutorialaml/220px-Lynx_lynx_poing.jpg") 24 | img_path = "220px-Lynx_lynx_poing.jpg" 25 | print(Image.open(img_path).size) 26 | Image.open(img_path) 27 | # Below, we load the image by resizing to (224, 224) and then preprocessing using the methods from keras 28 | # preprocessing and imagenet utilities. 29 | # Evaluate the model using the input data 30 | img = Image.open(img_path).convert("RGB") 31 | img = ImageOps.fit(img, (224, 224), Image.ANTIALIAS) 32 | img = np.array(img) # shape: (224, 224, 3) 33 | img = np.expand_dims(img, axis=0) 34 | from keras.applications.imagenet_utils import preprocess_input 35 | return preprocess_input(img) 36 | 37 | 38 | def get_or_create_resnet_image(configuration_file: str = project_configuration_file, show_output=True, 39 | models: list = None, image_settings_name="deep_image_name"): 40 | """ 41 | Build Image 42 | 43 | :param models: 44 | :param configuration_file: path to project configuration file. default: project.yml 45 | :param show_output: toggle on/off standard output. 
default: `True` 46 | :param image_settings_name: Setting from Project Configuration 47 | """ 48 | image_config = create_resnet_image_config() 49 | 50 | return get_or_create_image(image_config, image_settings_name, show_output, models, configuration_file) 51 | 52 | 53 | def create_resnet_image_config(conda_file="img_env.yml", execution_script="driver.py"): 54 | """ 55 | 56 | :param conda_file: 57 | :param execution_script: 58 | :return: 59 | """ 60 | conda_pack = ["tensorflow-gpu==1.14.0"] 61 | requirements = ["keras==2.2.0", "Pillow==5.2.0", "azureml-defaults", "azureml-contrib-services", "toolz==0.9.0"] 62 | imgenv = CondaDependencies.create(conda_packages=conda_pack, pip_packages=requirements) 63 | with open("img_env.yml", "w") as file: 64 | file.write(imgenv.serialize_to_string()) 65 | 66 | description = "Image for AKS Deployment Tutorial" 67 | dependencies = ["resnet152.py"] 68 | tags = {"name": "AKS", "project": "AML"} 69 | return ContainerImage.image_configuration(execution_script=execution_script, runtime="python", 70 | conda_file=conda_file, description=description, tags=tags, 71 | dependencies=dependencies, enable_gpu=True) 72 | -------------------------------------------------------------------------------- /azure_utils/machine_learning/duplicate_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | ai-utilities - machine_learning/duplicate_model.py 3 | 4 | Copyright (c) Microsoft Corporation. All rights reserved. 5 | Licensed under the MIT License. 
6 | """ 7 | import pandas as pd 8 | import joblib 9 | 10 | 11 | class DuplicateModel: 12 | """ Create Copy of Model """ 13 | 14 | questions_cols = ["Id", "AnswerId", "Text"] 15 | dup_col = "Text_x" 16 | id_col = "Id_y" 17 | answer_id_col = "AnswerId_y" 18 | orig_col = "Text_y" 19 | feature_cols = [dup_col, orig_col] 20 | probabilities_col = "probabilities" 21 | 22 | def __init__(self, model_path, questions_path): 23 | self.model_path = model_path 24 | self.questions_path = questions_path 25 | self.model = joblib.load(model_path) 26 | self.questions = pd.read_csv(questions_path, sep="\t", encoding="latin1") 27 | self.questions = self.questions[self.questions_cols] 28 | self.questions.columns = [self.id_col, self.answer_id_col, self.orig_col] 29 | 30 | def score(self, text): 31 | """ 32 | Score Text Input 33 | 34 | :param text: Text Input 35 | :return: Input with Scores 36 | """ 37 | # Create a scoring dataframe. 38 | test = self.questions.copy() 39 | test[self.dup_col] = text 40 | test_x = test[self.feature_cols] 41 | 42 | # Score the text. 43 | test[self.probabilities_col] = self.model.predict_proba(test_x)[:, 1] 44 | 45 | # Order the data by descending probability. 46 | test.sort_values(by=self.probabilities_col, ascending=False, inplace=True) 47 | 48 | # Extract the original question ids, answer ids, and probabilities. 49 | scores = test[[self.id_col, self.answer_id_col, self.probabilities_col]] 50 | pairs = [x[1:] for x in scores.itertuples()] 51 | 52 | # Return the result. 53 | return pairs 54 | -------------------------------------------------------------------------------- /azure_utils/machine_learning/factories/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | AI-Utilities - __init__.py 3 | 4 | Copyright (c) Microsoft Corporation. All rights reserved. 5 | Licensed under the MIT License. 
6 | """ 7 | -------------------------------------------------------------------------------- /azure_utils/machine_learning/factories/realtime_factory.py: -------------------------------------------------------------------------------- 1 | """ 2 | AI-Utilities - realtime_factory.py 3 | 4 | Copyright (c) Microsoft Corporation. All rights reserved. 5 | Licensed under the MIT License. 6 | """ 7 | import inspect 8 | 9 | from azureml.contrib.services import rawhttp 10 | 11 | 12 | class RealTimeFactory: 13 | """ 14 | 15 | Example Usage: 16 | from azure_utils.machine_learning.factories.realtime_factory import RealTimeFactory 17 | 18 | rts_factory = RealTimeFactory() 19 | init = rts_factory.score_init 20 | run = rts_factory.score_run 21 | or 22 | from azure_utils.machine_learning.factories.realtime_factory import RealTimeFactory 23 | from azure_utils.machine_learning.models.training_arg_parsers import get_training_parser 24 | 25 | if __name__ == '__main__': 26 | RealTimeFactory().train(get_training_parser()) 27 | 28 | """ 29 | 30 | def __init__(self): 31 | raise NotImplementedError 32 | 33 | def train(self, args): 34 | """ 35 | Train Abstract Method 36 | :param args: 37 | """ 38 | raise NotImplementedError 39 | 40 | def score_init(self): 41 | """ 42 | Score Init Abstract Method 43 | """ 44 | raise NotImplementedError 45 | 46 | @rawhttp 47 | def score_run(self, request): 48 | """ 49 | Score Run Abstract Method 50 | 51 | :param request: 52 | """ 53 | raise NotImplementedError 54 | 55 | @classmethod 56 | def make_file(cls): 57 | """ 58 | Make file from class 59 | 60 | :return: string of file of class 61 | """ 62 | file = inspect.getsource(cls) 63 | 64 | file = file.replace( 65 | inspect.getsource(RealTimeFactory.train), inspect.getsource(cls.train) 66 | ) 67 | file = file.replace( 68 | inspect.getsource(RealTimeFactory.score_init), 69 | inspect.getsource(cls.score_init), 70 | ) 71 | file = file.replace( 72 | inspect.getsource(RealTimeFactory.score_run), 73 | 
            inspect.getsource(cls.score_run),
        )
        file = file.replace(
            inspect.getsource(RealTimeFactory.__init__), inspect.getsource(cls.__init__)
        )
        # rewrite every occurrence of the base name so the emitted class
        # declaration subclasses RealTimeFactory
        file = file.replace("RealTimeFactory", "DeepRealTimeFactory(RealTimeFactory)")
        return file
--------------------------------------------------------------------------------
/azure_utils/machine_learning/item_selector.py:
--------------------------------------------------------------------------------
"""
ai-utilities - machine_learning/item_selector.py

From: http://scikit-learn.org/0.18/auto_examples/hetero_feature_union.html

Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT License.
"""

from sklearn.base import BaseEstimator, TransformerMixin


class ItemSelector(BaseEstimator, TransformerMixin):
    """For data grouped by feature, select subset of data at provided
    key(s).

    The data are expected to be stored in a 2D data structure, where
    the first index is over features and the second is over samples,
    i.e.

    >> len(data[keys]) == n_samples

    Please note that this is the opposite convention to scikit-learn
    feature matrices (where the first index corresponds to sample).

    ItemSelector only requires that the collection implement getitem
    (data[keys]). Examples include: a dict of lists, 2D numpy array,
    Pandas DataFrame, numpy record array, etc.

    >> data = {'a': [1, 5, 2, 5, 2, 8],
               'b': [9, 4, 1, 4, 1, 3]}
    >> ds = ItemSelector(key='a')
    >> data['a'] == ds.transform(data)

    ItemSelector is not designed to handle data grouped by sample
    (e.g. a list of dicts). If your data are structured this way,
    consider a transformer along the lines of
    `sklearn.feature_extraction.DictVectorizer`.
class ItemSelector(BaseEstimator, TransformerMixin):
    """For data grouped by feature, select subset of data at provided
    key(s).

    The data are expected to be stored in a 2D data structure, where
    the first index is over features and the second is over samples,
    i.e.

    >> len(data[keys]) == n_samples

    Please note that this is the opposite convention to scikit-learn
    feature matrices (where the first index corresponds to sample).

    ItemSelector only requires that the collection implement getitem
    (data[keys]). Examples include: a dict of lists, 2D numpy array,
    Pandas DataFrame, numpy record array, etc.

    >> data = {'a': [1, 5, 2, 5, 2, 8],
               'b': [9, 4, 1, 4, 1, 3]}
    >> ds = ItemSelector(key='a')
    >> data['a'] == ds.transform(data)

    ItemSelector is not designed to handle data grouped by sample
    (e.g. a list of dicts). If your data are structured this way,
    consider a transformer along the lines of
    `sklearn.feature_extraction.DictVectorizer`.

    Parameters
    ----------
    keys : hashable or list of hashable, required
        The key(s) corresponding to the desired value(s) in a mappable.
    """

    def __init__(self, keys):
        # Validate hashability up front so failures surface at construction
        # time rather than during fit/transform. Generator expressions avoid
        # materializing throwaway lists inside any()/all().
        if isinstance(keys, list):
            if any(getattr(key, "__hash__", None) is None for key in keys):
                raise TypeError("Not all keys are hashable")
        elif getattr(keys, "__hash__", None) is None:
            raise TypeError("keys is not hashable")
        self.keys = keys

    # noinspection PyUnusedLocal,PyUnusedLocal
    def fit(self, input_x, *args, **kwargs):
        """
        Verify that every configured key is present in ``input_x``.

        :param input_x: Set of items to fit with keys
        :return: self
        :raises KeyError: if any configured key is missing from ``input_x``
        """
        if isinstance(self.keys, list):
            if not all(key in input_x for key in self.keys):
                raise KeyError("Not all keys in data")
        elif self.keys not in input_x:
            raise KeyError("key not in data")
        return self

    # noinspection PyUnusedLocal,PyUnusedLocal
    def transform(self, data_dict, *args, **kwargs):
        """
        Transform data based on keys

        :param data_dict: Data to Transform
        :return: Transformed data
        """
        return data_dict[self.keys]

    def get_feature_names(self):
        """
        Get Feature Names

        :return: get keys
        """
        return self.keys
def score_rank(scores):
    """
    Rank scores in descending order (the highest score receives rank 1).

    :param scores: values to rank
    :return: pandas Series of ranks aligned with the input order
    """
    series = pd.Series(scores)
    return series.rank(ascending=False)


def label_index(label, label_order):
    """
    Position of ``label`` within the ``label_order`` array.

    :param label: label to look up
    :param label_order: ordered array of known labels
    :return: integer position of the label, or ``None`` when absent
    """
    matches = np.where(label == label_order)[0]
    if matches.size == 0:
        return None
    return matches[0]


def label_rank(label, scores, label_order) -> int:
    """
    Rank of the score belonging to ``label``.

    :param label: label whose score should be ranked
    :param scores: scores used to rank items (aligned with ``label_order``)
    :param label_order: ordered array of known labels
    :return: rank of the label's score; ``len(scores) + 1`` when the label
        does not appear in ``label_order``
    """
    position = label_index(label, label_order)
    return len(scores) + 1 if position is None else score_rank(scores)[position]
def get_or_create_lightgbm_image(
    configuration_file: str = project_configuration_file,
    show_output: bool = True,
    models: list = None,
    dependencies=None,
    image_settings_name: str = "image_name",
) -> ContainerImage:
    """
    Get or Create new Docker Image from Machine Learning Workspace

    :param configuration_file: path to project configuration file. default: project.yml
    :param show_output: toggle on/off standard output. default: `True`
    :param models: Name of Model to package with Image from Machine Learning Workspace
    :param dependencies: List of files to include in image
    :param image_settings_name: Setting from Project Configuration holding the image name
    :return: New or Existing Docker Image for deployment to Kubernetes Compute
    """
    # Build the LightGBM-specific image configuration (writes conda/score/docker
    # files into the current working directory).
    image_config = create_lightgbm_image_config(dependencies=dependencies)

    if not models:
        models = []

    return get_or_create_image(
        image_config, image_settings_name, show_output, models, configuration_file
    )


def get_or_create_image(
    image_config,
    image_settings_name,
    show_output,
    models=None,
    configuration_file: str = project_configuration_file,
):
    """
    Get or Create a Docker Image in the project's Machine Learning Workspace.

    :param image_config: container image configuration used when a build is needed
    :param image_settings_name: project-configuration key holding the image name
    :param models: models to package with the image
    :param show_output: toggle on/off standard output
    :param configuration_file: path to project configuration file. default: project.yml
    :return: existing registered image when present and healthy, otherwise a newly
        built image
    """
    if not models:
        models = []

    project_configuration = ProjectConfiguration(configuration_file)

    # The image name must be declared in the project configuration.
    assert project_configuration.has_value(image_settings_name)
    image_name = project_configuration.get_value(image_settings_name)

    workspace = get_or_create_workspace_from_project(
        project_configuration, show_output=show_output
    )

    # Reuse the already-registered image unless its last build failed.
    workspace_images = workspace.images
    if (
        image_name in workspace_images
        and workspace_images[image_name].creation_state != "Failed"
    ):
        return workspace_images[image_name]

    image_create_start = time.time()
    image = ContainerImage.create(
        name=image_name, models=models, image_config=image_config, workspace=workspace
    )
    # Block until the build finishes; surface a failed build immediately.
    image.wait_for_creation(show_output=show_output)
    assert image.creation_state != "Failed"
    if show_output:
        print_image_deployment_info(image, image_name, image_create_start)
    return image
deploy_start_time: float, service_id: str): 95 | """ 96 | Print the deployment time of the service so it can be captured in devops logs. 97 | 98 | :param service_name: 99 | :param deploy_start_time: 100 | :param service_id: 101 | """ 102 | deployment_time_secs = str(time.time() - deploy_start_time) 103 | print( 104 | f"Deployed {service_id} with name {service_name}. Took {deployment_time_secs} seconds." 105 | ) 106 | 107 | 108 | def print_image_deployment_info( 109 | image: Image, image_name: str, image_create_start: float 110 | ): 111 | """ 112 | Print general information about deploying an image. 113 | 114 | :param image: 115 | :param image_name: 116 | :param image_create_start: 117 | """ 118 | print_deployment_time(image_name, image_create_start, "Image") 119 | print(image.name) 120 | print(image.version) 121 | print(image.image_build_log_uri) 122 | 123 | 124 | def create_lightgbm_image_config( 125 | conda_file="lgbmenv.yml", execution_script="score.py", dependencies=None 126 | ) -> ContainerImageConfig: 127 | """ 128 | Image Configuration for running LightGBM in Azure Machine Learning Workspace 129 | 130 | :param conda_file: file name of LightGBM Conda Env File. This file is created if it does not exist. 131 | default: lgbmenv.yml 132 | :param execution_script: webservice file. default: score.py 133 | :param dependencies: Files required for image. 
134 | :return: new image configuration for Machine Learning Workspace 135 | """ 136 | create_lightgbm_conda_file(conda_file) 137 | 138 | dockerfile = "dockerfile" 139 | with open(dockerfile, "w") as file: 140 | file.write( 141 | "RUN apt update -y && apt upgrade -y && apt install -y build-essential" 142 | ) 143 | 144 | with open("score.py", "w") as file: 145 | file.write( 146 | """ 147 | import json 148 | import logging 149 | 150 | 151 | def init(): 152 | logger = logging.getLogger("scoring_script") 153 | logger.info("init") 154 | 155 | 156 | def run(): 157 | logger = logging.getLogger("scoring_script") 158 | logger.info("run") 159 | return json.dumps({'call': True}) 160 | """ 161 | ) 162 | description = "Image with lightgbm model" 163 | tags = {"area": "text", "type": "lightgbm"} 164 | return ContainerImage.image_configuration( 165 | execution_script=execution_script, 166 | runtime="python", 167 | conda_file=conda_file, 168 | description=description, 169 | dependencies=dependencies, 170 | docker_file=dockerfile, 171 | tags=tags, 172 | ) 173 | 174 | 175 | def create_lightgbm_conda_file(conda_file: str = "lgbmenv.yml"): 176 | """ 177 | Create new Conda File with LightGBM requirements. 178 | 179 | :param conda_file: filename of LightGBM conda file, which is created during call. 180 | """ 181 | conda_pack = ["scikit-learn==0.19.1", "pandas==0.23.3"] 182 | requirements = [ 183 | "lightgbm==2.1.2", 184 | "azureml-defaults==1.0.57", 185 | "azureml-contrib-services", 186 | "Microsoft-AI-Azure-Utility-Samples", 187 | ] 188 | lgbmenv = CondaDependencies.create( 189 | conda_packages=conda_pack, pip_packages=requirements 190 | ) 191 | with open(conda_file, "w") as file: 192 | file.write(lgbmenv.serialize_to_string()) 193 | 194 | 195 | def lightgbm_test_image_locally(image: Image, directory: str): 196 | """ 197 | Test LightGBM image Locally. 198 | 199 | :param image: Machine Learning Image to test. 200 | :param directory: root directory that contains data directory. 
def test_aks(directory: str, aks_service: AksWebservice):
    """
    Smoke-test a deployed AKS web service with sample duplicate-question calls.

    :param directory: directory of data_folder with test data
    :param aks_service: AKS Web Service to Test
    """
    # Column index 4 of the test set holds the question text to score.
    num_dupes_to_score = 4

    dupes_test = get_dupes_test(directory)
    text_to_score = dupes_test.iloc[0, num_dupes_to_score]

    json_text = text_to_json(text_to_score)

    scoring_url = aks_service.scoring_uri
    api_key = aks_service.get_keys()[0]

    headers = {
        "content-type": "application/json",
        "Authorization": ("Bearer " + api_key),
    }
    requests.post(
        scoring_url, data=json_text, headers=headers
    )  # Run the request twice since the first time takes a
    r = requests.post(
        scoring_url, data=json_text, headers=headers
    )  # little longer due to the loading of the model
    print(r)

    # Score the first five rows to exercise the service beyond a single call.
    dupes_to_score = dupes_test.iloc[:5, num_dupes_to_score]

    text_data = list(map(text_to_json, dupes_to_score))  # Retrieve the text data
    for text in text_data:
        r = requests.post(scoring_url, data=text, headers=headers)
        print(r)
def get_dupes_test(directory: str) -> pd.DataFrame:
    """
    Load the duplicate-questions test TSV into a pandas DataFrame.

    :param directory: root directory of data_folder
    :return: pd.DataFrame from the loaded csv
    """
    test_file = directory + "/data_folder/dupes_test.tsv"
    return pd.read_csv(test_file, sep="\t", encoding="latin1")


def register_blob_datastore(
    workspace: Workspace,
    blob_datastore_name: str,
    container_name: str,
    account_name: str,
    account_key: str,
    datastore_rg: str,
) -> AzureBlobDatastore:
    """
    Register a Blob Storage container with the Azure Machine Learning Workspace,
    overwriting any existing datastore registration of the same name.

    :param workspace: Azure Machine Learning Workspace
    :param blob_datastore_name: Name for blob datastore
    :param container_name: Name for blob container
    :param account_name: Name for blob account
    :param account_key: Blob Account Key using for auth
    :param datastore_rg: Resource Group containing Azure Storage Account
    :return: Pointer to Azure Machine Learning Blob Datastore
    """
    datastore = Datastore.register_azure_blob_container(
        workspace=workspace,
        datastore_name=blob_datastore_name,
        container_name=container_name,
        account_name=account_name,
        account_key=account_key,
        resource_group=datastore_rg,
        overwrite=True,
    )
    return datastore
def register_sql_datastore(
    workspace: Workspace,
    sql_datastore_name: str,
    sql_server_name: str,
    sql_database_name: str,
    sql_username: str,
    sql_password: str,
) -> AzureSqlDatabaseDatastore:
    """
    Register an Azure SQL DB with the Azure Machine Learning Workspace

    :param workspace: Azure Machine Learning Workspace
    :param sql_datastore_name: Name used to id the SQL Datastore
    :param sql_server_name: Azure SQL Server Name
    :param sql_database_name: Azure SQL Database Name
    :param sql_username: Azure SQL Database Username
    :param sql_password: Azure SQL Database Password
    :return: Pointer to Azure Machine Learning SQL Datastore
    """
    return Datastore.register_azure_sql_database(
        workspace=workspace,
        datastore_name=sql_datastore_name,
        server_name=sql_server_name,
        database_name=sql_database_name,
        username=sql_username,
        password=sql_password,
    )
parameters('appName')))]": "Resource" 36 | }, 37 | "properties": { 38 | "Name": "[variables('pingTestName')]", 39 | "Description": "Basic ping test", 40 | "Enabled": true, 41 | "Frequency": 300, 42 | "Timeout": 120, 43 | "Kind": "ping", 44 | "RetryEnabled": true, 45 | "Locations": [ 46 | { 47 | "Id": "us-va-ash-azr" 48 | }, 49 | { 50 | "Id": "emea-nl-ams-azr" 51 | }, 52 | { 53 | "Id": "apac-jp-kaw-edge" 54 | } 55 | ], 56 | "Configuration": { 57 | "WebTest": "[concat('
')]" 58 | }, 59 | "SyntheticMonitorId": "[variables('pingTestName')]" 60 | } 61 | } 62 | ] 63 | } -------------------------------------------------------------------------------- /azure_utils/machine_learning/train_local.py: -------------------------------------------------------------------------------- 1 | """ 2 | AI-Utilities - train_local.py 3 | 4 | Copyright (c) Microsoft Corporation. All rights reserved. 5 | Licensed under the MIT License. 6 | 7 | 8 | coding: utf-8 9 | 10 | # Train Locally 11 | In this notebook, you will perform the following using Azure Machine Learning. 12 | * Load workspace. 13 | * Configure & execute a local run in a user-managed Python environment. 14 | * Configure & execute a local run in a system-managed Python environment. 15 | * Configure & execute a local run in a Docker environment. 16 | # * Register model for operationalization. 17 | """ 18 | 19 | import os 20 | import sys 21 | 22 | from azureml.core.runconfig import RunConfiguration 23 | 24 | 25 | def get_or_create_model_driver(train_py: str = "create_model.py"): 26 | """ Create Model Script for LightGBM with Stack Overflow Data """ 27 | if not os.path.isfile(f"script/{train_py}"): 28 | os.makedirs("script", exist_ok=True) 29 | 30 | create_model_py = ( 31 | "from azure_utils.machine_learning import create_model\n\nif __name__ == '__main__':\n " 32 | "create_model.main() " 33 | ) 34 | with open(train_py, "w") as file: 35 | file.write(create_model_py) 36 | 37 | 38 | def get_local_run_configuration() -> RunConfiguration: 39 | """ 40 | Get Local Run Config 41 | 42 | :return: 43 | """ 44 | # Editing a run configuration property on-fly. 
def get_local_run_configuration() -> RunConfiguration:
    """
    Build a user-managed local RunConfiguration pinned to this interpreter.

    :return: run configuration for local, user-managed execution
    """
    run_config = RunConfiguration()
    # User-managed dependencies: AML will not try to build an environment.
    run_config.environment.python.user_managed_dependencies = True
    # Point the run at the Python interpreter executing this process.
    run_config.environment.python.interpreter_path = sys.executable
    return run_config
def get_training_parser() -> Namespace:
    """
    Argument Parser for Training Model Scripts

    :return: parsed args (``--outputs`` directory and ``--model`` file name)
    """
    parser = argparse.ArgumentParser(
        description="Fit and evaluate a model based on train-test datasets."
    )
    parser.add_argument("--outputs", help="the outputs directory", default="outputs")
    parser.add_argument("--model", help="the model file", default="model.pkl")
    return parser.parse_args()


# Number of top ImageNet predictions returned per image.
NUMBER_RESULTS = 3


def get_model_path(model_pkl: str = "model.pkl"):
    """
    Get Model Path either locally or in web service

    Inside an AML web service the model directory is supplied via the
    ``AZUREML_MODEL_DIR`` environment variable; locally it defaults to
    ``outputs``.

    :param model_pkl: filename of file
    :return: full path to the model file
    """
    model_dir = "outputs"
    if os.getenv("AZUREML_MODEL_DIR"):
        model_dir = os.getenv("AZUREML_MODEL_DIR")
    # Hoist the path concatenation so the checked and returned paths cannot drift.
    model_path = model_dir + "/" + model_pkl
    assert os.path.isfile(model_path), """Model not found."""
    return model_path


def image_ref_to_pil_image(image_ref: str):
    """ Load image with PIL (RGB) """
    return Image.open(image_ref).convert("RGB")


def pil_to_numpy(pil_image):
    """
    Resize/crop a PIL image to the 224x224 network input and convert to array.

    :param pil_image: PIL image to convert
    :return: array produced by ``keras.preprocessing.image.img_to_array``
    """
    # Image.LANCZOS is the same filter as the Image.ANTIALIAS alias, which was
    # deprecated and removed in Pillow 10; the surviving name works on both old
    # and new Pillow releases.
    img = ImageOps.fit(pil_image, (224, 224), Image.LANCZOS)
    img = image.img_to_array(img)
    return img


def default_response(request) -> AMLResponse:
    """
    Fallback response for non-scoring requests.

    :param request: incoming HTTP request
    :return: 201 environment probe reply for GET, otherwise 500 "bad request"
    """
    if request.method == "GET":
        return AMLResponse({"azEnvironment": "Azure"}, 201)
    return AMLResponse("bad request", 500)


def prepare_response(preds, transformed_dict):
    """
    Pair decoded top-N ImageNet predictions with their request keys.

    :param preds: raw model predictions (one row per input image)
    :param transformed_dict: mapping of request key -> preprocessed image
    :return: dict mapping each request key to its decoded predictions
    """
    preds = decode_predictions(preds.astype(np.float64), top=NUMBER_RESULTS)
    return dict(zip(transformed_dict.keys(), preds))


def process_request(request):
    """
    Convert uploaded image files into a preprocessed batch for the model.

    :param request: HTTP request whose ``files`` hold the image uploads
    :return: (preprocessed batch array, mapping of request key -> per-image array)
    """
    transform_input = compose(pil_to_numpy, image_ref_to_pil_image)
    transformed_dict = {
        key: transform_input(img_ref) for key, img_ref in request.files.items()
    }
    img_array = preprocess_input(np.stack(list(transformed_dict.values())))
    return img_array, transformed_dict
@deprecated(
    version="0.2.8",
    reason="Switch to using ProjectConfiguration, this will be removed in 0.4.0",
)
def load_configuration(configuration_file: str):
    """
    Load the Workspace Configuration File.

    The workspace configuration file keeps passwords out of the code and out of
    source control. To create one, copy sample_workspace.conf to
    "workspace_conf.yml" and fill in each field; .gitignore excludes that file
    to prevent accidental commits.

    :param configuration_file: File Path to configuration yml
    :return: the parsed configuration used to set up the AML Workspace and Experiments
    """
    # Fall back to the sample file shipped with the package when the
    # user-specific configuration is absent.
    if not os.path.isfile(configuration_file):
        configuration_file = directory + "/../sample_workspace_conf.yml"

    with open(configuration_file) as config_stream:
        return yaml.safe_load(config_stream)
def get_or_create_workspace(
    workspace_name: str,
    subscription_id: str,
    resource_group: str,
    workspace_region: str,
    auth: Union[
        InteractiveLoginAuthentication, ServicePrincipalAuthentication
    ] = None,
    log=True,
) -> Workspace:
    """
    Create a new Azure Machine Learning workspace. If the workspace already exists, the existing workspace will be
    returned. Also create a CONFIG file to quickly reload the workspace.

    Run az login from the CLI in the project directory to avoid authentication when running the program.

    :param workspace_name: Name of Azure Machine Learning Workspace to get or create within the Azure Subscription
    :param subscription_id: Azure subscription id
    :param resource_group: Azure Resource Group to get or create the workspace within. If the resource group does not
        exist it will be created.
    :param workspace_region: The Azure region to deploy the workspace.
    :param auth: authentication object; defaults to a fresh
        :class:`azureml.core.authentication.InteractiveLoginAuthentication` when omitted.
        For examples of authentication, see https://aka.ms/aml-notebook-auth.
    :param log: enable print output
    :return: Returns a :class:`azureml.core.Workspace` object, a pointer to Azure Machine Learning Workspace
    :rtype: azureml.core.Workspace
    """
    # FIX: the previous default argument ``auth=InteractiveLoginAuthentication()``
    # was evaluated once at import time and shared across every call; create the
    # default lazily instead.
    if auth is None:
        auth = InteractiveLoginAuthentication()

    if log:
        print("AML SDK Version:", azureml.core.VERSION)

    workspace = Workspace.create(
        name=workspace_name,
        subscription_id=subscription_id,
        resource_group=resource_group,
        location=workspace_region,
        auth=auth,
        exist_ok=True,
    )

    # Persist config.json so the workspace can be reloaded without re-auth.
    workspace.write_config()

    if log:
        ws_json = json.dumps(workspace.get_details(), indent=2)
        print(ws_json)

    return workspace
def get_or_create_workspace_from_project(
    project_configuration: ProjectConfiguration,
    auth: Union[
        InteractiveLoginAuthentication, ServicePrincipalAuthentication
    ] = None,
    show_output=True,
) -> Workspace:
    """
    Create a new Azure Machine Learning workspace from a project configuration. If the workspace already exists, the
    existing workspace will be returned. Also create a CONFIG file to quickly reload the workspace.

    Run az login from the CLI in the project directory to avoid authentication when running the program.

    :param project_configuration: Project Configuration Container
    :param auth: authentication object; defaults to a fresh
        :class:`azureml.core.authentication.InteractiveLoginAuthentication` when omitted.
        For examples of authentication, see https://aka.ms/aml-notebook-auth.
    :param show_output: enable print output
    :return: Returns a :class:`azureml.core.Workspace` object, a pointer to Azure Machine Learning Workspace
    """
    # FIX: avoid a mutable/stateful default evaluated at import time; build the
    # interactive auth object only when the caller did not supply one.
    if auth is None:
        auth = InteractiveLoginAuthentication()

    return get_or_create_workspace(
        project_configuration.get_value("workspace_name"),
        project_configuration.get_value("subscription_id"),
        project_configuration.get_value("resource_group"),
        project_configuration.get_value("workspace_region"),
        auth=auth,
        log=show_output,
    )
def get_or_create_workspace_from_file(
    configuration_file: str = project_configuration_file,
    auth: Union[
        InteractiveLoginAuthentication, ServicePrincipalAuthentication
    ] = None,
    log=True,
) -> Workspace:
    """
    Create a new Azure Machine Learning workspace from a configuration file. If the workspace already exists, the
    existing workspace will be returned. Also create a CONFIG file to quickly reload the workspace.

    Run az login from the CLI in the project directory to avoid authentication when running the program.

    :param configuration_file: File path to project configuration file. default: ../project.yml
    :param auth: authentication object; defaults to a fresh
        :class:`azureml.core.authentication.InteractiveLoginAuthentication` when omitted.
        For examples of authentication, see https://aka.ms/aml-notebook-auth.
    :param log: enable print output
    :return: Returns a :class:`azureml.core.Workspace` object, a pointer to Azure Machine Learning Workspace
    """
    # FIX: construct the default interactive auth lazily instead of at import time.
    if auth is None:
        auth = InteractiveLoginAuthentication()

    project_configuration = ProjectConfiguration(configuration_file)

    return get_or_create_workspace(
        project_configuration.get_value("workspace_name"),
        project_configuration.get_value("subscription_id"),
        project_configuration.get_value("resource_group"),
        project_configuration.get_value("workspace_region"),
        auth=auth,
        log=log,
    )


def get_workspace_from_config() -> Workspace:
    """
    Retrieve an AML Workspace from a previously saved configuration (config.json)

    :return: Azure Machine Learning Workspace
    :rtype: azureml.core.Workspace
    """
    return Workspace.from_config()
def make_vbox(model_dict: dict) -> widgets.VBox:
    """
    Render a (possibly nested) settings dict as a vertical widget box.

    Scalar values become "key: value" rows; one level of nested dicts becomes
    a collapsed accordion section titled with the outer key.

    :param model_dict: settings to display; values may be scalars or dicts
    :return: VBox wrapping the rendered rows/accordions
    """
    labels = []
    for k in model_dict:
        # NOTE(review): ``type(...) is not dict`` treats dict subclasses
        # (e.g. OrderedDict) as scalar leaves -- confirm that is intended.
        if type(model_dict[k]) is not dict:
            string = str(model_dict[k])
            labels.append(make_setting_hbox(k, string))
        else:
            mini_labels = []
            mini_dic = model_dict[k]
            # NOTE(review): ``mini_dic is not dict`` compares the value with
            # the *type object* ``dict`` and is always True for real dict
            # instances; the ``if mini_dic`` part already handles emptiness.
            # Presumably a leftover -- confirm before simplifying.
            if mini_dic and mini_dic is not dict:
                for mini_k in mini_dic:
                    string = str(mini_dic[mini_k])
                    mini_labels.append(make_setting_hbox(mini_k, string))
            mini_model_accordion = widgets.Accordion(
                children=[widgets.VBox(mini_labels)]
            )
            mini_model_accordion.set_title(0, k)
            labels.append(mini_model_accordion)

    model_widget = widgets.VBox(labels)
    return widgets.VBox(children=[model_widget])


def make_setting_hbox(mini_k: str, string: str) -> widgets.HBox:
    """
    Build one "key: value" display row.

    :param mini_k: setting name
    :param string: stringified setting value
    :return: HBox holding the name (HTML widget) and value (Label widget)
    """
    # NOTE(review): the HTML value appears to have lost its markup in this
    # copy (empty "" prefix) -- the original likely emphasised the key, e.g.
    # "<b>" + mini_k + ":</b>". Verify against the repository.
    return widgets.HBox(
        [widgets.HTML(value="" + mini_k + ":"), widgets.Label(string)]
    )
application_insights_accordion.set_title(1, "Availability") 91 | application_insights_accordion.set_title(2, "Performance") 92 | application_insights_accordion.set_title(3, "Load Testing") 93 | 94 | kubernetes_image = widgets.HTML( 95 | value='' 96 | ) 97 | kubernetes_accordion = widgets.Accordion(children=[aks_box, kubernetes_image]) 98 | kubernetes_accordion.set_title(0, "Main") 99 | kubernetes_accordion.set_title(1, "Performance") 100 | 101 | tab_nest = widgets.Tab() 102 | tab_nest.children = [ 103 | deployment_accordion, 104 | kubernetes_accordion, 105 | application_insights_accordion, 106 | ] 107 | tab_nest.set_title(0, "ML Studio") 108 | tab_nest.set_title(1, "Kubernetes") 109 | tab_nest.set_title(2, "Application Insights") 110 | return tab_nest 111 | -------------------------------------------------------------------------------- /azure_utils/rts_estimator.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ResNet152 model for Keras. 
3 | 4 | # Reference: 5 | 6 | - [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385) 7 | 8 | Adaptation of code from flyyufelix, mvoelk, BigMoyan, fchollet at https://github.com/adamcasson/resnet152 9 | 10 | """ 11 | from typing import Any 12 | 13 | from azureml.contrib.services.aml_response import AMLResponse 14 | from azureml.core import Model 15 | 16 | 17 | class RTSEstimator: 18 | """Estimator for Real-time Scoring""" 19 | 20 | def predict(self, request) -> AMLResponse: 21 | """ 22 | 23 | :param request: 24 | """ 25 | raise NotImplementedError 26 | 27 | def load_model(self): 28 | """ 29 | Abstract Method for load model 30 | """ 31 | raise NotImplementedError 32 | 33 | def save_model(self, path: str): 34 | """ 35 | Abstract Method for Save Model 36 | """ 37 | raise NotImplementedError 38 | 39 | def train(self): 40 | """ 41 | Abstract Method for Train Model 42 | """ 43 | raise NotImplementedError 44 | 45 | def create_model( 46 | self, 47 | include_top: bool = True, 48 | weights: str = None, 49 | input_tensor: Any = None, 50 | input_shape: Any = None, 51 | large_input: bool = False, 52 | pooling: Any = None, 53 | classes: int = 1000, 54 | save_model: bool = False, 55 | model_path: str = None, 56 | ) -> Model: 57 | """ 58 | Abstract Method for Create Model 59 | 60 | :param include_top: 61 | :param weights: 62 | :param input_tensor: 63 | :param input_shape: 64 | :param large_input: 65 | :param pooling: 66 | :param classes: 67 | :param save_model: 68 | :param model_path: 69 | """ 70 | raise NotImplementedError 71 | -------------------------------------------------------------------------------- /azure_utils/samples/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | AI-Utilities - __init__.py 3 | 4 | Copyright (c) Microsoft Corporation. All rights reserved. 5 | Licensed under the MIT License. 
6 | """ 7 | -------------------------------------------------------------------------------- /azure_utils/utilities.py: -------------------------------------------------------------------------------- 1 | """ 2 | ai-utilities - azure_utils/utilities.py 3 | 4 | Copyright (c) Microsoft Corporation. All rights reserved. 5 | Licensed under the MIT License. 6 | """ 7 | import gzip 8 | import json 9 | import logging 10 | import math 11 | import os 12 | import re 13 | 14 | import pandas as pd 15 | import requests 16 | from azureml.core.authentication import ( 17 | AuthenticationException, 18 | AzureCliAuthentication, 19 | InteractiveLoginAuthentication, 20 | ServicePrincipalAuthentication, 21 | AbstractAuthentication, 22 | ) 23 | from dotenv import get_key 24 | 25 | 26 | def check_login() -> bool: 27 | """ 28 | 29 | :return: 30 | """ 31 | try: 32 | os.popen("az account show") 33 | return True 34 | except OSError: 35 | return False 36 | 37 | 38 | def read_csv_gz(url, **kwargs): 39 | """Load raw data from a .tsv.gz file into Pandas data frame.""" 40 | dataframe = pd.read_csv( 41 | gzip.open(requests.get(url, stream=True).raw), 42 | sep="\t", 43 | encoding="utf8", 44 | **kwargs 45 | ) 46 | return dataframe.set_index("Id") 47 | 48 | 49 | def clean_text(text): 50 | """Remove embedded code chunks, HTML tags and links/URLs.""" 51 | if not isinstance(text, str): 52 | return text 53 | text = re.sub(r"
<pre><code>.*?</code></pre>
", "", text) 54 | text = re.sub(r"]+>(.*)", replace_link, text) 55 | return re.sub(r"<[^>]+>", "", text) 56 | 57 | 58 | def replace_link(match): 59 | """ 60 | 61 | :param match: 62 | :return: 63 | """ 64 | if re.match(r"[a-z]+://", match.group(1)): 65 | return "" 66 | return match.group(1) 67 | 68 | 69 | def round_sample(input_dataframe, frac=0.1, min_samples=1): 70 | """Sample X ensuring at least min samples are selected.""" 71 | num_samples = max(min_samples, math.floor(len(input_dataframe) * frac)) 72 | return input_dataframe.sample(num_samples) 73 | 74 | 75 | def round_sample_strat(input_dataframe, strat, **kwargs): 76 | """Sample X ensuring at least min samples are selected.""" 77 | return input_dataframe.groupby(strat).apply(round_sample, **kwargs) 78 | 79 | 80 | def random_merge( 81 | dataframe_a, dataframe_b, number_to_merge=20, merge_col="AnswerId", key="key", n="n" 82 | ): 83 | """Pair all rows of A with 1 matching row on "on" and N-1 random rows from B""" 84 | assert key not in dataframe_a and key not in dataframe_b 85 | dataframe_a_copy = dataframe_a.copy() 86 | dataframe_a_copy[key] = dataframe_a[merge_col] 87 | dataframe_b_copy = dataframe_b.copy() 88 | dataframe_b_copy[key] = dataframe_b[merge_col] 89 | match = dataframe_a_copy.merge(dataframe_b_copy, on=key).drop(key, axis=1) 90 | match[n] = 0 91 | df_list = [match] 92 | for i in dataframe_a.index: 93 | dataframe_a_copy = dataframe_a.loc[[i]] 94 | dataframe_b_copy = dataframe_b[ 95 | dataframe_b[merge_col] != dataframe_a_copy[merge_col].iloc[0] 96 | ].sample(number_to_merge - 1) 97 | dataframe_a_copy[key] = 1 98 | dataframe_b_copy[key] = 1 99 | z = dataframe_a_copy.merge(dataframe_b_copy, how="outer", on=key).drop( 100 | key, axis=1 101 | ) 102 | z[n] = range(1, number_to_merge) 103 | df_list.append(z) 104 | return pd.concat(df_list, ignore_index=True) 105 | 106 | 107 | def text_to_json(text): 108 | """ 109 | 110 | :param text: 111 | :return: 112 | """ 113 | return json.dumps({"input": 
"{0}".format(text)}) 114 | 115 | 116 | def get_auth(env_path: str) -> AbstractAuthentication: 117 | """ 118 | 119 | :param env_path: 120 | :return: 121 | """ 122 | logger = logging.getLogger(__name__) 123 | if get_key(env_path, "password") != "YOUR_SERVICE_PRINCIPAL_PASSWORD": 124 | logger.debug("Trying to create Workspace with Service Principal") 125 | aml_sp_password = get_key(env_path, "password") 126 | aml_sp_tennant_id = get_key(env_path, "tenant_id") 127 | aml_sp_username = get_key(env_path, "username") 128 | auth = ServicePrincipalAuthentication( 129 | tenant_id=aml_sp_tennant_id, 130 | service_principal_id=aml_sp_username, 131 | service_principal_password=aml_sp_password, 132 | ) 133 | else: 134 | logger.debug("Trying to create Workspace with CLI Authentication") 135 | try: 136 | auth = AzureCliAuthentication() 137 | auth.get_authentication_header() 138 | except AuthenticationException: 139 | logger.debug("Trying to create Workspace with Interactive login") 140 | auth = InteractiveLoginAuthentication() 141 | 142 | return auth 143 | -------------------------------------------------------------------------------- /docs/Configuration_ReadMe.md: -------------------------------------------------------------------------------- 1 | # Configuration 2 | Obtaining user information is critical to any project you will produce. At a minimum it is required to get an Azure Subscription but often it is important to collect many settings for a project to be successful. 3 | 4 | In the past we have used a combination of dotenv and cookiecutter. While these can be extremely useful, this configuration code provides a user interface for the user to provide information in a yml file. 
5 | 6 | The structure of the yml file is as follows: 7 | 8 | ``` 9 | project_name: AI Default Project 10 | settings: 11 | - subscription_id: 12 | - description: Azure Subscription Id 13 | - value: <> 14 | - resource_group: 15 | - description: Azure Resource Group Name 16 | - value: <> 17 | [etc, continue adding settings as needed ] 18 | ``` 19 | 20 | ### Scripts 21 | |Name|Description| 22 | |------|------| 23 | |configuration.py|Contains a class called ProjectConfiguration. This class manages reading/writing the configuration settings file.| 24 | |configurationui.py|Contains a class called SettingsUpdate. This class reads any valid configuration file as defined by the yml structure. It dynamically builds a tkinter UI displaying the description of each setting and the ability for the user to input new values.| 25 | |config_tests.py|Unit tests for ProjectConfiguration.| 26 | |notebook_config.py|Provides a function to add into IPython Notebooks to simply add in UI driven configuration settings collection from the end user.| 27 | |exampleconfiguration.ipynb|Example IPython Notebook that utilizes the configuration settings objects.| -------------------------------------------------------------------------------- /docs/DEVELOPMENT_README.md: -------------------------------------------------------------------------------- 1 | # Development Readme 2 | 3 | ## Developer Setup 4 | 1. Windows Local 5 | 1. Windows 10 6 | 1. Windows Subsystem for Linux 7 | 1. Anaconda 8 | 1. PyCharm 9 | 1. Plugins 10 | 1. Pylint 11 | 1. SonarLint 12 | 1. PUTVTs 13 | 1. Configurations 14 | 1. Code Style 15 | 16 | 17 | 18 | ### PyCharm Configuration 19 | 20 | #### Code Style 21 | Import code style which matches pylint/autopep8 from `.developer/pycharm_code_style.xml` 22 | ![Load Configuration](images/pycharm_import_code_style.png) 23 | 24 | #### Inspections 25 | This will offer to automatically fix common issues with code. 
26 | Import code style which matches pylint/autopep8 from `.developer/pycharm_inspections.xml`) 27 | -------------------------------------------------------------------------------- /docs/app_insights_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/ai-utilities/b6c097dddff00ae4f7321da52653d9d6d8a94884/docs/app_insights_1.png -------------------------------------------------------------------------------- /docs/app_insights_availability.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/ai-utilities/b6c097dddff00ae4f7321da52653d9d6d8a94884/docs/app_insights_availability.png -------------------------------------------------------------------------------- /docs/app_insights_perf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/ai-utilities/b6c097dddff00ae4f7321da52653d9d6d8a94884/docs/app_insights_perf.png -------------------------------------------------------------------------------- /docs/app_insights_perf_dash.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/ai-utilities/b6c097dddff00ae4f7321da52653d9d6d8a94884/docs/app_insights_perf_dash.png -------------------------------------------------------------------------------- /docs/conda_ui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/ai-utilities/b6c097dddff00ae4f7321da52653d9d6d8a94884/docs/conda_ui.png -------------------------------------------------------------------------------- /docs/images/pycharm_import_code_style.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/ai-utilities/b6c097dddff00ae4f7321da52653d9d6d8a94884/docs/images/pycharm_import_code_style.png -------------------------------------------------------------------------------- /docs/kubernetes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/ai-utilities/b6c097dddff00ae4f7321da52653d9d6d8a94884/docs/kubernetes.png -------------------------------------------------------------------------------- /docs/studio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/ai-utilities/b6c097dddff00ae4f7321da52653d9d6d8a94884/docs/studio.png -------------------------------------------------------------------------------- /docs/tkinter_ui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/ai-utilities/b6c097dddff00ae4f7321da52653d9d6d8a94884/docs/tkinter_ui.png -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: ai-utilities 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - absl-py=0.9.0 7 | - ansiwrap=0.8.4 8 | - appdirs=1.4.3 9 | - astor=0.7.1 10 | - atomicwrites=1.3.0 11 | - attrs=19.3.0 12 | - backcall=0.1.0 13 | - black=19.10b0 14 | - blas=1.0 15 | - bleach 16 | - ca-certificates=2019.11.28 17 | - certifi=2019.11.28 18 | - cffi=1.14.0 19 | - chardet=3.0.4 20 | - click=7.1.1 21 | - colorama=0.4.3 22 | - cryptography=2.8 23 | - decorator=4.4.2 24 | - defusedxml=0.6.0 25 | - deprecated=1.2.7 26 | - entrypoints=0.3 27 | - freetype=2.10.0 28 | - gast=0.3.3 29 | - grpcio=1.23.0 30 | - h5py=2.10.0 31 | - hdf5=1.10.5 32 | - idna=2.9 33 | - importlib-metadata=1.5.0 34 | - intel-openmp=2019.4 35 | - ipython=7.13.0 36 | - ipython_genutils=0.2.0 37 | - 
ipywidgets=7.5.1 38 | - jedi=0.16.0 39 | - jinja2=2.11.1 40 | - joblib=0.14.1 41 | - jpeg=9 42 | - jsonschema=3.2.0 43 | - jupyter_client=6.0.0 44 | - jupyter_core=4.6.3 45 | - keras=2.3.1 46 | - lightgbm=2.3.1 47 | - lz4-c=1.8.3 48 | - mako=1.1.0 49 | - markdown=3.2.1 50 | - markupsafe=1.1.1 51 | - mistune=0.8.4 52 | - mkl=2019.4 53 | - mkl-service=2.3.0 54 | - more-itertools=8.2.0 55 | - mypy_extensions=0.4.3 56 | - nbclient=0.1.0 57 | - nbconvert=5.6.1 58 | - nbformat=5.0.4 59 | - nb_conda_kernels 60 | - notebook=6.0.3 61 | - numpy=1.18.1 62 | - olefile=0.46 63 | - openssl=1.1.1d 64 | - packaging=20.1 65 | - pandas=1.0.2 66 | - pandoc=2.9.2 67 | - pandocfilters=1.4.2 68 | - papermill=2.0.0 69 | - parso=0.6.2 70 | - pathspec=0.7.0 71 | - pickleshare=0.7.5 72 | - pillow=7.0.0 73 | - pip=20.0.2 74 | - pluggy=0.13.0 75 | - prometheus_client=0.7.1 76 | - prompt-toolkit=3.0.4 77 | - protobuf=3.11.4 78 | - py=1.8.1 79 | - pycparser=2.20 80 | - pygments=2.6.1 81 | - pygpu=0.7.6 82 | - pyopenssl=19.1.0 83 | - pyparsing=2.4.6 84 | - pyrsistent=0.15.7 85 | - pysocks=1.7.1 86 | - pytest=5.4.1 87 | - pytest-cov 88 | - pytest-nunit 89 | - python=3.7.6 90 | - python-dateutil=2.8.1 91 | - python-dotenv=0.12.0 92 | - python_abi=3.7 93 | - pytz=2019.3 94 | - pyyaml=5.3 95 | - pyzmq=19.0.0 96 | - regex=2020.2.20 97 | - requests=2.23.0 98 | - scikit-learn=0.22.2.post1 99 | - scipy=1.3.1 100 | - send2trash=1.5.0 101 | - setuptools=46.0.0 102 | - six=1.14.0 103 | - sqlite=3.30.1 104 | - tenacity=6.1.0 105 | - tensorboard=1.13.1 106 | - tensorflow=1.13.1 107 | - tensorflow-estimator=1.13.0 108 | - termcolor=1.1.0 109 | - terminado=0.8.3 110 | - testpath=0.4.4 111 | - textwrap3=0.9.2 112 | - theano=1.0.4 113 | - tk=8.6.10 114 | - toml=0.10.0 115 | - tornado=6.0.4 116 | - toolz=0.10.0 117 | - tqdm=4.43.0 118 | - traitlets=4.3.3 119 | - typed-ast=1.4.1 120 | - typing_extensions=3.7.4.1 121 | - wcwidth=0.1.8 122 | - webencodings=0.5.1 123 | - werkzeug=1.0.0 124 | - wheel=0.34.2 125 | - 
widgetsnbextension=3.5.1 126 | - wrapt=1.12.1 127 | - xz=5.2.4 128 | - yaml=0.2.2 129 | - zeromq=4.3.2 130 | - zipp=3.1.0 131 | - zlib=1.2.11 132 | - zstd=1.4.4 133 | - pip: 134 | - adal==1.2.2 135 | - applicationinsights==0.11.9 136 | - azure-common==1.1.25 137 | - azure-core==1.3.0 138 | - azure-graphrbac==0.61.1 139 | - azure-identity==1.3.0 140 | - azure-mgmt-authorization==0.60.0 141 | - azure-mgmt-containerregistry==2.8.0 142 | - azure-mgmt-deploymentmanager==0.2.0 143 | - azure-mgmt-keyvault==2.1.1 144 | - azure-mgmt-resource==8.0.1 145 | - azure-mgmt-storage==8.0.0 146 | - azure-storage-blob==12.3.0 147 | - azureml-accel-models==1.1.5 148 | - azureml-automl-core==1.1.5.1 149 | - azureml-contrib-functions==1.1.5 150 | - azureml-contrib-services==1.1.5 151 | - azureml-core==1.1.5.3 152 | - azureml-dataprep==1.3.5 153 | - azureml-dataprep-native==14.1.0 154 | - azureml-pipeline==1.1.5 155 | - azureml-pipeline-core==1.1.5 156 | - azureml-pipeline-steps==1.1.5 157 | - azureml-telemetry==1.1.5.3 158 | - azureml-train-automl-client==1.1.5.1 159 | - azureml-train-core==1.1.5 160 | - azureml-train-restclients-hyperdrive==1.1.5 161 | - backports-tempfile==1.0 162 | - backports-weakref==1.0.post1 163 | - cloudpickle==1.3.0 164 | - contextlib2==0.6.0.post1 165 | - distro==1.4.0 166 | - docker==4.2.0 167 | - dotnetcore2==2.1.13 168 | - flake8==3.7.9 169 | - flask==1.1.1 170 | - isodate==0.6.0 171 | - itsdangerous==1.1.0 172 | - jeepney==0.4.3 173 | - jmespath==0.9.5 174 | - jsonpickle==1.3 175 | - junit-xml==1.9 176 | - mccabe==0.6.1 177 | - msal==1.1.0 178 | - msal-extensions==0.1.3 179 | - msrest==0.6.11 180 | - msrestazure==0.6.2 181 | - ndg-httpsclient==0.5.1 182 | - oauthlib==3.1.0 183 | - portalocker==1.5.2 184 | - pyasn1==0.4.8 185 | - pycodestyle==2.5.0 186 | - pylint-junit 187 | - pytest-nunit 188 | - pyflakes==2.1.1 189 | - pyjwt==1.7.1 190 | - requests-oauthlib==1.3.0 191 | - resnet==0.1 192 | - secretstorage==3.1.2 193 | - urllib3==1.25.8 194 | - 
websocket-client==0.57.0 195 | - wget==3.2 196 | -------------------------------------------------------------------------------- /environment_r.yml: -------------------------------------------------------------------------------- 1 | name: ai-utilities 2 | channels: 3 | - conda-forge 4 | - r 5 | dependencies: 6 | - python=3.6.2 7 | - pip 8 | - jupyter 9 | - pytest 10 | - pytest-cov 11 | - nb_conda_kernels 12 | - pylint 13 | - pandas 14 | - scikit-learn 15 | - numpy 16 | - urllib3 17 | - cudatoolkit==9.0 18 | - tensorflow-gpu==1.14.0 19 | - matplotlib 20 | - jupyter_contrib_nbextensions 21 | - ipywidgets 22 | - ipykernel 23 | - r-irkernel 24 | - papermill==2.0.0 25 | - nbconvert==5.6.1 26 | - nbformat==5.0.4 27 | - PyYAML==5.3 28 | - msrestazure==0.6.2 29 | - lightgbm==2.3.1 30 | - Deprecated==1.2.7 31 | - pip: 32 | - resnet 33 | - azure-core 34 | - azure-common 35 | - azureml-core==1.0.85 36 | - azureml-train==1.0.85 37 | - azureml-contrib-services==1.0.85 38 | - azureml-contrib-functions==1.0.85 39 | - pylint-junit 40 | - pytest-nunit 41 | - junit-xml 42 | - python-dotenv 43 | - Pillow==6.1.0 44 | - wget==3.2 45 | - toolz==0.9.0 46 | - tqdm==4.32.2 47 | - keras==2.2.4 48 | - locustio==0.11.0 49 | - prompt-toolkit==2.0.9 50 | - PyOpenSSL 51 | - nb-clean 52 | -------------------------------------------------------------------------------- /notebooks/AzureMachineLearningConfig.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "pycharm": { 7 | "name": "#%% md\n" 8 | } 9 | }, 10 | "source": [ 11 | "# Azure Machine Learning Workspace Configuration\n", 12 | "\n", 13 | "\n", 14 | "Copyright (c) Microsoft Corporation. All rights reserved.\n", 15 | "Licensed under the MIT License.\n", 16 | "\n", 17 | "# Installation and configuration\n", 18 | "This notebook configures the notebooks in this tutorial to connect to an Azure Machine Learning (AML) Workspace. 
\n", 19 | "You can use an existing workspace or create a new one.\n", 20 | "\n", 21 | "## Prerequisites\n", 22 | "\n", 23 | "If you have already completed the prerequisites and selected the correct Kernel for this notebook, the AML Python SDK \n", 24 | "is already installed.\n", 25 | "\n", 26 | "## Set up your Azure Machine Learning workspace\n", 27 | "## Load Configurations from file\n", 28 | "\n", 29 | "Configurations are loaded by default from a file `project.yml`, to prevent accident commits of Azure secrets into \n", 30 | "source control. This file name is included in the `.gitignore` to also prevent accident commits. A template file \n", 31 | "is included that should be copied, and each parameter filled in.\n", 32 | "\n", 33 | "If the file is not present, and UI Prompt will pop up to insert configurations, and save to the file.\n", 34 | "\n", 35 | "![alt text](https://raw.githubusercontent.com/microsoft/ai-architecture-template/master/docs/popup.png \"UI Prompt\")\n", 36 | "\n", 37 | "## Create the workspace\n", 38 | "This cell will also create an AML workspace for you in a subscription, provided you have the correct permissions.\n", 39 | "\n", 40 | "For more details about the individual workspace creation steps please see this \n", 41 | "[sample notebook](https://github.com/Azure/MachineLearningNotebooks/blob/master/configuration.ipynb).\n", 42 | "\n", 43 | "This will fail when:\n", 44 | "1. You do not have permission to create a workspace in the resource group\n", 45 | "1. You do not have permission to create a resource group if it's non-existing.\n", 46 | "1. You are not a subscription owner or contributor and no Azure ML workspaces have ever been created in this \n", 47 | "subscription\n", 48 | "\n", 49 | "If workspace creation fails, please work with your IT admin to provide you with the appropriate permissions or \n", 50 | "to provision the required resources. 
If this cell succeeds, you're done configuring AML!\n", 51 | "\n", 52 | "After creation we will check the details of the workspace." 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": { 59 | "pycharm": { 60 | "name": "#%%\n" 61 | }, 62 | "python_cell": true 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "from azure_utils.machine_learning.utils import get_or_create_workspace_from_file\n", 67 | "\n", 68 | "ws = get_or_create_workspace_from_file()" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": { 75 | "pycharm": { 76 | "name": "#%% \n" 77 | }, 78 | "r_cell": true 79 | }, 80 | "outputs": [], 81 | "source": [ 82 | "source(\"R/get_or_create_workspace.r\")\n", 83 | "\n", 84 | "ws <- get_or_create_workspace(\"project.yml\")" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "You are now ready to move on to the next notebook todo: add notebook here " 92 | ] 93 | } 94 | ], 95 | "metadata": { 96 | "kernelspec": { 97 | "display_name": "R", 98 | "language": "R", 99 | "name": "ir" 100 | }, 101 | "language_info": { 102 | "codemirror_mode": "r", 103 | "file_extension": ".r", 104 | "mimetype": "text/x-r-source", 105 | "name": "R", 106 | "pygments_lexer": "r", 107 | "version": "3.6.3" 108 | }, 109 | "pycharm": { 110 | "stem_cell": { 111 | "cell_type": "raw", 112 | "source": [], 113 | "metadata": { 114 | "collapsed": false 115 | } 116 | } 117 | } 118 | }, 119 | "nbformat": 4, 120 | "nbformat_minor": 2 121 | } -------------------------------------------------------------------------------- /notebooks/R/get_or_create_workspace.r: -------------------------------------------------------------------------------- 1 | # Install Azure ML SDK from CRAN 2 | install.packages("azuremlsdk") 3 | 4 | # Then, use `install_azureml()` to install the compiled code from the AzureML Python SDK. 
5 | azuremlsdk::install_azureml() 6 | 7 | library(azuremlsdk) 8 | library(yaml) 9 | 10 | get_or_create_workspace <- function(yaml_file) { 11 | 12 | configuration <- read_yaml(yaml_file) 13 | subscription_id <- configuration[['settings']][[1]][[1]][[2]][['value']] 14 | resource_group <- configuration[['settings']][[2]][[1]][[2]][['value']] 15 | ws_name <- configuration[['settings']][[3]][[1]][[2]][['value']] 16 | location <- configuration[['settings']][[4]][[1]][[2]][['value']] 17 | 18 | if (ws_name %in% names(list_workspaces(subscription_id = subscription_id))) { 19 | ws <- get_workspace(name = ws_name, 20 | subscription_id = subscription_id, 21 | resource_group = resource_group) 22 | } else { 23 | ws <- create_workspace(name = ws_name, 24 | subscription_id = subscription_id, 25 | resource_group = resource_group, 26 | location = location) 27 | } 28 | write_workspace_config(ws) 29 | return(ws) 30 | } -------------------------------------------------------------------------------- /notebooks/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | azure_utils - notebooks/__init__.py 3 | 4 | Copyright (c) Microsoft Corporation. All rights reserved. 5 | Licensed under the MIT License. 6 | """ 7 | -------------------------------------------------------------------------------- /notebooks/exampleconfiguration.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Using the configuration library is simple. Import the configure_settings function and call it. The only requirement\n", 8 | "for successful collection is to have an existing project.yml file in the current directory of the notebooks. \n", 9 | "\n", 10 | "This example project has one, so we will call configure_settings from there. \n", 11 | "\n", 12 | "Further usage require us to load the settings. 
Instead of having the user have to import other libraries, we expose a\n", 13 | "second function from notebook_config called get_settings() which will return an instance of ProjectConfiguration. \n", 14 | "\n", 15 | "To complete this example, we will obtain an instance and print out settings values. " 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "from azure_utils.configuration.notebook_config import get_or_configure_settings" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "Now that the functions are imported, lets bring up the UI to configure the settings ONLY if the subscription_id \n", 32 | "setting has not been modified from it's original value of '<>'.\n", 33 | "\n", 34 | "![tkinter ui](https://raw.githubusercontent.com/microsoft/AI-Utilities/master/docs/tkinter_ui.png \"tkinter ui\")\n" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "settings_object = get_or_configure_settings()" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "Finally, get an instance of the settings. You will do this in the main (configurable) notebook, and all follow on \n", 51 | "notebooks. \n", 52 | "\n", 53 | "From the default provided file we know the following settings are there.\n", 54 | "\n", 55 | "subscription_id, resource_group" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": { 62 | "pycharm": { 63 | "name": "#%%\n" 64 | } 65 | }, 66 | "outputs": [], 67 | "source": [ 68 | "sub_id = settings_object.get_value('subscription_id')\n", 69 | "rsrc_grp = settings_object.get_value('resource_group')\n", 70 | "\n", 71 | "print(sub_id, rsrc_grp)" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "source": [ 77 | "You have completed this sample notebook." 
78 | ], 79 | "metadata": { 80 | "collapsed": false 81 | } 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "source": [ 86 | "You are now ready to move on to the [AutoML Local](01_DataPrep.ipynb) notebook." 87 | ], 88 | "metadata": { 89 | "collapsed": false 90 | } 91 | } 92 | ], 93 | "metadata": { 94 | "kernelspec": { 95 | "display_name": "Python [conda env:ai-utilities] *", 96 | "language": "python", 97 | "name": "ai-utilities" 98 | }, 99 | "language_info": { 100 | "codemirror_mode": { 101 | "name": "ipython", 102 | "version": 3 103 | }, 104 | "file_extension": ".py", 105 | "mimetype": "text/x-python", 106 | "name": "python", 107 | "nbconvert_exporter": "python", 108 | "pygments_lexer": "ipython3", 109 | "version": "3.6.2" 110 | }, 111 | "pycharm": { 112 | "stem_cell": { 113 | "cell_type": "raw", 114 | "source": [], 115 | "metadata": { 116 | "collapsed": false 117 | } 118 | } 119 | } 120 | }, 121 | "nbformat": 4, 122 | "nbformat_minor": 4 123 | } -------------------------------------------------------------------------------- /project_sample.yml: -------------------------------------------------------------------------------- 1 | projects: 2 | - project_name: AI Default Project 3 | settings: 4 | subscription_id: 5 | description: Azure Subscription Id 6 | value: <> 7 | resource_group: 8 | description: Azure Resource Group Name 9 | value: $(User)-rg 10 | workspace_name: 11 | description: Azure ML Workspace Name 12 | value: $(User)ws 13 | workspace_region: 14 | description: Azure ML Workspace Region 15 | value: eastus 16 | image_name: 17 | description: Docker Container Image Name 18 | value: $(User)image 19 | aks_service_name: 20 | description: AKS Service Name 21 | value: $(User)aksservice 22 | aks_name: 23 | description: AKS Cluster Name 24 | value: $(User)aks 25 | aks_location: 26 | description: AKS Azure Region 27 | value: eastus 28 | vm_size: 29 | description: skew of vms in Kubernetes cluster 30 | value: Standard_D4_v2 31 | node_count: 32 | description: number 
of nodes in Kubernetes cluster 33 | value: 4 34 | num_replicas: 35 | description: number of replicas in Kubernetes cluster 36 | value: 2 37 | cpu_cores: 38 | description: cpu cores for web service 39 | value: 1 40 | num_estimators: 41 | description: number of estimators used in model training 42 | value: 2 43 | experiment_name: 44 | description: name of the Azure ML experiment 45 | value: "mlaks-train-on-local" 46 | script: 47 | description: name of the training script 48 | value: "create_model.py" 49 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | junit_family=xunit1 3 | markers = 4 | mock 5 | smoke 6 | integration 7 | -------------------------------------------------------------------------------- /sample_workspace_conf.yml: -------------------------------------------------------------------------------- 1 | subscription_id: "<>" 2 | resource_group: "<>" 3 | workspace_name: "<>" 4 | workspace_region: "<>" 5 | image_name: "<>" 6 | sql_server_name: "<>" 7 | sql_database_name: "<>" 8 | sql_username: "<>" 9 | sql_password: "<>" 10 | datastore_rg: "<>" 11 | container_name: "<>" 12 | account_name: "<>" 13 | account_key: "<>" 14 | -------------------------------------------------------------------------------- /scripts/add_ssh_ip.py: -------------------------------------------------------------------------------- 1 | from azure.common.client_factory import get_client_from_cli_profile 2 | from azure.mgmt.network import NetworkManagementClient 3 | import argparse 4 | import logging 5 | import requests 6 | import sys 7 | import time 8 | 9 | 10 | logger = logging.getLogger(__name__) 11 | rule_name = 'CleanupTool-Allow-100' 12 | 13 | 14 | if __name__ == '__main__': 15 | 16 | parser = argparse.ArgumentParser(description="Add IP to SSH whitelist in CleanupTool Security Rule") 17 | parser.add_argument("resource_group", help="Resource Group
Name") 18 | parser.add_argument("--ip", "-i", help="Manually specified IP to add") 19 | parser.add_argument("--repeat", "-r", action="store_true", help="Repeat action every minute") 20 | parser.add_argument("--quiet", "-q", action="store_true", help="Run in quiet mode without logging info") 21 | args = parser.parse_args() 22 | 23 | level=logging.ERROR if args.quiet else logging.INFO 24 | logging.basicConfig(level=level) 25 | 26 | # get current extenal ip 27 | ip = args.ip or requests.get('https://api.ipify.org').text 28 | logger.info("Using IP Address: %s", ip) 29 | 30 | # get network mgmt client using cli credentials 31 | logger.info("Starting Network Management Client") 32 | try: 33 | client = get_client_from_cli_profile(NetworkManagementClient) 34 | except: 35 | logger.error("Could not find Azure credentials", exc_info=True) 36 | sys.exit(1) 37 | 38 | try: 39 | # get network security group name for resource group 40 | vnet = client.virtual_networks.list(args.resource_group).next() 41 | nsg_name = vnet.subnets[0].network_security_group.id.split('/')[-1] 42 | logger.info("Found Network Security Group: %s", nsg_name) 43 | except: 44 | logger.error("Could not find Network Security Group", exc_info=True) 45 | sys.exit(1) 46 | 47 | repeat = True 48 | while repeat: 49 | # find CleanupTool security rule 50 | for rg in ['cleanupservice', args.resource_group]: 51 | try: 52 | security_rule = client.security_rules.get(rg, nsg_name, rule_name) 53 | break 54 | except: 55 | pass 56 | else: 57 | logger.error("Could not find Security Rule") 58 | sys.exit(1) 59 | 60 | # get list of allowed ips 61 | allowed_ips = security_rule.source_address_prefixes 62 | 63 | if ip not in allowed_ips: 64 | # add current ip to list 65 | security_rule.source_address_prefixes = [ip] + allowed_ips 66 | 67 | try: 68 | # update the security rule 69 | logger.info("Updating SSH Security Rule") 70 | client.security_rules.create_or_update(rg, nsg_name, rule_name, security_rule) 71 | logger.info("Security 
Rule Updated: please wait a few seconds before attempting to connect") 72 | except: 73 | # TODO: catch update in progress error 74 | pass 75 | 76 | repeat = args.repeat 77 | if repeat: 78 | time.sleep(60) 79 | 80 | -------------------------------------------------------------------------------- /scripts/add_webtest.sh: -------------------------------------------------------------------------------- 1 | az group deployment create \ 2 | --name ping-test-ml-3 \ 3 | --resource-group azmlrts-eastus-stable \ 4 | --template-file webtest.json \ 5 | --parameters @webtest.parameters.json -------------------------------------------------------------------------------- /scripts/create_deep_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | AI-Utilities - create_deep_model.py 3 | 4 | Copyright (c) Microsoft Corporation. All rights reserved. 5 | Licensed under the MIT License. 6 | """ 7 | import os 8 | 9 | from azure_utils.machine_learning.training_arg_parsers import get_training_parser 10 | from azure_utils.samples.deep_rts_samples import ResNet152 11 | 12 | if __name__ == '__main__': 13 | args = get_training_parser() 14 | 15 | ResNet152().create_model(weights="imagenet", save_model=True, model_path=os.path.join(args.outputs, args.model)) 16 | -------------------------------------------------------------------------------- /scripts/create_model.py: -------------------------------------------------------------------------------- 1 | 2 | from azure_utils.samples import deep_rts_samples 3 | 4 | if __name__ == '__main__': 5 | deep_rts_samples.main() 6 | 7 | -------------------------------------------------------------------------------- /scripts/deploy_app_insights_k8s.sh: -------------------------------------------------------------------------------- 1 | # Specify the Istio version that will be leveraged throughout these instructions 2 | ISTIO_VERSION=1.4.0 3 | 4 | curl -sL 
#!/usr/bin/env bash
# Install Istio on an AKS cluster and deploy the Application Insights
# Istio mixer adapter for telemetry collection.

# Specify the Istio version that will be leveraged throughout these instructions
ISTIO_VERSION=1.4.0

curl -sL "https://github.com/istio/istio/releases/download/$ISTIO_VERSION/istio-$ISTIO_VERSION-linux.tar.gz" | tar xz

cd istio-$ISTIO_VERSION
sudo cp ./bin/istioctl /usr/local/bin/istioctl
sudo chmod +x /usr/local/bin/istioctl

# Generate the bash completion file and source it in your current shell
mkdir -p ~/completions && istioctl collateral --bash -o ~/completions
source ~/completions/istioctl.bash

# Source the bash completion file in your .bashrc so that the command-line completions
# are permanently available in your shell
echo "source ~/completions/istioctl.bash" >> ~/.bashrc

kubectl create namespace istio-system --save-config

# The IstioControlPlane spec (profile: default, Linux node selector, control
# plane mTLS enabled, Grafana/Kiali/Jaeger enabled) is expected to live in
# istio.aks.yaml next to this script, referenced by the apply below.

istioctl manifest apply -f istio.aks.yaml --logtostderr --set installPackagePath=./install/kubernetes/operator/charts

kubectl get svc --namespace istio-system --output wide

# FIX: 'kubectl label namespace' requires a namespace name before the label;
# the original command omitted it and always failed.
# TODO(review): confirm 'default' is the intended workload namespace.
kubectl label namespace default istio-injection=enabled
kubectl apply -f .
kubectl get pods -n istio-system -l "app=application-insights-istio-mixer-adapter"
#!/usr/bin/env bash
# Deploy an Azure Function app backed by a container image from ACR.

myacr='azmlrtseastuae34fcc7'
location='East US'
myresourcegroup=''
myplanname=''
# FIX: bash identifiers cannot contain '-'; 'app-name' was a syntax error.
app_name=''
acrinstance='azmlrtseastuae34fcc7'
imagename='20200302061056'
username=''
imagetag='latest'
password=''

az storage account create \
    --name "dcibfunctionstorage" \
    --location 'East US' \
    --resource-group "azmlrts-eastus-stable" \
    --sku Standard_LRS

az appservice plan create \
    --name "dcibfuncplan" \
    --resource-group "azmlrts-eastus-stable" \
    --is-linux

az functionapp create \
    --resource-group "azmlrts-eastus-stable" \
    --name "azmlrts" \
    --plan "dcibfuncplan" \
    --deployment-container-image-name azmlrtseastuae34fcc7.azurecr.io/package:93985ed7a0ffc24ccd583b1a1785b0de33a76ea2636528c29403943597a2a038 \
    --storage-account "dcibfunctionstorage"

# FIX: the registry-user/password flags were detached from any command by a
# stray blank line before the continuation backslash; attach them here so the
# container configuration actually authenticates against ACR.
az functionapp config container set \
    --name "azmlrts" \
    --resource-group "azmlrts-eastus-stable" \
    --docker-custom-image-name azmlrtseastuae34fcc7.azurecr.io/package:20200302061056 \
    --docker-registry-server-url https://azmlrtseastuae34fcc7.azurecr.io \
    --docker-registry-server-user $username \
    --docker-registry-server-password $password
#!/usr/bin/env bash
# Bump the package version in setup.py (scheme: 0.<major>.<minor>) and push.
git checkout master

# Extract the current version string from setup.py, e.g. "0.3.9".
var=$(sed -ne "s/version=['\"]\([^'\"]*\)['\"] *,.*/\1/p" ./setup.py)
IFS='.' read -r -a array <<<"$var"

# array[0] is the fixed leading "0"; array[1]/array[2] are major/minor.
major="${array[1]}"
minor="${array[2]}"

if [ "${array[2]}" -ge 9 ]; then
    # Roll over: 0.3.9 -> 0.4.0.
    # FIX: the original bumped major but never reset minor, producing 0.4.9;
    # -ge also covers a minor that has somehow grown past 9.
    major=$((major + 1))
    minor=0
else
    minor=$((minor + 1))
fi

version=0.$major.$minor

sed -i "s/version=['\"]\([^'\"]*\)['\"] *,.*/version=\"$version\",/" ./setup.py

git add setup.py
git commit -m "update version number for next release"
git push
')]" 58 | }, 59 | "SyntheticMonitorId": "[variables('pingTestName')]" 60 | } 61 | } 62 | ] 63 | } -------------------------------------------------------------------------------- /scripts/webtest.parameters.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://schema.management.azure.com/schemas/2015-01-01/deploymentParameters.json#", 3 | "contentVersion": "1.0.0.0", 4 | "parameters": { 5 | "appName": { 6 | "value": "azmlrtseinsightsbdbf93ae" 7 | }, 8 | "pingURL": { 9 | "value": "http://13.90.141.205:80/api/v1/service/aksservice2/score" 10 | }, 11 | "pingToken": { 12 | "value": "O4Yqa1gl5sZ1N7xc9vaZ1uOmdeFhZwtw" 13 | }, 14 | "location": { 15 | "value": "East US" 16 | }, 17 | "pingTestName": { 18 | "value": "ping-test-ml-3" 19 | } 20 | } 21 | } -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """ 2 | AI-Utilities - setup.py 3 | 4 | Copyright (c) Microsoft Corporation. All rights reserved. 5 | Licensed under the MIT License. 6 | """ 7 | from setuptools import find_packages, setup 8 | 9 | # This is the name of your PyPI-package. 
"""Scoring entry points for the AzureML deep real-time web service."""
import sys

# Deep model deserialization can recurse past the default limit.
sys.setrecursionlimit(3000)

from azureml.contrib.services.aml_request import rawhttp


def init():
    """Initialise the model and scoring function."""
    global process_and_score
    from azure_utils.samples.deep_rts_samples import get_model_api

    process_and_score = get_model_api()


@rawhttp
def run(request):
    """Make a prediction based on the data passed in using the preloaded model."""
    from azure_utils.machine_learning.realtime import default_response

    # Guard clause: anything other than POST gets the default response.
    if request.method != 'POST':
        return default_response(request)
    return process_and_score(request.files)
"""
ai-utilities - test_config.py

Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT License.
"""
import os

from azure_utils.configuration.project_configuration import ProjectConfiguration, find_file


def dont_test_config() -> None:
    """
    Round-trip a ProjectConfiguration (create, save, reload, mutate) and
    exercise find_file with present and absent files.
    (Prefixed 'dont_' so pytest does not collect it.)
    """
    new_config_file = "./testconfiguration.yml"
    project_name = "Test Project"

    # Make sure a stale file from a previous run doesn't exist.
    remove_config_file(new_config_file)

    # Create a new configuration with a specific name and two settings.
    proj_config = ProjectConfiguration(new_config_file)
    proj_config.set_project_name(project_name)
    proj_config.add_setting("sub_id", "Your Azure Subscription", "my_sub")
    proj_config.add_setting("workspace", "Your Azure ML Workspace", "my_ws")

    assert proj_config.project_name() == project_name
    assert proj_config.get_value('sub_id') == 'my_sub'
    assert proj_config.get_value('workspace') == 'my_ws'
    # presumably 12 default settings plus the 2 added above -- TODO confirm
    assert len(proj_config.get_settings()) == 14

    # Save it and ensure the file exists.
    assert proj_config.configuration
    proj_config.save_configuration()
    assert os.path.isfile(new_config_file)

    # Reload it and check the persisted state round-tripped.
    proj_config = ProjectConfiguration(new_config_file)
    assert proj_config.project_name() == project_name
    assert proj_config.get_value('sub_id') == 'my_sub'
    assert proj_config.get_value('workspace') == 'my_ws'
    assert len(proj_config.get_settings()) == 14

    # Change a setting and test we get the right value.
    proj_config.set_value('sub_id', 'new_sub')
    assert proj_config.get_value('sub_id') == 'new_sub'

    remove_config_file(new_config_file)

    # find_file on a file in the current directory.
    # FIX: close handles deterministically -- the original leaked them, which
    # makes the subsequent os.remove fail on Windows.
    file1 = "this_is_file.txt"
    with open(file1, "w+"):
        pass
    found, _ = find_file(file1)
    assert found
    os.remove(file1)

    # find_file on a file that does not exist.
    found, _ = find_file("not_this_is_file.txt")
    assert not found

    # find_file on a file in the parent directory.
    file2 = "../this_is_higher_file.txt"
    with open(file2, "w+"):
        pass
    found, _ = find_file(file2)
    assert found
    os.remove(file2)


def remove_config_file(conf_file: str) -> None:
    """
    Clean up configuration file

    :param conf_file: location of configuration file
    """
    if os.path.isfile(conf_file):
        os.remove(conf_file)
"""
AI-Utilities - tests/conftest.py

Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT License.
"""
import pytest


def pytest_collection_modifyitems(items):
    """
    Tag collected tests with the marker matching their node id.

    pytest.ini registers the 'mock', 'smoke' and 'integration' markers; the
    original code added an unregistered 'interface' marker to both groups
    (copy-paste bug), which broke marker-based selection such as
    ``pytest -m mock`` / ``pytest -m integration``.

    :param items: list of collected pytest test items (mutated in place)
    """
    for item in items:
        if "mock" in item.nodeid:
            item.add_marker(pytest.mark.mock)
        if "integration" in item.nodeid:
            item.add_marker(pytest.mark.integration)
"""Integration tests for FPGA/GPU real-time scoring deployments."""
import pytest
import requests
import wget
from azureml.core.webservice import AksWebservice

from azure_utils.machine_learning.contexts.realtime_score_context import (
    FPGARealtimeScore,
)
from azure_utils.machine_learning.contexts.workspace_contexts import WorkspaceContext


@pytest.fixture
def workspace():
    """Get or create the AzureML workspace shared by these tests."""
    return WorkspaceContext.get_or_create_workspace()


def test_fpga_deploy(workspace):
    """Register the ResNet50 FPGA image and deploy it to a new AKS cluster."""
    model_name = "resnet50"
    image_name = "{}-image".format(model_name)
    aks_name = "my-aks-cluster"

    image = FPGARealtimeScore.register_resnet_50(workspace, model_name, image_name)
    assert image
    # Create the cluster
    aks_target = FPGARealtimeScore.create_aks(workspace, aks_name)
    assert aks_target
    aks_service = FPGARealtimeScore.create_aks_service(workspace, aks_target, image)
    assert aks_service


def test_gpu_service(workspace):
    """Smoke-test the deployed GPU web service with GET and POST requests."""
    aks_service_name = "deepaksservice"

    assert aks_service_name in workspace.webservices, f"{aks_service_name} not found."
    aks_service = AksWebservice(workspace, name=aks_service_name)
    assert (
        aks_service.state == "Healthy"
    ), f"{aks_service_name} is in state {aks_service.state}."
    scoring_url = aks_service.scoring_uri
    print(scoring_url)
    api_key = aks_service.get_keys()[0]

    headers = {"Authorization": ("Bearer " + api_key)}

    # FIX: the original leaked the image file handle and re-imported requests
    # inside the function; the handle is now closed deterministically.
    with open("snowleopardgaze.jpg", "rb") as image_file:
        files = {"image": image_file}
        r_get = requests.get(scoring_url, headers=headers)
        assert r_get
        r_post = requests.post(scoring_url, files=files, headers=headers)
        assert r_post


def test_fpga_service(workspace):
    """Score an image against the FPGA service via the gRPC client."""
    # Using the grpc client in Azure ML Accelerated Models SDK package
    aks_service_name = "my-aks-service"
    aks_service = AksWebservice(workspace=workspace, name=aks_service_name)
    client = FPGARealtimeScore.get_prediction_client(aks_service)

    # Score image with input and output tensor names
    input_tensors, output_tensors = FPGARealtimeScore.get_resnet50_IO()
    wget.download(
        "https://raw.githubusercontent.com/Azure/MachineLearningNotebooks/"
        "master/how-to-use-azureml/deployment/accelerated-models/snowleopardgaze.jpg"
    )

    results = client.score_file(
        path="snowleopardgaze.jpg", input_name=input_tensors, outputs=output_tensors
    )

    # map results [class_id] => [confidence]
    results = enumerate(results)
    # sort results by confidence
    sorted_results = sorted(results, key=lambda x: x[1], reverse=True)
    # print top 5 results
    classes_entries = requests.get(
        "https://raw.githubusercontent.com/Lasagne/Recipes/"
        "master/examples/resnet50/imagenet_classes.txt"
    ).text.splitlines()
    for top in sorted_results[:5]:
        print(classes_entries[top[0]], "confidence:", top[1])
"""
AI-Utilities - test_realtime_contexts

Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT License.
"""
import os
import os.path

import pytest

from azure_utils.machine_learning.contexts.model_management_context import (
    ModelManagementContext,
)
from azure_utils.machine_learning.contexts.realtime_score_context import (
    DeepRealtimeScore,
    MLRealtimeScore,
    RealtimeScoreAKSContext,
    RealtimeScoreFunctionsContext,
)
from azure_utils.machine_learning.contexts.workspace_contexts import WorkspaceContext


# noinspection PyMethodMayBeStatic
@pytest.mark.smoke
class WorkspaceCreationTests:
    """Workspace Creation Test Suite"""

    @pytest.fixture(scope="class")
    def context_type(self):
        """
        Abstract Workspace Type Fixture - Update with Workspace Context to test
        """
        raise NotImplementedError

    @pytest.fixture(scope="class")
    def files_for_testing(self):
        """
        Abstract fixture mapping 'train_py'/'score_py' to script file names.
        """
        raise NotImplementedError

    @pytest.fixture(scope="class")
    def realtime_score_context(
        self, context_type: RealtimeScoreAKSContext, files_for_testing
    ) -> RealtimeScoreAKSContext:
        """
        Get or Create Context for Testing

        :param context_type: impl of WorkspaceContext
        :param files_for_testing: dict of input files
        """
        raise NotImplementedError

    def test_get_or_create(
        self,
        realtime_score_context: RealtimeScoreAKSContext,
        context_type: WorkspaceContext,
    ):
        """
        Assert Context Type and Creation

        :param realtime_score_context: Testing Context
        :param context_type: Expected Context Type
        """
        assert type(realtime_score_context) is context_type
        assert realtime_score_context

    def test_get_or_create_model(self, realtime_score_context: ModelManagementContext):
        """
        Assert a model can be fetched or trained.

        :param realtime_score_context: Testing Context
        """
        assert realtime_score_context.get_or_create_model()

    def test_get_images(self, realtime_score_context: RealtimeScoreAKSContext):
        """
        Assert images have been created

        :param realtime_score_context: Testing Context
        """
        assert hasattr(realtime_score_context, "images")

    def test_get_compute_targets(self, realtime_score_context: RealtimeScoreAKSContext):
        """
        Assert the context exposes compute targets.

        :param realtime_score_context: Testing Context
        """
        assert hasattr(realtime_score_context, "compute_targets")

    def test_get_webservices(self, realtime_score_context: RealtimeScoreAKSContext):
        """
        Assert the context exposes web services.

        :param realtime_score_context: Testing Context
        """
        assert hasattr(realtime_score_context, "webservices")

    def test_get_or_create_aks(self, realtime_score_context: RealtimeScoreAKSContext):
        """
        Assert an AKS cluster can be fetched or created.

        :param realtime_score_context: Testing Context
        """
        assert realtime_score_context.get_or_create_aks()


class TestDeployRTS(WorkspaceCreationTests):
    """Concrete suite: classic ML real-time scoring context."""

    @pytest.fixture(scope="class")
    def realtime_score_context(
        self, context_type: MLRealtimeScore, files_for_testing
    ) -> MLRealtimeScore:
        """
        Get or Create Context for Testing

        :param context_type: impl of WorkspaceContext
        :param files_for_testing: dict of input files
        """
        return context_type.get_or_create_workspace(
            train_py=files_for_testing["train_py"],
            score_py=files_for_testing["score_py"],
        )

    @pytest.fixture(scope="class")
    def context_type(self):
        """Context implementation under test."""
        return MLRealtimeScore

    @pytest.fixture(scope="class")
    def files_for_testing(self):
        """Training and scoring scripts for the classic ML path."""
        return {"train_py": "create_model.py", "score_py": "driver.py"}


class TestDeployDeepRTS(WorkspaceCreationTests):
    """Concrete suite: deep-learning real-time scoring context."""

    @pytest.fixture(scope="class")
    def context_type(self):
        """Context implementation under test."""
        return DeepRealtimeScore

    @pytest.fixture(scope="class")
    def files_for_testing(self):
        """Training and scoring scripts for the deep-learning path."""
        return {"train_py": "create_deep_model.py", "score_py": "score_dl.py"}

    @pytest.fixture(scope="class")
    def realtime_score_context(
        self, context_type: DeepRealtimeScore, files_for_testing
    ) -> DeepRealtimeScore:
        """
        Get or Create Context for Testing

        :param context_type: impl of WorkspaceContext
        :param files_for_testing: dict of input files
        """
        return context_type.get_or_create_workspace(
            train_py=files_for_testing["train_py"],
            score_py=files_for_testing["score_py"],
        )

    # NOTE: test_get_or_create_model is inherited from WorkspaceCreationTests;
    # the previous byte-identical override here was redundant and was removed.

    def test_get_or_create_webservices(self, realtime_score_context: DeepRealtimeScore, files_for_testing):
        """
        Deploy the model to AKS and assert the web service is healthy.

        :param realtime_score_context: Testing Context
        :param files_for_testing: dict of input files
        """
        # FIX: the original guard only wrapped makedirs while the driver file
        # was rewritten unconditionally; now the driver is written only when
        # missing, and the directory is always ensured.
        os.makedirs("source", exist_ok=True)
        if not os.path.isfile(f"source/{files_for_testing['score_py']}"):
            score_py = """
import sys
sys.setrecursionlimit(3000)

from azureml.contrib.services.aml_request import rawhttp

def init():
    global process_and_score
    from azure_utils.samples.deep_rts_samples import get_model_api
    process_and_score = get_model_api()


@rawhttp
def run(request):
    from azure_utils.machine_learning.realtime import default_response
    if request.method == 'POST':
        return process_and_score(request.files)
    return default_response(request)

"""
            with open(f"source/{files_for_testing['score_py']}", "w") as file:
                file.write(score_py)

        model = realtime_score_context.get_or_create_model()
        inference_config = realtime_score_context.get_inference_config()
        aks_target = realtime_score_context.get_or_create_aks()
        web_service = realtime_score_context.get_or_create_aks_service(model, aks_target, inference_config)
        assert web_service.state == "Healthy"

    def test_web_service(self, realtime_score_context: DeepRealtimeScore):
        """Exercise the deployed service through the local test helper."""
        realtime_score_context.test_service_local()


# noinspection PyUnresolvedReferences,PyUnresolvedReferences,PyUnresolvedReferences
class TestDeployDeepRTSLocally:
    """Disabled local-training checks (prefix 'dont_' keeps pytest away)."""

    def dont_test_train_py(self):
        """Run the training script locally and check a model file appears."""
        # NOTE(review): checks os.path.isdir("outputs") but creates and asserts
        # on "../outputs" -- the relative paths look inconsistent; confirm the
        # intended working directory before re-enabling this test.
        if not os.path.isdir("outputs"):
            os.mkdir("../outputs")
        if os.path.isfile("script/create_deep_model_new.py"):
            os.system("python script/create_deep_model_new.py")

        assert os.path.isfile("../outputs/model.pkl")


def dont_test_get_or_create_function_endpoint():
    """Test creation of Azure Function for ML Scoring"""
    RealtimeScoreFunctionsContext.get_or_or_create_function_endpoint()
"""Integration variants of the real-time scoring context tests."""
import random
import string

import pytest
from azure.mgmt.resource import ResourceManagementClient

from azure_utils.configuration.notebook_config import project_configuration_file
from azure_utils.configuration.project_configuration import ProjectConfiguration
from azure_utils.machine_learning.contexts.model_management_context import (ModelManagementContext, )
from azure_utils.machine_learning.contexts.realtime_score_context import (DeepRealtimeScore, MLRealtimeScore,
                                                                          RealtimeScoreAKSContext, )
from azure_utils.machine_learning.contexts.workspace_contexts import WorkspaceContext


# noinspection PyMethodMayBeStatic
@pytest.mark.skip
class WorkspaceIntegrationTests:
    """Workspace Creation Test Suite"""

    @pytest.fixture(scope="class")
    def unique_configuration(self):
        """
        Yield a project configuration whose resource names carry a random
        suffix (so concurrent runs don't collide), then delete the uniquely
        named resource group on teardown.
        """
        project_configuration = ProjectConfiguration(project_configuration_file)

        allchar = string.ascii_letters + string.digits
        append = "".join(random.choice(allchar) for _ in range(1, 5))

        settings = [
            "resource_group",
            "workspace_name",
            "image_name",
            "aks_service_name",
            "aks_location",
            "aks_name",
            "deep_image_name",
            "deep_aks_service_name",
            "deep_aks_name",
            "deep_aks_location",
        ]
        for setting in settings:
            project_configuration.append_value(setting, append)
        yield project_configuration

        # Teardown: delete the resource group created for this run.
        ws = WorkspaceContext.get_or_create_workspace(
            project_configuration=project_configuration
        )
        # NOTE(review): relies on the private Workspace._auth attribute;
        # confirm no public accessor exists in the azureml-core version in use.
        rg_client = ResourceManagementClient(
            ws._auth, project_configuration.get_value("subscription_id")
        )
        rg_client.resource_groups.delete(
            resource_group_name=project_configuration.get_value("resource_group")
        )

    @pytest.fixture(scope="class")
    def context_type(self):
        """
        Abstract Workspace Type Fixture - Update with Workspace Context to test
        """
        raise NotImplementedError

    @pytest.fixture(scope="class")
    def files_for_testing(self):
        """
        Abstract fixture mapping 'train_py'/'score_py' to script file names.
        """
        raise NotImplementedError

    @pytest.fixture(scope="class")
    def realtime_score_context(
        self, context_type: RealtimeScoreAKSContext, files_for_testing
    ) -> RealtimeScoreAKSContext:
        """
        Get or Create Context for Testing

        :param context_type: impl of WorkspaceContext
        :param files_for_testing: dict of input files
        """
        raise NotImplementedError

    def test_integration_get_or_create(
        self,
        realtime_score_context: RealtimeScoreAKSContext,
        context_type: WorkspaceContext,
    ):
        """
        Assert Context Type and Creation

        :param realtime_score_context: Testing Context
        :param context_type: Expected Context Type
        """
        assert type(realtime_score_context) is context_type
        assert realtime_score_context

    def test_integration_get_or_create_model(
        self, realtime_score_context: ModelManagementContext
    ):
        """
        Assert a model can be fetched or trained.

        :param realtime_score_context: Testing Context
        """
        assert realtime_score_context.get_or_create_model()

    def test_integration_get_or_create_aks(
        self, realtime_score_context: RealtimeScoreAKSContext
    ):
        """
        Assert an AKS cluster can be fetched or created.

        :param realtime_score_context: Testing Context
        """
        assert realtime_score_context.get_or_create_aks()


class TestIntegrationRTS(WorkspaceIntegrationTests):
    """Concrete integration suite: classic ML real-time scoring context."""

    @pytest.fixture(scope="class")
    def realtime_score_context(
        self, context_type: MLRealtimeScore, files_for_testing, unique_configuration
    ) -> MLRealtimeScore:
        """
        Get or Create Context for Testing

        :param context_type: impl of WorkspaceContext
        :param files_for_testing: dict of input files
        :param unique_configuration: per-run configuration with random suffix
        """
        return context_type.get_or_create_workspace(
            project_configuration=unique_configuration,
            train_py=files_for_testing["train_py"],
            score_py=files_for_testing["score_py"],
        )

    @pytest.fixture(scope="class")
    def context_type(self):
        """Context implementation under test."""
        return MLRealtimeScore

    @pytest.fixture(scope="class")
    def files_for_testing(self):
        """Training and scoring scripts for the classic ML path."""
        return {"train_py": "create_model.py", "score_py": "driver.py"}


class TestDeployDeepRTS(WorkspaceIntegrationTests):
    """Concrete integration suite: deep-learning real-time scoring context."""

    @pytest.fixture(scope="class")
    def context_type(self):
        """Context implementation under test."""
        return DeepRealtimeScore

    @pytest.fixture(scope="class")
    def files_for_testing(self):
        """Training and scoring scripts for the deep-learning path."""
        return {"train_py": "create_deep_model.py", "score_py": "score_dl.py"}

    @pytest.fixture(scope="class")
    def realtime_score_context(
        self, context_type: DeepRealtimeScore, files_for_testing, unique_configuration
    ) -> DeepRealtimeScore:
        """
        Get or Create Context for Testing

        :param context_type: impl of WorkspaceContext
        :param files_for_testing: dict of input files
        :param unique_configuration: per-run configuration with random suffix
        """
        return context_type.get_or_create_workspace(
            project_configuration=unique_configuration,
            train_py=files_for_testing["train_py"],
            score_py=files_for_testing["score_py"],
        )

    def test_get_or_create_model(self, realtime_score_context: ModelManagementContext):
        """
        Assert a model can be fetched or trained.

        :param realtime_score_context: Testing Context
        """
        assert realtime_score_context.get_or_create_model()
FeatureUnion, make_pipeline 10 | 11 | from azure_utils.machine_learning.item_selector import ItemSelector 12 | 13 | if __name__ == '__main__': 14 | # """ Main Method to use with AzureML""" 15 | # Define the arguments. 16 | parser = argparse.ArgumentParser(description='Fit and evaluate a model based on train-test datasets.') 17 | parser.add_argument('-d', '--train_data', help='the training dataset name', default='balanced_pairs_train.tsv') 18 | parser.add_argument('-t', '--test_data', help='the test dataset name', default='balanced_pairs_test.tsv') 19 | parser.add_argument('-i', '--estimators', help='the number of learner estimators', type=int, default=1) 20 | parser.add_argument('--min_child_samples', help='the minimum number of samples in a child(leaf)', type=int, 21 | default=20) 22 | parser.add_argument('-v', '--verbose', help='the verbosity of the estimator', type=int, default=-1) 23 | parser.add_argument('-n', '--ngrams', help='the maximum size of word ngrams', type=int, default=1) 24 | parser.add_argument('-u', '--unweighted', help='do not use instance weights', action='store_true', default=False) 25 | parser.add_argument('-m', '--match', help='the maximum number of duplicate matches', type=int, default=20) 26 | parser.add_argument('--outputs', help='the outputs directory', default='.') 27 | parser.add_argument('--inputs', help='the inputs directory', default='.') 28 | parser.add_argument('-s', '--save', help='save the model', action='store_true', default=True) 29 | parser.add_argument('--model', help='the model file', default='model.pkl') 30 | parser.add_argument('--instances', help='the instances file', default='inst.txt') 31 | parser.add_argument('--labels', help='the labels file', default='labels.txt') 32 | parser.add_argument('-r', '--rank', help='the maximum rank of correct answers', type=int, default=3) 33 | args = parser.parse_args() 34 | 35 | run = Run.get_context() 36 | 37 | # The training and testing datasets. 
38 | inputs_path = args.inputs 39 | data_path = os.path.join(inputs_path, args.train_data) 40 | test_path = os.path.join(inputs_path, args.test_data) 41 | 42 | # Create the outputs folder. 43 | outputs_path = args.outputs 44 | os.makedirs(outputs_path, exist_ok=True) 45 | model_path = os.path.join(outputs_path, args.model) 46 | instances_path = os.path.join(outputs_path, args.instances) 47 | labels_path = os.path.join(outputs_path, args.labels) 48 | 49 | # Load the training data 50 | print('Reading {}'.format(data_path)) 51 | train = pd.read_csv(data_path, sep='\t', encoding='latin1', error_bad_lines=False) 52 | 53 | # Limit the number of duplicate-original question matches. 54 | train = train[train.n < args.match] 55 | 56 | # Define the roles of the columns in the training data. 57 | feature_columns = ['Text_x', 'Text_y'] 58 | label_column = 'Label' 59 | duplicates_id_column = 'Id_x' 60 | answer_id_column = 'AnswerId_y' 61 | 62 | # Report on the training dataset: the number of rows and the proportion of true matches. 63 | print('train: {:,} rows with {:.2%} matches'.format( 64 | train.shape[0], train[label_column].mean())) 65 | 66 | # Compute the instance weights used to correct for class imbalance in training. 67 | weight_column = 'Weight' 68 | if args.unweighted: 69 | weight = pd.Series([1.0], train[label_column].unique()) 70 | else: 71 | label_counts = train[label_column].value_counts() 72 | weight = train.shape[0] / (label_counts.shape[0] * label_counts) 73 | train[weight_column] = train[label_column].apply(lambda x: weight[x]) 74 | 75 | # Collect the unique ids that identify each original question's answer. 76 | labels = sorted(train[answer_id_column].unique()) 77 | label_order = pd.DataFrame({'label': labels}) 78 | 79 | # Collect the parts of the training data by role. 80 | train_x = train[feature_columns] 81 | train_y = train[label_column] 82 | sample_weight = train[weight_column] 83 | 84 | # Use the inputs to define the hyperparameters used in training. 
85 | n_estimators = args.estimators 86 | min_child_samples = args.min_child_samples 87 | if args.ngrams > 0: 88 | ngram_range = (1, args.ngrams) 89 | else: 90 | ngram_range = None 91 | 92 | # Verify that the hyperparameter values are valid. 93 | assert n_estimators > 0 94 | assert min_child_samples > 1 95 | assert isinstance(ngram_range, tuple) and len(ngram_range) == 2 96 | assert 0 < ngram_range[0] <= ngram_range[1] 97 | 98 | # Define the pipeline that featurizes the text columns. 99 | featurization = [ 100 | (column, 101 | make_pipeline(ItemSelector(column), 102 | text.TfidfVectorizer(ngram_range=ngram_range))) 103 | for column in feature_columns] 104 | features = FeatureUnion(featurization) 105 | 106 | # Define the estimator that learns how to classify duplicate-original question pairs. 107 | estimator = lgb.LGBMClassifier(n_estimators=n_estimators, 108 | min_child_samples=min_child_samples, 109 | verbose=args.verbose) 110 | 111 | # Define the model pipeline as feeding the features into the estimator. 112 | model = Pipeline([ 113 | ('features', features), 114 | ('model', estimator) 115 | ]) 116 | 117 | # Fit the model. 118 | print('Training...') 119 | model.fit(train_x, train_y, model__sample_weight=sample_weight) 120 | 121 | # Save the model to a file, and report on its size. 
122 | if args.save: 123 | joblib.dump(model, model_path) 124 | print('{} size: {:.2f} MB'.format(model_path, os.path.getsize(model_path) / (2 ** 20))) 125 | -------------------------------------------------------------------------------- /tests/machine_learning/test_deep_rts_samples.py: -------------------------------------------------------------------------------- 1 | from azure_utils.samples.deep_rts_samples import _image_ref_to_pil_image, _pil_to_numpy 2 | from toolz import compose 3 | import wget 4 | 5 | def test_image_ref_to_pil_image(): 6 | IMAGEURL = "https://bostondata.blob.core.windows.net/aksdeploymenttutorialaml/220px-Lynx_lynx_poing.jpg" 7 | 8 | import urllib 9 | import toolz 10 | from io import BytesIO 11 | img_data = toolz.pipe(IMAGEURL, urllib.request.urlopen, lambda x: x.read(), BytesIO).read() 12 | transform_input = compose(_pil_to_numpy, _image_ref_to_pil_image) 13 | # transform_input(img_data) 14 | wget.download("https://raw.githubusercontent.com/Azure/MachineLearningNotebooks/master/how-to-use-azureml/deployment/accelerated-models/snowleopardgaze.jpg", "snowleopardgaze.jpg") 15 | images_dict = {"lynx": open("snowleopardgaze.jpg", "rb")} 16 | transformed_dict = {key: transform_input(img_ref) for key, img_ref in images_dict.items()} 17 | # _pil_to_numpy(img_data) 18 | # _image_ref_to_pil_image(_pil_to_numpy(img_data)) 19 | -------------------------------------------------------------------------------- /tests/machine_learning/test_realtime.py: -------------------------------------------------------------------------------- 1 | """ 2 | AI-Utilities - test_realtime.py 3 | 4 | Copyright (c) Microsoft Corporation. All rights reserved. 5 | Licensed under the MIT License. 6 | 7 | This are long tests and are not currently tested in this SDK. 
8 | """ 9 | from azure_utils.machine_learning.realtime.image import get_or_create_lightgbm_image 10 | from azure_utils.machine_learning.utils import get_or_create_workspace_from_file 11 | 12 | 13 | def dont_test_get_or_create_workspace(): 14 | """Test Get or Create Machine Learning Workspace""" 15 | get_or_create_workspace_from_file() 16 | 17 | 18 | def dont_test_get_or_create_image(): 19 | """Test Get or Create Machine Learning Docker Image""" 20 | image = get_or_create_lightgbm_image() 21 | assert image.creation_state != "Failed" 22 | -------------------------------------------------------------------------------- /tests/machine_learning/test_register_datastores.py: -------------------------------------------------------------------------------- 1 | # """ 2 | # ai-utilities - test_register_datastores.py 3 | # 4 | # Copyright (c) Microsoft Corporation. All rights reserved. 5 | # Licensed under the MIT License. 6 | # """ 7 | # import pytest 8 | # from msrest.exceptions import HttpOperationError 9 | # 10 | # from azure_utils import directory 11 | # from azure_utils.machine_learning.register_datastores import register_blob_datastore, register_sql_datastore 12 | # from azure_utils.machine_learning.utils import get_or_create_workspace, load_configuration 13 | # 14 | # 15 | # @pytest.fixture 16 | # def init_test_vars(): 17 | # """ 18 | # Load Common Vars for Testing 19 | # 20 | # :return: CONFIG, SUBSCRIPTION_ID, RESOURCE_GROUP, WORKSPACE_NAME, WORKSPACE_REGION, workspace, SQL_DATASTORE_NAME, 21 | # BLOB_DATASTORE_NAME 22 | # :rtype: Union[Dict[Hashable, Any], list, None], str, str, str, str, Workspace, str, str 23 | # """ 24 | # cfg = load_configuration(directory + "/../workspace_conf.yml") 25 | # 26 | # subscription_id = cfg['subscription_id'] 27 | # resource_group = cfg['resource_group'] 28 | # workspace_name = cfg['workspace_name'] 29 | # workspace_region = cfg['workspace_region'] 30 | # 31 | # workspace = get_or_create_workspace(workspace_name, subscription_id, 
resource_group, workspace_region) 32 | # 33 | # sql_datastore_name = "ado_sql_datastore" 34 | # blob_datastore_name = "ado_blob_datastore" 35 | # 36 | # return {'cfg': cfg, 'subscription_id': subscription_id, "resource_group": resource_group, 37 | # "workspace_name": workspace_name, "workspace_region": workspace_region, "workspace": workspace, 38 | # "sql_datastore_name": sql_datastore_name, "blob_datastore_name": blob_datastore_name} 39 | # 40 | # 41 | # def test_register_blob_datastore(init_test_vars): 42 | # """ Test Register Blob Datastore Method """ 43 | # datastore_rg = init_test_vars['cfg']['datastore_rg'] 44 | # container_name = init_test_vars['cfg']['container_name'] # Name of Azure blob container 45 | # account_name = init_test_vars['cfg']['account_name'] # Storage account name 46 | # account_key = init_test_vars['cfg']['account_key'] # Storage account key 47 | # 48 | # blob_datastore = register_blob_datastore(init_test_vars['workspace'], init_test_vars['blob_datastore_name'], 49 | # container_name, account_name, account_key, datastore_rg) 50 | # 51 | # assert blob_datastore 52 | # 53 | # 54 | # def test_register_sql_datastore(init_test_vars): 55 | # """ Test Register SQL Datatstore Method """ 56 | # sql_server_name = init_test_vars['cfg']['sql_server_name'] # Name of Azure SQL server 57 | # sql_database_name = init_test_vars['cfg']['sql_database_name'] # Name of Azure SQL database 58 | # sql_username = init_test_vars['cfg']['sql_username'] # The username of the database user to access the database. 59 | # sql_password = init_test_vars['cfg']['sql_password'] # The password of the database user to access the database. 
60 | # 61 | # try: 62 | # sql_datastore = register_sql_datastore(init_test_vars['workspace'], init_test_vars['blob_datastore_name'], 63 | # sql_server_name, sql_database_name, sql_username, sql_password) 64 | # 65 | # assert sql_datastore 66 | # except HttpOperationError: 67 | # pass 68 | -------------------------------------------------------------------------------- /tests/machine_learning/test_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | ai-utilities - test_utils.py 3 | 4 | Copyright (c) Microsoft Corporation. All rights reserved. 5 | Licensed under the MIT License. 6 | """ 7 | import os 8 | 9 | import pytest 10 | from azureml.core import Workspace 11 | 12 | from azure_utils import directory 13 | from azure_utils.machine_learning.contexts.workspace_contexts import WorkspaceContext 14 | from azure_utils.machine_learning.utils import get_or_create_workspace, get_or_create_workspace_from_file, \ 15 | get_workspace_from_config, load_configuration 16 | 17 | filepath = directory 18 | 19 | 20 | # def test_load_configuration(): 21 | # """Test Loading Configuration to check sample file contents""" 22 | # 23 | # cfg = load_configuration(filepath + "/../sample_workspace_conf.yml") 24 | # assert cfg 25 | # 26 | # assert cfg['subscription_id'] == '<>' 27 | # assert cfg['resource_group'] == '<>' 28 | # assert cfg['workspace_name'] == '<>' 29 | # assert cfg['workspace_region'] == '<>' 30 | # assert cfg['image_name'] == '<>' 31 | # 32 | # assert cfg['sql_server_name'] == '<>' 33 | # assert cfg['sql_database_name'] == '<>' 34 | # assert cfg['sql_username'] == '<>' 35 | # assert cfg['sql_password'] == '<>' 36 | # 37 | # assert cfg['datastore_rg'] == '<>' 38 | # assert cfg['container_name'] == '<>' 39 | # assert cfg['account_name'] == '<>' 40 | # assert cfg['account_key'] == '<>' 41 | 42 | 43 | # def test_get_or_create_workspace(workspace): 44 | # """Test Get or Create Workspace Method""" 45 | # assert isinstance(workspace, 
Workspace) 46 | # assert os.path.isfile('./.azureml/config.json') 47 | 48 | 49 | def dont_test_get_workspace_from_config(): 50 | """ Test Get Workspace From Config File""" 51 | 52 | workspace = get_workspace_from_config() 53 | assert isinstance(workspace, Workspace) 54 | 55 | 56 | def dont_test_get_workspace_from_project_config(): 57 | """ Test Get Workspace From Project File""" 58 | 59 | get_or_create_workspace_from_file() 60 | 61 | workspace = WorkspaceContext.get_or_create_workspace() 62 | assert isinstance(workspace, Workspace) 63 | 64 | 65 | # @pytest.fixture 66 | # def cfg(): 67 | # """ 68 | # 69 | # :return: 70 | # """ 71 | # cfg = load_configuration(filepath + "/../workspace_conf.yml") 72 | # return cfg 73 | 74 | 75 | @pytest.fixture 76 | def workspace(cfg): 77 | """ 78 | 79 | :param cfg: 80 | :return: 81 | """ 82 | return get_or_create_workspace(cfg['workspace_name'], cfg['subscription_id'], cfg['resource_group'], 83 | cfg['workspace_region']) 84 | -------------------------------------------------------------------------------- /tests/mocks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/ai-utilities/b6c097dddff00ae4f7321da52653d9d6d8a94884/tests/mocks/__init__.py -------------------------------------------------------------------------------- /tests/mocks/azureml/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/ai-utilities/b6c097dddff00ae4f7321da52653d9d6d8a94884/tests/mocks/azureml/__init__.py -------------------------------------------------------------------------------- /tests/test_notebooks.py: -------------------------------------------------------------------------------- 1 | """ 2 | AI-Utilities - test_notebooks.py 3 | 4 | Copyright (c) Microsoft Corporation. All rights reserved. 5 | Licensed under the MIT License. 
6 | """ 7 | 8 | import pytest 9 | 10 | from azure_utils import notebook_directory 11 | from azure_utils.dev_ops.testing_utilities import run_notebook 12 | 13 | 14 | 15 | @pytest.mark.parametrize("notebook", [notebook_directory + "/exampleconfiguration.ipynb"]) 16 | def dont_test_notebook(notebook: str, add_nunit_attachment: pytest.fixture): 17 | """ 18 | Jupyter Notebook Test 19 | :param notebook: input notebook 20 | :param add_nunit_attachment: pytest fixture 21 | """ 22 | run_notebook(notebook, add_nunit_attachment, kernel_name="ai-utilities", root=notebook_directory) 23 | -------------------------------------------------------------------------------- /workspace_conf.yml: -------------------------------------------------------------------------------- 1 | subscription_id: "0ca618d2-22a8-413a-96d0-0f1b531129c3" 2 | resource_group: "dciborow-lap-test" 3 | workspace_name: "dciborowlapws" 4 | workspace_region: "westus" 5 | --------------------------------------------------------------------------------