├── walgreens-boots-demo │   ├── README.md │   ├── conda.yml │   └── wba-demo.ipynb ├── .gitignore └── README.md /walgreens-boots-demo/README.md: -------------------------------------------------------------------------------- 1 | # Walgreens Boots Alliance Demo 2 | 3 | ## Introduction 4 | 5 | Boots UK, trading as Boots, is a health and beauty retailer and pharmacy chain in the United Kingdom, Ireland, and other territories. The company is part of the Walgreens Boots Alliance, which also owns the US-based Walgreens retailer. 6 | 7 | Their loyalty program, the Boots Advantage Card, offers personalized offers to over 14.4 million registered customers. These are typically based on product categories or specific brands. However, they have a problem: their data science team is struggling to build specific, tuned models for every possible promotion, leading to suboptimal or missed opportunities for customer recommendations. 8 | 9 | In this demo, we'll use a simulated data set to see how Azure Machine Learning with Automated Machine Learning can help solve this challenge. 10 | 11 | ## Getting Started 12 | 13 | For the full demo experience, we recommend the following: 14 | 15 | 1. Create a new Azure Machine Learning workspace in the Azure Portal. 16 | 1. Create a new Notebook VM from your AML workspace. 17 | 1. Open Jupyter Notebooks on your new Notebook VM by clicking the link in the Azure Portal. 18 | 1. Open a terminal within Jupyter Notebooks and clone this repository. 19 | 1. Create a new Conda environment from the `conda.yml` file (i.e. `conda env create -n wba-demo -f ./conda.yml`). 20 | 1. Open `wba-demo.ipynb`, switch to the new Python kernel (i.e. `wba-demo`), and follow the enclosed instructions. 21 | 22 | **Media Elements and Templates.** You may copy and use images, clip art, animations, sounds, music, shapes, video clips and templates provided with the sample application and identified for such use in documents and projects that you create using the sample application. These use rights only apply to your use of the sample application and you may not redistribute such media otherwise. -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/python,visualstudiocode,jupyternotebooks 3 | # Edit at https://www.gitignore.io/?templates=python,visualstudiocode,jupyternotebooks 4 | 5 | ### JupyterNotebooks ### 6 | # gitignore template for Jupyter Notebooks 7 | # website: http://jupyter.org/ 8 | 9 | .ipynb_checkpoints 10 | */.ipynb_checkpoints/* 11 | 12 | # Remove previous ipynb_checkpoints 13 | # git rm -r .ipynb_checkpoints/ 14 | # 15 | 16 | ### Python ### 17 | # Byte-compiled / optimized / DLL files 18 | __pycache__/ 19 | *.py[cod] 20 | *$py.class 21 | 22 | # C extensions 23 | *.so 24 | 25 | # Distribution / packaging 26 | .Python 27 | build/ 28 | develop-eggs/ 29 | dist/ 30 | downloads/ 31 | eggs/ 32 | .eggs/ 33 | lib/ 34 | lib64/ 35 | parts/ 36 | sdist/ 37 | var/ 38 | wheels/ 39 | pip-wheel-metadata/ 40 | share/python-wheels/ 41 | *.egg-info/ 42 | .installed.cfg 43 | *.egg 44 | MANIFEST 45 | 46 | # PyInstaller 47 | # Usually these files are written by a python script from a template 48 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
49 | *.manifest 50 | *.spec 51 | 52 | # Installer logs 53 | pip-log.txt 54 | pip-delete-this-directory.txt 55 | 56 | # Unit test / coverage reports 57 | htmlcov/ 58 | .tox/ 59 | .nox/ 60 | .coverage 61 | .coverage.* 62 | .cache 63 | nosetests.xml 64 | coverage.xml 65 | *.cover 66 | .hypothesis/ 67 | .pytest_cache/ 68 | 69 | # Translations 70 | *.mo 71 | *.pot 72 | 73 | # Django stuff: 74 | *.log 75 | local_settings.py 76 | db.sqlite3 77 | 78 | # Flask stuff: 79 | instance/ 80 | .webassets-cache 81 | 82 | # Scrapy stuff: 83 | .scrapy 84 | 85 | # Sphinx documentation 86 | docs/_build/ 87 | 88 | # PyBuilder 89 | target/ 90 | 91 | # Jupyter Notebook 92 | 93 | # IPython 94 | profile_default/ 95 | ipython_config.py 96 | 97 | # pyenv 98 | .python-version 99 | 100 | # pipenv 101 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 102 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 103 | # having no cross-platform support, pipenv may install dependencies that don’t work, or not 104 | # install all needed dependencies. 105 | #Pipfile.lock 106 | 107 | # celery beat schedule file 108 | celerybeat-schedule 109 | 110 | # SageMath parsed files 111 | *.sage.py 112 | 113 | # Environments 114 | .env 115 | .venv 116 | env/ 117 | venv/ 118 | ENV/ 119 | env.bak/ 120 | venv.bak/ 121 | 122 | # Spyder project settings 123 | .spyderproject 124 | .spyproject 125 | 126 | # Rope project settings 127 | .ropeproject 128 | 129 | # mkdocs documentation 130 | /site 131 | 132 | # mypy 133 | .mypy_cache/ 134 | .dmypy.json 135 | dmypy.json 136 | 137 | # Pyre type checker 138 | .pyre/ 139 | 140 | ### VisualStudioCode ### 141 | .vscode/* 142 | !.vscode/settings.json 143 | !.vscode/tasks.json 144 | !.vscode/launch.json 145 | !.vscode/extensions.json 146 | 147 | ### VisualStudioCode Patch ### 148 | # Ignore all local history of files 149 | .history 150 | 151 | # End of https://www.gitignore.io/api/python,visualstudiocode,jupyternotebooks -------------------------------------------------------------------------------- /walgreens-boots-demo/conda.yml: -------------------------------------------------------------------------------- 1 | name: wba 2 | channels: 3 | - defaults 4 | dependencies: 5 | - attrs=19.1.0 6 | - backcall=0.1.0 7 | - blas=1.0 8 | - bleach=3.1.0 9 | - ca-certificates=2019.1.23 10 | - certifi=2019.3.9 11 | - cycler=0.10.0 12 | - dbus=1.13.6 13 | - decorator=4.4.0 14 | - defusedxml=0.5.0 15 | - entrypoints=0.3 16 | - expat=2.2.6 17 | - freetype=2.9.1 18 | - gettext=0.19.8.1 19 | - glib=2.56.2 20 | - icu=58.2 21 | - intel-openmp=2019.3 22 | - ipykernel=5.1.0 23 | - ipython=7.4.0 24 | - ipython_genutils=0.2.0 25 | - ipywidgets=7.4.2 26 | - jedi=0.13.3 27 | - jinja2=2.10.1 28 | - jpeg=9b 29 | - jsonschema=3.0.1 30 | - jupyter=1.0.0 31 | - jupyter_client=5.2.4 32 | - jupyter_console=6.0.0 33 | - jupyter_core=4.4.0 34 | - kiwisolver=1.0.1 35 | - libedit=3.1.20181209 36 | - libffi=3.2.1 37 | - libiconv=1.15 38 | - libpng=1.6.36 39 | - libsodium=1.0.16 40 | - markupsafe=1.1.1 41 | - matplotlib=3.0.3 42 | - mistune=0.8.4 43 | - mkl=2019.3 44 | - mkl_fft=1.0.10 45 | - mkl_random=1.0.2 46 | - nbconvert=5.4.1 47 | - nbformat=4.4.0 48 | - ncurses=6.1 49 | - notebook=5.7.8 50 | - openssl=1.1.1b 51 | - pandoc=2.2.3.2 52 | - pandocfilters=1.4.2 53 | - parso=0.4.0 54 | - pcre=8.43 55 | - pexpect=4.7.0 56 | - pickleshare=0.7.5 57 | - pip=19.0.3 58 | - prometheus_client=0.6.0 59 | - prompt_toolkit=2.0.9 60 | - ptyprocess=0.6.0 61 | - 
pygments=2.3.1 62 | - pyparsing=2.4.0 63 | - pyqt=5.9.2 64 | - pyrsistent=0.14.11 65 | - python=3.6.8 66 | - python-dateutil=2.8.0 67 | - pytz=2019.1 68 | - pyzmq=18.0.0 69 | - qt=5.9.7 70 | - qtconsole=4.4.3 71 | - readline=7.0 72 | - send2trash=1.5.0 73 | - setuptools=41.0.0 74 | - sip=4.19.8 75 | - six=1.12.0 76 | - sqlite=3.28.0 77 | - terminado=0.8.2 78 | - testpath=0.4.2 79 | - tk=8.6.8 80 | - tornado=6.0.2 81 | - traitlets=4.3.2 82 | - wcwidth=0.1.7 83 | - webencodings=0.5.1 84 | - widgetsnbextension=3.4.2 85 | - xz=5.2.4 86 | - zeromq=4.3.1 87 | - zlib=1.2.11 88 | - pip: 89 | - adal==1.2.1 90 | - applicationinsights==0.11.8 91 | - asn1crypto==0.24.0 92 | - azure-common==1.1.19 93 | - azure-graphrbac==0.61.0 94 | - azure-mgmt-authorization==0.51.1 95 | - azure-mgmt-containerregistry==2.7.0 96 | - azure-mgmt-keyvault==1.1.0 97 | - azure-mgmt-nspkg==3.0.2 98 | - azure-mgmt-resource==2.1.0 99 | - azure-mgmt-storage==3.1.1 100 | - azure-nspkg==3.0.2 101 | - azureml-contrib-explain-model==1.0.33 102 | - azureml-contrib-notebook==1.0.33 103 | - azureml-core==1.0.33.1 104 | - azureml-dataprep==1.1.2 105 | - azureml-dataprep-native==12.0.1 106 | - azureml-explain-model==1.0.33 107 | - azureml-pipeline==1.0.33 108 | - azureml-pipeline-core==1.0.33 109 | - azureml-pipeline-steps==1.0.33 110 | - azureml-sdk==1.0.33 111 | - azureml-telemetry==1.0.33 112 | - azureml-train==1.0.33 113 | - azureml-train-automl==1.0.33 114 | - azureml-train-core==1.0.33 115 | - azureml-train-restclients-hyperdrive==1.0.33 116 | - azureml-widgets==1.0.33 117 | - backports-tempfile==1.0 118 | - backports-weakref==1.0.post1 119 | - bcrypt==3.1.6 120 | - boto==2.49.0 121 | - boto3==1.9.134 122 | - botocore==1.12.134 123 | - bz2file==0.98 124 | - cffi==1.12.3 125 | - chardet==3.0.4 126 | - contextlib2==0.5.5 127 | - cryptography==2.6.1 128 | - dill==0.2.9 129 | - distro==1.4.0 130 | - docker==3.7.2 131 | - docker-pycreds==0.4.0 132 | - docutils==0.14 133 | - dotnetcore2==2.1.8 134 | - gensim==3.7.2 135 | - h5py==2.9.0 136 | - idna==2.8 137 | - imageio==2.5.0 138 | - isodate==0.6.0 139 | - jeepney==0.4 140 | - jmespath==0.9.4 141 | - jsonform==0.0.2 142 | - jsonpickle==1.1 143 | - jsonsir==0.0.2 144 | - keras==2.2.4 145 | - keras-applications==1.0.7 146 | - keras-preprocessing==1.0.9 147 | - keras2onnx==1.3.2 148 | - lightgbm==2.2.1 149 | - msrest==0.6.6 150 | - msrestazure==0.6.0 151 | - ndg-httpsclient==0.5.1 152 | - networkx==2.3 153 | - nimbusml==0.6.5 154 | - numpy==1.16.2 155 | - oauthlib==3.0.1 156 | - onnx==1.4.1 157 | - onnxmltools==1.4.0 158 | - onnxtk==0.0.1 159 | - pandas==0.24.2 160 | - paramiko==2.4.2 161 | - pathspec==0.5.9 162 | - pillow==6.0.0 163 | - protobuf==3.7.1 164 | - pyasn1==0.4.5 165 | - pycparser==2.19 166 | - pyjwt==1.7.1 167 | - pynacl==1.3.0 168 | - pyopenssl==19.0.0 169 | - python-easyconfig==0.1.7 170 | - pywavelets==1.0.3 171 | - pyyaml==5.1 172 | - requests==2.21.0 173 | - requests-oauthlib==1.2.0 174 | - resource==0.2.1 175 | - ruamel-yaml==0.15.89 176 | - s3transfer==0.2.0 177 | - scikit-image==0.15.0 178 | - scikit-learn==0.20.3 179 | - scipy==1.1.0 180 | - seaborn==0.9.0 181 | - secretstorage==3.1.1 182 | - shap==0.28.5 183 | - skl2onnx==1.4.5 184 | - sklearn-pandas==1.7.0 185 | - smart-open==1.8.2 186 | - tf2onnx==1.4.1 187 | - tqdm==4.31.1 188 | - typing==3.6.6 189 | - typing-extensions==3.7.2 190 | - urllib3==1.25 191 | - websocket-client==0.56.0 192 | - wheel==0.30.0 193 | 194 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | # Microsoft Build 2019 - AI Track recordings and demo resources 2 | 3 | ## Azure AI Keynote 4 | 5 | BRK2006 Azure AI: Powering AI for every developer and every organization 6 | 7 | [Recording](https://mybuild.techcommunity.microsoft.com/sessions/76980?source=sessions#top-anchor) 8 | 9 | [Demo code LaLiga](https://github.com/trojenguri/LaligaBot) 10 | 11 | [Demo code Walgreens Boots Alliance](https://github.com/buildaidemos/demos/tree/master/walgreens-boots-demo) 12 | 13 | ## Machine Learning - Azure Machine Learning service 14 | 15 | BRK2004 Breaking the Wall between Data Scientists and App Developers with MLOps 16 | 17 | [Recording](https://mybuild.techcommunity.microsoft.com/sessions/76973?source=sessions#top-anchor) 18 | 19 | [Demo code](https://github.com/Microsoft/MLOps) 20 | 21 | BRK2005 Want to *actually* do machine learning? Wrangle data, build models, and deploy them with Azure Machine Learning 22 | 23 | [Recording](https://mybuild.techcommunity.microsoft.com/sessions/76974?source=sessions#top-anchor) 24 | 25 | BRK3008 Build “zero code” machine learning models with visual workflow capabilities in Azure Machine Learning service 26 | 27 | [Recording](https://mybuild.techcommunity.microsoft.com/sessions/76972?source=sessions#top-anchor) 28 | 29 | BRK3009 From Zero to AI Hero–Automatically generate ML models using Azure Machine Learning service, Automated ML 30 | 31 | [Recording](https://mybuild.techcommunity.microsoft.com/sessions/76975?source=sessions#top-anchor) 32 | 33 | [Demo code](https://notebooks.azure.com/frlazzeri/projects/automatedml-ms-build) 34 | 35 | BRK3010 Managing your ML lifecycle with Azure Databricks and Azure ML 36 | 37 | [Recording](https://mybuild.techcommunity.microsoft.com/sessions/76976?source=sessions#top-anchor) 38 | 39 | BRK3011 Welcome to the world of Machine Learning with ML.NET 1.0 40 | 41 | [Recording](https://mybuild.techcommunity.microsoft.com/sessions/76977?source=sessions#top-anchor) 42 | 43 | [Demo code](https://github.com/dotnet/machinelearning-samples) 44 | 45 | BRK3012 Open Neural Network Exchange (ONNX) in the enterprise: how Microsoft scales ML across the world and across devices 46 | 47 | [Recording](https://mybuild.techcommunity.microsoft.com/sessions/76978?source=sessions#top-anchor) 48 | 49 | BRK3013 How to build enterprise ready ML: Privacy and Security best practices, in the cloud and on the edge 50 | 51 | [Recording](https://mybuild.techcommunity.microsoft.com/sessions/76979?source=sessions#top-anchor) 52 | 53 | BRK3014 Build an AI-powered Pet Detector with Python, TensorFlow, and Visual Studio Code 54 | 55 | [Recording](https://mybuild.techcommunity.microsoft.com/sessions/76981?source=sessions#top-anchor) 56 | 57 | [Demo code](https://github.com/Microsoft/connect-petdetector) 58 | 59 | ## Knowledge Mining - Azure Search and Form Recognizer 60 | 61 | BRK2001 Introducing AI-driven content understanding with Cognitive Search and Cognitive Services 62 | 63 | [Recording](https://mybuild.techcommunity.microsoft.com/sessions/76961?source=sessions#top-anchor) 64 | 65 | BRK2002 Announcing Form Recognizer: Create real value in your business processes by automating extraction of text, key value pairs and tables from forms, and easily customizing state-of-the-art AI 66 | 67 | [Recording](https://mybuild.techcommunity.microsoft.com/sessions/76962?source=sessions#top-anchor) 68 | 69 | [Demo code](https://formdemoux.azurewebsites.net/) 70 | 71 | BRK3001 Unlock 
Knowledge Mining on your domain: build custom skills to tailor content understanding to your industry 72 | 73 | [Recording](https://mybuild.techcommunity.microsoft.com/sessions/76963?source=sessions#top-anchor) 74 | 75 | [Demo code 1](http://aka.ms/KmSolutions) 76 | 77 | [Demo code 2](https://aka.ms/seazcs) 78 | 79 | BRK3002 Try this one weird AI trick on your data. Turn any data into structured knowledge using the new Knowledge Mining capabilities of the Azure AI platform 80 | 81 | [Recording](https://mybuild.techcommunity.microsoft.com/sessions/76964?source=sessions#top-anchor) 82 | 83 | [Demo code](http://aka.ms/build2019kmdemo) 84 | 85 | 86 | ## AI apps and agents - Azure Cognitive Services and Azure Bot Service 87 | 88 | BRK2003 Designing AI Responsibly 89 | 90 | [Recording](https://mybuild.techcommunity.microsoft.com/sessions/76971?source=sessions#top-anchor) 91 | 92 | [Demo code 1](https://github.com/microsoft/interpret) 93 | 94 | [Demo code 2](https://github.com/microsoft/seal-demo) 95 | 96 | BRK3003 How to use Azure Conversational AI to scale your business for the next generation- A deep dive into La Liga’s story 97 | 98 | [Recording](https://mybuild.techcommunity.microsoft.com/sessions/76965?source=sessions#top-anchor) 99 | 100 | [Demo code 1](https://github.com/trojenguri/LaligaBot) 101 | 102 | [Demo code 2](https://github.com/microsoft/ailab/tree/master/GoogleAssistantConnector) 103 | 104 | BRK3004 How to build enterprise ready, scalable AI solutions using Azure Cognitive Services 105 | 106 | [Recording](https://mybuild.techcommunity.microsoft.com/sessions/76966?source=sessions#top-anchor) 107 | 108 | [Demo code 1](https://github.com/Azure/mmlspark) 109 | 110 | [Demo code 2](https://github.com/Microsoft/Cognitive-Samples-IntelligentKiosk) 111 | 112 | BRK3005 5 industries that are getting disrupted by Computer Vision on Cloud and on Edge 113 | 114 | [Recording](https://mybuild.techcommunity.microsoft.com/sessions/76967?source=sessions#top-anchor) 115 | 116 | [Demo code](https://github.com/CarlosP-MS/Cognitive-Services-Vision-Solution-Templates) 117 | 118 | BRK3006 What’s new in Speech Services and how to utilize them to build speech-enabled scenarios and solutions 119 | 120 | [Recording](https://mybuild.techcommunity.microsoft.com/sessions/76968?source=sessions#top-anchor) 121 | 122 | BRK3007 Deliver the Right Experiences & Content like Xbox with Cognitive Services Personalizer 123 | 124 | [Recording](https://mybuild.techcommunity.microsoft.com/sessions/76970?source=sessions#top-anchor) 125 | 126 | [Demo code 1](https://github.com/Azure-Samples/cognitive-services-personalizer-samples) 127 | 128 | [Demo code 2](https://github.com/Azure-Samples/cognitive-services-personalizer-samples/tree/master/demos/PersonalizerDemo) 129 | 130 | -------------------------------------------------------------------------------- /walgreens-boots-demo/wba-demo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Walgreens Boots Alliance Demo\n", 8 | "\n", 9 | "## Setup your environment\n", 10 | "\n", 11 | "Before running any cells, make sure you have:\n", 12 | "* Created your conda environment as per the README.md, \n", 13 | "* Selected your new conda environment to run this notebook (from the Kernel menu).\n", 14 | "* Updated the values as indicated in the cell below." 
15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "# Global constants\n", 24 | "subscription_id = '' \n", 25 | "resource_group = ''\n", 26 | "workspace_name = ''\n", 27 | "experiment_name = 'category-based-propensity'\n", 28 | "cluster_name = 'wba-cluster'\n", 29 | "project_folder = 'scripts'" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "# Key open source data analysis packages\n", 39 | "import json\n", 40 | "import logging\n", 41 | "import matplotlib.pyplot as plt\n", 42 | "import numpy as np\n", 43 | "import os\n", 44 | "import pandas as pd\n", 45 | "import re\n", 46 | "import seaborn as sns\n", 47 | "\n", 48 | "%matplotlib inline\n", 49 | "sns.set(color_codes=True)" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "# Working directories\n", 59 | "if not os.path.exists(project_folder):\n", 60 | "    os.makedirs(project_folder)" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "## Exploring our dataset\n", 68 | "\n", 69 | "Let's take a look at our dataset before building our models." 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "# Read the data in to analyze\n", 79 | "df = pd.read_csv('./data.csv')" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "# Preview available columns\n", 89 | "df.head(10)" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "# Analyze distribution of ages in the dataset\n", 99 | "sns.distplot(df['AGE'], bins=[10,20,30,40,50,60,70,80,90,100])" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "# Analyze distribution of spend in category #1\n", 109 | "sns.distplot(df['CATEGORY_1_SPEND'])" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "# Analyze how age influences whether customers have responded to category #1 campaigns\n", 119 | "g = sns.FacetGrid(df, col='BOUGHT_CATEGORY_1')\n", 120 | "g.map(sns.distplot, 'AGE')" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "# Analyze how gender influences whether customers have responded to category #1 campaigns\n", 130 | "g = sns.FacetGrid(df, col='BOUGHT_CATEGORY_1')\n", 131 | "g.map(sns.countplot, 'GENDER')" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [ 140 | "# Analyze how age and category #1 & #2 spend influence responding to category #1 campaigns\n", 141 | "sns.pairplot(df[['AGE', 'CATEGORY_1_SPEND', 'CATEGORY_2_SPEND', 'BOUGHT_CATEGORY_1']], hue='BOUGHT_CATEGORY_1')" 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": {}, 147 | "source": [ 148 | "## Setup Azure Machine Learning\n", 149 | "\n", 150 | "Let's connect, provision our compute, and upload our data." 
151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | "# Setup Azure Machine Learning\n", 160 | "from azureml.core import Run\n", 161 | "from azureml.core.compute import AksCompute, ComputeTarget, AmlCompute\n", 162 | "from azureml.core.compute_target import ComputeTargetException\n", 163 | "from azureml.core.conda_dependencies import CondaDependencies\n", 164 | "from azureml.core.container_registry import ContainerRegistry\n", 165 | "from azureml.core.experiment import Experiment\n", 166 | "from azureml.core.runconfig import DataReferenceConfiguration, RunConfiguration\n", 167 | "from azureml.core.webservice import AciWebservice\n", 168 | "from azureml.core.workspace import Workspace\n", 169 | "from azureml.data.data_reference import DataReference\n", 170 | "from azureml.pipeline.core import Pipeline, PipelineData, PublishedPipeline, PipelineRun, Schedule, TrainingOutput\n", 171 | "from azureml.pipeline.core.graph import PipelineParameter\n", 172 | "from azureml.pipeline.steps import PythonScriptStep\n", 173 | "from azureml.train.automl import AutoMLConfig, AutoMLStep\n", 174 | "from azureml.train.automl.automlexplainer import retrieve_model_explanation\n", 175 | "from azureml.train.automl.run import AutoMLRun\n", 176 | "from azureml.widgets import RunDetails\n", 177 | "\n", 178 | "import azureml\n", 179 | "\n", 180 | "# Connect to Azure Machine Learning\n", 181 | "try:\n", 182 | " ws = Workspace.from_config()\n", 183 | "except:\n", 184 | " ws = Workspace(subscription_id = subscription_id,\n", 185 | " resource_group = resource_group,\n", 186 | " workspace_name = workspace_name)\n", 187 | " ws.write_config()\n", 188 | " \n", 189 | " print('Workspace config file written')\n", 190 | " \n", 191 | "output = {}\n", 192 | "output['SDK version'] = azureml.core.VERSION\n", 193 | "output['Subscription ID'] = ws.subscription_id\n", 194 | "output['Workspace'] = ws.name\n", 195 | "output['Resource Group'] = ws.resource_group\n", 196 | "output['Location'] = ws.location\n", 197 | "pd.set_option('display.max_colwidth', -1)\n", 198 | "pd.DataFrame(data=output, index=['']).T" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [ 207 | "# Provision a compute target\n", 208 | "try:\n", 209 | " compute_target = ComputeTarget(workspace=ws, name=cluster_name)\n", 210 | " print('Found existing compute target')\n", 211 | "except ComputeTargetException:\n", 212 | " print('Creating a new compute target...')\n", 213 | " compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_DS12_V2',\n", 214 | " min_nodes=0,\n", 215 | " max_nodes=4)\n", 216 | "\n", 217 | " compute_target = ComputeTarget.create(ws, cluster_name, compute_config)\n", 218 | " compute_target.wait_for_completion(show_output=True)\n", 219 | "\n", 220 | "compute_target.status.serialize()" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": null, 226 | "metadata": {}, 227 | "outputs": [], 228 | "source": [ 229 | "# Upload our data\n", 230 | "ds = ws.get_default_datastore()\n", 231 | "ds.upload_files(['./data.csv'], target_path = 'boots', overwrite=True)" 232 | ] 233 | }, 234 | { 235 | "cell_type": "markdown", 236 | "metadata": {}, 237 | "source": [ 238 | "## Experiment with Automated ML\n", 239 | "\n", 240 | "Let's submit a training run using our data and Automated ML." 
241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": null, 246 | "metadata": {}, 247 | "outputs": [], 248 | "source": [ 249 | "%%writefile $project_folder/get_data.py\n", 250 | "\n", 251 | "import pandas as pd\n", 252 | "from sklearn.preprocessing import LabelEncoder\n", 253 | "\n", 254 | "def get_data():\n", 255 | " df = pd.read_csv('/tmp/azureml_runs/boots/data.csv')\n", 256 | "\n", 257 | " le = LabelEncoder()\n", 258 | " le.fit(df['BOUGHT_CATEGORY_1'].values)\n", 259 | " y = le.transform(df['BOUGHT_CATEGORY_1'].values)\n", 260 | "\n", 261 | " df = df.drop(['BOUGHT_CATEGORY_1'], axis=1)\n", 262 | "\n", 263 | " return { \"X\" : df, \"y\" : y }" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": null, 269 | "metadata": {}, 270 | "outputs": [], 271 | "source": [ 272 | "# Define the experiment\n", 273 | "experiment = Experiment(ws, experiment_name)" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": null, 279 | "metadata": {}, 280 | "outputs": [], 281 | "source": [ 282 | "# Create our run configuration including our data source reference and base image configuration\n", 283 | "dr = DataReferenceConfiguration(datastore_name=ds.name, \n", 284 | " path_on_compute='/tmp/azureml_runs',\n", 285 | " path_on_datastore='boots',\n", 286 | " mode='download',\n", 287 | " overwrite=False)\n", 288 | "\n", 289 | "run_config = RunConfiguration(framework=\"python\")\n", 290 | "run_config.target = compute_target\n", 291 | "run_config.data_references = {ds.name: dr}\n", 292 | "run_config.environment.docker.enabled = True\n", 293 | "run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n", 294 | "run_config.environment.python.conda_dependencies = CondaDependencies.create(pip_packages=['azureml-sdk[automl]','azureml-explain-model'], conda_packages=['numpy','py-xgboost<=0.80'])\n", 295 | "\n", 296 | "automl_config = AutoMLConfig(task = 'classification',\n", 297 | " iterations = 25,\n", 298 | " iteration_timeout_minutes = 5, \n", 299 | " max_cores_per_iteration = 2,\n", 300 | " max_concurrent_iterations = 4,\n", 301 | " primary_metric = 'accuracy',\n", 302 | " data_script = project_folder + '/get_data.py',\n", 303 | " run_configuration = run_config,\n", 304 | " path = project_folder,\n", 305 | " n_cross_validations = 2,\n", 306 | " preprocess = True)" 307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": null, 312 | "metadata": {}, 313 | "outputs": [], 314 | "source": [ 315 | "remote_run = experiment.submit(automl_config, show_output=False)\n", 316 | "remote_run" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": null, 322 | "metadata": {}, 323 | "outputs": [], 324 | "source": [ 325 | "remote_run.wait_for_completion(show_output=True)" 326 | ] 327 | }, 328 | { 329 | "cell_type": "markdown", 330 | "metadata": {}, 331 | "source": [ 332 | "# Review our results\n", 333 | "\n", 334 | "Once the experiment completes, let's review the results." 
335 | ] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "execution_count": null, 340 | "metadata": {}, 341 | "outputs": [], 342 | "source": [ 343 | "# Run details capture configuration and exact Git commit used for the run\n", 344 | "remote_run_df = pd.read_json('[' + json.dumps(remote_run.get_details()['properties']) + ']', orient='columns')\n", 345 | "remote_run_df[['azureml.git.branch','azureml.git.commit','azureml.git.repository_uri']].T" 346 | ] 347 | }, 348 | { 349 | "cell_type": "code", 350 | "execution_count": null, 351 | "metadata": {}, 352 | "outputs": [], 353 | "source": [ 354 | "# Easily explore results using interactive widgets\n", 355 | "RunDetails(remote_run).show()" 356 | ] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": null, 361 | "metadata": {}, 362 | "outputs": [], 363 | "source": [ 364 | "# Programmatically find the best model based on different metrics\n", 365 | "lookup_metric = 'accuracy'\n", 366 | "best_run, fitted_model = remote_run.get_output(metric = lookup_metric)\n", 367 | "print(best_run)\n", 368 | "print(fitted_model)" 369 | ] 370 | }, 371 | { 372 | "cell_type": "markdown", 373 | "metadata": {}, 374 | "source": [ 375 | "# Publish our model\n", 376 | "\n", 377 | "Once we've selected our preferred model, we can register it for management (and optional deployment)." 378 | ] 379 | }, 380 | { 381 | "cell_type": "code", 382 | "execution_count": null, 383 | "metadata": {}, 384 | "outputs": [], 385 | "source": [ 386 | "# Store the preferred model for your team to use\n", 387 | "model = best_run.register_model(model_name = 'category_1_model.pkl',\n", 388 | " model_path = 'outputs/model.pkl',\n", 389 | " tags = {'area': 'CATEGORY 1', 'type': 'classification'})\n", 390 | "print(model.name, model.version)" 391 | ] 392 | }, 393 | { 394 | "cell_type": "code", 395 | "execution_count": null, 396 | "metadata": {}, 397 | "outputs": [], 398 | "source": [ 399 | "# Captures training details\n", 400 | "model.serialize()" 401 | ] 402 | }, 403 | { 404 | "cell_type": "markdown", 405 | "metadata": {}, 406 | "source": [ 407 | "# Build a pipeline\n", 408 | "\n", 409 | "We can build an AML pipeline to make our experiment easy to re-run as data changes." 
410 | ] 411 | }, 412 | { 413 | "cell_type": "code", 414 | "execution_count": null, 415 | "metadata": {}, 416 | "outputs": [], 417 | "source": [ 418 | "%%writefile $project_folder/register.py\n", 419 | "\n", 420 | "from azureml.core import Workspace\n", 421 | "from azureml.core.model import Model\n", 422 | "from azureml.core import Run\n", 423 | "import argparse\n", 424 | "import json\n", 425 | "\n", 426 | "if __name__ == '__main__':\n", 427 | "    parser = argparse.ArgumentParser()\n", 428 | "    parser.add_argument(\n", 429 | "        '--model_name',\n", 430 | "        type=str,\n", 431 | "        default='',\n", 432 | "        help='Variant name you want to give to the model.'\n", 433 | "    )\n", 434 | "    parser.add_argument(\n", 435 | "        '--model_path',\n", 436 | "        type=str,\n", 437 | "        default='outputs',\n", 438 | "        help='Location of trained model.'\n", 439 | "    )\n", 440 | "\n", 441 | "    args, unparsed = parser.parse_known_args()\n", 442 | "    print(args.model_name)\n", 443 | "    print(args.model_path)\n", 444 | "    \n", 445 | "    run = Run.get_context()\n", 446 | "    ws = run.experiment.workspace\n", 447 | "    \n", 448 | "    tags = {\n", 449 | "        \"runId\": str(run.id)\n", 450 | "    }\n", 451 | "\n", 452 | "    print(json.dumps(tags))\n", 453 | "\n", 454 | "    model = Model.register(ws, model_name = args.model_name, model_path = args.model_path, tags=tags)\n", 455 | "\n", 456 | "    print('Model registered: {} \\nModel Description: {} \\nModel Version: {}'.format(model.name, model.description, model.version))" 457 | ] 458 | }, 459 | { 460 | "cell_type": "code", 461 | "execution_count": null, 462 | "metadata": {}, 463 | "outputs": [], 464 | "source": [ 465 | "# Re-use our experiment configuration\n", 466 | "input_data = DataReference(datastore=ds, \n", 467 | "                           data_reference_name='training_data',\n", 468 | "                           path_on_datastore='boots',\n", 469 | "                           mode='download',\n", 470 | "                           path_on_compute='/tmp/azureml_runs',\n", 471 | "                           overwrite=True)\n", 472 | "\n", 473 | "run_config = RunConfiguration(framework=\"python\")\n", 474 | "run_config.environment.docker.enabled = True\n", 475 | "run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n", 476 | "run_config.environment.python.conda_dependencies = CondaDependencies.create(pip_packages=['azureml-sdk[automl]'], conda_packages=['numpy','py-xgboost<=0.80'])" 477 | ] 478 | }, 479 | { 480 | "cell_type": "code", 481 | "execution_count": null, 482 | "metadata": {}, 483 | "outputs": [], 484 | "source": [ 485 | "# Build a pipeline\n", 486 | "steps = []\n", 487 | "\n", 488 | "# These are the two outputs from AutoML\n", 489 | "metrics_data = PipelineData(name='metrics_data_category_1',\n", 490 | "                            datastore=ds,\n", 491 | "                            pipeline_output_name='metrics_output_category_1',\n", 492 | "                            training_output=TrainingOutput(type='Metrics'))\n", 493 | "\n", 494 | "model_data = PipelineData(name='model_data_category_1',\n", 495 | "                          datastore=ds,\n", 496 | "                          pipeline_output_name='best_model_output_category_1',\n", 497 | "                          training_output=TrainingOutput(type='Model'))\n", 498 | "\n", 499 | "# AutoML config (note different data files for each model so it's not shared)\n", 500 | "automl_config = AutoMLConfig(task = 'classification',\n", 501 | "                             iterations = 25,\n", 502 | "                             iteration_timeout_minutes = 5, \n", 503 | "                             max_cores_per_iteration = 2,\n", 504 | "                             max_concurrent_iterations = 4,\n", 505 | "                             primary_metric = 'accuracy',\n", 506 | "                             data_script = project_folder + '/get_data.py',\n", 507 | "                             run_configuration = run_config,\n", 508 | "                             compute_target = compute_target,\n", 509 | "                             path = 
project_folder,\n", 510 | " n_cross_validations = 2,\n", 511 | " preprocess = True)\n", 512 | "\n", 513 | "# AutoML action\n", 514 | "automl_step = AutoMLStep(name='automl_module_category_1',\n", 515 | " automl_config=automl_config,\n", 516 | " inputs=[input_data],\n", 517 | " outputs=[metrics_data, model_data],\n", 518 | " allow_reuse=False)\n", 519 | "\n", 520 | "# Custom script action to register the model afterwards\n", 521 | "register_step = PythonScriptStep(name='register_category_1',\n", 522 | " script_name='register.py',\n", 523 | " compute_target=compute_target,\n", 524 | " source_directory=project_folder,\n", 525 | " arguments=['--model_name', 'category_1_model.pkl', '--model_path', model_data],\n", 526 | " inputs=[model_data],\n", 527 | " allow_reuse=False)\n", 528 | "\n", 529 | "pipeline = Pipeline(description='Generate recommendation models',\n", 530 | " workspace=ws,\n", 531 | " steps=[automl_step, register_step])\n", 532 | "\n", 533 | "pipeline.validate()\n", 534 | "\n", 535 | "# Once published, we can invoke on demand via the SDK or via a REST endpoint\n", 536 | "published_pipeline = pipeline.publish(name='category-based-propensity-pipeline')" 537 | ] 538 | }, 539 | { 540 | "cell_type": "markdown", 541 | "metadata": {}, 542 | "source": [ 543 | "## Schedule our pipeline\n", 544 | "\n", 545 | "Now that our experiment is available as a pipeline, we can schedule it or run it on demand." 546 | ] 547 | }, 548 | { 549 | "cell_type": "code", 550 | "execution_count": null, 551 | "metadata": {}, 552 | "outputs": [], 553 | "source": [ 554 | "# Automatically run our pipeline when the data changes\n", 555 | "schedule = Schedule.create(workspace=ws,\n", 556 | " name='category-based-propensity-schedule',\n", 557 | " pipeline_id=published_pipeline.id, \n", 558 | " experiment_name='category-based-propensity-schedule',\n", 559 | " datastore=ds,\n", 560 | " path_on_datastore='boots',\n", 561 | " wait_for_provisioning=True,\n", 562 | " polling_interval=1,\n", 563 | " description='Scheduled run of category-based-propensity')" 564 | ] 565 | }, 566 | { 567 | "cell_type": "code", 568 | "execution_count": null, 569 | "metadata": {}, 570 | "outputs": [], 571 | "source": [ 572 | "# Or, run it on demand\n", 573 | "published_pipeline.submit(ws, published_pipeline.name)" 574 | ] 575 | } 576 | ], 577 | "metadata": { 578 | "kernelspec": { 579 | "display_name": "Python 3", 580 | "language": "python", 581 | "name": "python3" 582 | }, 583 | "language_info": { 584 | "codemirror_mode": { 585 | "name": "ipython", 586 | "version": 3 587 | }, 588 | "file_extension": ".py", 589 | "mimetype": "text/x-python", 590 | "name": "python", 591 | "nbconvert_exporter": "python", 592 | "pygments_lexer": "ipython3", 593 | "version": "3.6.8" 594 | } 595 | }, 596 | "nbformat": 4, 597 | "nbformat_minor": 2 598 | } 599 | --------------------------------------------------------------------------------