├── walgreens-boots-demo │   ├── README.md │   ├── conda.yml │   └── wba-demo.ipynb ├── .gitignore └── README.md /walgreens-boots-demo/README.md: -------------------------------------------------------------------------------- 1 | # Walgreens Boots Alliance Demo 2 | 3 | ## Introduction 4 | 5 | Boots UK, trading as Boots, is a health and beauty retailer and pharmacy chain in the United Kingdom, Ireland, and other territories. The company is part of the Walgreens Boots Alliance, which also owns the US-based Walgreens retailer. 6 | 7 | Their loyalty program, the Boots Advantage Card, offers personalized offers to over 14.4 million registered customers. These are typically based on product categories or specific brands. However, they have a problem: their data science team is struggling to build specific, tuned models for every possible promotion, leading to suboptimal or missed opportunities for customer recommendations. 8 | 9 | In this demo, we'll use a simulated data set to see how Azure Machine Learning with Automated Machine Learning can help solve this challenge. 10 | 11 | ## Getting Started 12 | 13 | For the full demo experience, we recommend the following: 14 | 15 | 1. Create a new Azure Machine Learning workspace in the Azure Portal. 16 | 1. Create a new Notebook VM from your AML workspace. 17 | 1. Open Jupyter Notebooks on your new Notebook VM by clicking the link in the Azure Portal. 18 | 1. Open a terminal within Jupyter Notebooks and clone this repository. 19 | 1. Create a new Conda environment from the `conda.yml` file (i.e. `conda env create -n wba-demo -f ./conda.yml`). 20 | 1. Open `wba-demo.ipynb`, switch to the new Python kernel (i.e. `wba-demo`), and follow the enclosed instructions. 21 | 22 | **Media Elements and Templates.** You may copy and use images, clip art, animations, sounds, music, shapes, video clips and templates provided with the sample application and identified for such use in documents and projects that you create using the sample application. These use rights only apply to your use of the sample application and you may not redistribute such media otherwise. -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/python,visualstudiocode,jupyternotebooks 3 | # Edit at https://www.gitignore.io/?templates=python,visualstudiocode,jupyternotebooks 4 | 5 | ### JupyterNotebooks ### 6 | # gitignore template for Jupyter Notebooks 7 | # website: http://jupyter.org/ 8 | 9 | .ipynb_checkpoints 10 | */.ipynb_checkpoints/* 11 | 12 | # Remove previous ipynb_checkpoints 13 | # git rm -r .ipynb_checkpoints/ 14 | # 15 | 16 | ### Python ### 17 | # Byte-compiled / optimized / DLL files 18 | __pycache__/ 19 | *.py[cod] 20 | *$py.class 21 | 22 | # C extensions 23 | *.so 24 | 25 | # Distribution / packaging 26 | .Python 27 | build/ 28 | develop-eggs/ 29 | dist/ 30 | downloads/ 31 | eggs/ 32 | .eggs/ 33 | lib/ 34 | lib64/ 35 | parts/ 36 | sdist/ 37 | var/ 38 | wheels/ 39 | pip-wheel-metadata/ 40 | share/python-wheels/ 41 | *.egg-info/ 42 | .installed.cfg 43 | *.egg 44 | MANIFEST 45 | 46 | # PyInstaller 47 | # Usually these files are written by a python script from a template 48 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
49 | *.manifest 50 | *.spec 51 | 52 | # Installer logs 53 | pip-log.txt 54 | pip-delete-this-directory.txt 55 | 56 | # Unit test / coverage reports 57 | htmlcov/ 58 | .tox/ 59 | .nox/ 60 | .coverage 61 | .coverage.* 62 | .cache 63 | nosetests.xml 64 | coverage.xml 65 | *.cover 66 | .hypothesis/ 67 | .pytest_cache/ 68 | 69 | # Translations 70 | *.mo 71 | *.pot 72 | 73 | # Django stuff: 74 | *.log 75 | local_settings.py 76 | db.sqlite3 77 | 78 | # Flask stuff: 79 | instance/ 80 | .webassets-cache 81 | 82 | # Scrapy stuff: 83 | .scrapy 84 | 85 | # Sphinx documentation 86 | docs/_build/ 87 | 88 | # PyBuilder 89 | target/ 90 | 91 | # Jupyter Notebook 92 | 93 | # IPython 94 | profile_default/ 95 | ipython_config.py 96 | 97 | # pyenv 98 | .python-version 99 | 100 | # pipenv 101 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 102 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 103 | # having no cross-platform support, pipenv may install dependencies that don’t work, or not 104 | # install all needed dependencies. 105 | #Pipfile.lock 106 | 107 | # celery beat schedule file 108 | celerybeat-schedule 109 | 110 | # SageMath parsed files 111 | *.sage.py 112 | 113 | # Environments 114 | .env 115 | .venv 116 | env/ 117 | venv/ 118 | ENV/ 119 | env.bak/ 120 | venv.bak/ 121 | 122 | # Spyder project settings 123 | .spyderproject 124 | .spyproject 125 | 126 | # Rope project settings 127 | .ropeproject 128 | 129 | # mkdocs documentation 130 | /site 131 | 132 | # mypy 133 | .mypy_cache/ 134 | .dmypy.json 135 | dmypy.json 136 | 137 | # Pyre type checker 138 | .pyre/ 139 | 140 | ### VisualStudioCode ### 141 | .vscode/* 142 | !.vscode/settings.json 143 | !.vscode/tasks.json 144 | !.vscode/launch.json 145 | !.vscode/extensions.json 146 | 147 | ### VisualStudioCode Patch ### 148 | # Ignore all local history of files 149 | .history 150 | 151 | # End of https://www.gitignore.io/api/python,visualstudiocode,jupyternotebooks -------------------------------------------------------------------------------- /walgreens-boots-demo/conda.yml: -------------------------------------------------------------------------------- 1 | name: wba 2 | channels: 3 | - defaults 4 | dependencies: 5 | - attrs=19.1.0 6 | - backcall=0.1.0 7 | - blas=1.0 8 | - bleach=3.1.0 9 | - ca-certificates=2019.1.23 10 | - certifi=2019.3.9 11 | - cycler=0.10.0 12 | - dbus=1.13.6 13 | - decorator=4.4.0 14 | - defusedxml=0.5.0 15 | - entrypoints=0.3 16 | - expat=2.2.6 17 | - freetype=2.9.1 18 | - gettext=0.19.8.1 19 | - glib=2.56.2 20 | - icu=58.2 21 | - intel-openmp=2019.3 22 | - ipykernel=5.1.0 23 | - ipython=7.4.0 24 | - ipython_genutils=0.2.0 25 | - ipywidgets=7.4.2 26 | - jedi=0.13.3 27 | - jinja2=2.10.1 28 | - jpeg=9b 29 | - jsonschema=3.0.1 30 | - jupyter=1.0.0 31 | - jupyter_client=5.2.4 32 | - jupyter_console=6.0.0 33 | - jupyter_core=4.4.0 34 | - kiwisolver=1.0.1 35 | - libedit=3.1.20181209 36 | - libffi=3.2.1 37 | - libiconv=1.15 38 | - libpng=1.6.36 39 | - libsodium=1.0.16 40 | - markupsafe=1.1.1 41 | - matplotlib=3.0.3 42 | - mistune=0.8.4 43 | - mkl=2019.3 44 | - mkl_fft=1.0.10 45 | - mkl_random=1.0.2 46 | - nbconvert=5.4.1 47 | - nbformat=4.4.0 48 | - ncurses=6.1 49 | - notebook=5.7.8 50 | - openssl=1.1.1b 51 | - pandoc=2.2.3.2 52 | - pandocfilters=1.4.2 53 | - parso=0.4.0 54 | - pcre=8.43 55 | - pexpect=4.7.0 56 | - pickleshare=0.7.5 57 | - pip=19.0.3 58 | - prometheus_client=0.6.0 59 | - prompt_toolkit=2.0.9 60 | - ptyprocess=0.6.0 61 | - 
pygments=2.3.1 62 | - pyparsing=2.4.0 63 | - pyqt=5.9.2 64 | - pyrsistent=0.14.11 65 | - python=3.6.8 66 | - python-dateutil=2.8.0 67 | - pytz=2019.1 68 | - pyzmq=18.0.0 69 | - qt=5.9.7 70 | - qtconsole=4.4.3 71 | - readline=7.0 72 | - send2trash=1.5.0 73 | - setuptools=41.0.0 74 | - sip=4.19.8 75 | - six=1.12.0 76 | - sqlite=3.28.0 77 | - terminado=0.8.2 78 | - testpath=0.4.2 79 | - tk=8.6.8 80 | - tornado=6.0.2 81 | - traitlets=4.3.2 82 | - wcwidth=0.1.7 83 | - webencodings=0.5.1 84 | - widgetsnbextension=3.4.2 85 | - xz=5.2.4 86 | - zeromq=4.3.1 87 | - zlib=1.2.11 88 | - pip: 89 | - adal==1.2.1 90 | - applicationinsights==0.11.8 91 | - asn1crypto==0.24.0 92 | - azure-common==1.1.19 93 | - azure-graphrbac==0.61.0 94 | - azure-mgmt-authorization==0.51.1 95 | - azure-mgmt-containerregistry==2.7.0 96 | - azure-mgmt-keyvault==1.1.0 97 | - azure-mgmt-nspkg==3.0.2 98 | - azure-mgmt-resource==2.1.0 99 | - azure-mgmt-storage==3.1.1 100 | - azure-nspkg==3.0.2 101 | - azureml-contrib-explain-model==1.0.33 102 | - azureml-contrib-notebook==1.0.33 103 | - azureml-core==1.0.33.1 104 | - azureml-dataprep==1.1.2 105 | - azureml-dataprep-native==12.0.1 106 | - azureml-explain-model==1.0.33 107 | - azureml-pipeline==1.0.33 108 | - azureml-pipeline-core==1.0.33 109 | - azureml-pipeline-steps==1.0.33 110 | - azureml-sdk==1.0.33 111 | - azureml-telemetry==1.0.33 112 | - azureml-train==1.0.33 113 | - azureml-train-automl==1.0.33 114 | - azureml-train-core==1.0.33 115 | - azureml-train-restclients-hyperdrive==1.0.33 116 | - azureml-widgets==1.0.33 117 | - backports-tempfile==1.0 118 | - backports-weakref==1.0.post1 119 | - bcrypt==3.1.6 120 | - boto==2.49.0 121 | - boto3==1.9.134 122 | - botocore==1.12.134 123 | - bz2file==0.98 124 | - cffi==1.12.3 125 | - chardet==3.0.4 126 | - contextlib2==0.5.5 127 | - cryptography==2.6.1 128 | - dill==0.2.9 129 | - distro==1.4.0 130 | - docker==3.7.2 131 | - docker-pycreds==0.4.0 132 | - docutils==0.14 133 | - dotnetcore2==2.1.8 134 | - gensim==3.7.2 135 | - h5py==2.9.0 136 | - idna==2.8 137 | - imageio==2.5.0 138 | - isodate==0.6.0 139 | - jeepney==0.4 140 | - jmespath==0.9.4 141 | - jsonform==0.0.2 142 | - jsonpickle==1.1 143 | - jsonsir==0.0.2 144 | - keras==2.2.4 145 | - keras-applications==1.0.7 146 | - keras-preprocessing==1.0.9 147 | - keras2onnx==1.3.2 148 | - lightgbm==2.2.1 149 | - msrest==0.6.6 150 | - msrestazure==0.6.0 151 | - ndg-httpsclient==0.5.1 152 | - networkx==2.3 153 | - nimbusml==0.6.5 154 | - numpy==1.16.2 155 | - oauthlib==3.0.1 156 | - onnx==1.4.1 157 | - onnxmltools==1.4.0 158 | - onnxtk==0.0.1 159 | - pandas==0.24.2 160 | - paramiko==2.4.2 161 | - pathspec==0.5.9 162 | - pillow==6.0.0 163 | - protobuf==3.7.1 164 | - pyasn1==0.4.5 165 | - pycparser==2.19 166 | - pyjwt==1.7.1 167 | - pynacl==1.3.0 168 | - pyopenssl==19.0.0 169 | - python-easyconfig==0.1.7 170 | - pywavelets==1.0.3 171 | - pyyaml==5.1 172 | - requests==2.21.0 173 | - requests-oauthlib==1.2.0 174 | - resource==0.2.1 175 | - ruamel-yaml==0.15.89 176 | - s3transfer==0.2.0 177 | - scikit-image==0.15.0 178 | - scikit-learn==0.20.3 179 | - scipy==1.1.0 180 | - seaborn==0.9.0 181 | - secretstorage==3.1.1 182 | - shap==0.28.5 183 | - skl2onnx==1.4.5 184 | - sklearn-pandas==1.7.0 185 | - smart-open==1.8.2 186 | - tf2onnx==1.4.1 187 | - tqdm==4.31.1 188 | - typing==3.6.6 189 | - typing-extensions==3.7.2 190 | - urllib3==1.25 191 | - websocket-client==0.56.0 192 | - wheel==0.30.0 193 | 194 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | # Microsoft Build 2019 - AI Track recordings and demo resources 2 | 3 | ## Azure AI Keynote 4 | 5 | BRK2006 Azure AI: Powering AI for every developer and every organization 6 | 7 | [Recording](https://mybuild.techcommunity.microsoft.com/sessions/76980?source=sessions#top-anchor) 8 | 9 | [Demo code LaLiga](https://github.com/trojenguri/LaligaBot) 10 | 11 | [Demo code Walgreens Boots Alliance](https://github.com/buildaidemos/demos/tree/master/walgreens-boots-demo) 12 | 13 | ## Machine Learning - Azure Machine Learning service 14 | 15 | BRK2004 Breaking the Wall between Data Scientists and App Developers with MLOps 16 | 17 | [Recording](https://mybuild.techcommunity.microsoft.com/sessions/76973?source=sessions#top-anchor) 18 | 19 | [Demo code](https://github.com/Microsoft/MLOps) 20 | 21 | BRK2005 Want to *actually* do machine learning? Wrangle data, build models, and deploy them with Azure Machine Learning 22 | 23 | [Recording](https://mybuild.techcommunity.microsoft.com/sessions/76974?source=sessions#top-anchor) 24 | 25 | BRK3008 Build “zero code” machine learning models with visual workflow capabilities in Azure Machine Learning service 26 | 27 | [Recording](https://mybuild.techcommunity.microsoft.com/sessions/76972?source=sessions#top-anchor) 28 | 29 | BRK3009 From Zero to AI Hero–Automatically generate ML models using Azure Machine Learning service, Automated ML 30 | 31 | [Recording](https://mybuild.techcommunity.microsoft.com/sessions/76975?source=sessions#top-anchor) 32 | 33 | [Demo code](https://notebooks.azure.com/frlazzeri/projects/automatedml-ms-build) 34 | 35 | BRK3010 Managing your ML lifecycle with Azure Databricks and Azure ML 36 | 37 | [Recording](https://mybuild.techcommunity.microsoft.com/sessions/76976?source=sessions#top-anchor) 38 | 39 | BRK3011 Welcome to the world of Machine Learning with ML.NET 1.0 40 | 41 | [Recording](https://mybuild.techcommunity.microsoft.com/sessions/76977?source=sessions#top-anchor) 42 | 43 | [Demo code](https://github.com/dotnet/machinelearning-samples) 44 | 45 | BRK3012 Open Neural Network Exchange (ONNX) in the enterprise: how Microsoft scales ML across the world and across devices 46 | 47 | [Recording](https://mybuild.techcommunity.microsoft.com/sessions/76978?source=sessions#top-anchor) 48 | 49 | BRK3013 How to build enterprise ready ML: Privacy and Security best practices, in the cloud and on the edge 50 | 51 | [Recording](https://mybuild.techcommunity.microsoft.com/sessions/76979?source=sessions#top-anchor) 52 | 53 | BRK3014 Build an AI-powered Pet Detector with Python, TensorFlow, and Visual Studio Code 54 | 55 | [Recording](https://mybuild.techcommunity.microsoft.com/sessions/76981?source=sessions#top-anchor) 56 | 57 | [Demo code](https://github.com/Microsoft/connect-petdetector) 58 | 59 | ## Knowledge Mining - Azure Search and Form Recognizer 60 | 61 | BRK2001 Introducing AI-driven content understanding with Cognitive Search and Cognitive Services 62 | 63 | [Recording](https://mybuild.techcommunity.microsoft.com/sessions/76961?source=sessions#top-anchor) 64 | 65 | BRK2002 Announcing Form Recognizer: Create real value in your business processes by automating extraction of text, key value pairs and tables from forms, and easily customizing state-of-the-art AI 66 | 67 | [Recording](https://mybuild.techcommunity.microsoft.com/sessions/76962?source=sessions#top-anchor) 68 | 69 | [Demo code](https://formdemoux.azurewebsites.net/) 70 | 71 | BRK3001 Unlock 
Knowledge Mining on your domain: build custom skills to tailor content understanding to your industry 72 | 73 | [Recording](https://mybuild.techcommunity.microsoft.com/sessions/76963?source=sessions#top-anchor) 74 | 75 | [Demo code 1](http://aka.ms/KmSolutions) 76 | 77 | [Demo code 2](https://aka.ms/seazcs) 78 | 79 | BRK3002 Try this one weird AI trick on your data. Turn any data into structured knowledge using the new Knowledge Mining capabilities of the Azure AI platform 80 | 81 | [Recording](https://mybuild.techcommunity.microsoft.com/sessions/76964?source=sessions#top-anchor) 82 | 83 | [Demo code](http://aka.ms/build2019kmdemo) 84 | 85 | 86 | ## AI apps and agents - Azure Cognitive Services and Azure Bot Service 87 | 88 | BRK2003 Designing AI Responsibly 89 | 90 | [Recording](https://mybuild.techcommunity.microsoft.com/sessions/76971?source=sessions#top-anchor) 91 | 92 | [Demo code 1](https://github.com/microsoft/interpret) 93 | 94 | [Demo code 2](https://github.com/microsoft/seal-demo) 95 | 96 | BRK3003 How to use Azure Conversational AI to scale your business for the next generation- A deep dive into La Liga’s story 97 | 98 | [Recording](https://mybuild.techcommunity.microsoft.com/sessions/76965?source=sessions#top-anchor) 99 | 100 | [Demo code 1](https://github.com/trojenguri/LaligaBot) 101 | 102 | [Demo code 2](https://github.com/microsoft/ailab/tree/master/GoogleAssistantConnector) 103 | 104 | BRK3004 How to build enterprise ready, scalable AI solutions using Azure Cognitive Services 105 | 106 | [Recording](https://mybuild.techcommunity.microsoft.com/sessions/76966?source=sessions#top-anchor) 107 | 108 | [Demo code 1](https://github.com/Azure/mmlspark) 109 | 110 | [Demo code 2](https://github.com/Microsoft/Cognitive-Samples-IntelligentKiosk) 111 | 112 | BRK3005 5 industries that are getting disrupted by Computer Vision on Cloud and on Edge 113 | 114 | [Recording](https://mybuild.techcommunity.microsoft.com/sessions/76967?source=sessions#top-anchor) 115 | 116 | [Demo code](https://github.com/CarlosP-MS/Cognitive-Services-Vision-Solution-Templates) 117 | 118 | BRK3006 What’s new in Speech Services and how to utilize them to build speech-enabled scenarios and solutions 119 | 120 | [Recording](https://mybuild.techcommunity.microsoft.com/sessions/76968?source=sessions#top-anchor) 121 | 122 | BRK3007 Deliver the Right Experiences & Content like Xbox with Cognitive Services Personalizer 123 | 124 | [Recording](https://mybuild.techcommunity.microsoft.com/sessions/76970?source=sessions#top-anchor) 125 | 126 | [Demo code 1](https://github.com/Azure-Samples/cognitive-services-personalizer-samples) 127 | 128 | [Demo code 2](https://github.com/Azure-Samples/cognitive-services-personalizer-samples/tree/master/demos/PersonalizerDemo) 129 | 130 | -------------------------------------------------------------------------------- /walgreens-boots-demo/wba-demo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Walgreens Boots Alliance Demo\n", 8 | "\n", 9 | "## Setup your environment\n", 10 | "\n", 11 | "Before running any cells, make sure you have:\n", 12 | "* Created your conda environment as per the README.md, \n", 13 | "* Selected your new conda environment to run this notebook (from the Kernel menu).\n", 14 | "* Updated the values as indicated in the cell below." 
15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "# Global constants\n", 24 | "subscription_id = '' \n", 25 | "resource_group = ''\n", 26 | "workspace_name = ''\n", 27 | "experiment_name = 'category-based-propensity'\n", 28 | "cluster_name = 'wba-cluster'\n", 29 | "project_folder = 'scripts'" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "# Key open source data analysis packages\n", 39 | "import json\n", 40 | "import logging\n", 41 | "import matplotlib.pyplot as plt\n", 42 | "import numpy as np\n", 43 | "import os\n", 44 | "import pandas as pd\n", 45 | "import re\n", 46 | "import seaborn as sns\n", 47 | "\n", 48 | "%matplotlib inline\n", 49 | "sns.set(color_codes=True)" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "# Working directories\n", 59 | "if not os.path.exists(project_folder):\n", 60 | "    os.makedirs(project_folder)" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "## Exploring our dataset\n", 68 | "\n", 69 | "Let's take a look at our dataset before building our models." 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "# Read the data in to analyze\n", 79 | "df = pd.read_csv('./data.csv')" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "# Preview available columns\n", 89 | "df.head(10)" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "# Analyze distribution of ages in the dataset\n", 99 | "sns.distplot(df['AGE'], bins=[10,20,30,40,50,60,70,80,90,100])" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "# Analyze distribution of spend in category #1\n", 109 | "sns.distplot(df['CATEGORY_1_SPEND'])" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "# Analyze how age influences whether customers have responded to category #1 campaigns\n", 119 | "g = sns.FacetGrid(df, col='BOUGHT_CATEGORY_1')\n", 120 | "g.map(sns.distplot, 'AGE')" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "# Analyze how gender influences whether customers have responded to category #1 campaigns\n", 130 | "g = sns.FacetGrid(df, col='BOUGHT_CATEGORY_1')\n", 131 | "g.map(sns.countplot, 'GENDER')" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [ 140 | "# Analyze how age and category #1 & #2 spend influence responding to category #1 campaigns\n", 141 | "sns.pairplot(df[['AGE', 'CATEGORY_1_SPEND', 'CATEGORY_2_SPEND', 'BOUGHT_CATEGORY_1']], hue='BOUGHT_CATEGORY_1')" 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": {}, 147 | "source": [ 148 | "## Setup Azure Machine Learning\n", 149 | "\n", 150 | "Let's connect, provision our compute, and upload our data." 
151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | "# Setup Azure Machine Learning\n", 160 | "from azureml.core import Run\n", 161 | "from azureml.core.compute import AksCompute, ComputeTarget, AmlCompute\n", 162 | "from azureml.core.compute_target import ComputeTargetException\n", 163 | "from azureml.core.conda_dependencies import CondaDependencies\n", 164 | "from azureml.core.container_registry import ContainerRegistry\n", 165 | "from azureml.core.experiment import Experiment\n", 166 | "from azureml.core.runconfig import DataReferenceConfiguration, RunConfiguration\n", 167 | "from azureml.core.webservice import AciWebservice\n", 168 | "from azureml.core.workspace import Workspace\n", 169 | "from azureml.data.data_reference import DataReference\n", 170 | "from azureml.pipeline.core import Pipeline, PipelineData, PublishedPipeline, PipelineRun, Schedule, TrainingOutput\n", 171 | "from azureml.pipeline.core.graph import PipelineParameter\n", 172 | "from azureml.pipeline.steps import PythonScriptStep\n", 173 | "from azureml.train.automl import AutoMLConfig, AutoMLStep\n", 174 | "from azureml.train.automl.automlexplainer import retrieve_model_explanation\n", 175 | "from azureml.train.automl.run import AutoMLRun\n", 176 | "from azureml.widgets import RunDetails\n", 177 | "\n", 178 | "import azureml\n", 179 | "\n", 180 | "# Connect to Azure Machine Learning\n", 181 | "try:\n", 182 | " ws = Workspace.from_config()\n", 183 | "except:\n", 184 | " ws = Workspace(subscription_id = subscription_id,\n", 185 | " resource_group = resource_group,\n", 186 | " workspace_name = workspace_name)\n", 187 | " ws.write_config()\n", 188 | " \n", 189 | " print('Workspace config file written')\n", 190 | " \n", 191 | "output = {}\n", 192 | "output['SDK version'] = azureml.core.VERSION\n", 193 | "output['Subscription ID'] = ws.subscription_id\n", 194 | "output['Workspace'] = ws.name\n", 195 | "output['Resource Group'] = ws.resource_group\n", 196 | "output['Location'] = ws.location\n", 197 | "pd.set_option('display.max_colwidth', -1)\n", 198 | "pd.DataFrame(data=output, index=['']).T" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [ 207 | "# Provision a compute target\n", 208 | "try:\n", 209 | " compute_target = ComputeTarget(workspace=ws, name=cluster_name)\n", 210 | " print('Found existing compute target')\n", 211 | "except ComputeTargetException:\n", 212 | " print('Creating a new compute target...')\n", 213 | " compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_DS12_V2',\n", 214 | " min_nodes=0,\n", 215 | " max_nodes=4)\n", 216 | "\n", 217 | " compute_target = ComputeTarget.create(ws, cluster_name, compute_config)\n", 218 | " compute_target.wait_for_completion(show_output=True)\n", 219 | "\n", 220 | "compute_target.status.serialize()" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": null, 226 | "metadata": {}, 227 | "outputs": [], 228 | "source": [ 229 | "# Upload our data\n", 230 | "ds = ws.get_default_datastore()\n", 231 | "ds.upload_files(['./data.csv'], target_path = 'boots', overwrite=True)" 232 | ] 233 | }, 234 | { 235 | "cell_type": "markdown", 236 | "metadata": {}, 237 | "source": [ 238 | "## Experiment with Automated ML\n", 239 | "\n", 240 | "Let's submit a training run using our data and Automated ML." 
241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": null, 246 | "metadata": {}, 247 | "outputs": [], 248 | "source": [ 249 | "%%writefile $project_folder/get_data.py\n", 250 | "\n", 251 | "import pandas as pd\n", 252 | "from sklearn.preprocessing import LabelEncoder\n", 253 | "\n", 254 | "def get_data():\n", 255 | " df = pd.read_csv('/tmp/azureml_runs/boots/data.csv')\n", 256 | "\n", 257 | " le = LabelEncoder()\n", 258 | " le.fit(df['BOUGHT_CATEGORY_1'].values)\n", 259 | " y = le.transform(df['BOUGHT_CATEGORY_1'].values)\n", 260 | "\n", 261 | " df = df.drop(['BOUGHT_CATEGORY_1'], axis=1)\n", 262 | "\n", 263 | " return { \"X\" : df, \"y\" : y }" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": null, 269 | "metadata": {}, 270 | "outputs": [], 271 | "source": [ 272 | "# Define the experiment\n", 273 | "experiment = Experiment(ws, experiment_name)" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": null, 279 | "metadata": {}, 280 | "outputs": [], 281 | "source": [ 282 | "# Create our run configuration including our data source reference and base image configuration\n", 283 | "dr = DataReferenceConfiguration(datastore_name=ds.name, \n", 284 | " path_on_compute='/tmp/azureml_runs',\n", 285 | " path_on_datastore='boots',\n", 286 | " mode='download',\n", 287 | " overwrite=False)\n", 288 | "\n", 289 | "run_config = RunConfiguration(framework=\"python\")\n", 290 | "run_config.target = compute_target\n", 291 | "run_config.data_references = {ds.name: dr}\n", 292 | "run_config.environment.docker.enabled = True\n", 293 | "run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n", 294 | "run_config.environment.python.conda_dependencies = CondaDependencies.create(pip_packages=['azureml-sdk[automl]','azureml-explain-model'], conda_packages=['numpy','py-xgboost<=0.80'])\n", 295 | "\n", 296 | "automl_config = AutoMLConfig(task = 'classification',\n", 297 | " iterations = 25,\n", 298 | " iteration_timeout_minutes = 5, \n", 299 | " max_cores_per_iteration = 2,\n", 300 | " max_concurrent_iterations = 4,\n", 301 | " primary_metric = 'accuracy',\n", 302 | " data_script = project_folder + '/get_data.py',\n", 303 | " run_configuration = run_config,\n", 304 | " path = project_folder,\n", 305 | " n_cross_validations = 2,\n", 306 | " preprocess = True)" 307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": null, 312 | "metadata": {}, 313 | "outputs": [], 314 | "source": [ 315 | "remote_run = experiment.submit(automl_config, show_output=False)\n", 316 | "remote_run" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": null, 322 | "metadata": {}, 323 | "outputs": [], 324 | "source": [ 325 | "remote_run.wait_for_completion(show_output=True)" 326 | ] 327 | }, 328 | { 329 | "cell_type": "markdown", 330 | "metadata": {}, 331 | "source": [ 332 | "# Review our results\n", 333 | "\n", 334 | "Once the experiment completes, let's review the results." 
335 | ] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "execution_count": null, 340 | "metadata": {}, 341 | "outputs": [], 342 | "source": [ 343 | "# Run details capture configuration and exact Git commit used for the run\n", 344 | "remote_run_df = pd.read_json('[' + json.dumps(remote_run.get_details()['properties']) + ']', orient='columns')\n", 345 | "remote_run_df[['azureml.git.branch','azureml.git.commit','azureml.git.repository_uri']].T" 346 | ] 347 | }, 348 | { 349 | "cell_type": "code", 350 | "execution_count": null, 351 | "metadata": {}, 352 | "outputs": [], 353 | "source": [ 354 | "# Easily explore results using interactive widgets\n", 355 | "RunDetails(remote_run).show()" 356 | ] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": null, 361 | "metadata": {}, 362 | "outputs": [], 363 | "source": [ 364 | "# Programmatically find the best model based on different metrics\n", 365 | "lookup_metric = 'accuracy'\n", 366 | "best_run, fitted_model = remote_run.get_output(metric = lookup_metric)\n", 367 | "print(best_run)\n", 368 | "print(fitted_model)" 369 | ] 370 | }, 371 | { 372 | "cell_type": "markdown", 373 | "metadata": {}, 374 | "source": [ 375 | "# Publish our model\n", 376 | "\n", 377 | "Once we've selected our preferred model, we can register it for management (and optional deployment)." 378 | ] 379 | }, 380 | { 381 | "cell_type": "code", 382 | "execution_count": null, 383 | "metadata": {}, 384 | "outputs": [], 385 | "source": [ 386 | "# Store the preferred model for your team to use\n", 387 | "model = best_run.register_model(model_name = 'category_1_model.pkl',\n", 388 | " model_path = 'outputs/model.pkl',\n", 389 | " tags = {'area': 'CATEGORY 1', 'type': 'classification'})\n", 390 | "print(model.name, model.version)" 391 | ] 392 | }, 393 | { 394 | "cell_type": "code", 395 | "execution_count": null, 396 | "metadata": {}, 397 | "outputs": [], 398 | "source": [ 399 | "# Captures training details\n", 400 | "model.serialize()" 401 | ] 402 | }, 403 | { 404 | "cell_type": "markdown", 405 | "metadata": {}, 406 | "source": [ 407 | "# Build a pipeline\n", 408 | "\n", 409 | "We can build an AML pipeline to make our experiment easy to re-run as data changes." 
410 | ] 411 | }, 412 | { 413 | "cell_type": "code", 414 | "execution_count": null, 415 | "metadata": {}, 416 | "outputs": [], 417 | "source": [ 418 | "%%writefile $project_folder/register.py\n", 419 | "\n", 420 | "from azureml.core import Workspace\n", 421 | "from azureml.core.model import Model\n", 422 | "from azureml.core import Run\n", 423 | "import argparse\n", 424 | "import json\n", 425 | "\n", 426 | "if __name__ == '__main__':\n", 427 | "    parser = argparse.ArgumentParser()\n", 428 | "    parser.add_argument(\n", 429 | "        '--model_name',\n", 430 | "        type=str,\n", 431 | "        default='',\n", 432 | "        help='Variant name you want to give to the model.'\n", 433 | "    )\n", 434 | "    parser.add_argument(\n", 435 | "        '--model_path',\n", 436 | "        type=str,\n", 437 | "        default='outputs',\n", 438 | "        help='Location of trained model.'\n", 439 | "    )\n", 440 | "\n", 441 | "    args, unparsed = parser.parse_known_args()\n", 442 | "    print(args.model_name)\n", 443 | "    print(args.model_path)\n", 444 | "    \n", 445 | "    run = Run.get_context()\n", 446 | "    ws = run.experiment.workspace\n", 447 | "    \n", 448 | "    tags = {\n", 449 | "        \"runId\": str(run.id)\n", 450 | "    }\n", 451 | "\n", 452 | "    print(json.dumps(tags))\n", 453 | "\n", 454 | "    model = Model.register(ws, model_name = args.model_name, model_path = args.model_path, tags=tags)\n", 455 | "\n", 456 | "    print('Model registered: {} \\nModel Description: {} \\nModel Version: {}'.format(model.name, model.description, model.version))" 457 | ] 458 | }, 459 | { 460 | "cell_type": "code", 461 | "execution_count": null, 462 | "metadata": {}, 463 | "outputs": [], 464 | "source": [ 465 | "# Re-use our experiment configuration\n", 466 | "input_data = DataReference(datastore=ds, \n", 467 | "                           data_reference_name='training_data',\n", 468 | "                           path_on_datastore='boots',\n", 469 | "                           mode='download',\n", 470 | "                           path_on_compute='/tmp/azureml_runs',\n", 471 | "                           overwrite=True)\n", 472 | "\n", 473 | "run_config = RunConfiguration(framework=\"python\")\n", 474 | "run_config.environment.docker.enabled = True\n", 475 | "run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n", 476 | "run_config.environment.python.conda_dependencies = CondaDependencies.create(pip_packages=['azureml-sdk[automl]'], conda_packages=['numpy','py-xgboost<=0.80'])" 477 | ] 478 | }, 479 | { 480 | "cell_type": "code", 481 | "execution_count": null, 482 | "metadata": {}, 483 | "outputs": [], 484 | "source": [ 485 | "# Build a pipeline\n", 486 | "steps = []\n", 487 | "\n", 488 | "# These are the two outputs from AutoML\n", 489 | "metrics_data = PipelineData(name='metrics_data_category_1',\n", 490 | "                            datastore=ds,\n", 491 | "                            pipeline_output_name='metrics_output_category_1',\n", 492 | "                            training_output=TrainingOutput(type='Metrics'))\n", 493 | "\n", 494 | "model_data = PipelineData(name='model_data_category_1',\n", 495 | "                          datastore=ds,\n", 496 | "                          pipeline_output_name='best_model_output_category_1',\n", 497 | "                          training_output=TrainingOutput(type='Model'))\n", 498 | "\n", 499 | "# AutoML config (note different data files for each model so it's not shared)\n", 500 | "automl_config = AutoMLConfig(task = 'classification',\n", 501 | "                             iterations = 25,\n", 502 | "                             iteration_timeout_minutes = 5, \n", 503 | "                             max_cores_per_iteration = 2,\n", 504 | "                             max_concurrent_iterations = 4,\n", 505 | "                             primary_metric = 'accuracy',\n", 506 | "                             data_script = project_folder + '/get_data.py',\n", 507 | "                             run_configuration = run_config,\n", 508 | "                             compute_target = compute_target,\n", 509 | "                             path = 
project_folder,\n", 510 | " n_cross_validations = 2,\n", 511 | " preprocess = True)\n", 512 | "\n", 513 | "# AutoML action\n", 514 | "automl_step = AutoMLStep(name='automl_module_category_1',\n", 515 | " automl_config=automl_config,\n", 516 | " inputs=[input_data],\n", 517 | " outputs=[metrics_data, model_data],\n", 518 | " allow_reuse=False)\n", 519 | "\n", 520 | "# Custom script action to register the model afterwards\n", 521 | "register_step = PythonScriptStep(name='register_category_1',\n", 522 | " script_name='register.py',\n", 523 | " compute_target=compute_target,\n", 524 | " source_directory=project_folder,\n", 525 | " arguments=['--model_name', 'category_1_model.pkl', '--model_path', model_data],\n", 526 | " inputs=[model_data],\n", 527 | " allow_reuse=False)\n", 528 | "\n", 529 | "pipeline = Pipeline(description='Generate recommendation models',\n", 530 | " workspace=ws,\n", 531 | " steps=[automl_step, register_step])\n", 532 | "\n", 533 | "pipeline.validate()\n", 534 | "\n", 535 | "# Once published, we can invoke on demand via the SDK or via a REST endpoint\n", 536 | "published_pipeline = pipeline.publish(name='category-based-propensity-pipeline')" 537 | ] 538 | }, 539 | { 540 | "cell_type": "markdown", 541 | "metadata": {}, 542 | "source": [ 543 | "## Schedule our pipeline\n", 544 | "\n", 545 | "Now that our experiment is available as a pipeline, we can schedule it or run it on demand." 546 | ] 547 | }, 548 | { 549 | "cell_type": "code", 550 | "execution_count": null, 551 | "metadata": {}, 552 | "outputs": [], 553 | "source": [ 554 | "# Automatically run our pipeline when the data changes\n", 555 | "schedule = Schedule.create(workspace=ws,\n", 556 | " name='category-based-propensity-schedule',\n", 557 | " pipeline_id=published_pipeline.id, \n", 558 | " experiment_name='category-based-propensity-schedule',\n", 559 | " datastore=ds,\n", 560 | " path_on_datastore='boots',\n", 561 | " wait_for_provisioning=True,\n", 562 | " polling_interval=1,\n", 563 | " description='Scheduled run of category-based-propensity')" 564 | ] 565 | }, 566 | { 567 | "cell_type": "code", 568 | "execution_count": null, 569 | "metadata": {}, 570 | "outputs": [], 571 | "source": [ 572 | "# Or, run it on demand\n", 573 | "published_pipeline.submit(ws, published_pipeline.name)" 574 | ] 575 | } 576 | ], 577 | "metadata": { 578 | "kernelspec": { 579 | "display_name": "Python 3", 580 | "language": "python", 581 | "name": "python3" 582 | }, 583 | "language_info": { 584 | "codemirror_mode": { 585 | "name": "ipython", 586 | "version": 3 587 | }, 588 | "file_extension": ".py", 589 | "mimetype": "text/x-python", 590 | "name": "python", 591 | "nbconvert_exporter": "python", 592 | "pygments_lexer": "ipython3", 593 | "version": "3.6.8" 594 | } 595 | }, 596 | "nbformat": 4, 597 | "nbformat_minor": 2 598 | } 599 | --------------------------------------------------------------------------------