├── .gitignore
├── .project-metadata.yaml
├── 0_bootstrap.py
├── 1_data_ingest.py
├── 2_data_exploration.ipynb
├── 3_model_building.ipynb
├── 4_train_models.py
├── 5_model_serve_explainer.py
├── 6_application.py
├── 7a_ml_ops_simulation.py
├── 7b_ml_ops_visual.py
├── 8_check_model.py
├── 9_build_project.py
├── README.md
├── cdsw-build.sh
├── churnexplainer.py
├── flask
    ├── ajax-loader.gif
    ├── churn_vis.css
    ├── churn_vis.js
    ├── env_vars.png
    ├── single_view.html
    └── table_view.html
├── images
    ├── data.png
    ├── disable_auth.png
    ├── model_accuracy.png
    ├── model_id.png
    ├── single_view_1.png
    ├── single_view_2.png
    └── table_view.png
├── lineage.yml
├── models
    └── telco_linear
    │   └── telco_linear.pkl
├── raw
    ├── WA_Fn-UseC_-Telco-Customer-Churn-.csv
    └── telco-data
    │   ├── _SUCCESS
    │   └── part-00000-bfdb203d-eea4-4b80-bda3-d369976e785a-c000.csv
└── requirements.txt


/.gitignore:
--------------------------------------------------------------------------------
1 | R
2 | node_modules
3 | *.pyc
4 | __pycache__
5 | .*
6 | !.gitignore


--------------------------------------------------------------------------------
/.project-metadata.yaml:
--------------------------------------------------------------------------------
  1 | name: ML Churn Demo
  2 | description: Prototype to demonstrate building a churn model on CML
  3 | author: Cloudera Engineer
  4 | specification_version: 1.0
  5 | prototype_version: 1.0
  6 | date: "2020-09-29"
  7 | api_version: 1
  8 | 
  9 | environment_variables:
 10 |   AWS_ACCESS_KEY:
 11 |     default: "AWS ACCESS KEY"
 12 |     description: "Access Key ID for accessing S3 bucket"
 13 |     prompt_user: true
 14 |   AWS_SECRET_KEY:
 15 |     default: "AWS SECRET KEY"
 16 |     description: "Secret Access Key for accessing S3 bucket"
 17 |     prompt_user: true
 18 |   HADOOP_DATA_SOURCE:
 19 |     default: "S3 URL FOR DATASET"
 20 |     description: "S3 URL to large data set"
 21 |     prompt_user: true
 22 |   MODEL_REPLICAS:
 23 |     default: "3"
 24 |     description: "Number of model replicas, 3 is standard for redundancy"
 25 |     prompt_user: false
 26 |   TASK_TYPE:
 27 |     default: NOT_OVERRIDEN
 28 |     prompt_user: false
 29 | 
 30 | tasks:
 31 |   - type: create_job
 32 |     name: Install dependencies
 33 |     entity_label: install_deps
 34 |     script: 0_bootstrap.py
 35 |     arguments: None
 36 |     short_summary: Job to install dependencies and download training data.
 37 |     environment:
 38 |       TASK_TYPE: CREATE/RUN_JOB
 39 |     kernel: python3
 40 | 
 41 |   - type: run_job
 42 |     entity_label: install_deps
 43 |     short_summary: Running install dependencies training job.
 44 |     long_summary: >-
 45 |       Running the job to install dependencies.
 46 | 
 47 |   - type: create_job
 48 |     name: Train Churn Model
 49 |     entity_label: train_model
 50 |     script: 4_train_models.py
 51 |     arguments: None
 52 |     short_summary: Job to train models.
 53 |     environment:
 54 |       TASK_TYPE: CREATE/RUN_JOB
 55 |     kernel: python3
 56 | 
 57 |   - type: run_job
 58 |     entity_label: train_model
 59 |     short_summary: Run model training job.
 60 |     long_summary: >-
 61 |       Running the job to train models.
 62 | 
 63 |   - type: create_model
 64 |     name: Create Churn Model API Endpoint
 65 |     entity_label: telco_churn_model
 66 |     description: This model api endpoint predicts churn
 67 |     short_summary: Create the churn model prediction api endpoint
 68 |     access_key_environment_variable: SHTM_ACCESS_KEY
 69 |     # default_resources:
 70 |     #   cpu: 1
 71 |     #   memory: 2
 72 |     default_replication_policy:
 73 |       type: fixed
 74 |       num_replicas: 1
 75 | 
 76 |     # auth_enabled: false
 77 |   - type: build_model
 78 |     name: Build Telco Churn Model Endpoint
 79 |     entity_label: telco_churn_model
 80 |     comment: Build churn model
 81 |     examples:
 82 |       - request: '{"StreamingTV":"No","MonthlyCharges":70.35,"PhoneService":"No","PaperlessBilling":"No","Partner":"No","OnlineBackup":"No","gender":"Female","Contract":"Month-to-month","TotalCharges":1397.475,"StreamingMovies":"No","DeviceProtection":"No","PaymentMethod":"Bank transfer (automatic)","tenure":29,"Dependents":"No","OnlineSecurity":"No","MultipleLines":"No","InternetService":"DSL","SeniorCitizen":"No","TechSupport":"No"}'
 83 |         response: ""
 84 |     target_file_path: 5_model_serve_explainer.py
 85 |     target_function_name: explain
 86 |     kernel: python3
 87 |     environment_variables:
 88 |       TASK_TYPE: CREATE/BUILD/DEPLOY_MODEL
 89 | 
 90 |   - type: deploy_model
 91 |     name: telco_churn_model
 92 |     entity_label: telco_churn_model
 93 |     cpu: 1
 94 |     gpu: 0
 95 |     environment_variables:
 96 |       TASK_TYPE: CREATE/BUILD/DEPLOY_MODEL
 97 | 
 98 |   - type: start_application
 99 |     name: Application to serve Churn  front app UI
100 |     subdomain: churn
101 |     script: 6_application.py
102 |     environment_variables:
103 |       TASK_TYPE: START_APPLICATION
104 |     kernel: python3
105 | 


--------------------------------------------------------------------------------
/0_bootstrap.py:
--------------------------------------------------------------------------------
 1 | # # Part 0: Bootstrap File
 2 | # You need to run this at the start of the project. It will install the requirements, create the 
 3 | # STORAGE environment variable and copy the data from 
 4 | # raw/WA_Fn-UseC_-Telco-Customer-Churn-.csv into /datalake/data/churn of the STORAGE 
 5 | # location.
 6 | 
 7 | # The STORAGE environment variable is the Cloud Storage location used by the DataLake 
 8 | # to store hive data. On AWS it will be s3a://[something], on Azure it will be 
 9 | # abfs://[something] and on a CDSW cluster, it will be hdfs://[something]
10 | 
11 | # Install the requirements
12 | !pip3 install -r requirements.txt --progress-bar off
13 |   
14 | # Create the directories and upload data
15 | 
16 | from cmlbootstrap import CMLBootstrap
17 | from IPython.display import Javascript, HTML
18 | import os
19 | import time
20 | import json
21 | import requests
22 | import xml.etree.ElementTree as ET
23 | import datetime
24 | 
25 | try: 
26 |   os.environ["SPARK_HOME"]
27 |   print("Spark is enabled")
28 | except:
29 |   print('Spark is not enabled, please enable spark before running this script')
30 |   raise KeyError('Spark is not enabled, please enable spark before running this script')
31 | 
32 | run_time_suffix = datetime.datetime.now()
33 | run_time_suffix = run_time_suffix.strftime("%d%m%Y%H%M%S")
34 | 
35 | # Instantiate API Wrapper
36 | cml = CMLBootstrap()
37 | 
38 | # Set the STORAGE environment variable
39 | try : 
40 |   storage=os.environ["STORAGE"]
41 | except:
42 |   storage = cml.get_cloud_storage()
43 |   storage_environment_params = {"STORAGE":storage}
44 |   storage_environment = cml.create_environment_variable(storage_environment_params)
45 |   os.environ["STORAGE"] = storage
46 | 
47 | # Upload the data to the cloud storage
48 | !hadoop fs -mkdir -p $STORAGE/datalake
49 | !hadoop fs -mkdir -p $STORAGE/datalake/data
50 | !hadoop fs -mkdir -p $STORAGE/datalake/data/churn
51 | !hadoop fs -copyFromLocal /home/cdsw/raw/WA_Fn-UseC_-Telco-Customer-Churn-.csv $STORAGE/datalake/data/churn/WA_Fn-UseC_-Telco-Customer-Churn-.csv
52 | 
53 | 


--------------------------------------------------------------------------------
/1_data_ingest.py:
--------------------------------------------------------------------------------
  1 | # Part 1: Data Ingest
  2 | # A data scientist should never be blocked in getting data into their environment,
  3 | # so CML is able to ingest data from many sources.
  4 | # Whether you have data in .csv files, modern formats like parquet or feather,
  5 | # in cloud storage or a SQL database, CML will let you work with it in a data
  6 | # scientist-friendly environment.
  7 | 
  8 | # Access local data on your computer
  9 | #
 10 | # Accessing data stored on your computer is a matter of [uploading a file to the CML filesystem and
 11 | # referencing from there](https://docs.cloudera.com/machine-learning/cloud/import-data/topics/ml-accessing-local-data-from-your-computer.html).
 12 | #
 13 | # > Go to the project's **Overview** page. Under the **Files** section, click **Upload**, select the relevant data files to be uploaded and a destination folder.
 14 | #
 15 | # If, for example, you upload a file called, `mydata.csv` to a folder called `data`, the
 16 | # following example code would work.
 17 | 
 18 | # ```
 19 | # import pandas as pd
 20 | #
 21 | # df = pd.read_csv('data/mydata.csv')
 22 | #
 23 | # # Or:
 24 | # df = pd.read_csv('/home/cdsw/data/mydata.csv')
 25 | # ```
 26 | 
 27 | # Access data in S3
 28 | #
 29 | # Accessing [data in Amazon S3](https://docs.cloudera.com/machine-learning/cloud/import-data/topics/ml-accessing-data-in-amazon-s3-buckets.html)
 30 | # follows a familiar procedure of fetching and storing in the CML filesystem.
 31 | # > Add your Amazon Web Services access keys to your project's
 32 | # > [environment variables](https://docs.cloudera.com/machine-learning/cloud/import-data/topics/ml-environment-variables.html)
 33 | # > as `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`.
 34 | #
 35 | # To get the access keys that are used for your CDP DataLake, you can follow
 36 | # [this Cloudera Community Tutorial](https://community.cloudera.com/t5/Community-Articles/How-to-get-AWS-access-keys-via-IDBroker-in-CDP/ta-p/295485)
 37 | 
 38 | #
 39 | # The following sample code would fetch a file called `myfile.csv` from the S3 bucket, `data_bucket`, and store it in the CML home folder.
 40 | # ```
 41 | # # Create the Boto S3 connection object.
 42 | # from boto.s3.connection import S3Connection
 43 | # aws_connection = S3Connection()
 44 | #
 45 | # # Download the dataset to file 'myfile.csv'.
 46 | # bucket = aws_connection.get_bucket('data_bucket')
 47 | # key = bucket.get_key('myfile.csv')
 48 | # key.get_contents_to_filename('/home/cdsw/myfile.csv')
 49 | # ```
 50 | 
 51 | 
 52 | # Access data from Cloud Storage or the Hive metastore
 53 | #
 54 | # Accessing data from [the Hive metastore](https://docs.cloudera.com/machine-learning/cloud/import-data/topics/ml-accessing-data-from-apache-hive.html)
 55 | # that comes with CML only takes a few more steps.
 56 | # But first we need to fetch the data from Cloud Storage and save it as a Hive table.
 57 | #
 58 | # > Specify `STORAGE` as an
 59 | # > [environment variable](https://docs.cloudera.com/machine-learning/cloud/import-data/topics/ml-environment-variables.html)
 60 | # > in your project settings containing the Cloud Storage location used by the DataLake to store
 61 | # > Hive data. On AWS it will be `s3a://[something]`, on Azure it will be `abfs://[something]` and on
 62 | # > an on-prem CDSW cluster, it will be `hdfs://[something]`
 63 | #
 64 | # This was done for you when you ran `0_bootstrap.py`, so the following code is set up to run as is.
 65 | # It begins with imports and creating a `SparkSession`.
 66 | 
 67 | import os
 68 | import sys
 69 | from pyspark.sql import SparkSession
 70 | from pyspark.sql.types import *
 71 | 
 72 | 
 73 | 
 74 | spark = SparkSession\
 75 |     .builder\
 76 |     .appName("PythonSQL")\
 77 |     .master("local[*]")\
 78 |     .getOrCreate()
 79 | 
 80 | # **Note:**
 81 | # Our file isn't big, so running it in Spark local mode is fine but you can add the following config
 82 | # if you want to run Spark on the kubernetes cluster
 83 | #
 84 | # > .config("spark.yarn.access.hadoopFileSystems",os.environ['STORAGE'])\
 85 | #
 86 | # and remove `.master("local[*]")\`
 87 | #
 88 | 
 89 | # Since we know the data already, we can add schema upfront. This is good practice as Spark will
 90 | # read *all* the Data if you try to infer the schema.
 91 | 
 92 | schema = StructType(
 93 |     [
 94 |         StructField("customerID", StringType(), True),
 95 |         StructField("gender", StringType(), True),
 96 |         StructField("SeniorCitizen", StringType(), True),
 97 |         StructField("Partner", StringType(), True),
 98 |         StructField("Dependents", StringType(), True),
 99 |         StructField("tenure", DoubleType(), True),
100 |         StructField("PhoneService", StringType(), True),
101 |         StructField("MultipleLines", StringType(), True),
102 |         StructField("InternetService", StringType(), True),
103 |         StructField("OnlineSecurity", StringType(), True),
104 |         StructField("OnlineBackup", StringType(), True),
105 |         StructField("DeviceProtection", StringType(), True),
106 |         StructField("TechSupport", StringType(), True),
107 |         StructField("StreamingTV", StringType(), True),
108 |         StructField("StreamingMovies", StringType(), True),
109 |         StructField("Contract", StringType(), True),
110 |         StructField("PaperlessBilling", StringType(), True),
111 |         StructField("PaymentMethod", StringType(), True),
112 |         StructField("MonthlyCharges", DoubleType(), True),
113 |         StructField("TotalCharges", DoubleType(), True),
114 |         StructField("Churn", StringType(), True)
115 |     ]
116 | )
117 | 
118 | # Now we can read in the data from Cloud Storage into Spark...
119 | 
120 | storage = os.environ['STORAGE']
121 | 
122 | telco_data = spark.read.csv(
123 |     "{}/datalake/data/churn/WA_Fn-UseC_-Telco-Customer-Churn-.csv".format(
124 |         storage),
125 |     header=True,
126 |     schema=schema,
127 |     sep=',',
128 |     nullValue='NA'
129 | )
130 | 
131 | # ...and inspect the data.
132 | 
133 | telco_data.show()
134 | 
135 | telco_data.printSchema()
136 | 
137 | # Now we can store the Spark DataFrame as a file in the local CML file system
138 | # *and* as a table in Hive used by the other parts of the project.
139 | 
140 | telco_data.coalesce(1).write.csv(
141 |     "file:/home/cdsw/raw/telco-data/",
142 |     mode='overwrite',
143 |     header=True
144 | )
145 | 
146 | spark.sql("show databases").show()
147 | 
148 | spark.sql("show tables in default").show()
149 | 
150 | # Create the Hive table
151 | # This is here to create the table in Hive used by the other parts of the project, if it
152 | # does not already exist.
153 | 
154 | if ('telco_churn' not in list(spark.sql("show tables in default").toPandas()['tableName'])):
155 |     print("creating the telco_churn database")
156 |     telco_data\
157 |         .write.format("parquet")\
158 |         .mode("overwrite")\
159 |         .saveAsTable(
160 |             'default.telco_churn'
161 |         )
162 | 
163 | # Show the data in the hive table
164 | spark.sql("select * from default.telco_churn").show()
165 | 
166 | # To get more detailed information about the hive table you can run this:
167 | spark.sql("describe formatted default.telco_churn").toPandas()
168 | 
169 | # Other ways to access data
170 | 
171 | # To access data from other locations, refer to the
172 | # [CML documentation](https://docs.cloudera.com/machine-learning/cloud/import-data/index.html).
173 | 
174 | # Scheduled Jobs
175 | #
176 | # One of the features of CML is the ability to schedule code to run at regular intervals,
177 | # similar to cron jobs. This is useful for **data pipelines**, **ETL**, and **regular reporting**
178 | # among other use cases. If new data files are created regularly, e.g. hourly log files, you could
179 | # schedule a Job to run a data loading script with code like the above.
180 | 
181 | # > Any script [can be scheduled as a Job](https://docs.cloudera.com/machine-learning/cloud/jobs-pipelines/topics/ml-creating-a-job.html).
182 | # > You can create a Job with specified command line arguments or environment variables.
183 | # > Jobs can be triggered by the completion of other jobs, forming a
184 | # > [Pipeline](https://docs.cloudera.com/machine-learning/cloud/jobs-pipelines/topics/ml-creating-a-pipeline.html)
185 | # > You can configure the job to email individuals with an attachment, e.g. a csv report which your
186 | # > script saves at: `/home/cdsw/job1/output.csv`.
187 | 
188 | # Try running this script `1_data_ingest.py` for use in such a Job.
189 | 
190 | 


--------------------------------------------------------------------------------
/3_model_building.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Part 3: Model Building"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "This notebook explores building the **customer churn model** and an **interpretability model** to explain each prediction.\n",
 15 |     "In addition to making a prediction of whether a customer will churn, we will also be able to answer the question, \"why are they expected to churn?\"\n",
 16 |     "\n",
 17 |     "The following work will look fairly standard to anyone having trained machine learning models using python Jupyter notebooks.\n",
 18 |     "The CML platform provides a **fully capable Jupyter notebook environment** that data scientists know and love.\n",
 19 |     "\n",
 20 |     "If you haven't yet, run through the initialization steps in the README file and Part 1. \n",
 21 |     "In Part 1, the data is imported into the `default.telco_churn` table in Hive. All data accesses fetch from Hive.\n"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "markdown",
 26 |    "metadata": {},
 27 |    "source": [
 28 |     "## Load data\n",
 29 |     "\n",
 30 |     "We again start by creating a `SparkSession` to fetch the data using Spark SQL, only this time we convert to a pandas `DataFrame` since we saw earlier that there are only 7k records in the dataset.\n"
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "code",
 35 |    "execution_count": 6,
 36 |    "metadata": {
 37 |     "scrolled": false
 38 |    },
 39 |    "outputs": [
 40 |     {
 41 |      "name": "stdout",
 42 |      "output_type": "stream",
 43 |      "text": [
 44 |       "root\n",
 45 |       " |-- customerID: string (nullable = true)\n",
 46 |       " |-- gender: string (nullable = true)\n",
 47 |       " |-- SeniorCitizen: string (nullable = true)\n",
 48 |       " |-- Partner: string (nullable = true)\n",
 49 |       " |-- Dependents: string (nullable = true)\n",
 50 |       " |-- tenure: double (nullable = true)\n",
 51 |       " |-- PhoneService: string (nullable = true)\n",
 52 |       " |-- MultipleLines: string (nullable = true)\n",
 53 |       " |-- InternetService: string (nullable = true)\n",
 54 |       " |-- OnlineSecurity: string (nullable = true)\n",
 55 |       " |-- OnlineBackup: string (nullable = true)\n",
 56 |       " |-- DeviceProtection: string (nullable = true)\n",
 57 |       " |-- TechSupport: string (nullable = true)\n",
 58 |       " |-- StreamingTV: string (nullable = true)\n",
 59 |       " |-- StreamingMovies: string (nullable = true)\n",
 60 |       " |-- Contract: string (nullable = true)\n",
 61 |       " |-- PaperlessBilling: string (nullable = true)\n",
 62 |       " |-- PaymentMethod: string (nullable = true)\n",
 63 |       " |-- MonthlyCharges: double (nullable = true)\n",
 64 |       " |-- TotalCharges: double (nullable = true)\n",
 65 |       " |-- Churn: string (nullable = true)\n",
 66 |       "\n"
 67 |      ]
 68 |     }
 69 |    ],
 70 |    "source": [
 71 |     "from pyspark.sql import SparkSession\n",
 72 |     "\n",
 73 |     "spark = SparkSession\\\n",
 74 |     "    .builder\\\n",
 75 |     "    .appName(\"PythonSQL\")\\\n",
 76 |     "    .master(\"local[*]\")\\\n",
 77 |     "    .getOrCreate()\n",
 78 |     "\n",
 79 |     "spark_df = spark.sql(\"SELECT * FROM default.telco_churn\")\n",
 80 |     "spark_df.printSchema()\n",
 81 |     "df = spark_df.toPandas()"
 82 |    ]
 83 |   },
 84 |   {
 85 |    "cell_type": "markdown",
 86 |    "metadata": {},
 87 |    "source": [
 88 |     "**Note:** If you don't have the Hive table, you can read the csv from the CML filesystem using pandas directly:\n",
 89 |     "\n",
 90 |     "```python\n",
 91 |     "import pandas as pd\n",
 92 |     "import os\n",
 93 |     "\n",
 94 |     "data_dir = '/home/cdsw'\n",
 95 |     "df = pd.read_csv(os.path.join(data_dir, 'raw', 'WA_Fn-UseC_-Telco-Customer-Churn-.csv'))\n",
 96 |     "```\n"
 97 |    ]
 98 |   },
 99 |   {
100 |    "cell_type": "markdown",
101 |    "metadata": {},
102 |    "source": [
103 |     "## Basic feature engineering\n",
104 |     "\n",
105 |     "\n",
106 |     "Next we munge the data into appropriate types for later steps. \n",
107 |     "In particular, we want to convert all the binary and string columns into pandas `Categorical` types.\n",
108 |     "\n"
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "code",
113 |    "execution_count": 7,
114 |    "metadata": {},
115 |    "outputs": [],
116 |    "source": [
117 |     "import subprocess, glob, sys\n",
118 |     "import dill  # a better pickle\n",
119 |     "import pandas as pd\n",
120 |     "import numpy as np\n",
121 |     "import matplotlib.pyplot as plt\n",
122 |     "\n",
123 |     "data_dir = '/home/cdsw'"
124 |    ]
125 |   },
126 |   {
127 |    "cell_type": "code",
128 |    "execution_count": 8,
129 |    "metadata": {},
130 |    "outputs": [],
131 |    "source": [
132 |     "idcol = 'customerID'  # ID column\n",
133 |     "labelcol = 'Churn'  # label column\n",
134 |     "cols = (('gender', True),  # (feature column, Categorical?)\n",
135 |     "        ('SeniorCitizen', True),\n",
136 |     "        ('Partner', True),\n",
137 |     "        ('Dependents', True),\n",
138 |     "        ('tenure', False),\n",
139 |     "        ('PhoneService', True),\n",
140 |     "        ('MultipleLines', True),\n",
141 |     "        ('InternetService', True),\n",
142 |     "        ('OnlineSecurity', True),\n",
143 |     "        ('OnlineBackup', True),\n",
144 |     "        ('DeviceProtection', True),\n",
145 |     "        ('TechSupport', True),\n",
146 |     "        ('StreamingTV', True),\n",
147 |     "        ('StreamingMovies', True),\n",
148 |     "        ('Contract', True),\n",
149 |     "        ('PaperlessBilling', True),\n",
150 |     "        ('PaymentMethod', True),\n",
151 |     "        ('MonthlyCharges', False),\n",
152 |     "        ('TotalCharges', False))"
153 |    ]
154 |   },
155 |   {
156 |    "cell_type": "code",
157 |    "execution_count": 9,
158 |    "metadata": {},
159 |    "outputs": [
160 |     {
161 |      "data": {
162 |       "text/html": [
163 |        "<div>\n",
164 |        "<style scoped>\n",
165 |        "    .dataframe tbody tr th:only-of-type {\n",
166 |        "        vertical-align: middle;\n",
167 |        "    }\n",
168 |        "\n",
169 |        "    .dataframe tbody tr th {\n",
170 |        "        vertical-align: top;\n",
171 |        "    }\n",
172 |        "\n",
173 |        "    .dataframe thead th {\n",
174 |        "        text-align: right;\n",
175 |        "    }\n",
176 |        "</style>\n",
177 |        "<table border=\"1\" class=\"dataframe\">\n",
178 |        "  <thead>\n",
179 |        "    <tr style=\"text-align: right;\">\n",
180 |        "      <th></th>\n",
181 |        "      <th>gender</th>\n",
182 |        "      <th>SeniorCitizen</th>\n",
183 |        "      <th>Partner</th>\n",
184 |        "      <th>Dependents</th>\n",
185 |        "      <th>tenure</th>\n",
186 |        "      <th>PhoneService</th>\n",
187 |        "      <th>MultipleLines</th>\n",
188 |        "      <th>InternetService</th>\n",
189 |        "      <th>OnlineSecurity</th>\n",
190 |        "      <th>OnlineBackup</th>\n",
191 |        "      <th>DeviceProtection</th>\n",
192 |        "      <th>TechSupport</th>\n",
193 |        "      <th>StreamingTV</th>\n",
194 |        "      <th>StreamingMovies</th>\n",
195 |        "      <th>Contract</th>\n",
196 |        "      <th>PaperlessBilling</th>\n",
197 |        "      <th>PaymentMethod</th>\n",
198 |        "      <th>MonthlyCharges</th>\n",
199 |        "      <th>TotalCharges</th>\n",
200 |        "    </tr>\n",
201 |        "    <tr>\n",
202 |        "      <th>id</th>\n",
203 |        "      <th></th>\n",
204 |        "      <th></th>\n",
205 |        "      <th></th>\n",
206 |        "      <th></th>\n",
207 |        "      <th></th>\n",
208 |        "      <th></th>\n",
209 |        "      <th></th>\n",
210 |        "      <th></th>\n",
211 |        "      <th></th>\n",
212 |        "      <th></th>\n",
213 |        "      <th></th>\n",
214 |        "      <th></th>\n",
215 |        "      <th></th>\n",
216 |        "      <th></th>\n",
217 |        "      <th></th>\n",
218 |        "      <th></th>\n",
219 |        "      <th></th>\n",
220 |        "      <th></th>\n",
221 |        "      <th></th>\n",
222 |        "    </tr>\n",
223 |        "  </thead>\n",
224 |        "  <tbody>\n",
225 |        "    <tr>\n",
226 |        "      <td>0</td>\n",
227 |        "      <td>Female</td>\n",
228 |        "      <td>0</td>\n",
229 |        "      <td>Yes</td>\n",
230 |        "      <td>No</td>\n",
231 |        "      <td>1.0</td>\n",
232 |        "      <td>No</td>\n",
233 |        "      <td>No phone service</td>\n",
234 |        "      <td>DSL</td>\n",
235 |        "      <td>No</td>\n",
236 |        "      <td>Yes</td>\n",
237 |        "      <td>No</td>\n",
238 |        "      <td>No</td>\n",
239 |        "      <td>No</td>\n",
240 |        "      <td>No</td>\n",
241 |        "      <td>Month-to-month</td>\n",
242 |        "      <td>Yes</td>\n",
243 |        "      <td>Electronic check</td>\n",
244 |        "      <td>29.85</td>\n",
245 |        "      <td>29.85</td>\n",
246 |        "    </tr>\n",
247 |        "    <tr>\n",
248 |        "      <td>1</td>\n",
249 |        "      <td>Male</td>\n",
250 |        "      <td>0</td>\n",
251 |        "      <td>No</td>\n",
252 |        "      <td>No</td>\n",
253 |        "      <td>34.0</td>\n",
254 |        "      <td>Yes</td>\n",
255 |        "      <td>No</td>\n",
256 |        "      <td>DSL</td>\n",
257 |        "      <td>Yes</td>\n",
258 |        "      <td>No</td>\n",
259 |        "      <td>Yes</td>\n",
260 |        "      <td>No</td>\n",
261 |        "      <td>No</td>\n",
262 |        "      <td>No</td>\n",
263 |        "      <td>One year</td>\n",
264 |        "      <td>No</td>\n",
265 |        "      <td>Mailed check</td>\n",
266 |        "      <td>56.95</td>\n",
267 |        "      <td>1889.50</td>\n",
268 |        "    </tr>\n",
269 |        "    <tr>\n",
270 |        "      <td>2</td>\n",
271 |        "      <td>Male</td>\n",
272 |        "      <td>0</td>\n",
273 |        "      <td>No</td>\n",
274 |        "      <td>No</td>\n",
275 |        "      <td>2.0</td>\n",
276 |        "      <td>Yes</td>\n",
277 |        "      <td>No</td>\n",
278 |        "      <td>DSL</td>\n",
279 |        "      <td>Yes</td>\n",
280 |        "      <td>Yes</td>\n",
281 |        "      <td>No</td>\n",
282 |        "      <td>No</td>\n",
283 |        "      <td>No</td>\n",
284 |        "      <td>No</td>\n",
285 |        "      <td>Month-to-month</td>\n",
286 |        "      <td>Yes</td>\n",
287 |        "      <td>Mailed check</td>\n",
288 |        "      <td>53.85</td>\n",
289 |        "      <td>108.15</td>\n",
290 |        "    </tr>\n",
291 |        "    <tr>\n",
292 |        "      <td>3</td>\n",
293 |        "      <td>Male</td>\n",
294 |        "      <td>0</td>\n",
295 |        "      <td>No</td>\n",
296 |        "      <td>No</td>\n",
297 |        "      <td>45.0</td>\n",
298 |        "      <td>No</td>\n",
299 |        "      <td>No phone service</td>\n",
300 |        "      <td>DSL</td>\n",
301 |        "      <td>Yes</td>\n",
302 |        "      <td>No</td>\n",
303 |        "      <td>Yes</td>\n",
304 |        "      <td>Yes</td>\n",
305 |        "      <td>No</td>\n",
306 |        "      <td>No</td>\n",
307 |        "      <td>One year</td>\n",
308 |        "      <td>No</td>\n",
309 |        "      <td>Bank transfer (automatic)</td>\n",
310 |        "      <td>42.30</td>\n",
311 |        "      <td>1840.75</td>\n",
312 |        "    </tr>\n",
313 |        "    <tr>\n",
314 |        "      <td>4</td>\n",
315 |        "      <td>Female</td>\n",
316 |        "      <td>0</td>\n",
317 |        "      <td>No</td>\n",
318 |        "      <td>No</td>\n",
319 |        "      <td>2.0</td>\n",
320 |        "      <td>Yes</td>\n",
321 |        "      <td>No</td>\n",
322 |        "      <td>Fiber optic</td>\n",
323 |        "      <td>No</td>\n",
324 |        "      <td>No</td>\n",
325 |        "      <td>No</td>\n",
326 |        "      <td>No</td>\n",
327 |        "      <td>No</td>\n",
328 |        "      <td>No</td>\n",
329 |        "      <td>Month-to-month</td>\n",
330 |        "      <td>Yes</td>\n",
331 |        "      <td>Electronic check</td>\n",
332 |        "      <td>70.70</td>\n",
333 |        "      <td>151.65</td>\n",
334 |        "    </tr>\n",
335 |        "  </tbody>\n",
336 |        "</table>\n",
337 |        "</div>"
338 |       ],
339 |       "text/plain": [
340 |        "    gender SeniorCitizen Partner Dependents  tenure PhoneService  \\\n",
341 |        "id                                                                 \n",
342 |        "0   Female             0     Yes         No     1.0           No   \n",
343 |        "1     Male             0      No         No    34.0          Yes   \n",
344 |        "2     Male             0      No         No     2.0          Yes   \n",
345 |        "3     Male             0      No         No    45.0           No   \n",
346 |        "4   Female             0      No         No     2.0          Yes   \n",
347 |        "\n",
348 |        "       MultipleLines InternetService OnlineSecurity OnlineBackup  \\\n",
349 |        "id                                                                 \n",
350 |        "0   No phone service             DSL             No          Yes   \n",
351 |        "1                 No             DSL            Yes           No   \n",
352 |        "2                 No             DSL            Yes          Yes   \n",
353 |        "3   No phone service             DSL            Yes           No   \n",
354 |        "4                 No     Fiber optic             No           No   \n",
355 |        "\n",
356 |        "   DeviceProtection TechSupport StreamingTV StreamingMovies        Contract  \\\n",
357 |        "id                                                                            \n",
358 |        "0                No          No          No              No  Month-to-month   \n",
359 |        "1               Yes          No          No              No        One year   \n",
360 |        "2                No          No          No              No  Month-to-month   \n",
361 |        "3               Yes         Yes          No              No        One year   \n",
362 |        "4                No          No          No              No  Month-to-month   \n",
363 |        "\n",
364 |        "   PaperlessBilling              PaymentMethod  MonthlyCharges  TotalCharges  \n",
365 |        "id                                                                            \n",
366 |        "0               Yes           Electronic check           29.85         29.85  \n",
367 |        "1                No               Mailed check           56.95       1889.50  \n",
368 |        "2               Yes               Mailed check           53.85        108.15  \n",
369 |        "3                No  Bank transfer (automatic)           42.30       1840.75  \n",
370 |        "4               Yes           Electronic check           70.70        151.65  "
371 |       ]
372 |      },
373 |      "execution_count": 9,
374 |      "metadata": {},
375 |      "output_type": "execute_result"
376 |     }
377 |    ],
378 |    "source": [
379 |     "df = df.replace(r'^\\s$', np.nan, regex=True).dropna().reset_index()  # drop blank rows\n",
380 |     "df.index.name = 'id'  # name the index\n",
381 |     "data, labels = df.drop(labelcol, axis=1), df[labelcol]  # separate out the labels\n",
382 |     "data = data[[c for c, _ in cols]]  # only use the columns named in `cols`\n",
383 |     "data = data.replace({'SeniorCitizen': {1: 'Yes', 0: 'No'}})  # Change 1/0 to Yes/No to match the other binary features\n",
384 |     "\n",
385 |     "# convert the categorical columns to pd.Categorical form\n",
386 |     "for col, iscat in cols:\n",
387 |     "    if iscat:\n",
388 |     "        data[col] = pd.Categorical(data[col])\n",
389 |     "labels = (labels == 'Yes')  # convert labels from str to bool\n",
390 |     "\n",
391 |     "data.head()"
392 |    ]
393 |   },
394 |   {
395 |    "cell_type": "markdown",
396 |    "metadata": {},
397 |    "source": [
398 |     "## Machine learning model\n",
399 |     "\n",
400 |     "This step follows a fairly standard ML workflow, which is to create a pipeline to:\n",
401 |     "\n",
402 |     "* Encode the categorical features as numeric\n",
403 |     "* Normalize the numeric features\n",
404 |     "* Train a classification model using these processed features\n",
405 |     "\n",
406 |     "We use *one-hot encoding*, *standardization*, and *logistic regression with cross-validation* for the three steps.\n",
407 |     "Then we can evaluate the model's performance.\n",
408 |     "\n",
409 |     "Note: `CategoricalEncoder` and, later, `ExplainedModel` are helper classes pulled and edited from the original CFFL [interpretability report code](https://ff06-2020.fastforwardlabs.com/).\n",
410 |     "You can inspect `churnexplainer.py` to see what they do under the hood.\n",
411 |     "CML lets you continue to write modular code to keep things segregated and clean."
412 |    ]
413 |   },
414 |   {
415 |    "cell_type": "code",
416 |    "execution_count": 10,
417 |    "metadata": {},
418 |    "outputs": [],
419 |    "source": [
420 |     "from sklearn.model_selection import train_test_split\n",
421 |     "from sklearn.metrics import classification_report\n",
422 |     "from sklearn.preprocessing import OneHotEncoder, StandardScaler\n",
423 |     "from sklearn.pipeline import Pipeline\n",
424 |     "from sklearn.linear_model import LogisticRegressionCV, LogisticRegression\n",
425 |     "from sklearn.compose import ColumnTransformer\n",
426 |     "\n",
427 |     "from churnexplainer import CategoricalEncoder  # convert Categorical columns into numeric"
428 |    ]
429 |   },
430 |   {
431 |    "cell_type": "code",
432 |    "execution_count": 11,
433 |    "metadata": {},
434 |    "outputs": [],
435 |    "source": [
436 |     "ce = CategoricalEncoder()\n",
437 |     "X = ce.fit_transform(data)  # Categorical columns now have values 0 to num_categories-1\n",
438 |     "y = labels.values\n",
439 |     "X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)\n",
440 |     "\n",
441 |     "cat_cols = list(ce.cat_columns_ix_.values())  # indices of the categorical columns (now numeric)\n",
442 |     "ct = ColumnTransformer(\n",
443 |     "    [('ohe', OneHotEncoder(), cat_cols)],\n",
444 |     "    remainder='passthrough'\n",
445 |     ")\n",
446 |     "clf = LogisticRegressionCV(cv=5,solver='lbfgs', max_iter=100)"
447 |    ]
448 |   },
449 |   {
450 |    "cell_type": "code",
451 |    "execution_count": 23,
452 |    "metadata": {},
453 |    "outputs": [
454 |     {
455 |      "name": "stderr",
456 |      "output_type": "stream",
457 |      "text": [
458 |       "/home/cdsw/.local/lib/python3.6/site-packages/sklearn/preprocessing/_encoders.py:415: FutureWarning: The handling of integer data will change in version 0.22. Currently, the categories are determined based on the range [0, max(values)], while in the future they will be determined based on the unique values.\n",
459 |       "If you want the future behaviour and silence this warning, you can specify \"categories='auto'\".\n",
460 |       "In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.\n",
461 |       "  warnings.warn(msg, FutureWarning)\n"
462 |      ]
463 |     },
464 |     {
465 |      "name": "stdout",
466 |      "output_type": "stream",
467 |      "text": [
468 |       "train 0.8077360637087599\n",
469 |       "test 0.7912400455062572\n",
470 |       "              precision    recall  f1-score   support\n",
471 |       "\n",
472 |       "       False       0.84      0.89      0.86      1300\n",
473 |       "        True       0.62      0.52      0.56       458\n",
474 |       "\n",
475 |       "    accuracy                           0.79      1758\n",
476 |       "   macro avg       0.73      0.70      0.71      1758\n",
477 |       "weighted avg       0.78      0.79      0.79      1758\n",
478 |       "\n"
479 |      ]
480 |     }
481 |    ],
482 |    "source": [
483 |     "pipe = Pipeline([('ct', ct),  # 1. Encode the categorical features as numeric\n",
484 |     "                 ('scaler', StandardScaler()),  # 2. Normalize the numeric features\n",
485 |     "                 ('clf', clf)])  # 3. Train a classification model using these processed features\n",
486 |     "pipe.fit(X_train, y_train)\n",
487 |     "train_score = pipe.score(X_train, y_train)\n",
488 |     "test_score = pipe.score(X_test, y_test)\n",
489 |     "print(\"train\",train_score)\n",
490 |     "print(\"test\", test_score)    \n",
491 |     "print(classification_report(y_test, pipe.predict(X_test)))"
492 |    ]
493 |   },
494 |   {
495 |    "cell_type": "markdown",
496 |    "metadata": {},
497 |    "source": [
498 |     "### Compare with Random Forest\n",
499 |     "Just for comparison, let's compare this model to a Random Forest model.\n",
500 |     "This is simpler since Random Forests do not need the categorical features encoded with a `OneHotEncoder`."
501 |    ]
502 |   },
503 |   {
504 |    "cell_type": "code",
505 |    "execution_count": 14,
506 |    "metadata": {},
507 |    "outputs": [
508 |     {
509 |      "name": "stdout",
510 |      "output_type": "stream",
511 |      "text": [
512 |       "train 0.9981039059537353\n",
513 |       "test 0.7895335608646189\n",
514 |       "              precision    recall  f1-score   support\n",
515 |       "\n",
516 |       "       False       0.83      0.90      0.86      1300\n",
517 |       "        True       0.63      0.47      0.54       458\n",
518 |       "\n",
519 |       "    accuracy                           0.79      1758\n",
520 |       "   macro avg       0.73      0.69      0.70      1758\n",
521 |       "weighted avg       0.78      0.79      0.78      1758\n",
522 |       "\n"
523 |      ]
524 |     }
525 |    ],
526 |    "source": [
527 |     "from sklearn.ensemble import RandomForestClassifier\n",
528 |     "clf_rf = RandomForestClassifier(n_estimators=100)\n",
529 |     "pipe_rf = Pipeline([('scaler', StandardScaler()),\n",
530 |     "                 ('clf', clf_rf)])\n",
531 |     "pipe_rf.fit(X_train, y_train)\n",
532 |     "train_score = pipe_rf.score(X_train, y_train)\n",
533 |     "test_score = pipe_rf.score(X_test, y_test)\n",
534 |     "print(\"train\",train_score)\n",
535 |     "print(\"test\", test_score)\n",
536 |     "print(classification_report(y_test, pipe_rf.predict(X_test)))"
537 |    ]
538 |   },
539 |   {
540 |    "cell_type": "markdown",
541 |    "metadata": {},
542 |    "source": [
543 |     "### Plot ROC Curve\n",
544 |     "\n",
545 |     "We can also generate an ROC Curve to visualize the model's performance and calculate the AUROC."
546 |    ]
547 |   },
548 |   {
549 |    "cell_type": "code",
550 |    "execution_count": 24,
551 |    "metadata": {},
552 |    "outputs": [
553 |     {
554 |      "name": "stdout",
555 |      "output_type": "stream",
556 |      "text": [
557 |       "Logistic: AUROC=0.834\n"
558 |      ]
559 |     },
560 |     {
561 |      "data": {
562 |       "image/png": "iVBORw0KGgoAAAANSUhEUgAAAtgAAAHpCAYAAAC4HUuQAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAWJQAAFiUBSVIk8AAAHvBJREFUeJzt3f+rZfdd7/HXJDOj/R4kjVB7i+ZS1EhscxRyTXMtEvpDJXiFaz8/tOK1FGJABvqT+Ru8v1iJFOy113DjF/Ke5oLSNmJIqG0I5gfP/BA7hVpLrTcFk2grNJPpTJK5P8w54549+8zsc/Znf1l7PR5QmLNm7VmrfHqmz/OZz/qsY5cuXQoAANDHTeu+AQAA2CYCGwAAOhLYAADQkcAGAICOBDYAAHQksAEAoCOBDQAAHQlsAADoSGADAEBHAhsAADoS2AAA0JHABgCAjo6v+wYO66mnnrq07nsAAGAc7rvvvmOH/YwZbAAA6GhwM9j7dnZ2Vnq93d3dtVyX1TLO42Cct58xHgfjPA7rGuf96x6FGWwAAOhIYAMAQEcCGwAAOhLYAADQkcAGAICOBDYAAHQksAEAoCOBDQAAHQlsAADoSGADAEBHAhsAADoS2AAA0JHABgCAjo73+oNaa7+Q5ERVffmA3/+ZJL+U5HyStyT5XFW90Ov6AACwCRYO7Nba/Unek+TLSd5/wDnHknygqv5g4tgDST6z6PUBAGCT9Fgi8oWq+nRV/f11zvlAkqemjn2jtfbuDtcHAICNsfAMdlVdmuO091TVM1PHnk/yviT/7yjX3d3dPcrHjuzcuXNruS6rZZzHwThvP2M8DsZ5c3zqzIU8//IbS73GZzOcce62BvsGZs2Uv5zknSu6PgDAIK0iXulrVYF9jaq61Fq7+aif39nZ6Xk7N7T/0/Gqr8tqGedxMM7bzxiPw6aN86nTZ/PMN7+37ttYm3tvvyUPf+SO7n/uusZ5kX8ZWVVgX7OMZO/BRz+OAQBbYZlxvax4ZTlWFdizQvqdSV5a0fUBgC22SbPHZx66Z923wJqtKrD/ubX23qr6h4lj70vy9RVdHwBYg5WE75PPLvfPP4R7b79l3bfABlhJYFfVM621B5NMBvZPV9WTq7g+ALB6q55VtoyCTbG0wG6t3Z0kVfXc3qGvtNZOJXk1yTuSPL6sawPAGGzSsojr2baH3+BGugZ2Vf3JxK+fm/q9ryb5as/rAcAYDSWsE7PKjNPatukDAOZ3UFQLWNg8AhsANpiwhuER2ACwgWaFtaiGYRDYALBBhDUMn8AGgCXo8SCisIZhEtgA0Emv3T2ENQybwAaATiztABKBDQCHMs8s9ZmH7lnR3QCbSGADQPou7wDGTWADMGpHCWtLP4DrEdgAbKyFZ5WffPZQpwtnoAeBDcDG6rFkYx7CGuhJYAOwkU6dPnvl14d9aHB3dzdJsrOz0/WeAOYhsAFYm3mWgHhoEBgagQ3ASniYEBgLgQ1AV4cNaRENbBuBDcCBeu0NvU9MA2MgsAG4oufLVoQ0MFYCG2BErIMGWD6BDbDlrIkGWC2BDbAFDhPRAhpguQQ2wBaYZy9pUQ2wGgIbYIsc9o2HAPQnsAEGpvfWeQD0JbABNthh11YDsH4CG2CDzBvU1lQDbC6BDbBits0D2G4CG2AFbKMHMB4CG6ADs9IA7BPYAIewyA4eohpgHAQ2wBzmCWsBDUAisAEOdFBUC2kArkdgA0wR1gAsQmADRFQD0I/ABkiuiWthDcBRCWxglD515kKef/LZa46feeieNdwNANtEYAOjcnkpyPmZv3fv7bes+G4A2EYCG9hKttUDYF0ENrCVrhfXd956Uz5518ns7IhrAPoT2MBWm7Wmend3dw13AsBY3LTuGwDo7dTps+u+BQBGzAw2MHjX28MaAFZNYAODdb0HGT3ACMC6CGxgkKbjWlADsCkENjA4k3EtrAHYNAIbGIRZy0HENQCbyC4iwMYT1wAMiRlsYONZDgLAkAhsYCPM82pzcQ3AEAhsYC3mCepJ9rQGYCgENrBy13sxjFlqAI
ZOYAMrZYs9ALadXUSAlRLXAGw7M9jA0s1aEiKuAdhWZrCBpZu1hzUAbCsz2MDC5t0R5MxD96zgbgBgvQQ2cGSH2WrPrDUAYyGwgUOxxR4AXJ/ABg5l1npqYQ0A/0FgA9d10Iy19dQAMJvABq5xo7XV1lMDwMEENnCVWXFtGQgAzE9gA6IaADoS2DBC8ywBEdcAcDQCG0bkemEtqgGgD4ENIzIZ14IaAJZDYMNInDp99sqvbbEHAMuzcGC31k4keSDJhSQ3Jfl2VT0x47xfTvLje+e9PckTVfW1Ra8P3Njk0hBb7AHAcvWYwW5JHqmqV5KktXZ/a+1dVfWdKye09pNJ3qiqT08c+2Rr7RtVdbHDPQAzTK+5tiwEAJavR2Cf2I/rPV9M8rEkj04c+/mq+tOpz30pyU8leb7DPQB7DnqQUVwDwGrctMiHW2snk3x/8lhVvZHktalTj7XWpmP+Tbm8XAToaNZ+1mceukdcA8CKLDqDfVuSl2Ycnw73zyf5RGvtM1V1qbX2llye1X74qBfe3d096keP5Ny5c2u5Lqu1TeP82Q/98N6vzm/Ff5+etmmcmc0Yj4NxHochjvNCM9i5HOivzzh+bPKLqvpekq8m+dPW2u8k+aMkjy14bWDKp874RyEAWLdFZ7BfS3LzjU5qrb0/yW1V9dG9r08m+e29Ge1Xrv/p2XZ2do7ysSPb/6lp1ddltYY8zqdOn83zL59PcnlZyM6OJSEHGfI4Mx9jPA7GeRzWNc6LzJgvGtgvJrl7jvN2qup/739RVRdaa59O8t+T/NmC9wCjNeuBRg8zAsB6LbREpKr297S+orV2U5KTU6de8+/WVfWDRa8PYyeuAWDz9Ajci621t018fX+Sp6fOebW19mOTB1prH07ydx2uD6M0/WZGO4UAwGbosQ/2Y0kebK2dT3IiyQtV9a3W2t1JUlXPVdXjrbWP77318fUkP5Tk697kCIc36+UxAMDmWDiw997EeM12e1X13NTXf7zotWDsvJkRADZfjxlsYEm8lREAhkdgwwY5KKgniWsA2GwCG9bsRlEtqAFgWAQ2rJF9rAFg+whsWJPJuBbVALA9vOgF1kRcA8B2EtiwBpMviRHXALBdLBGBFbjednsAwHYxgw0rYC9rABgPM9iwJLNmrc88dM+a7gYAWBUz2LAEB22/BwBsPzPYsAR2CAGA8TKDDUskrgFgfAQ2dDa5BR8AMD4CGzqbXB4CAIyPwIYlsTwEAMZJYAMAQEcCGzqy/hoAENjQkfXXAIDAhiWw/hoAxsuLZmABs97YCACMmxlsWMCsuLY8BADGzQw2HMH0zPWZh+5Z490AAJvEDDYcwWRcm7EGACaZwYYFmLkGAKYJbJiTBxoBgHlYIgJzmo5rS0MAgFnMYMMhWRYCAFyPGWyYg1egAwDzEthwA5Nrry0LAQBuRGDDDUzGtVegAwA3IrBhTuIaAJiHhxxhBlvyAQBHJbBhz+WoPr/31fmrfs/aawBgXgIb9sza59qyEADgsAQ25Opt+D77oR/Ozs7OGu8GABgygc1ozVpnfeetnvsFABYjsBmtWUtCPv6fzx9wNgDAfAQ2ozf56vPd3d013gkAsA0ENqNjCz4AYJkENqMxK6xtvwcA9CawGYXpuLYFHwCwLAKbUdiPa2ENACybPcnYepN7XItrAGDZzGCztWYtCwEAWDYz2Gwla64BgHUxg81WsuYaAFgXM9hsHWuuAYB1EthsncnZawCAVRPYbBWz1wDAuglstorZawBg3QQ2W8PsNQCwCQQ2W8PsNQCwCQQ2W8HsNQCwKeyDzaB5WyMAsGnMYDNo3tYIAGwaM9hshTMP3bPuWwAASGIGmwGbXHcNALApBDaDNLn22rprAGCTCGwGZzqurbsGADaJwGZQxDUAsOkENoMhrgGAIRDYDIK4BgCGQmAzCOIaABgK+2Cz0abf1CiuAYBNZwabjeU16ADAEJnBZmNZFgIADNHCgd1aO5HkgSQXcnlG/N
tV9cQB596R5ENJziX566r6p0Wvz3aafEujuAYAhqTHDHZL8khVvZIkrbX7W2vvqqrvXHVSaz+X5I6q+v0O12TLeUsjADBUPdZgn9iP6z1fTHLfjPM+WFWPdrgeI2L2GgAYmoUCu7V2Msn3J49V1RtJXps6784kf7vItRiPyeUhAABDs+gSkduSvDTj+HS47yT5i9baR5Oc3Lvu41X13aNeeHd396gfPZJz586t5bpj86kzF/L8y28kSe689SbjzFIY5+1njMfBOI/DEMd50SUix5O8PuP4samv35rkV5J8rqoeSfLHST6x94AkXDEZ15+86+Sa7wYA4PAWncF+LcnNc5x3LMmfV9XFJKmq11trn03y4SR/eZQL7+zsHOVjR7b/U9Oqrzs6Tz6bJPk/n/gva7m8cR4H47z9jPE4GOdxWNc4LzJjvugM9otJbp3jvO/ux/W+veUh71jw+mwRa68BgG2wUGBX1YUkb5881lq7KZfXWU96tbX25qnz3pTZy0sYKVvzAQDboMc2fRdba2+b+Pr+JE9PnfNXST42dezX944zYqdOn81dv/ts7vrdZ68cszUfADBkPV4081iSB1tr55OcSPJCVX2rtXZ3klTVc1V1rrX2pdbabyf5QZI3JTlTVf/W4foMzKnTZ6/MVk8zew0ADN3Cgb23tvrhGcefm/r6H5L8w6LXY9hmxfW9t99i1hoA2Bo9ZrBhbpPrrEU1ALCNeqzBhkMT1wDAtjKDzUpcb901AMA2MYPNSkzGtQcZAYBtZgabpZt8gcyZh+5Z450AACyfGWyWzgtkAIAxEdisjAcbAYAxENgAANCRwAYAgI485MjS2JoPABgjgU13B70OHQBgDAQ2XU3HtVeiAwBjI7DpanJLPmENAIyRhxxZCnENAIyVwAYAgI4ENt1MvhIdAGCsBDbdeCU6AIDAppPJ2WvrrwGAMRPYdGH2GgDgMoFNV2avAYCxE9gAANCRwAYAgI4ENguzPR8AwH/wqnSO7NTps1cebkw84AgAkJjBZgHTce0BRwAAM9gc0eSykDMP3bPGOwEA2CwCm0OxLAQA4PosEWFus+LashAAgKuZwWYuk3EtrAEADiawmWl6tnqfuAYAuD5LRJhJXAMAHI0ZbK5hhxAAgKMzg81VptdaAwBwOAKbKzzICACwOIHNFeIaAGBxAptriGsAgKMT2CS5+sFGAACOTmCTJB5sBADoRGBz1ey15SEAAIsR2CNnWz4AgL4E9sjZOQQAoC+BTRJLQwAAehHYI2bnEACA/gT2iFl7DQDQn8DG8hAAgI4ENgAAdCSwAQCgI4E9Uh5wBABYDoE9Ql4uAwCwPMfXfQOszmRYJ14uAwCwDGawR0JcAwCshhnsEZheEiKsAQCWxwz2CIhrAIDVEdhbbnK3EHENALB8AnvL2S0EAGC1BPYWM3sNALB6AnuLmb0GAFg9gT0CZq8BAFZHYAMAQEcCGwAAOhLYW2ryAUcAAFZHYG+h6Tc3AgCwOgJ7C3lzIwDA+gjsLSauAQBWT2BvGWuvAQDWS2BvGWuvAQDWS2BvKctDAADW4/iif0Br7USSB5JcyOVg/3ZVPXGDz/xqkrdW1Z8sen3+g+UhAADr12MGuyV5pKr+V1X9YZKbW2vvOvDk1n40HcKea1keAgCwfj0C+0RVvTLx9ReT3Hed81uSxztclwNYHgIAsD4LBXZr7WSS708eq6o3krx2wPkfSvJ0VV1a5LoAALCpFl2qcVuSl2YcvybcW2tvSfITVfXkgtdMkuzu7vb4Y+Z27ty5tVz3KIZwj5tqSOPM0Rnn7WeMx8E4j8MQx3nRJSLHk7w+4/ixGcc+nuTRBa/HAT515sK6bwEAgCw+g/1akptvdFJr7eeSfK2qXl3welfs7Oz0+qPmsv9T06qvO49Tp8/m+ZfPJ7n8gOPOjjXYR7XJ40w/xnn7GeNxMM7jsK5xXmTGfNHAfjHJ3XOc94tJvtta+82JY7/QWjue5M+qyvTrEZ
06ffaq3UM84AgAsF4LBXZVXWitvX3yWGvtpiQnp877venPttZesw/24sQ1AMBm6bFN38XW2tsmvr4/ydMd/lxuYPLFMuIaAGAz9Hjhy2NJHmytnU9yIskLVfWt1trdSVJVz3W4BjN4sQwAwOZZOLCr6mKSh2ccv25YWx7Sj9lrAIDN0WOJCAAAsEdgAwBARwJ7oCYfcAQAYHMI7IHygCMAwGYS2ANkez4AgM0lsAfI7DUAwOYS2ANm9hoAYPMIbAAA6EhgAwBARwJ7YGzPBwCw2QT2gJw6fdYDjgAAG05gD8hkXHvAEQBgMwnsgbD3NQDAMAjsAbA0BABgOAT2hpuOa7PXAACbTWBvOHENADAsAnsgxDUAwDAI7A1mz2sAgOER2BvKg40AAMMksDeQBxsBAIZLYG8YcQ0AMGwCe8OIawCAYRPYG0pcAwAMk8AGAICOjq/7Brhscu01AADDJbDX6KCoti0fAMBwCew1uF5YW3sNADBsAnsNJuNaVAMAbBeBvUZnHrpn3bcAAEBnAnuFPMgIALD9bNO3QtNLQwAA2D5msNfA0hAAgO1lBntFTp0+u+5bAABgBQT2iuwvD7E0BABguwnsFbMlHwDAdhPYAADQkcAGAICOBDYAAHQksAEAoCOBDQAAHQlsAADoSGCvgJfMAACMh8BeAS+ZAQAYD4G9Ql4yAwCw/QQ2AAB0JLCXzPprAIBxEdhLZv01AMC4COwVsf4aAGAcjq/7BrbVqdNnr8xeAwAwHmawl2Qyri0PAQAYDzPYS3bmoXvWfQsAAKyQGewlsHMIAMB4CewlsHMIAMB4CewlsnMIAMD4CGwAAOhIYAMAQEcCGwAAOhLYndlBBABg3AR2Z3YQAQAYN4G9JHYQAQAYJ4ENAAAdCWwAAOhIYHfkAUcAAAR2Rx5wBABAYC+BBxwBAMZLYAMAQEcCGwAAOjq+6B/QWjuR5IEkF3I52L9dVU/MOO/+JO9OcjHJm5M8VlUvLnp9AADYJAsHdpKW5JGqeiW5HNKttXdV1XeunNDa+5P8e1V9fuLYbyX5ww7XBwCAjdFjiciJ/bje88Uk902dc2dVfWXq2Pc7XBsAADbKQoHdWjuZqVCuqjeSvDZ16hdmfPzmRa4NAACbaNElIrcleWnG8avCvar+bfLr1tpNWTCwd3d3F/n4oZ07d27u66763ujnMOPMcBnn7WeMx8E4j8MQx3nRJSLHk7w+4/ixG3zu15L85YLXBgCAjbPoDPZrOeRMdGvtZ5P8S1X96yIX3tnZWeTjh7b/U9N1r/vkszc+h4021zgzeMZ5+xnjcTDO47CucV5kxnzRGewXk9w678mttR9JcldV/c2C1wUAgI20UGBX1YUkb588tre++uT0uXvHfyPJo4tcEwAANlmPbfouttbeNvH1/UmennHeJ5I8urfLCAAAbKUeL5p5LMmDrbXzSU4keaGqvtVauztJquq51toHk9yV5FJrbfKzZ6rq7zrcAwAAbISFA7uqLiZ5eMbx5yZ+/TdJrLsGAGDr9VgiAgAA7BHYnZw6fXbdtwAAwAYQ2J08883vJUnuvf2WNd8JAADrJLA7e/gjd6z7FgAAWCOBDQAAHQlsAADoSGADAEBHAhsAADoS2B3Yog8AgH0CuwNb9AEAsE9gd2SLPgAABDYAAHQksAEAoCOBDQAAHQlsAADoSGADAEBHAhsAADoS2AAA0JHABgCAjgQ2AAB0JLABAKAjgQ0AAB0JbAAA6EhgAwBARwIbAAA6EtgLOnX67LpvAQCADSKwF/TMN7+XJLn39lvWfCcAAGwCgd3Jwx+5Y923AADABhDYAADQkcAGAICOBDYAAHQksAEAoCOBDQAAHQnsBdgDGwCAaQJ7AfbABgBgmsDuwB7YAADsE9gAANCRwD4i668BAJhFYB+R9dcAAMwisBdk/TUAAJMENgAAdCSwj8D6awAADiKwj8D6awAADiKwF2D9NQAA0wQ2AAB0JLABAKAjgQ0AAB0JbA
AA6EhgAwBARwIbAAA6EtgAANCRwAYAgI4ENgAAdCSwD+nU6bPrvgUAADaYwD6kZ775vSTJvbffsuY7AQBgEwnsI3r4I3es+xYAANhAAhsAADoS2AAA0JHABgCAjgQ2AAB0JLABAKAjgX0InzpzYd23AADAhhPYh/D8y28ksQc2AAAHE9hHYA9sAAAOIrABAKAjgQ0AAB0JbAAA6Oj4on9Aa+1EkgeSXMjlYP92VT0x47yfSfJLSc4neUuSz1XVC4tef1XsIAIAwDwWDuwkLckjVfVKkrTW7m+tvauqvnPlhNaOJflAVf3BxLEHknymw/VXwg4iAADMo8cSkRP7cb3ni0numzrnA0memjr2jdbauztcf6XsIAIAwPUsFNittZNJvj95rKreSPLa1Knvqap/nDr2fJKfWuT6AACwaRZdInJbkpdmHJ8O91kh/3KSdx71wru7u0f96ELWdV1W49y5c0mM87YzztvPGI+DcR6HIY7zoktEjid5fcbxYzf6YFVdSnLzgtcHAICNsugM9muZL5IvTR/Ye/DxjaNeeGdn56gfPZLPZnct12W19n86Ns7bzThvP2M8DsZ5HNY1zovMmC86g/1iklvnOG9WSL8zs5eXAADAYC0U2FV1IcnbJ4+11m5KcnLq1H9urb136tj7knx9kesDAMCm6bFN38XW2tsmvr4/ydOTJ1TVM7l2676frqp/6nB9AADYGD1eNPNYkgdba+eTnEjyQlV9q7V2d5JU1XN7532ltXYqyatJ3pHk8Q7XBgCAjbJwYFfVxSQPzzj+3NTXX03y1UWvBwAAm6zHEhEAAGCPwAYAgI4ENgAAdCSwAQCgI4ENAAAdCWwAAOhIYAMAQEcCGwAAOhLYAADQkcAGAICOBDYAAHQksAEAoKNjly5dWvc9HMpTTz01rBsGAGCw7rvvvmOH/YwZbAAA6GhwM9gAALDJzGADAEBHAhsAADoS2AAA0JHABgCAjgQ2AAB0JLABAKAjgQ0AAB0JbAAA6EhgAwBARwIbAAA6Or7uG9g0rbUTSR5IciGXfwD5dlU9MeO8n0nyS0nOJ3lLks9V1QurvFeO7hDjfH+Sdye5mOTNSR6rqhdXea8czbxjPPWZX03y1qr6kxXcIh0cZpxba3ck+VCSc0n+uqr+aWU3ykIO8Xf2Lyf58b3z3p7kiar62gpvlQW11n4hyYmq+vIBvz+I/hLY12pJHqmqV5LLgdVae1dVfefKCa0dS/KBqvqDiWMPJPnMyu+Wo5pnnN+f5N+r6vMTx34ryR+u/G45ihuO8VUnt/aj8XfiEM01zq21n0tyR1X9/jpukoXN83f2TyZ5o6o+PXHsk621b1TVxdXfMoexN6H1niRfTvL+A84ZTH9ZInKtE/vfwHu+mOS+qXM+kOSpqWPfaK29e6l3Rk/zjPOdVfWVqWPfX+5t0dE8YzypJXl8ubfEEsw7zh+sqkdXdE/0N884/3xV/dXUsS8l+all3hjdfKGqPl1Vf3+dcwbTXwJ7QmvtZKYCqqreSPLa1Knvqap/nDr2fHwTD8IhxvkLMz5+87Lui34OMcb7538oydNVdWkFt0cn845za+3OJH+7wlujo0N8Px9rrU3/K9Sbcnm5CBtuzr9/B9Nf/jn0arcleWnG8ekfRGb9YPJyknd2vyOWYa5xrqp/m/y6tXZTBPZQzPu9nNbaW5L8RFU9ufS7ord5x3knyV+01j6a5GQu/3/f41X13SXfH33MO86fT/KJ1tpnqurS3vf2z1fVw0u/Q1ZlMP1lBvtqx5O8PuP4sRt9cO8nL/E1DEcd519L8pf9b4clOMwYfzyJpQPDNO84vzXJr+Tyw1CPJPnjXA6xE8u9PTqZa5yr6ntJvprkT1trv5Pkj5I8tvzbY502tb8E9tVey3yDdM0/Y+wtvH+j+x2xDPOO8xWttZ9N8i9V9a/LuSU6m2uM9x58+1pVvbr8W2IJ5v1ePpbkz6vqQpJU1etJPpvkw0u8N/qZ9/v5/U
luq6qPVtX/TPI/knxsbyab7TCY/hLYV3sxya1znDdrIN+Z2f+ExeaZd5yTJK21H0lyV1X9zfJuic7mHeNfTPKfWmu/uf+fJP9179cnl3qH9DDvOH93eheJveUh71jKXdHbvOO8U1X/d/+LvR+oPp3kvy3rxli5wfSXwJ6w98349slje+tup/+P9p9ba++dOva+JF9f4u3RySHGef/4b8QSgkGZd4yr6veq6pHJ/yT5yt6vPRi14Q7xvfxqa+3NU+e9KbOXHbBhDjHO13zPVtUPonW2yWD6y//ornWxtfa2ia/vT/L05AlV9Uyu3R7op720YFBuOM57PpHk0b0n1hmWeceYYZtnnP8qycemjv363nGGYZ5xfrW19mOTB1prH07yd8u+OVZjSP1lF5FrPZbkwdba+SQnkrxQVd9qrd2dJFX13N55X2mtnUryai7/M6P9c4flhuPcWvtgkruSXGqtTX72TFX5C3vzzfu9zLDdcJyr6lxr7Uuttd9O8oNc3rrtzPROQWy0ecb58dbax/ceXn09yQ8l+bo3OQ7bUPvr2KVLtn0FAIBeLBEBAICOBDYAAHQksAEAoCOBDQAAHQlsAADoSGADAEBHAhsAADoS2AAA0JHABgCAjgQ2AAB0JLABAKAjgQ0AAB0JbAAA6EhgAwBARwIbAAA6EtgAANDR/wcTWcaQAppCuAAAAABJRU5ErkJggg==\n",
563 |       "text/plain": [
564 |        "<matplotlib.figure.Figure at 0x7fd5f5821160>"
565 |       ]
566 |      },
567 |      "metadata": {
568 |       "needs_background": "light"
569 |      },
570 |      "output_type": "display_data"
571 |     }
572 |    ],
573 |    "source": [
574 |     "from sklearn.metrics import roc_curve\n",
575 |     "from sklearn.metrics import roc_auc_score\n",
576 |     "from matplotlib import pyplot\n",
577 |     "\n",
578 |     "logistic_regression_probabilities = pipe.predict_proba(X_test)\n",
579 |     "logistic_regression_probabilities = logistic_regression_probabilities[:, 1]\n",
580 |     "logistic_regression_auc = roc_auc_score(y_test, logistic_regression_probabilities)\n",
581 |     "print('Logistic: AUROC=%.3f' % (logistic_regression_auc))\n",
582 |     "logistic_regression_fpr, logistic_regression_tpr, _ = roc_curve(y_test, logistic_regression_probabilities)\n",
583 |     "pyplot.plot(logistic_regression_fpr, logistic_regression_tpr, label='Logistic')\n",
584 |     "pyplot.show()"
585 |    ]
586 |   },
587 |   {
588 |    "cell_type": "markdown",
589 |    "metadata": {},
590 |    "source": [
591 |     "We find an AUROC of 0.83. Not bad for a quick exercise without fine-tuning.\n"
592 |    ]
593 |   },
594 |   {
595 |    "cell_type": "markdown",
596 |    "metadata": {},
597 |    "source": [
598 |     "## Interpretability model\n",
599 |     "We use [lime](https://github.com/marcotcr/lime) (Local Interpretable Model-Agnostic Explanations) to explain the predictions.\n",
600 |     "It is a method of determining which feature has the greatest effect on the predicted value,\n",
601 |     "and is explained in depth in the [FFL report](https://ff06-2020.fastforwardlabs.com/).\n",
602 |     "For more information, refer to the [lime documentation](https://lime-ml.readthedocs.io/en/latest/lime.html)."
603 |    ]
604 |   },
605 |   {
606 |    "cell_type": "code",
607 |    "execution_count": 16,
608 |    "metadata": {},
609 |    "outputs": [],
610 |    "source": [
611 |     "from lime.lime_tabular import LimeTabularExplainer\n",
612 |     "\n",
613 |     "data[labels.name + ' probability'] = pipe.predict_proba(X)[:, 1]\n",
614 |     "\n",
615 |     "# List of length number of features, containing names of features in order\n",
616 |     "# in which they appear in X\n",
617 |     "feature_names = list(ce.columns_)\n",
618 |     "\n",
619 |     "# List of indices of columns of X containing categorical features\n",
620 |     "categorical_features = list(ce.cat_columns_ix_.values())\n",
621 |     "\n",
622 |     "# Dict mapping index -> [cat1, cat2...] list of strings, where each index\n",
623 |     "# is that of a categorical column in X, and the list of strings are the\n",
624 |     "# possible values it can take\n",
625 |     "categorical_names = {i: ce.classes_[c]\n",
626 |     "                     for c, i in ce.cat_columns_ix_.items()}\n",
627 |     "class_names = ['No ' + labels.name, labels.name]\n",
628 |     "explainer = LimeTabularExplainer(ce.transform(data),\n",
629 |     "                                 feature_names=feature_names,\n",
630 |     "                                 class_names=class_names,\n",
631 |     "                                 categorical_features=categorical_features,\n",
632 |     "                                 categorical_names=categorical_names)    \n",
633 |     "\n"
634 |    ]
635 |   },
636 |   {
637 |    "cell_type": "markdown",
638 |    "metadata": {},
639 |    "source": [
640 |     "## Explaining a Single Prediction\n",
641 |     "\n",
642 |     "Let's look at how one specific prediction would be interpreted.\n",
643 |     "Lime explains the prediction by giving every feature a weight from -1 to 1.\n",
644 |     "Features with weights closer to -1 push the prediction more strongly toward 0 (will not churn), while features with weights closer to 1 push it more strongly toward 1 (will churn)."
645 |    ]
646 |   },
647 |   {
648 |    "cell_type": "code",
649 |    "execution_count": 17,
650 |    "metadata": {},
651 |    "outputs": [
652 |     {
653 |      "data": {
654 |       "text/html": [
655 |        "<div>\n",
656 |        "<style scoped>\n",
657 |        "    .dataframe tbody tr th:only-of-type {\n",
658 |        "        vertical-align: middle;\n",
659 |        "    }\n",
660 |        "\n",
661 |        "    .dataframe tbody tr th {\n",
662 |        "        vertical-align: top;\n",
663 |        "    }\n",
664 |        "\n",
665 |        "    .dataframe thead th {\n",
666 |        "        text-align: right;\n",
667 |        "    }\n",
668 |        "</style>\n",
669 |        "<table border=\"1\" class=\"dataframe\">\n",
670 |        "  <thead>\n",
671 |        "    <tr style=\"text-align: right;\">\n",
672 |        "      <th>id</th>\n",
673 |        "      <th>4809</th>\n",
674 |        "    </tr>\n",
675 |        "  </thead>\n",
676 |        "  <tbody>\n",
677 |        "    <tr>\n",
678 |        "      <td>gender</td>\n",
679 |        "      <td>Female</td>\n",
680 |        "    </tr>\n",
681 |        "    <tr>\n",
682 |        "      <td>SeniorCitizen</td>\n",
683 |        "      <td>0</td>\n",
684 |        "    </tr>\n",
685 |        "    <tr>\n",
686 |        "      <td>Partner</td>\n",
687 |        "      <td>No</td>\n",
688 |        "    </tr>\n",
689 |        "    <tr>\n",
690 |        "      <td>Dependents</td>\n",
691 |        "      <td>No</td>\n",
692 |        "    </tr>\n",
693 |        "    <tr>\n",
694 |        "      <td>tenure</td>\n",
695 |        "      <td>1</td>\n",
696 |        "    </tr>\n",
697 |        "    <tr>\n",
698 |        "      <td>PhoneService</td>\n",
699 |        "      <td>Yes</td>\n",
700 |        "    </tr>\n",
701 |        "    <tr>\n",
702 |        "      <td>MultipleLines</td>\n",
703 |        "      <td>No</td>\n",
704 |        "    </tr>\n",
705 |        "    <tr>\n",
706 |        "      <td>InternetService</td>\n",
707 |        "      <td>No</td>\n",
708 |        "    </tr>\n",
709 |        "    <tr>\n",
710 |        "      <td>OnlineSecurity</td>\n",
711 |        "      <td>No internet service</td>\n",
712 |        "    </tr>\n",
713 |        "    <tr>\n",
714 |        "      <td>OnlineBackup</td>\n",
715 |        "      <td>No internet service</td>\n",
716 |        "    </tr>\n",
717 |        "    <tr>\n",
718 |        "      <td>DeviceProtection</td>\n",
719 |        "      <td>No internet service</td>\n",
720 |        "    </tr>\n",
721 |        "    <tr>\n",
722 |        "      <td>TechSupport</td>\n",
723 |        "      <td>No internet service</td>\n",
724 |        "    </tr>\n",
725 |        "    <tr>\n",
726 |        "      <td>StreamingTV</td>\n",
727 |        "      <td>No internet service</td>\n",
728 |        "    </tr>\n",
729 |        "    <tr>\n",
730 |        "      <td>StreamingMovies</td>\n",
731 |        "      <td>No internet service</td>\n",
732 |        "    </tr>\n",
733 |        "    <tr>\n",
734 |        "      <td>Contract</td>\n",
735 |        "      <td>Month-to-month</td>\n",
736 |        "    </tr>\n",
737 |        "    <tr>\n",
738 |        "      <td>PaperlessBilling</td>\n",
739 |        "      <td>No</td>\n",
740 |        "    </tr>\n",
741 |        "    <tr>\n",
742 |        "      <td>PaymentMethod</td>\n",
743 |        "      <td>Mailed check</td>\n",
744 |        "    </tr>\n",
745 |        "    <tr>\n",
746 |        "      <td>MonthlyCharges</td>\n",
747 |        "      <td>19.9</td>\n",
748 |        "    </tr>\n",
749 |        "    <tr>\n",
750 |        "      <td>TotalCharges</td>\n",
751 |        "      <td>19.9</td>\n",
752 |        "    </tr>\n",
753 |        "    <tr>\n",
754 |        "      <td>Churn probability</td>\n",
755 |        "      <td>0.220148</td>\n",
756 |        "    </tr>\n",
757 |        "  </tbody>\n",
758 |        "</table>\n",
759 |        "</div>"
760 |       ],
761 |       "text/plain": [
762 |        "id                                4809\n",
763 |        "gender                          Female\n",
764 |        "SeniorCitizen                        0\n",
765 |        "Partner                             No\n",
766 |        "Dependents                          No\n",
767 |        "tenure                               1\n",
768 |        "PhoneService                       Yes\n",
769 |        "MultipleLines                       No\n",
770 |        "InternetService                     No\n",
771 |        "OnlineSecurity     No internet service\n",
772 |        "OnlineBackup       No internet service\n",
773 |        "DeviceProtection   No internet service\n",
774 |        "TechSupport        No internet service\n",
775 |        "StreamingTV        No internet service\n",
776 |        "StreamingMovies    No internet service\n",
777 |        "Contract                Month-to-month\n",
778 |        "PaperlessBilling                    No\n",
779 |        "PaymentMethod             Mailed check\n",
780 |        "MonthlyCharges                    19.9\n",
781 |        "TotalCharges                      19.9\n",
782 |        "Churn probability             0.220148"
783 |       ]
784 |      },
785 |      "execution_count": 17,
786 |      "metadata": {},
787 |      "output_type": "execute_result"
788 |     }
789 |    ],
790 |    "source": [
791 |     "data.sample().T  # reminder of the features"
792 |    ]
793 |   },
794 |   {
795 |    "cell_type": "code",
796 |    "execution_count": 18,
797 |    "metadata": {},
798 |    "outputs": [
799 |     {
800 |      "name": "stdout",
801 |      "output_type": "stream",
802 |      "text": [
803 |       "('tenure > 55.00', -0.2764138466515261)\n",
804 |       "('MonthlyCharges > 89.86', -0.24321978003513584)\n",
805 |       "('InternetService=Fiber optic', 0.2096249701592442)\n",
806 |       "('TotalCharges > 3794.74', 0.2031826086609449)\n",
807 |       "('StreamingMovies=Yes', 0.08274884799449057)\n",
808 |       "('StreamingTV=Yes', 0.07781839117828696)\n",
809 |       "('PhoneService=Yes', 0.04962121848245511)\n",
810 |       "('MultipleLines=Yes', 0.04446637536101756)\n",
811 |       "('Contract=One year', -0.04392535067270691)\n",
812 |       "('TechSupport=No', 0.04173749428961184)\n"
813 |      ]
814 |     }
815 |    ],
816 |    "source": [
817 |     "exp = explainer.explain_instance(ce.transform(data.sample())[0],pipe.predict_proba)\n",
818 |     "for cols in exp.as_list():\n",
819 |     "    print(cols)"
820 |    ]
821 |   },
822 |   {
823 |    "cell_type": "code",
824 |    "execution_count": 19,
825 |    "metadata": {},
826 |    "outputs": [
827 |     {
828 |      "data": {
829 |       "image/png": "iVBORw0KGgoAAAANSUhEUgAAA1AAAAIwCAYAAACImIrfAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAWJQAAFiUBSVIk8AAAIABJREFUeJzs3Xm8XVV99/HPTgKEMCOCIk6Viog4pFasUEES0eLYan84wAOOLVXRVh/b56l9OmifTpa2zsgsOP2wKghOTRAsiEyXwTJJHgoqKFOYEwhJ9vPHXgd2Dufcu29yk5Ob+3m/Xnlxz15rr7323ifhfO9ae52qrmskSZIkSRObNeoOSJIkSdJ0YYCSJEmSpI4MUJIkSZLUkQFKkiRJkjoyQEmSJElSRwYoSZIkSerIACVJkiRJHRmgJEmSJKkjA5QkSZIkdWSAkiRJkqSODFCSJEmS1JEBSpIkSZI6MkBJkiRJUkdzRt0BSZI0PVRV9STgvUAFXFDX9ekj7pIkbXBVXdej7oMkaR1UVfWxuq4/Mup+jKeqqgOAOXVdLxp1XwaZDtcQoKqqC4ED6rpePoJjbwYcAxxV1/X9G/r4w1RVdQRwXl3XS0bdl2GqqnoicCSwGTCbZgbQauCYuq7/X1/dafFelGYyR6AkSdrIVFX1V8DH6rpe2Vf00rquHxpBlwD2BXJjCk/TQVVVzwT+CPjLuq7vaW2fC3y0qqrT6rq+aGQdlDRpPgMlSdI0McLwBPBk4L9HePxpp6qqCngf8KF2eAKo6/pB4H8B76mqys9j0jTiX1hJktTFbGDVqDsxzewPfHvASCIAZfvZwG9t0F5JWidO4ZOkGaSqqlcDLweW0zyPsQXw2bqurxpQ9wjgeTTPamwJ3A/8bd80pM1ppiftSrOwwKzS9j/1/8a9Y/+eCryztDOv/PlsXdeXl/IXAK+o6/rvB+z7J8CP67r+UVVVTwMWApcC/wOoy5/ZwNF1Xf9sgn7sDLyf5v+Ts2mu09U0z6ysbtX7WF3XH6mq6pXAK4AV5Th3luOsatWdBRwO7NXqSw18vK7rX5Y6+wOvAV4CbFNVVQ18o67r89vH6+vrvkDw6D3dEji1rusf9dX7GPAXNPfrGaWvWwDn1nX9zXGuxW7AB0q/96uq6m5geV3Xf9Gq0+l9VfpwbLm2q4Gr67o+YZxjPxt4N7CynNds4OS6ri8Ytk/Zb3/g9cDDZZ8tgRPqur6kr96TgaNKX2YBc4Frgc/13btn07wvV9K8z7eiee7qS+P1AzgA+NgEdRYBeww4hy7vqaHPS/WXlWt/NPBhYHPg5rqu/7n3fCJwDfCeUraS5h4eXdf1zyfovzTjGKAkaYaoquoPgYfqun5/a9tc4J+qqvpiXdc/bm1/H3BdXdcntbY9BfhUVVVva/1G/ZM0H7Kua9V7BnA88MZJ9u/pwJuAv+61XxYu+HhVVcfVdf2Tuq4vq6rqoKqqXtR+bqSqqucBW/eFhhcCjwM+2As9VVXNA/6lqqoP1XV935B+7AT8X5ppV3e3tr8W+Mvyp13/EGBuXdd/3Nq2D/CHwKdbVf+OJgyd2Kq3C/D5qqoOqev6wbquzwXOHecZqP6+vp7mw/cft85xNvCRqqp2HhCM/ho4o67rT7fa+GBVVc+r6/qKQceo6/oXwIeGLdYwmfcVsA1NePpIXdfLJji3PWjC74da74c5wN9VVbVq2HNDVVW9Cdi+735sDnyuqqp/7oW6su1/0bw/lrfqvhz4KPC/y+udgbcDH27fj6qqDq+q6r11XX9qnNOoJ7qHdV3fDNzcdw5d31OTMRv4P8BH67q+s69sF5rg/hd1Xd9bjrcV8E80gVtSi1P4JGkGKOFkl/aHd3jkOYz3A39UntforRi2VV3X3++r+zPgZOC1pd5+wOJ2eCr1/h9wQfmt/WS8FfjH9gfOuq4fBj4EvKtV7+PAe8sHvN6H9Q/SBJS2Z9R1/Q/tEaPyof2faZ5LGeYPgD9vh6ey7xnAbn115wG/Vtf1yX11L2zXLSMdd/SFCeq6vhX4AnDgOP0ZqK
qqbYADB5zjqrqu/xr4nVKnZx/gov5RGOATwBsme/zSh87vq+L1wN9MFJ6Kd9N8oG+/H1bShJ5Dx9lv37quP9fXnxXA/6QJZD3PBb7Xv6JhXdf/AWzd2vRS4JT+IFTu+TMnOIe1+UV1p/fUWngl8IkB4QmakdE/74WncrwHaML889fhmNImyQAlSTPDm4CBvykvH77PpFllDZoPucOmJl1AM0UL4EbgO0Pq/QR4YtfOle8X+ll7elKrfw/TTB/svV4F/A3NKBE0I0J/X+q1nTboWHVd/xTYcZzufKMEm0FuLSM8PdsBpwyp2+7PAzRBaZBJXauW1wEnjlN+fKnTs7qu6zP7Kw24bpMxmfcVNCNYdw+q31ZV1dbA0kF9K0Hm8nF2/+yQ/vQHh1uApw1p40Otn39OM+VxonqDVBOUD9L1PTVZV9d1fcOQsnPqwasr/hR4+jocU9okOYVPkmaGeUN+89xzHvB75b+7DXtGqPxW+qvl51+M094KJvf/mBcALx1n1OopVVXN6o201HW9pKqqa6qq+hfgv+u6vnrAPqsHbOuZPaxgSFs9K1jzQ/Evx7kOj/ySsq7rpZNos6tfB744TvnFrBmgLh6n7tr+QnUy7yvovorfHjTP5Aw03nNTE9y/h1v1bqmq6sGqqj5CM8J0U6tsRevnC6uq+vOqqj4AfKmu69sG1ZtCnd5Ta2G8az9sGfX7aAKdpBYDlCTNDBN9QL8D2KH83Pkb1sv0rJcDL+PRRQkAngIM/ZA7wI7AP9d1PfRD8wCn0jzw/5xJ7NMz0fM3WwKH0ISUmkf/f7kfEy8KMF67vwW8mkcf0ofmA+qla9Hc7Lquh96ruq7rsgjF+jSZ99VkbEXz4X2tlKmFb6UZLd2cR/vZntJIXdfHVFX1eOB1ZbGM1cBXB0xL/dvyDGCU+g/ShK7xfokgaRNlgJKkmWGiD7o7AL1Rkk4fuss0q8/SjIL8eb3m6nQHMLn/x9xJs5LfZALU/wF+t/z3TyaxH4xzPaqqeg7N8zL/1F5Eo5T91SSP09tvNs21WkSzSMaKVtnTaFYMnFIl3K7vZccn876ajPsZf5rlUGU1xs2BT/VPFywr0a2hruvbgeNK+Vzgf1ZVdWNd16f01fsZZbpiVVXbAX9ZVdW367peNE53VldVtUU9zvd3lYVE9p6gnWHWd0CWNIDPQEnSzPBAWV1umBcDvVXYbi6LHjxGVVVzqqr6jfLyfTQren23HZ7W0uVMYiGFsmz2FXVdXwz8qKqq35vk8cb74P9e4N11Xf/XJNsczyHA1+q6zimc9vXTqlnWfZjnAtdP0bGGmcz7ajJ+Cuw5rLCsSDdo+27ADnVd/32XZ636lZUQP8qaz20NqncPzcIlEy2+cQ7NCO14fotm+feptsXEVSStDQOUJM0MX2XIcsRlpOJVwPll0+k0058GeQ6wd/l5K4Z/QJ/UiEpZynm3Mj1qUB9ntX7eiea7oL5Y9v0a8NtVVT2hb7ffYICyyuDt43Rn6aARg9KHSa+WV+wCjA0pG3StHmLiD8CnA28bp/wI4IwJe7ZuJvO+6qwsaLBTWcZ+kLcM2f54hgS2qqr2ohnl7L3+yDjtt5c1/+shfaxppvKN5xzgNWX59WEW0CzOsjZW91ajbCtLtE/5qKakhgFKkmaAsrT4nVVVvb29vXyA/BhwUu95mrr5UtcHq6p6RV/d7WlGZ7JsOpfm+3HadWZXVfXe8rK9FHQXHwE+Xb53p93mQppllnv+qvyhb9tf9m17YlVV7WWre1O0/hQ4Zpx+3Fu+V6q93zbAPwDXMfnzAvg+cGT/xqqq3kzzob+/zSU0X2I8VBkFObeqqj/rC5hVVVV/DPxne1nq9WEy76u1cCzwt+2QU87t/cC3h+xzFXBQVVVrhM+qqubThK72dMJvAx/sW2adqqpeyJojlBdXVdV+//XqHQzc1r+9rYzM/ivN96et8SxYVVWbV1X1UeDL6zCCewrwF+1zKO/xfwSuXMs2JU
3AZ6Akafp7WVVVfz+k7Ja6rj8BUNf1p6uqem1VVZ+hWUThYZpnRU7sn65W1/W/VlX1tqqq/o1mcYjVNKMif9r7Dp+6rv+jqqq3VFX1idLeLJoPnqfSfLCcaOrSGuq6/nlVVUcBHy6LH6ykWS3v8t73DJVAdFb/ym91Xd9TVdVpVVW9q67rY8vmM2mmI/4bzbNAq0v/Pl5WExzmn4E/q6rqsNKHWeXc/5Hmu5SeBExqelhd11dVVbVbVVWfplkcYVb5cyZNIP3jvl1OB44uUxOvrev6uCHt/nvVfB/XJ6uqWk5zT+fSTBec9MjP2pjM+2qS7V5bVdXJwD9XVdV7D84BTqvreuCITV3XK6qq+jjNly8/QHO/Z9E8W/cRmvDcqztW3md/V/7bW3DiF8CHW/XOrKrqZVVV/WOrD1vSBJRhf+/afbqmPHv1ZyUM9t6HNXBs/4IVk1FWo7wAOL6qqvta5/sJ1vzOK0lTqFr7XwxJkrRx6i3MMCx4SJK0tpzCJ0mSJEkdGaAkSZIkqSMDlCRJkiR15DNQkiRJktSRI1CSJEmS1JEBSpIkSZI6MkBJkiRJUkcGKEmSJEnqyAAlSZIkSR0ZoCRJkiSpozmj7oC0vi1evNi1+iVJkvQYCxYsqCa7jyNQkiRJktSRI1CaMebPnz/qLsxYY2NjgPdgJvLez1ze+5nLez9zTad73+vr2nAESpIkSZI6MkBJkiRJUkcGKEmSJEnqyAAlSZIkSR0ZoCRJkiSpIwOUJEmSJHVkgJIkSZKkjgxQkiRJktSRAUqSJEmSOjJASZIkSVJHBihJkiRJ6sgAJUmSJEkdGaAkSZIkqSMDlCRJkiR1ZICSJEmSpI4MUJIkSZLUkQFKkiRJkjoyQEmSJElSRwYoSZIkSerIACVJkiRJHc0ZdQekDWWHHXccdRdmrAWj7oBGxns/cy0AFi9aNOpuSNKUcwRKkiRJkjoyQEmSJElSRwYoSZIkSerIACVJkiRJHRmgJEmSJKkjA5QkSZIkdWSAkiRJkqSODFCSJEmS1JEBSpIkSZI6MkBJkiRJUkcGKEmSJEnqaM6oO6DpKSIeB/w5cHVr868y88xS/g6g6tvtPzPzugFtvYMmzFfA3ZmZrbLdgN8DlgFbAosz8+r+NiRJkqQNwQC1gUTEoZl56qj7MYV2AU7NzLEh5Q91Od+IeA3w7cz8ZXk9PyJemJmXlCqvysxPtOq/mzVDmyRJkrTBOIVPa2tn4LYpaOdxvfAEUALZXgAR8RTg+r76Z0fES6bguJIkSdKkOQK1AUTEq4EDI2JrYElmLirb3wJsBywHNgNOyMxVEXEocDFwYCnbGvhKZt5R9ltjNKtMc9s9M88p+94M7AncmJnfjogdgMNppsFVwNWZ+Z8D+nkYcH5m3tDhtB4P/Gic8rpDGwAPD9i2ovz3WcBl7YLMXBIRvznBsSVJkqT1wgC1AWTmmRGxfV/oeQ3w415YiYidgTcASROm9s3Mz5ayCvgfwMkdDrcNsENmfqa17TDgE5m5urQXEbFzZvaPIJ0KvCoiXgt8f4JnjTYH9o+IJ9GEpata0+4AtisBcTOa0PaNzLxnQDv9z0nBoyOjOwF3jFMuSZIkbVAGqNHZvj3Sk5m3RUQvGOwMfLZVVkfEoJGaQeYC3+q9iIhn04wqrW7V+QbwWuDf2ztmZg30FoH4nYg4mGbRhjVGgYr/Bm7KzP8o9Q+IiN/KzAtK+a+A08uI2hzgfRHxycxc2eEceqFqVunTsHJJ0kZs2bJljI0Ne1RWm6ply5YBeO9noJly7w1Qo7NPRMzu27Zt+e/NmXlvX1nXUZfbM7MdtvYCtomIvfvqTfT80vnAk4DfoG8aHUBm/qjv9TkRcThwQXn99VbZyog4ETgYOKOvqfGm+q2KiGpIiJIkSZI2OAPU6FyYmadsgONUNKvlrZiwJhAROwK/T/Mc0pcz84FJHGvo6FJm3h0R2wwoGi8c3UEzje/2SewjSdpIzJs3j/nz54
+6G9rAeqMP3vuZZzrd+3UZJfNZktHZYh327Q8QW45T9yfAb3ZpNCKOAF4FnJSZJ44XniLijcP6FRFPiIj9+urPAlYN2GfQdZhb/nst8Ny+dp4J/HxYvyRJkqT1yQC14SyPiHmt17f2T6sr4aCLrfpev2ZYxcy8BnhJRGzWOs7jy8p8/U7OzFMy86EOfZhTRqt6bc6jPJuUmb8CntdX/5XAY1b+A+4oC1H02pkPXFPa+TnQf01eMaQdSZIkab1zCt+Gcw7w/ohYkpmnZea3IuKNEfFimulyc3ns80HDXBAR76dZlnwuE3+x7DHAkRFxH80o0cOZ+cX+SpN81iiBt0dEb5/NgBNb5d+JiCNpzm0W8LPMvBkgIo7qfTluuQ7vKisNzgIe6Jva+K2I+ABwP81y7uf4TJQkSZJGpaprP4tq07Z48eIaYMHChaPuiiTNKIsXLZoWz0Joak2n52A0tabTve/1dcGCBZNe3dkpfJIkSZLUkQFKkiRJkjoyQEmSJElSRwYoSZIkSerIACVJkiRJHRmgJEmSJKkjA5QkSZIkdWSAkiRJkqSODFCSJEmS1JEBSpIkSZI6MkBJkiRJUkdzRt0BaUO5a+nSUXdhxhobGwNg/vz5I+6JNjTv/czVu/eStKlxBEqSJEmSOjJASZIkSVJHBihJkiRJ6sgAJUmSJEkdGaAkSZIkqSMDlCRJkiR1ZICSJEmSpI4MUJIkSZLUkV+kK21Edthxx1F3Yb1YMOoOaGS89zPXAmDxokWj7oYkTTlHoCRJkiSpIwOUJEmSJHVkgJIkSZKkjgxQkiRJktSRAUqSJEmSOjJASZIkSVJHBihJkiRJ6sgAJUmSJEkdGaAkSZIkqSMDlCRJkiR1ZICSJEmSpI7mjOKgEfEZ4KOZ+csh5U8D/iQzj1oPxz40M09d2/JWvc2Bw4FVQAXMBb6XmUsm0850FRFvBralOf/tgS9n5s2lbGvg3cC9wObALZn5zSHtHAw8DVhR2vtOZl7TKp8DHAZsAfxqWDuSJEnShjCSAAVcDBwMHD+k/OXAJVNxoIg4AFiSmb+YivZa/gA4PjOXtY717oj4VWbeP8XH2qhExEuBazPzsvK6Aj4IfLxUeQfw6cx8qJS/LiKemZk/7WtnD2B1Zn6mte0DEbEkMx8um/4ncEJm3rp+z0qSJEma2Kim8D0MzC4fvNcQEbOBGli9wXvVURkhW9IOT8UpNMFwU/eUXngCyMwauD4ito2IbWlGnB5q1T8DeNmAdl6Ymd/t23YO8Cx4ZHTqG4YnSZIkbSxGNQIFcDbNh+qz+7YfBHwP2L+3ISJ2BN4GLKcJV/f1pseVEaZfAgtL+WbAhZl5eUTsD7wWuC0irsvMb5R9dgF+H3iAR6fe3dDuRES8MzOP69v2OuAi4KXAl/tPKDOXR8QVrfpDjxMRLwKeTzMFbhbwX5l5QSk7FLgZ2BO4MTO/HRE70EyLu48mXF4DPDEzv1L22R94DrAM2AY4NTOXlrK382hYfhzw+cy8q+/cnkRzP76WmQ/2n1ufQe+bLcq5PBu4ol2QmXVEDGqziog5mbmytW1LYGn5+SmZ+e0J+iJJkiRtMCNbRKI8K7T7gKLdMvPnfdv+CPhkZn4mMz8LjEXEm1rlbwY+l5knZOYxNMGEzDyXZvTj1F54Kl6fmZ/KzBNLey8d0I8bIuLpfdt2Lc9tVa0pZv3ndd1Ex4mIzYBnZ+bnM/P4zDwW2D0ievdjG2CHcr69APFG4Oiy7XM0gexxpb1nANtm5qcz80Tg08AhpWwBsDgzjyuB8F96ZX39vhk4Ezg8It5RnmMa5ocREb0XEbFTOf4DNOFu0PuqHrDtTOAdvZHIiNiKZlSqdw1XRcRvRsThEfG2iFg4Tp8kSZKk9W6UI1AAt0fETpl5B0BE7Eoz8vKIiNgb+EFmruhty8yry4hLz/mZuar1egXDzeaxo16D6p8DHAH8d+nHdkBv1GbVgPqTOc5q4Et9ZTcCuwK/oAlH3+
rftx3aMvN7EfHk8vIlmXlKq2xVRNxepkOuphmV65WtiIgc1OHMvBs4pkzDe2vZ/0tle7veDRHxgog4Hvh/wB7AkaX4auD1wLW9+iUY7gN8of94EXEV8MWIuBx4AfD+VpX9aEYTTy7tPDciXpuZZwzq/0TGxsbWZrcNasGoOyBJU2jZsmXT4t9eTa1ly5onHLz3M89MufejXsb8LOB3Wq970/fa9gQuH7Bve6GGa/rKxjuvVZl5/UT1M3M1PLJAAsCrgMlMJxt6nBL2toyIIyLinRFxBPBKHg20t7fDUhnhuW3AMXrT4p5a2nrkD83Kdttm5g+AF0fEoRHx3HL8pQPaekRm3gucD2xX2llDRBwI3JGZ78jM/wu8FzgyImZl5nJgizIlsDfa9l7gsgHtPB/YOTPfkpn/SLOq4VvLSBTA5Zl5ZatfVwI7jtd3SZIkaX0a6QhUGQ3ZDB4NKn0jST2DRog2G7Btqv2QZtrducB2JVgA1AOe3QFg0GpzA+psQ/M802fKtDciYr9xdlnN+Itq3NQegerXel7suRHxAeCsAeGu17cX0oz8XJmZfzekyadl5gmt9u+LiK8BB9CMup0MvLmMZFU0z4vtP6Cd+X3trChL3L+BZoRuUGjsMvo30Pz589d2V0nSWpg3b57/9s5AvdEH7/3MM53u/bqMko16Ch/A+RHxEpppaz8YUH4NzdSui/q2r/cAlZlLIuKAiFhCM7Wu50KaYLXGFL2I2AJ4HjBugKIJE5/vhadi7jj9WBoRjx+nvS2GFUTEvN5qgWUE58qIOBK4vq/erjQLXlycmf86Qf8HBdq7gK3LcWpaUxQj4teA/ufaBraTmQ+1ngV7zCqNNN8rJUmSJI3EqKfw9RZd2AN4ambeNKD8J8D+5XkcACLiWTz6PNJE7qf5gta1dT/NAg6PTC0sC2DsPWChhbcC/ctyD3IXsFvvRQleb2D8ULh5b7Su7PMioBeqFkXEW9qVI+KZ5cffLe33ts9i8GjWLzPz3zLzRx36v3kZRWt7HfCYfSNiS+DlmXnhgHaW96b6ter/DnBpeflwRDyhVbYdG0folyRJ0gy1sXwYvZvxF344AXhfRPSWA7+3t7BAB1cAH46IfcoKdZN1FvBX7UUsis8C74yIh2kCyeY0q93dN1GDmXl++dLdF9GMsswBPgNsP85uXwc+UK7BauAm4NbS3o0R8YQysrSMJoj1nhv7GnBERKwox9mevsUcShuDVskb5lSa1fMeplldb0vgot5iIAAR8eayvQZObG3fpxzvwsz897K63mY0U/O2AH6amdeUOl8tz27NK7tvARwziX5KkiRJU6qq68l8bp55ymjXzpn5w1H3pa08M/bGzDxt1H3Z2C1evLiG6TEfd4cdXSND0qZj8aJF0+LfXk2t6fQcjKbWdLr3vb4uWLBg0CMj4xr5FL5p4GXAf466ExGxT2tFQGi+OLjLdDtJkiRJU2RjmcK30SlLfu8P/Nckp7etLzcB74+I3vLtN5Qvv5UkSZK0gRighuitWDfqfvRk5q+AiVbHkyRJkrQeOYVPkiRJkjoyQEmSJElSRwYoSZIkSerIACVJkiRJHRmgJEmSJKkjA5QkSZIkdWSAkiRJkqSODFCSJEmS1JEBSpIkSZI6mjPqDkh61F1Ll466C+vF2NgYAPPnzx9xT7Shee9nrt69l6RNjSNQkiRJktSRAUqSJEmSOjJASZIkSVJHBihJkiRJ6sgAJUmSJEkdGaAkSZIkqSMDlCRJkiR15PdASZKk9WLheQvhvFH3QiPjvZ+5JnHvlx41/b4D0xEoSZIkSerIACVJkiRJHRmgJEmSJKkjA5QkSZIkdWSAkiRJkqSODFCSJEmS1JEBSpIkSZI6MkBJkiRJUkcGKEmSJEnqyAAlSZIkSR0ZoCRJkiSpIwOUJEmSJHU0Z112johDM/PUqerMhhIRuwCHAA8AmwHzgLMz8/IpPs5bgO9n5h1T2W7fMT4MLB1QdElmXh4RR2
XmJ0rdjf5+RcQs4I8y81Oj7oskSZLUb50C1GRExAHAksz8xYY6ZuvY/cHhUODozKxbdY6MiJsz8/apOm5mfmmq2hrHLeOFol542pi1709mrgYMT5IkSdoozbgpfBGxPXB9OzwVxwIHj6BLkiRJkqaJKRuBKiNMvwQWAstppsZdWKaR7Q+8FrgtIq7LzG+UfV4DPBlYBmwNHJ+ZyyPiUOAqYD/gtsz8atn2K+DpwMPAFpl5TOv4+wPPKW1tA5wKPAQcBrwoIrYGzip9fHp//zNzZUScXtqaC7wTWAmsohnlOauUHUoTPOcClwC/kZnH9l2LtwFfAN7cG1mJiHnAkcCK0q+7MvO0UjYLeHu5Zg8BD07V6FXf6NtWEfEemus3D7gxM7/ZqvsWYDsevX8nZOaqcs43A3uWfb494DiHAdsDDwLbAidl5p2l7IjSXkVz3e7KzFMi4gnA63n0/nwlM+9u9zkiDgL2Au4v+349M2+eimsjSZIkTdZUT+F7M/DRzFwFj3xwvjwzz42IitYUvoh4MU04+lZ5PQ94C3B8aWvvzPxkq+1n0gSZY0v9J0fEgsxcHBHPALbNzE+XstnAuzPzs8DnIuL+9jS3iLgiIt5H84H9kSl7mXl3+fEI4LjMfLDUPyAi9srMq4B9gE9n5rWlbK+I2DIzl7f6ulkJHv3X5rOZuazs94KIeFFmXkQzpfDrmbm0lD07Ig7MzLMj4pXAE/qu8wO98DVJrwQiMx8ux3lDRDwxM39ZwuyPM/OGUrYz8AYgaQLpDpn5mUGNRsSbgEsz8+ryeg7wv4G/KVUWAkdm5n2l/EUR8RuZeSkD7k+r3Z2B7TPzX1rb/hD43FqcO2NjY2uzm6bAsmXLAO/BTOS9n7l6916SxjMd//8w1VP4zu+Fp2LFOHV/PTMv7L0oweKB8nI28L2++rMz8+xW/Z8Du5SXL+kFsVK2Cri9BKnHyMxzgOOAA8qzT4f06kbEdsDNvfDUqr93efmLXngqvgu8qvciIl4IDHonPNwLT6XNy4C5JViu6oWnUnY1sHP5+buZeVJ8DmdTAAAgAElEQVTfn3Z4elJE/GHfn98bdN7Amb3wVHwTWFB+3r4Xnspxb+PR98dc4FsMt0MvPJV9VwKLIuK5ZdPZvfBUyi8Cnj1Oez0HAf/et+3qQRUlSZKkDWGqR6Cu6Xs9XkDbq4xQte1a/rsqM2+doO12+08d0NZONFPJ7hp08DJi1JtC90TggxHxKWAP4FkR8bi+XXrhZ43pY5l5e5mK1vOi/pGaiNgJuG1AH35YRlmeGxGbDTm3idw8iZX1Huo7/qqI6D0Lts+AwLlt+e/tfcFr3HaLS2im513J4CC9asC2x+gL5GTmD7vsN8j8+fPXdleto95vl7wHM4/3fuaajr9VlrThjer/D+vyb9QGW4VvgKsy85Qpauumrm1FxLaZeW97W5nCdjTNlLUbgW9m5vWTOP5tZWn0O4HVA8rr8meQCliUmf8xpL+vAp7Yt/mBzPzyJPo3nl5Qu3Ad7seggNQfCPsNHB3sM+haSpIkSSMzygC1xYja2i8iruxfTr0sIrElcC3N1LHJBKizaJ7fuhlY1F+YmXeWkaY1RMSzaUbWnjKs4d7iFetDGXHqhZR1uR9zB2x7Ps1CIMN0GWGrImJWWdocaK5Ze7qgJEmStCFtyGXM7+fRKWEAP4mIhe0KEfHMtWx7UVlBblhb7fP8Ac3KfGsoi1rcmJn3ALtExLatsi0jYrdhB8/MB2jObY/MXDKk2uZloYxem7sDO5fl1FdGxJNaZbNK+VQ7uDxz1fMaHg18t0bE3u3Kk7gfd7f7W6YjvrQsugFwYERs3irfh2ZqX8+w9+F/AL/ft+0lHfskSZIkTbkNOQJ1BfDhiNgnM0/MzAsj4qCI+AOapa+3AM5Zm4Yz88aIeEJEHEnzrNJmwOWtKneU1dsuyMwrIuIrEfEhmuW6VwKbAz/LzNNL/c8D74iIh2hGaGYDJ0/QjauBp4
5T/mXgj0qby4F7MzNL2ReAw8uI0Cqaa7E+voT3QuDIiOgtY35D71mzzPxWRLyxBMkVNKNKZ3RpNDO/FhFvLYF4Vmn7860qP6a5nqtKu0v7ntt6OCLeTTOVcY2FLCLiroj4E+C+0vb31+7UJUmSpHVX1fWwR3OkqdH3XVQb3OLFi2vwIfZRciGBmct7P3ONjY2x8LyFE1eUNKMtPWrpxJXWg97/nxYsWFBNUPUxNuQUPkmSJEma1gxQkiRJktSRAUrr3Sin70mSJElTyQAlSZIkSR0ZoCRJkiSpIwOUJEmSJHVkgJIkSZKkjgxQkiRJktSRAUqSJEmSOjJASZIkSVJHBihJkiRJ6mjOqDsgSZI2TYv2W8T8+fNH3Q1tYGNjYwDe+xloptx7R6AkSZIkqSMDlCRJkiR1ZICSJEmSpI4MUJIkSZLUkQFKkiRJkjoyQEmSJElSRwYoSZIkSerIACVJkiRJHflFupIkab1YeN5COG/UvdDIeO9nrta9X3rU0tH1Yz1xBEqSJEmSOjJASZIkSVJHBihJkiRJ6sgAJUmSJEkdGaAkSZIkqSMDlCRJkiR1ZICSJEmSpI4MUJIkSZLUkQFKkiRJkjoyQEmSJElSRwYoSZIkSepozigPHhH7Anu2Nr0Y+HH5+eHMPHk9HPPQzDy1/FwBhwKbl+ItgB9n5lgpPwBYkpm/mOp+bAwi4oU01/whYDPggsy8LCJ2AN4wYJd9gD/OzPsj4gnAW4F7gC2Bn2TmOUOO8wHgksw8r2/7k4FXDNjlxZn5zgHtvBR4fmZ+ous5SpIkSVNppAEqM88Hzu+9jogHe+Gmq3YgWgtHAGdm5u2t9g6JiNs21dDUExHbAntl5qda294eEVdl5l3AcQP2eSgz7y8vDwM+npl1KXtXRFyemXf37fMc4G4GyMyf9x+nhNrlA469JbA7sHQSpylJkiRNqRk7ha98IK/b4QkgM78KHDiaXm1QLwPO6Nt2MbDHoMoRsQdwbfl5T+BHvfBUfAl4Td8+s4ADgHMm2a8fDNh+GPDlSbQjSZIkTbmRjkB1FRGHAdsDDwLbAidl5p0R8WrgwIjYmmaq3aKI2Ibmw/aDZffVmXnSgGZfDPxwyCEvbP28VUS8h2ZUZDPgwsy8vPRrT+ClwEqaMHpTZn6/lB0KXAXsB9yWmV+NiC2A99BMmVsJ3AJslZlfKfs8r7S3DJgHnJGZN5Wy3y/XoAZ2AE7LzBv7rtM2QABfy8x7hl3P4oy+AEQ55rD99snML5Sfn0Nf+MrMB0pgajsEOI1mil9Xu2Xm2e0NEfFc4PrMXB4Rk2hKkiRJmlobfYCKiDcBl2bm1eX1HOB/A3+TmWdGxPZ9U/h+HzgmM1eV+jtHxO9k5nf6mn4CcB4DZOZ1rZdvBj7aau8I4PJS9tuZeUyrrwdHxONbo1p7Z+Yn26cDHJuZ95X6LwBeX37enub5nk+22nsP8OmI2B24NTNPa5W9D2i3TWbeFxFfAg6JiHnNprxjyDmuEZ5KCH1RX397ZXNZc1rdagaPXj7SZkQ8iSa83hoRTxvUhwHH2RW4uW/bLODAzPzXLm2MZ2xsbF2b0FpatmwZ4D2Yibz3M1fv3kua2TbFf/+nwxS+HXrhCSAzVwKLyqjEIF/phZ1S/zaaEZt+szPz4Q7HP7/dHrCi9XP/s1c/4dEpcLOB7w045n2tvl0G/Ly8PHhAe5dFxFNogslmfWVfHNTZzFxeRtyOB14VER8owWSgiJgfEX8GnAicPqTa7wDfbb2+BHhJXztbA3u3Nr0JyGHHHeJA4Oy+bW/FqXuSJEnaSGz0I1A00936XUIzcnNlf0FmLouIVwK7AatowsegKWSrI2JWZq6e4PjX9L1uh87VEfEWmqlvK2mC2mWlbFVm3tq37yoeqzfVcGfgsL4pavOAOzPzuoh4TkQcTvMc0sWZOe5iCpn5UEScS7Oa3rNopgsOqjcGjEXEbOAPImLQqNW2fcHvpoh4bURcnJn3RsRWwL
t75x4RrwK+O2CK4FDl+HV7n7JK38oB13GtzJ8/fyqa0Vro/fbJezDzeO9nrk3xt86SJm9j/fd/Xf6Nmg4BasWAbf2jMY+IiHfSjBp9t7Xt0AFVbwOeSN+UsVL/mZn50/E6VaaWvR84LjPvLNt2o1kpbphBAarnzsw8ZVhhZp5ejrE7TdC5NDMvGtK3PWiWB78ROHpQkImIzdojcJm5KiI+R/P82MmtenszIKgCxwCHlue6VgLH8uiS5POBx0fEb5bXOwE7RMRTMvNLQ05xIbCob9tBNCH1iNa23yrTOM+YKERKkiRJU206BKi5A7Y9n2aBhkEezMz+UaMtBtS7gGYUa9BUuBcB4wYoYC/gO73wNE5f28abMllHxNzMfLC/ICLmZeYygMxcAiyJiCOBi/rqbQ28A7iuw3clHU7fEuKZuToiVvbVmz/o+7gycwVwQuvYv035Dq/M/Ghfv55GszjEwGfOiif0jzRl5vH9lSJi5TosWy9JkiStk+nwDNTdZdQFaEZOgJdmZi9ALS8LJvS0fyYi3ghs3d9oZj4AzC1fCNuu/3vAuV36BTyptV9F89zP0NExYFVZKa+3z6/x6BcJf4sm/LT78szy40ERsWNfW9WA9h/IzH9rj76N47KIWONLbCNif+DS1uutgPv7d+wXETsBT13b786KiKfSjJZJkiRJG7WNfgQqM78WEW+NiIU0gW8e8PlWlXOA90fEkrJK3VVldOZBYHOakaZhoeYE4G0llK0s9S4pX/A6Ub9+HhH7limDq2lGuU4CXjjObv8OvCciVpTj3UUZScvMeyLi7LLy3gPlXG+kGQk7HTi8TBucRbOU+1kD+tT5maPMvDQiDoqI9/Loc2Y3Zea1rWoHDzoOPBJk30qzWMZyhixqMWTfo/pGyA4AvjCkuiRJkrTRqOq682durQcRcUj58l6tJ4sXL65h432IcSZwIYGZy3s/c42NjbHwvIWj7oakEVt61Mb5yHrv/08LFiwYNKtrXNNhCt8moywZvnnr9fOY+FkrSZIkSRuJjX4K3ybmeuDIiHiAZnn1OzPzmyPukyRJkqSODFAbUPkupX8bdT8kSZIkrR2n8EmSJElSRwYoSZIkSerIACVJkiRJHRmgJEmSJKkjA5QkSZIkdWSAkiRJkqSODFCSJEmS1JEBSpIkSZI68ot0JUnSerFov0XMnz9/1N3QBjY2NgbgvZ+BZsq9dwRKkiRJkjoyQEmSJElSRwYoSZIkSerIACVJkiRJHRmgJEmSJKkjA5QkSZIkdWSAkiRJkqSODFCSJEmS1JFfpCtJktaLhecthPNG3QuNZ+lRS0fdBWnacQRKkiRJkjoyQEmSJElSRwYoSZIkSerIACVJkiRJHRmgJEmSJKkjA5QkSZIkdWSAkiRJkqSODFCSJEmS1JEBSpIkSZI6MkBJkiRJUkcGKEmSJEnqaM6oO9BFROwCHAI8AGwGzAPOzszLR9qxASLiqMz8xBS08xngo5n5yyHlTwP+JDOPWsfjvBf4TGauXpd2xmn/Q8CnMvPBAWUfyMx/XR/HlSRJktaHaRGggEOBozOz7m2IiCMj4mZgL2BJZv5iZL1rmYrwVFwMHAwcP6T85cAl63qQzPzUurYxgZOBtwGfbW+MiNcC31nPx5YkSZKm1EY/hS8itgeub4en4liagLGpehiYHRFVf0FEzAZqYL2MGk2lzLyd5jwe19sWEZsBu2fmdaPrmSRJkjR502EE6n7g6f0bM3NlRNwIvA64LSKuy8xvRMShwFXAfsBtmfnViJgLvBNYCawCbsnMswAiYk/gpaVsFnBTZn6/lB0KfK8cYwWwNXARcB+wb9lnO+CszLyht09mntravwI2pwk8t2fmt3rnEBGHA9sCD5V+PZiZX2yd5tnAy8p/2w4q/dq/1daONCM9y8ux7svMUyNiFnBEZp7QbiAi3pWZx/b1d7zr9JxyTR8GtgR+lpln9N+XIU4A3gf8Q3n9P4BHzrP08e000zMfKt
fhS6XsyTTXfzmwRTmvUzoeV5IkSZpSG32AKkHpioh4H/CVMqLRKzu3jND0T+HbOzM/2Xp9BHBc7zmciDggIvbKzKuA387MY3oVI+LgiHh86zivzszjWuXvAZa1A0lEvBO4YUD3Xwt8IDNvKfV+NyK2y8x7ImIf4L8y89JSth3wKVrBIjOXRMSBPDZA7ZaZ34mI9rY/Av4xM1eU9p4dEW/KzK9ExBr3OSK2oAmE/ca7Tvtk5udabRwSETtn5m0R8UaacNl2a2Z+p5zHsoj4VUTsDtwBzMvMW1t1DwW+nplLW30/MDPPBg5qTzOMiP0iYn5mjg3ovyRJkrRebfQBCiAzz4mIC4FXR8ROwFLga5m5akD12cB3ey9KMLm5vYhBae9NNCNVp/bt/xNgD+D20tZ5feUPA1/v27Z8SNev74Wn4irg12meXdozM09q9emeiDh3QBu3R8ROmXlHOZ9dgZvbFSJib+AHvfBU2rs6InojVBdFxG9l5gXl9cG0rlFpY6LrNLuvX9+kvH8y82tDzr/tVOAvgXuBdhCrgFW98NTq+3PLy/5ppucD23c43mOMjZm5RmXZsmWA92Am8t7PXL17r43b+vi76d/7mWum3PtpEaAAMnM5cBpARDwR+GBEDFoAYVXf6MYewLPaz+AUvX/ZV0fEW2hW9lsJ7ABc1mrr+v79MvOevm2PeU6puGbAMXfttT2g/mNWqgPOolmBsDdt7aDWzz17lnr97gfIzMvLanu9ALVr3zWCia/TuRHxLuBXwLmZeS/NdLtOMnNVRFwK7JKZ97eKHg88tzwX1dYLTmdFxB/QBNofliB5V9fjSpIkSVNpow9QEbFt+bD+iMz8ZUQcDbwB6A8C/SrgmwOCUO/Zm/fTTFu7s2zbDdh9Sjo/vkEB6jEyc0UvXPQWlBgy8jZoSl47lDwcEZsD2wB3Dqg79DqVY14HXBcR2wCvKFMRjy/9OqS02/arzDyzb9sVwG4DjrsoM/9jyHFvAY4pz2ctiIhn0CyLPukFNObPnz/ZXTRFer+J8h7MPN77mWtT/w30pmJ9/N307/3MNZ3u/br8G7XRByhgv4i4sn+Z8vJs1JYd9r+WZtRmUDDYC/hOLzwVc9e+q5MymRUQz4+Il9D07QcDyq8BXkCzwEVbO0B9j2bq3g7AoCl3Q69TWfVvdmauyMz7gK9FxIER8czM/GlmfnUS59LvNuApwwojYl5mLitTC8+KiF8DFgLfX4djSpIkSWtlo1/GnCYwHNa/MSJeDNxIM01t22E7l+l2u0TEI3UiYssy0nQ38KTW9gp4E2sGj/Xl2oh4JJ73RlgGVSyjP3sAT83MmwaU/wTYvwSdXnvPojXVLTNvBJ4BbF9CUH8b412nWTTTCNu2YvizX52V5elXRkT7PswqC04AvLlvl21ovlBZkiRJ2uA2+hGozFweEV+JiA/RfGBfSbMs+M8y8/Qyve3DEbFPZp44pJnPA++IiIdovjtpNnByGcXat6yit5pmmeyTgBeu59MiMy+JiEMjYl+aZ4kGLVjRdjeDp+n1nAC8LyIeoBmpujczT+6rcw/jT3kc7zrdUK5TXdq/NTN/Pk5bk/EF4PASAFfR3IcvlbJL+u7Pit7UQUmSJGlDq+q6//tpNSoRccg6TofTAIsXL65heszH3VRNpznRmlre+5lrbGyMhectHHU3NIGlRy2duNIk+fd+5ppO977X1wULFgxbDG6o6TCFb5MUEbuWL4ntvZ5HM8oiSZIkaSO10U/h21Rl5i0R8bYyBXElzXS5k0bbK0mSJEnjMUCN0DjPbEmSJEnaCDmFT5IkSZI6MkBJkiRJUkcGKEmSJEnqyAAlSZIkSR0ZoCRJkiSpIwOUJEmSJHVkgJIkSZKkjgxQkiRJktSRX6QrSZLWi0X7LWL+/Pmj7oYkTSlHoCRJkiSpIwOUJEmSJHVkgJIkSZKkjgxQkiRJktSRAUqSJEmSOjJASZIkSVJHBihJkiRJ6sgAJUmSJEkd+U
W6kiRpvVh43kI4b9S9EMDSo5aOugvSJsMRKEmSJEnqyAAlSZIkSR0ZoCRJkiSpIwOUJEmSJHVkgJIkSZKkjgxQkiRJktSRAUqSJEmSOjJASZIkSVJHBihJkiRJ6sgAJUmSJEkdGaAkSZIkqSMDlCRJkiR1NGfUHdiYRMQuwCHAA8BmwDzg7My8fKQdGyAijsrMT6xjG/sCe7Y2vRj4cfn5YeCnwAOZeeWAfZ8BPD0zF43T/m7AyzPzxAFlLwPu2hivrSRJkjSMAWpNhwJHZ2bd2xARR0bEzcBewJLM/MXIeteyruGptHE+cH7vdUQ8mJmntl5XwLuAxwQo4ADgpAna/0VEbB8R22Tmfa12ZwEvyMyj1+0MJEmSpA3LKXxFRGwPXN8OT8WxwMEj6NLIDbgWbXMyc1WHZk4A3tG37RDg39e6Y5IkSdKIOAL1qPuBp/dvzMyVEXEj8Drgtoi4LjO/ERGHAlcB+wG3ZeZXI2Iu8E5gJbAKuCUzzwKIiD2Bl5ayWcBNmfn9UnYo8L1yjBXA1sBFwH3AvmWf7YCzMvOG3j690aKyfwVsDtTA7Zn5rd45RMThwLbAQ6VfD2bmFztelysj4rntaXxl+t71XXbOzHsiYllEPCkzb46IecAumXlTq73nlWuzjGba5Bm98oj4fWD7cl47AKdl5o0d+y5JkiRNKQNUUYLSFRHxPuArmXl7q+zcMp2tfwrf3pn5ydbrI4DjMvNBgIg4ICL2ysyrgN/OzGN6FSPi4Ih4fOs4r87M41rl7wGWZeYJrW3vBG4Y0P3XAh/IzFtKvd+NiO1KeNkH+K/MvLSUbQd8CugUoDLzxxHRP41vf+Dk0t7uNCGy3/cy85fl55OAPwU+Cry9t2/Zf3vg+e3rWM7906XtWzPztFbZ+4D2Ne9sbGxsbXbTFFi2bBngPZiJvPczV+/ea+OwIf8O+vd+5pop994A1ZKZ50TEhcCrI2InYCnwtSFT1WYD3+29KMHk5l54arX3JpqRqlP79v8JsAdwe2nrvL7yh4Gv921bPqTr1/fCU3EV8OvAJcCemXlSq0/3RMS5Q9rp6pHpe5m5BFgyXuXMXBERP42IVwArM/OuVvHBPPbaXBYRT6EZddqsr6zryJkkSZI05QxQfTJzOXAaQEQ8EfhgRHxqQNVVmXlr6/UewLMi4nF99Xq/glsdEW+hmaK2kmY62mWttvqnxC3LzHv6tlVDun3NgGPu2mt7QP0HB2wbz5UR8bzMvKKMCnWavtcnaa7rW/u27wwcFhHtbfOAOzPzuoh4TpmCeC1wcWYuXYtjAzB//vy13VXrqPebKO/BzOO9n7k29d9ATzcb8u+gf+9nrul079fl3ygDVBER22bmve1tmfnLiDgaeANw6+A9H1EB3xwQhHqrzr2fZnrfnWXbbsDuU9L58XVZ6GFcmXlhmT54Bc2zSu0peL9OM6Wv37fbo2KZWUfENzPzob56d2bmKeMc+/RynN2BP4iISzPzonU4HUmSJGmtGaAetV9EXNm/THl5NmrLDvtfCxzE4NGZvYDv9MJTMXftuzopU73S4uz2lMYSGNdmRKqnjoi57amPPRExLzOXleMsAZZExJE0C2xIkiRJG5zLmD/qB8Bh/Rsj4sXAjTSr9G07bOcy3W6XiHikTkRsWUaa7gae1NpeAW/isc/3rA/XRsQj46hlpcAFa9HOFRHxRpov151K36JvmfOIeGb58aCI2LGv/rBpjJIkSdJ65whUkZnLI+IrEfEhmsUaVtIsC/6zzDw9IjYDPhwR+2TmiUOa+Tzwjoh4CFhNszjEyWUUa98yDW41sAXNynQvXM+nRWZeEhGHRsS+NMuYD1qwoks7F0fEnwIxYeXJtXtPRJxdVt57gCbU30gT1E4HDi9TIGfRBNizpvL4kiRJ0mRUdT3ed6VqUxQRh2TmV0fdjw1l8eLFNUyPBxo3VdPpoVJNLe/9zDU2NsbC8xaOuhsqlh611mswTZp/72eu6XTve31dsG
DBpGc3OYVvExcRu0bEk1uv59GMgkmSJEmaJKfwbeIy85aIeFuZgriSZgrfSaPtlSRJkjQ9GaBmgHGe2ZIkSZI0CU7hkyRJkqSODFCSJEmS1JEBSpIkSZI6MkBJkiRJUkcGKEmSJEnqyAAlSZIkSR0ZoCRJkiSpIwOUJEmSJHXkF+lKkqT1YtF+i5g/f/6ouyFJU8oRKEmSJEnqyAAlSZIkSR0ZoCRJkiSpIwOUJEmSJHVkgJIkSZKkjgxQkiRJktSRAUqSJEmSOjJASZIkSVJHfpGuJElaLxaetxDOG3UvRmPpUUtH3QVJ64kjUJIkSZLUkQFKkiRJkjoyQEmSJElSRwYoSZIkSerIACVJkiRJHRmgJEmSJKkjA5QkSZIkdWSAkiRJkqSODFCSJEmS1JEBSpIkSZI6MkBJkiRJUkdzRt2BUYuIDwNL/3979xplZ1XnefxbJCEQLuEiUZQR7VZREFujDTTYDZiIouBlxD+IEVFRGh1plz2r16w1L2bWmhezpteM3aN4QWgEAcW/2Cg0QmuCCGEExOKmIBcRBFQChktIQRKSmhdnH3h4cqrqqapz6lB1vp+1slJn7+fZzz5n10nqV3s/+1SKFgB3ZuaqiFiRmef1oU8vBo4F1pf+LAKuyMybunyd44EfZeYj3Wy30v4ngYsz86EOdZ8D/m9mjvbi2pIkSVIvDHyAAn5fD0kR8e6I2KdfHQJWAF+ohouIOCUiHszMh7t1kcz8VrfaGsM3gc8B/6taGBFvBm42PEmSJGm2cQlfB5l5KXBAP64dEbsAd3UIF2cA7+pDl6YsMzcA90bEa2tVyzPzJ/3okyRJkjQdzkCNbRSgzES9DXgK2BG4oL3kLSL2Bj4IPEkrjD6QmReXuhXAj0v9emA74N8z855SvyvwUWAEGAJuy8yrS1uvrHcmM5+JiB+Uc7cDTgKeATbTmkW7tHLdbcr1bgDenJlnVNuKiI/Rmh36UHv2LSIWAacAG4ENwKOZ+d1Stw3wcVrLCTcAT09i9iqB/wb899LW0cBltf4cDywur/EC4KzM3BwRi8trtJ7W9+p84CvOXEmSJKlfDFAdRMR8WiFkHnBIZn61lA8BJwDnRMS2wIeB/9n+gT4iDo+IwyuzK+/LzNMq7Z4I3FMefgT4YmZuKXUREUsyc01E3BwRn6UV1p5dspeZj5UvTwTOzMyny7mHRcR+mfkr4EDgy5n561K3X0Rsn5lPVZ7ighJQqk/7Q8BXM3OknPemiDggM6+ntaTwXzNzbanbNyLelplXRMQ7gZfUXsL17fCVmaMRsToiDgWuAV6fmZdUXpOjgWsrwXIJ8AFawet95blsLnV7A0cDF281aA0MDw9P5TR1wcjICOAYDCLHfnC1x35QDfL3vO/7wTUoY+8Svpoy2/I54FJgCXBhu64EpU3l4TuAc6qzISU4tZerzQOuqDW/sVxjX+CadngqLgL+urRzJXAmcFi59+nYiJhXzl0MPNgOT5Xj9y8PH2iHp+Jy4N2V5/cWoNN39aZ2eCpt3ghsV0Lj5nZ4KnW3ldeGzLw8M8+u/fluteHMXAm8HfgYUN+UY5d2eCrHruG578t5tXbuA1Z36LskSZI0I5yBgj3LzBC0ltItAM7PzD+VTRueqB3f/uF+j8x8sEN7T5a/N2fmXWOcux+wU0TsX6tf0/6izBi1l9DtCfx9RJwG7AO8NiJ2r53bDj/P61NmPhwR1RmiAzLzK9VjIuJF1WtXzr2qzAi9ISIWjPFcmroQ+GBm3l8rP7AdDit2rpxzUkQ8Cvy/zHygGuQma+nSpVM9VdPU/k2UYzB4HPvBNdd/Az2RQf6e930/uGbT2E/n3ygDFPxhiluVbxyjvB40OhkCzsvMrdqIiJ3roS0z/xARX6C1tO1e4Psdwtl41pSt0f8EbOlQP1r+jNXXlZn5406VEfFuYM9a8frM/Ha1IDNviojXd2jiusw8t1Pb5XU4vSyp/JuIOI7Wkr6nOh0vSf3+cLQAAB
nWSURBVJIk9ZoBauoejYiXZOYfa+VNAtStwF/Suieo7q0RcUtmPlAtLJtIbA/8GjgCmEyAuhQ4ntbs1Mp6ZZltW1IvL0sNbwdePlbD7c0rpmHhWBURsSgzRzLzGeCKiLieVoic8c/mkiRJksB7oKbjx7RCybPKRgm/7nz4czLzduDg6rK4iNij7Mz3E1obTDxPRBwE3JuZjwMvjoidK3XbR8Re41xvPa1lcftk5t1jHLZt2Ymv3eargCXlHq9nIuJllbptSn03PFRfyhgRrylffqh27CLgaSRJkqQ+cQZqijJzY0RcFBF/R+v+o+2B+zPzooZNnA6cEhHraC2f25SZ5wNExAUR8Z9pbev9DLAt8LvM/EE59+vAJyJiA60lefOAcya43m3A3uPUfxv4dGnzKeCJzMxS903go+Vepc20Zo268iG8mXlJRBxTAuJGWtuvt3fZuyIiPkVr445ty3VP69ySJEmS1HtDo6N+pI7mtlWrVo3C7Lihca6aTTeVqrsc+8E1PDzM8tXL+92Nvll76pT3PJr1fN8Prtk09u2+Llu2bGiy57qET5IkSZIaMkBJkiRJUkMGKEmSJElqyAAlSZIkSQ0ZoCRJkiSpIQOUJEmSJDVkgJIkSZKkhgxQkiRJktSQAUqSJEmSGjJASZIkSVJDBihJkiRJamh+vzsgSZLmppVvXcnSpUv73Q1J6ipnoCRJkiSpIQOUJEmSJDVkgJIkSZKkhgxQkiRJktSQAUqSJEmSGjJASZIkSVJDBihJkiRJasgAJUmSJEkN+UG6kiSpJ5avXg6r+92L3lp76tp+d0HSDHMGSpIkSZIaMkBJkiRJUkMGKEmSJElqyAAlSZIkSQ0ZoCRJkiSpIQOUJEmSJDVkgJIkSZKkhgxQkiRJktSQAUqSJEmSGjJASZIkSVJDBihJkiRJasgAVRERX4mIPcepf0VEfLFBO4dFxF5j1L0oIo5v2J8V0z0uIk5t0kYvRMRbIuLIMeqOG++1liRJkl6IDFDP93PgXePUvx24YbKNVgNOZj6Smd+aQt+mJDMnDHw9vPYNwNKImFctj4gdgF0z8w/96ZkkSZI0NQao59sEzIuIoXpFCQGjwJYZ79Xs9m3gw7WyjwLn9aEvkiRJ0rTM73cHXoCuAA4vf1cdAfw7cCi0ZpUy83khoF4WEa8GlgEHRMSOwBmZubl9XEQcBgwB+wLPAIuByzPzlnqnImJXWsFjpJxzW2ZePdGTqfapzIStB3Yv13um1t+jgf9QrrEj8C+Z+VRELAA+BTxNK3QvBr6YmRsnun5m3hMRx0TEDpm5PiJeCoxk5rrKdQ8FXl+uuxNwXmauLXUf57mgvzvw9cx8dKLrSpIkSb1ggKrJzLsj4m1sHaD2yszLImIybd0F3BURT9bDVsUbM/Of2g8i4hRgqwAFfIRWaNlSjouIWJKZaxp3CA4GvpqZF5U2/ioi9snMOyLiIGBNZl5S6hYBxwP/AvxH4JzMfLLU7QIcC5wbEW8E3tjhWt+rhKSzgE8C/wycAPyfyvP9c2DnzPxyeTyPVlj7akQsA1Zl5n2lblvg48DXJvGcJUmSpK4xQHX2cES8KDMfASizJg/26Fo/qD2+JiL+IjNvbhdExL7ANe3wVFwEvAf43iSu9Whm3lp5PAwcBdwBvDozz21XZOZIRKwvD7ehNevVrnssIi4tX98E3DTeRTPzkYjYHBHvAO7IzE2V6oNr190cEQ+XILUFWFCp2xgROYnn+zzDw8NTPVXTNDIyAjgGg8ixH1ztsZ/r/N7emu/7wTUoY2+A6uxSygxLeXxE5etuqy+D+w2wHLi5UrYfsFNE7F87djKzTwC3Vx9k5oaI2L59jYg4sXb8S8vfFwIfjYgR4OeZeVd7id0knAl8MzM/WCvfu8N1X0RrVuonEbGizI7dkpm3TOG6kiRJUtcYoDooMx0LANobSmTm5hm6/Hxam1VUDdG6L2jCe46m4VfVmaCqMmN0ZnktDoyI99IKQ2
siYimwtPNp+UTlwVMRUZ9tA7hvrOuW89r3b70hIj4HXFqWRk7a0qWduqmZ0P5NlGMweBz7wTXXfwPd5vf21nzfD67ZNPbT+TfKADW2ayLiYGA74Ccd6p8XciJiIZVlbtPwZ8Bva2W3An8JXNOF9seycKyKiFiUmSOZOQpcGxE/p3Uv0hmZOUxrKWDPrgtQNta4pdwjNqUAJUmSJE2X25iPITPvAPYB9m5vYlCzQ+3xUYwdoMZ7nd9Ze3xg7T4lMvN24OD2rBhAROxRdubrllsjYnm1ICJeU748tra1+0JaW753w8r6BwtXrvv+Ekzb5dvgNvKSJEnqI2egxvcYW9+j1HZZRHweeIJWoLgJeOUYx24fEZ8Evp+ZD9fq/lhmVbYAOwOXj9HG6cApEbGO1uzXpsw8v9S9NCJOqh3/cGZ2WjLXUWZeFxFHRMTJtLYrXwhcWap/AJwcERtofc/sAny5adsTXPfeiHhJeQ1GaG0a0d6U4kLgxIjYWLnuN7txXUmSJGkqhkZH67fbaKaUz4G6OzMf6Hdf5rJVq1aNwuxYjztXzaY10eoux35wDQ8Ps3z18okPnOXWnureRnW+7wfXbBr7dl+XLVs26VtwXMInSZIkSQ0ZoCRJkiSpIe+B6qPMvLLffZAkSZLUnDNQkiRJktSQAUqSJEmSGjJASZIkSVJDBihJkiRJasgAJUmSJEkNGaAkSZIkqSEDlCRJkiQ1ZICSJEmSpIb8IF1JktQTK9+6kqVLl/a7G5LUVc5ASZIkSVJDBihJkiRJasgAJUmSJEkNGaAkSZIkqSEDlCRJkiQ1ZICSJEmSpIYMUJIkSZLUkAFKkiRJkhryg3QlSeqTXXfbrd9d6JllwKqVK/vdDUnqOmegJEmSJKkhA5QkSZIkNWSAkiRJkqSGDFCSJEmS1JABSpIkSZIaMkBJkiRJUkMGKEmSJElqyAAlSZIkSQ0ZoCRJkiSpIQOUJEmSJDVkgJIkSZKkhub3uwP9FhEHAfsDm2kFyqeAb2XmaF871gMRMQ/4MK1x31L+vi4zb+1rxyRJkqRZYqADVES8FliSmWdUyl4GHAd8uwvtHwbcnZkPTLetMdpfkZnnTeKUk2iFw3WVNo6NiMcy8/7u91CSJEmaWwZ9Cd/hmXlxtSAzHwS27VN/eiYiXg7cXw1PRQJH9qFLkiRJ0qwz0DNQwKYxyldHxLzM3BwRuwEfo7W0bxRY1571KTNMfwCWl/oFtJbE3RQRhwLvAdZExB2ZeVFErAB+BbwVWJOZ34mI1wF/AzxDK9Del5k/anckIj4B7Ag8CSwEzgB2B94HHBAROwIXZOZjEzzXw+gwq5aZoxGxuXK9FcAQrRA5CjycmZdU6o8HFlee71mZubnaZkSclJln1so+Rmv2a8NYbUTETsBHgKfLaVsy8+xy/iuANwCvBh4vz/nJCZ6zJEmS1FUGqA4y8zeVh58G/jEzNwJExL4RcVxmXlDqPwT8j3aIiIgTgZsy86cRMcTWS/j2z8wvVR7/dWae3n4QEe+KiD0y8+GIOAK4OjPvLHU7AZGZ5wNfi4gnK2FuR+CYDk/n5sy8ERjKzLECY7X8PcDnMvP3pd33R8TizHw8Io4Grs3Me0rdEuADtGaxqp6MiB1rAWe7Ep7Ga+ODwOmV13JJRByZmZeVNo4CTp6L96dJkiRpdhj0ALV5vMqI2B/4STs8AWTmbWV2qe2a2gzMRsY2D7i8Vla/h+lWYB/gYWDP6mxUZq6LiI73KpWwcvY41x7vuW6pfH1XOzwVv6I163MDsEs7+JRrromITstAf0hrWeB34dl7zW4vdeO1cUH1tSx1u1baXTWd8DQ8PDzVUzVNIyMjgGMwiBz78S3rdwd6bGRkxLEfQL7vB9egjP2gB6iJvA64tEN5dWbl9lrdePeVbc7Mh2plW8qStkW0lvHtCtzYPr7eQGZeNW6PxzY0Tt28ytf15zMCvLR8fWDZya9q53pjmflERCyuFB1Ka+nhuG1k5k
hEvBPYi9ZzHwW2rxz34DjPQZIkSeq5QQ9Q9R/kAYiIPwfuLQ87zSgt6MbFy8zL3wFnZuafStlewKvKIVvGOrdDWzvS2j2w7sbM/AWwOSLmZ+YzHY5p+nyuy8xzGx57T0S8MjN/S2v5YPu5jNlGRJxEa0bv8krZiobXm9DSpUu71ZQmqf2bKMdg8Dj2g23RokWO/QDyfT+4ZtPYT2eWbNAD1MIxyv8qM38TEbcDbwKur9V3JUAB+wGXtcNTsV3l663GJyL2zczb6uVlCd+Z9fKKn9HaSGJlrb0hxgiSHYz1enVyJXBiRLykXLtJG09nZn0GbDLXlCRJknpq0LcxXx0R768WRMSLaS2lo3zA7KHVJWflfp5HG7b/JB2WuFU8Brys0vYQrVmkdkD7Y0TsU6lfCLy5cn7j8SszQftExC61qg8Al3U4pZOHyn1hz4qI14xxvfaM05sy8+aGbSyqlR9DawdCSZIk6QVhoGegyoYQiyPiMzy3dfbTwLcqh50FfDYi1tOaHXoiM89peImbgX+IiAMz8xsdrn9/RBxSlq5toTXbcjbwllL/o4g4qdwXtI7W1uJnV5rYFBGfAlZWN2YYx+nAJ0pQ20Br5un6zPxdkyeTmZdExDERcRCtpY3bARePc8o1tHbOa9rGryLiFFpjsC2tmatuzfZJkiRJ0zY0OuqO0OqNiHgH8OvMvK+f/Vi1atUozI71uHPVbFoTre5y7Me362679bsLPbVq5UrHfgD5vh9cs2ns231dtmzZeButdTToS/jUW6/qd3iSJEmSummgl/CpNyLiEOBAYFW/+yJJkiR1kwFKXZeZ19C6/0mSJEmaU1zCJ0mSJEkNGaAkSZIkqSEDlCRJkiQ1ZICSJEmSpIYMUJIkSZLUkAFKkiRJkhoyQEmSJElSQwYoSZIkSWrIACVJkiRJDc3vdwckSRpUj65d2+8u9Mzw8HC/uyBJPeEMlCRJkiQ1ZICSJEmSpIYMUJIkSZLUkAFKkiRJkhoyQEmSJElSQwYoSZIkSWrIACVJkiRJDfk5UJIkqSeWr14Oq/vdi+5ae+rc/ewuSc04AyVJkiRJDRmgJEmSJKkhA5QkSZIkNWSAkiRJkqSGDFCSJEmS1JABSpIkSZIaMkBJkiRJUkMGKEmSJElqyAAlSZIkSQ0ZoCRJkiSpIQOUJEmSJDVkgJIkSZKkhub3uwMvdBFxCPC6StFBwLXl602Zec4U212RmeeNU/8q4EhgBFgIbA98LzPvncr1Xkgi4mvA5zNzpEPduK+LJEmS1E8GqAlk5jXANe3HEfF0r3/Aj4htgSMz80u18v8SEV/IzI29vP5kTSH03ACsAL7eoy5JkiRJPeESvhem1wE/7VB+PnD4DPelF54G7oyI/fvdEUmSJGkynIHqgog4HlgMPAUsAM7KzM0RMQ/4NDBEaynelsw8q3LePsDbynk7Ahdk5iPAH4B3ALdUr5OZ90fEE+XcrWZ92mURsQK4HTgQ2ALsApyfmfeX404s/RwCtgMezcxzK+28F9i79Gsn4F/bSwdL29uU824A3gIcFBE7Aldl5m1NXrPMvDIiPhsRv8zM0TFe1zeW12d96e8vM/PKJu1LkiRJvWCAmqaIOBq4NjPvKY+XAB8AEvgg8O0SioiIvSLi3Zl5Ka1AcEhmfrXUDQEnAOdk5pqIWBcRJwGZmU+0r5eZjzfo1hDwisz8SqXtk4GvlfrlwCmZua7UHxARb87MX0TEocC6zPxi5Tn+14j435m5gVYo+3Jm/rpUD0fESDXMRcQxtAJh1UOZeVmtLIEAvlN/AhGxO/A3mfmFStnxEfHayrUnZXh4eCqnqQtGRlq3uzkGg8exH1ztsZ9r/F6emO/7wTUoY+8SvunbpR2eADJzDc+9rgvb4anUPQCsKw+XABdW6kaBTZXH3we+C7wnIj5dgtpk/KDW9gMRsWspuqIdnkr99cC+5eG+mXlFra1zgLeXrx+YKMBk5oWZeXbtTz08kZkPAUMR8e
IOzRwFnFE7/lvAYeNdW5IkSeolZ6Cm78CyVK9q5/L35vrBmXlV+fLB6sxSsU3t2MeB86C1K19E/D3wT5m5ZYI+jWbmM7Wy3wIvBx4FOm1C0e7rhg59fiAi2vdePTjBtScrgc8AX6qVz8vMpzocv36qF1q6dOlUT9U0tX8T5RgMHsd+cM3V30D7vTwx3/eDazaN/XT+jTJATd911fuHaiYKOh1FxM71cJWZd0fEWcA7gR9Oodn5QMd7jYp2CBxrh78FTS8UEcfSuneq6o+Z+W/1YzNzS0RcFRGH1e5vmnY/JEmSpG4zQE3fwnHqtnp9I2LfBhstHBMR36zPImXmoxGxR3n4vDAUEQtp3fs0lr2Bq8apb89+bdehzy8DHpqgz9V+bnVP0wTH3xwRfxsR11aKt4mIBZn57LLGci9XfbZPkiRJmjHeAzV9D9W3446I15QvN1UCT3tjhH0atHk5cFK9sOyOd315uEOt+iieC1DbRMTbK+cNAXtm5mOl6G3ls6ba9Qfy3I5/d0bEwbW2PwL8eJz+duP76LxynbZLgRNrxxxH5+3dJUmSpBnhDNQ0ZeYlEXFMRBxEa9nZdsDF7WrgMxEBrXt3RoFvNGjz9xHxs4j4PK3PTNpCa6br5sy8vRx2Wal/otTdBLyy1G0BNkTEKeWai2l9hlTbtcAnImJz6e/a9i56mXlVRBxdOXcn4DsTfHjv3RHxn4CbMnP1RM9vjOf8ZEQ8AOxeHv8pIm4o7W4Etgduzcw7p9K+JEmS1A1Do6Pj3Raj2ajTZ0RNpn6uWbVq1SjMjhsa56rZdFOpusuxH1zDw8MsX728393ourWnru13F17wfN8Prtk09u2+Llu2bLxbYDpyCZ8kSZIkNWSAkiRJkqSGDFBz0ETL8wZp+Z4kSZLUTQYoSZIkSWrIACVJkiRJDRmgJEmSJKkhA5QkSZIkNWSAkiRJkqSGDFCSJEmS1JABSpIkSZIaMkBJkiRJUkPz+90BSZI0N61860qWLl3a725IUlc5AyVJkiRJDRmgJEmSJKkhA5QkSZIkNWSAkiRJkqSGDFCSJEmS1JABSpIkSZIaMkBJkiRJUkMGKEmSJElqyAAlSZIkSQ0ZoCRJkiSpIQOUJEmSJDVkgJIkSZKkhgxQkiRJktSQAUqSJEmSGjJASZIkSVJDBihJkiRJasgAJUmSJEkNGaAkSZIkqSEDlCRJkiQ1ZICSJEmSpIaGRkdH+90HqadWrVrlN7kkSZK2smzZsqHJnuMMlCRJkiQ15AyUJEmSJDXkDJQkSZIkNWSAkiRJkqSGDFCSJEmS1JABSpIkSZIaMkBJkiRJUkMGKEmSJElqyAAlSZIkSQ0ZoCRJkiSpIQOUJEmSJDVkgJIkSZKkhub3uwOS5p6IWAycCKwHtgVuzMyfjXHs8cBOwCZgEfCNzFw/Q11Vl01m7MvxuwKRmafPTA/VbRGxAPgUsJHWL2Z/l5mXdThuP+Bw4GlgB+DCzHxwJvuq7mo69uXYRcAHgEsy87GZ66V6YRLv+6OAvXju//jvZOaamexrLxigJPXCMcBpmbkZICJOiIgbM/Pp6kERcQTwi8y8ozzeFlgBnDXTHVbXNB37PYDjgIeBLTPfTXVRAGe3f/EREUdFxEsz8/fPHhAxBBySmadVyj4FfH3Ge6tuajL2C4FPAuuAx4EdAQPU7Ndk7N8IPJ6Z/1YpOxmY9b8wcwmfpK6KiO2A9e0foIvvA0d2OPzF7fAEkJkbgQ097qJ6ZJJj/0hmfikzLwCempEOqlcW1GaNfwgsqx1zCLCqVnZ3ROzV056p1yYc+8zckJmnZeY5GJzmkibv+/0z8+pa2ZO97dbMMEBJ6raXA7+pFmTmE8B2HY69tEPZvF50SjOi8dhn5uhMdUq9U2aNn/cDUWZuAZ6pHfryzPxNrexW4LU97J56aBJjrzlmEmM/Z/+PN0BJ6rYlQKf1zVv9e5OZa6uPI2IHWuupNTs1HnvNGUtoLcOsq495p++BR4
A9ut4jzZSmY6+5p9HYd/g/fhvmSIDyHihJUxIR/wjsViu+C7gO2Lz1GQw1aPY44Lxpdk091qOx1+w0nymOeWaORsSc+GFqQE157DXrTXXsjwEu7n53Zp4BStKUZOY/dCqPiLcyhd8wRcRhwPWZ6T1QL3DdHnvNas/QbMy3WrJZNpZwA5HZq+nYa+6Z9NhHxBuAhzLzT73p0sxymlVStz1Ea3q/sYj4M2CXzLy1N13SDJn02GvWWwO8qMFxnYLSHnReBqTZoenYa+6Z1NhHxG7AmzLzp73r0swyQEnqtvuBV1ULImJnxthdr3w2yJGZ+f0Z6Jt6a1Jjr9mv7Jy5c7Ws3Oewbe3Q+yPi1bWyvwDu7GH31EOTGHvNMZMZ+1J+AnDuzPRuZhigJHVV+byfHSOiukT4GFpbnHZyMnBmzzumnpvC2Gtu2BQRO1UeHwVcUT0gM1ez9RbHr8vM+3rdOfXUhGOvOavp2H8COLfs0jdneA+UpF5I4LMRsY7WFta/rHzY3nuBmzLzvog4Adgd+EhEVM+/uvr5UJpVGo19PzuorvsO8LcR8TSwAHgwM++NiAMBMvO6ctzVEfFZWp/7tRj4Xl96q25qOvaaeyYc+4g4FHgTMFr7P/7GzPzFjPe4i4ZGR/0oDkmSJElqwiV8kiRJktSQAUqSJEmSGjJASZIkSVJDBihJkiRJasgAJUmSJEkNGaAkSZIkqSEDlCRJkiQ1ZICSJEmSpIYMUJIkSZLUkAFKkiRJkhoyQEmSJElSQwYoSZIkSWrIACVJkiRJDRmgJEmSJKkhA5QkSZIkNWSAkiRJkqSG/j8Q5gq6gSUXTQAAAABJRU5ErkJggg==\n",
830 |       "text/plain": [
831 |        "<matplotlib.figure.Figure at 0x7fd5f5821470>"
832 |       ]
833 |      },
834 |      "metadata": {
835 |       "needs_background": "light"
836 |      },
837 |      "output_type": "display_data"
838 |     }
839 |    ],
840 |    "source": [
841 |     "exp.as_pyplot_figure()\n",
842 |     "plt.tight_layout()"
843 |    ]
844 |   },
845 |   {
846 |    "cell_type": "markdown",
847 |    "metadata": {},
848 |    "source": [
849 |     "We see that one of the features that contributes most strongly to the positive prediction is the short tenure of the customer."
850 |    ]
851 |   },
852 |   {
853 |    "cell_type": "markdown",
854 |    "metadata": {},
855 |    "source": [
856 |     "## Saving the model\n",
857 |     "Now that we've done all this work to build the models, we want to be able to use them later.\n",
858 |     "The `ExplainedModel` class is a handy wrapper for using the `CategoricalEncoder`, the `Pipeline` object which *is* the churn model, and the Lime Explainer.\n",
859 |     "Here, we use it to save these trained models for use in later parts of the Project."
860 |    ]
861 |   },
862 |   {
863 |    "cell_type": "code",
864 |    "execution_count": 20,
865 |    "metadata": {},
866 |    "outputs": [],
867 |    "source": [
868 |     "from churnexplainer import ExplainedModel\n",
869 |     "explainedmodel = ExplainedModel(data=data, labels=labels, model_name='telco_linear',\n",
870 |     "                                categoricalencoder=ce, pipeline=pipe,\n",
871 |     "                                explainer=explainer,data_dir=data_dir)\n",
872 |     "explainedmodel.save()"
873 |    ]
874 |   },
875 |   {
876 |    "cell_type": "code",
877 |    "execution_count": 21,
878 |    "metadata": {},
879 |    "outputs": [],
880 |    "source": [
881 |     "spark.stop()"
882 |    ]
883 |   },
884 |   {
885 |    "cell_type": "markdown",
886 |    "metadata": {},
887 |    "source": [
888 |     "## Wrap up\n",
889 |     "We've now covered all the steps to **building a machine learning model** including interpretability\n",
890 |     "and saved our work for use in later sections.\n",
891 |     "\n",
892 |     "In the next part of the series we will explore how to use the **Experiments** feature of CML\n",
893 |     "for when we want to test lots of combinations of hyperparameters to fine tune our models.\n"
894 |    ]
895 |   }
896 |  ],
897 |  "metadata": {
898 |   "kernelspec": {
899 |    "display_name": "Python 3",
900 |    "language": "python",
901 |    "name": "python3"
902 |   },
903 |   "language_info": {
904 |    "codemirror_mode": {
905 |     "name": "ipython",
906 |     "version": 3
907 |    },
908 |    "file_extension": ".py",
909 |    "mimetype": "text/x-python",
910 |    "name": "python",
911 |    "nbconvert_exporter": "python",
912 |    "pygments_lexer": "ipython3",
913 |    "version": "3.6.9"
914 |   }
915 |  },
916 |  "nbformat": 4,
917 |  "nbformat_minor": 4
918 | }
919 | 


--------------------------------------------------------------------------------
/4_train_models.py:
--------------------------------------------------------------------------------
  1 | # Part 4: Model Training
  2 | 
  3 | # This script trains an Explained model and also shows how to use the
  4 | # Jobs feature to run model training and the Experiments feature of CML to facilitate model
  5 | # tuning.
  6 | 
  7 | # If you haven't yet, run through the initialization steps in the README file and Part 1.
  8 | # In Part 1, the data is imported into the `default.telco_churn` table in Hive.
  9 | # All data accesses fetch from Hive.
 10 | #
 11 | # To simply train the model once, run this file in a workbench session.
 12 | #
 13 | # There are 2 other ways of running the model training process
 14 | #
 15 | # ***Scheduled Jobs***
 16 | #
 17 | # The **[Jobs](https://docs.cloudera.com/machine-learning/cloud/jobs-pipelines/topics/ml-creating-a-job.html)**
 18 | # feature allows for ad hoc, recurring and dependent jobs to run specific scripts. To run this model
 19 | # training process as a job, create a new job by going to the Project window and clicking _Jobs >
 20 | # New Job_ and entering the following settings:
 21 | # * **Name** : Train Model
 22 | # * **Script** : 4_train_models.py
 23 | # * **Arguments** : _Leave blank_
 24 | # * **Kernel** : Python 3
 25 | # * **Schedule** : Manual
 26 | # * **Engine Profile** : 1 vCPU / 2 GiB
 27 | # The rest can be left as is. Once the job has been created, click **Run** to start a manual
 28 | # run for that job.
 29 | 
 30 | # ***Experiments***
 31 | #
 32 | # Training a model for use in production requires testing many combinations of model parameters
 33 | # and picking the best one based on one or more metrics.
 34 | # In order to do this in a *principled*, *reproducible* way, an Experiment executes model training code with **versioning** of the **project code**, **input parameters**, and **output artifacts**.
 35 | # This is a very useful feature for testing a large number of hyperparameters in parallel on elastic cloud resources.
 36 | 
 37 | # **[Experiments](https://docs.cloudera.com/machine-learning/cloud/experiments/topics/ml-running-an-experiment.html)**
 38 | # run immediately and are used for testing different parameters in a model training process.
 39 | # In this instance it would be used for hyperparameter optimisation. To run an experiment, from the
 40 | # Project window click Experiments > Run Experiment with the following settings.
 41 | # * **Script** : 4_train_models.py
 42 | # * **Arguments** : 5 lbfgs 100 _(these are the cv, solver and max_iter parameters to be passed to
 43 | # the LogisticRegressionCV() function)_
 44 | # * **Kernel** : Python 3
 45 | # * **Engine Profile** : 1 vCPU / 2 GiB
 46 | 
 47 | # Click **Start Run** and the experiment will be scheduled to build and run. Once the Run is
 48 | # completed you can view the outputs that are tracked with the experiment using the
 49 | # `cdsw.track_metric` function. It's worth reading through the code to get a sense of what
 50 | # all is going on.
 51 | 
 52 | # More Details on Running Experiments
 53 | # Requirements
 54 | # Experiments have a few requirements:
 55 | # - model training code in a `.py` script, not a notebook
 56 | # - `requirements.txt` file listing package dependencies
 57 | # - a `cdsw-build.sh` script containing code to install all dependencies
 58 | #
 59 | # These three components are provided for the churn model as `4_train_models.py`, `requirements.txt`,
 60 | # and `cdsw-build.sh`, respectively.
 61 | # You can see that `cdsw-build.sh` simply installs packages from `requirements.txt`.
 62 | # The code in `4_train_models.py` is largely identical to the code in the last notebook,
 63 | # with a few differences.
 64 | #
 65 | # The first difference from the last notebook is at the "Experiments options" section.
 66 | # When you set up a new Experiment, you can enter
 67 | # [**command line arguments**](https://docs.python.org/3/library/sys.html#sys.argv)
 68 | # in standard Python fashion.
 69 | # This will be where you enter the combination of model hyperparameters that you wish to test.
 70 | #
 71 | # The other difference is at the end of the script.
 72 | # Here, the `cdsw` package (available by default) provides
 73 | # [two methods](https://docs.cloudera.com/machine-learning/cloud/experiments/topics/ml-tracking-metrics.html)
 74 | # to let the user evaluate results.
 75 | #
 76 | # **`cdsw.track_metric`** stores a single value which can be viewed in the Experiments UI.
 77 | # Here we store two metrics and the filepath to the saved model.
 78 | #
 79 | # **`cdsw.track_file`** stores a file for later inspection.
 80 | # Here we store the saved model, but we could also have saved a report csv, plot, or any other
 81 | # output file.
 82 | #
 83 | 
 84 | 
 85 | from pyspark.sql.types import *
 86 | from pyspark.sql import SparkSession
 87 | import sys
 88 | import os
 89 | import os
 90 | import datetime
 91 | import subprocess
 92 | import glob
 93 | import dill
 94 | import pandas as pd
 95 | import numpy as np
 96 | import cdsw
 97 | 
 98 | from sklearn.model_selection import train_test_split
 99 | from sklearn.metrics import classification_report
100 | from sklearn.preprocessing import OneHotEncoder, StandardScaler
101 | from sklearn.pipeline import Pipeline
102 | from sklearn.linear_model import LogisticRegressionCV
103 | from sklearn.pipeline import TransformerMixin
104 | from sklearn.preprocessing import LabelEncoder
105 | from sklearn.compose import ColumnTransformer
106 | 
107 | from lime.lime_tabular import LimeTabularExplainer
108 | 
109 | from churnexplainer import ExplainedModel, CategoricalEncoder
110 | 
111 | data_dir = '/home/cdsw'
112 | 
113 | idcol = 'customerID'
114 | labelcol = 'Churn'
115 | cols = (('gender', True),
116 |         ('SeniorCitizen', True),
117 |         ('Partner', True),
118 |         ('Dependents', True),
119 |         ('tenure', False),
120 |         ('PhoneService', True),
121 |         ('MultipleLines', True),
122 |         ('InternetService', True),
123 |         ('OnlineSecurity', True),
124 |         ('OnlineBackup', True),
125 |         ('DeviceProtection', True),
126 |         ('TechSupport', True),
127 |         ('StreamingTV', True),
128 |         ('StreamingMovies', True),
129 |         ('Contract', True),
130 |         ('PaperlessBilling', True),
131 |         ('PaymentMethod', True),
132 |         ('MonthlyCharges', False),
133 |         ('TotalCharges', False))
134 | 
135 | 
136 | # This is a fail safe incase the hive table did not get created in the last step.
137 | try:
138 |     spark = SparkSession\
139 |         .builder\
140 |         .appName("PythonSQL")\
141 |         .master("local[*]")\
142 |         .getOrCreate()
143 | 
144 |     if (spark.sql("SELECT count(*) FROM default.telco_churn").collect()[0][0] > 0):
145 |         df = spark.sql("SELECT * FROM default.telco_churn").toPandas()
146 | except:
147 |     print("Hive table has not been created")
148 |     df = pd.read_csv(os.path.join(
149 |         'raw', 'WA_Fn-UseC_-Telco-Customer-Churn-.csv'))
150 | 
151 | # Clean and shape the data from lr and LIME
152 | df = df.replace(r'^\s$', np.nan, regex=True).dropna().reset_index()
153 | df.index.name = 'id'
154 | data, labels = df.drop(labelcol, axis=1), df[labelcol]
155 | data = data.replace({'SeniorCitizen': {1: 'Yes', 0: 'No'}})
156 | # This is Mike's lovely short hand syntax for looping through data and doing useful things. I think if we started to pay him by the ASCII char, we'd get more readable code.
157 | data = data[[c for c, _ in cols]]
158 | catcols = (c for c, iscat in cols if iscat)
159 | for col in catcols:
160 |     data[col] = pd.Categorical(data[col])
161 | labels = (labels == 'Yes')
162 | 
163 | # Prepare the pipeline and split the data for model training
164 | ce = CategoricalEncoder()
165 | X = ce.fit_transform(data)
166 | y = labels.values
167 | X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
168 | ct = ColumnTransformer(
169 |     [('ohe', OneHotEncoder(), list(ce.cat_columns_ix_.values()))],
170 |     remainder='passthrough'
171 | )
172 | 
173 | # Experiments options
174 | # If you are running this as an experiment, pass the cv, solver and max_iter values
175 | # as arguments in that order. e.g. `5 lbfgs 100`.
176 | 
177 | if len(sys.argv) == 4:
178 |     try:
179 |         cv = int(sys.argv[1])
180 |         solver = str(sys.argv[2])
181 |         max_iter = int(sys.argv[3])
182 |     except:
183 |         sys.exit("Invalid Arguments passed to Experiment")
184 | else:
185 |     cv = 5
186 |     solver = 'lbfgs'  # one of newton-cg, lbfgs, liblinear, sag, saga
187 |     max_iter = 100
188 | 
189 | clf = LogisticRegressionCV(cv=cv, solver=solver, max_iter=max_iter)
190 | pipe = Pipeline([('ct', ct),
191 |                  ('scaler', StandardScaler()),
192 |                  ('clf', clf)])
193 | 
194 | # The magical model.fit()
195 | pipe.fit(X_train, y_train)
196 | train_score = pipe.score(X_train, y_train)
197 | test_score = pipe.score(X_test, y_test)
198 | print("train", train_score)
199 | print("test", test_score)
200 | print(classification_report(y_test, pipe.predict(X_test)))
201 | data[labels.name + ' probability'] = pipe.predict_proba(X)[:, 1]
202 | 
203 | 
204 | # Create LIME Explainer
205 | feature_names = list(ce.columns_)
206 | categorical_features = list(ce.cat_columns_ix_.values())
207 | categorical_names = {i: ce.classes_[c]
208 |                      for c, i in ce.cat_columns_ix_.items()}
209 | class_names = ['No ' + labels.name, labels.name]
210 | explainer = LimeTabularExplainer(ce.transform(data),
211 |                                  feature_names=feature_names,
212 |                                  class_names=class_names,
213 |                                  categorical_features=categorical_features,
214 |                                  categorical_names=categorical_names)
215 | 
216 | 
217 | # Create and save the combined Logistic Regression and LIME Explained Model.
218 | explainedmodel = ExplainedModel(data=data, labels=labels, model_name='telco_linear',
219 |                                 categoricalencoder=ce, pipeline=pipe,
220 |                                 explainer=explainer, data_dir=data_dir)
221 | explainedmodel.save()
222 | 
223 | 
224 | # If running as as experiment, this will track the metrics and add the model trained in this
225 | # training run to the experiment history.
226 | cdsw.track_metric("train_score", round(train_score, 2))
227 | cdsw.track_metric("test_score", round(test_score, 2))
228 | cdsw.track_metric("model_path", explainedmodel.model_path)
229 | cdsw.track_file(explainedmodel.model_path)
230 | 
231 | # Wrap up
232 | 
233 | # We've now covered all the steps to **running Experiments**.
234 | #
235 | # Notice also that any script that will run as an Experiment can also be run as a Job or in a Session.
236 | # Our provided script can be run with the same settings as for Experiments.
237 | # A common use case is to **automate periodic model updates**.
238 | # Jobs can be scheduled to run the same model training script once a week using the latest data.
239 | # Another Job dependent on the first one can update the model parameters being used in production
240 | # if model metrics are favorable.
241 | 


--------------------------------------------------------------------------------
/5_model_serve_explainer.py:
--------------------------------------------------------------------------------
  1 | ## Part 5: Model Serving
  2 | #
  3 | # This notebook explains how to create and deploy Models in CML which function as a 
  4 | # REST API to serve predictions. This feature makes it very easy for a data scientist 
  5 | # to make trained models available and usable to other developers and data scientists 
  6 | # in your organization.
  7 | #
  8 | # In the last part of the series, you learned: 
  9 | # - the requirements for running an Experiment
 10 | # - how to set up a new Experiment
 11 | # - how to monitor the results of an Experiment
 12 | # - limitations of the feature
 13 | #
 14 | # In this part, you will learn:
 15 | # - the requirements for creating and deploying a Model
 16 | # - how to deploy a Model
 17 | # - how to test and use a Model
 18 | # - limitations of the feature
 19 | #
 20 | # If you haven't yet, run through the initialization steps in the README file and Part 1. 
 21 | # In Part 1, the data is imported into the `default.telco_churn` table in Hive. 
 22 | # All data accesses fetch from Hive.
 23 | #
 24 | ### Requirements
 25 | # Models have the same requirements as Experiments:
 26 | # - model code in a `.py` script, not a notebook
 27 | # - a `requirements.txt` file listing package dependencies
 28 | # - a `cdsw-build.sh` script containing code to install all dependencies
 29 | #
 30 | # > In addition, Models *must* be designed with one main function that takes a dictionary as its sole argument
 31 | # > and returns a single dictionary.
 32 | # > CML handles the JSON serialization and deserialization.
 33 | 
 34 | # In this file, there is minimal code since calculating predictions is much simpler 
 35 | # than training a machine learning model.
 36 | # Once again, we use the `ExplainedModel` helper class in `churnexplainer.py`.
 37 | # When a Model API is called, CML will translate the input and returned JSON blobs to and from python dictionaries.
 38 | # Thus, the script simply loads the model we saved at the end of the last notebook,
 39 | # passes the input dictionary into the model, and returns the results as a dictionary with the following format:
 40 | #    
 41 | #    {
 42 | #        'data': dict(data),
 43 | #        'probability': probability,
 44 | #        'explanation': explanation
 45 | #    }
 46 | #
 47 | # The Model API will return this dictionary serialized as JSON.
 48 | # 
 49 | ### Model Operations
 50 | # 
 51 | # This model is deployed using the model operations feature of CML which consists of 
 52 | # [Model Metrics](https://docs.cloudera.com/machine-learning/cloud/model-metrics/topics/ml-enabling-model-metrics.html)
 53 | # and [Model Governance](https://docs.cloudera.com/machine-learning/cloud/model-governance/topics/ml-enabling-model-governance.html)
 54 | # 
 55 | # The first requirement is to make the model use the model metrics feature by adding the 
 56 | # `@cdsw.model_metrics` [Python Decorator](https://wiki.python.org/moin/PythonDecorators)
 57 | # before the function. 
 58 | #
 59 | # Then you can use the *`cdsw.track_metric`* function to add additional
 60 | # data to the underlying database for each call made to the model. 
 61 | # **Note:** `cdsw.track_metric` has different functionality depending on whether it is being 
 62 | # used in an *Experiment* or a *Model*.
 63 | # 
 64 | # More detail is available
 65 | # using the `help(cdsw.track_metric)` function
 66 | #```
 67 | # help(cdsw.track_metric)
 68 | # Help on function track_metric in module cdsw:
 69 | #
 70 | # track_metric(key, value)
 71 | #    Description
 72 | #    -----------
 73 | #    
 74 | #    Tracks a metric for an experiment or model deployment
 75 | #        Example:
 76 | #            model deployment usage:
 77 | #                >>>@cdsw.model_metrics
 78 | #                >>>predict_func(args):
 79 | #                >>>   cdsw.track_metric("input_args", args)
 80 | #                >>>   return {"result": "prediction"}
 81 | #    
 82 | #            experiment usage:
 83 | #                >>>cdsw.track_metric("input_args", args)
 84 | #    
 85 | #    Parameters
 86 | #    ----------
 87 | #    key: string
 88 | #        The metric key to track
 89 | #    value: string, boolean, numeric
 90 | #        The metric value to track
 91 | #```
 92 | #
 93 | #
 94 | ### Creating and deploying a Model
 95 | # To create a Model using our `5_model_serve_explainer.py` script, use the following settings:
 96 | # * **Name**: Explainer
 97 | # * **Description**: Explain customer churn prediction
 98 | # * **File**: `5_model_serve_explainer.py`
 99 | # * **Function**: explain
100 | # * **Input**: 
101 | # ```
102 | # {
103 | # 	"StreamingTV": "No",
104 | # 	"MonthlyCharges": 70.35,
105 | # 	"PhoneService": "No",
106 | # 	"PaperlessBilling": "No",
107 | # 	"Partner": "No",
108 | # 	"OnlineBackup": "No",
109 | # 	"gender": "Female",
110 | # 	"Contract": "Month-to-month",
111 | # 	"TotalCharges": 1397.475,
112 | # 	"StreamingMovies": "No",
113 | #	  "DeviceProtection": "No",
114 | #	  "PaymentMethod": "Bank transfer (automatic)",
115 | #	  "tenure": 29,
116 | #	  "Dependents": "No",
117 | #	  "OnlineSecurity": "No",
118 | #	  "MultipleLines": "No",
119 | #	  "InternetService": "DSL",
120 | #	  "SeniorCitizen": "No",
121 | #	  "TechSupport": "No"
122 | # }
123 | # ```
124 | #* **Kernel**: Python 3
125 | #* **Engine Profile**: 1 vCPU / 2 GiB Memory
126 | #
127 | # The rest can be left as is.
128 | #
129 | # After accepting the dialog, CML will *build* a new Docker image using `cdsw-build.sh`,
130 | # then *assign an endpoint* for sending requests to the new Model.
131 | 
132 | ## Testing the Model
133 | # > To verify it's returning the right results in the format you expect, you can 
134 | # > test any Model from its *Overview* page.
135 | #
136 | # If you entered an *Example Input* before, it will be the default input here, 
137 | # though you can enter your own.
138 | 
139 | ## Using the Model
140 | #
141 | # > The *Overview* page also provides sample `curl` or Python commands for calling your Model API.
142 | # > You can adapt these samples for other code that will call this API.
143 | #
144 | # This is also where you can find the full endpoint to share with other developers 
145 | # and data scientists.
146 | #
147 | # **Note:** for security, you can specify 
148 | # [Model API Keys](https://docs.cloudera.com/machine-learning/cloud/models/topics/ml-model-api-key-for-models.html) 
149 | # to add authentication.
150 | 
151 | ## Limitations
152 | #
153 | # Models do have a few limitations that are important to know:
154 | # - re-deploying or re-building Models results in Model downtime (usually brief)
155 | # - re-starting CML does not automatically restart active Models
156 | # - Model logs and statistics are only preserved so long as the individual replica is active
157 | #
158 | # A current list of known limitations is 
159 | # [documented here](https://docs.cloudera.com/machine-learning/cloud/models/topics/ml-models-known-issues-and-limitations.html).
160 | 
161 | 
162 | from collections import ChainMap
163 | import cdsw, numpy
164 | from churnexplainer import ExplainedModel
165 | 
166 | #Load the model save earlier.
167 | em = ExplainedModel(model_name='telco_linear',data_dir='/home/cdsw')
168 | 
169 | # *Note:* If you want to test this in a session, comment out the line 
170 | #`@cdsw.model_metrics` below. Don't forget to uncomment when you
171 | # deploy, or it won't write the metrics to the database 
172 | 
173 | @cdsw.model_metrics
174 | # This is the main function used for serving the model. It will take in the JSON formatted arguments , calculate the probablity of 
175 | # churn and create a LIME explainer explained instance and return that as JSON.
176 | def explain(args):
177 |     data = dict(ChainMap(args, em.default_data))
178 |     data = em.cast_dct(data)
179 |     probability, explanation = em.explain_dct(data)
180 |     
181 |     # Track inputs
182 |     cdsw.track_metric('input_data', data)
183 |     
184 |     # Track our prediction
185 |     cdsw.track_metric('probability', probability)
186 |     
187 |     # Track explanation
188 |     cdsw.track_metric('explanation', explanation)
189 |     
190 |     return {
191 |         'data': dict(data),
192 |         'probability': probability,
193 |         'explanation': explanation
194 |         }
195 | 
196 | # To test this in a session, comment out the `@cdsw.model_metrics` line,
197 | # then uncomment and run the two rows below.
198 | #x={"StreamingTV":"No","MonthlyCharges":70.35,"PhoneService":"No","PaperlessBilling":"No","Partner":"No","OnlineBackup":"No","gender":"Female","Contract":"Month-to-month","TotalCharges":1397.475,"StreamingMovies":"No","DeviceProtection":"No","PaymentMethod":"Bank transfer (automatic)","tenure":29,"Dependents":"No","OnlineSecurity":"No","MultipleLines":"No","InternetService":"DSL","SeniorCitizen":"No","TechSupport":"No"}
199 | #explain(x)
200 | 
201 | ## Wrap up
202 | #
203 | # We've now covered all the steps to **deploying and serving Models**, including the 
204 | # requirements, limitations, and how to set up, test, and use them.
205 | # This is a powerful way to get data scientists' work in use by other people quickly.
206 | #
207 | # In the next part of the project we will explore how to launch a **web application** 
208 | # served through CML.
209 | # Your team is busy building models to solve problems.
210 | # CML-hosted Applications are a simple way to get these solutions in front of 
211 | # stakeholders quickly.


--------------------------------------------------------------------------------
/6_application.py:
--------------------------------------------------------------------------------
  1 | # Part 6: Application
  2 | 
  3 | # This script explains how to create and deploy Applications in CML.
  4 | # This feature allows data scientists to **get ML solutions in front of stakeholders quickly**,
  5 | # including business users who need results fast.
  6 | # This may be good for sharing a **highly customized dashboard**, a **monitoring tool**, or a **product mockup**.
  7 | 
  8 | # CML is agnostic regarding frameworks.
  9 | # [Flask](https://flask.palletsprojects.com/en/1.1.x/),
 10 | # [Dash](https://plotly.com/dash/),
 11 | # or even [Tornado](https://www.tornadoweb.org/en/stable/) apps will all work.
 12 | # R users will find it easy to deploy Shiny apps.
 13 | 
 14 | # If you haven't yet, run through the initialization steps in the README file. Do that
 15 | # now
 16 | 
 17 | # This file provides a sample Flask app script, ready for deployment,
 18 | # which displays churn predictions and explanations using the Model API deployed in
 19 | # Part 5
 20 | 
 21 | # Deploying the Application
 22 | #
 23 | # > Once you have written an app that is working as desired, including in a test Session,
 24 | # > it can be deployed using the *New Application* dialog in the *Applications* tab in CML.
 25 | 
 26 | # After accepting the dialog, CML will deploy the application then *assign a URL* to
 27 | # the Application using the subdomain you chose.
 28 | #
 29 | # *Note:* This does not require the `cdsw-build.sh` file, as it now follows a
 30 | # separate build process to deploy an application.
 31 | #
 32 | 
 33 | # To create an Application using our sample Flask app, perform the following.
 34 | # This is a special step for this particular app:
 35 | #
 36 | # In the deployed Model from step 5, go to *Model* > *Settings* in CML and make a note (i.e. copy) the
 37 | # "**Access Key**". eg - `mqc8ypo...pmj056y`
 38 | #
 39 | # While you're there, **disable** the additional Model authentication feature by unticking **Enable Authentication**.
 40 | #
 41 | # **Note**: Disabling authentication is only necessary for this Application to work.
 42 | # Ordinarily, you may want to keep Authentication in place.
 43 | #
 44 | # Next, from the Project level, click on *Open Workbench* (note you don't actually have to Launch a
 45 | # Session) in order to edit a file. Select the `flask/single_view.html` file and paste the Access
 46 | # Key in at line 19.
 47 | #
 48 | # `        const accessKey = "mp3ebluylxh4yn5h9xurh1r0430y76ca";`
 49 | #
 50 | # Save the file (if it has not auto saved already) and go back to the Project.
 51 | #
 52 | # Finally, go to the *Applications* section of the Project and select *New Application* with the following:
 53 | # * **Name**: Churn Analysis App
 54 | # * **Subdomain**: churn-app _(note: this needs to be unique, so if you've done this before,
 55 | # pick a more random subdomain name)_
 56 | # * **Script**: 6_application.py
 57 | # * **Kernel**: Python 3
 58 | # * **Engine Profile**: 1 vCPU / 2 GiB Memory
 59 | #
 60 | # Accept the inputs, and in a few minutes the Application will be ready to use.
 61 | 
 62 | # Using the Application
 63 | 
 64 | # >  A few minutes after deploying, the *Applications* page will show the app as Running.
 65 | # You can then click on its name to access it.
 66 | # CML Applications are accessible by any user with read-only (or higher) access to the project.
 67 | #
 68 | 
 69 | # This deploys a basic flask application for serving the HTML and some specific data
 70 | # used for the project Application.
 71 | 
 72 | # At this point, you will be able to open the Churn Analysis App.
 73 | # The initial view is a table of randomly selected customers from the dataset.
 74 | # This provides a snapshot of the customer base as a whole.
 75 | # The colors in the *Probability* column correspond to the prediction, with red customers being deemed more likely to churn.
 76 | # The colors of the features show which are most important for each prediction.
 77 | # Deeper red indicates increased importance for predicting that a customer **will churn**
 78 | # while deeper blue indicates increased importance for predicting that a customer **will not**.
 79 | #
 80 | from flask import Flask, send_from_directory, request
 81 | from IPython.display import Javascript, HTML
 82 | import random
 83 | import os
 84 | from churnexplainer import ExplainedModel
 85 | from collections import ChainMap
 86 | from flask import Flask
 87 | from pandas.io.json import dumps as jsonify
 88 | import logging
 89 | import subprocess
 90 | from IPython.display import Image
 91 | Image("images/table_view.png")
 92 | # NOTE(review): presumably these bare `Image(...)` calls only display in an interactive session - confirm.
 93 | # Clicking on any row will show a "local" interpreted model for that particular customer.
 94 | # Here, you can see how adjusting any one of the features will change that customer's churn prediction.
 95 | #
 96 | Image("images/single_view_1.png")
 97 | #
 98 | # Changing the *InternetService* to *DSL* lowers the probability of churn.
 99 | # **Note**: this obviously does *not* mean that you should change that customer's internet service to DSL
100 | # and expect they will be less likely to churn.
101 | # Imagine if your ISP did that to you.
102 | # Rather, the model is more optimistic about an otherwise identical customer who has been using DSL.
103 | # This information simply gives you a clearer view of what to expect given specific factors
104 | # as a starting point for developing your business strategies.
105 | # Furthermore, as you start implementing changes based on the model, it may change customers' behavior
106 | # so that the predictions stop being reliable.
107 | # It's important to use Jobs to keep models up-to-date.
108 | #
109 | Image("images/single_view_2.png")
110 | #
111 | # There are many frameworks that ease the development of interactive, informative webapps.
112 | # Once written, it is straightforward to deploy them in CML.
113 | 
114 | 
115 | # This reduces the the output to the console window
116 | log = logging.getLogger('werkzeug')
117 | log.setLevel(logging.ERROR)
118 | 
119 | # Since we have access in an environment variable, we want to write it to our UI
120 | # Change the line in the flask/single_view.html file.
121 | if os.environ.get('SHTM_ACCESS_KEY') != None:
122 |   access_key = os.environ.get('SHTM_ACCESS_KEY', "")
123 |   subprocess.call(["sed", "-i",  's/const\saccessKey.*/const accessKey = "' +
124 |                    access_key + '";/', "/home/cdsw/flask/single_view.html"])
125 | 
126 | 
127 | # Load the explained model
128 | em = ExplainedModel(model_name='telco_linear', data_dir='/home/cdsw')
129 | 
130 | # Creates an explained version of a partiuclar data point. This is almost exactly the same as the data used in the model serving code.
131 | 
132 | 
133 | def explainid(N):
134 |     customer_data = dataid(N)[0]
135 |     customer_data.pop('id')
136 |     customer_data.pop('Churn probability')
137 |     data = em.cast_dct(customer_data)
138 |     probability, explanation = em.explain_dct(data)
139 |     return {'data': dict(data),
140 |             'probability': probability,
141 |             'explanation': explanation,
142 |             'id': int(N)}
143 | 
144 | # Gets the rest of the row data for a particular customer.
145 | 
146 | 
147 | def dataid(N):
148 |     customer_id = em.data.index.dtype.type(N)
149 |     customer_df = em.data.loc[[customer_id]].reset_index()
150 |     return customer_df.to_dict(orient='records')
151 | 
152 | 
153 | # Flask doing flasky things
154 | flask_app = Flask(__name__, static_url_path='')
155 | 
156 | 
157 | @flask_app.route('/')
158 | def home():
159 |     return "<script> window.location.href = '/flask/table_view.html'</script>"
160 | 
161 | 
162 | @flask_app.route('/flask/<path:path>')
163 | def send_file(path):
164 |     return send_from_directory('flask', path)
165 | 
166 | # Grabs a sample explained dataset for 10 randomly selected customers.
167 | 
168 | 
169 | @flask_app.route('/sample_table')
170 | def sample_table():
171 |     sample_ids = random.sample(range(1, len(em.data)), 10)
172 |     sample_table = []
173 |     for ids in sample_ids:
174 |         sample_table.append(explainid(str(ids)))
175 |     return jsonify(sample_table)
176 | 
177 | # Shows the names and all the catagories of the categorical variables.
178 | 
179 | 
180 | @flask_app.route("/categories")
181 | def categories():
182 |     return jsonify({feat: dict(enumerate(cats))
183 |                     for feat, cats in em.categories.items()})
184 | 
185 | # Shows the names and all the statistical variations of the numerica variables.
186 | 
187 | 
188 | @flask_app.route("/stats")
189 | def stats():
190 |     return jsonify(em.stats)
191 | 
192 | 
193 | # A handy way to get the link if you are running in a session.
194 | HTML("<a href='https://{}.{}'>Open Table View</a>".format(
195 |     os.environ['CDSW_ENGINE_ID'], os.environ['CDSW_DOMAIN']))
196 | 
197 | # Launches flask. Note the host and port details. This is specific to CML/CDSW
198 | if __name__ == "__main__":
199 |     flask_app.run(host='127.0.0.1', port=int(os.environ['CDSW_APP_PORT']))
200 | 


--------------------------------------------------------------------------------
/7a_ml_ops_simulation.py:
--------------------------------------------------------------------------------
  1 | ## Part 7a - Model Operations - Drift Simulation
  2 | #
  3 | # This script showcases how to use the model operations features of CML.
  4 | # This feature allows machine learning engineers to **measure and manage models 
  5 | # through their life cycle**, and know how a model is performing over time. As part
  6 | # of the larger machine learning lifecycle, this closes the loop on managing
  7 | # models that have been deployed into production.
  8 | 
  9 | ### Add Model Metrics
 10 | # New  metrics can be added to a model and existing ones updated using the `cdsw` 
 11 | # library and the [model metrics SDK](https://docs.cloudera.com/machine-learning/cloud/model-metrics/topics/ml-tracking-model-metrics-using-python.html)
 12 | # If model metrics is enabled for a model, then every call to that model is recorded
 13 | # in the model metric database. There are situations in which it's necessary to update or
 14 | # add to those recorded metrics. This script shows you how this works.
 15 | 
 16 | #### Update Existing Tracked Metrics
 17 | # This is part of what is called "ground truth". Certain machine learning implementations, 
 18 | # (like this very project) will use a supervised approach where a model is making a 
 19 | # prediction and the actual value (or label) is only available at a later stage. To check
 20 | # how well a model is performing, these actual values need to be compared with the 
 21 | # prediction from the model. Each time a model end point is called, it provides the response
 22 | # from the function, some other details and a unique uuid for that response.
 23 | # This tracked model response entry can then be updated at a later date to add the 
 24 | # actual "ground truth" value, or any other data that you want to add.
 25 | #
 26 | # Data can be added to a tracked model response using the `cdsw.track_delayed_metrics`. 
 27 | #
 28 | # ```python
 29 | # help(cdsw.track_delayed_metrics)
 30 | # Help on function track_delayed_metrics in module cdsw:
 31 | #
 32 | # track_delayed_metrics(metrics, prediction_uuid)
 33 | #    Description
 34 | #    -----------
 35 | #    
 36 | #    Track a metric for a model prediction that is only known after prediction time.
 37 | #    For example, for a model that makes a binary or categorical prediction, the actual
 38 | #    correctness of the prediction is not known at prediction time. This function can be
 39 | #    used to retroactively to track a prediction's correctness later, when ground truth
 40 | #    is available
 41 | #        Example:
 42 | #            >>>track_delayed_metrics({"ground_truth": "value"}, "prediction_uuid")
 43 | #    
 44 | #    Parameters
 45 | #    ----------
 46 | #    metrics: object
 47 | #        metrics object
 48 | #    prediction_uuid: string, UUID
 49 | #        prediction UUID of model metrics
 50 | # ```
 51 | 
 52 | #### Adding Additional Metrics
 53 | # It is also possible to add additional data/metrics to the model database to track
 54 | # things like aggregate metrics that aren't associated with one particular response.
 55 | # This can be done using the `cdsw.track_aggregate_metrics` function.
 56 | 
 57 | # ```python
 58 | # help(cdsw.track_aggregate_metrics)
 59 | # Help on function track_aggregate_metrics in module cdsw:
 60 | # 
 61 | # track_aggregate_metrics(metrics, start_timestamp_ms, end_timestamp_ms, model_deployment_crn=None)
 62 | #    Description
 63 | #    -----------
 64 | #    
 65 | #    Track aggregate metric data for model deployment or model build or model
 66 | #        Example:
 67 | #            >>>track_aggregate_metrics({"val_count": 125}, 1585685142786,
 68 | #            ... 1585685153602, model_deployment_crn="/db401b6a-4b26-4c8f-8ea6-a1b09b93db88"))
 69 | #    
 70 | #    Parameters
 71 | #    ----------
 72 | #    metrics: object
 73 | #        metrics data object
 74 | #    start_timestamp_ms: int
 75 | #        aggregated metrics start timestamp in milliseconds
 76 | #    end_timestamp_ms: int
 77 | #        aggregated metrics end timestamp in milliseconds
 78 | #    model_deployment_crn: string
 79 | #       model deployment Crn
 80 | # ```
 81 | # 
 82 | 
 83 | ### Model Drift Simulation
 84 | # This script simulates making calls to the model using sample data, and slowly
 85 | # introducing an increasing amount of random variation to the churn value so that
 86 | # the model will be less accurate over time. 
 87 | 
 88 | # The script will grab 1000 random samples from the data set and simulate 1000 
 89 | # predictions. The live model will be called each time in the loop, while the 
 90 | # `churn_error` function adds an increasing amount of error to the data to make 
 91 | # the model less accurate. The actual value, the response value and the uuid are 
 92 | # added to an array.
 93 | # 
 94 | # Then there is a "ground truth" loop that iterates through the array and updates the
 95 | # recorded metric to add the actual label value using the uuid. At the same time, the
 96 | # model accuracy is evaluated every 100 samples and added as an aggregate metric.
 97 | # Over time this accuracy metric falls due to the error introduced into the data.
 98 | 
 99 | 
100 | import cdsw, time, os, random, json
101 | import numpy as np
102 | import pandas as pd
103 | import matplotlib.pyplot as plt
104 | from sklearn.metrics import classification_report
105 | from cmlbootstrap import CMLBootstrap
106 | import seaborn as sns
107 | import copy
108 | 
109 | 
110 | ## Set the model ID
111 | # Get the model id from the model you deployed in step 5. These are unique to each 
112 | # model on CML.
113 | 
114 | model_id = "63"
115 | 
116 | # Grab the data from Hive.
117 | from pyspark.sql import SparkSession
118 | from pyspark.sql.types import *
119 | spark = SparkSession\
120 |     .builder\
121 |     .appName("PythonSQL")\
122 |     .master("local[*]")\
123 |     .getOrCreate()
124 | 
125 | df = spark.sql("SELECT * FROM default.telco_churn").toPandas()
126 | 
127 | # Get the various Model CRN details
128 | HOST = os.getenv("CDSW_API_URL").split(":")[0] + "://" + os.getenv("CDSW_DOMAIN")
129 | cml = CMLBootstrap()
130 | 
131 | latest_model = cml.get_model({"id": model_id, "latestModelDeployment": True, "latestModelBuild": True})
132 | 
133 | Model_CRN = latest_model ["crn"]
134 | Deployment_CRN = latest_model["latestModelDeployment"]["crn"]
135 | model_endpoint = HOST.split("//")[0] + "//modelservice." + HOST.split("//")[1] + "/model"
136 | 
137 | # Returns True at random with probability `percent`; otherwise returns whether the 
138 | # actual churn value `item` is 'Yes'. A higher `percent` injects more label error.
139 | def churn_error(item,percent):
140 |   if random.random() < percent:
141 |     return True
142 |   else:
143 |     return True if item=='Yes' else False
144 | 
145 |   
146 | # Get 1000 samples  
147 | df_sample = df.sample(1000)
148 | 
149 | df_sample.groupby('Churn')['Churn'].count() 
150 | 
151 | df_sample_clean = df_sample.\
152 |   replace({'SeniorCitizen': {"1": 'Yes', "0": 'No'}}).\
153 |   replace(r'^\s$', np.nan, regex=True).\
154 |   dropna()
155 | 
156 | # Create an array of model responses.
157 | response_labels_sample = []
158 | 
159 | # Make 1000 calls to the model with increasing error
160 | percent_counter = 0
161 | percent_max = len(df_sample_clean)
162 | 
163 | for record in json.loads(df_sample_clean.to_json(orient='records')):
164 |   print("Added {} records".format(percent_counter)) if (percent_counter%50 == 0) else None
165 |   percent_counter += 1
166 |   no_churn_record = copy.deepcopy(record)
167 |   no_churn_record.pop('customerID')
168 |   no_churn_record.pop('Churn')
169 |   # **note** this is an easy way to interact with a model in a script
170 |   response = cdsw.call_model(latest_model["accessKey"],no_churn_record)
171 |   response_labels_sample.append(
172 |     {
173 |       "uuid":response["response"]["uuid"],
174 |       "final_label":churn_error(record["Churn"],percent_counter/percent_max),
175 |       "response_label":response["response"]["prediction"]["probability"] >= 0.5,
176 |       "timestamp_ms":int(round(time.time() * 1000))
177 |     }
178 |   )
179 | 
180 | # The "ground truth" loop adds the updated actual label value and an accuracy measure
181 | # every 100 calls to the model.
182 | for index, vals in enumerate(response_labels_sample):
183 |   print("Update {} records".format(index)) if (index%50 == 0) else None  
184 |   cdsw.track_delayed_metrics({"final_label":vals['final_label']}, vals['uuid'])
185 |   if (index%100 == 0):
186 |     start_timestamp_ms = vals['timestamp_ms']
187 |     final_labels = []
188 |     response_labels = []
189 |   final_labels.append(vals['final_label'])
190 |   response_labels.append(vals['response_label'])
191 |   if (index%100 == 99):
192 |     print("Adding accuracy metrc")
193 |     end_timestamp_ms = vals['timestamp_ms']
194 |     accuracy = classification_report(final_labels,response_labels,output_dict=True)["accuracy"]
195 |     cdsw.track_aggregate_metrics({"accuracy": accuracy}, start_timestamp_ms , end_timestamp_ms, model_deployment_crn=Deployment_CRN)
196 | 
197 | 
198 | 


--------------------------------------------------------------------------------
/7b_ml_ops_visual.py:
--------------------------------------------------------------------------------
 1 | ## Part 7b - Model Operations - Visualising Model Metrics
 2 | 
 3 | # This is a continuation of the previous process started in the 
 4 | # `7a_ml_ops_simulation.py` script.
 5 | # Here we will load in the metrics saved to the model database in the previous step 
 6 | # into a Pandas dataframe, and display different features as graphs. 
 7 | 
 8 | #```python
 9 | # help(cdsw.read_metrics)
10 | # Help on function read_metrics in module cdsw:
11 | #
12 | # read_metrics(model_deployment_crn=None, start_timestamp_ms=None, end_timestamp_ms=None, model_crn=None, model_build_crn=None)
13 | #    Description
14 | #    -----------
15 | #    
16 | #    Read metrics data for given Crn with start and end time stamp
17 | #    
18 | #    Parameters
19 | #    ----------
20 | #    model_deployment_crn: string
21 | #        model deployment Crn
22 | #    model_crn: string
23 | #        model Crn
24 | #    model_build_crn: string
25 | #        model build Crn
26 | #    start_timestamp_ms: int, optional
27 | #        metrics data start timestamp in milliseconds , if not passed
28 | #        default value 0 is used to fetch data
29 | #    end_timestamp_ms: int, optional
30 | #        metrics data end timestamp in milliseconds , if not passed
31 | #        current timestamp is used to fetch data
32 | #    
33 | #    Returns
34 | #    -------
35 | #    object
36 | #        metrics data
37 | #```
38 |  
39 | 
40 | import cdsw, time, os
41 | import pandas as pd
42 | import matplotlib.pyplot as plt
43 | import numpy as np
44 | from sklearn.metrics import classification_report
45 | from cmlbootstrap import CMLBootstrap
46 | import seaborn as sns
47 | import sqlite3
48 | 
49 | 
50 | ## Set the model ID
51 | # Get the model id from the model you deployed in step 5. These are unique to each 
52 | # model on CML.
53 | 
54 | model_id = "63"
55 | 
56 | # Get the various Model CRN details
57 | cml = CMLBootstrap()
58 | 
59 | latest_model = cml.get_model({"id": model_id, "latestModelDeployment": True, "latestModelBuild": True})
60 | 
61 | Model_CRN = latest_model ["crn"]
62 | Deployment_CRN = latest_model["latestModelDeployment"]["crn"]
63 | 
64 | # Read in the model metrics dict.
65 | model_metrics = cdsw.read_metrics(model_crn=Model_CRN,model_deployment_crn=Deployment_CRN)
66 | 
67 | # This is a handy way to unravel the dict into a big pandas dataframe.
68 | metrics_df = pd.io.json.json_normalize(model_metrics["metrics"])
69 | metrics_df.tail().T
70 | 
71 | # Write the data to SQLite for Viz Apps
72 | if not(os.path.exists("model_metrics.db")):
73 |   conn = sqlite3.connect('model_metrics.db')
74 |   metrics_df.to_sql(name='model_metrics', con=conn)
75 | 
76 | # Do some conversions & calculations
77 | metrics_df['startTimeStampMs'] = pd.to_datetime(metrics_df['startTimeStampMs'], unit='ms')
78 | metrics_df['endTimeStampMs'] = pd.to_datetime(metrics_df['endTimeStampMs'], unit='ms')
79 | metrics_df["processing_time"] = (metrics_df["endTimeStampMs"] - metrics_df["startTimeStampMs"]).dt.microseconds * 1000
80 | 
81 | # This shows how to plot specific metrics.
82 | sns.set_style("whitegrid")
83 | sns.despine(left=True,bottom=True)
84 | 
85 | prob_metrics = metrics_df.dropna(subset=['metrics.probability']).sort_values('startTimeStampMs')
86 | sns.lineplot(x=range(len(prob_metrics)), y="metrics.probability", data=prob_metrics, color='grey')
87 | 
88 | time_metrics = metrics_df.dropna(subset=['processing_time']).sort_values('startTimeStampMs')
89 | sns.lineplot(x=range(len(prob_metrics)), y="processing_time", data=prob_metrics, color='grey')
90 | 
91 | # This shows how the model accuracy drops over time.
92 | agg_metrics = metrics_df.dropna(subset=["metrics.accuracy"]).sort_values('startTimeStampMs')
93 | sns.barplot(x=list(range(1,len(agg_metrics)+1)), y="metrics.accuracy", color="grey", data=agg_metrics)
94 | 


--------------------------------------------------------------------------------
/8_check_model.py:
--------------------------------------------------------------------------------
 1 | # # Check Model
 2 | # This file should be run in a job that will periodically check the current model's accuracy and trigger the 
 3 | # model retrain job if it's below the required threshold. 
 4 | 
 5 | import cdsw, time, os
 6 | import pandas as pd
 7 | from sklearn.metrics import classification_report
 8 | from cmlbootstrap import CMLBootstrap
 9 | 
10 | # replace these with the relevant values from the project
11 | model_id = "63"
12 | job_id = "107"
13 | 
14 | # Get the various Model CRN details
15 | cml = CMLBootstrap()
16 | 
17 | latest_model = cml.get_model({"id": model_id, "latestModelDeployment": True, "latestModelBuild": True})
18 | 
19 | Model_CRN = latest_model ["crn"]
20 | Deployment_CRN = latest_model["latestModelDeployment"]["crn"]
21 | 
22 | # Read in the model metrics dict.
23 | model_metrics = cdsw.read_metrics(model_crn=Model_CRN,model_deployment_crn=Deployment_CRN)
24 | 
25 | # This is a handy way to unravel the dict into a big pandas dataframe.
26 | metrics_df = pd.io.json.json_normalize(model_metrics["metrics"])
27 | 
28 | latest_aggregate_metric = metrics_df.dropna(subset=["metrics.accuracy"]).sort_values('startTimeStampMs')[-1:]["metrics.accuracy"]
29 | 
30 | 
31 | if latest_aggregate_metric.to_list()[0] < 0.6:
32 |   print("model is below threshold, retraining")
33 |   cml.start_job(job_id,{})
34 |   #TODO redeploy new model
35 | else:
36 |   print("model does not need to be retrained")
37 | 


--------------------------------------------------------------------------------
/9_build_project.py:
--------------------------------------------------------------------------------
  1 | # Run this file to auto deploy the model, run a job, and deploy the application
  2 | 
  3 | # Install the requirements
  4 | !pip3 install -r requirements.txt --progress-bar off
  5 | import subprocess
  6 | import datetime
  7 | import xml.etree.ElementTree as ET
  8 | import requests
  9 | import json
 10 | import time
 11 | import os
 12 | from IPython.display import Javascript, HTML
 13 | from cmlbootstrap import CMLBootstrap
 14 | 
 15 | try: 
 16 |   os.environ["SPARK_HOME"]
 17 |   print("Spark is enabled")
 18 | except:
 19 |   print('Spark is not enabled, please enable spark before running this script')
 20 |   raise KeyError('Spark is not enabled, please enable spark before running this script')
 21 | 
 22 | run_time_suffix = datetime.datetime.now()
 23 | run_time_suffix = run_time_suffix.strftime("%d%m%Y%H%M%S")
 24 | 
 25 | 
 26 | # Instantiate API Wrapper
 27 | cml = CMLBootstrap()
 28 | 
 29 | # Set the STORAGE environment variable
 30 | try : 
 31 |   storage=os.environ["STORAGE"]
 32 | except:
 33 |   storage = cml.get_cloud_storage()
 34 |   storage_environment_params = {"STORAGE":storage}
 35 |   storage_environment = cml.create_environment_variable(storage_environment_params)
 36 |   os.environ["STORAGE"] = storage
 37 | 
 38 | # Create the directories and upload data  
 39 | !hadoop fs -mkdir -p $STORAGE/datalake
 40 | !hadoop fs -mkdir -p $STORAGE/datalake/data
 41 | !hadoop fs -mkdir -p $STORAGE/datalake/data/churn
 42 | !hadoop fs -copyFromLocal /home/cdsw/raw/WA_Fn-UseC_-Telco-Customer-Churn-.csv $STORAGE/datalake/data/churn/WA_Fn-UseC_-Telco-Customer-Churn-.csv
 43 | 
 44 | # This will run the data ingest file. You need this to create the hive table from the
 45 | # csv file.
 46 | exec(open("1_data_ingest.py").read())
 47 | 
 48 | # Get User Details
 49 | user_details = cml.get_user({})
 50 | user_obj = {"id": user_details["id"], "username": os.getenv("CDSW_PROJECT_URL").split("/")[6],
 51 |             "name": user_details["name"],
 52 |             "type": user_details["type"],
 53 |             "html_url": user_details["html_url"],
 54 |             "url": user_details["url"]
 55 |             }
 56 | 
 57 | # Get Project Details
 58 | project_details = cml.get_project({})
 59 | project_id = project_details["id"]
 60 | 
 61 | #Get the runtime_id
 62 | runtime_id = 14
 63 | for ids in cml.get_runtimes()["runtimes"]:
 64 |   if ids["kernel"] == "Python 3.7" and ids["edition"] == "Standard" and ids["shortVersion"] == "2021.09" and ids["editor"] == "Workbench":
 65 |     runtime_id = ids["id"]
 66 |     
 67 | #Get runtime addon numbers
 68 | addon_val = cml.get_runtimes_addons()[0]['identifier'] 
 69 |     
 70 | # Create Job
 71 | create_jobs_params = {"name": "Train Model",
 72 |                       "type": "manual",
 73 |                       "script": "4_train_models.py",
 74 |                       "timezone": "America/Los_Angeles",
 75 |                       "environment": {},
 76 |                       "kernel": "python3",
 77 |                       "cpu": 1,
 78 |                       "memory": 2,
 79 |                       "nvidia_gpu": 0,
 80 |                       "include_logs": True,
 81 |                       "notifications": [
 82 |                           {"user_id": user_obj["id"],
 83 |                            "user":  user_obj,
 84 |                            "success": False, "failure": False, "timeout": False, "stopped": False
 85 |                            }
 86 |                       ],
 87 |                       "recipients": {},
 88 |                       "attachments": [],
 89 |                       "include_logs": True,
 90 |                       "report_attachments": [],
 91 |                       "success_recipients": [],
 92 |                       "failure_recipients": [],
 93 |                       "timeout_recipients": [],
 94 |                       "stopped_recipients": []
 95 |                       }
 96 | 
 97 | 
 98 | if os.getenv("ML_RUNTIME_EDITION") != None:
 99 |   create_jobs_params["runtime_id"] = runtime_id
100 |   create_jobs_params["addons"] = [addon_val-1,addon_val]
101 |   create_jobs_params["kernel"] = ""
102 |   
103 |   
104 | 
105 | new_job = cml.create_job(create_jobs_params)
106 | new_job_id = new_job["id"]
107 | print("Created new job with jobid", new_job_id)
108 | 
109 | ##
110 | # Start a job
111 | job_env_params = {}
112 | start_job_params = {"environment": job_env_params}
113 | job_id = new_job_id
114 | job_status = cml.start_job(job_id, start_job_params)
115 | print("Job started")
116 | 
117 | # Stop a job
118 | #job_dict = cml.start_job(job_id, start_job_params)
119 | #cml.stop_job(job_id, start_job_params)
120 | 
121 | 
122 | # Get Default Engine Details
123 | default_engine_details = cml.get_default_engine({})
124 | default_engine_image_id = default_engine_details["id"]
125 | 
126 | # Create the YAML file for the model lineage
127 | yaml_text = \
128 |     """"Model Explainer {}":
129 |   hive_table_qualified_names:                # this is a predefined key to link to training data
130 |     - "default.telco_churn@cm"               # the qualifiedName of the hive_table object representing                
131 |   metadata:                                  # this is a predefined key for additional metadata
132 |     query: "select * from historical_data"   # suggested use case: query used to extract training data
133 |     training_file: "4_train_models.py"       # suggested use case: training file used
134 | """.format(run_time_suffix)
135 | 
136 | with open('lineage.yml', 'w') as lineage:
137 |     lineage.write(yaml_text)
138 | 
139 | 
140 | # Create Model
141 | example_model_input = {"StreamingTV": "No", "MonthlyCharges": 70.35, "PhoneService": "No", "PaperlessBilling": "No", "Partner": "No", "OnlineBackup": "No", "gender": "Female", "Contract": "Month-to-month", "TotalCharges": 1397.475,
142 |                        "StreamingMovies": "No", "DeviceProtection": "No", "PaymentMethod": "Bank transfer (automatic)", "tenure": 29, "Dependents": "No", "OnlineSecurity": "No", "MultipleLines": "No", "InternetService": "DSL", "SeniorCitizen": "No", "TechSupport": "No"}
143 | 
144 | 
145 | create_model_params = {
146 |     "projectId": project_id,
147 |     "name": "Model Explainer 2",
148 |     "description": "Explain a given model prediction",
149 |     "visibility": "private",
150 |     "enableAuth": False,
151 |     "targetFilePath": "5_model_serve_explainer.py",
152 |     "targetFunctionName": "explain",
153 |     "engineImageId": default_engine_image_id,
154 |     "kernel": "python3",
155 |     "examples": [
156 |         {
157 |             "request": example_model_input,
158 |             "response": {}
159 |         }],
160 |     "cpuMillicores": 1000,
161 |     "memoryMb": 2048,
162 |     "nvidiaGPUs": 0,
163 |     "replicationPolicy": {"type": "fixed", "numReplicas": 1},
164 |     "environment": {}}
165 | 
166 | if os.getenv("ML_RUNTIME_EDITION") != None:
167 |   create_model_params["runtimeId"] = runtime_id
168 | 
169 | new_model_details = cml.create_model(create_model_params)
170 | access_key = new_model_details["accessKey"]  # todo check for bad response
171 | model_id = new_model_details["id"]
172 | 
173 | print("New model created with access key", access_key)
174 | 
175 | # Disable model_authentication
176 | cml.set_model_auth({"id": model_id, "enableAuth": False})
177 | 
178 | # Wait for the model to deploy.
179 | is_deployed = False
180 | while is_deployed == False:
181 |     model = cml.get_model({"id": str(
182 |         new_model_details["id"]), "latestModelDeployment": True, "latestModelBuild": True})
183 |     if model["latestModelDeployment"]["status"] == 'deployed':
184 |         print("Model is deployed")
185 |         break
186 |     else:
187 |         print("Deploying Model.....")
188 |         time.sleep(10)
189 | 
190 | 
191 | # Change the line in the flask/single_view.html file.
192 | subprocess.call(["sed", "-i",  's/const\saccessKey.*/const accessKey = "' +
193 |                  access_key + '";/', "/home/cdsw/flask/single_view.html"])
194 | 
195 | # Change the model_id value in the 7a_model_operations.py, 7b_ml_ops_visual.py and 8_check_model.py file
196 | subprocess.call(["sed", "-i",  's/model_id =.*/model_id = "' +
197 |                  model_id + '"/', "/home/cdsw/7a_ml_ops_simulation.py"])
198 | subprocess.call(["sed", "-i",  's/model_id =.*/model_id = "' +
199 |                  model_id + '"/', "/home/cdsw/7b_ml_ops_visual.py"])
200 | subprocess.call(["sed", "-i",  's/model_id =.*/model_id = "' +
201 |                  model_id + '"/', "/home/cdsw/8_check_model.py"])
202 | 
203 | 
204 | # Create Application
205 | create_application_params = {
206 |     "name": "Explainer App",
207 |     "subdomain": run_time_suffix[:],
208 |     "description": "Explainer web application",
209 |     "type": "manual",
210 |     "script": "6_application.py", "environment": {},
211 |     "kernel": "python3", "cpu": 1, "memory": 2,
212 |     "nvidia_gpu": 0
213 | }
214 | 
215 | if os.getenv("ML_RUNTIME_EDITION") != "":
216 |   create_application_params["runtime_id"] = runtime_id
217 |   create_application_params["addons"] = [addon_val-1,addon_val]
218 |   create_application_params["kernel"] = ""
219 | 
220 | new_application_details = cml.create_application(create_application_params)
221 | application_url = new_application_details["url"]
222 | application_id = new_application_details["id"]
223 | 
224 | # print("Application may need a few minutes to finish deploying. Open link below in about a minute ..")
225 | print("Application created, deploying at ", application_url)
226 | 
227 | # Wait for the application to deploy.
228 | is_deployed = False
229 | while is_deployed == False:
230 |     # Wait for the application to deploy.
231 |     app = cml.get_application(str(application_id), {})
232 |     if app["status"] == 'running':
233 |         print("Application is deployed")
234 |         break
235 |     else:
236 |         print("Deploying Application.....")
237 |         time.sleep(10)
238 | 
239 | HTML("<a href='{}'>Open Application UI</a>".format(application_url))
240 | 
241 | # This will run the model operations section that makes calls to the model to track
242 | # metrics and track metric aggregations
243 | 
244 | exec(open("7a_ml_ops_simulation.py").read())
245 | 
246 | # Change the job_id value in the 8_check_model.py file
247 | subprocess.call(["sed", "-i",  's/job_id =.*/job_id = "' +
248 |                  str(new_job_id) + '"/', "/home/cdsw/8_check_model.py"])
249 | 
250 | # Create the check model Job
251 | # Create Job
252 | create_jobs_params = {"name": "Check Model",
253 |                       "type": "manual",
254 |                       "script": "8_check_model.py",
255 |                       "timezone": "America/Los_Angeles",
256 |                       "environment": {},
257 |                       "kernel": "python3",
258 |                       "cpu": 1,
259 |                       "memory": 2,
260 |                       "nvidia_gpu": 0,
261 |                       "include_logs": True,
262 |                       "notifications": [
263 |                           {"user_id": user_obj["id"],
264 |                            "user":  user_obj,
265 |                            "success": False, "failure": False, "timeout": False, "stopped": False
266 |                            }
267 |                       ],
268 |                       "recipients": {},
269 |                       "attachments": [],
270 |                       "include_logs": True,
271 |                       "report_attachments": [],
272 |                       "success_recipients": [],
273 |                       "failure_recipients": [],
274 |                       "timeout_recipients": [],
275 |                       "stopped_recipients": []
276 |                       }
277 | 
278 | 
279 | if os.getenv("ML_RUNTIME_EDITION") != None:
280 |   create_jobs_params["runtime_id"] = runtime_id
281 |   create_jobs_params["addons"] = [addon_val-1,addon_val]
282 |   create_jobs_params["kernel"] = ""
283 |   
284 | new_job = cml.create_job(create_jobs_params)
285 | new_job_id = new_job["id"]
286 | print("Created new job with jobid", new_job_id)
287 | 
288 | # Start a job
289 | job_env_params = {}
290 | start_job_params = {"environment": job_env_params}
291 | job_id = new_job_id
292 | job_status = cml.start_job(job_id, start_job_params)
293 | print("Job started")
294 | 
295 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Churn Prediction Prototype
  2 | This project is a Cloudera Machine Learning 
  3 | ([CML](https://www.cloudera.com/products/machine-learning.html)) **Applied Machine Learning 
  4 | Project Prototype**. It has all the code and data needed to deploy an end-to-end machine 
  5 | learning project in a running CML instance.
  6 | 
  7 | ## Project Overview
  8 | This project builds the telco churn with model interpretability project discussed in more 
  9 | detail [this blog post](https://blog.cloudera.com/visual-model-interpretability-for-telco-churn-in-cloudera-data-science-workbench/). 
 10 | The initial idea and code comes from the FFL Interpretability report which is now freely 
 11 | available and you can read the full report [here](https://ff06-2020.fastforwardlabs.com/)
 12 | 
 13 | ![table_view](images/table_view.png)
 14 | 
 15 | The goal is to build a classifier model using Logistic Regression to predict the churn 
 16 | probability for a group of customers from a telecoms company. On top that, the model 
 17 | can then be interpreted using [LIME](https://github.com/marcotcr/lime). Both the Logistic 
 18 | Regression and LIME models are then deployed using CML's real-time model deployment 
 19 | capability and finally a basic flask based web application is deployed that will let 
 20 | you interact with the real-time model to see which factors in the data have the most 
 21 | influence on the churn probability.
 22 | 
 23 | By following the notebooks in this project, you will understand how to perform similar 
 24 | classification tasks on CML as well as how to use the platform's major features to your 
 25 | advantage. These features include **streamlined model experimentation**, 
 26 | **point-and-click model deployment**, and **ML app hosting**.
 27 | 
 28 | We will focus our attention on working within CML, using all it has to offer, while
 29 | glossing over the details that are simply standard data science.
 30 | We trust that you are familiar with typical data science workflows
 31 | and do not need detailed explanations of the code.
 32 | Notes that are *specific to CML* will be emphasized in **block quotes**.
 33 | 
 34 | ### Initialize the Project
 35 | There are a couple of steps needed at the start to configure the Project and Workspace 
 36 | settings so each step will run successfully. You **must** run the project bootstrap 
 37 | before running other steps. If you just want to launch the model interpretability 
 38 | application without going through each step manually, then you can also deploy the 
 39 | complete project. 
 40 | 
 41 | ***Project bootstrap***
 42 | 
 43 | Open the file `0_bootstrap.py` in a normal workbench python3 session. You only need a 
 44 | 1 vCPU / 2 GiB instance. Once the session is loaded, click **Run > Run All Lines**. 
 45 | This file will create an Environment Variable for the project called **STORAGE**, 
 46 | which is the root of default file storage location for the Hive Metastore in the 
 47 | DataLake (e.g. `s3a://my-default-bucket`). It will also upload the data used in the 
 48 | project to `$STORAGE/datalake/data/churn/`. The original file comes as part of this 
 49 | git repo in the `raw` folder.
 50 |   
 51 | ***Deploy the Complete Project***
 52 | 
 53 | If you just wish to build the project artifacts without going through each step manually, 
 54 | run the `9_build_project.py` file in a python3 session. Again a 1 vCPU / 2 GiB instance 
 55 | will be sufficient. This script will: 
 56 | * run the bootstrap
 57 | * then create the Hive Table and import the data
 58 | * deploy the model
 59 | * update the application files to use this new model
 60 | * deploy the application
 61 | * run the model drift simulation
 62 | Once the script has completed you will see the new model and application are now available 
 63 | in the project.
 64 | 
 65 | ## Project Build
 66 | If you want to go through each of the steps manually to build and understand how the project 
 67 | works, follow the steps below. There is a lot more detail and explanation/comments in each 
 68 | of the files/notebooks so it's worth looking into those. Follow the steps below and you 
 69 | will end up with a running application.
 70 | 
 71 | ### 0 Bootstrap
 72 | Just to reiterate: you have to run the bootstrap for this project before anything else. 
 73 | So make sure you run step 0 first. 
 74 | 
 75 | Open the file `0_bootstrap.py` in a normal workbench python3 session. You only need a 
 76 | 1 CPU / 2 GB instance. Then **Run > Run All Lines**
 77 | 
 78 | ### 1 Ingest Data
 79 | This script will read in the data csv from the file uploaded to the object store (s3/adls) setup 
 80 | during the bootstrap and create a managed table in Hive. This is all done using Spark.
 81 | 
 82 | Open `1_data_ingest.py` in a Workbench session: python3, 1 CPU, 2 GB. Run the file.
 83 | 
 84 | ### 2 Explore Data
 85 | This is a Jupyter Notebook that does some basic data exploration and visualisation. It 
 86 | is to show how this would be part of the data science workflow.
 87 | 
 88 | ![data](images/data.png)
 89 | 
 90 | Open a Jupyter Notebook session (rather than a work bench): python3, 1 CPU, 2 GB and 
 91 | open the `2_data_exploration.ipynb` file. 
 92 | 
 93 | At the top of the page click **Cells > Run All**.
 94 | 
 95 | ### 3 Model Building
 96 | This is also a Jupyter Notebook to show the process of selecting and building the model 
 97 | to predict churn. It also shows more details on how the LIME model is created and a bit 
 98 | more on what LIME is actually doing.
 99 | 
100 | Open a Jupyter Notebook session (rather than a work bench): python3, 1 CPU, 2 GB and 
101 | open the `3_model_building.ipynb` file. 
102 | 
103 | At the top of the page click **Cells > Run All**.
104 | 
105 | ### 4 Model Training
106 | A pre-trained model is saved with the repo and has been placed in the `models` directory. 
107 | If you want to retrain the model, open the `4_train_models.py` file in a workbench session: 
108 | python3 1 vCPU, 2 GiB and run the file. The newly trained model will be saved in the models directory 
109 | named `telco_linear`. 
110 | 
111 | There are 2 other ways of running the model training process
112 | 
113 | ***1. Jobs***
114 | 
115 | The **[Jobs](https://docs.cloudera.com/machine-learning/cloud/jobs-pipelines/topics/ml-creating-a-job.html)**
116 | feature allows for ad hoc, recurring and dependent jobs to run specific scripts. To run this model 
117 | training process as a job, create a new job by going to the Project window and clicking _Jobs >
118 | New Job_ and entering the following settings:
119 | * **Name** : Train Model
120 | * **Script** : 4_train_models.py
121 | * **Arguments** : _Leave blank_
122 | * **Kernel** : Python 3
123 | * **Schedule** : Manual
124 | * **Engine Profile** : 1 vCPU / 2 GiB
125 | The rest can be left as is. Once the job has been created, click **Run** to start a manual 
126 | run for that job.
127 | 
128 | ***2. Experiments***
129 | 
130 | The other option is running an **[Experiment](https://docs.cloudera.com/machine-learning/cloud/experiments/topics/ml-running-an-experiment.html)**. Experiments run immediately and are used for testing different parameters in a model training process. In this instance it would be used for hyperparameter optimisation. To run an experiment, from the Project window click Experiments > Run Experiment with the following settings.
131 | * **Script** : 4_train_models.py
132 | * **Arguments** : 5 lbfgs 100 _(these are the cv, solver and max_iter parameters to be passed to the 
133 | LogisticRegressionCV() function)_
134 | * **Kernel** : Python 3
135 | * **Engine Profile** : 1 vCPU / 2 GiB
136 | 
137 | Click **Start Run** and the experiment will be scheduled to build and run. Once the Run is 
138 | completed you can view the outputs that are tracked with the experiment using the 
139 | `cdsw.track_metrics` function. It's worth reading through the code to get a sense of what 
140 | all is going on.
141 | 
142 | 
143 | ### 5 Serve Model
144 | The **[Models](https://docs.cloudera.com/machine-learning/cloud/models/topics/ml-creating-and-deploying-a-model.html)** 
145 | feature is used to deploy a machine learning model into production for real-time prediction. To 
146 | deploy the model trained in the previous step, go to the Project page, click **Models > New
147 | Model** and create a new model with the following details:
148 | 
149 | * **Name**: Explainer
150 | * **Description**: Explain customer churn prediction
151 | * **File**: 5_model_serve_explainer.py
152 | * **Function**: explain
153 | * **Input**: 
154 | ```
155 | {
156 | 	"StreamingTV": "No",
157 | 	"MonthlyCharges": 70.35,
158 | 	"PhoneService": "No",
159 | 	"PaperlessBilling": "No",
160 | 	"Partner": "No",
161 | 	"OnlineBackup": "No",
162 | 	"gender": "Female",
163 | 	"Contract": "Month-to-month",
164 | 	"TotalCharges": 1397.475,
165 | 	"StreamingMovies": "No",
166 | 	"DeviceProtection": "No",
167 | 	"PaymentMethod": "Bank transfer (automatic)",
168 | 	"tenure": 29,
169 | 	"Dependents": "No",
170 | 	"OnlineSecurity": "No",
171 | 	"MultipleLines": "No",
172 | 	"InternetService": "DSL",
173 | 	"SeniorCitizen": "No",
174 | 	"TechSupport": "No"
175 | }
176 | ```
177 | * **Kernel**: Python 3
178 | * **Engine Profile**: 1vCPU / 2 GiB Memory
179 | 
180 | Leave the rest unchanged. Click **Deploy Model** and the model will go through the build 
181 | process and deploy a REST endpoint. Once the model is deployed, you can test that it is working 
182 | from the model's Overview page.
183 | 
184 | _**Note: This is important**_
185 | 
186 | Once the model is deployed, you must disable the additional model authentication feature. In the model settings page, untick **Enable Authentication**.
187 | 
188 | ![disable_auth](images/disable_auth.png)
189 | 
190 | ### 6 Deploy Application
191 | The next step is to deploy the Flask application. The **[Applications](https://docs.cloudera.com/machine-learning/cloud/applications/topics/ml-applications.html)** feature is still quite new for CML. For this project it is used to deploy a web based application that interacts with the underlying model created in the previous step.
192 | 
193 | _**Note: This next step is important**_
194 | 
195 | _In the deployed model from step 5, go to **Model > Settings** and make a note of (i.e. copy) the 
196 | "Access Key". It will look something like this: mukd9sit7tacnfq2phhn3whc4unq1f38_
197 | 
198 | _From the Project level click on "Open Workbench" (note you don't actually have to Launch a 
199 | session) in order to edit a file. Select the flask/single_view.html file and paste the Access 
200 | Key in at line 19._
201 | 
202 | `        const accessKey = "mp3ebluylxh4yn5h9xurh1r0430y76ca";`
203 | 
204 | _Save the file (if it has not auto saved already) and go back to the Project._
205 | 
206 | From the Project page, go to the **Applications** section and select "New Application" with the following:
207 | * **Name**: Churn Analysis App
208 | * **Subdomain**: churn-app _(note: this needs to be unique, so if you've done this before, 
209 | pick a more random subdomain name)_
210 | * **Script**: 6_application.py
211 | * **Kernel**: Python 3
212 | * **Engine Profile**: 1vCPU / 2 GiB Memory
213 | 
214 | 
215 | After the Application deploys, click on the blue-arrow next to the name. The initial view is a 
216 | table of rows randomly selected from the dataset. This shows a global view of which features are 
217 | most important for the predictor model. The reds show increased importance for predicting a 
218 | customer that will churn and the blues for customers that will not.
219 | 
220 | ![table_view](images/table_view.png)
221 | 
222 | Clicking on any single row will show a "local" interpreted model for that particular data point 
223 | instance. Here you can see how adjusting any one of the features will change the instance's 
224 | churn prediction.
225 | 
226 | 
227 | ![single_view_1](images/single_view_1.png)
228 | 
229 | Changing the InternetService to DSL lowers the probability of churn. *Note: this does not mean 
230 | that changing the Internet Service to DSL causes the probability to go down, this is just what 
231 | the model would predict for a customer with those data points*
232 | 
233 | 
234 | ![single_view_2](images/single_view_2.png)
235 | 
236 | ### 7 Model Operations
237 | The final step is the model operations which consists of [Model Metrics](https://docs.cloudera.com/machine-learning/cloud/model-metrics/topics/ml-enabling-model-metrics.html)
238 | and [Model Governance](https://docs.cloudera.com/machine-learning/cloud/model-governance/topics/ml-enabling-model-governance.html)
239 | 
240 | **Model Governance** is setup in the `0_bootstrap.py` script, which writes out the lineage.yml file at
241 | the start of the project. For the **Model Metrics** open a workbench session (1 vCPU / 2 GiB) and open the
242 | `7a_ml_ops_simulation.py` file. You need to set the `model_id` number from the model created in step 5 on line
243 | 113. The model number is on the model's main page.
244 | 
245 | ![model_id](images/model_id.png)
246 | 
247 | `model_id = "95"`
248 | 
249 | From there, run the file. This goes through a process of simulating a model that drifts over 
250 | 1000 calls to the model. The file contains comments with details of how this is done.
251 | 
252 | In the next step you can interact and display the model metrics. Open a workbench 
253 | session (1 vCPU / 2 GiB) and open and run the `7b_ml_ops_visual.py` file. Again you 
254 | need to set the `model_id` number from the model created in step 5 on line 53. 
255 | The model number is on the model's main page.
256 | 
257 | ![model_accuracy](images/model_accuracy.png)
258 | 
259 | 


--------------------------------------------------------------------------------
/cdsw-build.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | pip3 install -r requirements.txt


--------------------------------------------------------------------------------
/churnexplainer.py:
--------------------------------------------------------------------------------
  1 | import datetime, dill, os
  2 | import pandas as pd
  3 | 
  4 | from sklearn.pipeline import TransformerMixin
  5 | from sklearn.preprocessing import LabelEncoder
  6 | 
  7 | 
  8 | """
  9 | Explained model is a class that has attributes:
 10 | 
 11 |  - data, i.e. the features you get for a given dataset from load_dataset. This
 12 |    is a pandas dataframe that may include categorical variables.
 13 |  - labels, i.e. the boolean labels you get for a given dataset from
 14 |    load_dataset.
 15 |  - categoricalencoder, a fitted sklearn Transformer object that transforms
 16 |    the categorical columns in `data` to deterministic integer codes, yielding a
 17 |    plain numpy array often called `X` (leaves non-categorical columns
 18 |    untouched)
 19 |  - pipeline, a trained sklearn pipeline that takes `X` as input and predicts.
 20 |  - explainer, an instantiated LIME explainer that yields an explanation when
 21 |    its explain_instance method is run on an example `X`
 22 | 
 23 | properties:
 24 |  - default_data
 25 |  - categorical_features
 26 |  - non_categorical_features
 27 |  - dtypes
 28 | 
 29 | and methods for API (which works in terms of dictionaries):
 30 |  - cast_dct, converts values of dictionary to dtype corresponding to key
 31 |  - explain_dct, returns prediction and explanation for example dictionary
 32 | 
 33 | and methods for users (who usually have dataframes):
 34 |  - predict_df, returns predictions for a df, i.e. runs it through categorical
 35 |    encoder and pipeline
 36 |  - explain_df, returns predictions and explanation for example dataframe
 37 | """
 38 | 
 39 | class ExplainedModel():
 40 | 
 41 |     def __init__(self, model_name=None, labels=None, data=None, #dataset=None, data=None, labels=None,
 42 |                  categoricalencoder=None, pipeline=None, explainer=None, data_dir=None,
 43 |                  load=True):
 44 |         if model_name is not None:
 45 |           self.model_name = model_name
 46 |           self.is_loaded = False
 47 |         else:
 48 |           self.data = data
 49 |           self.labels = labels
 50 |           self.categoricalencoder = categoricalencoder
 51 |           self.pipeline = pipeline
 52 |           self.explainer = explainer
 53 |           self.is_loaded = True
 54 |         self.model_dir = os.path.join(data_dir, 'models', self.model_name)
 55 |         self.model_path = os.path.join(self.model_dir,
 56 |                                        self.model_name + '.pkl')
 57 |         # if asked to load and not yet loaded, load model!
 58 |         if load and not self.is_loaded:
 59 |             self.load()
 60 | 
 61 |     def load(self):
 62 |         if not self.is_loaded:
 63 |             with open(self.model_path, 'rb') as f:
 64 |                 self.__dict__.update(dill.load(f))
 65 |             self.is_loaded = True
 66 | 
 67 |     def save(self):
 68 |         dilldict = {
 69 |             'data': self.data,
 70 |             'labels': self.labels,
 71 |             'categoricalencoder': self.categoricalencoder,
 72 |             'pipeline': self.pipeline,
 73 |             'explainer': self.explainer
 74 |         }
 75 |         #self._make_model_dir()
 76 |         with open(self.model_path, 'wb') as f:
 77 |             dill.dump(dilldict, f)
 78 | 
 79 | #    def _make_model_name(self):
 80 | #        now = datetime.datetime.now().strftime("%Y%m%dT%H%M%S")
 81 | #        model_type = os.environ.get('CHURN_MODEL_TYPE', 'linear')
 82 | #        #model_name = '_'.join([now, self.dataset, model_type, get_git_hash()])
 83 | #        model_name = '_'.join([now, self.dataset, model_type])
 84 | #        return model_name
 85 | #
 86 | #    def _make_model_dir(self):
 87 | #        if not os.path.exists(self.model_dir):
 88 | #            os.makedirs(self.model_dir)
 89 | 
 90 |     def predict_df(self, df):
 91 |         X = self.categoricalencoder.transform(df)
 92 |         return self.pipeline.predict_proba(X)[:, 1]
 93 | 
 94 |     def explain_df(self, df):
 95 |         X = self.categoricalencoder.transform(df)
 96 |         probability = self.pipeline.predict_proba(X)[0, 1]
 97 |         e = self.explainer.explain_instance(
 98 |             X[0], self.pipeline.predict_proba
 99 |         ).as_map()[1]
100 |         explanations = {self.explainer.feature_names[c]: weight
101 |                         for c, weight in e}
102 |         return probability, explanations
103 | 
104 |     def explain_dct(self, dct):
105 |         return self.explain_df(pd.DataFrame([dct]))
106 | 
107 |     def cast_dct(self, dct):
108 |         return {k: self.dtypes[k].type(v) for k, v in dct.items()}
109 | 
110 |     @property
111 |     def dtypes(self):
112 |         if not hasattr(self, '_dtypes'):
113 |             d = self.data[self.non_categorical_features].dtypes.to_dict()
114 |             d.update({c: self.data[c].cat.categories.dtype
115 |                       for c in self.categorical_features})
116 |             self._dtypes = d
117 |         return self._dtypes
118 | 
119 |     @property
120 |     def non_categorical_features(self):
121 |         return list(self.data.select_dtypes(exclude=['category']).columns
122 |                     .drop(self.labels.name + ' probability'))
123 | 
124 |     @property
125 |     def categorical_features(self):
126 |         return list(self.data.select_dtypes(include=['category']).columns)
127 | 
128 |     @property
129 |     def stats(self):
130 |         def describe(s):
131 |             return {'median': s.median(),
132 |                     'mean': s.mean(),
133 |                     'min': s.min(),
134 |                     'max': s.max(),
135 |                     'std': s.std()}
136 |         if not hasattr(self, '_stats'):
137 |             self._stats = {c: describe(self.data[c])
138 |                            for c in self.non_categorical_features}
139 |         return self._stats
140 | 
141 |     @property
142 |     def label_name(self):
143 |         return self.labels.name + ' probability'
144 | 
145 |     @property
146 |     def categories(self):
147 |         return {feature: list(self.categoricalencoder.classes_[feature])
148 |                 for feature in self.categorical_features}
149 | 
150 |     @property
151 |     def default_data(self):
152 |         # 0th class for categorical variables and mean for continuous
153 |         if not hasattr(self, '_default_data'):
154 |             d = {}
155 |             d.update({feature: self.categoricalencoder.classes_[feature][0]
156 |                       for feature in self.categorical_features})
157 |             d.update({feature: self.data[feature].median()
158 |                       for feature in self.non_categorical_features})
159 |             self._default_data = d
160 |         return self._default_data
161 | 
class CategoricalEncoder(TransformerMixin):
    """Encode the category-dtype columns of a DataFrame as integer codes.

    ``fit`` records the column order and fits one ``LabelEncoder`` per
    categorical column; ``transform`` returns a float numpy array with the
    columns in the fitted order, categorical values replaced by their codes and
    all other columns passed through unchanged.

    Fitted attributes:
        columns_: column index seen during ``fit``.
        cat_columns_ix_: categorical column name -> positional index.
        cat_columns_ / non_cat_columns_: the two groups of column names.
        les_: categorical column name -> fitted ``LabelEncoder``.
        classes_: categorical column name -> list of that encoder's classes.
    """

    def fit(self, X, y=None, *args, **kwargs):
        """Learn the column layout and label encodings from DataFrame ``X``."""
        self.columns_ = X.columns
        # isinstance on the dtype replaces pd.api.types.is_categorical_dtype,
        # which pandas has deprecated.
        self.cat_columns_ix_ = {
            c: i for i, c in enumerate(X.columns)
            if isinstance(X[c].dtype, pd.api.types.CategoricalDtype)
        }
        self.cat_columns_ = pd.Index(list(self.cat_columns_ix_))
        self.non_cat_columns_ = X.columns.drop(self.cat_columns_)
        self.les_ = {c: LabelEncoder().fit(X[c])
                     for c in self.cat_columns_}
        self.classes_ = {c: list(self.les_[c].classes_)
                         for c in self.cat_columns_}
        return self

    def transform(self, X, y=None, *args, **kwargs):
        """Return ``X`` (columns in fitted order) as a float array with the
        categorical columns replaced by their integer codes."""
        data = X[self.columns_].values
        for c, i in self.cat_columns_ix_.items():
            # Overwrite the column in the extracted array with its codes.
            data[:, i] = self.les_[c].transform(data[:, i])
        return data.astype(float)

    def __repr__(self):
        return('{}()'.format(self.__class__.__name__))


--------------------------------------------------------------------------------
/flask/ajax-loader.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastforwardlabs/cml_churn_demo_mlops/0a189a7b250f682d8db14205878510591bcad529/flask/ajax-loader.gif


--------------------------------------------------------------------------------
/flask/churn_vis.css:
--------------------------------------------------------------------------------
  1 | @import url('https://fonts.googleapis.com/css?family=Open+Sans');
  2 | 
  3 | 
/* Page-wide defaults. */
body {

    margin: 0 auto;
    font-family: 'Open Sans', sans-serif;
    font-size: 12px;
}

table {
    border: 0px solid black;
/*    border-collapse: collapse;*/
}

/* Table rows show a pointer cursor. */
tr {
  cursor: pointer;
}

th, td {
    padding: 4px;
    
}
.header {
    font-family: 'Open Sans', sans-serif;
    font-weight: 300;
    font-size: 35px;
    text-align: center;
    padding-top: 20px;
    vertical-align: top;
    line-height: 55px;
}
/* NOTE: a second #loader rule further down sets the `padding` shorthand,
   which overrides both declarations in this rule. */
#loader {
    padding-left: 330px;
    padding-top: 100px; 
}

.churn_div {
    font-size: 15px;
    padding-bottom: 0;
}

.explanation {
    width: 680px;
    margin: 0 auto;
    font-family: "Open Sans", sans-serif;
    font-size: 10pt;
    font-weight: 300;
    padding-bottom: 20px;
    padding-top:10px;    
}

h1 {
    width: 500px;
    padding-top:8px;
    padding-left: 20px;
    float: left;
    font-family: "Open Sans", sans-serif;
    font-size: 15pt;
    font-weight: 300;

}

input {
    width: 70px;
}

/* Floated containers used for the per-feature rows of the single view
   (class names are set by the script in single_view.html). */
.submit_div {
  float:right;
  padding: 0 10px 0 10px;
}

.input_div {
  float:left;
  padding: 5px 10px 0 10px;
}

.inner_div {
  float:left;
  padding: 5px 5px 5px 5px;
  margin: 0 2px 0 2px;
}


/* Default padding for every div; the class rules above override it. */
div {
/*  float:left;*/
  padding:10px 10px 0 10px;
}

/* Each feature row starts below the floats of the previous row. */
.main_div {
  clear:both;
}

#pred_value {
  float:left;
}

/* Effective positioning and size of the loader element. */
#loader {
    /*background-color: #fff;*/
    /*opacity: 0.9;*/
    position: absolute;
    padding: 100px 10px 10px 300px;
    width: 400px;
    height: 500px;
}


/* I got the button CSS from http://www.lab.tommasoraspo.com/simple-web-buttoms/ */

/* Base button appearance; colour variants are defined by .button.white. */
.button {
    float: left;
    cursor: pointer;
    margin: 0 5px;
    text-align: center;
    /*display: inline-block;*/
    text-decoration: none;
    font: bold 12px/12px HelveticaNeue, Arial;
    padding: 8px 11px;
    color: #555;
    border: 1px solid #dedede;
    -webkit-border-radius: 3px;
    -moz-border-radius: 3px;
    border-radius: 3px;
}
/* White button variant: light vertical gradient with a subtle shadow. */
.button.white {
    /* Solid fallback for browsers without gradient support. */
    background: #f5f5f5;
    /*  IE (legacy filter; "progid:" must not be followed by a space) */
    filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#f9f9f9', endColorstr='#f0f0f0');
    /*  WebKit (legacy syntax) */
    background: -webkit-gradient(linear, left top, left bottom, from(#f9f9f9), to(#f0f0f0));
    /*  Firefox (legacy prefix) */
    background: -moz-linear-gradient(top, #f9f9f9, #f0f0f0);
    /*  Standard syntax, listed last so it wins where supported */
    background: linear-gradient(to bottom, #f9f9f9, #f0f0f0);
    border-color: #dedede #d8d8d8 #d3d3d3;
    color: #555;
    text-shadow: 0 1px 0 #fff;
    -webkit-box-shadow: 0 1px 1px #eaeaea, inset 0 1px 0 #fbfbfb;
    -moz-box-shadow: 0 1px 1px #eaeaea, inset 0 1px 0 #fbfbfb;
    box-shadow: 0 1px 1px #eaeaea, inset 0 1px 0 #fbfbfb;
}
/* Hover state of the white button: the gradient direction is reversed. */
.button.white:hover {
    /* Solid fallback for browsers without gradient support. */
    background: #f4f4f4;
    /*  IE (legacy filter; "progid:" must not be followed by a space) */
    filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#efefef', endColorstr='#f8f8f8');
    /*  WebKit (legacy syntax) */
    background: -webkit-gradient(linear, left top, left bottom, from(#efefef), to(#f8f8f8));
    /*  Firefox (legacy prefix) */
    background: -moz-linear-gradient(top, #efefef, #f8f8f8);
    /*  Standard syntax, listed last so it wins where supported */
    background: linear-gradient(to bottom, #efefef, #f8f8f8);
    border-color: #c7c7c7 #c3c3c3 #bebebe;
    text-shadow: 0 1px 0 #fdfdfd;
    -webkit-box-shadow: 0 1px 1px #ebebeb, inset 0 1px 0 #f3f3f3;
    -moz-box-shadow: 0 1px 1px #ebebeb, inset 0 1px 0 #f3f3f3;
    box-shadow: 0 1px 1px #ebebeb, inset 0 1px 0 #f3f3f3;
}


--------------------------------------------------------------------------------
/flask/churn_vis.js:
--------------------------------------------------------------------------------
 1 | //This is the javascript code that builds and updates the bar graph
 2 | 
 3 | window.updater = function(data) {
 4 |   //d3.select("#svg_container").text(data);
 5 |   my_data = data;
 6 |   console.log(data);
 7 | 
 8 | //    var svg_margin = { top: 20, right: 20, bottom: 20, left: 40 };
 9 | //    var svg_width = d3.select("body").node().getBoundingClientRect().width - svg_margin.left - svg_margin.right;
10 | //    var svg_height = 300 - svg_margin.top - svg_margin.bottom;
11 | //
12 | //    var y = d3.scaleLinear()
13 | //        .domain([0, d3.max(data, function(d) { return d.petal_length; })])
14 | //        .range([svg_height, 0]);
15 | //
16 | //    var x = d3.scaleBand()
17 | //        .domain(d3.range(data.length))
18 | //        .range([0, svg_width])
19 | //        .padding(0.1);
20 | //
21 | //    var species_list = d3.map(data, function (d) { return d.species;}).keys();
22 | //
23 | //    if (d3.select("#svg_container").select("svg").empty()) {
24 | //
25 | //
26 | //        svg = d3.select("#svg_container").append("svg")
27 | //          .attr("width", svg_width + svg_margin.left + svg_margin.right)
28 | //          .attr("height", svg_height + svg_margin.top + svg_margin.bottom)
29 | //          .append("g")
30 | //          .attr("transform",
31 | //              "translate(" + svg_margin.left + "," + svg_margin.top + ")");
32 | //
33 | //        svg.append("g")
34 | //            .attr("transform", "translate(0," + svg_height + ")")
35 | //            .attr("class", "x axis")
36 | //            .call(d3.axisBottom(x));
37 | //
38 | //        // add the y Axis
39 | //        svg.append("g")
40 | //            .attr("class", "y axis")
41 | //            .call(d3.axisLeft(y));
42 | //    } else {
43 | //        svg.attr("width", svg_width + svg_margin.left + svg_margin.right)
44 | //        svg.selectAll("g.y.axis")
45 | //            .call(d3.axisLeft(y));
46 | //
47 | //        svg.selectAll("g.x.axis")
48 | //            .call(d3.axisBottom(x));
49 | //    }
50 | //
51 | //    // DATA JOIN
52 | //    // Join new data with old elements, if any.
53 | //
54 | //    var bars = svg.selectAll(".bar")
55 | //        .data(data);
56 | //
57 | //    // UPDATE
58 | //    // Update old elements as needed.
59 | //
60 | //    bars
61 | //        .attr("style",function(d) { return "fill:" + d3.schemeCategory10[species_list.indexOf(d.species)];})
62 | //        .attr("x", function(d, i) { return x(i); })
63 | //        .attr("width", x.bandwidth())
64 | //        .transition()
65 | //        .duration(100)
66 | //        .attr("y", function(d) { return y(d.petal_length); })
67 | //        .attr("height", function(d) { return svg_height - y(d.petal_length); });
68 | //
69 | //    // ENTER + UPDATE
70 | //    // After merging the entered elements with the update selection,
71 | //    // apply operations to both.
72 | //
73 | //    bars.enter().append("rect")
74 | //        .attr("class", "bar")
75 | //        .attr("style",function(d) { return "fill:" + d3.schemeCategory10[species_list.indexOf(d.species)];})
76 | //        .attr("x", function(d, i) { return x(i); })
77 | //        .attr("width", x.bandwidth())
78 | //        .attr("y", function(d) { return y(d.petal_length); })
79 | //        .attr("height", function(d) { return svg_height - y(d.petal_length); })
80 | //        .merge(bars);
81 | //
82 | //    // EXIT
83 | //    // Remove old elements as needed.
84 | //
85 | //    bars.exit().remove();
86 | 
87 | };


--------------------------------------------------------------------------------
/flask/env_vars.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastforwardlabs/cml_churn_demo_mlops/0a189a7b250f682d8db14205878510591bcad529/flask/env_vars.png


--------------------------------------------------------------------------------
/flask/single_view.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | 
  3 | <head>
  4 |     <meta charset="utf-8">
  5 |     <script src="https://d3js.org/d3.v5.min.js"></script>
  6 |     <script src='https://cdnjs.cloudflare.com/ajax/libs/lodash.js/4.17.11/lodash.min.js'></script>
  7 |     <link rel="stylesheet" type="text/css" href="churn_vis.css">
  8 | </head>
  9 | 
 10 | <body>
 11 |     <h1>Single Prediction View</h1>
 12 |     <div style='clear:both;' class="churn_div">
 13 |         <div style='float:left;padding-left:20px'>Churn Probability</div>
 14 |         <div id='pred_value'></div>
 15 | 
 16 |     </div>
 17 |     <div id='features' style='clear:both;'></div>
 18 |     <script>
 19 |         const accessKey = "m6ngb689nsak0dsrr3oqhpjqcht33agm";
 20 |         in_url = new URL(window.location.href)
 21 |         out_url = new URL(window.location.origin + window.location.pathname)
 22 | 
 23 |         params = {}
 24 |         for (let p of in_url.searchParams.entries()) {
 25 |             params[p[0]] = p[1]
 26 |         }
 27 |         var features_numeric = d3.json('/stats').then(json => {
 28 |             return json;
 29 | 
 30 |         });
 31 |         var features_categorical = d3.json('/categories').then(json => {
 32 |             return json
 33 |         });
 34 |         
 35 |         const prob_color = d3.scaleQuantize()
 36 |                     .domain([-0.3,0.3])
 37 |                     .range([
 38 |                         '#4393c3', '#92c5de', '#d1e5f0', '#f7f7f7', '#fddbc7', '#f4a582', '#d6604d'
 39 |                     ]);
 40 |         const color = d3.scaleQuantize()
 41 |                     .domain([0,1])
 42 |                     .range([
 43 |                         '#4393c3', '#92c5de', '#d1e5f0', '#f7f7f7', '#fddbc7', '#f4a582', '#d6604d'
 44 |                     ]);
 45 |         var dataset = d3.json(
 46 |                 //window.location.origin.substr(0,window.location.origin.indexOf(":")+1) + "//" + window.location.origin.substr(window.location.origin.indexOf(".")+1) + '/api/altus-ds-1/models/call-model', {
 47 |                 window.location.origin.substr(0,window.location.origin.indexOf(":")+1) + "//" + "modelservice." + window.location.origin.substr(window.location.origin.indexOf(".")+1) + '/model', {
 48 |                 method: 'POST',
 49 |                 body: '{"accessKey":"' + accessKey + '","request":' + JSON.stringify(params) +
 50 |                     '}',
 51 |                 headers: {
 52 |                     'Content-type': 'application/json'
 53 |                 }
 54 |             })
 55 |             .then(json => {
 56 |                 return json
 57 |             });
 58 | 
 59 |         Promise.all([features_numeric, features_categorical, dataset]).then(values => {
 60 |             console.log(values[2])
 61 |             _.each(values[2].response.prediction.data, function (value, key) {
 62 |                 out_url.searchParams.set(key, value)
 63 |             })
 64 |             //metadata = values;
 65 |             var features_all = _.merge(values[0], values[1]);
 66 |             var merged_data = _.map(d3.entries(values[2].response.prediction.data), function (e) {
 67 |                 return _.concat([{
 68 |                     key: e.key
 69 |                 }, {
 70 |                     value: e.value
 71 |                 }, {
 72 |                     explanation: values[2].response.prediction.explanation[e.key]
 73 |                 }], d3.entries(features_all[e.key]))
 74 |             });
 75 |             merged_data = _.map(merged_data, function (d) {
 76 |                 return _.filter(d, function (e) {
 77 |                     return ((e.key === "median") || (e.key === "std")) ? 0 : 1
 78 |                 })
 79 |             })
 80 | 
 81 |             var prediction_value = d3.select("#pred_value")
 82 |                 .text(values[2].response.prediction.probability.toFixed(3))
 83 |                 .attr("style","background:" + color(values[2].response.prediction.probability) +";padding:10px;");
 84 | 
 85 |             var main_divs = d3.select("#features").selectAll(".main_div")
 86 |                 .data(merged_data)
 87 |                 .enter()
 88 |                 .append("div")
 89 |                 .attr("id", function (d) {
 90 |                     return d[0].key
 91 |                 })
 92 |                 .attr("class", "main_div");
 93 | 
 94 | 
 95 |             var little_divs = main_divs.selectAll(".inner_div")
 96 |                 .data(function (h) {
 97 |                     return h
 98 |                 })
 99 |                 .enter()
100 |                 .append("div")
101 |                 .attr("class", "inner_div")
102 |                 .text(function (f, i) {
103 | 
104 |                     if (i === 0) {
105 |                         return f.key
106 |                     } else if (i === 1) {
107 |                         return f.value
108 |                     } else if (i === 2) {
109 |                         if (f.explanation !== undefined) {
110 |                             return f.explanation.toFixed(2);
111 |                         } else {
112 |                             return 0
113 |                         }
114 |                     } else {
115 | 
116 |                         if (isNaN(parseInt(f.key))) {
117 |                             return f.key + " " + f.value.toFixed(2);
118 |                         } else {
119 |                             return f.value;
120 |                         }
121 |                     }
122 | 
123 | 
124 |                 })
125 |                 .on("click", function (f, i) {
126 | 
127 |                     if (i >= 3) {
128 |                         if (!isNaN(parseInt(f.key))) {
129 |                             local_url = new URL(out_url)
130 |                             local_url.searchParams.set(this.parentNode.id, f.value)
131 |                             return window.location = local_url.href;
132 |                         }
133 |                     }
134 |                 })
135 |                 .attr("style", function (f, i) {
136 |                     if (i === 0) {
137 |                         return "width:100px"
138 |                     } else if (i === 1) {
139 |                         return "width:100px;border: 1px solid #fff; background:#eee;";
140 |                     } else if (i === 2) {
141 |                       if (f.explanation !== undefined) {
142 |                         return "width:30px;background:" + prob_color(f.explanation) + ";";
143 |                       } else {
144 |                         return "width:30px;";
145 |                       }
146 |                       
147 |                     } else {
148 |                         if (!isNaN(parseInt(f.key))) {
149 |                             return "cursor: pointer; border: 1px solid #ccc; background:#ddd;"
150 |                         }
151 |                     }
152 | 
153 | 
154 |                 })
155 | 
156 |             main_divs.insert("div").html(function (d) {
157 |                     if (d[3].key === "0") {
158 |                         return ""
159 |                     } else {
160 |                         return "<input type='text'>"
161 |                     }
162 | 
163 |                 }).attr("class", "input_div")
164 |                 .append("div")
165 |                 .attr("class", "submit_div")
166 |                 .html(function (d) {
167 |                     if (d[3].key === "0") {
168 |                         return ""
169 |                     } else {
170 |                         return "<input type='submit' value='Submit'>"
171 |                     }
172 |                 })
173 |                 .on("click", function (d) {
174 |                         local_url = new URL(out_url)
175 |                         if (!isNaN(parseInt(d3.select(this.parentNode).select("input").property("value")))) {
176 |                             local_url.searchParams.set(this.parentNode.parentNode.id, d3.select(this.parentNode)
177 |                                 .select("input").property("value"))
178 |                             return window.location = local_url.href;
179 |                         }
180 |                     }
181 | 
182 |                 )
183 |         }).catch(err => alert("Unable to connect to the model. Please check the model is running and that you have updated the model access key."));
184 |     </script>
185 | </body>
186 | 


--------------------------------------------------------------------------------
/flask/table_view.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | 
  3 | <head>
  4 |     <meta charset="utf-8">
  5 |     <script src="https://d3js.org/d3.v5.min.js"></script>
  6 |     <script src='https://cdnjs.cloudflare.com/ajax/libs/lodash.js/4.17.11/lodash.min.js'></script>
  7 |     <link rel="stylesheet" type="text/css" href="churn_vis.css">
  8 | </head>
  9 | 
 10 | <body>
 11 |     <h1>Refractor</h1>
 12 |     <div id="loader" style="clear: both;">
 13 |         Loading Sample Data...
 14 |         <br>
 15 |         <img src="ajax-loader.gif">
 16 |     </div>
 17 |     <script>
 18 |         d3.json('/sample_table', {
 19 |                 headers: {
 20 |                     'Content-type': 'application/json'
 21 |                 }
 22 |             })
 23 |             .then(json => {
 24 |                 metadata = json
 25 |                 d3.select("#loader").attr("style", "display:none;")
 26 |                 json = json.sort(function (a, b) {
 27 |                     return b.probability - a.probability
 28 |                 })
 29 | 
 30 |                 const color = d3.scaleQuantize()
 31 |                     .domain([d3.min(_.map(_.map(json, d => {
 32 |                         return d3.values(d.explanation)
 33 |                     }), e => {
 34 |                         return d3.min(e)
 35 |                     })), d3.max(_.map(_.map(json, d => {
 36 |                         return d3.values(d.explanation)
 37 |                     }), e => {
 38 |                         return d3.max(e)
 39 |                     }))])
 40 |                     .range([
 41 |                         '#4393c3', '#92c5de', '#d1e5f0', '#f7f7f7', '#fddbc7', '#f4a582', '#d6604d'
 42 |                     ]);
 43 | 
 44 |                 const prob_color = d3.scaleQuantize()
 45 |                     .domain(d3.extent(_.map(json, function (d) {
 46 |                         return d.probability
 47 |                     })))
 48 |                     .range([
 49 |                         '#4393c3', '#92c5de', '#d1e5f0', '#f7f7f7', '#fddbc7', '#f4a582', '#d6604d'
 50 |                     ]);
 51 | 
 52 |                 var body = d3.select("body");
 53 | 
 54 |                 var table = body.append("table");
 55 | 
 56 |                 var thead = table.append("thead");
 57 |                 var tbody = table.append("tbody");
 58 | 
 59 |                 var th = thead.append("tr")
 60 |                     .selectAll("th")
 61 |                     .data(_.concat(['id'], _.concat(["Probability"], d3.keys(json[0].data))))
 62 |                     .enter()
 63 |                     .append("th")
 64 |                     .text(function (d) {
 65 |                         return d;
 66 |                     });
 67 | 
 68 | 
 69 |                 var tr = tbody.selectAll("tr")
 70 |                     .data(json)
 71 |                     .enter()
 72 |                     .append("tr")
 73 |                     .on("click", function (d) {
 74 |                         local_url = new URL(window.location.origin + "/flask/single_view.html")
 75 |                         _.each(d.data, function (values, keys) {
 76 |                             local_url.searchParams.set(keys, values)
 77 |                         })
 78 |                         return window.location = local_url.href;
 79 |                     });
 80 | 
 81 |                 var td = tr.selectAll("td")
 82 |                     .data(function (d, i) {
 83 |                         return _.concat({
 84 |                             key: "id",
 85 |                             values: {
 86 |                                 value: d.id
 87 |                             }
 88 |                         }, _.concat({
 89 |                                 key: "probability",
 90 |                                 values: {
 91 |                                     value: d.probability
 92 |                                 }
 93 |                             },
 94 |                             _.map(d3.entries(d.data), function (e) {
 95 |                                 return {
 96 |                                     key: e.key,
 97 |                                     values: {
 98 |                                         value: e.value,
 99 |                                         prediction: d.explanation[e.key]
100 |                                     }
101 |                                 }
102 |                             })))
103 |                     })
104 |                     .enter()
105 |                     .append("td")
106 |                     .text(
107 |                         function (e, i) {
108 |                             return String(e.values.value).substring(0, 5);
109 |                         })
110 |                     .attr("style", function (e) {
111 | 
112 |                         if (e.values.prediction !== undefined) {
113 |                             return "background:" + color(e.values.prediction);
114 |                         }
115 |                         if (e.key === "probability") {
116 |                             return "background:" + prob_color(e.values.value);
117 |                         }
118 |                     })
119 | 
120 |             });
121 |     </script>
122 | 
123 | </body>


--------------------------------------------------------------------------------
/images/data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastforwardlabs/cml_churn_demo_mlops/0a189a7b250f682d8db14205878510591bcad529/images/data.png


--------------------------------------------------------------------------------
/images/disable_auth.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastforwardlabs/cml_churn_demo_mlops/0a189a7b250f682d8db14205878510591bcad529/images/disable_auth.png


--------------------------------------------------------------------------------
/images/model_accuracy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastforwardlabs/cml_churn_demo_mlops/0a189a7b250f682d8db14205878510591bcad529/images/model_accuracy.png


--------------------------------------------------------------------------------
/images/model_id.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastforwardlabs/cml_churn_demo_mlops/0a189a7b250f682d8db14205878510591bcad529/images/model_id.png


--------------------------------------------------------------------------------
/images/single_view_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastforwardlabs/cml_churn_demo_mlops/0a189a7b250f682d8db14205878510591bcad529/images/single_view_1.png


--------------------------------------------------------------------------------
/images/single_view_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastforwardlabs/cml_churn_demo_mlops/0a189a7b250f682d8db14205878510591bcad529/images/single_view_2.png


--------------------------------------------------------------------------------
/images/table_view.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastforwardlabs/cml_churn_demo_mlops/0a189a7b250f682d8db14205878510591bcad529/images/table_view.png


--------------------------------------------------------------------------------
/lineage.yml:
--------------------------------------------------------------------------------
1 | "Model Explainer 29072021101927":
2 |   hive_table_qualified_names:                # this is a predefined key to link to training data
3 |     - "default.telco_churn@cm"               # the qualifiedName of the hive_table object representing the training data
4 |   metadata:                                  # this is a predefined key for additional metadata
5 |     query: "select * from historical_data"   # suggested use case: query used to extract training data
6 |     training_file: "4_train_models.py"       # suggested use case: training file used
7 | 


--------------------------------------------------------------------------------
/models/telco_linear/telco_linear.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastforwardlabs/cml_churn_demo_mlops/0a189a7b250f682d8db14205878510591bcad529/models/telco_linear/telco_linear.pkl


--------------------------------------------------------------------------------
/raw/telco-data/_SUCCESS:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastforwardlabs/cml_churn_demo_mlops/0a189a7b250f682d8db14205878510591bcad529/raw/telco-data/_SUCCESS


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | git+https://github.com/fastforwardlabs/cmlbootstrap#egg=cmlbootstrap
 2 | seaborn==0.9.0
 3 | dill==0.3.1.1 
 4 | lime==0.1.1.36 
 5 | scikit-learn==0.21.3 
 6 | xlrd==1.2.0
 7 | pandas==0.25.1
 8 | numpy==1.17.2
 9 | flask==1.1.2
10 | 


--------------------------------------------------------------------------------