├── CONTRIBUTING.md ├── LICENSE ├── MAINTAINERS.md ├── README.md ├── data └── Telco-Customer-Churn.csv ├── doc └── source │ └── images │ ├── ActionsDeployModel.png │ ├── ChooseAnalyticsDeployments.png │ ├── JupyterStopKernel.png │ ├── ModelDeploymentEndpoint.png │ ├── OnlineDeploymentCreate.png │ ├── StatusDeployed.png │ ├── TestingDeployedModel.png │ ├── addNewDeploymentSpace.png │ ├── api-reference-curl.png │ ├── architecture.png │ ├── cleanup-models-and-deployments.png │ ├── cpd-add-data-set.png │ ├── cpd-create-empty-project.png │ ├── cpd-new-project-name.png │ ├── cpd-new-project.png │ ├── cpd-projects-menu.png │ ├── createEmptyDeploymentSpace.png │ ├── createNewDeploymentSpace.png │ ├── deployment-select-model.png │ ├── deployment-space.png │ ├── input.png │ ├── score.png │ ├── wml-1-add-asset.png │ ├── wml-2-add-name-and-url.png │ ├── wml-3-add-local-dataframe.png │ ├── wml-3-notebook-loaded.png │ ├── wml-3.2-install-packages.png │ ├── wml-4-add-dataframe.png │ ├── wml-5-generated-code-dataframe.png │ ├── wml-6-build-pipeline-and-model.png │ ├── wml-7-update-wml-credentials.png │ └── wml-provide-model-and-space-name.png ├── examples └── Telco-customer-churn-ICP4D-with-output.ipynb ├── flaskapp ├── Procfile ├── env.sample ├── manifest.yml ├── requirements.txt ├── runtime.txt ├── telcochurn.py └── templates │ ├── input.html │ ├── layout.html │ └── score.html └── notebooks └── Telco-customer-churn-ICP4D.ipynb /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | This is an open source project, and we appreciate your help! 4 | 5 | We use the GitHub issue tracker to discuss new features and non-trivial bugs. 6 | 7 | To contribute code, documentation, or tests, please submit a pull request to 8 | the GitHub repository. Generally, we expect two maintainers to review your pull 9 | request before it is approved for merging. For more details, see the 10 | [MAINTAINERS](MAINTAINERS.md) page. 11 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /MAINTAINERS.md: -------------------------------------------------------------------------------- 1 | # Maintainers Guide 2 | 3 | This guide is intended for maintainers - anybody with commit access to one or 4 | more Code Pattern repositories. 5 | 6 | ## Methodology 7 | 8 | This repository does not have a traditional release management cycle, but 9 | should instead be maintained as a useful, working, and polished reference at 10 | all times. While all work can therefore be focused on the master branch, the 11 | quality of this branch should never be compromised. 12 | 13 | The remainder of this document details how to merge pull requests to the 14 | repositories. 15 | 16 | ## Merge approval 17 | 18 | The project maintainers use LGTM (Looks Good To Me) in comments on the pull 19 | request to indicate acceptance prior to merging. A change requires LGTMs from 20 | two project maintainers. If the code is written by a maintainer, the change 21 | only requires one additional LGTM. 22 | 23 | ## Reviewing Pull Requests 24 | 25 | We recommend reviewing pull requests directly within GitHub. This allows a 26 | public commentary on changes, providing transparency for all users. When 27 | providing feedback be civil, courteous, and kind. Disagreement is fine, so long 28 | as the discourse is carried out politely. If we see a record of uncivil or 29 | abusive comments, we will revoke your commit privileges and invite you to leave 30 | the project. 31 | 32 | During your review, consider the following points: 33 | 34 | ### Does the change have positive impact? 35 | 36 | Some proposed changes may not represent a positive impact to the project. Ask 37 | whether or not the change will make understanding the code easier, or if it 38 | could simply be a personal preference on the part of the author (see 39 | [bikeshedding](https://en.wiktionary.org/wiki/bikeshedding)). 40 | 41 | Pull requests that do not have a clear positive impact should be closed without 42 | merging. 43 | 44 | ### Do the changes make sense? 45 | 46 | If you do not understand what the changes are or what they accomplish, ask the 47 | author for clarification. Ask the author to add comments and/or clarify test 48 | case names to make the intentions clear. 49 | 50 | At times, such clarification will reveal that the author may not be using the 51 | code correctly, or is unaware of features that accommodate their needs. If you 52 | feel this is the case, work up a code sample that would address the pull 53 | request for them, and feel free to close the pull request once they confirm. 54 | 55 | ### Does the change introduce a new feature? 56 | 57 | For any given pull request, ask yourself "is this a new feature?" If so, does 58 | the pull request (or associated issue) contain narrative indicating the need 59 | for the feature? If not, ask them to provide that information. 60 | 61 | Are new unit tests in place that test all new behaviors introduced? If not, do 62 | not merge the feature until they are! Is documentation in place for the new 63 | feature? (See the documentation guidelines). If not do not merge the feature 64 | until it is! Is the feature necessary for general use cases? Try and keep the 65 | scope of any given component narrow. If a proposed feature does not fit that 66 | scope, recommend to the user that they maintain the feature on their own, and 67 | close the request. 
You may also recommend that they see if the feature gains
68 | traction among other users, and suggest they re-submit when they can show such
69 | support.
70 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Predict Customer Churn using Watson Machine Learning and Jupyter Notebooks on Cloud Pak for Data
2 |
3 | In this Code Pattern, we use IBM Cloud Pak for Data to go through the whole data science pipeline to solve a business problem and predict customer churn using a Telco customer churn dataset. Cloud Pak for Data is an interactive, collaborative, cloud-based environment where data scientists, developers, and others interested in data science can use tools (e.g., RStudio, Jupyter Notebooks, Spark) to collaborate, share, and gather insight from their data, as well as build and deploy machine learning and deep learning models.
4 |
5 | When the reader has completed this Code Pattern, they will understand how to:
6 |
7 | * Use [Jupyter Notebooks](https://jupyter.org/) to load, visualize, and analyze data
8 | * Run Notebooks in [IBM Cloud Pak for Data](https://www.ibm.com/analytics/cloud-pak-for-data)
9 | * Build, test, and deploy a machine learning model using [Spark MLlib](https://spark.apache.org/mllib/) on Cloud Pak for Data.
10 | * Deploy a selected machine learning model to production using Cloud Pak for Data
11 | * Create a front-end application to interface with the client and start consuming your deployed model.
12 |
13 | ![architecture diagram](doc/source/images/architecture.png)
14 |
15 | ## Flow
16 |
17 | 1. User loads the Jupyter notebook into the Cloud Pak for Data platform.
18 | 1. [Telco customer churn data set](https://raw.githubusercontent.com/IBM/telco-customer-churn-on-icp4d/master/data/Telco-Customer-Churn.csv) is loaded into the Jupyter Notebook, either directly from the GitHub repo, or as Virtualized Data after following the [Data Virtualization Tutorial](https://developer.ibm.com/tutorials/virtualizing-db2-warehouse-data-with-data-virtualization) from the [IBM Cloud Pak for Data Learning Path](https://developer.ibm.com/series/cloud-pak-for-data-learning-path/).
19 | 1. Preprocess the data, build machine learning models, and save them to Watson Machine Learning on Cloud Pak for Data.
20 | 1. Deploy a selected machine learning model into production on the Cloud Pak for Data platform and obtain a scoring endpoint.
21 | 1. Use the model for churn prediction from a front-end application.
22 |
23 | ## Included components
24 |
25 | * [IBM Cloud Pak for Data](https://www.ibm.com/products/cloud-pak-for-data)
26 | * [Watson Machine Learning Add On for Cloud Pak for Data](https://www.ibm.com/cloud/machine-learning)
27 |
28 | ## Featured technologies
29 |
30 | * [Jupyter Notebooks](https://jupyter.org/): An open-source web application that allows you to create and share documents that contain live code, equations, visualizations, and explanatory text.
31 | * [Pandas](https://pandas.pydata.org/): An open source library providing high-performance, easy-to-use data structures and data analysis tools for the Python programming language.
32 | * [Seaborn](https://seaborn.pydata.org/): A Python data visualization library based on matplotlib. It provides a high-level interface for drawing attractive and informative statistical graphics.
33 | * [Spark MLlib](https://spark.apache.org/mllib/): Apache Spark's scalable machine learning library, used here to build the churn model (see the sketch below).
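To make the Spark MLlib piece concrete: the notebook (section `3.0 Create a model`) indexes the categorical string columns, assembles a feature vector, and trains a random forest. The sketch below is illustrative only, not the notebook's exact code; it assumes `spark_df` is a Spark DataFrame holding the Telco data:

```python
from pyspark.ml import Pipeline
from pyspark.ml.classification import RandomForestClassifier
from pyspark.ml.feature import StringIndexer, VectorAssembler

# Index the label column and (two of) the categorical string columns;
# the notebook does this for every string column in the dataset.
si_gender = StringIndexer(inputCol='gender', outputCol='gender_IX')
si_contract = StringIndexer(inputCol='Contract', outputCol='Contract_IX')
si_label = StringIndexer(inputCol='Churn', outputCol='label')

# Combine the indexed and numeric columns into a single feature vector.
assembler = VectorAssembler(
    inputCols=['gender_IX', 'Contract_IX', 'SeniorCitizen', 'tenure',
               'MonthlyCharges', 'TotalCharges'],
    outputCol='features')

rf = RandomForestClassifier(labelCol='label', featuresCol='features')
pipeline = Pipeline(stages=[si_gender, si_contract, si_label, assembler, rf])

# Split the data, fit the pipeline, and score the held-out test set.
train_data, test_data = spark_df.randomSplit([0.8, 0.2], seed=24)
model = pipeline.fit(train_data)
predictions = model.transform(test_data)
```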
34 |
35 | ## Prerequisites
36 |
37 | * [IBM Cloud Pak for Data](https://www.ibm.com/analytics/cloud-pak-for-data)
38 | * [Watson Machine Learning Add On for Cloud Pak for Data](https://www.ibm.com/support/producthub/icpdata/docs/content/SSQNUZ_current/wsj/analyze-data/ml-install-overview.html)
39 |
40 | ## Steps
41 |
42 | 1. [Create a new Project](#1-create-a-new-project)
43 | 1. [Create a Space for Machine Learning Deployments](#2-create-a-space-for-machine-learning-deployments)
44 | 1. [Upload the dataset](#3-upload-the-dataset) if you are not on the [Cloud Pak for Data Learning Path](https://developer.ibm.com/series/cloud-pak-for-data-learning-path/).
45 | 1. [Import notebook to Cloud Pak for Data](#4-import-notebook-to-cloud-pak-for-data)
46 | 1. [Run the notebook](#5-run-the-notebook)
47 | 1. [Deploying the model using the Cloud Pak for Data UI](#6-deploying-the-model-using-the-cloud-pak-for-data-ui)
48 | 1. [Testing the model](#7-testing-the-model)
49 | 1. [Create a Python Flask app that uses the model](#8-create-a-python-flask-app-that-uses-the-model)
50 |
51 | ### 1. Create a new project
52 |
53 | * Launch a browser and navigate to your Cloud Pak for Data deployment.
54 |
55 | * Go to the (☰) menu and click *Projects*:
56 |
57 | ![(☰) Menu -> Projects](doc/source/images/cpd-projects-menu.png)
58 |
59 | * Click on *New project*. In the dialog that pops up, select `Analytics project` as the project type and click `Next`:
60 |
61 | ![Start a new project](doc/source/images/cpd-new-project.png)
62 |
63 | * Click on the top tile for `Create an empty project`:
64 |
65 | ![Create an empty project](doc/source/images/cpd-create-empty-project.png)
66 |
67 | * Give the project a unique name and an optional description, then click `Create`:
68 |
69 | ![Pick a name](doc/source/images/cpd-new-project-name.png)
70 |
71 | ### 2. Create a Space for Machine Learning Deployments
72 |
73 | Before we create a machine learning model, we will have to set up a deployment space where we can save and deploy the model.
74 |
75 | Follow the steps in this section to create a new deployment space. If you already have a deployment space set up, you can skip this section and follow the steps to [upload the dataset](#3-upload-the-dataset).
76 |
77 | * Navigate to the left-hand (☰) hamburger menu and choose `Analyze` -> `Analytics deployments`:
78 |
79 | ![(☰) Menu -> Analytics deployments](doc/source/images/ChooseAnalyticsDeployments.png)
80 |
81 | * Click on `New deployment space +`:
82 |
83 | ![Add New deployment space](doc/source/images/addNewDeploymentSpace.png)
84 |
85 | * Click on the top tile for `Create an empty space`:
86 |
87 | ![Create empty deployment space](doc/source/images/createEmptyDeploymentSpace.png)
88 |
89 | * Give your deployment space a unique name and an optional description, then click `Create`.
90 |
91 | ![Create New deployment space](doc/source/images/createNewDeploymentSpace.png)
92 |
93 | ### 3. Upload the dataset
94 |
95 | * If you are not on the [Cloud Pak for Data Learning Path](https://developer.ibm.com/series/cloud-pak-for-data-learning-path/), which uses Virtualized Data, upload the dataset into your project now; otherwise, skip ahead to [import notebook to Cloud Pak for Data](#4-import-notebook-to-cloud-pak-for-data).
96 |
97 | * Clone this repository:
98 |
99 | ```bash
100 | git clone https://github.com/IBM/telco-customer-churn-on-icp4d/
101 | cd telco-customer-churn-on-icp4d
102 | ```
103 |
104 | * In your project, on the `Assets` tab, click the `01/00` icon and then the `Load` tab. Either drag the `data/Telco-Customer-Churn.csv` file from the cloned repository to the window, or navigate to it using `browse for files to upload`:
105 |
106 | ![Add data set](doc/source/images/cpd-add-data-set.png)
107 |
108 | ### 4. Import notebook to Cloud Pak for Data
109 |
110 | * In your project, either click the `Add to project +` button and choose `Notebook`, or, if the *Notebooks* section exists, click `New notebook +` to the right of *Notebooks*:
111 |
112 | ![Add notebook](doc/source/images/wml-1-add-asset.png)
113 |
114 | * On the next screen, select the *From URL* tab, give your notebook a *name* and an optional *description*, provide the following URL as the *Notebook URL*, and choose the `Python 3.6` environment as the *Runtime*:
115 |
116 | ```bash
117 | https://raw.githubusercontent.com/IBM/telco-customer-churn-on-icp4d/master/notebooks/Telco-customer-churn-ICP4D.ipynb
118 | ```
119 |
120 | ![Add notebook name and URL](doc/source/images/wml-2-add-name-and-url.png)
121 |
122 | * When the Jupyter notebook is loaded and the kernel is ready, we can start executing cells.
123 |
124 | ![Notebook loaded](doc/source/images/wml-3-notebook-loaded.png)
125 |
126 | > **Important**: *Make sure that you stop the kernel of your notebook(s) when you are done, in order to conserve memory resources!*
127 |
128 | ![Stop kernel](doc/source/images/JupyterStopKernel.png)
129 |
130 | > **Note**: The Jupyter notebook included in the project has been cleared of output. If you would like to see the notebook that has already been completed with output, you can refer to [examples/Telco-customer-churn-ICP4D-with-output.ipynb](examples/Telco-customer-churn-ICP4D-with-output.ipynb).
131 |
132 | ### 5. Run the notebook
133 |
134 | Spend some time looking through the sections of the notebook to get an overview. A notebook is composed of text (markdown or heading) cells and code cells. The markdown cells provide comments on what the code is designed to do.
135 |
136 | You run cells individually by highlighting each cell, then either clicking the `Run` button at the top of the notebook or hitting the keyboard shortcut (Shift + Enter, though this can vary by platform). While a cell is running, an asterisk (`[*]`) shows up to the left of the cell. When the cell has finished executing, a sequential number appears in its place (e.g., `[17]`).
137 |
138 | **Please note that some of the comments in the notebook are directions for you to modify specific sections of the code. Make any changes as indicated before running the cell.**
139 |
140 | #### Notebook sections
141 |
142 | With the notebook open, you will notice:
143 |
144 | - Section `1.0 Install required packages` will install some of the libraries we are going to use in the notebook (many libraries come pre-installed on Cloud Pak for Data). Note that we upgrade the installed version of the Watson Machine Learning Python client. Ensure that the output of the first code cell confirms the Python packages were successfully installed.
145 |
146 | ![Install required packages](doc/source/images/wml-3.2-install-packages.png)
147 |
148 | - Section `2.0 Load and Clean data` will load the data set we will use to build the machine learning model.
In order to import the data into the notebook, we are going to use the code generation capability of Watson Studio.
149 |   - Highlight the code cell shown in the image below by clicking it. Ensure you place the cursor below the commented line.
150 |   - Click the `01/00` "Find data" icon in the upper right of the notebook to find the data asset you need to import.
151 |   - If you are following the [Cloud Pak for Data Learning Path](https://developer.ibm.com/series/cloud-pak-for-data-learning-path/), choose the *Files* tab, and pick the virtualized data set that has all three joined tables (i.e., `User.BILLINGPRODUCTSCUSTOMERS`). Click `Insert to code` and choose `pandas DataFrame`.
152 |
153 |   ![Add remote Pandas DataFrame](doc/source/images/wml-4-add-dataframe.png)
154 |
155 |   - Otherwise, if you are using this notebook without virtualized data, you can use the [Telco-Customer-Churn.csv](data/Telco-Customer-Churn.csv) version of the data set that is included in this project and was uploaded to the Cloud Pak for Data project in [Step 3](#3-upload-the-dataset). Choose the *Files* tab. Select the *Telco-Customer-Churn.csv* file. Click `Insert to code` and choose `pandas DataFrame`.
156 |
157 |   ![Add local Pandas DataFrame](doc/source/images/wml-3-add-local-dataframe.png)
158 |
159 |   - The code to bring the data into the notebook environment and create a Pandas DataFrame will be added to the cell.
160 |   - Run the cell and you will see the first five rows of the dataset.
161 |
162 |   ![Generated code to handle Pandas DataFrame](doc/source/images/wml-5-generated-code-dataframe.png)
163 |
164 | > **IMPORTANT**: Since we are using generated code to import the data, you will need to update the next cell to assign the `df` variable. Copy the variable name that was generated in the previous cell (it will look like `df_data_1`, `df_data_2`, etc.) and assign it to the `df` variable (for example, `df = df_data_1`).
165 |
166 | - Continue to run the remaining cells in section 2 to explore and clean the data.
167 |
168 | - Section `3.0 Create a model` cells will run through the steps to build a model pipeline.
169 |   - We will split our data into training and test data, encode the categorical string values, create a model using the Random Forest Classifier algorithm, and evaluate the model against the test set.
170 |   - Run all the cells in section 3 to build the model.
171 |
172 | ![Building the pipeline and model](doc/source/images/wml-6-build-pipeline-and-model.png)
173 |
174 | - Section `4.0 Save the model` will save the model to your project.
175 |
176 | - We will be saving and deploying the model to the Watson Machine Learning service within our Cloud Pak for Data platform. In the next code cell, be sure to update the `wml_credentials` variable (a sketch of this cell appears at the end of this section).
177 |   - The `url` should be the hostname of the Cloud Pak for Data instance.
178 |   - The `username` and `password` should be the same credentials you used to log in to Cloud Pak for Data.
179 |
180 | - Update the `MODEL_NAME` variable and provide a unique and easily identifiable model name. Next, update the `DEPLOYMENT_SPACE_NAME` variable, providing the name of the deployment space you created in [Step 2](#2-create-a-space-for-machine-learning-deployments) above.
181 |
182 | ![Provide model and deployment space name](doc/source/images/wml-provide-model-and-space-name.png)
183 |
184 | ![Update WML credentials](doc/source/images/wml-7-update-wml-credentials.png)
185 |
186 | Continue to run the cells in the section to save the model to Cloud Pak for Data.
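For reference, the cells you update in section `4.0` follow this general shape. This is a sketch only: every value below is a placeholder, and the exact fields and calls can differ between versions of the Watson Machine Learning Python client.

```python
from watson_machine_learning_client import WatsonMachineLearningAPIClient

# Placeholders -- use your own cluster's hostname and the credentials
# you log in to Cloud Pak for Data with.
wml_credentials = {
    "url": "https://X.X.X.X",    # hostname of the Cloud Pak for Data instance
    "username": "<username>",
    "password": "<password>",
    "instance_id": "wml_local",
    "version": "2.5.0"           # your Cloud Pak for Data version may differ
}

MODEL_NAME = "Telco churn model"                  # unique, identifiable name
DEPLOYMENT_SPACE_NAME = "churn-deployment-space"  # the space created in Step 2

client = WatsonMachineLearningAPIClient(wml_credentials)
```

The notebook then uses the client to look up the GUID of the deployment space named by `DEPLOYMENT_SPACE_NAME`, set it as the default space, and store the trained pipeline (via `client.repository.store_model(...)`).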
We'll be able to test it out with the Cloud Pak for Data tools in just a few minutes!
187 |
188 | > **Note**: You can use the following cell to clean up any previously created models and deployments.
189 |
190 | ![Clean up models and deployments](doc/source/images/cleanup-models-and-deployments.png)
191 |
192 | ### 6. Deploying the model using the Cloud Pak for Data UI
193 |
194 | Now that we have created a model and saved it to our repository, we will want to deploy the model so it can be used by others.
195 |
196 | We will be creating an online deployment. This type of deployment makes an instance of the model available for real-time predictions via an API.
197 |
198 | Although we use the Cloud Pak for Data UI to deploy the model here, the same can also be done programmatically (for example, with the Watson Machine Learning Python client's `client.deployments.create(...)`).
199 |
200 | - Navigate to the left-hand (☰) hamburger menu and choose `Analyze` -> `Analytics deployments`:
201 |
202 | ![Analytics Analyze deployments](doc/source/images/ChooseAnalyticsDeployments.png)
203 |
204 | - Choose the deployment space you set up previously by clicking on the name of the space.
205 |
206 | ![Deployment space](doc/source/images/deployment-space.png)
207 |
208 | - In your space overview, click the model name that you just built in the notebook:
209 |
210 | ![select model](doc/source/images/deployment-select-model.png)
211 |
212 | - Click `Create deployment` in the top-right corner.
213 |
214 | ![Actions Deploy model](doc/source/images/ActionsDeployModel.png)
215 |
216 | - On the `Create a deployment` screen, choose `Online` for the Deployment Type, give the deployment a name and an optional description, and click `Create`:
217 |
218 | ![Online Deployment Create](doc/source/images/OnlineDeploymentCreate.png)
219 |
220 | - The deployment will show as *In progress* and then switch to *Deployed* when done.
221 |
222 | ![Status Deployed](doc/source/images/StatusDeployed.png)
223 |
224 | ### 7. Testing the model
225 |
226 | Cloud Pak for Data offers tools to quickly test out Watson Machine Learning models. We begin with the built-in tooling.
227 |
228 | - Click on the deployment. The deployment's *API reference* tab shows how to use the model with *cURL*, *Java*, *JavaScript*, *Python*, and *Scala*. Click on the corresponding tab to get the code snippet in the language that you want to use:
229 |
230 | ![Deployment API reference](doc/source/images/api-reference-curl.png)
231 |
232 | #### Test the saved model with built-in tooling
233 |
234 | - To get to the built-in test tool, click on the Test tab.
Click on the `Provide input data as JSON` icon and paste the following data under Body: 235 | 236 | ```json 237 | { 238 | "input_data":[ 239 | { 240 | "fields":[ 241 | "gender", 242 | "SeniorCitizen", 243 | "Partner", 244 | "Dependents", 245 | "tenure", 246 | "PhoneService", 247 | "MultipleLines", 248 | "InternetService", 249 | "OnlineSecurity", 250 | "OnlineBackup", 251 | "DeviceProtection", 252 | "TechSupport", 253 | "StreamingTV", 254 | "StreamingMovies", 255 | "Contract", 256 | "PaperlessBilling", 257 | "PaymentMethod", 258 | "MonthlyCharges", 259 | "TotalCharges" 260 | ], 261 | "values":[ 262 | [ 263 | "Female", 264 | 0, 265 | "No", 266 | "No", 267 | 1, 268 | "No", 269 | "No phone service", 270 | "DSL", 271 | "No", 272 | "No", 273 | "No", 274 | "No", 275 | "No", 276 | "No", 277 | "Month-to-month", 278 | "No", 279 | "Bank transfer (automatic)", 280 | 25.25, 281 | 25.25 282 | ] 283 | ] 284 | } 285 | ] 286 | } 287 | ``` 288 | 289 | - Click the `Predict` button and the model will be called with the input data. The results will display in the *Result* window. Scroll down to the bottom (Line #114) to see either a "Yes" or a "No" for Churn: 290 | 291 | ![Testing the deployed model](doc/source/images/TestingDeployedModel.png) 292 | 293 | #### Test the deployed model with cURL 294 | 295 | Now that the model is deployed, we can also test it from external applications. One way to invoke the model API is using the cURL command. 296 | 297 | > NOTE: Windows users will need the *cURL* command. It's recommended to [download gitbash](https://gitforwindows.org/) for this, as you will also have other tools and you will be able to easily use the shell environment variables in the following steps. Also note that if you are not using gitbash, you may need to change *export* commands to *set* commands. 298 | 299 | - In a terminal window (or command prompt in Windows), run the following command to get a token to access the API. 
Use your Cloud Pak for Data cluster `username` and `password`: 300 | 301 | ```bash 302 | curl -k -X GET https:///v1/preauth/validateAuth -u : 303 | ``` 304 | 305 | - A json string will be returned with a value for "accessToken" that will look *similar* to this: 306 | 307 | ```json 308 | {"username":"snyk","role":"Admin","permissions":["access_catalog","administrator","manage_catalog","can_provision"],"sub":"snyk","iss":"KNOXSSO","aud":"DSX","uid":"1000331002","authenticator":"default","accessToken":"eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VybmFtZSI6InNueWstYWRtaW4iLCJyb2xlIjoiQWRtaW4iLCJwZXJtaXNzaW9ucyI6WyJhZG1pbmlzdHJhdG9yIiwiY2FuX3Byb3Zpc2lvbiIsIm1hbmFnZV9jYXRhbG9nIiwibWFuYWdlX3F1YWxpdHkiLCJtYW5hZ2VfaW5mb3JtYXRpb25fYXNzZXRzIiwibWFuYWdlX2Rpc2NvdmVyeSIsIm1hbmFnZV9tZXRhZGF0YV9pbXBvcnQiLCJtYW5hZ2VfZ292ZXJuYW5jZV93b3JrZmxvdyIsIm1hbmFnZV9jYXRlZ29yaWVzIiwiYXV0aG9yX2dvdmVycmFuY2VfYXJ0aWZhY3RzIiwiYWNjZXNzX2NhdGFsb2ciLCJhY2Nlc3NfaW5mb3JtYXRpb25fYXNzZXRzIiwidmlld19xdWFsaXR5Iiwic2lnbl9pbl9vbmx5Il0sInN1YiI6InNueWstYWRtaW4iLCJpc3MiOiJLTk9YU1NPIiwiYXVkIjoiRFNYIiwidWlkIjoiMTAwMDMzMTAwMiIsImF1dGhlbnRpY2F0b3IiOiJkZWZhdWx0IiwiaWp0IjoxNTkyOTI3MjcxLCJleHAiOjE1OTI5NzA0MzV9.MExzML-45SAWhrAK6FQG5gKAYAseqdCpublw3-OpB5OsdKJ7isMqXonRpHE7N7afiwU0XNrylbWZYc8CXDP5oiTLF79zVX3LAWlgsf7_E2gwTQYGedTpmPOJgtk6YBSYIB7kHHMYSflfNSRzpF05JdRIacz7LNofsXAd94Xv9n1T-Rxio2TVQ4d91viN9kTZPTKGOluLYsRyMEtdN28yjn_cvjH_vg86IYUwVeQOSdI97GHLwmrGypT4WuiytXRoQiiNc-asFp4h1JwEYkU97ailr1unH8NAKZtwZ7-yy1BPDOLeaR5Sq6mYNIICyXHsnB_sAxRIL3lbBN87De4zAg","_messageCode_":"success","message":"success"} 309 | ``` 310 | 311 | - Use the export command to save the "accessToken" part of this response in the terminal window to a variable called `WML_AUTH_TOKEN`. 312 | 313 | ```bash 314 | export WML_AUTH_TOKEN= 315 | ``` 316 | 317 | - Back on the model deployment page, gather the `URL` to invoke the model from the *API reference* by copying the `Endpoint`, and export it a variable called `URL`: 318 | 319 | ![Model Deployment Endpoint](doc/source/images/ModelDeploymentEndpoint.png) 320 | 321 | ```bash 322 | export URL=https://blahblahblah.com 323 | ``` 324 | 325 | Now run this curl command from a terminal window to invoke the model with the same payload that was used previously: 326 | 327 | ```bash 328 | curl -k -X POST --header 'Content-Type: application/json' --header 'Accept: application/json' --header "Authorization: Bearer $WML_AUTH_TOKEN" -d '{"input_data": [{"fields": ["gender","SeniorCitizen","Partner","Dependents","tenure","PhoneService","MultipleLines","InternetService","OnlineSecurity","OnlineBackup","DeviceProtection","TechSupport","StreamingTV","StreamingMovies","Contract","PaperlessBilling","PaymentMethod","MonthlyCharges","TotalCharges"],"values": [["Female",0,"No","No",1,"No","No phone service","DSL","No","No","No","No","No","No","Month-to-month","No","Bank transfer (automatic)",25.25,25.25]]}]}' $URL 329 | ``` 330 | 331 | A json string similar to the one below will be returned with the response, including a "Yes" or "No" at the end indicating the prediction of whether the customer will churn or not. 
332 | 333 | ```json 334 | {"predictions":[{"fields":["gender","SeniorCitizen","Partner","Dependents","tenure","PhoneService","MultipleLines","InternetService","OnlineSecurity","OnlineBackup","DeviceProtection","TechSupport","StreamingTV","StreamingMovies","Contract","PaperlessBilling","PaymentMethod","MonthlyCharges","TotalCharges","gender_IX","Partner_IX","Dependents_IX","PhoneService_IX","MultipleLines_IX","InternetService_IX","OnlineSecurity_IX","OnlineBackup_IX","DeviceProtection_IX","TechSupport_IX","StreamingTV_IX","StreamingMovies_IX","Contract_IX","PaperlessBilling_IX","PaymentMethod_IX","label","features","rawPrediction","probability","prediction","predictedLabel"],"values":[["Female",0,"No","No",1,"No","No phone service","DSL","No","No","No","No","No","No","Month-to-month","No","Bank transfer (automatic)",25.25,25.25,1.0,0.0,0.0,1.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,0.0,[18,[0,4,5,6,14,15,16,17],[1.0,1.0,2.0,1.0,1.0,2.0,25.25,25.25]],[10.806165651100262,9.193834348899738],[0.5403082825550131,0.45969171744498694],0.0,"No"]]}]} 335 | ``` 336 | 337 | ### 8. Create a Python Flask app that uses the model 338 | 339 | You can also access the online model deployment directly through the REST API. This allows you to use your model for inference in any of your apps. For this code pattern, we'll be using a Python Flask application to collect information, score it against the model, and show the results. 340 | 341 | #### Install dependencies 342 | 343 | > **NOTE**: This application only runs on Python 3.6 and above, so the instructions here are for Python 3.6+ only. 344 | 345 | The general recommendation for Python development is to use a virtual environment ([`venv`](https://docs.python.org/3/tutorial/venv.html)). To install and initialize a virtual environment, use the `venv` module: 346 | 347 | In a terminal, go to the `flaskapp` folder within the cloned repo directory. 348 | 349 | ```bash 350 | git clone https://github.com/IBM/telco-customer-churn-on-icp4d/ 351 | cd telco-customer-churn-on-icp4d/flaskapp 352 | ``` 353 | 354 | Initialize a virtual environment with [`venv`](https://docs.python.org/3/tutorial/venv.html). 355 | 356 | ```bash 357 | # Create the virtual environment using Python. 358 | # Note, it may be named python3 on your system. 359 | python -m venv venv # Python 3.X 360 | 361 | # Source the virtual environment. Use one of the two commands depending on your OS. 362 | source venv/bin/activate # Mac or Linux 363 | ./venv/Scripts/activate # Windows PowerShell 364 | ``` 365 | 366 | > **TIP** To terminate the virtual environment use the `deactivate` command. 367 | 368 | Finally, install the Python requirements. 369 | 370 | ```bash 371 | pip install -r requirements.txt 372 | ``` 373 | 374 | #### Update environment variables 375 | 376 | It is best practice to store configurable information as environment variables, instead of hard-coding any important information. To reference our model and supply an API key, we will pass these values in via a file that is read; the key-value pairs in this file are stored as environment variables. 377 | 378 | Copy the `env.sample` file to `.env`. 379 | 380 | ```bash 381 | cp env.sample .env 382 | ``` 383 | 384 | Edit the .env file and fill in the `MODEL_URL` as well as the `AUTH_URL`, `AUTH_USERNAME`, and `AUTH_PASSWORD`. 
385 |
386 | * `MODEL_URL` is the web service URL for scoring, which you obtained in the section above
387 | * `AUTH_URL` is the preauth URL of your Cloud Pak for Data cluster and will look like this: https:///v1/preauth/validateAuth
388 | * `AUTH_USERNAME` is the username you use to log in to the Cloud Pak for Data environment
389 | * `AUTH_PASSWORD` is the password you use to log in to the Cloud Pak for Data environment
390 |
391 | > **NOTE**: Alternatively, you can fill in `AUTH_TOKEN` instead of `AUTH_URL`, `AUTH_USERNAME`, and `AUTH_PASSWORD`. You will have generated this token in the section above. However, since tokens expire after a few hours and you would need to restart your app to update the token, this option is not recommended. If you use the username/password option instead, the app can generate a new token each time, so it always uses a non-expired token.
392 |
393 | ```bash
394 | # Copy this file to .env.
395 | # Edit the .env file with the required settings before starting the app.
396 |
397 | # 1. Required: Provide your web service URL for scoring.
398 | # E.g., MODEL_URL=https:///v4/deployments//predictions
399 | MODEL_URL=
400 |
401 |
402 | # 2. Required: fill in EITHER section A OR B below:
403 |
404 | # ### A: Authentication using username and password
405 | # Fill in the authentication URL, your CloudPak4Data username, and CloudPak4Data password.
406 | # Example:
407 | # AUTH_URL=/v1/preauth/validateAuth
408 | # AUTH_USERNAME=my_username
409 | # AUTH_PASSWORD=super_complex_password
410 | AUTH_URL=
411 | AUTH_USERNAME=
412 | AUTH_PASSWORD=
413 |
414 | # ### B: (advanced) Provide your bearer token.
415 | # Uncomment the "AUTH_TOKEN=" below and fill in your bearer token.
416 | # You can generate this token by following the lab instructions. This token should start with "Bearer ".
417 | # Note that these tokens will expire after a few hours, so you'll need to generate a new one again later.
418 | # Example:
419 | # AUTH_TOKEN=Bearer abCdwFghIjKLMnO1PqRsTuV2wWX3YzaBCDE4.fgH1r2... (and so on, tokens are long).
420 | # AUTH_TOKEN=
421 |
422 |
423 | # Optional: You can override the server's host and port here.
424 | HOST=0.0.0.0
425 | PORT=5000
426 | ```
427 |
428 | #### Start the application
429 |
430 | Start the Flask server by running the following command:
431 |
432 | ```bash
433 | python telcochurn.py
434 | ```
435 |
436 | Use your browser to go to [http://0.0.0.0:5000](http://0.0.0.0:5000) and try it out.
437 |
438 | > **TIP**: Use `ctrl`+`c` to stop the Flask server when you are done.
439 |
440 | #### Sample output
441 |
442 | Enter some sample values into the form:
443 |
444 | ![Input a bunch of data...](doc/source/images/input.png)
445 |
446 | Click the `Submit` button and the churn percentage is returned:
447 |
448 | ![Get the churn percentage as a result](doc/source/images/score.png)
449 |
450 | ## Learn more
451 | * **Artificial Intelligence Code Patterns**: Enjoyed this Code Pattern? Check out our other [AI Code Patterns](https://developer.ibm.com/technologies/artificial-intelligence/).
452 | * **Data Analytics Code Patterns**: Enjoyed this Code Pattern? Check out our other [Data Analytics Code Patterns](https://developer.ibm.com/technologies/data-science/).
453 | * **AI and Data Code Pattern Playlist**: Bookmark our [playlist](https://www.youtube.com/playlist?list=PLzUbsvIyrNfknNewObx5N7uGZ5FKH0Fde) with all of our Code Pattern videos.
454 | * **With Watson**: Want to take your Watson app to the next level? Looking to utilize Watson Brand assets?
[Join the With Watson program](https://www.ibm.com/watson/with-watson/) to leverage exclusive brand, marketing, and tech resources to amplify and accelerate your Watson embedded commercial solution. 455 | * **IBM Watson Studio**: Master the art of data science with IBM's [Watson Studio](https://www.ibm.com/cloud/watson-studio). 456 | 457 | ## License 458 | This code pattern is licensed under the Apache Software License, Version 2. Separate third party code objects invoked within this code pattern are licensed by their respective providers pursuant to their own separate licenses. Contributions are subject to the [Developer Certificate of Origin, Version 1.1 (DCO)](https://developercertificate.org/) and the [Apache Software License, Version 2](https://www.apache.org/licenses/LICENSE-2.0.txt). 459 | 460 | [Apache Software License (ASL) FAQ](https://www.apache.org/foundation/license-faq.html#WhatDoesItMEAN) 461 | -------------------------------------------------------------------------------- /doc/source/images/ActionsDeployModel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/telco-customer-churn-on-icp4d/d5371f5d83a446ad5673cbcca3b814b926491f8a/doc/source/images/ActionsDeployModel.png -------------------------------------------------------------------------------- /doc/source/images/ChooseAnalyticsDeployments.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/telco-customer-churn-on-icp4d/d5371f5d83a446ad5673cbcca3b814b926491f8a/doc/source/images/ChooseAnalyticsDeployments.png -------------------------------------------------------------------------------- /doc/source/images/JupyterStopKernel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/telco-customer-churn-on-icp4d/d5371f5d83a446ad5673cbcca3b814b926491f8a/doc/source/images/JupyterStopKernel.png -------------------------------------------------------------------------------- /doc/source/images/ModelDeploymentEndpoint.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/telco-customer-churn-on-icp4d/d5371f5d83a446ad5673cbcca3b814b926491f8a/doc/source/images/ModelDeploymentEndpoint.png -------------------------------------------------------------------------------- /doc/source/images/OnlineDeploymentCreate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/telco-customer-churn-on-icp4d/d5371f5d83a446ad5673cbcca3b814b926491f8a/doc/source/images/OnlineDeploymentCreate.png -------------------------------------------------------------------------------- /doc/source/images/StatusDeployed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/telco-customer-churn-on-icp4d/d5371f5d83a446ad5673cbcca3b814b926491f8a/doc/source/images/StatusDeployed.png -------------------------------------------------------------------------------- /doc/source/images/TestingDeployedModel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/telco-customer-churn-on-icp4d/d5371f5d83a446ad5673cbcca3b814b926491f8a/doc/source/images/TestingDeployedModel.png -------------------------------------------------------------------------------- 
/doc/source/images/addNewDeploymentSpace.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/telco-customer-churn-on-icp4d/d5371f5d83a446ad5673cbcca3b814b926491f8a/doc/source/images/addNewDeploymentSpace.png -------------------------------------------------------------------------------- /doc/source/images/api-reference-curl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/telco-customer-churn-on-icp4d/d5371f5d83a446ad5673cbcca3b814b926491f8a/doc/source/images/api-reference-curl.png -------------------------------------------------------------------------------- /doc/source/images/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/telco-customer-churn-on-icp4d/d5371f5d83a446ad5673cbcca3b814b926491f8a/doc/source/images/architecture.png -------------------------------------------------------------------------------- /doc/source/images/cleanup-models-and-deployments.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/telco-customer-churn-on-icp4d/d5371f5d83a446ad5673cbcca3b814b926491f8a/doc/source/images/cleanup-models-and-deployments.png -------------------------------------------------------------------------------- /doc/source/images/cpd-add-data-set.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/telco-customer-churn-on-icp4d/d5371f5d83a446ad5673cbcca3b814b926491f8a/doc/source/images/cpd-add-data-set.png -------------------------------------------------------------------------------- /doc/source/images/cpd-create-empty-project.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/telco-customer-churn-on-icp4d/d5371f5d83a446ad5673cbcca3b814b926491f8a/doc/source/images/cpd-create-empty-project.png -------------------------------------------------------------------------------- /doc/source/images/cpd-new-project-name.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/telco-customer-churn-on-icp4d/d5371f5d83a446ad5673cbcca3b814b926491f8a/doc/source/images/cpd-new-project-name.png -------------------------------------------------------------------------------- /doc/source/images/cpd-new-project.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/telco-customer-churn-on-icp4d/d5371f5d83a446ad5673cbcca3b814b926491f8a/doc/source/images/cpd-new-project.png -------------------------------------------------------------------------------- /doc/source/images/cpd-projects-menu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/telco-customer-churn-on-icp4d/d5371f5d83a446ad5673cbcca3b814b926491f8a/doc/source/images/cpd-projects-menu.png -------------------------------------------------------------------------------- /doc/source/images/createEmptyDeploymentSpace.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/telco-customer-churn-on-icp4d/d5371f5d83a446ad5673cbcca3b814b926491f8a/doc/source/images/createEmptyDeploymentSpace.png 
-------------------------------------------------------------------------------- /doc/source/images/createNewDeploymentSpace.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/telco-customer-churn-on-icp4d/d5371f5d83a446ad5673cbcca3b814b926491f8a/doc/source/images/createNewDeploymentSpace.png -------------------------------------------------------------------------------- /doc/source/images/deployment-select-model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/telco-customer-churn-on-icp4d/d5371f5d83a446ad5673cbcca3b814b926491f8a/doc/source/images/deployment-select-model.png -------------------------------------------------------------------------------- /doc/source/images/deployment-space.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/telco-customer-churn-on-icp4d/d5371f5d83a446ad5673cbcca3b814b926491f8a/doc/source/images/deployment-space.png -------------------------------------------------------------------------------- /doc/source/images/input.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/telco-customer-churn-on-icp4d/d5371f5d83a446ad5673cbcca3b814b926491f8a/doc/source/images/input.png -------------------------------------------------------------------------------- /doc/source/images/score.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/telco-customer-churn-on-icp4d/d5371f5d83a446ad5673cbcca3b814b926491f8a/doc/source/images/score.png -------------------------------------------------------------------------------- /doc/source/images/wml-1-add-asset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/telco-customer-churn-on-icp4d/d5371f5d83a446ad5673cbcca3b814b926491f8a/doc/source/images/wml-1-add-asset.png -------------------------------------------------------------------------------- /doc/source/images/wml-2-add-name-and-url.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/telco-customer-churn-on-icp4d/d5371f5d83a446ad5673cbcca3b814b926491f8a/doc/source/images/wml-2-add-name-and-url.png -------------------------------------------------------------------------------- /doc/source/images/wml-3-add-local-dataframe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/telco-customer-churn-on-icp4d/d5371f5d83a446ad5673cbcca3b814b926491f8a/doc/source/images/wml-3-add-local-dataframe.png -------------------------------------------------------------------------------- /doc/source/images/wml-3-notebook-loaded.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/telco-customer-churn-on-icp4d/d5371f5d83a446ad5673cbcca3b814b926491f8a/doc/source/images/wml-3-notebook-loaded.png -------------------------------------------------------------------------------- /doc/source/images/wml-3.2-install-packages.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/telco-customer-churn-on-icp4d/d5371f5d83a446ad5673cbcca3b814b926491f8a/doc/source/images/wml-3.2-install-packages.png 
--------------------------------------------------------------------------------
/doc/source/images/wml-4-add-dataframe.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/telco-customer-churn-on-icp4d/d5371f5d83a446ad5673cbcca3b814b926491f8a/doc/source/images/wml-4-add-dataframe.png
--------------------------------------------------------------------------------
/doc/source/images/wml-5-generated-code-dataframe.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/telco-customer-churn-on-icp4d/d5371f5d83a446ad5673cbcca3b814b926491f8a/doc/source/images/wml-5-generated-code-dataframe.png
--------------------------------------------------------------------------------
/doc/source/images/wml-6-build-pipeline-and-model.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/telco-customer-churn-on-icp4d/d5371f5d83a446ad5673cbcca3b814b926491f8a/doc/source/images/wml-6-build-pipeline-and-model.png
--------------------------------------------------------------------------------
/doc/source/images/wml-7-update-wml-credentials.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/telco-customer-churn-on-icp4d/d5371f5d83a446ad5673cbcca3b814b926491f8a/doc/source/images/wml-7-update-wml-credentials.png
--------------------------------------------------------------------------------
/doc/source/images/wml-provide-model-and-space-name.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/telco-customer-churn-on-icp4d/d5371f5d83a446ad5673cbcca3b814b926491f8a/doc/source/images/wml-provide-model-and-space-name.png
--------------------------------------------------------------------------------
/flaskapp/Procfile:
--------------------------------------------------------------------------------
1 | web: python ./telcochurn.py
--------------------------------------------------------------------------------
/flaskapp/env.sample:
--------------------------------------------------------------------------------
1 | # Copy this file to .env.
2 | # Edit the .env file with the required settings before starting the app.
3 |
4 | # 1. Required: Provide your web service URL for scoring.
5 | # E.g., MODEL_URL=https:///v4/deployments//predictions
6 | MODEL_URL=
7 |
8 |
9 | # 2. Required: fill in EITHER section A OR B below:
10 |
11 | # ### A: Authentication using username and password
12 | # Fill in the authentication URL, your CloudPak4Data username, and CloudPak4Data password.
13 | # Example:
14 | # AUTH_URL=/v1/preauth/validateAuth
15 | # AUTH_USERNAME=my_username
16 | # AUTH_PASSWORD=super_complex_password
17 | AUTH_URL=
18 | AUTH_USERNAME=
19 | AUTH_PASSWORD=
20 |
21 | # ### B: (advanced) Provide your bearer token.
22 | # Uncomment the "AUTH_TOKEN=" below and fill in your bearer token.
23 | # You can generate this token by following the lab instructions. This token should start with "Bearer ".
24 | # Note that these tokens will expire after a few hours, so you'll need to generate a new one again later.
25 | # Example:
26 | # AUTH_TOKEN=Bearer abCdwFghIjKLMnO1PqRsTuV2wWX3YzaBCDE4.fgH1r2... (and so on, tokens are long).
27 | # AUTH_TOKEN=
28 |
29 |
30 | # Optional: You can override the server's host and port here.
--------------------------------------------------------------------------------
/flaskapp/manifest.yml:
--------------------------------------------------------------------------------
1 | ---
2 | applications:
3 | - memory: 128MB
4 |   disk_quota: 256MB
5 |   name: telco-learning-path-icp4d
--------------------------------------------------------------------------------
/flaskapp/requirements.txt:
--------------------------------------------------------------------------------
1 | python-dotenv==0.10.1
2 | Flask==1.1.1
3 | requests==2.20.0
--------------------------------------------------------------------------------
/flaskapp/runtime.txt:
--------------------------------------------------------------------------------
1 | python-3.6.10
--------------------------------------------------------------------------------
/flaskapp/telcochurn.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 | 
3 | # -*- coding: utf-8 -*-
4 | # Licensed under the Apache License, Version 2.0 (the "License"); you may
5 | # not use this file except in compliance with the License. You may obtain
6 | # a copy of the License at
7 | #
8 | #      http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | # License for the specific language governing permissions and limitations
14 | # under the License.
15 | 
16 | import json
17 | import os
18 | import requests
19 | from dotenv import load_dotenv
20 | from flask import Flask, request, session, render_template, flash
21 | from requests.auth import HTTPBasicAuth
22 | 
23 | app = Flask(__name__)
24 | 
25 | app.config.update(dict(
26 |     DEBUG=True,
27 |     SECRET_KEY=os.environ.get('SECRET_KEY', 'development key')
28 | ))
29 | 
30 | strings = {
31 |     "gender": ['Female', 'Male'],
32 |     "Partner": ['Yes', 'No'],
33 |     "Dependents": ['No', 'Yes'],
34 |     "PhoneService": ['No', 'Yes'],
35 |     "MultipleLines": ['No phone service', 'No', 'Yes'],
36 |     "InternetService": ['DSL', 'Fiber optic', 'No'],
37 |     "OnlineSecurity": ['No', 'Yes', 'No internet service'],
38 |     "OnlineBackup": ['Yes', 'No', 'No internet service'],
39 |     "DeviceProtection": ['No', 'Yes', 'No internet service'],
40 |     "TechSupport": ['No', 'Yes', 'No internet service'],
41 |     "StreamingTV": ['No', 'Yes', 'No internet service'],
42 |     "StreamingMovies": ['No', 'Yes', 'No internet service'],
43 |     "Contract": ['Month-to-month', 'One year', 'Two year'],
44 |     "PaperlessBilling": ['Yes', 'No'],
45 |     "PaymentMethod": ['Electronic check',
46 |                       'Mailed check',
47 |                       'Bank transfer (automatic)',
48 |                       'Credit card (automatic)']
49 | }
50 | 
51 | # min, max, default value
52 | floats = {
53 |     "MonthlyCharges": [0, 1000, 100],
54 |     "TotalCharges": [0, 50000, 1000]
55 | }
56 | 
57 | # min, max, default value
58 | ints = {
59 |     "SeniorCitizen": [0, 1, 0],
60 |     "tenure": [0, 100, 2],
61 | }
62 | 
63 | labels = ["No Churn", "Churn"]
64 | 
65 | 
66 | def generate_input_lines():
67 |     result = f'<table>'
68 | 
69 |     counter = 0
70 |     for k in floats.keys():
71 |         minn, maxx, vall = floats[k]
72 |         if (counter % 2 == 0):
73 |             result += f'<tr>'
74 |         result += f'<th>{k}</th>'
75 |         result += f'<td><input type="number" step="0.01" min="{minn}" max="{maxx}" value="{vall}" name="{k}">'
76 |         result += f'</td>'
77 |         if (counter % 2 == 1):
78 |             result += f'</tr>'
79 |         counter = counter + 1
80 | 
81 |     counter = 0
82 |     for k in ints.keys():
83 |         minn, maxx, vall = ints[k]
84 |         if (counter % 2 == 0):
85 |             result += f'<tr>'
86 |         result += f'<th>{k}</th>'
87 |         result += f'<td><input type="number" min="{minn}" max="{maxx}" value="{vall}" name="{k}">'
88 |         result += f'</td>'
89 |         if (counter % 2 == 1):
90 |             result += f'</tr>'
91 |         counter = counter + 1
92 | 
93 |     counter = 0
94 |     for k in strings.keys():
95 |         if (counter % 2 == 0):
96 |             result += f'<tr>'
97 |         result += f'<th>{k}</th>'
98 |         result += f'<td><select name="{k}">'
99 |         for value in strings[k]:
100 |             result += f'<option value="{value}">{value}</option>'
101 |         result += f'</select>'
102 |         result += f'</td>'
103 |         if (counter % 2 == 1):
104 |             result += f'</tr>'
105 |         counter = counter + 1
106 | 
107 |     result += f'</table>'
108 | 
109 |     return result
110 | 
111 | 
112 | app.jinja_env.globals.update(generate_input_lines=generate_input_lines)
113 | 
114 | 
115 | def get_token():
116 |     auth_token = os.environ.get('AUTH_TOKEN')
117 |     auth_username = os.environ.get('AUTH_USERNAME')
118 |     auth_password = os.environ.get('AUTH_PASSWORD')
119 |     auth_url = os.environ.get('AUTH_URL')
120 | 
121 |     if (auth_token):
122 |         # Both auth styles are configured: ambiguous, so refuse to start.
123 |         if (auth_username and auth_password):
124 |             raise EnvironmentError('[ENV VARIABLES] Please set either "AUTH_TOKEN" or ("AUTH_USERNAME", "AUTH_PASSWORD", and "AUTH_URL"), not both.')
125 |         # Only the token is set: use it directly.
126 |         else:
127 |             return auth_token
128 |     else:
129 |         # No token: username, password and auth URL are all required.
130 |         if not (auth_username and auth_password):
131 |             raise EnvironmentError('[ENV VARIABLES] Please set "AUTH_USERNAME", "AUTH_PASSWORD", and "AUTH_URL" since "AUTH_TOKEN" is not set.')
132 |         # Exchange the username/password for an access token.
133 |         else:
134 |             response_preauth = requests.get(auth_url, auth=HTTPBasicAuth(auth_username, auth_password), verify=False)  # verify=False: CP4D clusters commonly use self-signed certificates
135 |             if response_preauth.status_code == 200:
136 |                 return json.loads(response_preauth.text)['accessToken']
137 |             else:
138 |                 raise Exception(f"Authentication returned {response_preauth}: {response_preauth.text}")
139 | 
140 | 
141 | class churnForm():
142 | 
143 |     @app.route('/', methods=['GET', 'POST'])
144 |     def index():
145 | 
146 |         if request.method == 'POST':
147 |             ID = 999
148 | 
149 |             session['ID'] = ID
150 |             data = {}
151 | 
152 |             for k, v in request.form.items():
153 |                 data[k] = v
154 |                 session[k] = v
155 | 
156 |             scoring_href = os.environ.get('MODEL_URL')
157 | 
158 |             if not (scoring_href):
159 |                 raise EnvironmentError('[ENV VARIABLES] Please set "MODEL_URL".')
160 | 
161 |             for field in ints.keys():
162 |                 data[field] = int(data[field])
163 |             for field in floats.keys():
164 |                 data[field] = float(data[field])
165 | 
166 |             input_data = list(data.keys())
167 |             input_values = list(data.values())
168 | 
169 |             payload_scoring = {"input_data": [
170 |                 {"fields": input_data, "values": [input_values]}
171 |             ]}
172 |             print("Payload is: ")
173 |             print(payload_scoring)
174 |             header_online = {
175 |                 'Cache-Control': 'no-cache',
176 |                 'Content-Type': 'application/json',
177 |                 'Authorization': 'Bearer ' + get_token()
178 |             }
179 |             response_scoring = requests.post(
180 |                 scoring_href,
181 |                 verify=False,  # self-signed certificates on the CP4D cluster
182 |                 json=payload_scoring,
183 |                 headers=header_online)
184 |             result = response_scoring.text
185 |             print("Result is ", result)
186 |             result_json = json.loads(result)
187 | 
188 |             result_keys = result_json['predictions'][0]['fields']
189 |             result_vals = result_json['predictions'][0]['values']
190 | 
191 |             result_dict = dict(zip(result_keys, result_vals[0]))
192 | 
193 |             churn_risk = result_dict["predictedLabel"].lower()
194 |             no_percent = result_dict["probability"][0] * 100
195 |             yes_percent = result_dict["probability"][1] * 100
196 |             flash('Percentage of this customer leaving is: %.0f%%'
197 |                   % yes_percent)
198 |             return render_template(
199 |                 'score.html',
200 |                 result=result_dict,
201 |                 churn_risk=churn_risk,
202 |                 yes_percent=yes_percent,
203 |                 no_percent=no_percent,
204 |                 response_scoring=response_scoring,
205 |                 labels=labels)
206 | 
207 |         else:
208 |             return render_template('input.html')
209 | 
210 | 
211 | load_dotenv(os.path.join(os.path.dirname(__file__), ".env"))
212 | port = os.environ.get('PORT', '5000')
213 | host = os.environ.get('HOST', '0.0.0.0')
214 | if __name__ == "__main__":
215 |     app.run(host=host, port=int(port))
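The scoring payload assembled in telcochurn.py can also be exercised outside the app, which is handy when debugging the deployment. The sketch below mirrors it with one hard-coded customer record; the URL and token are placeholders you must fill in, and the field order is assumed to follow the form fields above:

```python
# score_once.py -- call the scoring endpoint directly (hypothetical helper).
# MODEL_URL and TOKEN are placeholders; see env.sample for how to obtain them.
import requests

MODEL_URL = "https://<Cluster-URL>/v4/deployments/<Deployment-ID>/predictions"
TOKEN = "<access-token>"

fields = ["gender", "SeniorCitizen", "Partner", "Dependents", "tenure",
          "PhoneService", "MultipleLines", "InternetService", "OnlineSecurity",
          "OnlineBackup", "DeviceProtection", "TechSupport", "StreamingTV",
          "StreamingMovies", "Contract", "PaperlessBilling", "PaymentMethod",
          "MonthlyCharges", "TotalCharges"]
values = [["Female", 0, "Yes", "No", 2, "Yes", "No", "DSL", "No", "Yes", "No",
           "No", "No", "No", "Month-to-month", "Yes", "Electronic check",
           100.0, 1000.0]]

payload = {"input_data": [{"fields": fields, "values": values}]}
resp = requests.post(MODEL_URL, json=payload, verify=False,
                     headers={"Content-Type": "application/json",
                              "Authorization": "Bearer " + TOKEN})
print(resp.json()["predictions"][0])
```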
--------------------------------------------------------------------------------
/flaskapp/templates/input.html:
--------------------------------------------------------------------------------
1 | <!--
2 |   Licensed under the Apache License, Version 2.0 (the "License"); you may
3 |   not use this file except in compliance with the License. You may obtain
4 |   a copy of the License at
5 | 
6 |       http://www.apache.org/licenses/LICENSE-2.0
7 | 
8 |   Unless required by applicable law or agreed to in writing, software
9 |   distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
10 |   WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
11 |   License for the specific language governing permissions and limitations
12 |   under the License.
13 | -->
14 | 
15 | {% extends "layout.html" %}
16 | 
17 | {% block body %}
18 | 
19 | <h2>Will a customer stay or leave?</h2>
20 | <p>Input the following information and click submit to make a prediction.</p>
21 | 
22 | <form method="post">
23 |   <div>
24 |     <div>
25 |       {{ generate_input_lines() | safe }}
26 |     </div>
27 |     <input type="submit" value="Submit">
28 |   </div>
29 | </form>
30 | 
31 | {% endblock %}
32 | 
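input.html pipes generate_input_lines() through Jinja's `safe` filter because the helper returns raw HTML that must not be auto-escaped. To eyeball that markup without starting the server, something like the following can be run from the flaskapp directory (a sketch; it assumes the app's dependencies are installed, since importing telcochurn creates the Flask app object):

```python
# Render the generated form fragment without starting the server.
from telcochurn import generate_input_lines

html = generate_input_lines()
print(html[:120])                  # peek at the opening of the markup
assert html.startswith("<table>")  # the helper emits one complete table
```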
--------------------------------------------------------------------------------
/flaskapp/templates/layout.html:
--------------------------------------------------------------------------------
1 | <!--
2 |   Licensed under the Apache License, Version 2.0 (the "License"); you may
3 |   not use this file except in compliance with the License. You may obtain
4 |   a copy of the License at
5 | 
6 |       http://www.apache.org/licenses/LICENSE-2.0
7 | 
8 |   Unless required by applicable law or agreed to in writing, software
9 |   distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
10 |   WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
11 |   License for the specific language governing permissions and limitations
12 |   under the License.
13 | -->
14 | 
15 | <!DOCTYPE html>
16 | <html>
17 | <head>
18 |   <meta charset="utf-8">
19 |   <meta name="viewport" content="width=device-width, initial-scale=1">
20 |   <title>Customer Churn Predictor</title>
21 |   <!-- stylesheet and script includes plus inline styles (original lines 21-45) are not recoverable from this dump -->
46 |   {% block headextension %}{% endblock %}
47 | </head>
48 | <body>
49 | <div class="container">
50 |   <h1>Customer Churn Predictor</h1>
51 |   <div>
52 |     {% for message in get_flashed_messages() %}
53 |       <div class="flash">{{ message }}</div>
54 |     {% endfor %}
55 |     {% block body %}{% endblock %}
56 |   </div>
57 | </div>
58 | </body></html>
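The flashed-messages loop above depends on Flask's session support, which is why telcochurn.py configures a SECRET_KEY. A minimal, self-contained sketch of the same pattern (hypothetical, not part of this repo):

```python
from flask import Flask, flash, render_template_string

app = Flask(__name__)
app.secret_key = "development key"  # sessions (and therefore flash) need a key

PAGE = '{% for message in get_flashed_messages() %}<p>{{ message }}</p>{% endfor %}'

@app.route("/")
def index():
    # Messages flashed during a request are available to the template it renders.
    flash("Percentage of this customer leaving is: 42%")
    return render_template_string(PAGE)

if __name__ == "__main__":
    app.run()
```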
--------------------------------------------------------------------------------
/flaskapp/templates/score.html:
--------------------------------------------------------------------------------
1 | <!--
2 |   Licensed under the Apache License, Version 2.0 (the "License"); you may
3 |   not use this file except in compliance with the License. You may obtain
4 |   a copy of the License at
5 | 
6 |       http://www.apache.org/licenses/LICENSE-2.0
7 | 
8 |   Unless required by applicable law or agreed to in writing, software
9 |   distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
10 |   WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
11 |   License for the specific language governing permissions and limitations
12 |   under the License.
13 | -->
14 | 
15 | {% extends "layout.html" %}
16 | 
17 | {% block headextension %}
18 | <!-- charting includes and the inline chart script (original lines 18-41) are not recoverable from this dump -->
42 | {% endblock %}
43 | 
44 | {% block body %}
45 | 
46 | <h2>Customer churn risk prediction and suggested offer</h2>
47 | <div>
48 |   {% if yes_percent > 25 %}
49 |     This customer has High risk. The recommended offer is lower fees for 6 months.
50 |   {% elif yes_percent > 15 %}
51 |     This customer has Some risk. The recommended offer is lower fees for 3 months.
52 |   {% else %}
53 |     This customer has Low risk. The recommended offer is lower fees for 1 month.
54 |   {% endif %}
55 | </div>
56 | 
57 | <div>
58 | 
59 | <h3>Characteristics analyzed</h3>
60 | <div>
61 | <div>
62 | <table>
63 |   <thead>
64 |     <tr>
65 |       <th>Attribute</th><th>Value</th>
66 |     </tr>
67 |   </thead>
68 |   {% for k in request.form.keys() %}
69 |   <tr>
70 |     <td>{{k}}</td>
71 |     <td>{{request.form[k]}}</td>
72 |   </tr>
73 |   {% endfor %}
74 | </table>
75 | 
76 | </div>
77 | </div>
78 | </div>
79 | 
80 | <!-- chart markup (original lines 80-81) is not recoverable from this dump -->
82 | 
83 | {% endblock %}
84 | 
85 | 
--------------------------------------------------------------------------------
/notebooks/Telco-customer-churn-ICP4D.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "markdown",
5 |    "metadata": {},
6 |    "source": [
7 |     "# Predicting Telco Customer Churn using SparkML on IBM Cloud Pak for Data (ICP4D)"
8 |    ]
9 |   },
10 |   {
11 |    "cell_type": "markdown",
12 |    "metadata": {},
13 |    "source": [
14 |     "We'll use this notebook to create a machine learning model to predict customer churn. We will build the prediction model with the SparkML library.\n",
15 |     "\n",
16 |     "This notebook walks you through these steps:\n",
17 |     "\n",
18 |     "- Load and visualize the data set.\n",
19 |     "- Build a predictive model with the SparkML API.\n",
20 |     "- Save the model in the ML repository."
21 |    ]
22 |   },
23 |   {
24 |    "cell_type": "markdown",
25 |    "metadata": {},
26 |    "source": [
27 |     "## 1.0 Install required packages\n",
28 |     "\n",
29 |     "There are a couple of Python packages we will use in this notebook. First we make sure the Watson Machine Learning client v3 is removed (it's not installed by default) and then install/upgrade the v4 version of the client (this package is installed by default on CP4D).\n",
30 |     "\n",
31 |     "WML Client: https://wml-api-pyclient-dev-v4.mybluemix.net/#repository"
32 |    ]
33 |   },
34 |   {
35 |    "cell_type": "code",
36 |    "execution_count": null,
37 |    "metadata": {},
38 |    "outputs": [],
39 |    "source": [
40 |     "!pip uninstall watson-machine-learning-client -y\n",
41 |     "!pip install --user watson-machine-learning-client-v4==1.0.103 --upgrade | tail -n 1\n",
42 |     "!pip install --user pyspark==2.3.3 --upgrade | tail -n 1\n",
43 |     "!pip install --user scikit-learn==0.20.3 --upgrade | tail -n 1"
44 |    ]
45 |   },
46 |   {
47 |    "cell_type": "code",
48 |    "execution_count": null,
49 |    "metadata": {},
50 |    "outputs": [],
51 |    "source": [
52 |     "import pandas as pd\n",
53 |     "import numpy as np\n",
54 |     "import json\n",
55 |     "import os\n",
56 |     "\n",
57 |     "# Import the Project Library to read/write project assets\n",
58 |     "from project_lib import Project\n",
59 |     "project = Project.access()\n",
60 |     "\n",
61 |     "import warnings\n",
62 |     "warnings.filterwarnings(\"ignore\")"
63 |    ]
64 |   },
65 |   {
66 |    "cell_type": "markdown",
67 |    "metadata": {},
68 |    "source": [
69 |     "## 2.0 Load and Clean data\n",
70 |     "\n",
71 |     "We'll load our data as a pandas data frame.\n",
72 |     "\n",
73 |     "**<< FOLLOW THE INSTRUCTIONS BELOW TO LOAD THE DATASET >>**\n",
74 |     "\n",
75 |     "* Highlight the cell below by clicking it.\n",
76 |     "* Click the `10/01` \"Find data\" icon in the upper right of the notebook.\n",
77 |     "* If you are using Virtualized data, begin by choosing the `Files` tab. Then choose your virtualized data (e.g., MYSCHEMA.BILLINGPRODUCTCUSTOMERS), click `Insert to code` and choose `Insert Pandas DataFrame`.\n",
78 |     "* If you are using this notebook without virtualized data, add the locally uploaded file `Telco-Customer-Churn.csv` by choosing the `Files` tab. Then choose the `Telco-Customer-Churn.csv`. Click `Insert to code` and choose `Insert Pandas DataFrame`.\n",
79 |     "* The code to bring the data into the notebook environment and create a Pandas DataFrame will be added to the cell below.\n",
80 |     "* Run the cell.\n"
81 |    ]
82 |   },
83 |   {
84 |    "cell_type": "code",
85 |    "execution_count": null,
86 |    "metadata": {},
87 |    "outputs": [],
88 |    "source": [
89 |     "# Place cursor below and insert the Pandas DataFrame for the Telco churn data\n"
90 |    ]
91 |   },
92 |   {
93 |    "cell_type": "markdown",
94 |    "metadata": {},
95 |    "source": [
96 |     "We'll use the Pandas naming convention df for our DataFrame. Make sure that the cell below uses the name for the dataframe used above. For the locally uploaded file it should look like df_data_1 or df_data_2 or df_data_x. For the virtualized data case it should look like data_df_1 or data_df_2 or data_df_x.\n",
97 |     "\n",
98 |     "**<< UPDATE THE VARIABLE ASSIGNMENT TO THE VARIABLE GENERATED ABOVE. >>**"
99 |    ]
100 |   },
101 |   {
102 |    "cell_type": "code",
103 |    "execution_count": null,
104 |    "metadata": {},
105 |    "outputs": [],
106 |    "source": [
107 |     "# for virtualized data\n",
108 |     "# df = data_df_1\n",
109 |     "\n",
110 |     "# for local upload\n",
111 |     "df = df_data_1"
112 |    ]
113 |   },
114 |   {
115 |    "cell_type": "markdown",
116 |    "metadata": {},
117 |    "source": [
118 |     "### 2.1 Drop CustomerID feature (column)"
119 |    ]
120 |   },
121 |   {
122 |    "cell_type": "code",
123 |    "execution_count": null,
124 |    "metadata": {},
125 |    "outputs": [],
126 |    "source": [
127 |     "df = df.drop('customerID', axis=1)\n",
128 |     "df.head(5)"
129 |    ]
130 |   },
131 |   {
132 |    "cell_type": "markdown",
133 |    "metadata": {},
134 |    "source": [
135 |     "### 2.2 Examine the data types of the features"
136 |    ]
137 |   },
138 |   {
139 |    "cell_type": "code",
140 |    "execution_count": null,
141 |    "metadata": {},
142 |    "outputs": [],
143 |    "source": [
144 |     "df.info()"
145 |    ]
146 |   },
147 |   {
148 |    "cell_type": "code",
149 |    "execution_count": null,
150 |    "metadata": {},
151 |    "outputs": [],
152 |    "source": [
153 |     "# Statistics for the columns (features). Set it to all, since the default is to describe just the numeric features.\n",
154 |     "df.describe(include = 'all')"
155 |    ]
156 |   },
157 |   {
158 |    "cell_type": "markdown",
159 |    "metadata": {},
160 |    "source": [
161 |     "We see that tenure ranges from 0 (new customer) to 6 years, monthly charges range from $18 to $118, etc."
162 |    ]
163 |   },
164 |   {
165 |    "cell_type": "markdown",
166 |    "metadata": {},
167 |    "source": [
168 |     "### 2.3 Convert the TotalCharges column to numeric if it is detected as an object\n",
169 |     "\n",
170 |     "If the above `df.info` shows the \"TotalCharges\" column as an object, we'll need to convert it to numeric. If you have already done this during a previous exercise for \"Data Visualization with Data Refinery\", you can skip to step `2.4`."
171 |    ]
172 |   },
173 |   {
174 |    "cell_type": "code",
175 |    "execution_count": null,
176 |    "metadata": {},
177 |    "outputs": [],
178 |    "source": [
179 |     "totalCharges = df.columns.get_loc(\"TotalCharges\")\n",
180 |     "new_col = pd.to_numeric(df.iloc[:, totalCharges], errors='coerce')\n",
181 |     "df.iloc[:, totalCharges] = pd.Series(new_col)"
182 |    ]
183 |   },
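The `errors='coerce'` argument in the conversion cell above is what turns the blank strings hiding in TotalCharges into NaN instead of raising an exception. A tiny standalone illustration:

```python
import pandas as pd

# A blank string like the ones in the raw Telco CSV.
s = pd.Series(["29.85", " ", "1889.5"])
print(pd.to_numeric(s, errors="coerce"))  # -> 29.85, NaN, 1889.5 (no exception)
```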
184 |   {
185 |    "cell_type": "code",
186 |    "execution_count": null,
187 |    "metadata": {},
188 |    "outputs": [],
189 |    "source": [
190 |     "# Statistics for the columns (features). Set it to all, since the default is to describe just the numeric features.\n",
191 |     "df.describe(include = 'all')"
192 |    ]
193 |   },
194 |   {
195 |    "cell_type": "markdown",
196 |    "metadata": {},
197 |    "source": [
198 |     "We now see statistics for the `TotalCharges` feature."
199 |    ]
200 |   },
201 |   {
202 |    "cell_type": "markdown",
203 |    "metadata": {},
204 |    "source": [
205 |     "\n",
206 |     "\n",
207 |     "### 2.4 Any NaN values should be removed to create a more accurate model."
208 |    ]
209 |   },
210 |   {
211 |    "cell_type": "code",
212 |    "execution_count": null,
213 |    "metadata": {},
214 |    "outputs": [],
215 |    "source": [
216 |     "# Check if we have any NaN values and see which features have missing values that should be addressed\n",
217 |     "print(df.isnull().values.any())\n",
218 |     "df.isnull().sum()"
219 |    ]
220 |   },
221 |   {
222 |    "cell_type": "markdown",
223 |    "metadata": {},
224 |    "source": [
225 |     "We should see that the `TotalCharges` column has missing values. There are various ways we can address this issue:\n",
226 |     "\n",
227 |     "- Drop records with missing values.\n",
228 |     "- Fill in the missing value with one of the following strategies: zero, the mean of the values for the column, a random value, etc."
229 |    ]
230 |   },
231 |   {
232 |    "cell_type": "code",
233 |    "execution_count": null,
234 |    "metadata": {},
235 |    "outputs": [],
236 |    "source": [
237 |     "# Handle missing values for nan_column (TotalCharges)\n",
238 |     "from sklearn.impute import SimpleImputer\n",
239 |     "\n",
240 |     "# Find the column number for TotalCharges (starting at 0).\n",
241 |     "total_charges_idx = df.columns.get_loc(\"TotalCharges\")\n",
242 |     "imputer = SimpleImputer(missing_values=np.nan, strategy='mean')\n",
243 |     "\n",
244 |     "df.iloc[:, total_charges_idx] = imputer.fit_transform(df.iloc[:, total_charges_idx].values.reshape(-1, 1))\n",
245 |     "df.iloc[:, total_charges_idx] = pd.Series(df.iloc[:, total_charges_idx])"
246 |    ]
247 |   },
248 |   {
249 |    "cell_type": "code",
250 |    "execution_count": null,
251 |    "metadata": {},
252 |    "outputs": [],
253 |    "source": [
254 |     "# Validate that we have addressed any NaN values\n",
255 |     "print(df.isnull().values.any())\n",
256 |     "df.isnull().sum()"
257 |    ]
258 |   },
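For a single column, the mean-fill that SimpleImputer performs above can also be written in plain pandas; the following is an equivalent alternative sketch, not what the notebook runs:

```python
import numpy as np
import pandas as pd

df = pd.DataFrame({"TotalCharges": [29.85, np.nan, 1889.5]})
# Replace NaN with the column mean, as strategy='mean' does.
df["TotalCharges"] = df["TotalCharges"].fillna(df["TotalCharges"].mean())
print(df)
```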
259 |   {
260 |    "cell_type": "markdown",
261 |    "metadata": {},
262 |    "source": [
263 |     "\n",
264 |     "### 2.5 Categorize Features\n",
265 |     "\n",
266 |     "We will categorize some of the columns / features based on whether they are categorical values or continuous (i.e., numerical) values. We will use this in later sections to build visualizations."
267 |    ]
268 |   },
269 |   {
270 |    "cell_type": "code",
271 |    "execution_count": null,
272 |    "metadata": {},
273 |    "outputs": [],
274 |    "source": [
275 |     "columns_idx = np.s_[0:] # Slice of first row(header) with all columns.\n",
276 |     "first_record_idx = np.s_[0] # Index of first record\n",
277 |     "\n",
278 |     "string_fields = [type(fld) is str for fld in df.iloc[first_record_idx, columns_idx]] # All string fields\n",
279 |     "all_features = [x for x in df.columns if x != 'Churn']\n",
280 |     "categorical_columns = list(np.array(df.columns)[columns_idx][string_fields])\n",
281 |     "categorical_features = [x for x in categorical_columns if x != 'Churn']\n",
282 |     "continuous_features = [x for x in all_features if x not in categorical_features]\n",
283 |     "\n",
284 |     "#print('All Features: ', all_features)\n",
285 |     "#print('\\nCategorical Features: ', categorical_features)\n",
286 |     "#print('\\nContinuous Features: ', continuous_features)\n",
287 |     "#print('\\nAll Categorical Columns: ', categorical_columns)"
288 |    ]
289 |   },
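The split above keys off the Python type of the first record's values; pandas can derive the same lists from the column dtypes, which makes a handy cross-check (a sketch using a toy frame):

```python
import pandas as pd

df = pd.DataFrame({"gender": ["Female"], "tenure": [2],
                   "MonthlyCharges": [29.85], "Churn": ["No"]})
categorical_features = [c for c in df.select_dtypes(include="object").columns if c != "Churn"]
continuous_features = list(df.select_dtypes(exclude="object").columns)
print(categorical_features, continuous_features)
```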
290 |   {
291 |    "cell_type": "markdown",
292 |    "metadata": {},
293 |    "source": [
294 |     "### 2.6 Visualize data\n",
295 |     "\n",
296 |     "Data visualization can be used to find patterns, detect outliers, understand distribution and more. We can use graphs such as:\n",
297 |     "\n",
298 |     "- Histograms, boxplots, etc.: To find the distribution / spread of our continuous variables.\n",
299 |     "- Bar charts: To show frequency in categorical values.\n"
300 |    ]
301 |   },
302 |   {
303 |    "cell_type": "code",
304 |    "execution_count": null,
305 |    "metadata": {},
306 |    "outputs": [],
307 |    "source": [
308 |     "import seaborn as sns\n",
309 |     "import matplotlib.pyplot as plt\n",
310 |     "\n",
311 |     "from sklearn.preprocessing import LabelEncoder\n",
312 |     "\n",
313 |     "%matplotlib inline\n",
314 |     "sns.set(style=\"darkgrid\")\n",
315 |     "sns.set_palette(\"hls\", 3)"
316 |    ]
317 |   },
318 |   {
319 |    "cell_type": "markdown",
320 |    "metadata": {},
321 |    "source": [
322 |     "First, we get a high-level view of the distribution of `Churn`: what percentage of customers in our dataset are churning vs. not churning?"
323 |    ]
324 |   },
325 |   {
326 |    "cell_type": "code",
327 |    "execution_count": null,
328 |    "metadata": {},
329 |    "outputs": [],
330 |    "source": [
331 |     "print(df.groupby(['Churn']).size())\n",
332 |     "churn_plot = sns.countplot(data=df, x='Churn', order=df.Churn.value_counts().index)\n",
333 |     "plt.ylabel('Count')\n",
334 |     "for p in churn_plot.patches:\n",
335 |     "    height = p.get_height()\n",
336 |     "    churn_plot.text(p.get_x()+p.get_width()/2., height + 1,'{0:.0%}'.format(height/float(len(df))),ha=\"center\") \n",
337 |     "plt.show()"
338 |    ]
339 |   },
340 |   {
341 |    "cell_type": "markdown",
342 |    "metadata": {},
343 |    "source": [
344 |     "We can use frequency count charts to understand the categorical features relative to `Churn`:\n",
345 |     "\n",
346 |     "- We can see that for the `gender` feature, we have relatively equal rates of churn.\n",
347 |     "- We can see that for the `InternetService` feature, we have higher churn for those that have \"Fiber optic\" service versus those with \"DSL\".\n"
348 |    ]
349 |   },
350 |   {
351 |    "cell_type": "code",
352 |    "execution_count": null,
353 |    "metadata": {},
354 |    "outputs": [],
355 |    "source": [
356 |     "# Categorical feature count plots\n",
357 |     "f, ((ax1, ax2, ax3), (ax4, ax5, ax6), (ax7, ax8, ax9), (ax10, ax11, ax12), (ax13, ax14, ax15)) = plt.subplots(5, 3, figsize=(20, 20))\n",
358 |     "ax = [ax1, ax2, ax3, ax4, ax5, ax6, ax7, ax8, ax9, ax10, ax11, ax12, ax13, ax14, ax15 ]\n",
359 |     "\n",
360 |     "for i in range(len(categorical_features)):\n",
361 |     "    sns.countplot(x = categorical_features[i], hue=\"Churn\", data=df, ax=ax[i])"
362 |    ]
363 |   },
364 |   {
365 |    "cell_type": "markdown",
366 |    "metadata": {},
367 |    "source": [
368 |     "We can use histogram charts to understand the distribution of our continuous / numerical features relative to `Churn`:\n",
369 |     "\n",
370 |     "- We can see that for the `MonthlyCharges` feature, customers that churn tend to pay higher monthly fees than those that stay.\n",
371 |     "- We can see that for the `tenure` feature, customers that churn tend to be relatively new customers."
372 |    ]
373 |   },
374 |   {
375 |    "cell_type": "code",
376 |    "execution_count": null,
377 |    "metadata": {},
378 |    "outputs": [],
379 |    "source": [
380 |     "# Continuous feature histograms.\n",
381 |     "fig, ax = plt.subplots(2, 2, figsize=(28, 8))\n",
382 |     "df[df.Churn == 'No'][continuous_features].hist(bins=20, color=\"blue\", alpha=0.5, ax=ax)\n",
383 |     "df[df.Churn == 'Yes'][continuous_features].hist(bins=20, color=\"orange\", alpha=0.5, ax=ax)\n",
384 |     "\n",
385 |     "# Or use displots\n",
386 |     "#sns.set_palette(\"hls\", 3)\n",
387 |     "#f, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(25, 25))\n",
388 |     "#ax = [ax1, ax2, ax3, ax4]\n",
389 |     "#for i in range(len(continuous_features)):\n",
390 |     "#    sns.distplot(df[continuous_features[i]], bins=20, hist=True, ax=ax[i])"
391 |    ]
392 |   },
393 |   {
394 |    "cell_type": "code",
395 |    "execution_count": null,
396 |    "metadata": {
397 |     "scrolled": true
398 |    },
399 |    "outputs": [],
400 |    "source": [
401 |     "# Create Grid for pairwise relationships\n",
402 |     "gr = sns.PairGrid(df, height=5, hue=\"Churn\")\n",
403 |     "gr = gr.map_diag(plt.hist)\n",
404 |     "gr = gr.map_offdiag(plt.scatter)\n",
405 |     "gr = gr.add_legend()"
406 |    ]
407 |   },
408 |   {
409 |    "cell_type": "code",
410 |    "execution_count": null,
411 |    "metadata": {},
412 |    "outputs": [],
413 |    "source": [
414 |     "# Plot boxplots of numerical columns. More variation in the boxplot implies higher significance. \n",
415 |     "f, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(25, 25))\n",
416 |     "ax = [ax1, ax2, ax3, ax4]\n",
417 |     "\n",
418 |     "for i in range(len(continuous_features)):\n",
419 |     "    sns.boxplot(x = 'Churn', y = continuous_features[i], data=df, ax=ax[i])"
420 |    ]
421 |   },
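If rendering plots is slow in a constrained environment, much of the same signal can be read from a plain crosstab, for example the churn rate per contract type (an alternative sketch with toy data):

```python
import pandas as pd

df = pd.DataFrame({"Contract": ["Month-to-month", "Two year", "Month-to-month", "One year"],
                   "Churn": ["Yes", "No", "No", "No"]})
# normalize='index' turns counts into per-contract churn rates.
print(pd.crosstab(df["Contract"], df["Churn"], normalize="index"))
```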
422 |   {
423 |    "cell_type": "markdown",
424 |    "metadata": {},
425 |    "source": [
426 |     "## 3.0 Create a model\n",
427 |     "\n",
428 |     "Now we can create our machine learning model. You could use the insights / intuition gained from the data visualization steps above to decide what kind of model to create or which features to use. We will create a simple classification model."
429 |    ]
430 |   },
431 |   {
432 |    "cell_type": "code",
433 |    "execution_count": null,
434 |    "metadata": {},
435 |    "outputs": [],
436 |    "source": [
437 |     "from pyspark.sql import SparkSession\n",
438 |     "import pandas as pd\n",
439 |     "import json\n",
440 |     "\n",
441 |     "spark = SparkSession.builder.getOrCreate()\n",
442 |     "df_data = spark.createDataFrame(df)\n",
443 |     "df_data.head()"
444 |    ]
445 |   },
446 |   {
447 |    "cell_type": "markdown",
448 |    "metadata": {},
449 |    "source": [
450 |     "### 3.1 Split the data into training and test sets"
451 |    ]
452 |   },
453 |   {
454 |    "cell_type": "code",
455 |    "execution_count": null,
456 |    "metadata": {},
457 |    "outputs": [],
458 |    "source": [
459 |     "spark_df = df_data\n",
460 |     "(train_data, test_data) = spark_df.randomSplit([0.8, 0.2], 24)\n",
461 |     "\n",
462 |     "print(\"Number of records for training: \" + str(train_data.count()))\n",
463 |     "print(\"Number of records for evaluation: \" + str(test_data.count()))"
464 |    ]
465 |   },
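The second argument to randomSplit is a seed, so the 80/20 split above is reproducible from run to run. A minimal standalone illustration (assumes a local pyspark install):

```python
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
df = spark.range(100)
# The same seed (24) yields the same partitioning on every run.
train, test = df.randomSplit([0.8, 0.2], 24)
print(train.count(), test.count())
```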
466 |   {
467 |    "cell_type": "markdown",
468 |    "metadata": {},
469 |    "source": [
470 |     "### 3.2 Examine the Spark DataFrame Schema\n",
471 |     "Look at the data types to determine requirements for feature engineering."
472 |    ]
473 |   },
474 |   {
475 |    "cell_type": "code",
476 |    "execution_count": null,
477 |    "metadata": {},
478 |    "outputs": [],
479 |    "source": [
480 |     "spark_df.printSchema()"
481 |    ]
482 |   },
483 |   {
484 |    "cell_type": "markdown",
485 |    "metadata": {},
486 |    "source": [
487 |     "### 3.3 Use StringIndexer to encode a string column of labels to a column of label indices\n",
488 |     "\n",
489 |     "We are using the Pipeline package to build the development steps as a pipeline. \n",
490 |     "We are using StringIndexer to handle the categorical / string features from the dataset: StringIndexer encodes a string column of labels to a column of label indices.\n",
491 |     "\n",
492 |     "We then use VectorAssembler to assemble these features into a vector, since the Pipelines API requires that the input features are passed in as a single vector column."
493 |    ]
494 |   },
495 |   {
496 |    "cell_type": "code",
497 |    "execution_count": null,
498 |    "metadata": {},
499 |    "outputs": [],
500 |    "source": [
501 |     "from pyspark.ml.classification import RandomForestClassifier\n",
502 |     "from pyspark.ml.feature import StringIndexer, IndexToString, VectorAssembler\n",
503 |     "from pyspark.ml.evaluation import BinaryClassificationEvaluator\n",
504 |     "from pyspark.ml import Pipeline, Model\n",
505 |     "\n",
506 |     "\n",
507 |     "si_gender = StringIndexer(inputCol = 'gender', outputCol = 'gender_IX')\n",
508 |     "si_Partner = StringIndexer(inputCol = 'Partner', outputCol = 'Partner_IX')\n",
509 |     "si_Dependents = StringIndexer(inputCol = 'Dependents', outputCol = 'Dependents_IX')\n",
510 |     "si_PhoneService = StringIndexer(inputCol = 'PhoneService', outputCol = 'PhoneService_IX')\n",
511 |     "si_MultipleLines = StringIndexer(inputCol = 'MultipleLines', outputCol = 'MultipleLines_IX')\n",
512 |     "si_InternetService = StringIndexer(inputCol = 'InternetService', outputCol = 'InternetService_IX')\n",
513 |     "si_OnlineSecurity = StringIndexer(inputCol = 'OnlineSecurity', outputCol = 'OnlineSecurity_IX')\n",
514 |     "si_OnlineBackup = StringIndexer(inputCol = 'OnlineBackup', outputCol = 'OnlineBackup_IX')\n",
515 |     "si_DeviceProtection = StringIndexer(inputCol = 'DeviceProtection', outputCol = 'DeviceProtection_IX')\n",
516 |     "si_TechSupport = StringIndexer(inputCol = 'TechSupport', outputCol = 'TechSupport_IX')\n",
517 |     "si_StreamingTV = StringIndexer(inputCol = 'StreamingTV', outputCol = 'StreamingTV_IX')\n",
518 |     "si_StreamingMovies = StringIndexer(inputCol = 'StreamingMovies', outputCol = 'StreamingMovies_IX')\n",
519 |     "si_Contract = StringIndexer(inputCol = 'Contract', outputCol = 'Contract_IX')\n",
520 |     "si_PaperlessBilling = StringIndexer(inputCol = 'PaperlessBilling', outputCol = 'PaperlessBilling_IX')\n",
521 |     "si_PaymentMethod = StringIndexer(inputCol = 'PaymentMethod', outputCol = 'PaymentMethod_IX')\n"
522 |    ]
523 |   },
524 |   {
525 |    "cell_type": "code",
526 |    "execution_count": null,
527 |    "metadata": {},
528 |    "outputs": [],
529 |    "source": [
530 |     "si_Label = StringIndexer(inputCol=\"Churn\", outputCol=\"label\").fit(spark_df)\n",
531 |     "label_converter = IndexToString(inputCol=\"prediction\", outputCol=\"predictedLabel\", labels=si_Label.labels)"
532 |    ]
533 |   },
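By default StringIndexer assigns indices in descending order of label frequency, so 0.0 goes to the most common value; that is worth remembering when reading the `_IX` columns and the `label` column. A small standalone demo:

```python
from pyspark.sql import SparkSession
from pyspark.ml.feature import StringIndexer

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([("Yes",), ("No",), ("No",)], ["Churn"])
indexed = StringIndexer(inputCol="Churn", outputCol="label").fit(df).transform(df)
indexed.show()  # "No" (most frequent) -> 0.0, "Yes" -> 1.0
```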
534 |   {
535 |    "cell_type": "markdown",
536 |    "metadata": {},
537 |    "source": [
538 |     "### 3.4 Create a single vector"
539 |    ]
540 |   },
541 |   {
542 |    "cell_type": "code",
543 |    "execution_count": null,
544 |    "metadata": {},
545 |    "outputs": [],
546 |    "source": [
547 |     "va_features = VectorAssembler(inputCols=['gender_IX', 'SeniorCitizen', 'Partner_IX', 'Dependents_IX', 'PhoneService_IX', 'MultipleLines_IX', 'InternetService_IX', \\\n",
548 |     "                                         'OnlineSecurity_IX', 'OnlineBackup_IX', 'DeviceProtection_IX', 'TechSupport_IX', 'StreamingTV_IX', 'StreamingMovies_IX', \\\n",
549 |     "                                         'Contract_IX', 'PaperlessBilling_IX', 'PaymentMethod_IX', 'TotalCharges', 'MonthlyCharges'], outputCol=\"features\")"
550 |    ]
551 |   },
552 |   {
553 |    "cell_type": "markdown",
554 |    "metadata": {},
555 |    "source": [
556 |     "### 3.5 Create a pipeline, and fit a model using RandomForestClassifier\n",
557 |     "Assemble all the stages into a pipeline. We don't expect a simple linear relationship in this data, so we'll use RandomForestClassifier, an ensemble of decision trees, to fit it."
558 |    ]
559 |   },
560 |   {
561 |    "cell_type": "code",
562 |    "execution_count": null,
563 |    "metadata": {},
564 |    "outputs": [],
565 |    "source": [
566 |     "classifier = RandomForestClassifier(featuresCol=\"features\")\n",
567 |     "\n",
568 |     "pipeline = Pipeline(stages=[si_gender, si_Partner, si_Dependents, si_PhoneService, si_MultipleLines, si_InternetService, si_OnlineSecurity, si_OnlineBackup, si_DeviceProtection, \\\n",
569 |     "                            si_TechSupport, si_StreamingTV, si_StreamingMovies, si_Contract, si_PaperlessBilling, si_PaymentMethod, si_Label, va_features, \\\n",
570 |     "                            classifier, label_converter])\n",
571 |     "\n",
572 |     "model = pipeline.fit(train_data)"
573 |    ]
574 |   },
575 |   {
576 |    "cell_type": "code",
577 |    "execution_count": null,
578 |    "metadata": {},
579 |    "outputs": [],
580 |    "source": [
581 |     "predictions = model.transform(test_data)\n",
582 |     "\n",
583 |     "# Evaluate the model on the held-out test set\n",
584 |     "evaluatorDT = BinaryClassificationEvaluator(rawPredictionCol=\"prediction\", metricName='areaUnderROC')\n",
585 |     "area_under_curve = evaluatorDT.evaluate(predictions)\n",
586 |     "evaluatorDT = BinaryClassificationEvaluator(rawPredictionCol=\"prediction\", metricName='areaUnderPR')\n",
587 |     "area_under_PR = evaluatorDT.evaluate(predictions)\n",
588 |     "print(\"areaUnderROC = %g\" % area_under_curve)\n",
589 |     "print(\"areaUnderPR = %g\" % area_under_PR)"
590 |    ]
591 |   },
592 |   {
593 |    "cell_type": "markdown",
594 |    "metadata": {},
595 |    "source": [
596 |     "## 4.0 Save the model and test data\n",
597 |     "\n",
598 |     "Now the model can be saved for future deployment. The model will be saved using the Watson Machine Learning client, to a deployment space.\n",
599 |     "\n",
600 |     "**<< UPDATE THE VARIABLE 'MODEL_NAME' TO A UNIQUE NAME>>**\n",
601 |     "\n",
602 |     "**<< UPDATE THE VARIABLE 'DEPLOYMENT_SPACE_NAME' TO THE NAME OF THE DEPLOYMENT SPACE CREATED PREVIOUSLY>>**"
603 |    ]
604 |   },
605 |   {
606 |    "cell_type": "code",
607 |    "execution_count": null,
608 |    "metadata": {},
609 |    "outputs": [],
610 |    "source": [
611 |     "\n",
612 |     "MODEL_NAME = \"INSERT-YOUR-MODEL-NAME-HERE\"\n",
613 |     "DEPLOYMENT_SPACE_NAME = 'INSERT-YOUR-DEPLOYMENT-SPACE-NAME-HERE'\n"
614 |    ]
615 |   },
616 |   {
617 |    "cell_type": "markdown",
618 |    "metadata": {},
619 |    "source": [
620 |     "### 4.1 Save the model to ICP4D local Watson Machine Learning\n",
621 |     "\n",
622 |     "Replace the `username` and `password` values of `*****` with your Cloud Pak for Data `username` and `password`. The value for `url` should match the `url` for your Cloud Pak for Data cluster."
623 | ] 624 | }, 625 | { 626 | "cell_type": "code", 627 | "execution_count": null, 628 | "metadata": {}, 629 | "outputs": [], 630 | "source": [ 631 | "from watson_machine_learning_client import WatsonMachineLearningAPIClient\n", 632 | "\n", 633 | "wml_credentials = {\n", 634 | " \"url\": \"******\",\n", 635 | " \"username\": \"*****\",\n", 636 | " \"password\" : \"*****\",\n", 637 | " \"instance_id\": \"wml_local\",\n", 638 | " \"version\" : \"2.5.0\"\n", 639 | " }\n", 640 | "\n", 641 | "client = WatsonMachineLearningAPIClient(wml_credentials)" 642 | ] 643 | }, 644 | { 645 | "cell_type": "code", 646 | "execution_count": null, 647 | "metadata": {}, 648 | "outputs": [], 649 | "source": [ 650 | "client.spaces.list()" 651 | ] 652 | }, 653 | { 654 | "cell_type": "markdown", 655 | "metadata": {}, 656 | "source": [ 657 | "### Use the desired space as the `default_space`\n", 658 | "\n", 659 | "The deployment space ID will be looked up based on the name specified above. If you do not receive a space GUID as an output to the next cell, do not proceed until you have created a deployment space." 660 | ] 661 | }, 662 | { 663 | "cell_type": "code", 664 | "execution_count": null, 665 | "metadata": {}, 666 | "outputs": [], 667 | "source": [ 668 | "# Be sure to update the name of the space with the one you want to use.\n", 669 | "client.spaces.list()\n", 670 | "all_spaces = client.spaces.get_details()['resources']\n", 671 | "space_id = None\n", 672 | "for space in all_spaces:\n", 673 | " if space['entity']['name'] == DEPLOYMENT_SPACE_NAME:\n", 674 | " space_id = space[\"metadata\"][\"guid\"]\n", 675 | " print(\"\\nDeployment Space GUID: \", space_id)\n", 676 | "\n", 677 | "if space_id is None:\n", 678 | " print(\"WARNING: Your space does not exist. Create a deployment space before proceeding to the next cell.\")\n", 679 | " #space_id = client.spaces.store(meta_props={client.spaces.ConfigurationMetaNames.NAME: space_name})[\"metadata\"][\"guid\"]" 680 | ] 681 | }, 682 | { 683 | "cell_type": "markdown", 684 | "metadata": {}, 685 | "source": [ 686 | "**<< REPLACE space_id BELOW with the id for your space. For e.g.
client.set.default_space(\"6b39c537-f707-4078-9dc7-ce70b70ab22f\") >>
**" 687 | ] 688 | }, 689 | { 690 | "cell_type": "code", 691 | "execution_count": null, 692 | "metadata": {}, 693 | "outputs": [], 694 | "source": [ 695 | "# Now set the default space to the GUID for your deployment space. If this is successful, you will see a 'SUCCESS' message.\n", 696 | "client.set.default_space(space_id)" 697 | ] 698 | }, 699 | { 700 | "cell_type": "markdown", 701 | "metadata": {}, 702 | "source": [ 703 | "#### Save the Model" 704 | ] 705 | }, 706 | { 707 | "cell_type": "code", 708 | "execution_count": null, 709 | "metadata": {}, 710 | "outputs": [], 711 | "source": [ 712 | "# Store our model\n", 713 | "model_props = {client.repository.ModelMetaNames.NAME: MODEL_NAME,\n", 714 | " client.repository.ModelMetaNames.RUNTIME_UID : \"spark-mllib_2.3\",\n", 715 | " client.repository.ModelMetaNames.TYPE : \"mllib_2.3\"}\n", 716 | "published_model = client.repository.store_model(model=model, pipeline=pipeline, meta_props=model_props, training_data=train_data)\n", 717 | "\n", 718 | "print(json.dumps(published_model, indent=3))" 719 | ] 720 | }, 721 | { 722 | "cell_type": "code", 723 | "execution_count": null, 724 | "metadata": {}, 725 | "outputs": [], 726 | "source": [ 727 | "# Use this cell to do any cleanup of previously created models and deployments\n", 728 | "client.repository.list_models()\n", 729 | "client.deployments.list()\n", 730 | "\n", 731 | "# client.repository.delete('GUID of stored model')\n", 732 | "# client.deployments.delete('GUID of deployed model')\n" 733 | ] 734 | }, 735 | { 736 | "cell_type": "markdown", 737 | "metadata": {}, 738 | "source": [ 739 | "## 5.0 Save Test Data\n", 740 | "\n", 741 | "We will save the test data we used to evaluate the model to our project." 742 | ] 743 | }, 744 | { 745 | "cell_type": "code", 746 | "execution_count": null, 747 | "metadata": {}, 748 | "outputs": [], 749 | "source": [ 750 | "write_score_CSV=test_data.toPandas().drop(['Churn'], axis=1)\n", 751 | "write_score_CSV.to_csv('/project_data/data_asset/TelcoCustomerSparkMLBatchScore.csv', sep=',', index=False)\n", 752 | "#project.save_data('TelcoCustomerSparkMLBatchScore.csv', write_score_CSV.to_csv())\n", 753 | "\n", 754 | "write_eval_CSV=test_data.toPandas()\n", 755 | "write_eval_CSV.to_csv('/project_data/data_asset/TelcoCustomerSparkMLEval.csv', sep=',', index=False)\n", 756 | "#project.save_data('TelcoCustomerSparkMLEval.csv', write_eval_CSV.to_csv())" 757 | ] 758 | }, 759 | { 760 | "cell_type": "markdown", 761 | "metadata": {}, 762 | "source": [ 763 | "## Congratulations, you have created a model based on customer churn data, and deployed it to Watson Machine Learning!" 764 | ] 765 | } 766 | ], 767 | "metadata": { 768 | "kernelspec": { 769 | "display_name": "Python 3.6", 770 | "language": "python", 771 | "name": "python3" 772 | }, 773 | "language_info": { 774 | "codemirror_mode": { 775 | "name": "ipython", 776 | "version": 3 777 | }, 778 | "file_extension": ".py", 779 | "mimetype": "text/x-python", 780 | "name": "python", 781 | "nbconvert_exporter": "python", 782 | "pygments_lexer": "ipython3", 783 | "version": "3.6.10" 784 | } 785 | }, 786 | "nbformat": 4, 787 | "nbformat_minor": 1 788 | } 789 | --------------------------------------------------------------------------------
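As a final sanity check after section 5.0, the saved CSVs can be read back inside the notebook to confirm they landed in the project data assets (a sketch using the same paths as above):

```python
import pandas as pd

eval_df = pd.read_csv('/project_data/data_asset/TelcoCustomerSparkMLEval.csv')
score_df = pd.read_csv('/project_data/data_asset/TelcoCustomerSparkMLBatchScore.csv')
print(eval_df.shape, score_df.shape)    # the scoring file has one fewer column
assert 'Churn' not in score_df.columns  # the label was dropped for batch scoring
```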