├── .gitignore ├── LICENSE ├── README.md ├── data └── german_credit_data.csv ├── doc └── source │ └── images │ ├── ExecuteAutoAINotebook.gif │ ├── ExportAutoAI.gif │ ├── RunAutoAI.gif │ ├── architecture.png │ ├── create_deployment_autoai.png │ ├── experiment_metadata.png │ ├── experiment_notebook.png │ ├── export-sklearn.png │ ├── feature-importance.png │ ├── format-as-code.png │ ├── input_parameters.png │ ├── modify-source-code.png │ ├── pipeline_notebook.png │ ├── pipelines-comparison.png │ ├── pretty_print-1.png │ ├── save-experiment-button.png │ ├── save-experiment-notebook.gif │ ├── score_webservice.png │ ├── visualize-1.png │ ├── visualize-2.png │ ├── wml-deployment-space.gif │ ├── wml_connection_autoai.png │ └── wml_model_deployed.png └── notebooks ├── Credit Data Analysis - P2 notebook.ipynb ├── Credit Data Analysis - experiment notebook.ipynb └── with-output ├── Credit Data Analysis - P2 notebook.ipynb └── Credit Data Analysis - experiment notebook.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # Byte-compiled / optimized / DLL files 5 | .DS_STORE 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | cover/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | .pybuilder/ 80 | target/ 81 | 82 | # Jupyter Notebook 83 | .ipynb_checkpoints 84 | 85 | # IPython 86 | profile_default/ 87 | ipython_config.py 88 | 89 | # pyenv 90 | # For a library or package, you might want to ignore these files since the code is 91 | # intended to run in multiple environments; otherwise, check them in: 92 | # .python-version 93 | 94 | # pipenv 95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 98 | # install all needed dependencies. 99 | #Pipfile.lock 100 | 101 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 102 | __pypackages__/ 103 | 104 | # Celery stuff 105 | celerybeat-schedule 106 | celerybeat.pid 107 | 108 | # SageMath parsed files 109 | *.sage.py 110 | 111 | # Environments 112 | .env 113 | .venv 114 | env/ 115 | venv/ 116 | ENV/ 117 | env.bak/ 118 | venv.bak/ 119 | 120 | # Spyder project settings 121 | .spyderproject 122 | .spyproject 123 | 124 | # Rope project settings 125 | .ropeproject 126 | 127 | # mkdocs documentation 128 | /site 129 | 130 | # mypy 131 | .mypy_cache/ 132 | .dmypy.json 133 | dmypy.json 134 | 135 | # Pyre type checker 136 | .pyre/ 137 | 138 | # pytype static type analyzer 139 | .pytype/ 140 | 141 | # Cython debug symbols 142 | cython_debug/ 143 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Generate Python notebook for pipeline models using AutoAI 2 | 3 | ## Summary 4 | 5 | In this code pattern, we will learn how to automatically generate a Jupyter notebook that will contain Python code of a machine learning model using AutoAI. We will explore, modify and retrain this model pipeline using python code. Lastly, we will deploy this model in Watson Machine Learning using WML APIs. 6 | 7 | ## Description 8 | 9 | AutoAI is a graphical tool available within Watson Studio that analyzes your dataset, generates several model pipelines and ranks them based on the metric chosen for the problem. This code pattern shows extended features of AutoAI. 
More basic AutoAI exploration for the same dataset is covered in the [Generate machine learning model pipelines to choose the best model for your problem](https://developer.ibm.com/tutorials/generate-machine-learning-model-pipelines-to-choose-the-best-model-for-your-problem-autoai/) article. 10 | 11 | When you have completed this code pattern, you will understand how to: 12 | 13 | * Run an AutoAI experiment. 14 | * Generate and save a Python notebook. 15 | * Execute notebook and analyse results. 16 | * Make changes and retrain model using Watson Machine Learning SDKs. 17 | * Deploy model using Watson Machine Learning from within the notebook. 18 | 19 | ## Architecture components 20 | 21 | ![architecture](doc/source/images/architecture.png) 22 | 23 | ## Flow 24 | 25 | 1. The user submits an AutoAI experiment using default settings. 26 | 1. Multiple pipeline models are generated. A pipeline model of choice from the leaderboard is saved as Jupyter notebook. 27 | 1. The Jupyter notebook is executed and a modified pipeline model is generated within the notebook. 28 | 1. Pipeline model is deployed in Watson Machine Learning using WML APIs. 29 | 30 | ## Included components 31 | 32 | * [IBM Watson Studio](https://cloud.ibm.com/catalog/services/watson-studio) - IBM Watson® Studio helps data scientists and analysts prepare data and build models at scale across any cloud. 33 | * [IBM Watson Machine Learning](https://cloud.ibm.com/catalog/services/machine-learning) - IBM Watson® Machine Learning helps data scientists and developers accelerate AI and machine-learning deployment. 34 | 35 | ## Featured Technologies 36 | 37 | * [Machine Learning](https://developer.ibm.com/articles/introduction-to-machine-learning/) - Science of predicting values by analysing historic data. 38 | * [Python](https://www.python.org/) - Python is an interpreted, object-oriented, high-level programming language. 
39 | * [Jupyter notebook](https://jupyter.org/) - Open-source web application to help build live code. 40 | * [scikit-learn](https://scikit-learn.org/stable/) - Python based machine learning library. 41 | * [lale](https://github.com/IBM/lale) - Python library compatible with scikit-learn for semi-automated data science used in AutoAI SDK 42 | 43 | ## Prerequisites 44 | 45 | * [IBM Cloud account](https://tinyurl.com/y4mzxow5) This code pattern assumes you have an **IBM Cloud** account. Sign up for a no-charge trial account - no credit card required. 46 | 47 | > Instructions to get through the list of prerequisites are covered in [this](https://developer.ibm.com/technologies/artificial-intelligence/tutorials/generate-machine-learning-model-pipelines-to-choose-the-best-model-for-your-problem-autoai/) prequel. 48 | 49 | * Create a Cloud Object Storage service instance. 50 | * Create a Watson Studio service instance. 51 | * Create a Watson Machine Learning service instance. 52 | * Create a Watson Studio project and load data. 53 | 54 | ## Steps 55 | 56 | ### 1. Run AutoAI experiment 57 | 58 | ![Run AutoAI](doc/source/images/RunAutoAI.gif) 59 | 60 | 1. Open the project created within Watson Studio. Click `Add to project +` button on the right top and then click `AutoAI Experiment`. 61 | 1. Give the experiment a name (*Credit Risk Analysis*), associate a Watson Machine Learning service from the drop down and click `Create`. 62 | 1. On the *Add data source* screen, click `Select from project` and check *german_credit_data.csv* and click `Select asset`. 63 | 1. Under the *Configure details* section, click on the `What do you want to predict?` drop down and select `Result` from the list. If you are using a different dataset, select the column that you want AutoAI to run predictions on. Click `Run experiment` on the right bottom. 64 | 65 | You will see a notification that indicates *AutoAI experiment has started*. 
Depending on the size of the data set, this step will take a few minutes to complete. 66 | 67 | ### 2. Generate experiment level notebook 68 | 69 | ![Experiment Notebook](doc/source/images/experiment_notebook.png) 70 | 71 | The `experiment notebook` provides annotated code so you can: 72 | 73 | * Interact with trained model pipelines 74 | * Access model details programmatically (including feature importance and machine learning metrics) 75 | * Visualize each pipeline as a graph, with each node documented, to provide transparency 76 | * Download selected pipelines and test locally 77 | * Create a deployment and score the model 78 | * Get the experiment configuration, which you can use for automation or integration with other applications 79 | 80 | To generate an experiment notebook, perform the following steps: 81 | 82 | ![Export AutoAI Notebook](doc/source/images/save-experiment-notebook.gif) 83 | 84 | 1. Once the AutoAI experiment completes, click on the `Save experiment code` button indicated by the floppy icon. 85 | 86 | ![Save experiment button](doc/source/images/save-experiment-button.png) 87 | 88 | 1. In the `Save experiment code` prompt, modify the default *Name* if needed and click `Save`. A pop up will show up that indicates that the notebook was saved successfully. You will now see this notebook under the *Notebooks* section within the *Assets* tab. 89 | 90 | #### 2.0 Load and execute notebook 91 | 92 | Spend some time looking through the sections of the notebook to get an overview. 93 | A notebook is composed of text (markdown or heading) cells and code cells. The markdown cells provide comments on what the code is designed to do. 94 | 95 | You will run cells individually by highlighting each cell, then either clicking the `Run` button at the top of the notebook or hitting the keyboard shortcut to run the cell (Shift + Enter but can vary based on platform). While the cell is running, an asterisk ([*]) will show up to the left of the cell. 
When that cell has finished executing a sequential number will show up (i.e. [17]). 96 | 97 | ![Execute AutoAI Notebook](doc/source/images/ExecuteAutoAINotebook.gif) 98 | 99 | The notebook generated is pre-filled with Python code and is divided into 4 main sections as follows. 100 | 101 | #### 2.1 Setup 102 | 103 | This section contains credentials to *Cloud Object Storage* through which the current AutoAI pipeline is retrieved. The cell contains code prefilled to extract the training data used to create the pipeline and the pipeline results. 104 | 105 | ![experiment metadata](doc/source/images/experiment_metadata.png) 106 | 107 | Also this section contains the metadata of the current pipeline that was used to run the experiment. 108 | 109 | ![input parameters](doc/source/images/input_parameters.png) 110 | 111 | ##### api_key 112 | 113 | To be able to access the WML instance, the user will need to generate an *api key* through the cloud account and paste it in the cell as shown in the cell below. The instructions to acquire the *cloud api key* are described in the markdown section of the screenshot shown below. 114 | 115 | ![wml connection](doc/source/images/wml_connection_autoai.png) 116 | 117 | #### 2.2 Pipelines comparison 118 | 119 | To compare all the pipelines that get generated, call the `summary()` method on the pipeline object. The best performing model is stored under the `best_pipeline_name` variable. 120 | 121 | ![pipelines comparison](doc/source/images/pipelines-comparison.png) 122 | 123 | By passing the variable name within the `get_pipeline()` method, all the feature importance generated with that particular pipeline is listed. 124 | 125 | ![feature importance](doc/source/images/feature-importance.png) 126 | 127 | #### 2.3 Inspect pipeline 128 | 129 | Within this section of the notebook, there is code to visualize the stages within the model as a graph using Watson Machine Learning's AutoAI APIs. 
130 | 131 | ![visualize -1](doc/source/images/visualize-1.png) 132 | 133 | This section also contains code that extracts the current model and prints it as Python code. 134 | 135 | ![pretty print -1](doc/source/images/pretty_print-1.png) 136 | 137 | #### 2.4 Deploy and score as web service using WML instance 138 | 139 | This section of the notebook contains code that deploys the pipeline model as a web service using Watson Machine Learning. This section requires users to enter credentials to be able to identify the right WML instance and deployment space. 140 | 141 | ##### target_space_id 142 | 143 | >*To create a deployment space and get the target_space_id*: 144 | ![wml deployment space](doc/source/images/wml-deployment-space.gif) 145 | > 146 | >1. Click on the hamburger menu on the top-left corner of the Watson Studio home page. 147 | >1. Click on `Deployment Spaces` from the list and select `View all spaces` 148 | >1. Click `New deployment space`, select `Create an empty space` option. 149 | >1. Provide a name, select a machine learning service that was previously created and click `Create` 150 | >1. Click `View new space` and switch to the `Settings` tab and copy the `space id` 151 | 152 | Acquire the *target_space_id* as shown in the steps above and paste within the create deployment section. The Watson Machine Learning API uses the `wml_credentials` and the `target_space_id` to deploy the machine learning model as a web service. 153 | 154 | ![create deployment](doc/source/images/create_deployment_autoai.png) 155 | 156 | Once the cells are executed, the model is promoted to the deployment space and is now available as a web service and can be verified from within the UI as shown below. 157 | 158 | ![model deployed](doc/source/images/wml_model_deployed.png) 159 | 160 | #### Score web service 161 | 162 | Scoring the web service can either be done from the UI by switching to the `test` tab shown in the screenshot above. 
Alternatively, the *score()* method from the WML API can be used to submit a sample test payload. The results are returned as shown in the screenshot below. 163 | 164 | ![score web service](doc/source/images/score_webservice.png) 165 | 166 | ### 3. Generate pipeline level notebook 167 | 168 | ![Pipeline Notebook](doc/source/images/pipeline_notebook.png) 169 | 170 | The `pipeline notebook` provides annotated code that allows you to: 171 | 172 | * View the Scikit-learn pipeline definition 173 | * See the transformations applied for pipeline training 174 | * Preview hyper-parameter values found in HPO phase 175 | * Review the pipeline evaluation 176 | * Refine the pipeline definition 177 | * Re-fit and re-evaluate 178 | 179 | To generate a pipeline notebook, perform the following steps: 180 | 181 | ![Export AutoAI Notebook](doc/source/images/ExportAutoAI.gif) 182 | 183 | 1. Hover over the pipeline that you wish to save as notebook and click on the `Save as` dropdown on the right side and select `Notebook` from the drop down. 184 | 185 | 1. In the `Save as` prompt, you will notice that there are two types of assets that can be generated, namely *Model* and *Notebook*. We will select the `Notebook` option. 186 | 187 | 1. From the `Define details` section on the right, change the default *Name* if needed and click `Create`. A pop up will show up that indicates that the notebook was saved successfully. You will now see this notebook under the *Notebooks* section within the *Assets* tab. 188 | 189 | Edit and execute each of the cells as shown in section `2.0`. The notebook contains more information in its markdown cells. 190 | 191 | ## Related Links 192 | 193 | * [Simplify your AI lifecycle with AutoAI](https://developer.ibm.com/series/explore-autoai/) 194 | 195 | ## License 196 | 197 | This code pattern is licensed under the Apache License, Version 2. 
Separate third-party code objects invoked within this code pattern are licensed by their respective providers pursuant to their own separate licenses. Contributions are subject to the [Developer Certificate of Origin, Version 1.1](https://developercertificate.org/) and the [Apache License, Version 2](https://www.apache.org/licenses/LICENSE-2.0.txt). 198 | 199 | [Apache License FAQ](https://www.apache.org/foundation/license-faq.html#WhatDoesItMEAN) 200 | -------------------------------------------------------------------------------- /doc/source/images/ExecuteAutoAINotebook.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/AutoAI-code-generation/30bdce187874c5e8f64282e41478ad2b4767f525/doc/source/images/ExecuteAutoAINotebook.gif -------------------------------------------------------------------------------- /doc/source/images/ExportAutoAI.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/AutoAI-code-generation/30bdce187874c5e8f64282e41478ad2b4767f525/doc/source/images/ExportAutoAI.gif -------------------------------------------------------------------------------- /doc/source/images/RunAutoAI.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/AutoAI-code-generation/30bdce187874c5e8f64282e41478ad2b4767f525/doc/source/images/RunAutoAI.gif -------------------------------------------------------------------------------- /doc/source/images/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/AutoAI-code-generation/30bdce187874c5e8f64282e41478ad2b4767f525/doc/source/images/architecture.png -------------------------------------------------------------------------------- /doc/source/images/create_deployment_autoai.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/AutoAI-code-generation/30bdce187874c5e8f64282e41478ad2b4767f525/doc/source/images/create_deployment_autoai.png -------------------------------------------------------------------------------- /doc/source/images/experiment_metadata.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/AutoAI-code-generation/30bdce187874c5e8f64282e41478ad2b4767f525/doc/source/images/experiment_metadata.png -------------------------------------------------------------------------------- /doc/source/images/experiment_notebook.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/AutoAI-code-generation/30bdce187874c5e8f64282e41478ad2b4767f525/doc/source/images/experiment_notebook.png -------------------------------------------------------------------------------- /doc/source/images/export-sklearn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/AutoAI-code-generation/30bdce187874c5e8f64282e41478ad2b4767f525/doc/source/images/export-sklearn.png -------------------------------------------------------------------------------- /doc/source/images/feature-importance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/AutoAI-code-generation/30bdce187874c5e8f64282e41478ad2b4767f525/doc/source/images/feature-importance.png -------------------------------------------------------------------------------- /doc/source/images/format-as-code.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/AutoAI-code-generation/30bdce187874c5e8f64282e41478ad2b4767f525/doc/source/images/format-as-code.png 
-------------------------------------------------------------------------------- /doc/source/images/input_parameters.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/AutoAI-code-generation/30bdce187874c5e8f64282e41478ad2b4767f525/doc/source/images/input_parameters.png -------------------------------------------------------------------------------- /doc/source/images/modify-source-code.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/AutoAI-code-generation/30bdce187874c5e8f64282e41478ad2b4767f525/doc/source/images/modify-source-code.png -------------------------------------------------------------------------------- /doc/source/images/pipeline_notebook.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/AutoAI-code-generation/30bdce187874c5e8f64282e41478ad2b4767f525/doc/source/images/pipeline_notebook.png -------------------------------------------------------------------------------- /doc/source/images/pipelines-comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/AutoAI-code-generation/30bdce187874c5e8f64282e41478ad2b4767f525/doc/source/images/pipelines-comparison.png -------------------------------------------------------------------------------- /doc/source/images/pretty_print-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/AutoAI-code-generation/30bdce187874c5e8f64282e41478ad2b4767f525/doc/source/images/pretty_print-1.png -------------------------------------------------------------------------------- /doc/source/images/save-experiment-button.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/IBM/AutoAI-code-generation/30bdce187874c5e8f64282e41478ad2b4767f525/doc/source/images/save-experiment-button.png -------------------------------------------------------------------------------- /doc/source/images/save-experiment-notebook.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/AutoAI-code-generation/30bdce187874c5e8f64282e41478ad2b4767f525/doc/source/images/save-experiment-notebook.gif -------------------------------------------------------------------------------- /doc/source/images/score_webservice.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/AutoAI-code-generation/30bdce187874c5e8f64282e41478ad2b4767f525/doc/source/images/score_webservice.png -------------------------------------------------------------------------------- /doc/source/images/visualize-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/AutoAI-code-generation/30bdce187874c5e8f64282e41478ad2b4767f525/doc/source/images/visualize-1.png -------------------------------------------------------------------------------- /doc/source/images/visualize-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/AutoAI-code-generation/30bdce187874c5e8f64282e41478ad2b4767f525/doc/source/images/visualize-2.png -------------------------------------------------------------------------------- /doc/source/images/wml-deployment-space.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/AutoAI-code-generation/30bdce187874c5e8f64282e41478ad2b4767f525/doc/source/images/wml-deployment-space.gif -------------------------------------------------------------------------------- /doc/source/images/wml_connection_autoai.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/AutoAI-code-generation/30bdce187874c5e8f64282e41478ad2b4767f525/doc/source/images/wml_connection_autoai.png -------------------------------------------------------------------------------- /doc/source/images/wml_model_deployed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/AutoAI-code-generation/30bdce187874c5e8f64282e41478ad2b4767f525/doc/source/images/wml_model_deployed.png -------------------------------------------------------------------------------- /notebooks/Credit Data Analysis - P2 notebook.ipynb: -------------------------------------------------------------------------------- 1 | {"cells":[{"metadata":{},"cell_type":"markdown","source":["![image](https://github.com/IBM/watson-machine-learning-samples/raw/master/cloud/notebooks/headers/AutoAI-Banner_Pipeline-Notebook.png)\n","# Pipeline 2 Notebook - AutoAI Notebook v1.15.0\n","\n","Consider these tips for working with an auto-generated notebook:\n","- Notebook code generated using AutoAI will execute successfully. If you modify the notebook, we cannot guarantee it will run successfully.\n","- This pipeline is optimized for the original data set. The pipeline might fail or produce sub-optimum results if used with different data. If you want to use a different data set, consider retraining the AutoAI experiment to generate a new pipeline. For more information, see Cloud Platform \n","- Before modifying the pipeline or trying to re-fit the pipeline, consider that the code converts dataframes to numpy arrays before fitting the pipeline (a current restriction of the preprocessor pipeline).\n"]},{"metadata":{},"cell_type":"markdown","source":["\n","## Notebook content\n","\n","This notebook contains a Scikit-learn representation of AutoAI pipeline. 
This notebook introduces commands for getting data, training the model, and testing the model. \n","\n","Some familiarity with Python is helpful. This notebook uses Python 3.7 and scikit-learn 0.23.2."]},{"metadata":{"pycharm":{"name":"#%% md\n"}},"cell_type":"markdown","source":["## Notebook goals\n","\n","- Scikit-learn pipeline definition\n","- Pipeline training \n","- Pipeline evaluation\n","\n","## Contents\n","\n","This notebook contains the following parts:\n","\n","**[Setup](#setup)**
\n","  [Package installation](#install)
\n","  [AutoAI experiment metadata](#variables_definition)
\n","**[Pipeline inspection](#inspection)**
\n","  [Read training data](#read)
\n","  [Train and test data split](#split)
\n","  [Make pipeline](#preview_model_to_python_code)
\n","  [Train pipeline model](#train)
\n","  [Test pipeline model](#test_model)
\n","**[Next steps](#next_steps)**
\n","**[Copyrights](#copyrights)**"]},{"metadata":{},"cell_type":"markdown","source":["\n","# Setup"]},{"metadata":{},"cell_type":"markdown","source":["\n","## Package installation\n","Before you use the sample code in this notebook, install the following packages:\n"," - ibm_watson_machine_learning,\n"," - autoai-libs,\n"," - scikit-learn,\n"," - xgboost.\n"]},{"metadata":{"execution":{"iopub.execute_input":"2020-10-12T14:00:45.009458Z","iopub.status.busy":"2020-10-12T14:00:45.007968Z","iopub.status.idle":"2020-10-12T14:00:46.037702Z","shell.execute_reply":"2020-10-12T14:00:46.038270Z"},"pycharm":{"name":"#%%\n"},"scrolled":true},"cell_type":"code","source":["!conda remove --force libxgboost -y\n","!pip install ibm-watson-machine-learning | tail -n 1\n","!pip install -U autoai-libs==1.12.7 | tail -n 1\n","!pip install -U scikit-learn==0.23.2 | tail -n 1\n","!pip install -U xgboost==1.3.3 | tail -n 1"],"execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["\n","## AutoAI experiment metadata\n","The following cell contains the training data connection details. 
\n","**Note**: The connection might contain authorization credentials, so be careful when sharing the notebook."]},{"metadata":{"execution":{"iopub.execute_input":"2020-10-12T14:00:49.797633Z","iopub.status.busy":"2020-10-12T14:00:49.796778Z","iopub.status.idle":"2020-10-12T14:00:57.182715Z","shell.execute_reply":"2020-10-12T14:00:57.183132Z"},"pycharm":{"is_executing":true}},"cell_type":"code","source":["#@hidden_cell\n","from ibm_watson_machine_learning.helpers import DataConnection\n","from ibm_watson_machine_learning.helpers import S3Connection, S3Location\n","\n","training_data_reference = [DataConnection(\n"," connection=S3Connection(\n"," api_key='',\n"," auth_endpoint='https://iam.bluemix.net/oidc/token/',\n"," endpoint_url='https://s3-api.us-geo.objectstorage.softlayer.net'\n"," ),\n"," location=S3Location(\n"," bucket='autoaitutorialnew-donotdelete-pr-zhaudczzjfda0e',\n"," path='german_credit_data.csv'\n"," )),\n","]\n","training_result_reference = DataConnection(\n"," connection=S3Connection(\n"," api_key='',\n"," auth_endpoint='https://iam.bluemix.net/oidc/token/',\n"," endpoint_url='https://s3-api.us-geo.objectstorage.softlayer.net'\n"," ),\n"," location=S3Location(\n"," bucket='autoaitutorialnew-donotdelete-pr-zhaudczzjfda0e',\n"," path='auto_ml/509ff920-3fcf-4eb9-9abc-bbc63b4a301a/wml_data/33aa26d1-15eb-482c-93ca-76c186c431a2/data/automl',\n"," model_location='auto_ml/509ff920-3fcf-4eb9-9abc-bbc63b4a301a/wml_data/33aa26d1-15eb-482c-93ca-76c186c431a2/data/automl/hpo_d_output/Pipeline1/model.pickle',\n"," training_status='auto_ml/509ff920-3fcf-4eb9-9abc-bbc63b4a301a/wml_data/33aa26d1-15eb-482c-93ca-76c186c431a2/training-status.json'\n"," ))"],"execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["Following cell contains input parameters provided to run the AutoAI experiment in Watson 
Studio."]},{"metadata":{"execution":{"iopub.execute_input":"2020-10-12T14:00:57.187305Z","iopub.status.busy":"2020-10-12T14:00:57.186602Z","iopub.status.idle":"2020-10-12T14:00:57.188392Z","shell.execute_reply":"2020-10-12T14:00:57.188878Z"},"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["experiment_metadata = dict(\n"," prediction_type='classification',\n"," prediction_column='Result',\n"," holdout_size=0.1,\n"," scoring='accuracy',\n"," csv_separator=',',\n"," random_state=33,\n"," max_number_of_estimators=2,\n"," training_data_reference=training_data_reference,\n"," training_result_reference=training_result_reference,\n"," deployment_url='https://us-south.ml.cloud.ibm.com',\n"," project_id='0e2a32c8-f2ed-4587-8479-3a22b2ea57b3',\n"," positive_label=1,\n"," drop_duplicates=True\n",")"],"execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["\n","# Pipeline inspection"]},{"metadata":{"pycharm":{"name":"#%% md\n"}},"cell_type":"markdown","source":["\n","## Read training data\n","\n","Retrieve training dataset from AutoAI experiment as pandas DataFrame. If reading data using Flight Service Connection results with error, please provide data as Pandas DataFrame object e.g. 
reading .CSV file with `pandas.read_csv()`"]},{"metadata":{"execution":{"iopub.execute_input":"2020-10-12T14:01:16.076169Z","iopub.status.busy":"2020-10-12T14:01:16.075589Z","iopub.status.idle":"2020-10-12T14:01:19.190233Z","shell.execute_reply":"2020-10-12T14:01:19.190807Z"},"pycharm":{"is_executing":true,"name":"#%%\n"}},"cell_type":"code","source":["df = training_data_reference[0].read(csv_separator=experiment_metadata['csv_separator'])\n","df.dropna('rows', how='any', subset=[experiment_metadata['prediction_column']], inplace=True)"],"execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["\n","## Train and test data split"]},{"metadata":{"pycharm":{"is_executing":true}},"cell_type":"code","source":["from sklearn.model_selection import train_test_split\n","\n","df.drop_duplicates(inplace=True)\n","X = df.drop([experiment_metadata['prediction_column']], axis=1).values\n","y = df[experiment_metadata['prediction_column']].values\n","\n","train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=experiment_metadata['holdout_size'],\n"," stratify=y, random_state=experiment_metadata['random_state'])"],"execution_count":null,"outputs":[]},{"metadata":{"pycharm":{"name":"#%% md\n"}},"cell_type":"markdown","source":["\n","## Make pipeline\n","In the next cell, you can find the Scikit-learn definition of the selected AutoAI pipeline."]},{"metadata":{},"cell_type":"markdown","source":["Import statements."]},{"metadata":{"pycharm":{"is_executing":true,"name":"#%%\n"}},"cell_type":"code","source":["from autoai_libs.transformers.exportable import NumpyColumnSelector\n","from autoai_libs.transformers.exportable import CompressStrings\n","from autoai_libs.transformers.exportable import NumpyReplaceMissingValues\n","from autoai_libs.transformers.exportable import NumpyReplaceUnknownValues\n","from autoai_libs.transformers.exportable import boolean2float\n","from autoai_libs.transformers.exportable import CatImputer\n","from 
autoai_libs.transformers.exportable import CatEncoder\n","import numpy as np\n","from autoai_libs.transformers.exportable import float32_transform\n","from sklearn.pipeline import make_pipeline\n","from autoai_libs.transformers.exportable import FloatStr2Float\n","from autoai_libs.transformers.exportable import NumImputer\n","from autoai_libs.transformers.exportable import OptStandardScaler\n","from sklearn.pipeline import make_union\n","from autoai_libs.transformers.exportable import NumpyPermuteArray\n","from xgboost import XGBClassifier"],"execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["#### Pre-processing & Estimator."]},{"metadata":{"pycharm":{"is_executing":true,"name":"#%%\n"}},"cell_type":"code","source":["numpy_column_selector_0 = NumpyColumnSelector(\n"," columns=[\n"," 0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,\n"," ]\n",")\n","compress_strings = CompressStrings(\n"," compress_type=\"hash\",\n"," dtypes_list=[\n"," \"char_str\", \"int_num\", \"char_str\", \"char_str\", \"char_str\", \"char_str\",\n"," \"int_num\", \"char_str\", \"char_str\", \"int_num\", \"char_str\", \"int_num\",\n"," \"char_str\", \"char_str\", \"int_num\", \"char_str\", \"int_num\", \"char_str\",\n"," \"char_str\",\n"," ],\n"," missing_values_reference_list=[\"\", \"-\", \"?\", float(\"nan\")],\n"," misslist_list=[\n"," [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [],\n"," [], [],\n"," ],\n",")\n","numpy_replace_missing_values_0 = NumpyReplaceMissingValues(\n"," missing_values=[], filling_values=float(\"nan\")\n",")\n","numpy_replace_unknown_values = NumpyReplaceUnknownValues(\n"," filling_values=float(\"nan\"),\n"," filling_values_list=[\n"," float(\"nan\"), float(\"nan\"), float(\"nan\"), float(\"nan\"), float(\"nan\"),\n"," float(\"nan\"), float(\"nan\"), float(\"nan\"), float(\"nan\"), float(\"nan\"),\n"," float(\"nan\"), float(\"nan\"), float(\"nan\"), float(\"nan\"), float(\"nan\"),\n"," 
float(\"nan\"), float(\"nan\"), float(\"nan\"), float(\"nan\"),\n"," ],\n"," missing_values_reference_list=[\"\", \"-\", \"?\", float(\"nan\")],\n",")\n","cat_imputer = CatImputer(\n"," strategy=\"most_frequent\",\n"," missing_values=float(\"nan\"),\n"," sklearn_version_family=\"23\",\n",")\n","cat_encoder = CatEncoder(\n"," encoding=\"ordinal\",\n"," categories=\"auto\",\n"," dtype=np.float64,\n"," handle_unknown=\"error\",\n"," sklearn_version_family=\"23\",\n",")\n","pipeline_0 = make_pipeline(\n"," numpy_column_selector_0,\n"," compress_strings,\n"," numpy_replace_missing_values_0,\n"," numpy_replace_unknown_values,\n"," boolean2float(),\n"," cat_imputer,\n"," cat_encoder,\n"," float32_transform(),\n",")\n","numpy_column_selector_1 = NumpyColumnSelector(columns=[4])\n","float_str2_float = FloatStr2Float(\n"," dtypes_list=[\"int_num\"], missing_values_reference_list=[]\n",")\n","numpy_replace_missing_values_1 = NumpyReplaceMissingValues(\n"," missing_values=[], filling_values=float(\"nan\")\n",")\n","num_imputer = NumImputer(strategy=\"median\", missing_values=float(\"nan\"))\n","opt_standard_scaler = OptStandardScaler(\n"," num_scaler_copy=None,\n"," num_scaler_with_mean=None,\n"," num_scaler_with_std=None,\n"," use_scaler_flag=False,\n",")\n","pipeline_1 = make_pipeline(\n"," numpy_column_selector_1,\n"," float_str2_float,\n"," numpy_replace_missing_values_1,\n"," num_imputer,\n"," opt_standard_scaler,\n"," float32_transform(),\n",")\n","union = make_union(pipeline_0, pipeline_1)\n","numpy_permute_array = NumpyPermuteArray(\n"," axis=0,\n"," permutation_indices=[\n"," 0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 4,\n"," ],\n",")\n","xgb_classifier = XGBClassifier(\n"," base_score=0.5,\n"," booster=\"gbtree\",\n"," colsample_bylevel=1,\n"," colsample_bynode=1,\n"," colsample_bytree=1,\n"," gamma=0,\n"," gpu_id=-1,\n"," interaction_constraints=\"\",\n"," learning_rate=1.0,\n"," max_delta_step=0,\n"," max_depth=2,\n"," 
min_child_weight=2,\n"," missing=float(\"nan\"),\n"," monotone_constraints=\"()\",\n"," n_estimators=128,\n"," n_jobs=2,\n"," num_parallel_tree=1,\n"," random_state=33,\n"," reg_alpha=1,\n"," reg_lambda=1.0,\n"," scale_pos_weight=1,\n"," subsample=0.9992297983348898,\n"," tree_method=\"hist\",\n"," validate_parameters=1,\n"," verbosity=0,\n"," nthread=2,\n"," silent=True,\n"," seed=33,\n",")\n"],"execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["Pipeline."]},{"metadata":{"pycharm":{"is_executing":true,"name":"#%%\n"}},"cell_type":"code","source":["pipeline = make_pipeline(union, numpy_permute_array, xgb_classifier)"],"execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["\n","## Train pipeline model\n"]},{"metadata":{},"cell_type":"markdown","source":["### Define scorer from the optimization metric\n","This cell constructs the cell scorer based on the experiment metadata."]},{"metadata":{"pycharm":{"is_executing":true}},"cell_type":"code","source":["from sklearn.metrics import get_scorer\n","\n","scorer = get_scorer(experiment_metadata['scoring'])"],"execution_count":null,"outputs":[]},{"metadata":{"pycharm":{"name":"#%% md\n"}},"cell_type":"markdown","source":["\n","### Fit pipeline model\n","In this cell, the pipeline is fitted."]},{"metadata":{"execution":{"iopub.execute_input":"2020-10-12T14:01:19.291734Z","iopub.status.busy":"2020-10-12T14:01:19.244735Z","iopub.status.idle":"2020-10-12T14:01:19.338461Z","shell.execute_reply":"2020-10-12T14:01:19.338958Z"},"pycharm":{"is_executing":true,"name":"#%%\n"},"scrolled":true},"cell_type":"code","source":["pipeline.fit(train_X,train_y)"],"execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["\n","## Test pipeline model"]},{"metadata":{},"cell_type":"markdown","source":["Score the fitted pipeline with the generated scorer using the holdout 
dataset."]},{"metadata":{"execution":{"iopub.execute_input":"2020-10-12T14:02:03.910267Z","iopub.status.busy":"2020-10-12T14:02:03.909710Z","iopub.status.idle":"2020-10-12T14:02:03.914154Z","shell.execute_reply":"2020-10-12T14:02:03.914727Z"},"pycharm":{"is_executing":true,"name":"#%%\n"}},"cell_type":"code","source":["score = scorer(pipeline, test_X, test_y)\n","print(score)"],"execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["\n","# Next steps\n","\n","#### [Model deployment as webservice](https://github.com/IBM/watson-machine-learning-samples/tree/master/cloud/notebooks/python_sdk/deployments/autoai)\n","#### [Run AutoAI experiment with python SDK](https://github.com/IBM/watson-machine-learning-samples/tree/master/cloud/notebooks/python_sdk/experiments/autoai) "]},{"metadata":{"pycharm":{"name":"#%% md\n"}},"cell_type":"markdown","source":["\n","### Copyrights\n","\n","Licensed Materials - Copyright © 2021 IBM. This notebook and its source code are released under the terms of the ILAN License. Use, duplication disclosure restricted by GSA ADP Schedule Contract with IBM Corp.\n","\n","**Note:** The auto-generated notebooks are subject to the International License Agreement for Non-Warranted Programs (or equivalent) and License Information document for Watson Studio Auto-generated Notebook (License Terms), such agreements located in the link below. Specifically, the Source Components and Sample Materials clause included in the License Information document for Watson Studio Auto-generated Notebook applies to the auto-generated notebooks. 
\n","\n","By downloading, copying, accessing, or otherwise using the materials, you agree to the License Terms\n","\n","___"]}],"metadata":{"kernelspec":{"name":"python3","display_name":"Python 3.7","language":"python"},"language_info":{"name":"python","version":"3.7.10","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat":4,"nbformat_minor":2} -------------------------------------------------------------------------------- /notebooks/Credit Data Analysis - experiment notebook.ipynb: -------------------------------------------------------------------------------- 1 | {"cells":[{"metadata":{},"cell_type":"markdown","source":["![image](https://github.com/IBM/watson-machine-learning-samples/raw/master/cloud/notebooks/headers/AutoAI-Banner_Experiment-Notebook.png)\n","# Experiment Notebook - AutoAI Notebook v1.15.0\n","\n","\n","This notebook contains the steps and code to demonstrate support of AutoAI experiments in Watson Machine Learning service. It introduces Python SDK commands for data retrieval, training experiments, persisting pipelines, testing pipelines, refining pipelines, and scoring the resulting model.\n","\n","**Note:** Notebook code generated using AutoAI will execute successfully. If code is modified or reordered, there is no guarantee it will successfully execute. For details, see: Saving an Auto AI experiment as a notebook\n"]},{"metadata":{},"cell_type":"markdown","source":["Some familiarity with Python is helpful. 
This notebook uses Python 3.7 and `ibm_watson_machine_learning` package.\n","\n","\n","## Notebook goals\n","\n","The learning goals of this notebook are:\n","- Defining an AutoAI experiment\n","- Training AutoAI models \n","- Comparing trained models\n","- Deploying the model as a web service\n","- Scoring the model to generate predictions.\n","\n","\n","\n","## Contents\n","\n","This notebook contains the following parts:\n","\n","**[Setup](#setup)**
\n","  [Package installation](#install)
\n","  [Watson Machine Learning connection](#connection)
\n","**[Experiment configuration](#configuration)**
\n","  [Experiment metadata](#metadata)
\n","**[Working with completed AutoAI experiment](#work)**
\n","  [Get fitted AutoAI optimizer](#get)
\n","  [Pipelines comparison](#comparison)
\n","  [Get pipeline as scikit-learn pipeline model](#get_pipeline)
\n","  [Inspect pipeline](#inspect_pipeline)
\n","    [Visualize pipeline model](#visualize)
\n","    [Preview pipeline model as python code](#preview)
\n","**[Deploy and Score](#scoring)**
\n","  [Working with spaces](#working_spaces)
\n","**[Running AutoAI experiment with Python SDK](#run)**
\n","**[Clean up](#cleanup)**
\n","**[Next steps](#next_steps)**
\n","**[Copyrights](#copyrights)**"]},{"metadata":{},"cell_type":"markdown","source":["\n","# Setup"]},{"metadata":{},"cell_type":"markdown","source":["\n","## Package installation\n","Before you use the sample code in this notebook, install the following packages:\n"," - ibm_watson_machine_learning,\n"," - autoai-libs,\n"," - lale,\n"," - scikit-learn,\n"," - xgboost,\n"," - lightgbm.\n"]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["!conda remove --force libxgboost -y\n","!pip install ibm-watson-machine-learning | tail -n 1\n","!pip install -U autoai-libs==1.12.7 | tail -n 1\n","!pip install -U 'lale>=0.5.1,<0.6' | tail -n 1\n","!pip install -U scikit-learn==0.23.2 | tail -n 1\n","!pip install -U xgboost==1.3.3 | tail -n 1\n","!pip install -U lightgbm==3.1.1 | tail -n 1"],"execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["\n","# Experiment configuration"]},{"metadata":{"pycharm":{"name":"#%% md\n"}},"cell_type":"markdown","source":["\n","## Experiment metadata\n","This cell defines the metadata for the experiment, including: training_data_reference, training_result_reference, experiment_metadata."]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["#@hidden_cell\n","from ibm_watson_machine_learning.helpers import DataConnection\n","from ibm_watson_machine_learning.helpers import S3Connection, S3Location\n","\n","training_data_reference = [DataConnection(\n"," connection=S3Connection(\n"," api_key='',\n"," auth_endpoint='https://iam.bluemix.net/oidc/token/',\n"," endpoint_url='https://s3-api.us-geo.objectstorage.softlayer.net'\n"," ),\n"," location=S3Location(\n"," bucket='autoaitutorialnew-donotdelete-pr-zhaudczzjfda0e',\n"," path='german_credit_data.csv'\n"," )),\n","]\n","training_result_reference = DataConnection(\n"," connection=S3Connection(\n"," api_key='',\n"," auth_endpoint='https://iam.bluemix.net/oidc/token/',\n"," 
endpoint_url='https://s3-api.us-geo.objectstorage.softlayer.net'\n"," ),\n"," location=S3Location(\n"," bucket='autoaitutorialnew-donotdelete-pr-zhaudczzjfda0e',\n"," path='auto_ml/509ff920-3fcf-4eb9-9abc-bbc63b4a301a/wml_data/33aa26d1-15eb-482c-93ca-76c186c431a2/data/automl',\n"," model_location='auto_ml/509ff920-3fcf-4eb9-9abc-bbc63b4a301a/wml_data/33aa26d1-15eb-482c-93ca-76c186c431a2/data/automl/pre_hpo_d_output/Pipeline1/model.pickle',\n"," training_status='auto_ml/509ff920-3fcf-4eb9-9abc-bbc63b4a301a/wml_data/33aa26d1-15eb-482c-93ca-76c186c431a2/training-status.json'\n"," ))"],"execution_count":null,"outputs":[]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["experiment_metadata = dict(\n"," prediction_type='classification',\n"," prediction_column='Result',\n"," holdout_size=0.1,\n"," scoring='accuracy',\n"," csv_separator=',',\n"," random_state=33,\n"," max_number_of_estimators=2,\n"," training_data_reference=training_data_reference,\n"," training_result_reference=training_result_reference,\n"," deployment_url='https://us-south.ml.cloud.ibm.com',\n"," project_id='0e2a32c8-f2ed-4587-8479-3a22b2ea57b3',\n"," positive_label=1,\n"," drop_duplicates=True\n",")"],"execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["\n","## Watson Machine Learning connection\n","\n","This cell defines the credentials required to work with the Watson Machine Learning service.\n","\n","**Action** Please provide IBM Cloud apikey following [docs](https://cloud.ibm.com/docs/account?topic=account-userapikey)."]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["api_key = ''"],"execution_count":null,"outputs":[]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["wml_credentials = {\n"," \"apikey\": api_key,\n"," \"url\": experiment_metadata['deployment_url']\n","}"],"execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["\n","\n","\n","# Working with completed 
AutoAI experiment\n","\n","This cell imports the pipelines generated for the experiment so they can be compared to find the optimal pipeline to save as a model."]},{"metadata":{},"cell_type":"markdown","source":["\n","\n","\n","## Get fitted AutoAI optimizer"]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["from ibm_watson_machine_learning.experiment import AutoAI\n","\n","pipeline_optimizer = AutoAI(wml_credentials, project_id=experiment_metadata['project_id']).runs.get_optimizer(metadata=experiment_metadata)"],"execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["Use `get_params()`- to retrieve configuration parameters."]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["pipeline_optimizer.get_params()"],"execution_count":null,"outputs":[]},{"metadata":{"pycharm":{"name":"#%% md\n"}},"cell_type":"markdown","source":["\n","## Pipelines comparison\n","\n","Use the `summary()` method to list trained pipelines and evaluation metrics information in\n","the form of a Pandas DataFrame. 
You can use the DataFrame to compare all discovered pipelines and select the one you like for further testing."]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["summary = pipeline_optimizer.summary()\n","best_pipeline_name = list(summary.index)[0]\n","summary"],"execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["\n","### Get pipeline as scikit-learn pipeline model\n","\n","After you compare the pipelines, download and save a scikit-learn pipeline model object from the\n","AutoAI training job.\n","\n","**Tip:** If you want to get a specific pipeline you need to pass the pipeline name in:\n","```\n","pipeline_optimizer.get_pipeline(pipeline_name=pipeline_name)\n","```"]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["pipeline_model = pipeline_optimizer.get_pipeline(pipeline_name=best_pipeline_name)"],"execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["Next, check features importance for selected pipeline."]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["pipeline_optimizer.get_pipeline_details()['features_importance']"],"execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["**Tip:** If you want to check all model evaluation metrics-details, use:\n","```\n","pipeline_optimizer.get_pipeline_details()\n","```"]},{"metadata":{},"cell_type":"markdown","source":["\n","## Inspect pipeline"]},{"metadata":{},"cell_type":"markdown","source":["\n","### Visualize pipeline model\n","\n","Preview pipeline model stages as a graph. Each node's name links to a detailed description of the stage.\n"]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["pipeline_model.visualize()"],"execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["\n","### Preview pipeline model as python code\n","In the next cell, you can preview the saved pipeline model as a python code. 
\n","You will be able to review the exact steps used to create the model.\n","\n","**Note:** If you want to get sklearn representation add following parameter to `pretty_print` call: `astype='sklearn'`."]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["pipeline_model.pretty_print(combinators=False, ipython_display=True)"],"execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["\n","## Deploy and Score\n","\n","In this section you will learn how to deploy and score the model as a web service."]},{"metadata":{},"cell_type":"markdown","source":["\n","### Working with spaces\n","\n","In this section you will specify a deployment space for organizing the assets for deploying and scoring the model. If you do not have an existing space, you can use [Deployment Spaces Dashboard](https://dataplatform.cloud.ibm.com/ml-runtime/spaces?context=cpdaas) to create a new space, following these steps:\n","\n","- Click **New Deployment Space**.\n","- Create an empty space.\n","- Select Cloud Object Storage.\n","- Select Watson Machine Learning instance and press **Create**.\n","- Copy `space_id` and paste it below.\n","\n","**Tip**: You can also use the SDK to prepare the space for your work. 
Learn more [here](https://github.com/IBM/watson-machine-learning-samples/blob/master/notebooks/python_sdk/instance-management/Space%20management.ipynb).\n","\n","**Action**: assign or update space ID below"]},{"metadata":{},"cell_type":"markdown","source":["### Deployment creation"]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["target_space_id = \"\"\n","\n","from ibm_watson_machine_learning.deployment import WebService\n","service = WebService(source_wml_credentials=wml_credentials,\n"," target_wml_credentials=wml_credentials,\n"," source_project_id=experiment_metadata['project_id'],\n"," target_space_id=target_space_id)\n","service.create(\n","model=best_pipeline_name,\n","metadata=experiment_metadata,\n","deployment_name='Best_pipeline_webservice'\n",")"],"execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["Use the `print` method for the deployment object to show basic information about the service: "]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["print(service)"],"execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["To show all available information about the deployment use the `.get_params()` method:"]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["service.get_params()"],"execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["### Scoring of webservice\n","You can make scoring request by calling `score()` on the deployed pipeline."]},{"metadata":{},"cell_type":"markdown","source":["If you want to work with the web service in an external Python application,follow these steps to retrieve the service object:\n","\n"," - Initialize the service by `service = WebService(wml_credentials)`\n"," - Get deployment_id by `service.list()` method\n"," - Get webservice object by `service.get('deployment_id')` method\n","\n","After that you can call `service.score()` 
method."]},{"metadata":{},"cell_type":"markdown","source":["### Deleting deployment\n","\n","You can delete the existing deployment by calling the `service.delete()` command.\n","To list the existing web services, use `service.list()`."]},{"metadata":{},"cell_type":"markdown","source":["\n","\n","## Running AutoAI experiment with Python SDK"]},{"metadata":{},"cell_type":"markdown","source":["If you want to run AutoAI experiment using python API follow up the steps decribed below. The experiment settings were generated basing on parameters set on UI.\n"," - Go to your COS dashboard.\n"," - In Service credentials tab, click New Credential.\n"," - Add the inline configuration parameter: `{“HMAC”:true}`, click Add.\n","This configuration parameter adds the following section to the instance credentials, (for use later in this notebook):\n","```\n","cos_hmac_keys”: {\n"," “access_key_id”: “***“,\n"," “secret_access_key”: “***”\n"," }\n"," ```\n","\n","**Action:** Please provide cos credentials in following cells.\n","\n","- Use provided markdown cells to run code.\n","\n"]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"markdown","source":["```\n","from ibm_watson_machine_learning.experiment import AutoAI\n","\n","experiment = AutoAI(wml_credentials, project_id=experiment_metadata['project_id'])\n","```"]},{"metadata":{},"cell_type":"markdown","source":["```\n","#@hidden_cell\n","cos_hmac_keys = {\n"," \"access_key_id\": \"PLACE_YOUR_ACCESS_KEY_ID_HERE\",\n"," \"secret_access_key\": \"PLACE_YOUR_SECRET_ACCESS_KEY_HERE\"\n"," }\n"," \n","cos_api_key = \"PLACE_YOUR_API_KEY_HERE\"\n","OPTIMIZER_NAME = 'custom_name'\n","```"]},{"metadata":{"pycharm":{"name":"#%% md\n"}},"cell_type":"markdown","source":["The experiment settings were generated basing on parameters set on UI."]},{"metadata":{"pycharm":{"name":"#%% raw\n"}},"cell_type":"markdown","source":["```\n","from ibm_watson_machine_learning.helpers import DataConnection\n","from ibm_watson_machine_learning.helpers 
import S3Connection, S3Location\n","\n","training_data_reference = [DataConnection(\n"," connection=S3Connection(\n"," api_key=cos_api_key,\n"," auth_endpoint='https://iam.bluemix.net/oidc/token/',\n"," endpoint_url='https://s3-api.us-geo.objectstorage.softlayer.net',\n"," access_key_id = cos_hmac_keys['access_key_id'],\n"," secret_access_key = cos_hmac_keys['secret_access_key']\n"," ),\n"," location=S3Location(\n"," bucket='autoaitutorialnew-donotdelete-pr-zhaudczzjfda0e',\n"," path='german_credit_data.csv'\n"," )),\n","]\n","from ibm_watson_machine_learning.helpers import S3Connection, S3Location\n","training_result_reference = DataConnection(\n"," connection=S3Connection(\n"," api_key=cos_api_key,\n"," auth_endpoint='https://iam.bluemix.net/oidc/token/',\n"," endpoint_url='https://s3-api.us-geo.objectstorage.softlayer.net',\n"," access_key_id = cos_hmac_keys['access_key_id'],\n"," secret_access_key = cos_hmac_keys['secret_access_key']\n"," ),\n"," location=S3Location(\n"," bucket='autoaitutorialnew-donotdelete-pr-zhaudczzjfda0e',\n"," path='auto_ml/509ff920-3fcf-4eb9-9abc-bbc63b4a301a/wml_data/33aa26d1-15eb-482c-93ca-76c186c431a2/data/automl',\n"," model_location='auto_ml/509ff920-3fcf-4eb9-9abc-bbc63b4a301a/wml_data/33aa26d1-15eb-482c-93ca-76c186c431a2/data/automl/pre_hpo_d_output/Pipeline1/model.pickle',\n"," training_status='auto_ml/509ff920-3fcf-4eb9-9abc-bbc63b4a301a/wml_data/33aa26d1-15eb-482c-93ca-76c186c431a2/training-status.json'\n"," ))\n","```"]},{"metadata":{"pycharm":{"name":"#%%raw\n"}},"cell_type":"markdown","source":["```\n","pipeline_optimizer = experiment.optimizer(\n"," name=OPTIMIZER_NAME,\n"," prediction_type=experiment_metadata['prediction_type'],\n"," prediction_column=experiment_metadata['prediction_column'],\n"," scoring=experiment_metadata['scoring'],\n"," holdout_size=experiment_metadata['holdout_size'],\n"," csv_separator=experiment_metadata['csv_separator'],\n"," positive_label=experiment_metadata['positive_label'],\n"," 
drop_duplicates=experiment_metadata['drop_duplicates'])\n","```"]},{"metadata":{},"cell_type":"markdown","source":["```\n","pipeline_optimizer.fit(training_data_reference=training_data_reference,\n"," training_results_reference=training_result_reference,\n"," background_mode=False)\n","```"]},{"metadata":{},"cell_type":"markdown","source":["\n","\n","# Next steps\n","\n","#### [Online Documentation](https://www.ibm.com/cloud/watson-studio/autoai)"]},{"metadata":{},"cell_type":"markdown","source":["\n","### Copyrights\n","\n","Licensed Materials - Copyright © 2021 IBM. This notebook and its source code are released under the terms of the ILAN License.\n","Use, duplication disclosure restricted by GSA ADP Schedule Contract with IBM Corp.\n","\n","**Note:** The auto-generated notebooks are subject to the International License Agreement for Non-Warranted Programs (or equivalent) and License Information document for Watson Studio Auto-generated Notebook (License Terms), such agreements located in the link below. Specifically, the Source Components and Sample Materials clause included in the License Information document for Watson Studio Auto-generated Notebook applies to the auto-generated notebooks. 
\n","\n","By downloading, copying, accessing, or otherwise using the materials, you agree to the License Terms \n","\n","___"]}],"metadata":{"kernelspec":{"name":"python3","display_name":"Python 3.7","language":"python"},"language_info":{"name":"python","version":"3.7.10","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"pycharm":{"stem_cell":{"cell_type":"raw","metadata":{"collapsed":false},"source":["\n"]}}},"nbformat":4,"nbformat_minor":1} -------------------------------------------------------------------------------- /notebooks/with-output/Credit Data Analysis - P2 notebook.ipynb: -------------------------------------------------------------------------------- 1 | {"cells":[{"metadata":{},"cell_type":"markdown","source":["![image](https://github.com/IBM/watson-machine-learning-samples/raw/master/cloud/notebooks/headers/AutoAI-Banner_Pipeline-Notebook.png)\n","# Pipeline 2 Notebook - AutoAI Notebook v1.15.0\n","\n","Consider these tips for working with an auto-generated notebook:\n","- Notebook code generated using AutoAI will execute successfully. If you modify the notebook, we cannot guarantee it will run successfully.\n","- This pipeline is optimized for the original data set. The pipeline might fail or produce sub-optimum results if used with different data. If you want to use a different data set, consider retraining the AutoAI experiment to generate a new pipeline. For more information, see Cloud Platform \n","- Before modifying the pipeline or trying to re-fit the pipeline, consider that the code converts dataframes to numpy arrays before fitting the pipeline (a current restriction of the preprocessor pipeline).\n"]},{"metadata":{},"cell_type":"markdown","source":["\n","## Notebook content\n","\n","This notebook contains a Scikit-learn representation of AutoAI pipeline. 
This notebook introduces commands for getting data, training the model, and testing the model. \n","\n","Some familiarity with Python is helpful. This notebook uses Python 3.7 and scikit-learn 0.23.2."]},{"metadata":{"pycharm":{"name":"#%% md\n"}},"cell_type":"markdown","source":["## Notebook goals\n","\n","- Scikit-learn pipeline definition\n","- Pipeline training \n","- Pipeline evaluation\n","\n","## Contents\n","\n","This notebook contains the following parts:\n","\n","**[Setup](#setup)**
\n","  [Package installation](#install)
\n","  [AutoAI experiment metadata](#variables_definition)
\n","**[Pipeline inspection](#inspection)**
\n","  [Read training data](#read)
\n","  [Train and test data split](#split)
\n","  [Make pipeline](#preview_model_to_python_code)
\n","  [Train pipeline model](#train)
\n","  [Test pipeline model](#test_model)
\n","**[Next steps](#next_steps)**
\n","**[Copyrights](#copyrights)**"]},{"metadata":{},"cell_type":"markdown","source":["\n","# Setup"]},{"metadata":{},"cell_type":"markdown","source":["\n","## Package installation\n","Before you use the sample code in this notebook, install the following packages:\n"," - ibm_watson_machine_learning,\n"," - autoai-libs,\n"," - scikit-learn,\n"," - xgboost.\n"]},{"metadata":{"execution":{"iopub.execute_input":"2020-10-12T14:00:45.009458Z","iopub.status.busy":"2020-10-12T14:00:45.007968Z","iopub.status.idle":"2020-10-12T14:00:46.037702Z","shell.execute_reply":"2020-10-12T14:00:46.038270Z"},"pycharm":{"name":"#%%\n"},"scrolled":true},"cell_type":"code","source":["!conda remove --force libxgboost -y\n","!pip install ibm-watson-machine-learning | tail -n 1\n","!pip install -U autoai-libs==1.12.7 | tail -n 1\n","!pip install -U scikit-learn==0.23.2 | tail -n 1\n","!pip install -U xgboost==1.3.3 | tail -n 1"],"execution_count":1,"outputs":[{"output_type":"stream","text":"\n## Package Plan ##\n\n environment location: /opt/conda/envs/Python-3.7-main\n\n removed specs:\n - libxgboost\n\n\nThe following packages will be REMOVED:\n\n libxgboost-0.90-he6710b0_1\n\n\nPreparing transaction: done\nVerifying transaction: done\nExecuting transaction: done\n/opt/conda/envs/Python-3.7-main/lib/python3.7/site-packages/secretstorage/dhcrypto.py:16: CryptographyDeprecationWarning: int_from_bytes is deprecated, use int.from_bytes instead\n from cryptography.utils import int_from_bytes\n/opt/conda/envs/Python-3.7-main/lib/python3.7/site-packages/secretstorage/util.py:25: CryptographyDeprecationWarning: int_from_bytes is deprecated, use int.from_bytes instead\n from cryptography.utils import int_from_bytes\nRequirement already satisfied: docutils<0.16,>=0.10 in /opt/conda/envs/Python-3.7-main/lib/python3.7/site-packages (from ibm-cos-sdk-core==2.7.0->ibm-cos-sdk==2.7.*->ibm-watson-machine-learning) 
(0.15.2)\n/opt/conda/envs/Python-3.7-main/lib/python3.7/site-packages/secretstorage/dhcrypto.py:16: CryptographyDeprecationWarning: int_from_bytes is deprecated, use int.from_bytes instead\n from cryptography.utils import int_from_bytes\n/opt/conda/envs/Python-3.7-main/lib/python3.7/site-packages/secretstorage/util.py:25: CryptographyDeprecationWarning: int_from_bytes is deprecated, use int.from_bytes instead\n from cryptography.utils import int_from_bytes\n\u001b[31mERROR: tensorflow 2.1.0 has requirement scipy==1.4.1; python_version >= \"3\", but you'll have scipy 1.5.0 which is incompatible.\u001b[0m\nSuccessfully installed autoai-libs-1.12.7 gensim-3.8.3 numpy-1.19.2 smart-open-5.1.0\n/opt/conda/envs/Python-3.7-main/lib/python3.7/site-packages/secretstorage/dhcrypto.py:16: CryptographyDeprecationWarning: int_from_bytes is deprecated, use int.from_bytes instead\n from cryptography.utils import int_from_bytes\n/opt/conda/envs/Python-3.7-main/lib/python3.7/site-packages/secretstorage/util.py:25: CryptographyDeprecationWarning: int_from_bytes is deprecated, use int.from_bytes instead\n from cryptography.utils import int_from_bytes\nSuccessfully installed scikit-learn-0.23.2\n/opt/conda/envs/Python-3.7-main/lib/python3.7/site-packages/secretstorage/dhcrypto.py:16: CryptographyDeprecationWarning: int_from_bytes is deprecated, use int.from_bytes instead\n from cryptography.utils import int_from_bytes\n/opt/conda/envs/Python-3.7-main/lib/python3.7/site-packages/secretstorage/util.py:25: CryptographyDeprecationWarning: int_from_bytes is deprecated, use int.from_bytes instead\n from cryptography.utils import int_from_bytes\nSuccessfully installed xgboost-1.3.3\n","name":"stdout"}]},{"metadata":{},"cell_type":"markdown","source":["\n","## AutoAI experiment metadata\n","The following cell contains the training data connection details. 
\n","**Note**: The connection might contain authorization credentials, so be careful when sharing the notebook."]},{"metadata":{"execution":{"iopub.execute_input":"2020-10-12T14:00:49.797633Z","iopub.status.busy":"2020-10-12T14:00:49.796778Z","iopub.status.idle":"2020-10-12T14:00:57.182715Z","shell.execute_reply":"2020-10-12T14:00:57.183132Z"},"pycharm":{"is_executing":true}},"cell_type":"code","source":["#@hidden_cell\n","from ibm_watson_machine_learning.helpers import DataConnection\n","from ibm_watson_machine_learning.helpers import S3Connection, S3Location\n","\n","training_data_reference = [DataConnection(\n"," connection=S3Connection(\n"," api_key='',\n"," auth_endpoint='https://iam.bluemix.net/oidc/token/',\n"," endpoint_url='https://s3-api.us-geo.objectstorage.softlayer.net'\n"," ),\n"," location=S3Location(\n"," bucket='autoaitutorialnew-donotdelete-pr-zhaudczzjfda0e',\n"," path='german_credit_data.csv'\n"," )),\n","]\n","training_result_reference = DataConnection(\n"," connection=S3Connection(\n"," api_key='',\n"," auth_endpoint='https://iam.bluemix.net/oidc/token/',\n"," endpoint_url='https://s3-api.us-geo.objectstorage.softlayer.net'\n"," ),\n"," location=S3Location(\n"," bucket='autoaitutorialnew-donotdelete-pr-zhaudczzjfda0e',\n"," path='auto_ml/509ff920-3fcf-4eb9-9abc-bbc63b4a301a/wml_data/33aa26d1-15eb-482c-93ca-76c186c431a2/data/automl',\n"," model_location='auto_ml/509ff920-3fcf-4eb9-9abc-bbc63b4a301a/wml_data/33aa26d1-15eb-482c-93ca-76c186c431a2/data/automl/hpo_d_output/Pipeline1/model.pickle',\n"," training_status='auto_ml/509ff920-3fcf-4eb9-9abc-bbc63b4a301a/wml_data/33aa26d1-15eb-482c-93ca-76c186c431a2/training-status.json'\n"," ))"],"execution_count":2,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["Following cell contains input parameters provided to run the AutoAI experiment in Watson 
Studio."]},{"metadata":{"execution":{"iopub.execute_input":"2020-10-12T14:00:57.187305Z","iopub.status.busy":"2020-10-12T14:00:57.186602Z","iopub.status.idle":"2020-10-12T14:00:57.188392Z","shell.execute_reply":"2020-10-12T14:00:57.188878Z"},"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["experiment_metadata = dict(\n"," prediction_type='classification',\n"," prediction_column='Result',\n"," holdout_size=0.1,\n"," scoring='accuracy',\n"," csv_separator=',',\n"," random_state=33,\n"," max_number_of_estimators=2,\n"," training_data_reference=training_data_reference,\n"," training_result_reference=training_result_reference,\n"," deployment_url='https://us-south.ml.cloud.ibm.com',\n"," project_id='0e2a32c8-f2ed-4587-8479-3a22b2ea57b3',\n"," positive_label=1,\n"," drop_duplicates=True\n",")"],"execution_count":3,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["\n","# Pipeline inspection"]},{"metadata":{"pycharm":{"name":"#%% md\n"}},"cell_type":"markdown","source":["\n","## Read training data\n","\n","Retrieve training dataset from AutoAI experiment as pandas DataFrame. If reading data using Flight Service Connection results with error, please provide data as Pandas DataFrame object e.g. 
reading .CSV file with `pandas.read_csv()`"]},{"metadata":{"execution":{"iopub.execute_input":"2020-10-12T14:01:16.076169Z","iopub.status.busy":"2020-10-12T14:01:16.075589Z","iopub.status.idle":"2020-10-12T14:01:19.190233Z","shell.execute_reply":"2020-10-12T14:01:19.190807Z"},"pycharm":{"is_executing":true,"name":"#%%\n"}},"cell_type":"code","source":["df = training_data_reference[0].read(csv_separator=experiment_metadata['csv_separator'])\n","df.dropna('rows', how='any', subset=[experiment_metadata['prediction_column']], inplace=True)"],"execution_count":4,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["\n","## Train and test data split"]},{"metadata":{"pycharm":{"is_executing":true}},"cell_type":"code","source":["from sklearn.model_selection import train_test_split\n","\n","df.drop_duplicates(inplace=True)\n","X = df.drop([experiment_metadata['prediction_column']], axis=1).values\n","y = df[experiment_metadata['prediction_column']].values\n","\n","train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=experiment_metadata['holdout_size'],\n"," stratify=y, random_state=experiment_metadata['random_state'])"],"execution_count":5,"outputs":[]},{"metadata":{"pycharm":{"name":"#%% md\n"}},"cell_type":"markdown","source":["\n","## Make pipeline\n","In the next cell, you can find the Scikit-learn definition of the selected AutoAI pipeline."]},{"metadata":{},"cell_type":"markdown","source":["Import statements."]},{"metadata":{"pycharm":{"is_executing":true,"name":"#%%\n"}},"cell_type":"code","source":["from autoai_libs.transformers.exportable import NumpyColumnSelector\n","from autoai_libs.transformers.exportable import CompressStrings\n","from autoai_libs.transformers.exportable import NumpyReplaceMissingValues\n","from autoai_libs.transformers.exportable import NumpyReplaceUnknownValues\n","from autoai_libs.transformers.exportable import boolean2float\n","from autoai_libs.transformers.exportable import CatImputer\n","from 
autoai_libs.transformers.exportable import CatEncoder\n","import numpy as np\n","from autoai_libs.transformers.exportable import float32_transform\n","from sklearn.pipeline import make_pipeline\n","from autoai_libs.transformers.exportable import FloatStr2Float\n","from autoai_libs.transformers.exportable import NumImputer\n","from autoai_libs.transformers.exportable import OptStandardScaler\n","from sklearn.pipeline import make_union\n","from autoai_libs.transformers.exportable import NumpyPermuteArray\n","from xgboost import XGBClassifier"],"execution_count":6,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["#### Pre-processing & Estimator."]},{"metadata":{"pycharm":{"is_executing":true,"name":"#%%\n"}},"cell_type":"code","source":["numpy_column_selector_0 = NumpyColumnSelector(\n"," columns=[\n"," 0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,\n"," ]\n",")\n","compress_strings = CompressStrings(\n"," compress_type=\"hash\",\n"," dtypes_list=[\n"," \"char_str\", \"int_num\", \"char_str\", \"char_str\", \"char_str\", \"char_str\",\n"," \"int_num\", \"char_str\", \"char_str\", \"int_num\", \"char_str\", \"int_num\",\n"," \"char_str\", \"char_str\", \"int_num\", \"char_str\", \"int_num\", \"char_str\",\n"," \"char_str\",\n"," ],\n"," missing_values_reference_list=[\"\", \"-\", \"?\", float(\"nan\")],\n"," misslist_list=[\n"," [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [],\n"," [], [],\n"," ],\n",")\n","numpy_replace_missing_values_0 = NumpyReplaceMissingValues(\n"," missing_values=[], filling_values=float(\"nan\")\n",")\n","numpy_replace_unknown_values = NumpyReplaceUnknownValues(\n"," filling_values=float(\"nan\"),\n"," filling_values_list=[\n"," float(\"nan\"), float(\"nan\"), float(\"nan\"), float(\"nan\"), float(\"nan\"),\n"," float(\"nan\"), float(\"nan\"), float(\"nan\"), float(\"nan\"), float(\"nan\"),\n"," float(\"nan\"), float(\"nan\"), float(\"nan\"), float(\"nan\"), float(\"nan\"),\n"," float(\"nan\"), 
float(\"nan\"), float(\"nan\"), float(\"nan\"),\n"," ],\n"," missing_values_reference_list=[\"\", \"-\", \"?\", float(\"nan\")],\n",")\n","cat_imputer = CatImputer(\n"," strategy=\"most_frequent\",\n"," missing_values=float(\"nan\"),\n"," sklearn_version_family=\"23\",\n",")\n","cat_encoder = CatEncoder(\n"," encoding=\"ordinal\",\n"," categories=\"auto\",\n"," dtype=np.float64,\n"," handle_unknown=\"error\",\n"," sklearn_version_family=\"23\",\n",")\n","pipeline_0 = make_pipeline(\n"," numpy_column_selector_0,\n"," compress_strings,\n"," numpy_replace_missing_values_0,\n"," numpy_replace_unknown_values,\n"," boolean2float(),\n"," cat_imputer,\n"," cat_encoder,\n"," float32_transform(),\n",")\n","numpy_column_selector_1 = NumpyColumnSelector(columns=[4])\n","float_str2_float = FloatStr2Float(\n"," dtypes_list=[\"int_num\"], missing_values_reference_list=[]\n",")\n","numpy_replace_missing_values_1 = NumpyReplaceMissingValues(\n"," missing_values=[], filling_values=float(\"nan\")\n",")\n","num_imputer = NumImputer(strategy=\"median\", missing_values=float(\"nan\"))\n","opt_standard_scaler = OptStandardScaler(\n"," num_scaler_copy=None,\n"," num_scaler_with_mean=None,\n"," num_scaler_with_std=None,\n"," use_scaler_flag=False,\n",")\n","pipeline_1 = make_pipeline(\n"," numpy_column_selector_1,\n"," float_str2_float,\n"," numpy_replace_missing_values_1,\n"," num_imputer,\n"," opt_standard_scaler,\n"," float32_transform(),\n",")\n","union = make_union(pipeline_0, pipeline_1)\n","numpy_permute_array = NumpyPermuteArray(\n"," axis=0,\n"," permutation_indices=[\n"," 0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 4,\n"," ],\n",")\n","xgb_classifier = XGBClassifier(\n"," base_score=0.5,\n"," booster=\"gbtree\",\n"," colsample_bylevel=1,\n"," colsample_bynode=1,\n"," colsample_bytree=1,\n"," gamma=0,\n"," gpu_id=-1,\n"," interaction_constraints=\"\",\n"," learning_rate=1.0,\n"," max_delta_step=0,\n"," max_depth=2,\n"," min_child_weight=2,\n"," 
missing=float(\"nan\"),\n"," monotone_constraints=\"()\",\n"," n_estimators=128,\n"," n_jobs=2,\n"," num_parallel_tree=1,\n"," random_state=33,\n"," reg_alpha=1,\n"," reg_lambda=1.0,\n"," scale_pos_weight=1,\n"," subsample=0.9992297983348898,\n"," tree_method=\"hist\",\n"," validate_parameters=1,\n"," verbosity=0,\n"," nthread=2,\n"," silent=True,\n"," seed=33,\n",")\n"],"execution_count":7,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["Pipeline."]},{"metadata":{"pycharm":{"is_executing":true,"name":"#%%\n"}},"cell_type":"code","source":["pipeline = make_pipeline(union, numpy_permute_array, xgb_classifier)"],"execution_count":8,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["\n","## Train pipeline model\n"]},{"metadata":{},"cell_type":"markdown","source":["### Define scorer from the optimization metric\n","This cell constructs the cell scorer based on the experiment metadata."]},{"metadata":{"pycharm":{"is_executing":true}},"cell_type":"code","source":["from sklearn.metrics import get_scorer\n","\n","scorer = get_scorer(experiment_metadata['scoring'])"],"execution_count":9,"outputs":[]},{"metadata":{"pycharm":{"name":"#%% md\n"}},"cell_type":"markdown","source":["\n","### Fit pipeline model\n","In this cell, the pipeline is fitted."]},{"metadata":{"execution":{"iopub.execute_input":"2020-10-12T14:01:19.291734Z","iopub.status.busy":"2020-10-12T14:01:19.244735Z","iopub.status.idle":"2020-10-12T14:01:19.338461Z","shell.execute_reply":"2020-10-12T14:01:19.338958Z"},"pycharm":{"is_executing":true,"name":"#%%\n"},"scrolled":true},"cell_type":"code","source":["pipeline.fit(train_X,train_y)"],"execution_count":10,"outputs":[{"output_type":"execute_result","execution_count":10,"data":{"text/plain":"Pipeline(steps=[('featureunion',\n FeatureUnion(transformer_list=[('pipeline-1',\n Pipeline(steps=[('numpycolumnselector',\n NumpyColumnSelector(columns=[0,\n 1,\n 2,\n 3,\n 5,\n 6,\n 7,\n 8,\n 9,\n 10,\n 11,\n 12,\n 13,\n 14,\n 15,\n 16,\n 17,\n 
18,\n 19])),\n ('compressstrings',\n CompressStrings(compress_type='hash',\n dtypes_list=['char_str',\n 'int_num',\n 'char_str',\n 'char_str',\n 'char_str',\n 'char_str',\n 'int_num',\n 'char_str'...\n interaction_constraints='', learning_rate=1.0,\n max_delta_step=0, max_depth=2,\n min_child_weight=2, missing=nan,\n monotone_constraints='()', n_estimators=128,\n n_jobs=2, nthread=2, num_parallel_tree=1,\n random_state=33, reg_alpha=1, reg_lambda=1.0,\n scale_pos_weight=1, seed=33, silent=True,\n subsample=0.9992297983348898, tree_method='hist',\n validate_parameters=1, verbosity=0))])"},"metadata":{}}]},{"metadata":{},"cell_type":"markdown","source":["\n","## Test pipeline model"]},{"metadata":{},"cell_type":"markdown","source":["Score the fitted pipeline with the generated scorer using the holdout dataset."]},{"metadata":{"execution":{"iopub.execute_input":"2020-10-12T14:02:03.910267Z","iopub.status.busy":"2020-10-12T14:02:03.909710Z","iopub.status.idle":"2020-10-12T14:02:03.914154Z","shell.execute_reply":"2020-10-12T14:02:03.914727Z"},"pycharm":{"is_executing":true,"name":"#%%\n"}},"cell_type":"code","source":["score = scorer(pipeline, test_X, test_y)\n","print(score)"],"execution_count":11,"outputs":[{"output_type":"stream","text":"0.68\n","name":"stdout"}]},{"metadata":{},"cell_type":"markdown","source":["\n","# Next steps\n","\n","#### [Model deployment as webservice](https://github.com/IBM/watson-machine-learning-samples/tree/master/cloud/notebooks/python_sdk/deployments/autoai)\n","#### [Run AutoAI experiment with python SDK](https://github.com/IBM/watson-machine-learning-samples/tree/master/cloud/notebooks/python_sdk/experiments/autoai) "]},{"metadata":{"pycharm":{"name":"#%% md\n"}},"cell_type":"markdown","source":["\n","### Copyrights\n","\n","Licensed Materials - Copyright © 2021 IBM. This notebook and its source code are released under the terms of the ILAN License. 
Use, duplication disclosure restricted by GSA ADP Schedule Contract with IBM Corp.\n","\n","**Note:** The auto-generated notebooks are subject to the International License Agreement for Non-Warranted Programs (or equivalent) and License Information document for Watson Studio Auto-generated Notebook (License Terms), such agreements located in the link below. Specifically, the Source Components and Sample Materials clause included in the License Information document for Watson Studio Auto-generated Notebook applies to the auto-generated notebooks. \n","\n","By downloading, copying, accessing, or otherwise using the materials, you agree to the License Terms\n","\n","___"]}],"metadata":{"kernelspec":{"name":"python3","display_name":"Python 3.7","language":"python"},"language_info":{"name":"python","version":"3.7.10","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat":4,"nbformat_minor":2} -------------------------------------------------------------------------------- /notebooks/with-output/Credit Data Analysis - experiment notebook.ipynb: -------------------------------------------------------------------------------- 1 | {"cells":[{"metadata":{},"cell_type":"markdown","source":["![image](https://github.com/IBM/watson-machine-learning-samples/raw/master/cloud/notebooks/headers/AutoAI-Banner_Experiment-Notebook.png)\n","# Experiment Notebook - AutoAI Notebook v1.15.0\n","\n","\n","This notebook contains the steps and code to demonstrate support of AutoAI experiments in Watson Machine Learning service. It introduces Python SDK commands for data retrieval, training experiments, persisting pipelines, testing pipelines, refining pipelines, and scoring the resulting model.\n","\n","**Note:** Notebook code generated using AutoAI will execute successfully. If code is modified or reordered, there is no guarantee it will successfully execute. 
For details, see: Saving an Auto AI experiment as a notebook\n"]},{"metadata":{},"cell_type":"markdown","source":["Some familiarity with Python is helpful. This notebook uses Python 3.7 and `ibm_watson_machine_learning` package.\n","\n","\n","## Notebook goals\n","\n","The learning goals of this notebook are:\n","- Defining an AutoAI experiment\n","- Training AutoAI models \n","- Comparing trained models\n","- Deploying the model as a web service\n","- Scoring the model to generate predictions.\n","\n","\n","\n","## Contents\n","\n","This notebook contains the following parts:\n","\n","**[Setup](#setup)**
\n","  [Package installation](#install)
\n","  [Watson Machine Learning connection](#connection)
\n","**[Experiment configuration](#configuration)**
\n","  [Experiment metadata](#metadata)
\n","**[Working with completed AutoAI experiment](#work)**
\n","  [Get fitted AutoAI optimizer](#get)
\n","  [Pipelines comparison](#comparison)
\n","  [Get pipeline as scikit-learn pipeline model](#get_pipeline)
\n","  [Inspect pipeline](#inspect_pipeline)
\n","    [Visualize pipeline model](#visualize)
\n","    [Preview pipeline model as python code](#preview)
\n","**[Deploy and Score](#scoring)**
\n","  [Working with spaces](#working_spaces)
\n","**[Running AutoAI experiment with Python SDK](#run)**
\n","**[Clean up](#cleanup)**
\n","**[Next steps](#next_steps)**
\n","**[Copyrights](#copyrights)**"]},{"metadata":{},"cell_type":"markdown","source":["\n","# Setup"]},{"metadata":{},"cell_type":"markdown","source":["\n","## Package installation\n","Before you use the sample code in this notebook, install the following packages:\n"," - ibm_watson_machine_learning,\n"," - autoai-libs,\n"," - lale,\n"," - scikit-learn,\n"," - xgboost,\n"," - lightgbm.\n"]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["!conda remove --force libxgboost -y\n","!pip install ibm-watson-machine-learning | tail -n 1\n","!pip install -U autoai-libs==1.12.7 | tail -n 1\n","!pip install -U 'lale>=0.5.1,<0.6' | tail -n 1\n","!pip install -U scikit-learn==0.23.2 | tail -n 1\n","!pip install -U xgboost==1.3.3 | tail -n 1\n","!pip install -U lightgbm==3.1.1 | tail -n 1"],"execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["\n","# Experiment configuration"]},{"metadata":{"pycharm":{"name":"#%% md\n"}},"cell_type":"markdown","source":["\n","## Experiment metadata\n","This cell defines the metadata for the experiment, including: training_data_reference, training_result_reference, experiment_metadata."]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["#@hidden_cell\n","from ibm_watson_machine_learning.helpers import DataConnection\n","from ibm_watson_machine_learning.helpers import S3Connection, S3Location\n","\n","training_data_reference = [DataConnection(\n"," connection=S3Connection(\n"," api_key='',\n"," auth_endpoint='https://iam.bluemix.net/oidc/token/',\n"," endpoint_url='https://s3-api.us-geo.objectstorage.softlayer.net'\n"," ),\n"," location=S3Location(\n"," bucket='autoaitutorialnew-donotdelete-pr-zhaudczzjfda0e',\n"," path='german_credit_data.csv'\n"," )),\n","]\n","training_result_reference = DataConnection(\n"," connection=S3Connection(\n"," api_key='',\n"," auth_endpoint='https://iam.bluemix.net/oidc/token/',\n"," 
endpoint_url='https://s3-api.us-geo.objectstorage.softlayer.net'\n"," ),\n"," location=S3Location(\n"," bucket='autoaitutorialnew-donotdelete-pr-zhaudczzjfda0e',\n"," path='auto_ml/509ff920-3fcf-4eb9-9abc-bbc63b4a301a/wml_data/33aa26d1-15eb-482c-93ca-76c186c431a2/data/automl',\n"," model_location='auto_ml/509ff920-3fcf-4eb9-9abc-bbc63b4a301a/wml_data/33aa26d1-15eb-482c-93ca-76c186c431a2/data/automl/pre_hpo_d_output/Pipeline1/model.pickle',\n"," training_status='auto_ml/509ff920-3fcf-4eb9-9abc-bbc63b4a301a/wml_data/33aa26d1-15eb-482c-93ca-76c186c431a2/training-status.json'\n"," ))"],"execution_count":1,"outputs":[]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["experiment_metadata = dict(\n"," prediction_type='classification',\n"," prediction_column='Result',\n"," holdout_size=0.1,\n"," scoring='accuracy',\n"," csv_separator=',',\n"," random_state=33,\n"," max_number_of_estimators=2,\n"," training_data_reference=training_data_reference,\n"," training_result_reference=training_result_reference,\n"," deployment_url='https://us-south.ml.cloud.ibm.com',\n"," project_id='0e2a32c8-f2ed-4587-8479-3a22b2ea57b3',\n"," positive_label=1,\n"," drop_duplicates=True\n",")"],"execution_count":2,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["\n","## Watson Machine Learning connection\n","\n","This cell defines the credentials required to work with the Watson Machine Learning service.\n","\n","**Action** Please provide IBM Cloud apikey following [docs](https://cloud.ibm.com/docs/account?topic=account-userapikey)."]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["api_key = ''"],"execution_count":3,"outputs":[]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["wml_credentials = {\n"," \"apikey\": api_key,\n"," \"url\": experiment_metadata['deployment_url']\n","}"],"execution_count":4,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["\n","\n","\n","# Working with completed AutoAI 
experiment\n","\n","This cell imports the pipelines generated for the experiment so they can be compared to find the optimal pipeline to save as a model."]},{"metadata":{},"cell_type":"markdown","source":["\n","\n","\n","## Get fitted AutoAI optimizer"]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["from ibm_watson_machine_learning.experiment import AutoAI\n","\n","pipeline_optimizer = AutoAI(wml_credentials, project_id=experiment_metadata['project_id']).runs.get_optimizer(metadata=experiment_metadata)"],"execution_count":5,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["Use `get_params()`- to retrieve configuration parameters."]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["pipeline_optimizer.get_params()"],"execution_count":6,"outputs":[{"output_type":"execute_result","execution_count":6,"data":{"text/plain":"{'name': 'Credit Data Analysis',\n 'desc': '',\n 'prediction_type': 'classification',\n 'prediction_column': 'Result',\n 'prediction_columns': None,\n 'timestamp_column_name': None,\n 'scoring': 'accuracy',\n 'holdout_size': 0.1,\n 'max_num_daub_ensembles': 2.0,\n 't_shirt_size': 'a6c4923b-b8e4-444c-9f43-8a7ec3020110',\n 'train_sample_rows_test_size': None,\n 'include_only_estimators': None,\n 'backtest_num': None,\n 'lookback_window': None,\n 'forecast_window': None,\n 'backtest_gap_length': None,\n 'cognito_transform_names': None,\n 'data_join_graph': False,\n 'csv_separator': ',',\n 'excel_sheet': 0,\n 'encoding': 'utf-8',\n 'positive_label': None,\n 'drop_duplicates': True,\n 'text_processing': None,\n 'word2vec_feature_number': None,\n 'daub_give_priority_to_runtime': None,\n 'run_id': '33aa26d1-15eb-482c-93ca-76c186c431a2'}"},"metadata":{}}]},{"metadata":{"pycharm":{"name":"#%% md\n"}},"cell_type":"markdown","source":["\n","## Pipelines comparison\n","\n","Use the `summary()` method to list trained pipelines and evaluation metrics information in\n","the form of a Pandas DataFrame. 
You can use the DataFrame to compare all discovered pipelines and select the one you like for further testing."]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["summary = pipeline_optimizer.summary()\n","best_pipeline_name = list(summary.index)[0]\n","summary"],"execution_count":7,"outputs":[{"output_type":"execute_result","execution_count":7,"data":{"text/plain":" Enhancements Estimator training_roc_auc \\\nPipeline Name \nPipeline_2 HPO XGBClassifier 0.766155 \nPipeline_3 HPO, FE XGBClassifier 0.766155 \nPipeline_4 HPO, FE XGBClassifier 0.766155 \nPipeline_1 XGBClassifier 0.777372 \nPipeline_7 HPO, FE DecisionTreeClassifier 0.684921 \nPipeline_8 HPO, FE DecisionTreeClassifier 0.684921 \nPipeline_5 DecisionTreeClassifier 0.665344 \nPipeline_6 HPO DecisionTreeClassifier 0.665344 \n\n holdout_precision training_average_precision \\\nPipeline Name \nPipeline_2 0.756757 0.862943 \nPipeline_3 0.756757 0.862943 \nPipeline_4 0.756757 0.862943 \nPipeline_1 0.784810 0.874714 \nPipeline_7 0.736842 0.789945 \nPipeline_8 0.736842 0.789945 \nPipeline_5 0.740260 0.779152 \nPipeline_6 0.740260 0.779152 \n\n holdout_average_precision training_log_loss holdout_recall \\\nPipeline Name \nPipeline_2 0.809163 0.622273 0.800000 \nPipeline_3 0.809163 0.622273 0.800000 \nPipeline_4 0.809163 0.622273 0.800000 \nPipeline_1 0.842136 0.564745 0.885714 \nPipeline_7 0.729474 8.941815 0.800000 \nPipeline_8 0.729474 8.941815 0.800000 \nPipeline_5 0.732783 9.479092 0.814286 \nPipeline_6 0.732783 9.479092 0.814286 \n\n training_precision holdout_accuracy \\\nPipeline Name \nPipeline_2 0.811781 0.68 \nPipeline_3 0.811781 0.68 \nPipeline_4 0.811781 0.68 \nPipeline_1 0.810742 0.75 \nPipeline_7 0.808844 0.66 \nPipeline_8 0.808844 0.66 \nPipeline_5 0.796962 0.67 \nPipeline_6 0.796962 0.67 \n\n holdout_balanced_accuracy training_recall holdout_f1 \\\nPipeline Name \nPipeline_2 0.600000 0.869841 0.777778 \nPipeline_3 0.600000 0.869841 0.777778 \nPipeline_4 0.600000 0.869841 
0.777778 \nPipeline_1 0.659524 0.861905 0.832215 \nPipeline_7 0.566667 0.825397 0.767123 \nPipeline_8 0.566667 0.825397 0.767123 \nPipeline_5 0.573810 0.815873 0.775510 \nPipeline_6 0.573810 0.815873 0.775510 \n\n holdout_log_loss training_accuracy_(optimized) \\\nPipeline Name \nPipeline_2 0.724710 0.767778 \nPipeline_3 0.724710 0.767778 \nPipeline_4 0.724710 0.767778 \nPipeline_1 0.589836 0.762222 \nPipeline_7 11.743344 0.741111 \nPipeline_8 11.743344 0.741111 \nPipeline_5 11.397956 0.725556 \nPipeline_6 11.397956 0.725556 \n\n holdout_roc_auc training_balanced_accuracy training_f1 \nPipeline Name \nPipeline_2 0.682381 0.699735 0.839790 \nPipeline_3 0.682381 0.699735 0.839790 \nPipeline_4 0.682381 0.699735 0.839790 \nPipeline_1 0.732857 0.695767 0.835308 \nPipeline_7 0.566667 0.684921 0.816988 \nPipeline_8 0.566667 0.684921 0.816988 \nPipeline_5 0.573810 0.665344 0.806298 \nPipeline_6 0.573810 0.665344 0.806298 ","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
EnhancementsEstimatortraining_roc_aucholdout_precisiontraining_average_precisionholdout_average_precisiontraining_log_lossholdout_recalltraining_precisionholdout_accuracyholdout_balanced_accuracytraining_recallholdout_f1holdout_log_losstraining_accuracy_(optimized)holdout_roc_auctraining_balanced_accuracytraining_f1
Pipeline Name
Pipeline_2HPOXGBClassifier0.7661550.7567570.8629430.8091630.6222730.8000000.8117810.680.6000000.8698410.7777780.7247100.7677780.6823810.6997350.839790
Pipeline_3HPO, FEXGBClassifier0.7661550.7567570.8629430.8091630.6222730.8000000.8117810.680.6000000.8698410.7777780.7247100.7677780.6823810.6997350.839790
Pipeline_4HPO, FEXGBClassifier0.7661550.7567570.8629430.8091630.6222730.8000000.8117810.680.6000000.8698410.7777780.7247100.7677780.6823810.6997350.839790
Pipeline_1XGBClassifier0.7773720.7848100.8747140.8421360.5647450.8857140.8107420.750.6595240.8619050.8322150.5898360.7622220.7328570.6957670.835308
Pipeline_7HPO, FEDecisionTreeClassifier0.6849210.7368420.7899450.7294748.9418150.8000000.8088440.660.5666670.8253970.76712311.7433440.7411110.5666670.6849210.816988
Pipeline_8HPO, FEDecisionTreeClassifier0.6849210.7368420.7899450.7294748.9418150.8000000.8088440.660.5666670.8253970.76712311.7433440.7411110.5666670.6849210.816988
Pipeline_5DecisionTreeClassifier0.6653440.7402600.7791520.7327839.4790920.8142860.7969620.670.5738100.8158730.77551011.3979560.7255560.5738100.6653440.806298
Pipeline_6HPODecisionTreeClassifier0.6653440.7402600.7791520.7327839.4790920.8142860.7969620.670.5738100.8158730.77551011.3979560.7255560.5738100.6653440.806298
\n
"},"metadata":{}}]},{"metadata":{},"cell_type":"markdown","source":["\n","### Get pipeline as scikit-learn pipeline model\n","\n","After you compare the pipelines, download and save a scikit-learn pipeline model object from the\n","AutoAI training job.\n","\n","**Tip:** If you want to get a specific pipeline you need to pass the pipeline name in:\n","```\n","pipeline_optimizer.get_pipeline(pipeline_name=pipeline_name)\n","```"]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["pipeline_model = pipeline_optimizer.get_pipeline(pipeline_name=best_pipeline_name)"],"execution_count":8,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["Next, check features importance for selected pipeline."]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["pipeline_optimizer.get_pipeline_details()['features_importance']"],"execution_count":9,"outputs":[{"output_type":"execute_result","execution_count":9,"data":{"text/plain":" features_importance\nCheck_Account 1.00\nPurpose 0.54\nCredit_history 0.38\nNewFeature_4_round(Credit amount ) 0.36\nNewFeature_3_round(Duration) 0.34\nSaving_account 0.32\nNewFeature_1_sqrt(Credit amount ) 0.31\nProperty 0.28\nAge 0.28\nPresent_residence 0.22\nInstall_rate 0.21\nNewFeature_2_sqrt(Age) 0.20\nDuration 0.19\nCredit amount 0.17\nInstallment_plant 0.17\nJob 0.16\nNum_dependents 0.13\nNewFeature_8_round(sqrt(Age)) 0.13\nNewFeature_5_round(Age) 0.11\nPersonal_status 0.10\nEmployment 0.07\nTelephone 0.07\nHousing 0.06\nNewFeature_0_sqrt(Duration) 0.06\nNewFeature_6_round(sqrt(Duration)) 0.06\nOther_debrotors 0.05\nNum_credits 0.02\nForeign 0.00\nNewFeature_7_round(sqrt(Credit amount )) 0.00","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
features_importance
Check_Account1.00
Purpose0.54
Credit_history0.38
NewFeature_4_round(Credit amount )0.36
NewFeature_3_round(Duration)0.34
Saving_account0.32
NewFeature_1_sqrt(Credit amount )0.31
Property0.28
Age0.28
Present_residence0.22
Install_rate0.21
NewFeature_2_sqrt(Age)0.20
Duration0.19
Credit amount0.17
Installment_plant0.17
Job0.16
Num_dependents0.13
NewFeature_8_round(sqrt(Age))0.13
NewFeature_5_round(Age)0.11
Personal_status0.10
Employment0.07
Telephone0.07
Housing0.06
NewFeature_0_sqrt(Duration)0.06
NewFeature_6_round(sqrt(Duration))0.06
Other_debrotors0.05
Num_credits0.02
Foreign0.00
NewFeature_7_round(sqrt(Credit amount ))0.00
\n
"},"metadata":{}}]},{"metadata":{},"cell_type":"markdown","source":["**Tip:** If you want to check all model evaluation metrics-details, use:\n","```\n","pipeline_optimizer.get_pipeline_details()\n","```"]},{"metadata":{},"cell_type":"markdown","source":["\n","## Inspect pipeline"]},{"metadata":{},"cell_type":"markdown","source":["\n","### Visualize pipeline model\n","\n","Preview pipeline model stages as a graph. Each node's name links to a detailed description of the stage.\n"]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["pipeline_model.visualize()"],"execution_count":10,"outputs":[{"output_type":"display_data","data":{"text/plain":"","image/svg+xml":"\n\n\n\n\n\ncluster:(root)\n\n\n\n\n\n\nnumpy_column_selector_0\n\n\nNumpy-\nColumn-\nSelector\n\n\n\n\n\ncompress_strings\n\n\nCompress-\nStrings\n\n\n\n\n\nnumpy_column_selector_0->compress_strings\n\n\n\n\n\nnumpy_replace_missing_values_0\n\n\nNumpy-\nReplace-\nMissing-\nValues\n\n\n\n\n\ncompress_strings->numpy_replace_missing_values_0\n\n\n\n\n\nnumpy_replace_unknown_values\n\n\nNumpy-\nReplace-\nUnknown-\nValues\n\n\n\n\n\nnumpy_replace_missing_values_0->numpy_replace_unknown_values\n\n\n\n\n\nboolean2float\n\n\nboolean2float\n\n\n\n\n\nnumpy_replace_unknown_values->boolean2float\n\n\n\n\n\ncat_imputer\n\n\nCat-\nImputer\n\n\n\n\n\nboolean2float->cat_imputer\n\n\n\n\n\ncat_encoder\n\n\nCat-\nEncoder\n\n\n\n\n\ncat_imputer->cat_encoder\n\n\n\n\n\nfloat32_transform_0\n\n\nfloat32_-\ntransform\n\n\n\n\n\ncat_encoder->float32_transform_0\n\n\n\n\n\nconcat_features\n\n\nConcat-\nFeatures\n\n\n\n\n\nfloat32_transform_0->concat_features\n\n\n\n\n\nnumpy_column_selector_1\n\n\nNumpy-\nColumn-\nSelector\n\n\n\n\n\nfloat_str2_float\n\n\nFloat-\nStr2-\nFloat\n\n\n\n\n\nnumpy_column_selector_1->float_str2_float\n\n\n\n\n\nnumpy_replace_missing_values_1\n\n\nNumpy-\nReplace-\nMissing-\nValues\n\n\n\n\n\nfloat_str2_float->numpy_replace_missing_values_1\n\n\n\n\n\nnum_imputer\n\n\nNum-\nImputer\n\n\
n\n\n\nnumpy_replace_missing_values_1->num_imputer\n\n\n\n\n\nopt_standard_scaler\n\n\nOpt-\nStandard-\nScaler\n\n\n\n\n\nnum_imputer->opt_standard_scaler\n\n\n\n\n\nfloat32_transform_1\n\n\nfloat32_-\ntransform\n\n\n\n\n\nopt_standard_scaler->float32_transform_1\n\n\n\n\n\nfloat32_transform_1->concat_features\n\n\n\n\n\nnumpy_permute_array\n\n\nNumpy-\nPermute-\nArray\n\n\n\n\n\nconcat_features->numpy_permute_array\n\n\n\n\n\nxgb_classifier\n\n\nXGB-\nClassifier\n\n\n\n\n\nnumpy_permute_array->xgb_classifier\n\n\n\n\n\n"},"metadata":{}}]},{"metadata":{},"cell_type":"markdown","source":["\n","### Preview pipeline model as python code\n","In the next cell, you can preview the saved pipeline model as a python code. \n","You will be able to review the exact steps used to create the model.\n","\n","**Note:** If you want to get sklearn representation add following parameter to `pretty_print` call: `astype='sklearn'`."]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["pipeline_model.pretty_print(combinators=False, ipython_display=True)"],"execution_count":11,"outputs":[{"output_type":"display_data","data":{"text/plain":"","text/markdown":"```python\nfrom autoai_libs.transformers.exportable import NumpyColumnSelector\nfrom autoai_libs.transformers.exportable import CompressStrings\nfrom autoai_libs.transformers.exportable import NumpyReplaceMissingValues\nfrom autoai_libs.transformers.exportable import NumpyReplaceUnknownValues\nfrom autoai_libs.transformers.exportable import boolean2float\nfrom autoai_libs.transformers.exportable import CatImputer\nfrom autoai_libs.transformers.exportable import CatEncoder\nimport numpy as np\nfrom autoai_libs.transformers.exportable import float32_transform\nfrom lale.operators import make_pipeline\nfrom autoai_libs.transformers.exportable import FloatStr2Float\nfrom autoai_libs.transformers.exportable import NumImputer\nfrom autoai_libs.transformers.exportable import OptStandardScaler\nfrom lale.operators import 
make_union\nfrom autoai_libs.transformers.exportable import NumpyPermuteArray\nfrom xgboost import XGBClassifier\n\nnumpy_column_selector_0 = NumpyColumnSelector(\n columns=[\n 0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,\n ]\n)\ncompress_strings = CompressStrings(\n compress_type=\"hash\",\n dtypes_list=[\n \"char_str\", \"int_num\", \"char_str\", \"char_str\", \"char_str\", \"char_str\",\n \"int_num\", \"char_str\", \"char_str\", \"int_num\", \"char_str\", \"int_num\",\n \"char_str\", \"char_str\", \"int_num\", \"char_str\", \"int_num\", \"char_str\",\n \"char_str\",\n ],\n missing_values_reference_list=[\"\", \"-\", \"?\", float(\"nan\")],\n misslist_list=[\n [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [],\n [], [],\n ],\n)\nnumpy_replace_missing_values_0 = NumpyReplaceMissingValues(\n missing_values=[], filling_values=float(\"nan\")\n)\nnumpy_replace_unknown_values = NumpyReplaceUnknownValues(\n filling_values=float(\"nan\"),\n filling_values_list=[\n float(\"nan\"), float(\"nan\"), float(\"nan\"), float(\"nan\"), float(\"nan\"),\n float(\"nan\"), float(\"nan\"), float(\"nan\"), float(\"nan\"), float(\"nan\"),\n float(\"nan\"), float(\"nan\"), float(\"nan\"), float(\"nan\"), float(\"nan\"),\n float(\"nan\"), float(\"nan\"), float(\"nan\"), float(\"nan\"),\n ],\n missing_values_reference_list=[\"\", \"-\", \"?\", float(\"nan\")],\n)\ncat_imputer = CatImputer(\n strategy=\"most_frequent\",\n missing_values=float(\"nan\"),\n sklearn_version_family=\"23\",\n)\ncat_encoder = CatEncoder(\n encoding=\"ordinal\",\n categories=\"auto\",\n dtype=np.float64,\n handle_unknown=\"error\",\n sklearn_version_family=\"23\",\n)\npipeline_0 = make_pipeline(\n numpy_column_selector_0,\n compress_strings,\n numpy_replace_missing_values_0,\n numpy_replace_unknown_values,\n boolean2float(),\n cat_imputer,\n cat_encoder,\n float32_transform(),\n)\nnumpy_column_selector_1 = NumpyColumnSelector(columns=[4])\nfloat_str2_float = FloatStr2Float(\n 
dtypes_list=[\"int_num\"], missing_values_reference_list=[]\n)\nnumpy_replace_missing_values_1 = NumpyReplaceMissingValues(\n missing_values=[], filling_values=float(\"nan\")\n)\nnum_imputer = NumImputer(strategy=\"median\", missing_values=float(\"nan\"))\nopt_standard_scaler = OptStandardScaler(\n num_scaler_copy=None,\n num_scaler_with_mean=None,\n num_scaler_with_std=None,\n use_scaler_flag=False,\n)\npipeline_1 = make_pipeline(\n numpy_column_selector_1,\n float_str2_float,\n numpy_replace_missing_values_1,\n num_imputer,\n opt_standard_scaler,\n float32_transform(),\n)\nunion = make_union(pipeline_0, pipeline_1)\nnumpy_permute_array = NumpyPermuteArray(\n axis=0,\n permutation_indices=[\n 0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 4,\n ],\n)\nxgb_classifier = XGBClassifier(\n base_score=0.5,\n booster=\"gbtree\",\n colsample_bylevel=1,\n colsample_bynode=1,\n colsample_bytree=1,\n gamma=0,\n gpu_id=-1,\n interaction_constraints=\"\",\n learning_rate=1.0,\n max_delta_step=0,\n max_depth=2,\n min_child_weight=2,\n missing=float(\"nan\"),\n monotone_constraints=\"()\",\n n_estimators=128,\n n_jobs=2,\n num_parallel_tree=1,\n random_state=33,\n reg_alpha=1,\n reg_lambda=1.0,\n scale_pos_weight=1,\n subsample=0.9992297983348898,\n tree_method=\"hist\",\n validate_parameters=1,\n verbosity=0,\n nthread=2,\n silent=True,\n seed=33,\n)\npipeline = make_pipeline(union, numpy_permute_array, xgb_classifier)\n```"},"metadata":{}}]},{"metadata":{},"cell_type":"markdown","source":["\n","## Deploy and Score\n","\n","In this section you will learn how to deploy and score the model as a web service."]},{"metadata":{},"cell_type":"markdown","source":["\n","### Working with spaces\n","\n","In this section you will specify a deployment space for organizing the assets for deploying and scoring the model. 
If you do not have an existing space, you can use [Deployment Spaces Dashboard](https://dataplatform.cloud.ibm.com/ml-runtime/spaces?context=cpdaas) to create a new space, following these steps:\n","\n","- Click **New Deployment Space**.\n","- Create an empty space.\n","- Select Cloud Object Storage.\n","- Select Watson Machine Learning instance and press **Create**.\n","- Copy `space_id` and paste it below.\n","\n","**Tip**: You can also use the SDK to prepare the space for your work. Learn more [here](https://github.com/IBM/watson-machine-learning-samples/blob/master/notebooks/python_sdk/instance-management/Space%20management.ipynb).\n","\n","**Action**: assign or update space ID below"]},{"metadata":{},"cell_type":"markdown","source":["### Deployment creation"]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["target_space_id = \"\"\n","\n","from ibm_watson_machine_learning.deployment import WebService\n","service = WebService(source_wml_credentials=wml_credentials,\n"," target_wml_credentials=wml_credentials,\n"," source_project_id=experiment_metadata['project_id'],\n"," target_space_id=target_space_id)\n","service.create(\n","model=best_pipeline_name,\n","metadata=experiment_metadata,\n","deployment_name='Best_pipeline_webservice'\n",")"],"execution_count":14,"outputs":[{"output_type":"stream","text":"Preparing an AutoAI Deployment...\nPublished model uid: 71a3a7f8-74e0-4b19-ad65-a20ddd5d4d43\nDeploying model 71a3a7f8-74e0-4b19-ad65-a20ddd5d4d43 using V4 client.\n\n\n#######################################################################################\n\nSynchronous deployment creation for uid: '71a3a7f8-74e0-4b19-ad65-a20ddd5d4d43' started\n\n#######################################################################################\n\n\ninitializing...\nready\n\n\n------------------------------------------------------------------------------------------------\nSuccessfully finished deployment creation, 
deployment_uid='986e0998-8110-4a76-a8e6-6b9961626a9c'\n------------------------------------------------------------------------------------------------\n\n\n","name":"stdout"}]},{"metadata":{},"cell_type":"markdown","source":["Use the `print` method for the deployment object to show basic information about the service: "]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["print(service)"],"execution_count":15,"outputs":[{"output_type":"stream","text":"name: Best_pipeline_webservice, id: 986e0998-8110-4a76-a8e6-6b9961626a9c, scoring_url: https://us-south.ml.cloud.ibm.com/ml/v4/deployments/986e0998-8110-4a76-a8e6-6b9961626a9c/predictions, asset_id: 71a3a7f8-74e0-4b19-ad65-a20ddd5d4d43\n","name":"stdout"}]},{"metadata":{},"cell_type":"markdown","source":["To show all available information about the deployment use the `.get_params()` method:"]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["service.get_params()"],"execution_count":16,"outputs":[{"output_type":"execute_result","execution_count":16,"data":{"text/plain":"{'entity': {'asset': {'id': '71a3a7f8-74e0-4b19-ad65-a20ddd5d4d43'},\n 'custom': {},\n 'deployed_asset_type': 'model',\n 'hardware_spec': {'id': 'c076e82c-b2a7-4d20-9c0f-1f0c2fdf5a24',\n 'name': 'M',\n 'num_nodes': 1},\n 'hybrid_pipeline_hardware_specs': [{'hardware_spec': {'name': 'S',\n 'num_nodes': 1},\n 'node_runtime_id': 'auto_ai.kb'}],\n 'name': 'Best_pipeline_webservice',\n 'online': {},\n 'space_id': 'd3a9968f-93de-4693-adce-5fb42ba2b068',\n 'status': {'online_url': {'url': 'https://us-south.ml.cloud.ibm.com/ml/v4/deployments/986e0998-8110-4a76-a8e6-6b9961626a9c/predictions'},\n 'state': 'ready'}},\n 'metadata': {'created_at': '2021-06-07T20:45:26.190Z',\n 'id': '986e0998-8110-4a76-a8e6-6b9961626a9c',\n 'modified_at': '2021-06-07T20:45:26.190Z',\n 'name': 'Best_pipeline_webservice',\n 'owner': 'IBMid-310001D061',\n 'space_id': 
'd3a9968f-93de-4693-adce-5fb42ba2b068'}}"},"metadata":{}}]},{"metadata":{},"cell_type":"markdown","source":["### Scoring of webservice\n","You can make scoring request by calling `score()` on the deployed pipeline."]},{"metadata":{},"cell_type":"markdown","source":["If you want to work with the web service in an external Python application,follow these steps to retrieve the service object:\n","\n"," - Initialize the service by `service = WebService(wml_credentials)`\n"," - Get deployment_id by `service.list()` method\n"," - Get webservice object by `service.get('deployment_id')` method\n","\n","After that you can call `service.score()` method."]},{"metadata":{},"cell_type":"markdown","source":["### Deleting deployment\n","\n","You can delete the existing deployment by calling the `service.delete()` command.\n","To list the existing web services, use `service.list()`."]},{"metadata":{},"cell_type":"markdown","source":["\n","\n","## Running AutoAI experiment with Python SDK"]},{"metadata":{},"cell_type":"markdown","source":["If you want to run AutoAI experiment using python API follow up the steps decribed below. 
The experiment settings were generated basing on parameters set on UI.\n"," - Go to your COS dashboard.\n"," - In Service credentials tab, click New Credential.\n"," - Add the inline configuration parameter: `{“HMAC”:true}`, click Add.\n","This configuration parameter adds the following section to the instance credentials, (for use later in this notebook):\n","```\n","cos_hmac_keys”: {\n"," “access_key_id”: “***“,\n"," “secret_access_key”: “***”\n"," }\n"," ```\n","\n","**Action:** Please provide cos credentials in following cells.\n","\n","- Use provided markdown cells to run code.\n","\n"]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"markdown","source":["```\n","from ibm_watson_machine_learning.experiment import AutoAI\n","\n","experiment = AutoAI(wml_credentials, project_id=experiment_metadata['project_id'])\n","```"]},{"metadata":{},"cell_type":"markdown","source":["```\n","#@hidden_cell\n","cos_hmac_keys = {\n"," \"access_key_id\": \"PLACE_YOUR_ACCESS_KEY_ID_HERE\",\n"," \"secret_access_key\": \"PLACE_YOUR_SECRET_ACCESS_KEY_HERE\"\n"," }\n"," \n","cos_api_key = \"PLACE_YOUR_API_KEY_HERE\"\n","OPTIMIZER_NAME = 'custom_name'\n","```"]},{"metadata":{"pycharm":{"name":"#%% md\n"}},"cell_type":"markdown","source":["The experiment settings were generated basing on parameters set on UI."]},{"metadata":{"pycharm":{"name":"#%% raw\n"}},"cell_type":"markdown","source":["```\n","from ibm_watson_machine_learning.helpers import DataConnection\n","from ibm_watson_machine_learning.helpers import S3Connection, S3Location\n","\n","training_data_reference = [DataConnection(\n"," connection=S3Connection(\n"," api_key=cos_api_key,\n"," auth_endpoint='https://iam.bluemix.net/oidc/token/',\n"," endpoint_url='https://s3-api.us-geo.objectstorage.softlayer.net',\n"," access_key_id = cos_hmac_keys['access_key_id'],\n"," secret_access_key = cos_hmac_keys['secret_access_key']\n"," ),\n"," location=S3Location(\n"," bucket='autoaitutorialnew-donotdelete-pr-zhaudczzjfda0e',\n"," 
path='german_credit_data.csv'\n"," )),\n","]\n","from ibm_watson_machine_learning.helpers import S3Connection, S3Location\n","training_result_reference = DataConnection(\n"," connection=S3Connection(\n"," api_key=cos_api_key,\n"," auth_endpoint='https://iam.bluemix.net/oidc/token/',\n"," endpoint_url='https://s3-api.us-geo.objectstorage.softlayer.net',\n"," access_key_id = cos_hmac_keys['access_key_id'],\n"," secret_access_key = cos_hmac_keys['secret_access_key']\n"," ),\n"," location=S3Location(\n"," bucket='autoaitutorialnew-donotdelete-pr-zhaudczzjfda0e',\n"," path='auto_ml/509ff920-3fcf-4eb9-9abc-bbc63b4a301a/wml_data/33aa26d1-15eb-482c-93ca-76c186c431a2/data/automl',\n"," model_location='auto_ml/509ff920-3fcf-4eb9-9abc-bbc63b4a301a/wml_data/33aa26d1-15eb-482c-93ca-76c186c431a2/data/automl/pre_hpo_d_output/Pipeline1/model.pickle',\n"," training_status='auto_ml/509ff920-3fcf-4eb9-9abc-bbc63b4a301a/wml_data/33aa26d1-15eb-482c-93ca-76c186c431a2/training-status.json'\n"," ))\n","```"]},{"metadata":{"pycharm":{"name":"#%%raw\n"}},"cell_type":"markdown","source":["```\n","pipeline_optimizer = experiment.optimizer(\n"," name=OPTIMIZER_NAME,\n"," prediction_type=experiment_metadata['prediction_type'],\n"," prediction_column=experiment_metadata['prediction_column'],\n"," scoring=experiment_metadata['scoring'],\n"," holdout_size=experiment_metadata['holdout_size'],\n"," csv_separator=experiment_metadata['csv_separator'],\n"," positive_label=experiment_metadata['positive_label'],\n"," drop_duplicates=experiment_metadata['drop_duplicates'])\n","```"]},{"metadata":{},"cell_type":"markdown","source":["```\n","pipeline_optimizer.fit(training_data_reference=training_data_reference,\n"," training_results_reference=training_result_reference,\n"," background_mode=False)\n","```"]},{"metadata":{},"cell_type":"markdown","source":["\n","\n","# Next steps\n","\n","#### [Online 
Documentation](https://www.ibm.com/cloud/watson-studio/autoai)"]},{"metadata":{},"cell_type":"markdown","source":["\n","### Copyrights\n","\n","Licensed Materials - Copyright © 2021 IBM. This notebook and its source code are released under the terms of the ILAN License.\n","Use, duplication disclosure restricted by GSA ADP Schedule Contract with IBM Corp.\n","\n","**Note:** The auto-generated notebooks are subject to the International License Agreement for Non-Warranted Programs (or equivalent) and License Information document for Watson Studio Auto-generated Notebook (License Terms), such agreements located in the link below. Specifically, the Source Components and Sample Materials clause included in the License Information document for Watson Studio Auto-generated Notebook applies to the auto-generated notebooks. \n","\n","By downloading, copying, accessing, or otherwise using the materials, you agree to the License Terms \n","\n","___"]}],"metadata":{"kernelspec":{"name":"python3","display_name":"Python 3.7","language":"python"},"language_info":{"name":"python","version":"3.7.10","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"pycharm":{"stem_cell":{"cell_type":"raw","metadata":{"collapsed":false},"source":["\n"]}}},"nbformat":4,"nbformat_minor":1} --------------------------------------------------------------------------------