├── .gitignore
├── LICENSE
├── README.md
├── data
│   └── german_credit_data.csv
├── doc
│   └── source
│       └── images
│           ├── ExecuteAutoAINotebook.gif
│           ├── ExportAutoAI.gif
│           ├── RunAutoAI.gif
│           ├── architecture.png
│           ├── create_deployment_autoai.png
│           ├── experiment_metadata.png
│           ├── experiment_notebook.png
│           ├── export-sklearn.png
│           ├── feature-importance.png
│           ├── format-as-code.png
│           ├── input_parameters.png
│           ├── modify-source-code.png
│           ├── pipeline_notebook.png
│           ├── pipelines-comparison.png
│           ├── pretty_print-1.png
│           ├── save-experiment-button.png
│           ├── save-experiment-notebook.gif
│           ├── score_webservice.png
│           ├── visualize-1.png
│           ├── visualize-2.png
│           ├── wml-deployment-space.gif
│           ├── wml_connection_autoai.png
│           └── wml_model_deployed.png
└── notebooks
    ├── Credit Data Analysis - P2 notebook.ipynb
    ├── Credit Data Analysis - experiment notebook.ipynb
    └── with-output
        ├── Credit Data Analysis - P2 notebook.ipynb
        └── Credit Data Analysis - experiment notebook.ipynb
/.gitignore:
--------------------------------------------------------------------------------
1 | 
2 | # macOS Finder metadata
3 | .DS_STORE
4 | 
5 | # Byte-compiled / optimized / DLL files
6 | __pycache__/
7 | *.py[cod]
8 | *$py.class
9 |
10 | # C extensions
11 | *.so
12 |
13 | # Distribution / packaging
14 | .Python
15 | build/
16 | develop-eggs/
17 | dist/
18 | downloads/
19 | eggs/
20 | .eggs/
21 | lib/
22 | lib64/
23 | parts/
24 | sdist/
25 | var/
26 | wheels/
27 | share/python-wheels/
28 | *.egg-info/
29 | .installed.cfg
30 | *.egg
31 | MANIFEST
32 |
33 | # PyInstaller
34 | # Usually these files are written by a python script from a template
35 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
36 | *.manifest
37 | *.spec
38 |
39 | # Installer logs
40 | pip-log.txt
41 | pip-delete-this-directory.txt
42 |
43 | # Unit test / coverage reports
44 | htmlcov/
45 | .tox/
46 | .nox/
47 | .coverage
48 | .coverage.*
49 | .cache
50 | nosetests.xml
51 | coverage.xml
52 | *.cover
53 | *.py,cover
54 | .hypothesis/
55 | .pytest_cache/
56 | cover/
57 |
58 | # Translations
59 | *.mo
60 | *.pot
61 |
62 | # Django stuff:
63 | *.log
64 | local_settings.py
65 | db.sqlite3
66 | db.sqlite3-journal
67 |
68 | # Flask stuff:
69 | instance/
70 | .webassets-cache
71 |
72 | # Scrapy stuff:
73 | .scrapy
74 |
75 | # Sphinx documentation
76 | docs/_build/
77 |
78 | # PyBuilder
79 | .pybuilder/
80 | target/
81 |
82 | # Jupyter Notebook
83 | .ipynb_checkpoints
84 |
85 | # IPython
86 | profile_default/
87 | ipython_config.py
88 |
89 | # pyenv
90 | # For a library or package, you might want to ignore these files since the code is
91 | # intended to run in multiple environments; otherwise, check them in:
92 | # .python-version
93 |
94 | # pipenv
95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
98 | # install all needed dependencies.
99 | #Pipfile.lock
100 |
101 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
102 | __pypackages__/
103 |
104 | # Celery stuff
105 | celerybeat-schedule
106 | celerybeat.pid
107 |
108 | # SageMath parsed files
109 | *.sage.py
110 |
111 | # Environments
112 | .env
113 | .venv
114 | env/
115 | venv/
116 | ENV/
117 | env.bak/
118 | venv.bak/
119 |
120 | # Spyder project settings
121 | .spyderproject
122 | .spyproject
123 |
124 | # Rope project settings
125 | .ropeproject
126 |
127 | # mkdocs documentation
128 | /site
129 |
130 | # mypy
131 | .mypy_cache/
132 | .dmypy.json
133 | dmypy.json
134 |
135 | # Pyre type checker
136 | .pyre/
137 |
138 | # pytype static type analyzer
139 | .pytype/
140 |
141 | # Cython debug symbols
142 | cython_debug/
143 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Generate Python notebook for pipeline models using AutoAI
2 |
3 | ## Summary
4 |
5 | In this code pattern, we use AutoAI to automatically generate a Jupyter notebook containing the Python code of a machine learning model. We then explore, modify, and retrain this model pipeline in Python. Lastly, we deploy the model in Watson Machine Learning using the WML APIs.
6 |
7 | ## Description
8 |
9 | AutoAI is a graphical tool available within Watson Studio that analyzes your dataset, generates several model pipelines, and ranks them based on the metric chosen for the problem. This code pattern shows extended features of AutoAI. A more basic AutoAI exploration of the same dataset is covered in the [Generate machine learning model pipelines to choose the best model for your problem](https://developer.ibm.com/tutorials/generate-machine-learning-model-pipelines-to-choose-the-best-model-for-your-problem-autoai/) tutorial.
10 |
11 | When you have completed this code pattern, you will understand how to:
12 |
13 | * Run an AutoAI experiment.
14 | * Generate and save a Python notebook.
15 | * Execute the notebook and analyze the results.
16 | * Make changes and retrain the model using the Watson Machine Learning SDK.
17 | * Deploy the model using Watson Machine Learning from within the notebook.
18 |
19 | ## Architecture components
20 |
21 | 
22 |
23 | ## Flow
24 |
25 | 1. The user submits an AutoAI experiment using the default settings.
26 | 1. Multiple pipeline models are generated. A pipeline model of choice from the leaderboard is saved as a Jupyter notebook.
27 | 1. The Jupyter notebook is executed, and a modified pipeline model is generated within the notebook.
28 | 1. The pipeline model is deployed in Watson Machine Learning using the WML APIs.
29 |
30 | ## Included components
31 |
32 | * [IBM Watson Studio](https://cloud.ibm.com/catalog/services/watson-studio) - IBM Watson® Studio helps data scientists and analysts prepare data and build models at scale across any cloud.
33 | * [IBM Watson Machine Learning](https://cloud.ibm.com/catalog/services/machine-learning) - IBM Watson® Machine Learning helps data scientists and developers accelerate AI and machine-learning deployment.
34 |
35 | ## Featured Technologies
36 |
37 | * [Machine Learning](https://developer.ibm.com/articles/introduction-to-machine-learning/) - The science of predicting values by analyzing historical data.
38 | * [Python](https://www.python.org/) - Python is an interpreted, object-oriented, high-level programming language.
39 | * [Jupyter notebook](https://jupyter.org/) - An open-source web application for creating and sharing documents that contain live code.
40 | * [scikit-learn](https://scikit-learn.org/stable/) - A Python-based machine learning library.
41 | * [lale](https://github.com/IBM/lale) - A scikit-learn compatible Python library for semi-automated data science, used by the AutoAI SDK.
42 |
43 | ## Prerequisites
44 |
45 | * [IBM Cloud account](https://tinyurl.com/y4mzxow5): This code pattern assumes you have an **IBM Cloud** account. Sign up for a no-charge trial account; no credit card is required.
46 |
47 | > Instructions for completing the list of prerequisites are covered in [this](https://developer.ibm.com/technologies/artificial-intelligence/tutorials/generate-machine-learning-model-pipelines-to-choose-the-best-model-for-your-problem-autoai/) prequel.
48 |
49 | * Create a Cloud Object Storage service instance.
50 | * Create a Watson Studio service instance.
51 | * Create a Watson Machine Learning service instance.
52 | * Create a Watson Studio project and load data.
53 |
54 | ## Steps
55 |
56 | ### 1. Run AutoAI experiment
57 |
58 | 
59 |
60 | 1. Open the project created within Watson Studio. Click the `Add to project +` button at the top right, then click `AutoAI Experiment`.
61 | 1. Give the experiment a name (*Credit Risk Analysis*), associate a Watson Machine Learning service from the drop-down, and click `Create`.
62 | 1. On the *Add data source* screen, click `Select from project`, check *german_credit_data.csv*, and click `Select asset`.
63 | 1. Under the *Configure details* section, click the `What do you want to predict?` drop-down and select `Result` from the list. If you are using a different dataset, select the column that you want AutoAI to predict. Click `Run experiment` at the bottom right.
64 |
65 | You will see a notification indicating that the *AutoAI experiment has started*. Depending on the size of the dataset, this step will take a few minutes to complete.
66 |
67 | ### 2. Generate experiment level notebook
68 |
69 | 
70 |
71 | The `experiment notebook` provides annotated code so you can:
72 |
73 | * Interact with trained model pipelines
74 | * Access model details programmatically (including feature importance and machine learning metrics)
75 | * Visualize each pipeline as a graph, with each node documented, to provide transparency
76 | * Download selected pipelines and test locally
77 | * Create a deployment and score the model
78 | * Get the experiment configuration, which you can use for automation or integration with other applications
79 |
80 | To generate an experiment notebook, perform the following steps:
81 |
82 | 
83 |
84 | 1. Once the AutoAI experiment completes, click the `Save experiment code` button, indicated by the floppy-disk icon.
85 |
86 | 
87 |
88 | 1. In the `Save experiment code` prompt, modify the default *Name* if needed and click `Save`. A pop-up indicates that the notebook was saved successfully. You will now see this notebook under the *Notebooks* section within the *Assets* tab.
89 |
90 | #### 2.0 Load and execute notebook
91 |
92 | Spend some time looking through the sections of the notebook to get an overview.
93 | A notebook is composed of text (markdown or heading) cells and code cells. The markdown cells provide comments on what the code is designed to do.
94 |
95 | Run cells individually by highlighting each cell, then either clicking the `Run` button at the top of the notebook or pressing the keyboard shortcut (typically Shift + Enter, though this can vary by platform). While a cell is running, an asterisk (`[*]`) appears to the left of the cell. When the cell has finished executing, a sequential number appears in its place (for example, `[17]`).
96 |
97 | 
98 |
99 | The generated notebook is prefilled with Python code and is divided into four main sections, as follows.
100 |
101 | #### 2.1 Setup
102 |
103 | This section contains the credentials for *Cloud Object Storage*, through which the current AutoAI pipeline is retrieved. The cell is prefilled with code to extract the training data used to create the pipeline, as well as the pipeline results.
104 |
105 | 
106 |
107 | This section also contains the metadata of the current pipeline that was used to run the experiment.
108 |
109 | 
110 |
111 | ##### api_key
112 |
113 | To access the WML instance, you need to generate an *API key* through your cloud account and paste it into the notebook as shown in the cell below. The instructions for acquiring the *cloud API key* are described in the markdown section of the screenshot below.
114 |
115 | 
116 |
117 | #### 2.2 Pipelines comparison
118 |
119 | To compare all the pipelines that were generated, call the `summary()` method on the pipeline optimizer object. The name of the best performing pipeline is stored in the `best_pipeline_name` variable.
120 |
121 | 
122 |
123 | Passing this variable to the `get_pipeline()` method retrieves that particular pipeline, and the feature importance computed for it can then be listed.
124 |
125 | 
126 |
127 | #### 2.3 Inspect pipeline
128 |
129 | This section of the notebook contains code to visualize the stages of the model as a graph, using Watson Machine Learning's AutoAI APIs.
130 |
131 | 
132 |
133 | This section also contains code that extracts the current model and prints it as Python code.
134 |
135 | 
136 |
137 | #### 2.4 Deploy and score as web service using WML instance
138 |
139 | This section of the notebook contains code that deploys the pipeline model as a web service using Watson Machine Learning. It requires you to enter credentials that identify the right WML instance and deployment space.
140 |
141 | ##### target_space_id
142 |
143 | >*To create a deployment space and get the target_space_id*:
144 | 
145 | >
146 | >1. Click on the hamburger menu on the top-left corner of the Watson Studio home page.
147 | >1. Click `Deployment Spaces` from the list and select `View all spaces`.
148 | >1. Click `New deployment space` and select the `Create an empty space` option.
149 | >1. Provide a name, select the machine learning service that was previously created, and click `Create`.
150 | >1. Click `View new space`, switch to the `Settings` tab, and copy the `space id`.
151 |
152 | Acquire the *target_space_id* as shown in the steps above and paste it within the create deployment section. The Watson Machine Learning API uses the `wml_credentials` and the `target_space_id` to deploy the machine learning model as a web service, as sketched below.
153 |
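The deployment cell in the generated notebook follows this pattern; the space ID below is a placeholder.

```python
from ibm_watson_machine_learning.deployment import WebService

target_space_id = '<your-space-id>'  # copied from the deployment space Settings tab

service = WebService(source_wml_credentials=wml_credentials,
                     target_wml_credentials=wml_credentials,
                     source_project_id=experiment_metadata['project_id'],
                     target_space_id=target_space_id)

service.create(
    model=best_pipeline_name,
    metadata=experiment_metadata,
    deployment_name='Best_pipeline_webservice'
)
```
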
154 | 
155 |
156 | Once the cells are executed, the model is promoted to the deployment space and becomes available as a web service, which can be verified from within the UI as shown below.
157 |
158 | 
159 |
160 | #### Score web service
161 |
162 | The web service can be scored from the UI by switching to the `Test` tab shown in the screenshot above. Alternatively, the *score()* method from the WML API can be used to submit a sample test payload (see the sketch below). The results are returned as shown in the screenshot below.
163 |
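A minimal scoring sketch, assuming the cells above have run; it re-reads the training data through the experiment's data connection and scores a few unlabeled rows.

```python
# Re-read the training data and score a few rows as a smoke test
df = training_data_reference[0].read(csv_separator=experiment_metadata['csv_separator'])
sample = df.drop([experiment_metadata['prediction_column']], axis=1).head(5)

predictions = service.score(payload=sample)
print(predictions)
```
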
164 | 
165 |
166 | ### 3. Generate pipeline level notebook
167 |
168 | 
169 |
170 | The `pipeline notebook` provides annotated code that allows you to:
171 |
172 | * View the Scikit-learn pipeline definition
173 | * See the transformations applied for pipeline training
174 | * Preview the hyperparameter values found in the HPO phase
175 | * Review the pipeline evaluation
176 | * Refine the pipeline definition
177 | * Re-fit and re-evaluate
178 |
179 | To generate a pipeline notebook, perform the following steps:
180 |
181 | 
182 |
183 | 1. Hover over the pipeline that you wish to save as a notebook, click the `Save as` drop-down on the right side, and select `Notebook`.
184 |
185 | 1. In the `Save as` prompt, you will notice that there are two types of assets that can be generated, namely *Model* and *Notebook*. We will select the `Notebook` option.
186 |
187 | 1. From the `Define details` section on the right, change the default *Name* if needed and click `Create`. A pop-up indicates that the notebook was saved successfully. You will now see this notebook under the *Notebooks* section within the *Assets* tab.
188 |
189 | Edit and execute each of the cells as shown in section `2.0`. The notebook contains more information in its markdown cells; the core fit-and-score flow is sketched below.
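
For orientation, the heart of the pipeline notebook reduces to the sketch below; the full preprocessing and estimator definition is generated for you as a scikit-learn `pipeline` object.

```python
from sklearn.model_selection import train_test_split
from sklearn.metrics import get_scorer

# Read the training data back through the experiment's data connection
df = training_data_reference[0].read(csv_separator=experiment_metadata['csv_separator'])
df.dropna(axis=0, how='any', subset=[experiment_metadata['prediction_column']], inplace=True)
df.drop_duplicates(inplace=True)

# Recreate the experiment's holdout split
X = df.drop([experiment_metadata['prediction_column']], axis=1).values
y = df[experiment_metadata['prediction_column']].values
train_X, test_X, train_y, test_y = train_test_split(
    X, y, test_size=experiment_metadata['holdout_size'],
    stratify=y, random_state=experiment_metadata['random_state'])

# `pipeline` is the generated scikit-learn pipeline; refit and re-evaluate it
pipeline.fit(train_X, train_y)
scorer = get_scorer(experiment_metadata['scoring'])
print(scorer(pipeline, test_X, test_y))
```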
190 |
191 | ## Related Links
192 |
193 | * [Simplify your AI lifecycle with AutoAI](https://developer.ibm.com/series/explore-autoai/)
194 |
195 | ## License
196 |
197 | This code pattern is licensed under the Apache License, Version 2.0. Separate third-party code objects invoked within this code pattern are licensed by their respective providers pursuant to their own separate licenses. Contributions are subject to the [Developer Certificate of Origin, Version 1.1](https://developercertificate.org/) and the [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0.txt).
198 |
199 | [Apache License FAQ](https://www.apache.org/foundation/license-faq.html#WhatDoesItMEAN)
200 |
--------------------------------------------------------------------------------
/doc/source/images/ExecuteAutoAINotebook.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/AutoAI-code-generation/30bdce187874c5e8f64282e41478ad2b4767f525/doc/source/images/ExecuteAutoAINotebook.gif
--------------------------------------------------------------------------------
/doc/source/images/ExportAutoAI.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/AutoAI-code-generation/30bdce187874c5e8f64282e41478ad2b4767f525/doc/source/images/ExportAutoAI.gif
--------------------------------------------------------------------------------
/doc/source/images/RunAutoAI.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/AutoAI-code-generation/30bdce187874c5e8f64282e41478ad2b4767f525/doc/source/images/RunAutoAI.gif
--------------------------------------------------------------------------------
/doc/source/images/architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/AutoAI-code-generation/30bdce187874c5e8f64282e41478ad2b4767f525/doc/source/images/architecture.png
--------------------------------------------------------------------------------
/doc/source/images/create_deployment_autoai.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/AutoAI-code-generation/30bdce187874c5e8f64282e41478ad2b4767f525/doc/source/images/create_deployment_autoai.png
--------------------------------------------------------------------------------
/doc/source/images/experiment_metadata.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/AutoAI-code-generation/30bdce187874c5e8f64282e41478ad2b4767f525/doc/source/images/experiment_metadata.png
--------------------------------------------------------------------------------
/doc/source/images/experiment_notebook.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/AutoAI-code-generation/30bdce187874c5e8f64282e41478ad2b4767f525/doc/source/images/experiment_notebook.png
--------------------------------------------------------------------------------
/doc/source/images/export-sklearn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/AutoAI-code-generation/30bdce187874c5e8f64282e41478ad2b4767f525/doc/source/images/export-sklearn.png
--------------------------------------------------------------------------------
/doc/source/images/feature-importance.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/AutoAI-code-generation/30bdce187874c5e8f64282e41478ad2b4767f525/doc/source/images/feature-importance.png
--------------------------------------------------------------------------------
/doc/source/images/format-as-code.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/AutoAI-code-generation/30bdce187874c5e8f64282e41478ad2b4767f525/doc/source/images/format-as-code.png
--------------------------------------------------------------------------------
/doc/source/images/input_parameters.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/AutoAI-code-generation/30bdce187874c5e8f64282e41478ad2b4767f525/doc/source/images/input_parameters.png
--------------------------------------------------------------------------------
/doc/source/images/modify-source-code.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/AutoAI-code-generation/30bdce187874c5e8f64282e41478ad2b4767f525/doc/source/images/modify-source-code.png
--------------------------------------------------------------------------------
/doc/source/images/pipeline_notebook.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/AutoAI-code-generation/30bdce187874c5e8f64282e41478ad2b4767f525/doc/source/images/pipeline_notebook.png
--------------------------------------------------------------------------------
/doc/source/images/pipelines-comparison.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/AutoAI-code-generation/30bdce187874c5e8f64282e41478ad2b4767f525/doc/source/images/pipelines-comparison.png
--------------------------------------------------------------------------------
/doc/source/images/pretty_print-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/AutoAI-code-generation/30bdce187874c5e8f64282e41478ad2b4767f525/doc/source/images/pretty_print-1.png
--------------------------------------------------------------------------------
/doc/source/images/save-experiment-button.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/AutoAI-code-generation/30bdce187874c5e8f64282e41478ad2b4767f525/doc/source/images/save-experiment-button.png
--------------------------------------------------------------------------------
/doc/source/images/save-experiment-notebook.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/AutoAI-code-generation/30bdce187874c5e8f64282e41478ad2b4767f525/doc/source/images/save-experiment-notebook.gif
--------------------------------------------------------------------------------
/doc/source/images/score_webservice.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/AutoAI-code-generation/30bdce187874c5e8f64282e41478ad2b4767f525/doc/source/images/score_webservice.png
--------------------------------------------------------------------------------
/doc/source/images/visualize-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/AutoAI-code-generation/30bdce187874c5e8f64282e41478ad2b4767f525/doc/source/images/visualize-1.png
--------------------------------------------------------------------------------
/doc/source/images/visualize-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/AutoAI-code-generation/30bdce187874c5e8f64282e41478ad2b4767f525/doc/source/images/visualize-2.png
--------------------------------------------------------------------------------
/doc/source/images/wml-deployment-space.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/AutoAI-code-generation/30bdce187874c5e8f64282e41478ad2b4767f525/doc/source/images/wml-deployment-space.gif
--------------------------------------------------------------------------------
/doc/source/images/wml_connection_autoai.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/AutoAI-code-generation/30bdce187874c5e8f64282e41478ad2b4767f525/doc/source/images/wml_connection_autoai.png
--------------------------------------------------------------------------------
/doc/source/images/wml_model_deployed.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/AutoAI-code-generation/30bdce187874c5e8f64282e41478ad2b4767f525/doc/source/images/wml_model_deployed.png
--------------------------------------------------------------------------------
/notebooks/Credit Data Analysis - P2 notebook.ipynb:
--------------------------------------------------------------------------------
1 | {"cells":[{"metadata":{},"cell_type":"markdown","source":["\n","# Pipeline 2 Notebook - AutoAI Notebook v1.15.0\n","\n","Consider these tips for working with an auto-generated notebook:\n","- Notebook code generated using AutoAI will execute successfully. If you modify the notebook, we cannot guarantee it will run successfully.\n","- This pipeline is optimized for the original data set. The pipeline might fail or produce sub-optimum results if used with different data. If you want to use a different data set, consider retraining the AutoAI experiment to generate a new pipeline. For more information, see Cloud Platform \n","- Before modifying the pipeline or trying to re-fit the pipeline, consider that the code converts dataframes to numpy arrays before fitting the pipeline (a current restriction of the preprocessor pipeline).\n"]},{"metadata":{},"cell_type":"markdown","source":[" \n","## Notebook content\n","\n","This notebook contains a Scikit-learn representation of AutoAI pipeline. This notebook introduces commands for getting data, training the model, and testing the model. \n","\n","Some familiarity with Python is helpful. This notebook uses Python 3.7 and scikit-learn 0.23.2."]},{"metadata":{"pycharm":{"name":"#%% md\n"}},"cell_type":"markdown","source":["## Notebook goals\n","\n","- Scikit-learn pipeline definition\n","- Pipeline training \n","- Pipeline evaluation\n","\n","## Contents\n","\n","This notebook contains the following parts:\n","\n","**[Setup](#setup)** \n"," [Package installation](#install) \n"," [AutoAI experiment metadata](#variables_definition) \n","**[Pipeline inspection](#inspection)** \n"," [Read training data](#read) \n"," [Train and test data split](#split) \n"," [Make pipeline](#preview_model_to_python_code) \n"," [Train pipeline model](#train) \n"," [Test pipeline model](#test_model) \n","**[Next steps](#next_steps)** \n","**[Copyrights](#copyrights)**"]},{"metadata":{},"cell_type":"markdown","source":[" \n","# Setup"]},{"metadata":{},"cell_type":"markdown","source":[" \n","## Package installation\n","Before you use the sample code in this notebook, install the following packages:\n"," - ibm_watson_machine_learning,\n"," - autoai-libs,\n"," - scikit-learn,\n"," - xgboost.\n"]},{"metadata":{"execution":{"iopub.execute_input":"2020-10-12T14:00:45.009458Z","iopub.status.busy":"2020-10-12T14:00:45.007968Z","iopub.status.idle":"2020-10-12T14:00:46.037702Z","shell.execute_reply":"2020-10-12T14:00:46.038270Z"},"pycharm":{"name":"#%%\n"},"scrolled":true},"cell_type":"code","source":["!conda remove --force libxgboost -y\n","!pip install ibm-watson-machine-learning | tail -n 1\n","!pip install -U autoai-libs==1.12.7 | tail -n 1\n","!pip install -U scikit-learn==0.23.2 | tail -n 1\n","!pip install -U xgboost==1.3.3 | tail -n 1"],"execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":[" \n","## AutoAI experiment metadata\n","The following cell contains the training data connection details. 
\n","**Note**: The connection might contain authorization credentials, so be careful when sharing the notebook."]},{"metadata":{"execution":{"iopub.execute_input":"2020-10-12T14:00:49.797633Z","iopub.status.busy":"2020-10-12T14:00:49.796778Z","iopub.status.idle":"2020-10-12T14:00:57.182715Z","shell.execute_reply":"2020-10-12T14:00:57.183132Z"},"pycharm":{"is_executing":true}},"cell_type":"code","source":["#@hidden_cell\n","from ibm_watson_machine_learning.helpers import DataConnection\n","from ibm_watson_machine_learning.helpers import S3Connection, S3Location\n","\n","training_data_reference = [DataConnection(\n"," connection=S3Connection(\n"," api_key='',\n"," auth_endpoint='https://iam.bluemix.net/oidc/token/',\n"," endpoint_url='https://s3-api.us-geo.objectstorage.softlayer.net'\n"," ),\n"," location=S3Location(\n"," bucket='autoaitutorialnew-donotdelete-pr-zhaudczzjfda0e',\n"," path='german_credit_data.csv'\n"," )),\n","]\n","training_result_reference = DataConnection(\n"," connection=S3Connection(\n"," api_key='',\n"," auth_endpoint='https://iam.bluemix.net/oidc/token/',\n"," endpoint_url='https://s3-api.us-geo.objectstorage.softlayer.net'\n"," ),\n"," location=S3Location(\n"," bucket='autoaitutorialnew-donotdelete-pr-zhaudczzjfda0e',\n"," path='auto_ml/509ff920-3fcf-4eb9-9abc-bbc63b4a301a/wml_data/33aa26d1-15eb-482c-93ca-76c186c431a2/data/automl',\n"," model_location='auto_ml/509ff920-3fcf-4eb9-9abc-bbc63b4a301a/wml_data/33aa26d1-15eb-482c-93ca-76c186c431a2/data/automl/hpo_d_output/Pipeline1/model.pickle',\n"," training_status='auto_ml/509ff920-3fcf-4eb9-9abc-bbc63b4a301a/wml_data/33aa26d1-15eb-482c-93ca-76c186c431a2/training-status.json'\n"," ))"],"execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["Following cell contains input parameters provided to run the AutoAI experiment in Watson Studio."]},{"metadata":{"execution":{"iopub.execute_input":"2020-10-12T14:00:57.187305Z","iopub.status.busy":"2020-10-12T14:00:57.186602Z","iopub.status.idle":"2020-10-12T14:00:57.188392Z","shell.execute_reply":"2020-10-12T14:00:57.188878Z"},"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["experiment_metadata = dict(\n"," prediction_type='classification',\n"," prediction_column='Result',\n"," holdout_size=0.1,\n"," scoring='accuracy',\n"," csv_separator=',',\n"," random_state=33,\n"," max_number_of_estimators=2,\n"," training_data_reference=training_data_reference,\n"," training_result_reference=training_result_reference,\n"," deployment_url='https://us-south.ml.cloud.ibm.com',\n"," project_id='0e2a32c8-f2ed-4587-8479-3a22b2ea57b3',\n"," positive_label=1,\n"," drop_duplicates=True\n",")"],"execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":[" \n","# Pipeline inspection"]},{"metadata":{"pycharm":{"name":"#%% md\n"}},"cell_type":"markdown","source":[" \n","## Read training data\n","\n","Retrieve training dataset from AutoAI experiment as pandas DataFrame. If reading data using Flight Service Connection results with error, please provide data as Pandas DataFrame object e.g. 
reading .CSV file with `pandas.read_csv()`"]},{"metadata":{"execution":{"iopub.execute_input":"2020-10-12T14:01:16.076169Z","iopub.status.busy":"2020-10-12T14:01:16.075589Z","iopub.status.idle":"2020-10-12T14:01:19.190233Z","shell.execute_reply":"2020-10-12T14:01:19.190807Z"},"pycharm":{"is_executing":true,"name":"#%%\n"}},"cell_type":"code","source":["df = training_data_reference[0].read(csv_separator=experiment_metadata['csv_separator'])\n","df.dropna('rows', how='any', subset=[experiment_metadata['prediction_column']], inplace=True)"],"execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":[" \n","## Train and test data split"]},{"metadata":{"pycharm":{"is_executing":true}},"cell_type":"code","source":["from sklearn.model_selection import train_test_split\n","\n","df.drop_duplicates(inplace=True)\n","X = df.drop([experiment_metadata['prediction_column']], axis=1).values\n","y = df[experiment_metadata['prediction_column']].values\n","\n","train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=experiment_metadata['holdout_size'],\n"," stratify=y, random_state=experiment_metadata['random_state'])"],"execution_count":null,"outputs":[]},{"metadata":{"pycharm":{"name":"#%% md\n"}},"cell_type":"markdown","source":[" \n","## Make pipeline\n","In the next cell, you can find the Scikit-learn definition of the selected AutoAI pipeline."]},{"metadata":{},"cell_type":"markdown","source":["Import statements."]},{"metadata":{"pycharm":{"is_executing":true,"name":"#%%\n"}},"cell_type":"code","source":["from autoai_libs.transformers.exportable import NumpyColumnSelector\n","from autoai_libs.transformers.exportable import CompressStrings\n","from autoai_libs.transformers.exportable import NumpyReplaceMissingValues\n","from autoai_libs.transformers.exportable import NumpyReplaceUnknownValues\n","from autoai_libs.transformers.exportable import boolean2float\n","from autoai_libs.transformers.exportable import CatImputer\n","from autoai_libs.transformers.exportable import CatEncoder\n","import numpy as np\n","from autoai_libs.transformers.exportable import float32_transform\n","from sklearn.pipeline import make_pipeline\n","from autoai_libs.transformers.exportable import FloatStr2Float\n","from autoai_libs.transformers.exportable import NumImputer\n","from autoai_libs.transformers.exportable import OptStandardScaler\n","from sklearn.pipeline import make_union\n","from autoai_libs.transformers.exportable import NumpyPermuteArray\n","from xgboost import XGBClassifier"],"execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["#### Pre-processing & Estimator."]},{"metadata":{"pycharm":{"is_executing":true,"name":"#%%\n"}},"cell_type":"code","source":["numpy_column_selector_0 = NumpyColumnSelector(\n"," columns=[\n"," 0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,\n"," ]\n",")\n","compress_strings = CompressStrings(\n"," compress_type=\"hash\",\n"," dtypes_list=[\n"," \"char_str\", \"int_num\", \"char_str\", \"char_str\", \"char_str\", \"char_str\",\n"," \"int_num\", \"char_str\", \"char_str\", \"int_num\", \"char_str\", \"int_num\",\n"," \"char_str\", \"char_str\", \"int_num\", \"char_str\", \"int_num\", \"char_str\",\n"," \"char_str\",\n"," ],\n"," missing_values_reference_list=[\"\", \"-\", \"?\", float(\"nan\")],\n"," misslist_list=[\n"," [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [],\n"," [], [],\n"," ],\n",")\n","numpy_replace_missing_values_0 = NumpyReplaceMissingValues(\n"," missing_values=[], 
filling_values=float(\"nan\")\n",")\n","numpy_replace_unknown_values = NumpyReplaceUnknownValues(\n"," filling_values=float(\"nan\"),\n"," filling_values_list=[\n"," float(\"nan\"), float(\"nan\"), float(\"nan\"), float(\"nan\"), float(\"nan\"),\n"," float(\"nan\"), float(\"nan\"), float(\"nan\"), float(\"nan\"), float(\"nan\"),\n"," float(\"nan\"), float(\"nan\"), float(\"nan\"), float(\"nan\"), float(\"nan\"),\n"," float(\"nan\"), float(\"nan\"), float(\"nan\"), float(\"nan\"),\n"," ],\n"," missing_values_reference_list=[\"\", \"-\", \"?\", float(\"nan\")],\n",")\n","cat_imputer = CatImputer(\n"," strategy=\"most_frequent\",\n"," missing_values=float(\"nan\"),\n"," sklearn_version_family=\"23\",\n",")\n","cat_encoder = CatEncoder(\n"," encoding=\"ordinal\",\n"," categories=\"auto\",\n"," dtype=np.float64,\n"," handle_unknown=\"error\",\n"," sklearn_version_family=\"23\",\n",")\n","pipeline_0 = make_pipeline(\n"," numpy_column_selector_0,\n"," compress_strings,\n"," numpy_replace_missing_values_0,\n"," numpy_replace_unknown_values,\n"," boolean2float(),\n"," cat_imputer,\n"," cat_encoder,\n"," float32_transform(),\n",")\n","numpy_column_selector_1 = NumpyColumnSelector(columns=[4])\n","float_str2_float = FloatStr2Float(\n"," dtypes_list=[\"int_num\"], missing_values_reference_list=[]\n",")\n","numpy_replace_missing_values_1 = NumpyReplaceMissingValues(\n"," missing_values=[], filling_values=float(\"nan\")\n",")\n","num_imputer = NumImputer(strategy=\"median\", missing_values=float(\"nan\"))\n","opt_standard_scaler = OptStandardScaler(\n"," num_scaler_copy=None,\n"," num_scaler_with_mean=None,\n"," num_scaler_with_std=None,\n"," use_scaler_flag=False,\n",")\n","pipeline_1 = make_pipeline(\n"," numpy_column_selector_1,\n"," float_str2_float,\n"," numpy_replace_missing_values_1,\n"," num_imputer,\n"," opt_standard_scaler,\n"," float32_transform(),\n",")\n","union = make_union(pipeline_0, pipeline_1)\n","numpy_permute_array = NumpyPermuteArray(\n"," axis=0,\n"," permutation_indices=[\n"," 0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 4,\n"," ],\n",")\n","xgb_classifier = XGBClassifier(\n"," base_score=0.5,\n"," booster=\"gbtree\",\n"," colsample_bylevel=1,\n"," colsample_bynode=1,\n"," colsample_bytree=1,\n"," gamma=0,\n"," gpu_id=-1,\n"," interaction_constraints=\"\",\n"," learning_rate=1.0,\n"," max_delta_step=0,\n"," max_depth=2,\n"," min_child_weight=2,\n"," missing=float(\"nan\"),\n"," monotone_constraints=\"()\",\n"," n_estimators=128,\n"," n_jobs=2,\n"," num_parallel_tree=1,\n"," random_state=33,\n"," reg_alpha=1,\n"," reg_lambda=1.0,\n"," scale_pos_weight=1,\n"," subsample=0.9992297983348898,\n"," tree_method=\"hist\",\n"," validate_parameters=1,\n"," verbosity=0,\n"," nthread=2,\n"," silent=True,\n"," seed=33,\n",")\n"],"execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["Pipeline."]},{"metadata":{"pycharm":{"is_executing":true,"name":"#%%\n"}},"cell_type":"code","source":["pipeline = make_pipeline(union, numpy_permute_array, xgb_classifier)"],"execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":[" \n","## Train pipeline model\n"]},{"metadata":{},"cell_type":"markdown","source":["### Define scorer from the optimization metric\n","This cell constructs the cell scorer based on the experiment metadata."]},{"metadata":{"pycharm":{"is_executing":true}},"cell_type":"code","source":["from sklearn.metrics import get_scorer\n","\n","scorer = 
get_scorer(experiment_metadata['scoring'])"],"execution_count":null,"outputs":[]},{"metadata":{"pycharm":{"name":"#%% md\n"}},"cell_type":"markdown","source":[" \n","### Fit pipeline model\n","In this cell, the pipeline is fitted."]},{"metadata":{"execution":{"iopub.execute_input":"2020-10-12T14:01:19.291734Z","iopub.status.busy":"2020-10-12T14:01:19.244735Z","iopub.status.idle":"2020-10-12T14:01:19.338461Z","shell.execute_reply":"2020-10-12T14:01:19.338958Z"},"pycharm":{"is_executing":true,"name":"#%%\n"},"scrolled":true},"cell_type":"code","source":["pipeline.fit(train_X,train_y)"],"execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":[" \n","## Test pipeline model"]},{"metadata":{},"cell_type":"markdown","source":["Score the fitted pipeline with the generated scorer using the holdout dataset."]},{"metadata":{"execution":{"iopub.execute_input":"2020-10-12T14:02:03.910267Z","iopub.status.busy":"2020-10-12T14:02:03.909710Z","iopub.status.idle":"2020-10-12T14:02:03.914154Z","shell.execute_reply":"2020-10-12T14:02:03.914727Z"},"pycharm":{"is_executing":true,"name":"#%%\n"}},"cell_type":"code","source":["score = scorer(pipeline, test_X, test_y)\n","print(score)"],"execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":[" \n","# Next steps\n","\n","#### [Model deployment as webservice](https://github.com/IBM/watson-machine-learning-samples/tree/master/cloud/notebooks/python_sdk/deployments/autoai)\n","#### [Run AutoAI experiment with python SDK](https://github.com/IBM/watson-machine-learning-samples/tree/master/cloud/notebooks/python_sdk/experiments/autoai) "]},{"metadata":{"pycharm":{"name":"#%% md\n"}},"cell_type":"markdown","source":[" \n","### Copyrights\n","\n","Licensed Materials - Copyright © 2021 IBM. This notebook and its source code are released under the terms of the ILAN License. Use, duplication disclosure restricted by GSA ADP Schedule Contract with IBM Corp.\n","\n","**Note:** The auto-generated notebooks are subject to the International License Agreement for Non-Warranted Programs (or equivalent) and License Information document for Watson Studio Auto-generated Notebook (License Terms), such agreements located in the link below. Specifically, the Source Components and Sample Materials clause included in the License Information document for Watson Studio Auto-generated Notebook applies to the auto-generated notebooks. \n","\n","By downloading, copying, accessing, or otherwise using the materials, you agree to the License Terms \n","\n","___"]}],"metadata":{"kernelspec":{"name":"python3","display_name":"Python 3.7","language":"python"},"language_info":{"name":"python","version":"3.7.10","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat":4,"nbformat_minor":2}
--------------------------------------------------------------------------------
/notebooks/Credit Data Analysis - experiment notebook.ipynb:
--------------------------------------------------------------------------------
1 | {"cells":[{"metadata":{},"cell_type":"markdown","source":["\n","# Experiment Notebook - AutoAI Notebook v1.15.0\n","\n","\n","This notebook contains the steps and code to demonstrate support of AutoAI experiments in Watson Machine Learning service. It introduces Python SDK commands for data retrieval, training experiments, persisting pipelines, testing pipelines, refining pipelines, and scoring the resulting model.\n","\n","**Note:** Notebook code generated using AutoAI will execute successfully. If code is modified or reordered, there is no guarantee it will successfully execute. For details, see: Saving an Auto AI experiment as a notebook \n"]},{"metadata":{},"cell_type":"markdown","source":["Some familiarity with Python is helpful. This notebook uses Python 3.7 and `ibm_watson_machine_learning` package.\n","\n","\n","## Notebook goals\n","\n","The learning goals of this notebook are:\n","- Defining an AutoAI experiment\n","- Training AutoAI models \n","- Comparing trained models\n","- Deploying the model as a web service\n","- Scoring the model to generate predictions.\n","\n","\n","\n","## Contents\n","\n","This notebook contains the following parts:\n","\n","**[Setup](#setup)** \n"," [Package installation](#install) \n"," [Watson Machine Learning connection](#connection) \n","**[Experiment configuration](#configuration)** \n"," [Experiment metadata](#metadata) \n","**[Working with completed AutoAI experiment](#work)** \n"," [Get fitted AutoAI optimizer](#get) \n"," [Pipelines comparison](#comparison) \n"," [Get pipeline as scikit-learn pipeline model](#get_pipeline) \n"," [Inspect pipeline](#inspect_pipeline) \n"," [Visualize pipeline model](#visualize) \n"," [Preview pipeline model as python code](#preview) \n","**[Deploy and Score](#scoring)** \n"," [Working with spaces](#working_spaces) \n","**[Running AutoAI experiment with Python SDK](#run)** \n","**[Clean up](#cleanup)** \n","**[Next steps](#next_steps)** \n","**[Copyrights](#copyrights)**"]},{"metadata":{},"cell_type":"markdown","source":[" \n","# Setup"]},{"metadata":{},"cell_type":"markdown","source":[" \n","## Package installation\n","Before you use the sample code in this notebook, install the following packages:\n"," - ibm_watson_machine_learning,\n"," - autoai-libs,\n"," - lale,\n"," - scikit-learn,\n"," - xgboost,\n"," - lightgbm.\n"]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["!conda remove --force libxgboost -y\n","!pip install ibm-watson-machine-learning | tail -n 1\n","!pip install -U autoai-libs==1.12.7 | tail -n 1\n","!pip install -U 'lale>=0.5.1,<0.6' | tail -n 1\n","!pip install -U scikit-learn==0.23.2 | tail -n 1\n","!pip install -U xgboost==1.3.3 | tail -n 1\n","!pip install -U lightgbm==3.1.1 | tail -n 1"],"execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":[" \n","# Experiment configuration"]},{"metadata":{"pycharm":{"name":"#%% md\n"}},"cell_type":"markdown","source":[" \n","## Experiment metadata\n","This cell defines the metadata for the experiment, including: training_data_reference, training_result_reference, experiment_metadata."]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["#@hidden_cell\n","from ibm_watson_machine_learning.helpers import DataConnection\n","from ibm_watson_machine_learning.helpers import S3Connection, S3Location\n","\n","training_data_reference = [DataConnection(\n"," connection=S3Connection(\n"," api_key='',\n"," auth_endpoint='https://iam.bluemix.net/oidc/token/',\n"," 
endpoint_url='https://s3-api.us-geo.objectstorage.softlayer.net'\n"," ),\n"," location=S3Location(\n"," bucket='autoaitutorialnew-donotdelete-pr-zhaudczzjfda0e',\n"," path='german_credit_data.csv'\n"," )),\n","]\n","training_result_reference = DataConnection(\n"," connection=S3Connection(\n"," api_key='',\n"," auth_endpoint='https://iam.bluemix.net/oidc/token/',\n"," endpoint_url='https://s3-api.us-geo.objectstorage.softlayer.net'\n"," ),\n"," location=S3Location(\n"," bucket='autoaitutorialnew-donotdelete-pr-zhaudczzjfda0e',\n"," path='auto_ml/509ff920-3fcf-4eb9-9abc-bbc63b4a301a/wml_data/33aa26d1-15eb-482c-93ca-76c186c431a2/data/automl',\n"," model_location='auto_ml/509ff920-3fcf-4eb9-9abc-bbc63b4a301a/wml_data/33aa26d1-15eb-482c-93ca-76c186c431a2/data/automl/pre_hpo_d_output/Pipeline1/model.pickle',\n"," training_status='auto_ml/509ff920-3fcf-4eb9-9abc-bbc63b4a301a/wml_data/33aa26d1-15eb-482c-93ca-76c186c431a2/training-status.json'\n"," ))"],"execution_count":null,"outputs":[]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["experiment_metadata = dict(\n"," prediction_type='classification',\n"," prediction_column='Result',\n"," holdout_size=0.1,\n"," scoring='accuracy',\n"," csv_separator=',',\n"," random_state=33,\n"," max_number_of_estimators=2,\n"," training_data_reference=training_data_reference,\n"," training_result_reference=training_result_reference,\n"," deployment_url='https://us-south.ml.cloud.ibm.com',\n"," project_id='0e2a32c8-f2ed-4587-8479-3a22b2ea57b3',\n"," positive_label=1,\n"," drop_duplicates=True\n",")"],"execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":[" \n","## Watson Machine Learning connection\n","\n","This cell defines the credentials required to work with the Watson Machine Learning service.\n","\n","**Action** Please provide IBM Cloud apikey following [docs](https://cloud.ibm.com/docs/account?topic=account-userapikey)."]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["api_key = ''"],"execution_count":null,"outputs":[]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["wml_credentials = {\n"," \"apikey\": api_key,\n"," \"url\": experiment_metadata['deployment_url']\n","}"],"execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":[" \n","\n","\n","# Working with completed AutoAI experiment\n","\n","This cell imports the pipelines generated for the experiment so they can be compared to find the optimal pipeline to save as a model."]},{"metadata":{},"cell_type":"markdown","source":[" \n","\n","\n","## Get fitted AutoAI optimizer"]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["from ibm_watson_machine_learning.experiment import AutoAI\n","\n","pipeline_optimizer = AutoAI(wml_credentials, project_id=experiment_metadata['project_id']).runs.get_optimizer(metadata=experiment_metadata)"],"execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["Use `get_params()`- to retrieve configuration parameters."]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["pipeline_optimizer.get_params()"],"execution_count":null,"outputs":[]},{"metadata":{"pycharm":{"name":"#%% md\n"}},"cell_type":"markdown","source":[" \n","## Pipelines comparison\n","\n","Use the `summary()` method to list trained pipelines and evaluation metrics information in\n","the form of a Pandas DataFrame. 
{"metadata":{},"cell_type":"markdown","source":[" \n","\n","\n","# Working with completed AutoAI experiment\n","\n","This cell imports the pipelines generated for the experiment so they can be compared to find the optimal pipeline to save as a model."]},{"metadata":{},"cell_type":"markdown","source":[" \n","\n","\n","## Get fitted AutoAI optimizer"]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["from ibm_watson_machine_learning.experiment import AutoAI\n","\n","pipeline_optimizer = AutoAI(wml_credentials, project_id=experiment_metadata['project_id']).runs.get_optimizer(metadata=experiment_metadata)"],"execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["Use `get_params()` to retrieve the configuration parameters."]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["pipeline_optimizer.get_params()"],"execution_count":null,"outputs":[]},{"metadata":{"pycharm":{"name":"#%% md\n"}},"cell_type":"markdown","source":[" \n","## Pipelines comparison\n","\n","Use the `summary()` method to list the trained pipelines and their evaluation metrics in\n","the form of a Pandas DataFrame. You can use the DataFrame to compare all discovered pipelines and select one for further testing."]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["summary = pipeline_optimizer.summary()\n","best_pipeline_name = list(summary.index)[0]\n","summary"],"execution_count":null,"outputs":[]},
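{"metadata":{},"cell_type":"markdown","source":["**Tip:** `summary()` returns an ordinary Pandas DataFrame, so you are not limited to the default ordering above. A minimal sketch, assuming a holdout metric column such as `holdout_roc_auc` is present in the summary:\n","```\n","# Rank pipelines by holdout ROC AUC and pick the top one\n","best_by_roc_auc = summary.sort_values(by='holdout_roc_auc', ascending=False).index[0]\n","print(best_by_roc_auc)\n","```"]},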
{"metadata":{},"cell_type":"markdown","source":[" \n","### Get pipeline as scikit-learn pipeline model\n","\n","After you compare the pipelines, download and save a scikit-learn pipeline model object from the\n","AutoAI training job.\n","\n","**Tip:** To get a specific pipeline, pass its name:\n","```\n","pipeline_optimizer.get_pipeline(pipeline_name=pipeline_name)\n","```"]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["pipeline_model = pipeline_optimizer.get_pipeline(pipeline_name=best_pipeline_name)"],"execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["Next, check the feature importance for the selected pipeline."]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["pipeline_optimizer.get_pipeline_details()['features_importance']"],"execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["**Tip:** To check all model evaluation metric details, use:\n","```\n","pipeline_optimizer.get_pipeline_details()\n","```"]},{"metadata":{},"cell_type":"markdown","source":[" \n","## Inspect pipeline"]},{"metadata":{},"cell_type":"markdown","source":[" \n","### Visualize pipeline model\n","\n","Preview the pipeline model stages as a graph. Each node's name links to a detailed description of the stage.\n"]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["pipeline_model.visualize()"],"execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":[" \n","### Preview pipeline model as python code\n","In the next cell, you can preview the saved pipeline model as Python code, \n","so you can review the exact steps used to create the model.\n","\n","**Note:** If you want the scikit-learn representation, add the following parameter to the `pretty_print` call: `astype='sklearn'`."]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["pipeline_model.pretty_print(combinators=False, ipython_display=True)"],"execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":[" \n","## Deploy and Score\n","\n","In this section you will learn how to deploy and score the model as a web service."]},{"metadata":{},"cell_type":"markdown","source":[" \n","### Working with spaces\n","\n","In this section you will specify a deployment space for organizing the assets for deploying and scoring the model. If you do not have an existing space, you can use the [Deployment Spaces Dashboard](https://dataplatform.cloud.ibm.com/ml-runtime/spaces?context=cpdaas) to create a new space, following these steps:\n","\n","- Click **New Deployment Space**.\n","- Create an empty space.\n","- Select Cloud Object Storage.\n","- Select a Watson Machine Learning instance and press **Create**.\n","- Copy the `space_id` and paste it below.\n","\n","**Tip**: You can also use the SDK to prepare the space for your work. Learn more [here](https://github.com/IBM/watson-machine-learning-samples/blob/master/notebooks/python_sdk/instance-management/Space%20management.ipynb).\n","\n","**Action**: assign or update the space ID below"]},{"metadata":{},"cell_type":"markdown","source":["### Deployment creation"]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["target_space_id = \"\"\n","\n","from ibm_watson_machine_learning.deployment import WebService\n","service = WebService(source_wml_credentials=wml_credentials,\n","                     target_wml_credentials=wml_credentials,\n","                     source_project_id=experiment_metadata['project_id'],\n","                     target_space_id=target_space_id)\n","service.create(\n","    model=best_pipeline_name,\n","    metadata=experiment_metadata,\n","    deployment_name='Best_pipeline_webservice'\n",")"],"execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["Use the `print` method on the deployment object to show basic information about the service: "]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["print(service)"],"execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["To show all available information about the deployment, use the `.get_params()` method:"]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["service.get_params()"],"execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["### Scoring of webservice\n","You can make a scoring request by calling `score()` on the deployed pipeline."]},{"metadata":{},"cell_type":"markdown","source":["If you want to work with the web service in an external Python application, follow these steps to retrieve the service object:\n","\n"," - Initialize the service with `service = WebService(wml_credentials)`\n"," - Get the deployment_id with the `service.list()` method\n"," - Get the webservice object with the `service.get('deployment_id')` method\n","\n","After that you can call the `service.score()` method, as in the sketch below."]},
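{"metadata":{},"cell_type":"markdown","source":["A minimal sketch of those steps, assuming you replace `deployment_id` with a real id taken from `service.list()`, and that `payload_df` is a Pandas DataFrame with the same columns as the training data:\n","```\n","from ibm_watson_machine_learning.deployment import WebService\n","\n","service = WebService(wml_credentials)\n","service.list()                  # look up the deployment id\n","service.get('deployment_id')    # bind the service object to that deployment\n","predictions = service.score(payload=payload_df)\n","```"]},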
{"metadata":{},"cell_type":"markdown","source":["### Deleting deployment\n","    \n","You can delete the existing deployment by calling the `service.delete()` command.\n","To list the existing web services, use `service.list()`."]},{"metadata":{},"cell_type":"markdown","source":[" \n","\n","## Running AutoAI experiment with Python SDK"]},{"metadata":{},"cell_type":"markdown","source":["If you want to run an AutoAI experiment using the Python API, follow the steps described below. The experiment settings were generated based on the parameters set in the UI.\n"," - Go to your COS dashboard.\n"," - In the Service credentials tab, click New Credential.\n"," - Add the inline configuration parameter `{\"HMAC\":true}` and click Add.\n","This configuration parameter adds the following section to the instance credentials (for use later in this notebook):\n","```\n","\"cos_hmac_keys\": {\n","      \"access_key_id\": \"***\",\n","      \"secret_access_key\": \"***\"\n","    }\n"," ```\n","\n","**Action:** Please provide your COS credentials in the following cells.\n","\n","- To run the code, convert the provided markdown cells to code cells.\n","\n"]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"markdown","source":["```\n","from ibm_watson_machine_learning.experiment import AutoAI\n","\n","experiment = AutoAI(wml_credentials, project_id=experiment_metadata['project_id'])\n","```"]},{"metadata":{},"cell_type":"markdown","source":["```\n","#@hidden_cell\n","cos_hmac_keys = {\n","    \"access_key_id\": \"PLACE_YOUR_ACCESS_KEY_ID_HERE\",\n","    \"secret_access_key\": \"PLACE_YOUR_SECRET_ACCESS_KEY_HERE\"\n","  }\n","  \n","cos_api_key = \"PLACE_YOUR_API_KEY_HERE\"\n","OPTIMIZER_NAME = 'custom_name'\n","```"]},{"metadata":{"pycharm":{"name":"#%% md\n"}},"cell_type":"markdown","source":["The experiment settings were generated based on the parameters set in the UI."]},{"metadata":{"pycharm":{"name":"#%% raw\n"}},"cell_type":"markdown","source":["```\n","from ibm_watson_machine_learning.helpers import DataConnection\n","from ibm_watson_machine_learning.helpers import S3Connection, S3Location\n","\n","training_data_reference = [DataConnection(\n","    connection=S3Connection(\n","        api_key=cos_api_key,\n","        auth_endpoint='https://iam.bluemix.net/oidc/token/',\n","        endpoint_url='https://s3-api.us-geo.objectstorage.softlayer.net',\n","        access_key_id = cos_hmac_keys['access_key_id'],\n","        secret_access_key = cos_hmac_keys['secret_access_key']\n","    ),\n","    location=S3Location(\n","        bucket='autoaitutorialnew-donotdelete-pr-zhaudczzjfda0e',\n","        path='german_credit_data.csv'\n","    )),\n","]\n","training_result_reference = DataConnection(\n","    connection=S3Connection(\n","        api_key=cos_api_key,\n","        auth_endpoint='https://iam.bluemix.net/oidc/token/',\n","        endpoint_url='https://s3-api.us-geo.objectstorage.softlayer.net',\n","        access_key_id = cos_hmac_keys['access_key_id'],\n","        secret_access_key = cos_hmac_keys['secret_access_key']\n","    ),\n","    location=S3Location(\n","        bucket='autoaitutorialnew-donotdelete-pr-zhaudczzjfda0e',\n","        path='auto_ml/509ff920-3fcf-4eb9-9abc-bbc63b4a301a/wml_data/33aa26d1-15eb-482c-93ca-76c186c431a2/data/automl',\n","        model_location='auto_ml/509ff920-3fcf-4eb9-9abc-bbc63b4a301a/wml_data/33aa26d1-15eb-482c-93ca-76c186c431a2/data/automl/pre_hpo_d_output/Pipeline1/model.pickle',\n","        training_status='auto_ml/509ff920-3fcf-4eb9-9abc-bbc63b4a301a/wml_data/33aa26d1-15eb-482c-93ca-76c186c431a2/training-status.json'\n","    ))\n","```"]},{"metadata":{"pycharm":{"name":"#%%raw\n"}},"cell_type":"markdown","source":["```\n","pipeline_optimizer = experiment.optimizer(\n","    name=OPTIMIZER_NAME,\n","    prediction_type=experiment_metadata['prediction_type'],\n","    prediction_column=experiment_metadata['prediction_column'],\n","    scoring=experiment_metadata['scoring'],\n","    holdout_size=experiment_metadata['holdout_size'],\n","    csv_separator=experiment_metadata['csv_separator'],\n","    positive_label=experiment_metadata['positive_label'],\n","    drop_duplicates=experiment_metadata['drop_duplicates'])\n","```"]},{"metadata":{},"cell_type":"markdown","source":["```\n","pipeline_optimizer.fit(training_data_reference=training_data_reference,\n","                       training_results_reference=training_result_reference,\n","                       background_mode=False)\n","```"]},
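{"metadata":{},"cell_type":"markdown","source":["`background_mode=False` makes the `fit` call above block until training finishes. If you run it in the background instead, you can poll the optimizer - a minimal sketch, assuming the `get_run_status()` and `get_run_details()` helpers are available on the optimizer object:\n","```\n","pipeline_optimizer.get_run_status()    # e.g. 'running' or 'completed'\n","pipeline_optimizer.get_run_details()\n","```"]},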
{"metadata":{},"cell_type":"markdown","source":["\n","    \n","# Next steps\n","\n","#### [Online Documentation](https://www.ibm.com/cloud/watson-studio/autoai)"]},{"metadata":{},"cell_type":"markdown","source":[" \n","### Copyrights\n","\n","Licensed Materials - Copyright © 2021 IBM. This notebook and its source code are released under the terms of the ILAN License.\n","Use, duplication or disclosure restricted by GSA ADP Schedule Contract with IBM Corp.\n","\n","**Note:** The auto-generated notebooks are subject to the International License Agreement for Non-Warranted Programs (or equivalent) and License Information document for Watson Studio Auto-generated Notebook (License Terms), such agreements located in the link below. Specifically, the Source Components and Sample Materials clause included in the License Information document for Watson Studio Auto-generated Notebook applies to the auto-generated notebooks. \n","\n","By downloading, copying, accessing, or otherwise using the materials, you agree to the License Terms \n","\n","___"]}],"metadata":{"kernelspec":{"name":"python3","display_name":"Python 3.7","language":"python"},"language_info":{"name":"python","version":"3.7.10","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"pycharm":{"stem_cell":{"cell_type":"raw","metadata":{"collapsed":false},"source":["\n"]}}},"nbformat":4,"nbformat_minor":1}
--------------------------------------------------------------------------------
/notebooks/with-output/Credit Data Analysis - P2 notebook.ipynb:
--------------------------------------------------------------------------------
1 | {"cells":[{"metadata":{},"cell_type":"markdown","source":["\n","# Pipeline 2 Notebook - AutoAI Notebook v1.15.0\n","\n","Consider these tips for working with an auto-generated notebook:\n","- Notebook code generated using AutoAI will execute successfully. If you modify the notebook, we cannot guarantee it will run successfully.\n","- This pipeline is optimized for the original data set. The pipeline might fail or produce sub-optimum results if used with different data. If you want to use a different data set, consider retraining the AutoAI experiment to generate a new pipeline. For more information, see Cloud Platform \n","- Before modifying the pipeline or trying to re-fit the pipeline, consider that the code converts dataframes to numpy arrays before fitting the pipeline (a current restriction of the preprocessor pipeline).\n"]},{"metadata":{},"cell_type":"markdown","source":[" \n","## Notebook content\n","\n","This notebook contains a Scikit-learn representation of AutoAI pipeline. This notebook introduces commands for getting data, training the model, and testing the model. \n","\n","Some familiarity with Python is helpful. This notebook uses Python 3.7 and scikit-learn 0.23.2."]},{"metadata":{"pycharm":{"name":"#%% md\n"}},"cell_type":"markdown","source":["## Notebook goals\n","\n","- Scikit-learn pipeline definition\n","- Pipeline training \n","- Pipeline evaluation\n","\n","## Contents\n","\n","This notebook contains the following parts:\n","\n","**[Setup](#setup)** \n"," [Package installation](#install) \n"," [AutoAI experiment metadata](#variables_definition) \n","**[Pipeline inspection](#inspection)** \n"," [Read training data](#read) \n"," [Train and test data split](#split) \n"," [Make pipeline](#preview_model_to_python_code) \n"," [Train pipeline model](#train) \n"," [Test pipeline model](#test_model) \n","**[Next steps](#next_steps)** \n","**[Copyrights](#copyrights)**"]},{"metadata":{},"cell_type":"markdown","source":[" \n","# Setup"]},{"metadata":{},"cell_type":"markdown","source":[" \n","## Package installation\n","Before you use the sample code in this notebook, install the following packages:\n"," - ibm_watson_machine_learning,\n"," - autoai-libs,\n"," - scikit-learn,\n"," - xgboost.\n"]},{"metadata":{"execution":{"iopub.execute_input":"2020-10-12T14:00:45.009458Z","iopub.status.busy":"2020-10-12T14:00:45.007968Z","iopub.status.idle":"2020-10-12T14:00:46.037702Z","shell.execute_reply":"2020-10-12T14:00:46.038270Z"},"pycharm":{"name":"#%%\n"},"scrolled":true},"cell_type":"code","source":["!conda remove --force libxgboost -y\n","!pip install ibm-watson-machine-learning | tail -n 1\n","!pip install -U autoai-libs==1.12.7 | tail -n 1\n","!pip install -U scikit-learn==0.23.2 | tail -n 1\n","!pip install -U xgboost==1.3.3 | tail -n 1"],"execution_count":1,"outputs":[{"output_type":"stream","text":"\n## Package Plan ##\n\n environment location: /opt/conda/envs/Python-3.7-main\n\n removed specs:\n - libxgboost\n\n\nThe following packages will be REMOVED:\n\n libxgboost-0.90-he6710b0_1\n\n\nPreparing transaction: done\nVerifying transaction: done\nExecuting transaction: done\n/opt/conda/envs/Python-3.7-main/lib/python3.7/site-packages/secretstorage/dhcrypto.py:16: CryptographyDeprecationWarning: int_from_bytes is deprecated, use int.from_bytes instead\n from cryptography.utils import int_from_bytes\n/opt/conda/envs/Python-3.7-main/lib/python3.7/site-packages/secretstorage/util.py:25: CryptographyDeprecationWarning: int_from_bytes is deprecated, use int.from_bytes instead\n 
from cryptography.utils import int_from_bytes\nRequirement already satisfied: docutils<0.16,>=0.10 in /opt/conda/envs/Python-3.7-main/lib/python3.7/site-packages (from ibm-cos-sdk-core==2.7.0->ibm-cos-sdk==2.7.*->ibm-watson-machine-learning) (0.15.2)\n/opt/conda/envs/Python-3.7-main/lib/python3.7/site-packages/secretstorage/dhcrypto.py:16: CryptographyDeprecationWarning: int_from_bytes is deprecated, use int.from_bytes instead\n from cryptography.utils import int_from_bytes\n/opt/conda/envs/Python-3.7-main/lib/python3.7/site-packages/secretstorage/util.py:25: CryptographyDeprecationWarning: int_from_bytes is deprecated, use int.from_bytes instead\n from cryptography.utils import int_from_bytes\n\u001b[31mERROR: tensorflow 2.1.0 has requirement scipy==1.4.1; python_version >= \"3\", but you'll have scipy 1.5.0 which is incompatible.\u001b[0m\nSuccessfully installed autoai-libs-1.12.7 gensim-3.8.3 numpy-1.19.2 smart-open-5.1.0\n/opt/conda/envs/Python-3.7-main/lib/python3.7/site-packages/secretstorage/dhcrypto.py:16: CryptographyDeprecationWarning: int_from_bytes is deprecated, use int.from_bytes instead\n from cryptography.utils import int_from_bytes\n/opt/conda/envs/Python-3.7-main/lib/python3.7/site-packages/secretstorage/util.py:25: CryptographyDeprecationWarning: int_from_bytes is deprecated, use int.from_bytes instead\n from cryptography.utils import int_from_bytes\nSuccessfully installed scikit-learn-0.23.2\n/opt/conda/envs/Python-3.7-main/lib/python3.7/site-packages/secretstorage/dhcrypto.py:16: CryptographyDeprecationWarning: int_from_bytes is deprecated, use int.from_bytes instead\n from cryptography.utils import int_from_bytes\n/opt/conda/envs/Python-3.7-main/lib/python3.7/site-packages/secretstorage/util.py:25: CryptographyDeprecationWarning: int_from_bytes is deprecated, use int.from_bytes instead\n from cryptography.utils import int_from_bytes\nSuccessfully installed xgboost-1.3.3\n","name":"stdout"}]},{"metadata":{},"cell_type":"markdown","source":[" \n","## AutoAI experiment metadata\n","The following cell contains the training data connection details. 
\n","**Note**: The connection might contain authorization credentials, so be careful when sharing the notebook."]},{"metadata":{"execution":{"iopub.execute_input":"2020-10-12T14:00:49.797633Z","iopub.status.busy":"2020-10-12T14:00:49.796778Z","iopub.status.idle":"2020-10-12T14:00:57.182715Z","shell.execute_reply":"2020-10-12T14:00:57.183132Z"},"pycharm":{"is_executing":true}},"cell_type":"code","source":["#@hidden_cell\n","from ibm_watson_machine_learning.helpers import DataConnection\n","from ibm_watson_machine_learning.helpers import S3Connection, S3Location\n","\n","training_data_reference = [DataConnection(\n"," connection=S3Connection(\n"," api_key='',\n"," auth_endpoint='https://iam.bluemix.net/oidc/token/',\n"," endpoint_url='https://s3-api.us-geo.objectstorage.softlayer.net'\n"," ),\n"," location=S3Location(\n"," bucket='autoaitutorialnew-donotdelete-pr-zhaudczzjfda0e',\n"," path='german_credit_data.csv'\n"," )),\n","]\n","training_result_reference = DataConnection(\n"," connection=S3Connection(\n"," api_key='',\n"," auth_endpoint='https://iam.bluemix.net/oidc/token/',\n"," endpoint_url='https://s3-api.us-geo.objectstorage.softlayer.net'\n"," ),\n"," location=S3Location(\n"," bucket='autoaitutorialnew-donotdelete-pr-zhaudczzjfda0e',\n"," path='auto_ml/509ff920-3fcf-4eb9-9abc-bbc63b4a301a/wml_data/33aa26d1-15eb-482c-93ca-76c186c431a2/data/automl',\n"," model_location='auto_ml/509ff920-3fcf-4eb9-9abc-bbc63b4a301a/wml_data/33aa26d1-15eb-482c-93ca-76c186c431a2/data/automl/hpo_d_output/Pipeline1/model.pickle',\n"," training_status='auto_ml/509ff920-3fcf-4eb9-9abc-bbc63b4a301a/wml_data/33aa26d1-15eb-482c-93ca-76c186c431a2/training-status.json'\n"," ))"],"execution_count":2,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["Following cell contains input parameters provided to run the AutoAI experiment in Watson Studio."]},{"metadata":{"execution":{"iopub.execute_input":"2020-10-12T14:00:57.187305Z","iopub.status.busy":"2020-10-12T14:00:57.186602Z","iopub.status.idle":"2020-10-12T14:00:57.188392Z","shell.execute_reply":"2020-10-12T14:00:57.188878Z"},"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["experiment_metadata = dict(\n"," prediction_type='classification',\n"," prediction_column='Result',\n"," holdout_size=0.1,\n"," scoring='accuracy',\n"," csv_separator=',',\n"," random_state=33,\n"," max_number_of_estimators=2,\n"," training_data_reference=training_data_reference,\n"," training_result_reference=training_result_reference,\n"," deployment_url='https://us-south.ml.cloud.ibm.com',\n"," project_id='0e2a32c8-f2ed-4587-8479-3a22b2ea57b3',\n"," positive_label=1,\n"," drop_duplicates=True\n",")"],"execution_count":3,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":[" \n","# Pipeline inspection"]},{"metadata":{"pycharm":{"name":"#%% md\n"}},"cell_type":"markdown","source":[" \n","## Read training data\n","\n","Retrieve training dataset from AutoAI experiment as pandas DataFrame. If reading data using Flight Service Connection results with error, please provide data as Pandas DataFrame object e.g. 
{"metadata":{},"cell_type":"markdown","source":[" \n","## Train and test data split"]},{"metadata":{"pycharm":{"is_executing":true}},"cell_type":"code","source":["from sklearn.model_selection import train_test_split\n","\n","df.drop_duplicates(inplace=True)\n","X = df.drop([experiment_metadata['prediction_column']], axis=1).values\n","y = df[experiment_metadata['prediction_column']].values\n","\n","train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=experiment_metadata['holdout_size'],\n","                                                    stratify=y, random_state=experiment_metadata['random_state'])"],"execution_count":5,"outputs":[]},{"metadata":{"pycharm":{"name":"#%% md\n"}},"cell_type":"markdown","source":[" \n","## Make pipeline\n","In the next cell, you can find the Scikit-learn definition of the selected AutoAI pipeline."]},{"metadata":{},"cell_type":"markdown","source":["Import statements."]},{"metadata":{"pycharm":{"is_executing":true,"name":"#%%\n"}},"cell_type":"code","source":["from autoai_libs.transformers.exportable import NumpyColumnSelector\n","from autoai_libs.transformers.exportable import CompressStrings\n","from autoai_libs.transformers.exportable import NumpyReplaceMissingValues\n","from autoai_libs.transformers.exportable import NumpyReplaceUnknownValues\n","from autoai_libs.transformers.exportable import boolean2float\n","from autoai_libs.transformers.exportable import CatImputer\n","from autoai_libs.transformers.exportable import CatEncoder\n","import numpy as np\n","from autoai_libs.transformers.exportable import float32_transform\n","from sklearn.pipeline import make_pipeline\n","from autoai_libs.transformers.exportable import FloatStr2Float\n","from autoai_libs.transformers.exportable import NumImputer\n","from autoai_libs.transformers.exportable import OptStandardScaler\n","from sklearn.pipeline import make_union\n","from autoai_libs.transformers.exportable import NumpyPermuteArray\n","from xgboost import XGBClassifier"],"execution_count":6,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["#### Pre-processing & Estimator."]},{"metadata":{"pycharm":{"is_executing":true,"name":"#%%\n"}},"cell_type":"code","source":["numpy_column_selector_0 = NumpyColumnSelector(\n","    columns=[\n","        0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,\n","    ]\n",")\n","compress_strings = CompressStrings(\n","    compress_type=\"hash\",\n","    dtypes_list=[\n","        \"char_str\", \"int_num\", \"char_str\", \"char_str\", \"char_str\", \"char_str\",\n","        \"int_num\", \"char_str\", \"char_str\", \"int_num\", \"char_str\", \"int_num\",\n","        \"char_str\", \"char_str\", \"int_num\", \"char_str\", \"int_num\", \"char_str\",\n","        \"char_str\",\n","    ],\n","    missing_values_reference_list=[\"\", \"-\", \"?\", float(\"nan\")],\n","    misslist_list=[\n","        [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [],\n","        [], [],\n","    ],\n",")\n","numpy_replace_missing_values_0 = NumpyReplaceMissingValues(\n","    missing_values=[],
filling_values=float(\"nan\")\n",")\n","numpy_replace_unknown_values = NumpyReplaceUnknownValues(\n"," filling_values=float(\"nan\"),\n"," filling_values_list=[\n"," float(\"nan\"), float(\"nan\"), float(\"nan\"), float(\"nan\"), float(\"nan\"),\n"," float(\"nan\"), float(\"nan\"), float(\"nan\"), float(\"nan\"), float(\"nan\"),\n"," float(\"nan\"), float(\"nan\"), float(\"nan\"), float(\"nan\"), float(\"nan\"),\n"," float(\"nan\"), float(\"nan\"), float(\"nan\"), float(\"nan\"),\n"," ],\n"," missing_values_reference_list=[\"\", \"-\", \"?\", float(\"nan\")],\n",")\n","cat_imputer = CatImputer(\n"," strategy=\"most_frequent\",\n"," missing_values=float(\"nan\"),\n"," sklearn_version_family=\"23\",\n",")\n","cat_encoder = CatEncoder(\n"," encoding=\"ordinal\",\n"," categories=\"auto\",\n"," dtype=np.float64,\n"," handle_unknown=\"error\",\n"," sklearn_version_family=\"23\",\n",")\n","pipeline_0 = make_pipeline(\n"," numpy_column_selector_0,\n"," compress_strings,\n"," numpy_replace_missing_values_0,\n"," numpy_replace_unknown_values,\n"," boolean2float(),\n"," cat_imputer,\n"," cat_encoder,\n"," float32_transform(),\n",")\n","numpy_column_selector_1 = NumpyColumnSelector(columns=[4])\n","float_str2_float = FloatStr2Float(\n"," dtypes_list=[\"int_num\"], missing_values_reference_list=[]\n",")\n","numpy_replace_missing_values_1 = NumpyReplaceMissingValues(\n"," missing_values=[], filling_values=float(\"nan\")\n",")\n","num_imputer = NumImputer(strategy=\"median\", missing_values=float(\"nan\"))\n","opt_standard_scaler = OptStandardScaler(\n"," num_scaler_copy=None,\n"," num_scaler_with_mean=None,\n"," num_scaler_with_std=None,\n"," use_scaler_flag=False,\n",")\n","pipeline_1 = make_pipeline(\n"," numpy_column_selector_1,\n"," float_str2_float,\n"," numpy_replace_missing_values_1,\n"," num_imputer,\n"," opt_standard_scaler,\n"," float32_transform(),\n",")\n","union = make_union(pipeline_0, pipeline_1)\n","numpy_permute_array = NumpyPermuteArray(\n"," axis=0,\n"," permutation_indices=[\n"," 0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 4,\n"," ],\n",")\n","xgb_classifier = XGBClassifier(\n"," base_score=0.5,\n"," booster=\"gbtree\",\n"," colsample_bylevel=1,\n"," colsample_bynode=1,\n"," colsample_bytree=1,\n"," gamma=0,\n"," gpu_id=-1,\n"," interaction_constraints=\"\",\n"," learning_rate=1.0,\n"," max_delta_step=0,\n"," max_depth=2,\n"," min_child_weight=2,\n"," missing=float(\"nan\"),\n"," monotone_constraints=\"()\",\n"," n_estimators=128,\n"," n_jobs=2,\n"," num_parallel_tree=1,\n"," random_state=33,\n"," reg_alpha=1,\n"," reg_lambda=1.0,\n"," scale_pos_weight=1,\n"," subsample=0.9992297983348898,\n"," tree_method=\"hist\",\n"," validate_parameters=1,\n"," verbosity=0,\n"," nthread=2,\n"," silent=True,\n"," seed=33,\n",")\n"],"execution_count":7,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["Pipeline."]},{"metadata":{"pycharm":{"is_executing":true,"name":"#%%\n"}},"cell_type":"code","source":["pipeline = make_pipeline(union, numpy_permute_array, xgb_classifier)"],"execution_count":8,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":[" \n","## Train pipeline model\n"]},{"metadata":{},"cell_type":"markdown","source":["### Define scorer from the optimization metric\n","This cell constructs the cell scorer based on the experiment metadata."]},{"metadata":{"pycharm":{"is_executing":true}},"cell_type":"code","source":["from sklearn.metrics import get_scorer\n","\n","scorer = 
get_scorer(experiment_metadata['scoring'])"],"execution_count":9,"outputs":[]},{"metadata":{"pycharm":{"name":"#%% md\n"}},"cell_type":"markdown","source":[" \n","### Fit pipeline model\n","In this cell, the pipeline is fitted."]},{"metadata":{"execution":{"iopub.execute_input":"2020-10-12T14:01:19.291734Z","iopub.status.busy":"2020-10-12T14:01:19.244735Z","iopub.status.idle":"2020-10-12T14:01:19.338461Z","shell.execute_reply":"2020-10-12T14:01:19.338958Z"},"pycharm":{"is_executing":true,"name":"#%%\n"},"scrolled":true},"cell_type":"code","source":["pipeline.fit(train_X,train_y)"],"execution_count":10,"outputs":[{"output_type":"execute_result","execution_count":10,"data":{"text/plain":"Pipeline(steps=[('featureunion',\n FeatureUnion(transformer_list=[('pipeline-1',\n Pipeline(steps=[('numpycolumnselector',\n NumpyColumnSelector(columns=[0,\n 1,\n 2,\n 3,\n 5,\n 6,\n 7,\n 8,\n 9,\n 10,\n 11,\n 12,\n 13,\n 14,\n 15,\n 16,\n 17,\n 18,\n 19])),\n ('compressstrings',\n CompressStrings(compress_type='hash',\n dtypes_list=['char_str',\n 'int_num',\n 'char_str',\n 'char_str',\n 'char_str',\n 'char_str',\n 'int_num',\n 'char_str'...\n interaction_constraints='', learning_rate=1.0,\n max_delta_step=0, max_depth=2,\n min_child_weight=2, missing=nan,\n monotone_constraints='()', n_estimators=128,\n n_jobs=2, nthread=2, num_parallel_tree=1,\n random_state=33, reg_alpha=1, reg_lambda=1.0,\n scale_pos_weight=1, seed=33, silent=True,\n subsample=0.9992297983348898, tree_method='hist',\n validate_parameters=1, verbosity=0))])"},"metadata":{}}]},{"metadata":{},"cell_type":"markdown","source":[" \n","## Test pipeline model"]},{"metadata":{},"cell_type":"markdown","source":["Score the fitted pipeline with the generated scorer using the holdout dataset."]},{"metadata":{"execution":{"iopub.execute_input":"2020-10-12T14:02:03.910267Z","iopub.status.busy":"2020-10-12T14:02:03.909710Z","iopub.status.idle":"2020-10-12T14:02:03.914154Z","shell.execute_reply":"2020-10-12T14:02:03.914727Z"},"pycharm":{"is_executing":true,"name":"#%%\n"}},"cell_type":"code","source":["score = scorer(pipeline, test_X, test_y)\n","print(score)"],"execution_count":11,"outputs":[{"output_type":"stream","text":"0.68\n","name":"stdout"}]},{"metadata":{},"cell_type":"markdown","source":[" \n","# Next steps\n","\n","#### [Model deployment as webservice](https://github.com/IBM/watson-machine-learning-samples/tree/master/cloud/notebooks/python_sdk/deployments/autoai)\n","#### [Run AutoAI experiment with python SDK](https://github.com/IBM/watson-machine-learning-samples/tree/master/cloud/notebooks/python_sdk/experiments/autoai) "]},{"metadata":{"pycharm":{"name":"#%% md\n"}},"cell_type":"markdown","source":[" \n","### Copyrights\n","\n","Licensed Materials - Copyright © 2021 IBM. This notebook and its source code are released under the terms of the ILAN License. Use, duplication disclosure restricted by GSA ADP Schedule Contract with IBM Corp.\n","\n","**Note:** The auto-generated notebooks are subject to the International License Agreement for Non-Warranted Programs (or equivalent) and License Information document for Watson Studio Auto-generated Notebook (License Terms), such agreements located in the link below. Specifically, the Source Components and Sample Materials clause included in the License Information document for Watson Studio Auto-generated Notebook applies to the auto-generated notebooks. 
\n","\n","By downloading, copying, accessing, or otherwise using the materials, you agree to the License Terms \n","\n","___"]}],"metadata":{"kernelspec":{"name":"python3","display_name":"Python 3.7","language":"python"},"language_info":{"name":"python","version":"3.7.10","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat":4,"nbformat_minor":2}
--------------------------------------------------------------------------------
/notebooks/with-output/Credit Data Analysis - experiment notebook.ipynb:
--------------------------------------------------------------------------------
1 | {"cells":[{"metadata":{},"cell_type":"markdown","source":["\n","# Experiment Notebook - AutoAI Notebook v1.15.0\n","\n","\n","This notebook contains the steps and code to demonstrate support of AutoAI experiments in Watson Machine Learning service. It introduces Python SDK commands for data retrieval, training experiments, persisting pipelines, testing pipelines, refining pipelines, and scoring the resulting model.\n","\n","**Note:** Notebook code generated using AutoAI will execute successfully. If code is modified or reordered, there is no guarantee it will successfully execute. For details, see: Saving an Auto AI experiment as a notebook \n"]},{"metadata":{},"cell_type":"markdown","source":["Some familiarity with Python is helpful. This notebook uses Python 3.7 and `ibm_watson_machine_learning` package.\n","\n","\n","## Notebook goals\n","\n","The learning goals of this notebook are:\n","- Defining an AutoAI experiment\n","- Training AutoAI models \n","- Comparing trained models\n","- Deploying the model as a web service\n","- Scoring the model to generate predictions.\n","\n","\n","\n","## Contents\n","\n","This notebook contains the following parts:\n","\n","**[Setup](#setup)** \n"," [Package installation](#install) \n"," [Watson Machine Learning connection](#connection) \n","**[Experiment configuration](#configuration)** \n"," [Experiment metadata](#metadata) \n","**[Working with completed AutoAI experiment](#work)** \n"," [Get fitted AutoAI optimizer](#get) \n"," [Pipelines comparison](#comparison) \n"," [Get pipeline as scikit-learn pipeline model](#get_pipeline) \n"," [Inspect pipeline](#inspect_pipeline) \n"," [Visualize pipeline model](#visualize) \n"," [Preview pipeline model as python code](#preview) \n","**[Deploy and Score](#scoring)** \n"," [Working with spaces](#working_spaces) \n","**[Running AutoAI experiment with Python SDK](#run)** \n","**[Clean up](#cleanup)** \n","**[Next steps](#next_steps)** \n","**[Copyrights](#copyrights)**"]},{"metadata":{},"cell_type":"markdown","source":[" \n","# Setup"]},{"metadata":{},"cell_type":"markdown","source":[" \n","## Package installation\n","Before you use the sample code in this notebook, install the following packages:\n"," - ibm_watson_machine_learning,\n"," - autoai-libs,\n"," - lale,\n"," - scikit-learn,\n"," - xgboost,\n"," - lightgbm.\n"]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["!conda remove --force libxgboost -y\n","!pip install ibm-watson-machine-learning | tail -n 1\n","!pip install -U autoai-libs==1.12.7 | tail -n 1\n","!pip install -U 'lale>=0.5.1,<0.6' | tail -n 1\n","!pip install -U scikit-learn==0.23.2 | tail -n 1\n","!pip install -U xgboost==1.3.3 | tail -n 1\n","!pip install -U lightgbm==3.1.1 | tail -n 1"],"execution_count":null,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":[" \n","# Experiment configuration"]},{"metadata":{"pycharm":{"name":"#%% md\n"}},"cell_type":"markdown","source":[" \n","## Experiment metadata\n","This cell defines the metadata for the experiment, including: training_data_reference, training_result_reference, experiment_metadata."]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["#@hidden_cell\n","from ibm_watson_machine_learning.helpers import DataConnection\n","from ibm_watson_machine_learning.helpers import S3Connection, S3Location\n","\n","training_data_reference = [DataConnection(\n"," connection=S3Connection(\n"," api_key='',\n"," auth_endpoint='https://iam.bluemix.net/oidc/token/',\n"," 
endpoint_url='https://s3-api.us-geo.objectstorage.softlayer.net'\n"," ),\n"," location=S3Location(\n"," bucket='autoaitutorialnew-donotdelete-pr-zhaudczzjfda0e',\n"," path='german_credit_data.csv'\n"," )),\n","]\n","training_result_reference = DataConnection(\n"," connection=S3Connection(\n"," api_key='',\n"," auth_endpoint='https://iam.bluemix.net/oidc/token/',\n"," endpoint_url='https://s3-api.us-geo.objectstorage.softlayer.net'\n"," ),\n"," location=S3Location(\n"," bucket='autoaitutorialnew-donotdelete-pr-zhaudczzjfda0e',\n"," path='auto_ml/509ff920-3fcf-4eb9-9abc-bbc63b4a301a/wml_data/33aa26d1-15eb-482c-93ca-76c186c431a2/data/automl',\n"," model_location='auto_ml/509ff920-3fcf-4eb9-9abc-bbc63b4a301a/wml_data/33aa26d1-15eb-482c-93ca-76c186c431a2/data/automl/pre_hpo_d_output/Pipeline1/model.pickle',\n"," training_status='auto_ml/509ff920-3fcf-4eb9-9abc-bbc63b4a301a/wml_data/33aa26d1-15eb-482c-93ca-76c186c431a2/training-status.json'\n"," ))"],"execution_count":1,"outputs":[]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["experiment_metadata = dict(\n"," prediction_type='classification',\n"," prediction_column='Result',\n"," holdout_size=0.1,\n"," scoring='accuracy',\n"," csv_separator=',',\n"," random_state=33,\n"," max_number_of_estimators=2,\n"," training_data_reference=training_data_reference,\n"," training_result_reference=training_result_reference,\n"," deployment_url='https://us-south.ml.cloud.ibm.com',\n"," project_id='0e2a32c8-f2ed-4587-8479-3a22b2ea57b3',\n"," positive_label=1,\n"," drop_duplicates=True\n",")"],"execution_count":2,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":[" \n","## Watson Machine Learning connection\n","\n","This cell defines the credentials required to work with the Watson Machine Learning service.\n","\n","**Action** Please provide IBM Cloud apikey following [docs](https://cloud.ibm.com/docs/account?topic=account-userapikey)."]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["api_key = ''"],"execution_count":3,"outputs":[]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["wml_credentials = {\n"," \"apikey\": api_key,\n"," \"url\": experiment_metadata['deployment_url']\n","}"],"execution_count":4,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":[" \n","\n","\n","# Working with completed AutoAI experiment\n","\n","This cell imports the pipelines generated for the experiment so they can be compared to find the optimal pipeline to save as a model."]},{"metadata":{},"cell_type":"markdown","source":[" \n","\n","\n","## Get fitted AutoAI optimizer"]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["from ibm_watson_machine_learning.experiment import AutoAI\n","\n","pipeline_optimizer = AutoAI(wml_credentials, project_id=experiment_metadata['project_id']).runs.get_optimizer(metadata=experiment_metadata)"],"execution_count":5,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["Use `get_params()`- to retrieve configuration parameters."]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["pipeline_optimizer.get_params()"],"execution_count":6,"outputs":[{"output_type":"execute_result","execution_count":6,"data":{"text/plain":"{'name': 'Credit Data Analysis',\n 'desc': '',\n 'prediction_type': 'classification',\n 'prediction_column': 'Result',\n 'prediction_columns': None,\n 'timestamp_column_name': None,\n 'scoring': 'accuracy',\n 'holdout_size': 0.1,\n 'max_num_daub_ensembles': 2.0,\n 't_shirt_size': 
'a6c4923b-b8e4-444c-9f43-8a7ec3020110',\n 'train_sample_rows_test_size': None,\n 'include_only_estimators': None,\n 'backtest_num': None,\n 'lookback_window': None,\n 'forecast_window': None,\n 'backtest_gap_length': None,\n 'cognito_transform_names': None,\n 'data_join_graph': False,\n 'csv_separator': ',',\n 'excel_sheet': 0,\n 'encoding': 'utf-8',\n 'positive_label': None,\n 'drop_duplicates': True,\n 'text_processing': None,\n 'word2vec_feature_number': None,\n 'daub_give_priority_to_runtime': None,\n 'run_id': '33aa26d1-15eb-482c-93ca-76c186c431a2'}"},"metadata":{}}]},{"metadata":{"pycharm":{"name":"#%% md\n"}},"cell_type":"markdown","source":[" \n","## Pipelines comparison\n","\n","Use the `summary()` method to list trained pipelines and evaluation metrics information in\n","the form of a Pandas DataFrame. You can use the DataFrame to compare all discovered pipelines and select the one you like for further testing."]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["summary = pipeline_optimizer.summary()\n","best_pipeline_name = list(summary.index)[0]\n","summary"],"execution_count":7,"outputs":[{"output_type":"execute_result","execution_count":7,"data":{"text/plain":" Enhancements Estimator training_roc_auc \\\nPipeline Name \nPipeline_2 HPO XGBClassifier 0.766155 \nPipeline_3 HPO, FE XGBClassifier 0.766155 \nPipeline_4 HPO, FE XGBClassifier 0.766155 \nPipeline_1 XGBClassifier 0.777372 \nPipeline_7 HPO, FE DecisionTreeClassifier 0.684921 \nPipeline_8 HPO, FE DecisionTreeClassifier 0.684921 \nPipeline_5 DecisionTreeClassifier 0.665344 \nPipeline_6 HPO DecisionTreeClassifier 0.665344 \n\n holdout_precision training_average_precision \\\nPipeline Name \nPipeline_2 0.756757 0.862943 \nPipeline_3 0.756757 0.862943 \nPipeline_4 0.756757 0.862943 \nPipeline_1 0.784810 0.874714 \nPipeline_7 0.736842 0.789945 \nPipeline_8 0.736842 0.789945 \nPipeline_5 0.740260 0.779152 \nPipeline_6 0.740260 0.779152 \n\n holdout_average_precision training_log_loss holdout_recall \\\nPipeline Name \nPipeline_2 0.809163 0.622273 0.800000 \nPipeline_3 0.809163 0.622273 0.800000 \nPipeline_4 0.809163 0.622273 0.800000 \nPipeline_1 0.842136 0.564745 0.885714 \nPipeline_7 0.729474 8.941815 0.800000 \nPipeline_8 0.729474 8.941815 0.800000 \nPipeline_5 0.732783 9.479092 0.814286 \nPipeline_6 0.732783 9.479092 0.814286 \n\n training_precision holdout_accuracy \\\nPipeline Name \nPipeline_2 0.811781 0.68 \nPipeline_3 0.811781 0.68 \nPipeline_4 0.811781 0.68 \nPipeline_1 0.810742 0.75 \nPipeline_7 0.808844 0.66 \nPipeline_8 0.808844 0.66 \nPipeline_5 0.796962 0.67 \nPipeline_6 0.796962 0.67 \n\n holdout_balanced_accuracy training_recall holdout_f1 \\\nPipeline Name \nPipeline_2 0.600000 0.869841 0.777778 \nPipeline_3 0.600000 0.869841 0.777778 \nPipeline_4 0.600000 0.869841 0.777778 \nPipeline_1 0.659524 0.861905 0.832215 \nPipeline_7 0.566667 0.825397 0.767123 \nPipeline_8 0.566667 0.825397 0.767123 \nPipeline_5 0.573810 0.815873 0.775510 \nPipeline_6 0.573810 0.815873 0.775510 \n\n holdout_log_loss training_accuracy_(optimized) \\\nPipeline Name \nPipeline_2 0.724710 0.767778 \nPipeline_3 0.724710 0.767778 \nPipeline_4 0.724710 0.767778 \nPipeline_1 0.589836 0.762222 \nPipeline_7 11.743344 0.741111 \nPipeline_8 11.743344 0.741111 \nPipeline_5 11.397956 0.725556 \nPipeline_6 11.397956 0.725556 \n\n holdout_roc_auc training_balanced_accuracy training_f1 \nPipeline Name \nPipeline_2 0.682381 0.699735 0.839790 \nPipeline_3 0.682381 0.699735 0.839790 \nPipeline_4 0.682381 0.699735 0.839790 
\nPipeline_1 0.732857 0.695767 0.835308 \nPipeline_7 0.566667 0.684921 0.816988 \nPipeline_8 0.566667 0.684921 0.816988 \nPipeline_5 0.573810 0.665344 0.806298 \nPipeline_6 0.573810 0.665344 0.806298 ","text/html":"
\n\n
\n \n \n \n Enhancements \n Estimator \n training_roc_auc \n holdout_precision \n training_average_precision \n holdout_average_precision \n training_log_loss \n holdout_recall \n training_precision \n holdout_accuracy \n holdout_balanced_accuracy \n training_recall \n holdout_f1 \n holdout_log_loss \n training_accuracy_(optimized) \n holdout_roc_auc \n training_balanced_accuracy \n training_f1 \n \n \n Pipeline Name \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n Pipeline_2 \n HPO \n XGBClassifier \n 0.766155 \n 0.756757 \n 0.862943 \n 0.809163 \n 0.622273 \n 0.800000 \n 0.811781 \n 0.68 \n 0.600000 \n 0.869841 \n 0.777778 \n 0.724710 \n 0.767778 \n 0.682381 \n 0.699735 \n 0.839790 \n \n \n Pipeline_3 \n HPO, FE \n XGBClassifier \n 0.766155 \n 0.756757 \n 0.862943 \n 0.809163 \n 0.622273 \n 0.800000 \n 0.811781 \n 0.68 \n 0.600000 \n 0.869841 \n 0.777778 \n 0.724710 \n 0.767778 \n 0.682381 \n 0.699735 \n 0.839790 \n \n \n Pipeline_4 \n HPO, FE \n XGBClassifier \n 0.766155 \n 0.756757 \n 0.862943 \n 0.809163 \n 0.622273 \n 0.800000 \n 0.811781 \n 0.68 \n 0.600000 \n 0.869841 \n 0.777778 \n 0.724710 \n 0.767778 \n 0.682381 \n 0.699735 \n 0.839790 \n \n \n Pipeline_1 \n \n XGBClassifier \n 0.777372 \n 0.784810 \n 0.874714 \n 0.842136 \n 0.564745 \n 0.885714 \n 0.810742 \n 0.75 \n 0.659524 \n 0.861905 \n 0.832215 \n 0.589836 \n 0.762222 \n 0.732857 \n 0.695767 \n 0.835308 \n \n \n Pipeline_7 \n HPO, FE \n DecisionTreeClassifier \n 0.684921 \n 0.736842 \n 0.789945 \n 0.729474 \n 8.941815 \n 0.800000 \n 0.808844 \n 0.66 \n 0.566667 \n 0.825397 \n 0.767123 \n 11.743344 \n 0.741111 \n 0.566667 \n 0.684921 \n 0.816988 \n \n \n Pipeline_8 \n HPO, FE \n DecisionTreeClassifier \n 0.684921 \n 0.736842 \n 0.789945 \n 0.729474 \n 8.941815 \n 0.800000 \n 0.808844 \n 0.66 \n 0.566667 \n 0.825397 \n 0.767123 \n 11.743344 \n 0.741111 \n 0.566667 \n 0.684921 \n 0.816988 \n \n \n Pipeline_5 \n \n DecisionTreeClassifier \n 0.665344 \n 0.740260 \n 0.779152 \n 0.732783 \n 9.479092 \n 0.814286 \n 0.796962 \n 0.67 \n 0.573810 \n 0.815873 \n 0.775510 \n 11.397956 \n 0.725556 \n 0.573810 \n 0.665344 \n 0.806298 \n \n \n Pipeline_6 \n HPO \n DecisionTreeClassifier \n 0.665344 \n 0.740260 \n 0.779152 \n 0.732783 \n 9.479092 \n 0.814286 \n 0.796962 \n 0.67 \n 0.573810 \n 0.815873 \n 0.775510 \n 11.397956 \n 0.725556 \n 0.573810 \n 0.665344 \n 0.806298 \n \n \n
\n
"},"metadata":{}}]},{"metadata":{},"cell_type":"markdown","source":[" \n","### Get pipeline as scikit-learn pipeline model\n","\n","After you compare the pipelines, download and save a scikit-learn pipeline model object from the\n","AutoAI training job.\n","\n","**Tip:** If you want to get a specific pipeline you need to pass the pipeline name in:\n","```\n","pipeline_optimizer.get_pipeline(pipeline_name=pipeline_name)\n","```"]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["pipeline_model = pipeline_optimizer.get_pipeline(pipeline_name=best_pipeline_name)"],"execution_count":8,"outputs":[]},{"metadata":{},"cell_type":"markdown","source":["Next, check features importance for selected pipeline."]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["pipeline_optimizer.get_pipeline_details()['features_importance']"],"execution_count":9,"outputs":[{"output_type":"execute_result","execution_count":9,"data":{"text/plain":" features_importance\nCheck_Account 1.00\nPurpose 0.54\nCredit_history 0.38\nNewFeature_4_round(Credit amount ) 0.36\nNewFeature_3_round(Duration) 0.34\nSaving_account 0.32\nNewFeature_1_sqrt(Credit amount ) 0.31\nProperty 0.28\nAge 0.28\nPresent_residence 0.22\nInstall_rate 0.21\nNewFeature_2_sqrt(Age) 0.20\nDuration 0.19\nCredit amount 0.17\nInstallment_plant 0.17\nJob 0.16\nNum_dependents 0.13\nNewFeature_8_round(sqrt(Age)) 0.13\nNewFeature_5_round(Age) 0.11\nPersonal_status 0.10\nEmployment 0.07\nTelephone 0.07\nHousing 0.06\nNewFeature_0_sqrt(Duration) 0.06\nNewFeature_6_round(sqrt(Duration)) 0.06\nOther_debrotors 0.05\nNum_credits 0.02\nForeign 0.00\nNewFeature_7_round(sqrt(Credit amount )) 0.00","text/html":"\n\n
\n \n \n \n features_importance \n \n \n \n \n Check_Account \n 1.00 \n \n \n Purpose \n 0.54 \n \n \n Credit_history \n 0.38 \n \n \n NewFeature_4_round(Credit amount ) \n 0.36 \n \n \n NewFeature_3_round(Duration) \n 0.34 \n \n \n Saving_account \n 0.32 \n \n \n NewFeature_1_sqrt(Credit amount ) \n 0.31 \n \n \n Property \n 0.28 \n \n \n Age \n 0.28 \n \n \n Present_residence \n 0.22 \n \n \n Install_rate \n 0.21 \n \n \n NewFeature_2_sqrt(Age) \n 0.20 \n \n \n Duration \n 0.19 \n \n \n Credit amount \n 0.17 \n \n \n Installment_plant \n 0.17 \n \n \n Job \n 0.16 \n \n \n Num_dependents \n 0.13 \n \n \n NewFeature_8_round(sqrt(Age)) \n 0.13 \n \n \n NewFeature_5_round(Age) \n 0.11 \n \n \n Personal_status \n 0.10 \n \n \n Employment \n 0.07 \n \n \n Telephone \n 0.07 \n \n \n Housing \n 0.06 \n \n \n NewFeature_0_sqrt(Duration) \n 0.06 \n \n \n NewFeature_6_round(sqrt(Duration)) \n 0.06 \n \n \n Other_debrotors \n 0.05 \n \n \n Num_credits \n 0.02 \n \n \n Foreign \n 0.00 \n \n \n NewFeature_7_round(sqrt(Credit amount )) \n 0.00 \n \n \n
\n
"},"metadata":{}}]},{"metadata":{},"cell_type":"markdown","source":["**Tip:** If you want to check all model evaluation metrics-details, use:\n","```\n","pipeline_optimizer.get_pipeline_details()\n","```"]},{"metadata":{},"cell_type":"markdown","source":[" \n","## Inspect pipeline"]},{"metadata":{},"cell_type":"markdown","source":[" \n","### Visualize pipeline model\n","\n","Preview pipeline model stages as a graph. Each node's name links to a detailed description of the stage.\n"]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["pipeline_model.visualize()"],"execution_count":10,"outputs":[{"output_type":"display_data","data":{"text/plain":"","image/svg+xml":"\n\n\n\n\n\ncluster:(root) \n\n \n \n \n\n\nnumpy_column_selector_0 \n\n\nNumpy- \nColumn- \nSelector \n \n \n \n\n\ncompress_strings \n\n\nCompress- \nStrings \n \n \n \n\n\nnumpy_column_selector_0->compress_strings \n \n \n \n\n\nnumpy_replace_missing_values_0 \n\n\nNumpy- \nReplace- \nMissing- \nValues \n \n \n \n\n\ncompress_strings->numpy_replace_missing_values_0 \n \n \n \n\n\nnumpy_replace_unknown_values \n\n\nNumpy- \nReplace- \nUnknown- \nValues \n \n \n \n\n\nnumpy_replace_missing_values_0->numpy_replace_unknown_values \n \n \n \n\n\nboolean2float \n\n\nboolean2float \n \n \n \n\n\nnumpy_replace_unknown_values->boolean2float \n \n \n \n\n\ncat_imputer \n\n\nCat- \nImputer \n \n \n \n\n\nboolean2float->cat_imputer \n \n \n \n\n\ncat_encoder \n\n\nCat- \nEncoder \n \n \n \n\n\ncat_imputer->cat_encoder \n \n \n \n\n\nfloat32_transform_0 \n\n\nfloat32_- \ntransform \n \n \n \n\n\ncat_encoder->float32_transform_0 \n \n \n \n\n\nconcat_features \n\n\nConcat- \nFeatures \n \n \n \n\n\nfloat32_transform_0->concat_features \n \n \n \n\n\nnumpy_column_selector_1 \n\n\nNumpy- \nColumn- \nSelector \n \n \n \n\n\nfloat_str2_float \n\n\nFloat- \nStr2- \nFloat \n \n \n \n\n\nnumpy_column_selector_1->float_str2_float \n \n \n \n\n\nnumpy_replace_missing_values_1 \n\n\nNumpy- \nReplace- \nMissing- \nValues \n \n \n \n\n\nfloat_str2_float->numpy_replace_missing_values_1 \n \n \n \n\n\nnum_imputer \n\n\nNum- \nImputer \n \n \n \n\n\nnumpy_replace_missing_values_1->num_imputer \n \n \n \n\n\nopt_standard_scaler \n\n\nOpt- \nStandard- \nScaler \n \n \n \n\n\nnum_imputer->opt_standard_scaler \n \n \n \n\n\nfloat32_transform_1 \n\n\nfloat32_- \ntransform \n \n \n \n\n\nopt_standard_scaler->float32_transform_1 \n \n \n \n\n\nfloat32_transform_1->concat_features \n \n \n \n\n\nnumpy_permute_array \n\n\nNumpy- \nPermute- \nArray \n \n \n \n\n\nconcat_features->numpy_permute_array \n \n \n \n\n\nxgb_classifier \n\n\nXGB- \nClassifier \n \n \n \n\n\nnumpy_permute_array->xgb_classifier \n \n \n \n \n \n"},"metadata":{}}]},{"metadata":{},"cell_type":"markdown","source":[" \n","### Preview pipeline model as python code\n","In the next cell, you can preview the saved pipeline model as a python code. 
\n","You will be able to review the exact steps used to create the model.\n","\n","**Note:** If you want to get sklearn representation add following parameter to `pretty_print` call: `astype='sklearn'`."]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["pipeline_model.pretty_print(combinators=False, ipython_display=True)"],"execution_count":11,"outputs":[{"output_type":"display_data","data":{"text/plain":"","text/markdown":"```python\nfrom autoai_libs.transformers.exportable import NumpyColumnSelector\nfrom autoai_libs.transformers.exportable import CompressStrings\nfrom autoai_libs.transformers.exportable import NumpyReplaceMissingValues\nfrom autoai_libs.transformers.exportable import NumpyReplaceUnknownValues\nfrom autoai_libs.transformers.exportable import boolean2float\nfrom autoai_libs.transformers.exportable import CatImputer\nfrom autoai_libs.transformers.exportable import CatEncoder\nimport numpy as np\nfrom autoai_libs.transformers.exportable import float32_transform\nfrom lale.operators import make_pipeline\nfrom autoai_libs.transformers.exportable import FloatStr2Float\nfrom autoai_libs.transformers.exportable import NumImputer\nfrom autoai_libs.transformers.exportable import OptStandardScaler\nfrom lale.operators import make_union\nfrom autoai_libs.transformers.exportable import NumpyPermuteArray\nfrom xgboost import XGBClassifier\n\nnumpy_column_selector_0 = NumpyColumnSelector(\n columns=[\n 0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,\n ]\n)\ncompress_strings = CompressStrings(\n compress_type=\"hash\",\n dtypes_list=[\n \"char_str\", \"int_num\", \"char_str\", \"char_str\", \"char_str\", \"char_str\",\n \"int_num\", \"char_str\", \"char_str\", \"int_num\", \"char_str\", \"int_num\",\n \"char_str\", \"char_str\", \"int_num\", \"char_str\", \"int_num\", \"char_str\",\n \"char_str\",\n ],\n missing_values_reference_list=[\"\", \"-\", \"?\", float(\"nan\")],\n misslist_list=[\n [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [],\n [], [],\n ],\n)\nnumpy_replace_missing_values_0 = NumpyReplaceMissingValues(\n missing_values=[], filling_values=float(\"nan\")\n)\nnumpy_replace_unknown_values = NumpyReplaceUnknownValues(\n filling_values=float(\"nan\"),\n filling_values_list=[\n float(\"nan\"), float(\"nan\"), float(\"nan\"), float(\"nan\"), float(\"nan\"),\n float(\"nan\"), float(\"nan\"), float(\"nan\"), float(\"nan\"), float(\"nan\"),\n float(\"nan\"), float(\"nan\"), float(\"nan\"), float(\"nan\"), float(\"nan\"),\n float(\"nan\"), float(\"nan\"), float(\"nan\"), float(\"nan\"),\n ],\n missing_values_reference_list=[\"\", \"-\", \"?\", float(\"nan\")],\n)\ncat_imputer = CatImputer(\n strategy=\"most_frequent\",\n missing_values=float(\"nan\"),\n sklearn_version_family=\"23\",\n)\ncat_encoder = CatEncoder(\n encoding=\"ordinal\",\n categories=\"auto\",\n dtype=np.float64,\n handle_unknown=\"error\",\n sklearn_version_family=\"23\",\n)\npipeline_0 = make_pipeline(\n numpy_column_selector_0,\n compress_strings,\n numpy_replace_missing_values_0,\n numpy_replace_unknown_values,\n boolean2float(),\n cat_imputer,\n cat_encoder,\n float32_transform(),\n)\nnumpy_column_selector_1 = NumpyColumnSelector(columns=[4])\nfloat_str2_float = FloatStr2Float(\n dtypes_list=[\"int_num\"], missing_values_reference_list=[]\n)\nnumpy_replace_missing_values_1 = NumpyReplaceMissingValues(\n missing_values=[], filling_values=float(\"nan\")\n)\nnum_imputer = NumImputer(strategy=\"median\", missing_values=float(\"nan\"))\nopt_standard_scaler = 
OptStandardScaler(\n num_scaler_copy=None,\n num_scaler_with_mean=None,\n num_scaler_with_std=None,\n use_scaler_flag=False,\n)\npipeline_1 = make_pipeline(\n numpy_column_selector_1,\n float_str2_float,\n numpy_replace_missing_values_1,\n num_imputer,\n opt_standard_scaler,\n float32_transform(),\n)\nunion = make_union(pipeline_0, pipeline_1)\nnumpy_permute_array = NumpyPermuteArray(\n axis=0,\n permutation_indices=[\n 0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 4,\n ],\n)\nxgb_classifier = XGBClassifier(\n base_score=0.5,\n booster=\"gbtree\",\n colsample_bylevel=1,\n colsample_bynode=1,\n colsample_bytree=1,\n gamma=0,\n gpu_id=-1,\n interaction_constraints=\"\",\n learning_rate=1.0,\n max_delta_step=0,\n max_depth=2,\n min_child_weight=2,\n missing=float(\"nan\"),\n monotone_constraints=\"()\",\n n_estimators=128,\n n_jobs=2,\n num_parallel_tree=1,\n random_state=33,\n reg_alpha=1,\n reg_lambda=1.0,\n scale_pos_weight=1,\n subsample=0.9992297983348898,\n tree_method=\"hist\",\n validate_parameters=1,\n verbosity=0,\n nthread=2,\n silent=True,\n seed=33,\n)\npipeline = make_pipeline(union, numpy_permute_array, xgb_classifier)\n```"},"metadata":{}}]},{"metadata":{},"cell_type":"markdown","source":[" \n","## Deploy and Score\n","\n","In this section you will learn how to deploy and score the model as a web service."]},{"metadata":{},"cell_type":"markdown","source":[" \n","### Working with spaces\n","\n","In this section you will specify a deployment space for organizing the assets for deploying and scoring the model. If you do not have an existing space, you can use [Deployment Spaces Dashboard](https://dataplatform.cloud.ibm.com/ml-runtime/spaces?context=cpdaas) to create a new space, following these steps:\n","\n","- Click **New Deployment Space**.\n","- Create an empty space.\n","- Select Cloud Object Storage.\n","- Select Watson Machine Learning instance and press **Create**.\n","- Copy `space_id` and paste it below.\n","\n","**Tip**: You can also use the SDK to prepare the space for your work. 
Learn more [here](https://github.com/IBM/watson-machine-learning-samples/blob/master/notebooks/python_sdk/instance-management/Space%20management.ipynb).\n","\n","**Action**: assign or update space ID below"]},{"metadata":{},"cell_type":"markdown","source":["### Deployment creation"]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["target_space_id = \"\"\n","\n","from ibm_watson_machine_learning.deployment import WebService\n","service = WebService(source_wml_credentials=wml_credentials,\n"," target_wml_credentials=wml_credentials,\n"," source_project_id=experiment_metadata['project_id'],\n"," target_space_id=target_space_id)\n","service.create(\n","model=best_pipeline_name,\n","metadata=experiment_metadata,\n","deployment_name='Best_pipeline_webservice'\n",")"],"execution_count":14,"outputs":[{"output_type":"stream","text":"Preparing an AutoAI Deployment...\nPublished model uid: 71a3a7f8-74e0-4b19-ad65-a20ddd5d4d43\nDeploying model 71a3a7f8-74e0-4b19-ad65-a20ddd5d4d43 using V4 client.\n\n\n#######################################################################################\n\nSynchronous deployment creation for uid: '71a3a7f8-74e0-4b19-ad65-a20ddd5d4d43' started\n\n#######################################################################################\n\n\ninitializing...\nready\n\n\n------------------------------------------------------------------------------------------------\nSuccessfully finished deployment creation, deployment_uid='986e0998-8110-4a76-a8e6-6b9961626a9c'\n------------------------------------------------------------------------------------------------\n\n\n","name":"stdout"}]},{"metadata":{},"cell_type":"markdown","source":["Use the `print` method for the deployment object to show basic information about the service: "]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["print(service)"],"execution_count":15,"outputs":[{"output_type":"stream","text":"name: Best_pipeline_webservice, id: 986e0998-8110-4a76-a8e6-6b9961626a9c, scoring_url: https://us-south.ml.cloud.ibm.com/ml/v4/deployments/986e0998-8110-4a76-a8e6-6b9961626a9c/predictions, asset_id: 71a3a7f8-74e0-4b19-ad65-a20ddd5d4d43\n","name":"stdout"}]},{"metadata":{},"cell_type":"markdown","source":["To show all available information about the deployment use the `.get_params()` method:"]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"code","source":["service.get_params()"],"execution_count":16,"outputs":[{"output_type":"execute_result","execution_count":16,"data":{"text/plain":"{'entity': {'asset': {'id': '71a3a7f8-74e0-4b19-ad65-a20ddd5d4d43'},\n 'custom': {},\n 'deployed_asset_type': 'model',\n 'hardware_spec': {'id': 'c076e82c-b2a7-4d20-9c0f-1f0c2fdf5a24',\n 'name': 'M',\n 'num_nodes': 1},\n 'hybrid_pipeline_hardware_specs': [{'hardware_spec': {'name': 'S',\n 'num_nodes': 1},\n 'node_runtime_id': 'auto_ai.kb'}],\n 'name': 'Best_pipeline_webservice',\n 'online': {},\n 'space_id': 'd3a9968f-93de-4693-adce-5fb42ba2b068',\n 'status': {'online_url': {'url': 'https://us-south.ml.cloud.ibm.com/ml/v4/deployments/986e0998-8110-4a76-a8e6-6b9961626a9c/predictions'},\n 'state': 'ready'}},\n 'metadata': {'created_at': '2021-06-07T20:45:26.190Z',\n 'id': '986e0998-8110-4a76-a8e6-6b9961626a9c',\n 'modified_at': '2021-06-07T20:45:26.190Z',\n 'name': 'Best_pipeline_webservice',\n 'owner': 'IBMid-310001D061',\n 'space_id': 'd3a9968f-93de-4693-adce-5fb42ba2b068'}}"},"metadata":{}}]},{"metadata":{},"cell_type":"markdown","source":["### Scoring of webservice\n","You can make scoring 
{"metadata":{},"cell_type":"markdown","source":["If you want to work with the web service in an external Python application, follow these steps to retrieve the service object:\n","\n"," - Initialize the service with `service = WebService(wml_credentials)`\n"," - Get the deployment ID with the `service.list()` method\n"," - Get the web service object with the `service.get('deployment_id')` method\n","\n","After that, you can call the `service.score()` method, as sketched below."]},
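{"metadata":{},"cell_type":"markdown","source":["A minimal sketch of those steps (`'deployment_id'` is a placeholder for an actual ID copied from the `service.list()` output, and `test_df` is the hypothetical payload from the scoring example above):\n","```\n","from ibm_watson_machine_learning.deployment import WebService\n","\n","service = WebService(wml_credentials)\n","service.list()                # note the ID of the target deployment\n","service.get('deployment_id')  # bind the object to that deployment\n","predictions = service.score(payload=test_df)\n","```"]},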
{"metadata":{},"cell_type":"markdown","source":["### Deleting the deployment\n"," \n","You can delete the existing deployment by calling the `service.delete()` method.\n","To list the existing web services, use `service.list()`."]},{"metadata":{},"cell_type":"markdown","source":[" \n","\n","## Running the AutoAI experiment with the Python SDK"]},{"metadata":{},"cell_type":"markdown","source":["If you want to run the AutoAI experiment using the Python API, follow the steps described below. The experiment settings were generated based on the parameters set in the UI.\n"," - Go to your COS dashboard.\n"," - In the Service credentials tab, click New Credential.\n"," - Add the inline configuration parameter `{\"HMAC\":true}` and click Add.\n","\n","This configuration parameter adds the following section to the instance credentials (for use later in this notebook):\n","```\n","\"cos_hmac_keys\": {\n","    \"access_key_id\": \"***\",\n","    \"secret_access_key\": \"***\"\n","}\n","```\n","\n","**Action:** Provide your COS credentials in the following cells.\n","\n","- The following markdown cells contain code: copy their contents into code cells to run them.\n","\n"]},{"metadata":{"pycharm":{"name":"#%%\n"}},"cell_type":"markdown","source":["```\n","from ibm_watson_machine_learning.experiment import AutoAI\n","\n","experiment = AutoAI(wml_credentials, project_id=experiment_metadata['project_id'])\n","```"]},{"metadata":{},"cell_type":"markdown","source":["```\n","#@hidden_cell\n","cos_hmac_keys = {\n","    \"access_key_id\": \"PLACE_YOUR_ACCESS_KEY_ID_HERE\",\n","    \"secret_access_key\": \"PLACE_YOUR_SECRET_ACCESS_KEY_HERE\"\n","}\n","\n","cos_api_key = \"PLACE_YOUR_API_KEY_HERE\"\n","OPTIMIZER_NAME = 'custom_name'\n","```"]},{"metadata":{"pycharm":{"name":"#%% md\n"}},"cell_type":"markdown","source":["The experiment settings below were generated based on the parameters set in the UI."]},{"metadata":{"pycharm":{"name":"#%% raw\n"}},"cell_type":"markdown","source":["```\n","from ibm_watson_machine_learning.helpers import DataConnection\n","from ibm_watson_machine_learning.helpers import S3Connection, S3Location\n","\n","training_data_reference = [DataConnection(\n","    connection=S3Connection(\n","        api_key=cos_api_key,\n","        auth_endpoint='https://iam.bluemix.net/oidc/token/',\n","        endpoint_url='https://s3-api.us-geo.objectstorage.softlayer.net',\n","        access_key_id=cos_hmac_keys['access_key_id'],\n","        secret_access_key=cos_hmac_keys['secret_access_key']\n","    ),\n","    location=S3Location(\n","        bucket='autoaitutorialnew-donotdelete-pr-zhaudczzjfda0e',\n","        path='german_credit_data.csv'\n","    )),\n","]\n","\n","training_result_reference = DataConnection(\n","    connection=S3Connection(\n","        api_key=cos_api_key,\n","        auth_endpoint='https://iam.bluemix.net/oidc/token/',\n","        endpoint_url='https://s3-api.us-geo.objectstorage.softlayer.net',\n","        access_key_id=cos_hmac_keys['access_key_id'],\n","        secret_access_key=cos_hmac_keys['secret_access_key']\n","    ),\n","    location=S3Location(\n","        bucket='autoaitutorialnew-donotdelete-pr-zhaudczzjfda0e',\n","        path='auto_ml/509ff920-3fcf-4eb9-9abc-bbc63b4a301a/wml_data/33aa26d1-15eb-482c-93ca-76c186c431a2/data/automl',\n","        model_location='auto_ml/509ff920-3fcf-4eb9-9abc-bbc63b4a301a/wml_data/33aa26d1-15eb-482c-93ca-76c186c431a2/data/automl/pre_hpo_d_output/Pipeline1/model.pickle',\n","        training_status='auto_ml/509ff920-3fcf-4eb9-9abc-bbc63b4a301a/wml_data/33aa26d1-15eb-482c-93ca-76c186c431a2/training-status.json'\n","    ))\n","```"]},{"metadata":{"pycharm":{"name":"#%%raw\n"}},"cell_type":"markdown","source":["```\n","pipeline_optimizer = experiment.optimizer(\n","    name=OPTIMIZER_NAME,\n","    prediction_type=experiment_metadata['prediction_type'],\n","    prediction_column=experiment_metadata['prediction_column'],\n","    scoring=experiment_metadata['scoring'],\n","    holdout_size=experiment_metadata['holdout_size'],\n","    csv_separator=experiment_metadata['csv_separator'],\n","    positive_label=experiment_metadata['positive_label'],\n","    drop_duplicates=experiment_metadata['drop_duplicates'])\n","```"]},{"metadata":{},"cell_type":"markdown","source":["```\n","pipeline_optimizer.fit(training_data_reference=training_data_reference,\n","                       training_results_reference=training_result_reference,\n","                       background_mode=False)\n","```"]},
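{"metadata":{},"cell_type":"markdown","source":["When the fit completes, you can inspect the results -- a minimal sketch, assuming the run finished successfully (these optimizer methods are not shown in the generated settings above):\n","```\n","# Leaderboard of the trained pipelines, ranked by the scoring metric\n","summary = pipeline_optimizer.summary()\n","print(summary)\n","\n","# Fetch the best pipeline for local inspection\n","best_pipeline = pipeline_optimizer.get_pipeline()\n","```"]},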
{"metadata":{},"cell_type":"markdown","source":["\n"," \n","# Next steps\n","\n","#### [Online Documentation](https://www.ibm.com/cloud/watson-studio/autoai)"]},{"metadata":{},"cell_type":"markdown","source":[" \n","### Copyrights\n","\n","Licensed Materials - Copyright © 2021 IBM. This notebook and its source code are released under the terms of the ILAN License.\n","Use, duplication or disclosure restricted by GSA ADP Schedule Contract with IBM Corp.\n","\n","**Note:** The auto-generated notebooks are subject to the International License Agreement for Non-Warranted Programs (or equivalent) and the License Information document for Watson Studio Auto-generated Notebook (License Terms); these agreements are located at the link below. Specifically, the Source Components and Sample Materials clause included in the License Information document for Watson Studio Auto-generated Notebook applies to the auto-generated notebooks.\n","\n","By downloading, copying, accessing, or otherwise using the materials, you agree to the License Terms.\n","\n","___"]}],"metadata":{"kernelspec":{"name":"python3","display_name":"Python 3.7","language":"python"},"language_info":{"name":"python","version":"3.7.10","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"pycharm":{"stem_cell":{"cell_type":"raw","metadata":{"collapsed":false},"source":["\n"]}}},"nbformat":4,"nbformat_minor":1}
--------------------------------------------------------------------------------