├── .github └── workflows │ └── mlrun.yml ├── LICENSE ├── README.md ├── docs ├── flow.png ├── mlrun.png ├── pipeline.png ├── pr.png ├── slack.png └── use-this.png ├── gitops_project.ipynb ├── project.yaml └── workflow.py /.github/workflows/mlrun.yml: -------------------------------------------------------------------------------- 1 | name: mlrun-project-workflow 2 | on: [issue_comment] 3 | 4 | jobs: 5 | submit-project: 6 | if: github.event.issue.pull_request != null && startsWith(github.event.comment.body, '/run') 7 | runs-on: ubuntu-latest 8 | 9 | steps: 10 | - uses: actions/checkout@v2 11 | - name: Set up Python 3.6 12 | uses: actions/setup-python@v1 13 | with: 14 | python-version: '3.6' 15 | architecture: 'x64' 16 | 17 | - name: Install mlrun 18 | run: python -m pip install mlrun 19 | - name: Submit project 20 | run: python -m mlrun project ./ --git-issue "${{github.event.issue.number}}" --git-repo ${GITHUB_REPOSITORY} -w -x commit=${COMMIT:33} -r main ${CMD:5} 21 | env: 22 | V3IO_USERNAME: ${{ secrets.V3IO_USERNAME }} 23 | V3IO_PASSWORD: ${{ secrets.V3IO_PASSWORD }} 24 | V3IO_API: ${{ secrets.V3IO_API }} 25 | V3IO_ACCESS_KEY: ${{ secrets.V3IO_ACCESS_KEY }} 26 | MLRUN_DBPATH: ${{ secrets.MLRUN_DBPATH }} 27 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 28 | SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} 29 | CMD: ${{ github.event.comment.body}} 30 | COMMIT: ${{ github.sha}} 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ML Pipeline Automation and CI/CD Using GitHub Actions, Kubeflow and MLRun 2 | 3 | Machine learning (ML) pipelines allow us to automate multi-stage workflow which comprise of 4 | data ingestion, data preparation, model training, validation and finally deployment. 5 | 6 | Every time our code, data or parameters change we may want to re-evaluate our model accuracy and performance before we deploy. 7 | This resembles the CI/CD practice for delivering code to production with the additional aspects of data and parameter/configuration versioning, 8 | and may require more powerful resources (computation cluster, GPUs, data processing engines, etc.). 
9 | 10 | This template repo demonstrates how you can automate the development, testing, and deployment 11 | of machine learning projects using the following tools: 12 | 13 | * [**GitHub actions**](https://github.com/features/actions) - used for code and metadata versioning, workflow triggering, and process tracking 14 | * [**Kubeflow Pipelines**](https://www.kubeflow.org/docs/pipelines/overview/pipelines-overview/) - Used to execute ML pipeline steps on a (remote) Kubernetes cluster 15 | * [**MLRun**](https://github.com/mlrun/mlrun) - Used for end to end MLOps automation and tracking, [read more below](#mlrun-overview). 16 | 17 | To clone and run with your own environment or on [**iguazio data science platform**](https://www.iguazio.com/), check the [**instructions below**](#how-to-run-with-your-cluster). 18 | 19 | ## How Does It Work? 20 | 21 | This repo represents an **mlrun project**; mlrun projects consist of **Functions** (code), **Artifacts** (data), **Workflows**, and **Parameters/secrets**. 22 | The [**project.yaml**](project.yaml) file lists all of those elements. 23 | 24 | Project elements can be linked (e.g. point to a library function which runs AutoML or data analysis, point to code/notebook files, point to external data objects, workflow files, etc.), 25 | or they can be embedded (e.g. store function code + configuration, workflow steps, etc.), in this example we show how to combine both. 26 | 27 | The project file, workflow and embedded/linked code were generated by running the [**gitops_project notebook**](gitops_project.ipynb), 28 | you can modify it to your needs, this is based on code from [MLRun Demos repo](https://github.com/mlrun/demos), 29 | where you can find more end to end ML Pipeline examples. 
30 | 31 | When we change one of the elements (the project.yaml file or one of the other linked code/metadata files) and open a pull request (PR) 32 | we can type `/run` in our PR, this will trigger running the ML Pipeline (as specified in the [workflow file](workflow.py)). 33 | Once the pipeline starts, a comment will be added to your PR with a link to MLRun UI (allowing to track the progress), and when the ML Pipeline completes 34 | MLRUn will write a result summary as a comment back into your PR with links to more details and data artifacts 35 | 36 | **Flow diagram:** 37 | 38 |


39 | 40 | **This is an example of the PR comments:** 41 | 42 |


43 | 44 | **This is an example of the summary report sent to `Slack`:** 45 | 46 |


47 | 48 | **The Kubeflow pipeline graph** 49 | 50 |


51 | 52 | **MLRun UI showing the AutoML results (linked to from the PR)** 53 | 54 |


55 | 56 | ## What Is MLRun? 57 | 58 | MLRun is the first and currently only integrated open-source framework for end to end MLOps automation, it: 59 | * Orchestrates job/pipeline from simple code or pre-baked functions (via Kubeflow and various k8s CRDs) 60 | * Runs, tracks and version projects comprising of experiments, jobs/functions, data, code, models and more. 61 | * Provides an open marketplace for various ML, DL, Analytics, MLOps functions 62 | * Runs iterative AutoML, Hyper-param, or data analysis tasks on a distributed cluster 63 | * Automates deployment of models and real-time data processing functions using (Nuclio) real-time serverless engine 64 | 65 | Read more in [mlrun/mlrun](https://github.com/mlrun/mlrun) 66 | 67 | ## How To Run With Your Cluster 68 | 69 | ### Prerequisites 70 | 71 | You need access to a working Kubernetes cluster with Kubeflow, Nuclio, and MLRun (see [installing MLRun](https://github.com/mlrun/mlrun#installation))
72 | Or use [**iguazio data science platform**](https://www.iguazio.com/) with all of those pre-installed and managed. 73 | 74 | ### Clone and setup 75 | 76 | #### 1. Copy this repo to your own GitHub account by clicking the `Use this template` button 77 | 78 |


79 | 80 | #### 2. Configure the required secrets and addresses 81 | 82 | Under the repo settings select the `secrets` tab and configure the following: 83 | * `MLRUN_DBPATH` - remote URL to mlrun service (e.g. `https://`) 84 | * `SLACK_WEBHOOK` - optional, if you would like to get run summary into your slack 85 | 86 | When using Iguazio platform you should set the following: 87 | * `V3IO_USERNAME` - Iguazio platform username 88 | * `V3IO_ACCESS_KEY` - Iguazio V3IO data layer credentials (copy from your user settings) 89 | * `V3IO_PASSWORD` - user password 90 | * `V3IO_API` - V3IO data access API url (copy from the services screen) 91 | 92 | When using the open source version and a secure API gateway you can use the following secrets 93 | * `MLRUN_HTTPDB__USER` - remote username 94 | * `MLRUN_HTTPDB__PASSWORD` (for basic auth) or `MLRUN_HTTPDB__TOKEN` (for Bearer token) 95 | 96 | 97 | ### Customize 98 | 99 | Update and run the [**gitops_project notebook**](gitops_project.ipynb), 100 | The Notebook will generate the data ingestion function, the workflow code and the [**project.yaml**](project.yaml) files. 101 | You can also run the workflow from the notebook, or you can run it from the pull request. 102 | 103 | ### Run from a PR 104 | 105 | change the project.yaml file, the workflow, or other elements and create a pull request, 106 | once the PR is opened type `/run` in your PR. 107 | 108 | For troubleshooting go to the `Actions` tab to see GitHub Actions workflow progress. 
109 | 110 | -------------------------------------------------------------------------------- /docs/flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlrun/demo-github-actions/06d375eba8c79058ea7f1da6ebfc0758973c891f/docs/flow.png -------------------------------------------------------------------------------- /docs/mlrun.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlrun/demo-github-actions/06d375eba8c79058ea7f1da6ebfc0758973c891f/docs/mlrun.png -------------------------------------------------------------------------------- /docs/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlrun/demo-github-actions/06d375eba8c79058ea7f1da6ebfc0758973c891f/docs/pipeline.png -------------------------------------------------------------------------------- /docs/pr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlrun/demo-github-actions/06d375eba8c79058ea7f1da6ebfc0758973c891f/docs/pr.png -------------------------------------------------------------------------------- /docs/slack.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlrun/demo-github-actions/06d375eba8c79058ea7f1da6ebfc0758973c891f/docs/slack.png -------------------------------------------------------------------------------- /docs/use-this.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlrun/demo-github-actions/06d375eba8c79058ea7f1da6ebfc0758973c891f/docs/use-this.png -------------------------------------------------------------------------------- /gitops_project.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 
| "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Demonstrate Git Based ML Pipeline Automation\n", 8 | " --------------------------------------------------------------------\n", 9 | "\n", 10 | "Creating a local function, running predefined functions, creating and running a full ML pipeline with local and library functions.\n", 11 | "\n", 12 | "#### **notebook how-to's**\n", 13 | "* Create and test a simple function\n", 14 | "* Examine data using serverless (containarized) `describe` function\n", 15 | "* Create an automated ML pipeline from various library functions\n", 16 | "* Running and tracking the pipeline results and artifacts" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "## Create and Test a Local Ingestion/Data-prep Function (e.g. Iris Data Generator)\n", 24 | "Import nuclio SDK and magics, do not remove the cell and comment !!!" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 1, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "# nuclio: ignore\n", 34 | "import nuclio" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "Specify function dependencies and configuration" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 2, 47 | "metadata": {}, 48 | "outputs": [ 49 | { 50 | "name": "stdout", 51 | "output_type": "stream", 52 | "text": [ 53 | "%nuclio: setting spec.image to 'mlrun/ml-models'\n" 54 | ] 55 | } 56 | ], 57 | "source": [ 58 | "%nuclio config spec.image = \"mlrun/ml-models\"" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "#### Function code\n", 66 | "Generate the iris dataset and log the dataframe (as csv or parquet file)" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 3, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "import os\n", 76 | "from sklearn.datasets import load_iris\n", 77 | "from 
sklearn.model_selection import train_test_split\n", 78 | "import numpy as np\n", 79 | "from sklearn.metrics import accuracy_score\n", 80 | "from mlrun.artifacts import TableArtifact, PlotArtifact\n", 81 | "import pandas as pd\n", 82 | "\n", 83 | "def iris_generator(context, format='csv'):\n", 84 | " iris = load_iris()\n", 85 | " iris_dataset = pd.DataFrame(data=iris.data, columns=iris.feature_names)\n", 86 | " iris_labels = pd.DataFrame(data=iris.target, columns=['label'])\n", 87 | " iris_dataset = pd.concat([iris_dataset, iris_labels], axis=1)\n", 88 | " \n", 89 | " context.logger.info('saving iris dataframe to {}'.format(context.artifact_path))\n", 90 | " context.log_dataset('iris_dataset', df=iris_dataset, format=format, index=False)\n" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": {}, 96 | "source": [ 97 | "The following end-code annotation tells ```nuclio``` to stop parsing the notebook from this cell. _**Please do not remove this cell**_:" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 4, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "# nuclio: end-code\n", 107 | "# marks the end of a code section" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | "## Create a project to host our functions, jobs and artifacts\n", 115 | "\n", 116 | "Projects are used to package multiple functions, workflows, and artifacts. 
We usually store project code and definitions in a Git archive.\n", 117 | "\n", 118 | "The following code creates a new project in a local dir and initialize git tracking on that" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 5, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "from os import path\n", 128 | "from mlrun import run_local, NewTask, mlconf, import_function, mount_v3io\n", 129 | "mlconf.dbpath = mlconf.dbpath or 'http://mlrun-api:8080'\n", 130 | "\n", 131 | "# specify artifacts target location\n", 132 | "artifact_path = mlconf.artifact_path or path.abspath('./')\n", 133 | "project_name = 'gitops-project'" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 6, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [ 142 | "from mlrun import new_project, code_to_function\n", 143 | "project_dir = './'\n", 144 | "skproj = new_project(project_name, project_dir)" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "\n", 152 | "### Run/test the data generator function locally\n", 153 | "\n", 154 | "The functions above can be tested locally. Parameters, inputs, and outputs can be specified in the API or the `Task` object.
\n", 155 | "when using `run_local()` the function inputs and outputs are automatically recorded by MLRun experiment and data tracking DB.\n", 156 | "\n", 157 | "In each run we can specify the function, inputs, parameters/hyper-parameters, etc... For more details, see the [mlrun_basics notebook](mlrun_basics.ipynb)." 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 7, 163 | "metadata": {}, 164 | "outputs": [ 165 | { 166 | "name": "stdout", 167 | "output_type": "stream", 168 | "text": [ 169 | "> 2020-07-29 10:38:35,433 [info] starting run iris_gen uid=3e340d3561ca402c91e9bb09b1631dd4 -> http://mlrun-api:8080\n", 170 | "> 2020-07-29 10:38:35,518 [info] saving iris dataframe to /User/demo-github-actions/data\n" 171 | ] 172 | }, 173 | { 174 | "data": { 175 | "text/html": [ 176 | "\n", 310 | "
\n", 311 | "
\n", 312 | "\n", 325 | "\n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | "
projectuiditerstartstatenamelabelsinputsparametersresultsartifacts
gitops-project0Jul 29 10:38:35completediris_gen
v3io_user=admin
kind=handler
owner=admin
host=jupyter-58d8fdb6fc-nmqbq
iris_dataset
\n", 357 | "
\n", 358 | "
\n", 359 | "
\n", 360 | " Title\n", 361 | " ×\n", 362 | "
\n", 363 | " \n", 364 | "
\n", 365 | "
\n" 366 | ], 367 | "text/plain": [ 368 | "" 369 | ] 370 | }, 371 | "metadata": {}, 372 | "output_type": "display_data" 373 | }, 374 | { 375 | "name": "stdout", 376 | "output_type": "stream", 377 | "text": [ 378 | "to track results use .show() or .logs() or in CLI: \n", 379 | "!mlrun get run 3e340d3561ca402c91e9bb09b1631dd4 --project gitops-project , !mlrun logs 3e340d3561ca402c91e9bb09b1631dd4 --project gitops-project\n", 380 | "> 2020-07-29 10:38:35,641 [info] run executed, status=completed\n" 381 | ] 382 | } 383 | ], 384 | "source": [ 385 | "# run the function locally\n", 386 | "gen = run_local(name='iris_gen', handler=iris_generator, \n", 387 | " project=project_name, artifact_path=path.join(artifact_path, 'data')) " 388 | ] 389 | }, 390 | { 391 | "cell_type": "markdown", 392 | "metadata": {}, 393 | "source": [ 394 | "#### Convert our local code to a distributed serverless function object " 395 | ] 396 | }, 397 | { 398 | "cell_type": "code", 399 | "execution_count": 8, 400 | "metadata": {}, 401 | "outputs": [ 402 | { 403 | "data": { 404 | "text/plain": [ 405 | "" 406 | ] 407 | }, 408 | "execution_count": 8, 409 | "metadata": {}, 410 | "output_type": "execute_result" 411 | } 412 | ], 413 | "source": [ 414 | "gen_func = code_to_function(name='gen_iris', kind='job')\n", 415 | "skproj.set_function(gen_func)" 416 | ] 417 | }, 418 | { 419 | "cell_type": "code", 420 | "execution_count": null, 421 | "metadata": {}, 422 | "outputs": [], 423 | "source": [] 424 | }, 425 | { 426 | "cell_type": "markdown", 427 | "metadata": {}, 428 | "source": [ 429 | "## Analyze the dataset features (useing marketplace function)\n", 430 | "load dataset analysis function (`describe`) from the function hub (marketplace), and print its doc." 
431 | ] 432 | }, 433 | { 434 | "cell_type": "code", 435 | "execution_count": 15, 436 | "metadata": {}, 437 | "outputs": [ 438 | { 439 | "name": "stdout", 440 | "output_type": "stream", 441 | "text": [ 442 | "function: describe\n", 443 | "describe and visualizes dataset stats\n", 444 | "default handler: summarize\n", 445 | "entry points:\n", 446 | " summarize: Summarize a table\n", 447 | " context(MLClientCtx) - the function context, default=\n", 448 | " table(DataItem) - MLRun input pointing to pandas dataframe (csv/parquet file path), default=\n", 449 | " label_column(str) - ground truth column label, default=None\n", 450 | " class_labels(List[str]) - label for each class in tables and plots, default=[]\n", 451 | " plot_hist(bool) - (True) set this to False for large tables, default=True\n", 452 | " plots_dest(str) - destination folder of summary plots (relative to artifact_path), default=plots\n", 453 | " update_dataset - when the table is a registered dataset update the charts in-place, default=False\n" 454 | ] 455 | } 456 | ], 457 | "source": [ 458 | "skproj.set_function('hub://describe', 'describe')\n", 459 | "skproj.func('describe').doc()" 460 | ] 461 | }, 462 | { 463 | "cell_type": "markdown", 464 | "metadata": {}, 465 | "source": [ 466 | "### Run the describe function on our dataset (as a Kubernetes job)\n", 467 | " using shared file system mount (`mount_v3io`) with our notebook." 
468 | ] 469 | }, 470 | { 471 | "cell_type": "code", 472 | "execution_count": 16, 473 | "metadata": {}, 474 | "outputs": [ 475 | { 476 | "name": "stdout", 477 | "output_type": "stream", 478 | "text": [ 479 | "> 2020-07-29 12:46:52,341 [info] starting run describe-summarize uid=301ab10adbf34adb898f0751c7f0f0b4 -> http://mlrun-api:8080\n", 480 | "> 2020-07-29 12:46:52,497 [info] Job is running in the background, pod: describe-summarize-r9tvz\n", 481 | "> 2020-07-29 12:47:01,761 [info] run executed, status=completed\n", 482 | "final state: succeeded\n" 483 | ] 484 | }, 485 | { 486 | "data": { 487 | "text/html": [ 488 | "\n", 622 | "
\n", 623 | "
\n", 624 | "\n", 637 | "\n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | "
projectuiditerstartstatenamelabelsinputsparametersresultsartifacts
gitops-project0Jul 29 12:46:57completeddescribe-summarize
v3io_user=admin
kind=job
owner=admin
host=describe-summarize-r9tvz
table
label_column=label
histograms
violin
imbalance
imbalance-weights-vec
correlation-matrix
correlation
\n", 669 | "
\n", 670 | "
\n", 671 | "
\n", 672 | " Title\n", 673 | " ×\n", 674 | "
\n", 675 | " \n", 676 | "
\n", 677 | "
\n" 678 | ], 679 | "text/plain": [ 680 | "" 681 | ] 682 | }, 683 | "metadata": {}, 684 | "output_type": "display_data" 685 | }, 686 | { 687 | "name": "stdout", 688 | "output_type": "stream", 689 | "text": [ 690 | "to track results use .show() or .logs() or in CLI: \n", 691 | "!mlrun get run 301ab10adbf34adb898f0751c7f0f0b4 --project gitops-project , !mlrun logs 301ab10adbf34adb898f0751c7f0f0b4 --project gitops-project\n", 692 | "> 2020-07-29 12:47:11,671 [info] run executed, status=completed\n" 693 | ] 694 | }, 695 | { 696 | "data": { 697 | "text/plain": [ 698 | "" 699 | ] 700 | }, 701 | "execution_count": 16, 702 | "metadata": {}, 703 | "output_type": "execute_result" 704 | } 705 | ], 706 | "source": [ 707 | "skproj.func('describe').apply(mount_v3io()).run(params={'label_column': 'label'}, \n", 708 | " inputs={\"table\": gen.outputs['iris_dataset']}, \n", 709 | " artifact_path=artifact_path)" 710 | ] 711 | }, 712 | { 713 | "cell_type": "markdown", 714 | "metadata": {}, 715 | "source": [ 716 | "## Create a Fully Automated ML Pipeline\n", 717 | "\n", 718 | "#### Add more functions to our project to be used in our pipeline (from the functions hub/marketplace)\n", 719 | "\n", 720 | "AutoML training (classifier), Model validation (test_classifier), Real-time model server, and Model REST API Tester" 721 | ] 722 | }, 723 | { 724 | "cell_type": "code", 725 | "execution_count": 9, 726 | "metadata": {}, 727 | "outputs": [ 728 | { 729 | "data": { 730 | "text/plain": [ 731 | "" 732 | ] 733 | }, 734 | "execution_count": 9, 735 | "metadata": {}, 736 | "output_type": "execute_result" 737 | } 738 | ], 739 | "source": [ 740 | "skproj.set_function('hub://sklearn_classifier', 'train')\n", 741 | "skproj.set_function('hub://test_classifier', 'test')\n", 742 | "skproj.set_function('hub://model_server', 'serving')\n", 743 | "skproj.set_function('hub://model_server_tester', 'live_tester')\n", 744 | "#print(skproj.to_yaml())" 745 | ] 746 | }, 747 | { 748 | "cell_type": "markdown", 749 | 
"metadata": {}, 750 | "source": [ 751 | "#### Define and save a pipeline \n", 752 | "\n", 753 | "The following workflow definition will be written into a file, it describes a Kubeflow execution graph (DAG)
\n", 754 | "and how functions and data are connected to form an end to end pipeline. \n", 755 | "\n", 756 | "* Build the iris generator (ingest) function container \n", 757 | "* Ingest the iris data\n", 758 | "* Analyze the dataset (describe)\n", 759 | "* Train and test the model\n", 760 | "* Deploy the model as a real-time serverless function\n", 761 | "* Test the serverless function REST API with test dataset\n", 762 | "\n", 763 | "Check the code below to see how functions objects are initialized and used (by name) inside the workflow.
\n", 764 | "The `workflow.py` file has two parts, initialize the function objects and define pipeline dsl (connect the function inputs and outputs).\n", 765 | "\n", 766 | "> Note: the pipeline can include CI steps like building container images and deploying models as illustrated in the following example.\n" 767 | ] 768 | }, 769 | { 770 | "cell_type": "code", 771 | "execution_count": 17, 772 | "metadata": {}, 773 | "outputs": [ 774 | { 775 | "name": "stdout", 776 | "output_type": "stream", 777 | "text": [ 778 | "Overwriting ./workflow.py\n" 779 | ] 780 | } 781 | ], 782 | "source": [ 783 | "%%writefile ./workflow.py\n", 784 | "from kfp import dsl\n", 785 | "from mlrun import mount_v3io, NewTask\n", 786 | "\n", 787 | "\n", 788 | "funcs = {}\n", 789 | "this_project = None\n", 790 | "DATASET = 'iris_dataset'\n", 791 | "LABELS = \"label\"\n", 792 | "\n", 793 | "# init functions is used to configure function resources and local settings\n", 794 | "def init_functions(functions: dict, project=None, secrets=None):\n", 795 | " for f in functions.values():\n", 796 | " f.apply(mount_v3io())\n", 797 | " \n", 798 | " # uncomment this line to collect the inference results into a stream\n", 799 | " # and specify a path in V3IO (/)\n", 800 | " #functions['serving'].set_env('INFERENCE_STREAM', 'users/admin/model_stream')\n", 801 | "\n", 802 | " \n", 803 | "@dsl.pipeline(\n", 804 | " name=\"Demo training pipeline\",\n", 805 | " description=\"Shows how to use mlrun.\"\n", 806 | ")\n", 807 | "def kfpipeline():\n", 808 | " \n", 809 | " # run the ingestion function with the new image and params\n", 810 | " ingest = funcs['gen-iris'].as_step(\n", 811 | " name=\"get-data\",\n", 812 | " handler='iris_generator',\n", 813 | " params={'format': 'pq'},\n", 814 | " outputs=[DATASET])\n", 815 | "\n", 816 | " # analyze our dataset\n", 817 | " describe = funcs[\"describe\"].as_step(\n", 818 | " name=\"summary\",\n", 819 | " params={\"label_column\": LABELS},\n", 820 | " inputs={\"table\": 
ingest.outputs[DATASET]})\n", 821 | " \n", 822 | " # train with hyper-paremeters\n", 823 | " train = funcs[\"train\"].as_step(\n", 824 | " name=\"train\",\n", 825 | " params={\"sample\" : -1,\n", 826 | " \"label_column\" : LABELS,\n", 827 | " \"test_size\" : 0.10},\n", 828 | " hyperparams={'model_pkg_class': [\"sklearn.ensemble.RandomForestClassifier\",\n", 829 | " \"sklearn.linear_model.LogisticRegression\",\n", 830 | " \"sklearn.ensemble.AdaBoostClassifier\"]},\n", 831 | " selector='max.accuracy',\n", 832 | " inputs={\"dataset\" : ingest.outputs[DATASET]},\n", 833 | " labels={\"commit\": this_project.params.get('commit', '')},\n", 834 | " outputs=['model', 'test_set'])\n", 835 | "\n", 836 | " # test and visualize our model\n", 837 | " test = funcs[\"test\"].as_step(\n", 838 | " name=\"test\",\n", 839 | " params={\"label_column\": LABELS},\n", 840 | " inputs={\"models_path\" : train.outputs['model'],\n", 841 | " \"test_set\" : train.outputs['test_set']})\n", 842 | "\n", 843 | " # deploy our model as a serverless function\n", 844 | " deploy = funcs[\"serving\"].deploy_step(models={f\"{DATASET}_v1\": train.outputs['model']},\n", 845 | " tag=this_project.params.get('commit', 'v1'))\n", 846 | "\n", 847 | " # test out new model server (via REST API calls)\n", 848 | " tester = funcs[\"live_tester\"].as_step(name='model-tester',\n", 849 | " params={'addr': deploy.outputs['endpoint'], 'model': f\"{DATASET}_v1\"},\n", 850 | " inputs={'table': train.outputs['test_set']})\n" 851 | ] 852 | }, 853 | { 854 | "cell_type": "code", 855 | "execution_count": 18, 856 | "metadata": {}, 857 | "outputs": [], 858 | "source": [ 859 | "# register the workflow file as \"main\", embed the workflow code into the project YAML\n", 860 | "skproj.set_workflow('main', 'workflow.py')" 861 | ] 862 | }, 863 | { 864 | "cell_type": "markdown", 865 | "metadata": {}, 866 | "source": [ 867 | "Save the project definitions to a file (project.yaml), it is recommended to commit all changes to a Git repo." 
868 | ] 869 | }, 870 | { 871 | "cell_type": "code", 872 | "execution_count": 22, 873 | "metadata": {}, 874 | "outputs": [], 875 | "source": [ 876 | "skproj.artifact_path = 'v3io:///users/{{run.user}}/pipe/{{workflow.uid}}'\n", 877 | "skproj.save()" 878 | ] 879 | }, 880 | { 881 | "cell_type": "markdown", 882 | "metadata": {}, 883 | "source": [ 884 | "\n", 885 | "## Run a pipeline workflow manually (not via git PR)\n", 886 | "\n", 887 | "This section is not used for the git automation, rather demo how to run the workflow from the notebook\n", 888 | "\n", 889 | "use the `run` method to execute a workflow, you can provide alternative arguments and specify the default target for workflow artifacts.
\n", 890 | "The workflow ID is returned and can be used to track the progress or you can use the hyperlinks\n", 891 | "\n", 892 | "> Note: The same command can be issued through CLI commands:
\n", 893 | " `mlrun project my-proj/ -r main -p \"v3io:///users/{{run.user}}/mlrun/kfp/{{workflow.uid}}/\"`\n", 894 | "\n", 895 | "The `dirty` flag allow us to run a project with uncommited changes (when the notebook is in the same git dir it will always be dirty)
\n", 896 | "The `watch` flag will wait for the pipeline to complete" 897 | ] 898 | }, 899 | { 900 | "cell_type": "code", 901 | "execution_count": 23, 902 | "metadata": {}, 903 | "outputs": [], 904 | "source": [ 905 | "# If you want to get slack notification after the run with result summary, set the env var below\n", 906 | "# %env SLACK_WEBHOOK=" 907 | ] 908 | }, 909 | { 910 | "cell_type": "code", 911 | "execution_count": 24, 912 | "metadata": {}, 913 | "outputs": [ 914 | { 915 | "data": { 916 | "text/html": [ 917 | "Experiment link here" 918 | ], 919 | "text/plain": [ 920 | "" 921 | ] 922 | }, 923 | "metadata": {}, 924 | "output_type": "display_data" 925 | }, 926 | { 927 | "data": { 928 | "text/html": [ 929 | "Run link here" 930 | ], 931 | "text/plain": [ 932 | "" 933 | ] 934 | }, 935 | "metadata": {}, 936 | "output_type": "display_data" 937 | }, 938 | { 939 | "name": "stdout", 940 | "output_type": "stream", 941 | "text": [ 942 | "> 2020-07-29 13:04:18,155 [info] Pipeline run id=8f462295-2154-428a-b861-4ec8be504832, check UI or DB for progress\n", 943 | "> 2020-07-29 13:04:18,156 [info] waiting for pipeline run completion\n" 944 | ] 945 | }, 946 | { 947 | "data": { 948 | "text/html": [ 949 | "

Run Results

Workflow 8f462295-2154-428a-b861-4ec8be504832 finished, status=Succeeded
click the hyper links below to see detailed results
\n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | " \n", 955 | " \n", 956 | " \n", 957 | " \n", 958 | " \n", 959 | " \n", 960 | " \n", 961 | " \n", 962 | " \n", 963 | " \n", 964 | " \n", 965 | " \n", 966 | " \n", 967 | " \n", 968 | " \n", 969 | " \n", 970 | " \n", 971 | " \n", 972 | " \n", 973 | " \n", 974 | " \n", 975 | " \n", 976 | " \n", 977 | " \n", 978 | " \n", 979 | " \n", 980 | " \n", 981 | " \n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | " \n", 986 | " \n", 987 | " \n", 988 | " \n", 989 | " \n", 990 | " \n", 991 | " \n", 992 | " \n", 993 | " \n", 994 | " \n", 995 | " \n", 996 | " \n", 997 | " \n", 998 | " \n", 999 | " \n", 1000 | " \n", 1001 | " \n", 1002 | "
uidstartstatenameresultsartifacts
Jul 29 13:05:03completedmodel-tester
total_tests=15
errors=0
match=14
avg_latency=11446
min_latency=11047
max_latency=12131
latency
Jul 29 13:04:54completedtest
accuracy=0.9333333333333333
test-error=0.06666666666666667
auc-micro=0.9655555555555556
auc-weighted=0.9888888888888889
f1-score=0.9137254901960784
precision_score=0.8888888888888888
recall_score=0.9629629629629629
confusion-matrix
feature-importances
precision-recall-multiclass
roc-multiclass
test_set_preds
Jul 29 13:04:37completedsummary
histograms
violin
imbalance
imbalance-weights-vec
correlation-matrix
correlation
Jul 29 13:04:36completedtrain
best_iteration=1
accuracy=0.9705882352941176
test-error=0.029411764705882353
auc-micro=0.9969723183391004
auc-weighted=0.9949732620320856
f1-score=0.9679633867276888
precision_score=0.9666666666666667
recall_score=0.9722222222222222
test_set
confusion-matrix
feature-importances
precision-recall-multiclass
roc-multiclass
model
iteration_results
Jul 29 13:04:26completedget-data
iris_dataset
" 1003 | ], 1004 | "text/plain": [ 1005 | "" 1006 | ] 1007 | }, 1008 | "metadata": {}, 1009 | "output_type": "display_data" 1010 | } 1011 | ], 1012 | "source": [ 1013 | "run_id = skproj.run(\n", 1014 | " 'main', arguments={}, \n", 1015 | " dirty=True, watch=True)" 1016 | ] 1017 | }, 1018 | { 1019 | "cell_type": "markdown", 1020 | "metadata": {}, 1021 | "source": [ 1022 | "**[back to top](#top)**" 1023 | ] 1024 | } 1025 | ], 1026 | "metadata": { 1027 | "kernelspec": { 1028 | "display_name": "Python 3", 1029 | "language": "python", 1030 | "name": "python3" 1031 | }, 1032 | "language_info": { 1033 | "codemirror_mode": { 1034 | "name": "ipython", 1035 | "version": 3 1036 | }, 1037 | "file_extension": ".py", 1038 | "mimetype": "text/x-python", 1039 | "name": "python", 1040 | "nbconvert_exporter": "python", 1041 | "pygments_lexer": "ipython3", 1042 | "version": "3.7.6" 1043 | } 1044 | }, 1045 | "nbformat": 4, 1046 | "nbformat_minor": 4 1047 | } 1048 | -------------------------------------------------------------------------------- /project.yaml: -------------------------------------------------------------------------------- 1 | name: gitops-project 2 | functions: 3 | - name: gen-iris 4 | spec: 5 | kind: job 6 | metadata: 7 | name: gen-iris 8 | tag: '' 9 | project: gitops-project 10 | spec: 11 | command: '' 12 | args: [] 13 | image: mlrun/ml-models 14 | env: [] 15 | default_handler: '' 16 | entry_points: 17 | iris_generator: 18 | name: iris_generator 19 | doc: '' 20 | parameters: 21 | - name: context 22 | default: '' 23 | - name: format 24 | default: csv 25 | outputs: 26 | - default: '' 27 | lineno: 11 28 | description: '' 29 | build: 30 | functionSourceCode: 
IyBHZW5lcmF0ZWQgYnkgbnVjbGlvLmV4cG9ydC5OdWNsaW9FeHBvcnRlcgoKaW1wb3J0IG9zCmZyb20gc2tsZWFybi5kYXRhc2V0cyBpbXBvcnQgbG9hZF9pcmlzCmZyb20gc2tsZWFybi5tb2RlbF9zZWxlY3Rpb24gaW1wb3J0IHRyYWluX3Rlc3Rfc3BsaXQKaW1wb3J0IG51bXB5IGFzIG5wCmZyb20gc2tsZWFybi5tZXRyaWNzIGltcG9ydCBhY2N1cmFjeV9zY29yZQpmcm9tIG1scnVuLmFydGlmYWN0cyBpbXBvcnQgVGFibGVBcnRpZmFjdCwgUGxvdEFydGlmYWN0CmltcG9ydCBwYW5kYXMgYXMgcGQKCmRlZiBpcmlzX2dlbmVyYXRvcihjb250ZXh0LCBmb3JtYXQ9J2NzdicpOgogICAgaXJpcyA9IGxvYWRfaXJpcygpCiAgICBpcmlzX2RhdGFzZXQgPSBwZC5EYXRhRnJhbWUoZGF0YT1pcmlzLmRhdGEsIGNvbHVtbnM9aXJpcy5mZWF0dXJlX25hbWVzKQogICAgaXJpc19sYWJlbHMgPSBwZC5EYXRhRnJhbWUoZGF0YT1pcmlzLnRhcmdldCwgY29sdW1ucz1bJ2xhYmVsJ10pCiAgICBpcmlzX2RhdGFzZXQgPSBwZC5jb25jYXQoW2lyaXNfZGF0YXNldCwgaXJpc19sYWJlbHNdLCBheGlzPTEpCiAgICAKICAgIGNvbnRleHQubG9nZ2VyLmluZm8oJ3NhdmluZyBpcmlzIGRhdGFmcmFtZSB0byB7fScuZm9ybWF0KGNvbnRleHQuYXJ0aWZhY3RfcGF0aCkpCiAgICBjb250ZXh0LmxvZ19kYXRhc2V0KCdpcmlzX2RhdGFzZXQnLCBkZj1pcmlzX2RhdGFzZXQsIGZvcm1hdD1mb3JtYXQsIGluZGV4PUZhbHNlKQoK 31 | commands: [] 32 | code_origin: https://github.com/mlrun/demo-github-actions.git#0e717588b1354d3d60cd96ba5c352d71aace0552 33 | - url: hub://sklearn_classifier 34 | name: train 35 | - url: hub://test_classifier 36 | name: test 37 | - url: hub://model_server 38 | name: serving 39 | - url: hub://model_server_tester 40 | name: live_tester 41 | - url: hub://describe 42 | name: describe 43 | workflows: 44 | - name: main 45 | path: workflow.py 46 | artifacts: [] 47 | artifact_path: v3io:///users/{{run.user}}/pipe/{{workflow.uid}} 48 | -------------------------------------------------------------------------------- /workflow.py: -------------------------------------------------------------------------------- 1 | from kfp import dsl 2 | from mlrun import mount_v3io, NewTask 3 | 4 | 5 | funcs = {} 6 | this_project = None 7 | DATASET = 'iris_dataset' 8 | LABELS = "label" 9 | 10 | # init functions is used to configure function resources and local settings 11 | def init_functions(functions: dict, 
project=None, secrets=None): 12 | for f in functions.values(): 13 | f.apply(mount_v3io()) 14 | 15 | # uncomment this line to collect the inference results into a stream 16 | # and specify a path in V3IO (/) 17 | #functions['serving'].set_env('INFERENCE_STREAM', 'users/admin/model_stream') 18 | 19 | 20 | @dsl.pipeline( 21 | name="Demo training pipeline", 22 | description="Shows how to use mlrun." 23 | ) 24 | def kfpipeline(): 25 | 26 | # run the ingestion function with the new image and params 27 | ingest = funcs['gen-iris'].as_step( 28 | name="get-data", 29 | handler='iris_generator', 30 | params={'format': 'pq'}, 31 | outputs=[DATASET]) 32 | 33 | # analyze our dataset 34 | describe = funcs["describe"].as_step( 35 | name="summary", 36 | params={"label_column": LABELS}, 37 | inputs={"table": ingest.outputs[DATASET]}) 38 | 39 | # train with hyper-paremeters 40 | train = funcs["train"].as_step( 41 | name="train", 42 | params={"sample" : -1, 43 | "label_column" : LABELS, 44 | "test_size" : 0.10}, 45 | hyperparams={'model_pkg_class': ["sklearn.ensemble.RandomForestClassifier", 46 | "sklearn.linear_model.LogisticRegression", 47 | "sklearn.ensemble.AdaBoostClassifier"]}, 48 | selector='max.accuracy', 49 | inputs={"dataset" : ingest.outputs[DATASET]}, 50 | labels={"commit": this_project.params.get('commit', '')}, 51 | outputs=['model', 'test_set']) 52 | 53 | # test and visualize our model 54 | test = funcs["test"].as_step( 55 | name="test", 56 | params={"label_column": LABELS}, 57 | inputs={"models_path" : train.outputs['model'], 58 | "test_set" : train.outputs['test_set']}) 59 | 60 | # deploy our model as a serverless function 61 | deploy = funcs["serving"].deploy_step(models={f"{DATASET}_v1": train.outputs['model']}, 62 | tag=this_project.params.get('commit', 'v1')) 63 | 64 | # test out new model server (via REST API calls) 65 | tester = funcs["live_tester"].as_step(name='model-tester', 66 | params={'addr': deploy.outputs['endpoint'], 'model': f"{DATASET}_v1"}, 67 | 
inputs={'table': train.outputs['test_set']}) 68 | --------------------------------------------------------------------------------