├── .github
└── workflows
│ └── mlrun.yml
├── LICENSE
├── README.md
├── docs
├── flow.png
├── mlrun.png
├── pipeline.png
├── pr.png
├── slack.png
└── use-this.png
├── gitops_project.ipynb
├── project.yaml
└── workflow.py
/.github/workflows/mlrun.yml:
--------------------------------------------------------------------------------
1 | name: mlrun-project-workflow
2 | on: [issue_comment]
3 |
4 | jobs:
5 | submit-project:
6 | if: github.event.issue.pull_request != null && startsWith(github.event.comment.body, '/run')
7 | runs-on: ubuntu-latest
8 |
9 | steps:
10 | - uses: actions/checkout@v2
11 | - name: Set up Python 3.6
12 | uses: actions/setup-python@v1
13 | with:
14 | python-version: '3.6'
15 | architecture: 'x64'
16 |
17 | - name: Install mlrun
18 | run: python -m pip install mlrun
19 | - name: Submit project
20 | run: python -m mlrun project ./ --git-issue "${{github.event.issue.number}}" --git-repo ${GITHUB_REPOSITORY} -w -x commit=${COMMIT:33} -r main ${CMD:5}
21 | env:
22 | V3IO_USERNAME: ${{ secrets.V3IO_USERNAME }}
23 |         V3IO_PASSWORD: ${{ secrets.V3IO_PASSWORD }}
24 | V3IO_API: ${{ secrets.V3IO_API }}
25 | V3IO_ACCESS_KEY: ${{ secrets.V3IO_ACCESS_KEY }}
26 | MLRUN_DBPATH: ${{ secrets.MLRUN_DBPATH }}
27 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
28 | SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
29 | CMD: ${{ github.event.comment.body}}
30 | COMMIT: ${{ github.sha}}
31 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # ML Pipeline Automation and CI/CD Using GitHub Actions, Kubeflow and MLRun
2 |
3 | Machine learning (ML) pipelines allow us to automate multi-stage workflows, which comprise
4 | data ingestion, data preparation, model training, validation and finally deployment.
5 |
6 | Every time our code, data or parameters change we may want to re-evaluate our model accuracy and performance before we deploy.
7 | This resembles the CI/CD practice for delivering code to production with the additional aspects of data and parameter/configuration versioning,
8 | and may require more powerful resources (computation cluster, GPUs, data processing engines, etc.).
9 |
10 | This template repo demonstrates how you can automate the development, testing, and deployment
11 | of machine learning projects using the following tools:
12 |
13 | * [**GitHub actions**](https://github.com/features/actions) - used for code and metadata versioning, workflow triggering, and process tracking
14 | * [**Kubeflow Pipelines**](https://www.kubeflow.org/docs/pipelines/overview/pipelines-overview/) - Used to execute ML pipeline steps on a (remote) Kubernetes cluster
15 | * [**MLRun**](https://github.com/mlrun/mlrun) - Used for end to end MLOps automation and tracking, [read more below](#mlrun-overview).
16 |
17 | To clone and run with your own environment or on [**iguazio data science platform**](https://www.iguazio.com/), check the [**instructions below**](#how-to-run-with-your-cluster).
18 |
19 | ## How Does It Work?
20 |
21 | This repo represents an **mlrun project**; mlrun projects consist of **Functions** (code), **Artifacts** (data), **Workflows**, and **Parameters/secrets**.
22 | The [**project.yaml**](project.yaml) file lists all of those elements.
23 |
24 | Project elements can be linked (e.g. point to a library function which runs AutoML or data analysis, point to code/notebook files, point to external data objects, workflow files, etc.),
25 | or they can be embedded (e.g. store function code + configuration, workflow steps, etc.), in this example we show how to combine both.
26 |
27 | The project file, workflow and embedded/linked code were generated by running the [**gitops_project notebook**](gitops_project.ipynb),
28 | you can modify it to your needs, this is based on code from [MLRun Demos repo](https://github.com/mlrun/demos),
29 | where you can find more end to end ML Pipeline examples.
30 |
31 | When we change one of the elements (the project.yaml file or one of the other linked code/metadata files) and open a pull request (PR)
32 | we can type `/run` in our PR, this will trigger running the ML Pipeline (as specified in the [workflow file](workflow.py)).
33 | Once the pipeline starts, a comment will be added to your PR with a link to the MLRun UI (allowing you to track the progress), and when the ML Pipeline completes
34 | MLRun will write a result summary as a comment back into your PR with links to more details and data artifacts.
35 |
36 | **Flow diagram:**
37 |
38 |

39 |
40 | **This is an example of the PR comments:**
41 |
42 |

43 |
44 | **This is an example of the summary report sent to `Slack`:**
45 |
46 |

47 |
48 | **The Kubeflow pipeline graph**
49 |
50 |

51 |
52 | **MLRun UI showing the AutoML results (linked to from the PR)**
53 |
54 |

55 |
56 | ## What Is MLRun?
57 |
58 | MLRun is the first and currently only integrated open-source framework for end to end MLOps automation, it:
59 | * Orchestrates job/pipeline from simple code or pre-baked functions (via Kubeflow and various k8s CRDs)
60 | * Runs, tracks and versions projects comprising experiments, jobs/functions, data, code, models and more.
61 | * Provides an open marketplace for various ML, DL, Analytics, MLOps functions
62 | * Runs iterative AutoML, Hyper-param, or data analysis tasks on a distributed cluster
63 | * Automates deployment of models and real-time data processing functions using (Nuclio) real-time serverless engine
64 |
65 | Read more in [mlrun/mlrun](https://github.com/mlrun/mlrun)
66 |
67 | ## How To Run With Your Cluster
68 |
69 | ### Prerequisites
70 |
71 | You need access to a working Kubernetes cluster with Kubeflow, Nuclio, and MLRun (see [installing MLRun](https://github.com/mlrun/mlrun#installation))
72 | Or use [**iguazio data science platform**](https://www.iguazio.com/) with all of those pre-installed and managed.
73 |
74 | ### Clone and setup
75 |
76 | #### 1. Copy this repo to your own GitHub account by clicking the `Use this template` button
77 |
78 |

79 |
80 | #### 2. Configure the required secrets and addresses
81 |
82 | Under the repo settings select the `secrets` tab and configure the following:
83 | * `MLRUN_DBPATH` - remote URL to mlrun service (e.g. `https://`)
84 | * `SLACK_WEBHOOK` - optional, if you would like to get run summary into your slack
85 |
86 | When using the Iguazio platform you should set the following:
87 | * `V3IO_USERNAME` - Iguazio platform username
88 | * `V3IO_ACCESS_KEY` - Iguazio V3IO data layer credentials (copy from your user settings)
89 | * `V3IO_PASSWORD` - user password
90 | * `V3IO_API` - V3IO data access API url (copy from the services screen)
91 |
92 | When using the open source version and a secure API gateway, you can use the following secrets:
93 | * `MLRUN_HTTPDB__USER` - remote username
94 | * `MLRUN_HTTPDB__PASSWORD` (for basic auth) or `MLRUN_HTTPDB__TOKEN` (for Bearer token)
95 |
96 |
97 | ### Customize
98 |
99 | Update and run the [**gitops_project notebook**](gitops_project.ipynb),
100 | The Notebook will generate the data ingestion function, the workflow code and the [**project.yaml**](project.yaml) files.
101 | You can also run the workflow from the notebook, or you can run it from the pull request.
102 |
103 | ### Run from a PR
104 |
105 | Change the project.yaml file, the workflow, or other elements and create a pull request,
106 | once the PR is opened type `/run` in your PR.
107 |
108 | For troubleshooting, go to the `Actions` tab to see GitHub Actions workflow progress.
109 |
110 |
--------------------------------------------------------------------------------
/docs/flow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlrun/demo-github-actions/06d375eba8c79058ea7f1da6ebfc0758973c891f/docs/flow.png
--------------------------------------------------------------------------------
/docs/mlrun.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlrun/demo-github-actions/06d375eba8c79058ea7f1da6ebfc0758973c891f/docs/mlrun.png
--------------------------------------------------------------------------------
/docs/pipeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlrun/demo-github-actions/06d375eba8c79058ea7f1da6ebfc0758973c891f/docs/pipeline.png
--------------------------------------------------------------------------------
/docs/pr.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlrun/demo-github-actions/06d375eba8c79058ea7f1da6ebfc0758973c891f/docs/pr.png
--------------------------------------------------------------------------------
/docs/slack.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlrun/demo-github-actions/06d375eba8c79058ea7f1da6ebfc0758973c891f/docs/slack.png
--------------------------------------------------------------------------------
/docs/use-this.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlrun/demo-github-actions/06d375eba8c79058ea7f1da6ebfc0758973c891f/docs/use-this.png
--------------------------------------------------------------------------------
/gitops_project.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Demonstrate Git Based ML Pipeline Automation\n",
8 | " --------------------------------------------------------------------\n",
9 | "\n",
10 | "Creating a local function, running predefined functions, creating and running a full ML pipeline with local and library functions.\n",
11 | "\n",
12 | "#### **notebook how-to's**\n",
13 | "* Create and test a simple function\n",
14 | "* Examine data using serverless (containerized) `describe` function\n",
15 | "* Create an automated ML pipeline from various library functions\n",
16 | "* Running and tracking the pipeline results and artifacts"
17 | ]
18 | },
19 | {
20 | "cell_type": "markdown",
21 | "metadata": {},
22 | "source": [
23 | "## Create and Test a Local Ingestion/Data-prep Function (e.g. Iris Data Generator)\n",
24 | "Import nuclio SDK and magics, do not remove the cell and comment !!!"
25 | ]
26 | },
27 | {
28 | "cell_type": "code",
29 | "execution_count": 1,
30 | "metadata": {},
31 | "outputs": [],
32 | "source": [
33 | "# nuclio: ignore\n",
34 | "import nuclio"
35 | ]
36 | },
37 | {
38 | "cell_type": "markdown",
39 | "metadata": {},
40 | "source": [
41 | "Specify function dependencies and configuration"
42 | ]
43 | },
44 | {
45 | "cell_type": "code",
46 | "execution_count": 2,
47 | "metadata": {},
48 | "outputs": [
49 | {
50 | "name": "stdout",
51 | "output_type": "stream",
52 | "text": [
53 | "%nuclio: setting spec.image to 'mlrun/ml-models'\n"
54 | ]
55 | }
56 | ],
57 | "source": [
58 | "%nuclio config spec.image = \"mlrun/ml-models\""
59 | ]
60 | },
61 | {
62 | "cell_type": "markdown",
63 | "metadata": {},
64 | "source": [
65 | "#### Function code\n",
66 | "Generate the iris dataset and log the dataframe (as csv or parquet file)"
67 | ]
68 | },
69 | {
70 | "cell_type": "code",
71 | "execution_count": 3,
72 | "metadata": {},
73 | "outputs": [],
74 | "source": [
75 | "import os\n",
76 | "from sklearn.datasets import load_iris\n",
77 | "from sklearn.model_selection import train_test_split\n",
78 | "import numpy as np\n",
79 | "from sklearn.metrics import accuracy_score\n",
80 | "from mlrun.artifacts import TableArtifact, PlotArtifact\n",
81 | "import pandas as pd\n",
82 | "\n",
83 | "def iris_generator(context, format='csv'):\n",
84 | " iris = load_iris()\n",
85 | " iris_dataset = pd.DataFrame(data=iris.data, columns=iris.feature_names)\n",
86 | " iris_labels = pd.DataFrame(data=iris.target, columns=['label'])\n",
87 | " iris_dataset = pd.concat([iris_dataset, iris_labels], axis=1)\n",
88 | " \n",
89 | " context.logger.info('saving iris dataframe to {}'.format(context.artifact_path))\n",
90 | " context.log_dataset('iris_dataset', df=iris_dataset, format=format, index=False)\n"
91 | ]
92 | },
93 | {
94 | "cell_type": "markdown",
95 | "metadata": {},
96 | "source": [
97 | "The following end-code annotation tells ```nuclio``` to stop parsing the notebook from this cell. _**Please do not remove this cell**_:"
98 | ]
99 | },
100 | {
101 | "cell_type": "code",
102 | "execution_count": 4,
103 | "metadata": {},
104 | "outputs": [],
105 | "source": [
106 | "# nuclio: end-code\n",
107 | "# marks the end of a code section"
108 | ]
109 | },
110 | {
111 | "cell_type": "markdown",
112 | "metadata": {},
113 | "source": [
114 | "## Create a project to host our functions, jobs and artifacts\n",
115 | "\n",
116 | "Projects are used to package multiple functions, workflows, and artifacts. We usually store project code and definitions in a Git archive.\n",
117 | "\n",
118 | "The following code creates a new project in a local dir and initialize git tracking on that"
119 | ]
120 | },
121 | {
122 | "cell_type": "code",
123 | "execution_count": 5,
124 | "metadata": {},
125 | "outputs": [],
126 | "source": [
127 | "from os import path\n",
128 | "from mlrun import run_local, NewTask, mlconf, import_function, mount_v3io\n",
129 | "mlconf.dbpath = mlconf.dbpath or 'http://mlrun-api:8080'\n",
130 | "\n",
131 | "# specify artifacts target location\n",
132 | "artifact_path = mlconf.artifact_path or path.abspath('./')\n",
133 | "project_name = 'gitops-project'"
134 | ]
135 | },
136 | {
137 | "cell_type": "code",
138 | "execution_count": 6,
139 | "metadata": {},
140 | "outputs": [],
141 | "source": [
142 | "from mlrun import new_project, code_to_function\n",
143 | "project_dir = './'\n",
144 | "skproj = new_project(project_name, project_dir)"
145 | ]
146 | },
147 | {
148 | "cell_type": "markdown",
149 | "metadata": {},
150 | "source": [
151 | "\n",
152 | "### Run/test the data generator function locally\n",
153 | "\n",
154 | "The functions above can be tested locally. Parameters, inputs, and outputs can be specified in the API or the `Task` object.
\n",
155 | "when using `run_local()` the function inputs and outputs are automatically recorded by MLRun experiment and data tracking DB.\n",
156 | "\n",
157 | "In each run we can specify the function, inputs, parameters/hyper-parameters, etc... For more details, see the [mlrun_basics notebook](mlrun_basics.ipynb)."
158 | ]
159 | },
160 | {
161 | "cell_type": "code",
162 | "execution_count": 7,
163 | "metadata": {},
164 | "outputs": [
165 | {
166 | "name": "stdout",
167 | "output_type": "stream",
168 | "text": [
169 | "> 2020-07-29 10:38:35,433 [info] starting run iris_gen uid=3e340d3561ca402c91e9bb09b1631dd4 -> http://mlrun-api:8080\n",
170 | "> 2020-07-29 10:38:35,518 [info] saving iris dataframe to /User/demo-github-actions/data\n"
171 | ]
172 | },
173 | {
174 | "data": {
175 | "text/html": [
176 | "\n",
310 | "\n",
311 | "
\n",
312 | "\n",
325 | "
\n",
326 | " \n",
327 | " \n",
328 | " project | \n",
329 | " uid | \n",
330 | " iter | \n",
331 | " start | \n",
332 | " state | \n",
333 | " name | \n",
334 | " labels | \n",
335 | " inputs | \n",
336 | " parameters | \n",
337 | " results | \n",
338 | " artifacts | \n",
339 | "
\n",
340 | " \n",
341 | " \n",
342 | " \n",
343 | " gitops-project | \n",
344 | " | \n",
345 | " 0 | \n",
346 | " Jul 29 10:38:35 | \n",
347 | " completed | \n",
348 | " iris_gen | \n",
349 | " v3io_user=admin kind=handler owner=admin host=jupyter-58d8fdb6fc-nmqbq | \n",
350 | " | \n",
351 | " | \n",
352 | " | \n",
353 | " iris_dataset | \n",
354 | "
\n",
355 | " \n",
356 | "
\n",
357 | "
\n",
358 | "
\n",
359 | " \n",
363 | " \n",
364 | "
\n",
365 | "
\n"
366 | ],
367 | "text/plain": [
368 | ""
369 | ]
370 | },
371 | "metadata": {},
372 | "output_type": "display_data"
373 | },
374 | {
375 | "name": "stdout",
376 | "output_type": "stream",
377 | "text": [
378 | "to track results use .show() or .logs() or in CLI: \n",
379 | "!mlrun get run 3e340d3561ca402c91e9bb09b1631dd4 --project gitops-project , !mlrun logs 3e340d3561ca402c91e9bb09b1631dd4 --project gitops-project\n",
380 | "> 2020-07-29 10:38:35,641 [info] run executed, status=completed\n"
381 | ]
382 | }
383 | ],
384 | "source": [
385 | "# run the function locally\n",
386 | "gen = run_local(name='iris_gen', handler=iris_generator, \n",
387 | " project=project_name, artifact_path=path.join(artifact_path, 'data')) "
388 | ]
389 | },
390 | {
391 | "cell_type": "markdown",
392 | "metadata": {},
393 | "source": [
394 | "#### Convert our local code to a distributed serverless function object "
395 | ]
396 | },
397 | {
398 | "cell_type": "code",
399 | "execution_count": 8,
400 | "metadata": {},
401 | "outputs": [
402 | {
403 | "data": {
404 | "text/plain": [
405 | ""
406 | ]
407 | },
408 | "execution_count": 8,
409 | "metadata": {},
410 | "output_type": "execute_result"
411 | }
412 | ],
413 | "source": [
414 | "gen_func = code_to_function(name='gen_iris', kind='job')\n",
415 | "skproj.set_function(gen_func)"
416 | ]
417 | },
418 | {
419 | "cell_type": "code",
420 | "execution_count": null,
421 | "metadata": {},
422 | "outputs": [],
423 | "source": []
424 | },
425 | {
426 | "cell_type": "markdown",
427 | "metadata": {},
428 | "source": [
429 | "## Analyze the dataset features (using marketplace function)\n",
430 | "load dataset analysis function (`describe`) from the function hub (marketplace), and print its doc."
431 | ]
432 | },
433 | {
434 | "cell_type": "code",
435 | "execution_count": 15,
436 | "metadata": {},
437 | "outputs": [
438 | {
439 | "name": "stdout",
440 | "output_type": "stream",
441 | "text": [
442 | "function: describe\n",
443 | "describe and visualizes dataset stats\n",
444 | "default handler: summarize\n",
445 | "entry points:\n",
446 | " summarize: Summarize a table\n",
447 | " context(MLClientCtx) - the function context, default=\n",
448 | " table(DataItem) - MLRun input pointing to pandas dataframe (csv/parquet file path), default=\n",
449 | " label_column(str) - ground truth column label, default=None\n",
450 | " class_labels(List[str]) - label for each class in tables and plots, default=[]\n",
451 | " plot_hist(bool) - (True) set this to False for large tables, default=True\n",
452 | " plots_dest(str) - destination folder of summary plots (relative to artifact_path), default=plots\n",
453 | " update_dataset - when the table is a registered dataset update the charts in-place, default=False\n"
454 | ]
455 | }
456 | ],
457 | "source": [
458 | "skproj.set_function('hub://describe', 'describe')\n",
459 | "skproj.func('describe').doc()"
460 | ]
461 | },
462 | {
463 | "cell_type": "markdown",
464 | "metadata": {},
465 | "source": [
466 | "### Run the describe function on our dataset (as a Kubernetes job)\n",
467 | " using shared file system mount (`mount_v3io`) with our notebook."
468 | ]
469 | },
470 | {
471 | "cell_type": "code",
472 | "execution_count": 16,
473 | "metadata": {},
474 | "outputs": [
475 | {
476 | "name": "stdout",
477 | "output_type": "stream",
478 | "text": [
479 | "> 2020-07-29 12:46:52,341 [info] starting run describe-summarize uid=301ab10adbf34adb898f0751c7f0f0b4 -> http://mlrun-api:8080\n",
480 | "> 2020-07-29 12:46:52,497 [info] Job is running in the background, pod: describe-summarize-r9tvz\n",
481 | "> 2020-07-29 12:47:01,761 [info] run executed, status=completed\n",
482 | "final state: succeeded\n"
483 | ]
484 | },
485 | {
486 | "data": {
487 | "text/html": [
488 | "\n",
622 | "\n",
623 | "
\n",
624 | "\n",
637 | "
\n",
638 | " \n",
639 | " \n",
640 | " project | \n",
641 | " uid | \n",
642 | " iter | \n",
643 | " start | \n",
644 | " state | \n",
645 | " name | \n",
646 | " labels | \n",
647 | " inputs | \n",
648 | " parameters | \n",
649 | " results | \n",
650 | " artifacts | \n",
651 | "
\n",
652 | " \n",
653 | " \n",
654 | " \n",
655 | " gitops-project | \n",
656 | " | \n",
657 | " 0 | \n",
658 | " Jul 29 12:46:57 | \n",
659 | " completed | \n",
660 | " describe-summarize | \n",
661 | " v3io_user=admin kind=job owner=admin host=describe-summarize-r9tvz | \n",
662 | " table | \n",
663 | " label_column=label | \n",
664 | " | \n",
665 | " histograms violin imbalance imbalance-weights-vec correlation-matrix correlation | \n",
666 | "
\n",
667 | " \n",
668 | "
\n",
669 | "
\n",
670 | "
\n",
671 | " \n",
675 | " \n",
676 | "
\n",
677 | "
\n"
678 | ],
679 | "text/plain": [
680 | ""
681 | ]
682 | },
683 | "metadata": {},
684 | "output_type": "display_data"
685 | },
686 | {
687 | "name": "stdout",
688 | "output_type": "stream",
689 | "text": [
690 | "to track results use .show() or .logs() or in CLI: \n",
691 | "!mlrun get run 301ab10adbf34adb898f0751c7f0f0b4 --project gitops-project , !mlrun logs 301ab10adbf34adb898f0751c7f0f0b4 --project gitops-project\n",
692 | "> 2020-07-29 12:47:11,671 [info] run executed, status=completed\n"
693 | ]
694 | },
695 | {
696 | "data": {
697 | "text/plain": [
698 | ""
699 | ]
700 | },
701 | "execution_count": 16,
702 | "metadata": {},
703 | "output_type": "execute_result"
704 | }
705 | ],
706 | "source": [
707 | "skproj.func('describe').apply(mount_v3io()).run(params={'label_column': 'label'}, \n",
708 | " inputs={\"table\": gen.outputs['iris_dataset']}, \n",
709 | " artifact_path=artifact_path)"
710 | ]
711 | },
712 | {
713 | "cell_type": "markdown",
714 | "metadata": {},
715 | "source": [
716 | "## Create a Fully Automated ML Pipeline\n",
717 | "\n",
718 | "#### Add more functions to our project to be used in our pipeline (from the functions hub/marketplace)\n",
719 | "\n",
720 | "AutoML training (classifier), Model validation (test_classifier), Real-time model server, and Model REST API Tester"
721 | ]
722 | },
723 | {
724 | "cell_type": "code",
725 | "execution_count": 9,
726 | "metadata": {},
727 | "outputs": [
728 | {
729 | "data": {
730 | "text/plain": [
731 | ""
732 | ]
733 | },
734 | "execution_count": 9,
735 | "metadata": {},
736 | "output_type": "execute_result"
737 | }
738 | ],
739 | "source": [
740 | "skproj.set_function('hub://sklearn_classifier', 'train')\n",
741 | "skproj.set_function('hub://test_classifier', 'test')\n",
742 | "skproj.set_function('hub://model_server', 'serving')\n",
743 | "skproj.set_function('hub://model_server_tester', 'live_tester')\n",
744 | "#print(skproj.to_yaml())"
745 | ]
746 | },
747 | {
748 | "cell_type": "markdown",
749 | "metadata": {},
750 | "source": [
751 | "#### Define and save a pipeline \n",
752 | "\n",
753 | "The following workflow definition will be written into a file, it describes a Kubeflow execution graph (DAG)
\n",
754 | "and how functions and data are connected to form an end to end pipeline. \n",
755 | "\n",
756 | "* Build the iris generator (ingest) function container \n",
757 | "* Ingest the iris data\n",
758 | "* Analyze the dataset (describe)\n",
759 | "* Train and test the model\n",
760 | "* Deploy the model as a real-time serverless function\n",
761 | "* Test the serverless function REST API with test dataset\n",
762 | "\n",
763 | "Check the code below to see how functions objects are initialized and used (by name) inside the workflow.
\n",
764 | "The `workflow.py` file has two parts, initialize the function objects and define pipeline dsl (connect the function inputs and outputs).\n",
765 | "\n",
766 | "> Note: the pipeline can include CI steps like building container images and deploying models as illustrated in the following example.\n"
767 | ]
768 | },
769 | {
770 | "cell_type": "code",
771 | "execution_count": 17,
772 | "metadata": {},
773 | "outputs": [
774 | {
775 | "name": "stdout",
776 | "output_type": "stream",
777 | "text": [
778 | "Overwriting ./workflow.py\n"
779 | ]
780 | }
781 | ],
782 | "source": [
783 | "%%writefile ./workflow.py\n",
784 | "from kfp import dsl\n",
785 | "from mlrun import mount_v3io, NewTask\n",
786 | "\n",
787 | "\n",
788 | "funcs = {}\n",
789 | "this_project = None\n",
790 | "DATASET = 'iris_dataset'\n",
791 | "LABELS = \"label\"\n",
792 | "\n",
793 | "# init functions is used to configure function resources and local settings\n",
794 | "def init_functions(functions: dict, project=None, secrets=None):\n",
795 | " for f in functions.values():\n",
796 | " f.apply(mount_v3io())\n",
797 | " \n",
798 | " # uncomment this line to collect the inference results into a stream\n",
799 | " # and specify a path in V3IO (/)\n",
800 | " #functions['serving'].set_env('INFERENCE_STREAM', 'users/admin/model_stream')\n",
801 | "\n",
802 | " \n",
803 | "@dsl.pipeline(\n",
804 | " name=\"Demo training pipeline\",\n",
805 | " description=\"Shows how to use mlrun.\"\n",
806 | ")\n",
807 | "def kfpipeline():\n",
808 | " \n",
809 | " # run the ingestion function with the new image and params\n",
810 | " ingest = funcs['gen-iris'].as_step(\n",
811 | " name=\"get-data\",\n",
812 | " handler='iris_generator',\n",
813 | " params={'format': 'pq'},\n",
814 | " outputs=[DATASET])\n",
815 | "\n",
816 | " # analyze our dataset\n",
817 | " describe = funcs[\"describe\"].as_step(\n",
818 | " name=\"summary\",\n",
819 | " params={\"label_column\": LABELS},\n",
820 | " inputs={\"table\": ingest.outputs[DATASET]})\n",
821 | " \n",
822 | " # train with hyper-paremeters\n",
823 | " train = funcs[\"train\"].as_step(\n",
824 | " name=\"train\",\n",
825 | " params={\"sample\" : -1,\n",
826 | " \"label_column\" : LABELS,\n",
827 | " \"test_size\" : 0.10},\n",
828 | " hyperparams={'model_pkg_class': [\"sklearn.ensemble.RandomForestClassifier\",\n",
829 | " \"sklearn.linear_model.LogisticRegression\",\n",
830 | " \"sklearn.ensemble.AdaBoostClassifier\"]},\n",
831 | " selector='max.accuracy',\n",
832 | " inputs={\"dataset\" : ingest.outputs[DATASET]},\n",
833 | " labels={\"commit\": this_project.params.get('commit', '')},\n",
834 | " outputs=['model', 'test_set'])\n",
835 | "\n",
836 | " # test and visualize our model\n",
837 | " test = funcs[\"test\"].as_step(\n",
838 | " name=\"test\",\n",
839 | " params={\"label_column\": LABELS},\n",
840 | " inputs={\"models_path\" : train.outputs['model'],\n",
841 | " \"test_set\" : train.outputs['test_set']})\n",
842 | "\n",
843 | " # deploy our model as a serverless function\n",
844 | " deploy = funcs[\"serving\"].deploy_step(models={f\"{DATASET}_v1\": train.outputs['model']},\n",
845 | " tag=this_project.params.get('commit', 'v1'))\n",
846 | "\n",
847 | " # test out new model server (via REST API calls)\n",
848 | " tester = funcs[\"live_tester\"].as_step(name='model-tester',\n",
849 | " params={'addr': deploy.outputs['endpoint'], 'model': f\"{DATASET}_v1\"},\n",
850 | " inputs={'table': train.outputs['test_set']})\n"
851 | ]
852 | },
853 | {
854 | "cell_type": "code",
855 | "execution_count": 18,
856 | "metadata": {},
857 | "outputs": [],
858 | "source": [
859 | "# register the workflow file as \"main\", embed the workflow code into the project YAML\n",
860 | "skproj.set_workflow('main', 'workflow.py')"
861 | ]
862 | },
863 | {
864 | "cell_type": "markdown",
865 | "metadata": {},
866 | "source": [
867 | "Save the project definitions to a file (project.yaml), it is recommended to commit all changes to a Git repo."
868 | ]
869 | },
870 | {
871 | "cell_type": "code",
872 | "execution_count": 22,
873 | "metadata": {},
874 | "outputs": [],
875 | "source": [
876 | "skproj.artifact_path = 'v3io:///users/{{run.user}}/pipe/{{workflow.uid}}'\n",
877 | "skproj.save()"
878 | ]
879 | },
880 | {
881 | "cell_type": "markdown",
882 | "metadata": {},
883 | "source": [
884 | "\n",
885 | "## Run a pipeline workflow manually (not via git PR)\n",
886 | "\n",
887 | "This section is not used for the git automation, rather demo how to run the workflow from the notebook\n",
888 | "\n",
889 | "use the `run` method to execute a workflow, you can provide alternative arguments and specify the default target for workflow artifacts.
\n",
890 | "The workflow ID is returned and can be used to track the progress or you can use the hyperlinks\n",
891 | "\n",
892 | "> Note: The same command can be issued through CLI commands:
\n",
893 | " `mlrun project my-proj/ -r main -p \"v3io:///users/{{run.user}}/mlrun/kfp/{{workflow.uid}}/\"`\n",
894 | "\n",
895 | "The `dirty` flag allow us to run a project with uncommited changes (when the notebook is in the same git dir it will always be dirty)
\n",
896 | "The `watch` flag will wait for the pipeline to complete"
897 | ]
898 | },
899 | {
900 | "cell_type": "code",
901 | "execution_count": 23,
902 | "metadata": {},
903 | "outputs": [],
904 | "source": [
905 | "# If you want to get slack notification after the run with result summary, set the env var below\n",
906 | "# %env SLACK_WEBHOOK="
907 | ]
908 | },
909 | {
910 | "cell_type": "code",
911 | "execution_count": 24,
912 | "metadata": {},
913 | "outputs": [
914 | {
915 | "data": {
916 | "text/html": [
917 | "Experiment link here"
918 | ],
919 | "text/plain": [
920 | ""
921 | ]
922 | },
923 | "metadata": {},
924 | "output_type": "display_data"
925 | },
926 | {
927 | "data": {
928 | "text/html": [
929 | "Run link here"
930 | ],
931 | "text/plain": [
932 | ""
933 | ]
934 | },
935 | "metadata": {},
936 | "output_type": "display_data"
937 | },
938 | {
939 | "name": "stdout",
940 | "output_type": "stream",
941 | "text": [
942 | "> 2020-07-29 13:04:18,155 [info] Pipeline run id=8f462295-2154-428a-b861-4ec8be504832, check UI or DB for progress\n",
943 | "> 2020-07-29 13:04:18,156 [info] waiting for pipeline run completion\n"
944 | ]
945 | },
946 | {
947 | "data": {
948 | "text/html": [
949 | "Run Results
Workflow 8f462295-2154-428a-b861-4ec8be504832 finished, status=Succeeded
click the hyper links below to see detailed results
\n",
950 | " \n",
951 | " \n",
952 | " uid | \n",
953 | " start | \n",
954 | " state | \n",
955 | " name | \n",
956 | " results | \n",
957 | " artifacts | \n",
958 | "
\n",
959 | " \n",
960 | " \n",
961 | " \n",
962 | " | \n",
963 | " Jul 29 13:05:03 | \n",
964 | " completed | \n",
965 | " model-tester | \n",
966 | " total_tests=15 errors=0 match=14 avg_latency=11446 min_latency=11047 max_latency=12131 | \n",
967 | " latency | \n",
968 | "
\n",
969 | " \n",
970 | " | \n",
971 | " Jul 29 13:04:54 | \n",
972 | " completed | \n",
973 | " test | \n",
974 | " accuracy=0.9333333333333333 test-error=0.06666666666666667 auc-micro=0.9655555555555556 auc-weighted=0.9888888888888889 f1-score=0.9137254901960784 precision_score=0.8888888888888888 recall_score=0.9629629629629629 | \n",
975 | " confusion-matrix feature-importances precision-recall-multiclass roc-multiclass test_set_preds | \n",
976 | "
\n",
977 | " \n",
978 | " | \n",
979 | " Jul 29 13:04:37 | \n",
980 | " completed | \n",
981 | " summary | \n",
982 | " | \n",
983 | " histograms violin imbalance imbalance-weights-vec correlation-matrix correlation | \n",
984 | "
\n",
985 | " \n",
986 | " | \n",
987 | " Jul 29 13:04:36 | \n",
988 | " completed | \n",
989 | " train | \n",
990 | " best_iteration=1 accuracy=0.9705882352941176 test-error=0.029411764705882353 auc-micro=0.9969723183391004 auc-weighted=0.9949732620320856 f1-score=0.9679633867276888 precision_score=0.9666666666666667 recall_score=0.9722222222222222 | \n",
991 | " test_set confusion-matrix feature-importances precision-recall-multiclass roc-multiclass model iteration_results | \n",
992 | "
\n",
993 | " \n",
994 | " | \n",
995 | " Jul 29 13:04:26 | \n",
996 | " completed | \n",
997 | " get-data | \n",
998 | " | \n",
999 | " iris_dataset | \n",
1000 | "
\n",
1001 | " \n",
1002 | "
"
1003 | ],
1004 | "text/plain": [
1005 | ""
1006 | ]
1007 | },
1008 | "metadata": {},
1009 | "output_type": "display_data"
1010 | }
1011 | ],
1012 | "source": [
1013 | "run_id = skproj.run(\n",
1014 | " 'main', arguments={}, \n",
1015 | " dirty=True, watch=True)"
1016 | ]
1017 | },
1018 | {
1019 | "cell_type": "markdown",
1020 | "metadata": {},
1021 | "source": [
1022 | "**[back to top](#top)**"
1023 | ]
1024 | }
1025 | ],
1026 | "metadata": {
1027 | "kernelspec": {
1028 | "display_name": "Python 3",
1029 | "language": "python",
1030 | "name": "python3"
1031 | },
1032 | "language_info": {
1033 | "codemirror_mode": {
1034 | "name": "ipython",
1035 | "version": 3
1036 | },
1037 | "file_extension": ".py",
1038 | "mimetype": "text/x-python",
1039 | "name": "python",
1040 | "nbconvert_exporter": "python",
1041 | "pygments_lexer": "ipython3",
1042 | "version": "3.7.6"
1043 | }
1044 | },
1045 | "nbformat": 4,
1046 | "nbformat_minor": 4
1047 | }
1048 |
--------------------------------------------------------------------------------
/project.yaml:
--------------------------------------------------------------------------------
1 | name: gitops-project
2 | functions:
3 | - name: gen-iris
4 | spec:
5 | kind: job
6 | metadata:
7 | name: gen-iris
8 | tag: ''
9 | project: gitops-project
10 | spec:
11 | command: ''
12 | args: []
13 | image: mlrun/ml-models
14 | env: []
15 | default_handler: ''
16 | entry_points:
17 | iris_generator:
18 | name: iris_generator
19 | doc: ''
20 | parameters:
21 | - name: context
22 | default: ''
23 | - name: format
24 | default: csv
25 | outputs:
26 | - default: ''
27 | lineno: 11
28 | description: ''
29 | build:
30 | functionSourceCode: IyBHZW5lcmF0ZWQgYnkgbnVjbGlvLmV4cG9ydC5OdWNsaW9FeHBvcnRlcgoKaW1wb3J0IG9zCmZyb20gc2tsZWFybi5kYXRhc2V0cyBpbXBvcnQgbG9hZF9pcmlzCmZyb20gc2tsZWFybi5tb2RlbF9zZWxlY3Rpb24gaW1wb3J0IHRyYWluX3Rlc3Rfc3BsaXQKaW1wb3J0IG51bXB5IGFzIG5wCmZyb20gc2tsZWFybi5tZXRyaWNzIGltcG9ydCBhY2N1cmFjeV9zY29yZQpmcm9tIG1scnVuLmFydGlmYWN0cyBpbXBvcnQgVGFibGVBcnRpZmFjdCwgUGxvdEFydGlmYWN0CmltcG9ydCBwYW5kYXMgYXMgcGQKCmRlZiBpcmlzX2dlbmVyYXRvcihjb250ZXh0LCBmb3JtYXQ9J2NzdicpOgogICAgaXJpcyA9IGxvYWRfaXJpcygpCiAgICBpcmlzX2RhdGFzZXQgPSBwZC5EYXRhRnJhbWUoZGF0YT1pcmlzLmRhdGEsIGNvbHVtbnM9aXJpcy5mZWF0dXJlX25hbWVzKQogICAgaXJpc19sYWJlbHMgPSBwZC5EYXRhRnJhbWUoZGF0YT1pcmlzLnRhcmdldCwgY29sdW1ucz1bJ2xhYmVsJ10pCiAgICBpcmlzX2RhdGFzZXQgPSBwZC5jb25jYXQoW2lyaXNfZGF0YXNldCwgaXJpc19sYWJlbHNdLCBheGlzPTEpCiAgICAKICAgIGNvbnRleHQubG9nZ2VyLmluZm8oJ3NhdmluZyBpcmlzIGRhdGFmcmFtZSB0byB7fScuZm9ybWF0KGNvbnRleHQuYXJ0aWZhY3RfcGF0aCkpCiAgICBjb250ZXh0LmxvZ19kYXRhc2V0KCdpcmlzX2RhdGFzZXQnLCBkZj1pcmlzX2RhdGFzZXQsIGZvcm1hdD1mb3JtYXQsIGluZGV4PUZhbHNlKQoK
31 | commands: []
32 | code_origin: https://github.com/mlrun/demo-github-actions.git#0e717588b1354d3d60cd96ba5c352d71aace0552
33 | - url: hub://sklearn_classifier
34 | name: train
35 | - url: hub://test_classifier
36 | name: test
37 | - url: hub://model_server
38 | name: serving
39 | - url: hub://model_server_tester
40 | name: live_tester
41 | - url: hub://describe
42 | name: describe
43 | workflows:
44 | - name: main
45 | path: workflow.py
46 | artifacts: []
47 | artifact_path: v3io:///users/{{run.user}}/pipe/{{workflow.uid}}
48 |
--------------------------------------------------------------------------------
/workflow.py:
--------------------------------------------------------------------------------
1 | from kfp import dsl
2 | from mlrun import mount_v3io, NewTask
3 |
4 |
5 | funcs = {}
6 | this_project = None
7 | DATASET = 'iris_dataset'
8 | LABELS = "label"
9 |
10 | # init functions is used to configure function resources and local settings
11 | def init_functions(functions: dict, project=None, secrets=None):
12 | for f in functions.values():
13 | f.apply(mount_v3io())
14 |
15 | # uncomment this line to collect the inference results into a stream
16 | # and specify a path in V3IO (/)
17 | #functions['serving'].set_env('INFERENCE_STREAM', 'users/admin/model_stream')
18 |
19 |
20 | @dsl.pipeline(
21 | name="Demo training pipeline",
22 | description="Shows how to use mlrun."
23 | )
24 | def kfpipeline():
25 |
26 | # run the ingestion function with the new image and params
27 | ingest = funcs['gen-iris'].as_step(
28 | name="get-data",
29 | handler='iris_generator',
30 | params={'format': 'pq'},
31 | outputs=[DATASET])
32 |
33 | # analyze our dataset
34 | describe = funcs["describe"].as_step(
35 | name="summary",
36 | params={"label_column": LABELS},
37 | inputs={"table": ingest.outputs[DATASET]})
38 |
39 | # train with hyper-paremeters
40 | train = funcs["train"].as_step(
41 | name="train",
42 | params={"sample" : -1,
43 | "label_column" : LABELS,
44 | "test_size" : 0.10},
45 | hyperparams={'model_pkg_class': ["sklearn.ensemble.RandomForestClassifier",
46 | "sklearn.linear_model.LogisticRegression",
47 | "sklearn.ensemble.AdaBoostClassifier"]},
48 | selector='max.accuracy',
49 | inputs={"dataset" : ingest.outputs[DATASET]},
50 | labels={"commit": this_project.params.get('commit', '')},
51 | outputs=['model', 'test_set'])
52 |
53 | # test and visualize our model
54 | test = funcs["test"].as_step(
55 | name="test",
56 | params={"label_column": LABELS},
57 | inputs={"models_path" : train.outputs['model'],
58 | "test_set" : train.outputs['test_set']})
59 |
60 | # deploy our model as a serverless function
61 | deploy = funcs["serving"].deploy_step(models={f"{DATASET}_v1": train.outputs['model']},
62 | tag=this_project.params.get('commit', 'v1'))
63 |
64 | # test out new model server (via REST API calls)
65 | tester = funcs["live_tester"].as_step(name='model-tester',
66 | params={'addr': deploy.outputs['endpoint'], 'model': f"{DATASET}_v1"},
67 | inputs={'table': train.outputs['test_set']})
68 |
--------------------------------------------------------------------------------