├── .gitignore ├── LICENSE ├── OWNERS ├── README.md ├── VERSION ├── grafana.png ├── k8s_serving ├── ab_test_sklearn_tensorflow.json ├── ambassador-auth-service-config.yaml ├── ambassador-auth-service-setup.yaml ├── epsilon_greedy.json ├── epsilon_greedy_3way.json ├── serving_model.json ├── serving_r_model.json └── serving_sk_model.json ├── k8s_train ├── sklearn_training_job.yaml └── tfJob.json ├── models ├── r_mnist │ ├── runtime │ │ ├── Dockerfile │ │ ├── Makefile │ │ ├── install.R │ │ └── mnist.R │ └── train │ │ ├── Dockerfile │ │ ├── Makefile │ │ ├── get_data.sh │ │ ├── install.R │ │ ├── train.R │ │ └── train.sh ├── sk_mnist │ ├── runtime │ │ ├── Dockerfile │ │ ├── Makefile │ │ ├── SkMnist.py │ │ ├── contract.json │ │ └── requirements.txt │ └── train │ │ ├── Dockerfile │ │ ├── Makefile │ │ ├── create_model.py │ │ ├── requirements.txt │ │ └── train.sh └── tf_mnist │ ├── runtime │ ├── DeepMnist.py │ ├── Dockerfile │ ├── Makefile │ ├── contract.json │ └── requirements.txt │ └── train │ ├── Dockerfile │ ├── Makefile │ └── create_model.py ├── nfs.md ├── notebooks ├── MNIST_data │ ├── t10k-images-idx3-ubyte.gz │ ├── t10k-labels-idx1-ubyte.gz │ ├── train-images-idx3-ubyte.gz │ └── train-labels-idx1-ubyte.gz ├── Makefile ├── __init__.py ├── create-protos.sh ├── mnist.png ├── proto │ ├── __init__.py │ └── prediction.proto ├── requirements.txt ├── serving.ipynb ├── training.ipynb ├── utils.py └── visualizer.py ├── scripts ├── README.md ├── create_demo.sh ├── delete-demo.sh ├── env-example.sh ├── nfs-pvc.yaml ├── port-forwards.sh └── watch-mnist.sh └── workflows ├── serving-r-mnist-workflow.yaml ├── serving-sk-mnist-workflow.yaml ├── serving-tf-mnist-workflow.md ├── serving-tf-mnist-workflow.yaml ├── training-r-mnist-workflow.yaml ├── training-sk-mnist-workflow.yaml ├── training-tf-mnist-workflow.md └── training-tf-mnist-workflow.yaml /.gitignore: -------------------------------------------------------------------------------- 1 | # build 2 | /target/ 3 | /public 4 | cluster-manager/.m2/ 5 | 6 | .ipynb_checkpoints 7 | 8 | # eclipse 9 | .classpath 10 | .settings/ 11 | .project 12 | 13 | # Netbeans and IntelliJ files 14 | !.gitignore 15 | /nbproject 16 | /*.ipr 17 | /*.iws 18 | *.iml 19 | .idea 20 | 21 | /bin/ 22 | *~ 23 | *.pyc 24 | .m2 25 | \#* 26 | _*.yaml 27 | _*.json 28 | 29 | 30 | models/tf_mnist/runtime/build/ 31 | models/sk_mnist/runtime/build/ 32 | 33 | models/sk_mnist/train/mnist-original.mat 34 | notebooks/proto/prediction_pb2.py 35 | notebooks/proto/prediction_pb2_grpc.py 36 | notebooks/tensorflow 37 | scripts/kubeflow_src 38 | scripts/env.sh 39 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /OWNERS: -------------------------------------------------------------------------------- 1 | approvers: 2 | - cliveseldon 3 | - jinchihe 4 | - ryandawsonuk 5 | reviewers: 6 | - cliveseldon 7 | - jinchihe 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## :warning: **kubeflow/example-seldon is not maintained** 2 | 3 | This repository has been deprecated and [archived](https://github.com/kubeflow/community/issues/479) on Nov 30th, 2021. 4 | 5 | 6 | # Train and Deploy Machine Learning Models on Kubernetes with Kubeflow and Seldon-Core 7 | 8 | ![MNIST](notebooks/mnist.png "MNIST Digits") 9 | 10 | Using: 11 | 12 | * [kubeflow](https://github.com/kubeflow/kubeflow) 13 | * [seldon-core](https://github.com/SeldonIO/seldon-core) 14 | 15 | The example will be the MNIST handwritten digit classification task. We will train 3 different models to solve this task: 16 | 17 | * A TensorFlow neural network model. 18 | * A scikit-learn random forest model. 19 | * An R least squares model. 20 | 21 | We will then show various rolling deployments: 22 | 23 | 1. Deploy the single Tensorflow model. 24 | 2. Do a rolling update to an AB test of the Tensorflow model and the sklearn model. 25 | 3. Do a rolling update to a Multi-armed Bandit over all 3 models to direct traffic in real time to the best model. 26 | 27 | 28 | In what follows we will: 29 | 30 | 1. [Install kubeflow and seldon-core on a kubernetes cluster](#setup) 31 | 1. [Train the models](#train-the-models) 32 | 1. [Serve the models](#serve-the-models) 33 | 34 | 35 | # Requirements 36 | 37 | * gcloud 38 | * kubectl 39 | * ksonnet 40 | * argo 41 | 42 | 43 | # Setup 44 | 45 | There is a consolidated script to create the demo, which can be found [here](./scripts/README.md). For a step-by-step guide, do the following: 46 | 47 | 1. [Install kubeflow on GKE](https://www.kubeflow.org/docs/started/getting-started-gke/). This should create kubeflow in a namespace ```kubeflow```. We suggest you use the command line install so you can easily modify your Ksonnet installation. Ensure you have the environment variables `KUBEFLOW_SRC` and `KFAPP` set. OAUTH is preferred, as with basic auth [port-forwarding to ambassador is insufficient](https://github.com/kubeflow/kubeflow/issues/3213) 48 | 49 | 1. Install seldon. Go to the Ksonnet application folder set up in the previous step and run 50 | ``` 51 | cd ${KUBEFLOW_SRC}/${KFAPP}/ks_app 52 | 53 | ks pkg install kubeflow/seldon 54 | ks generate seldon seldon 55 | ks apply default -c seldon 56 | ``` 57 | 1. Install Helm 58 | ``` 59 | kubectl -n kube-system create sa tiller 60 | kubectl create clusterrolebinding tiller --clusterrole cluster-admin --serviceaccount=kube-system:tiller 61 | helm init --service-account tiller 62 | kubectl rollout status deploy/tiller-deploy -n kube-system 63 | ``` 64 | 1. Create an NFS disk and persistent volume claim called `nfs-1`.
You can follow a guide on creating an NFS volume using Google Filestore [here](https://cloud.google.com/community/tutorials/gke-filestore-dynamic-provisioning). A consolidated set of steps is shown [here](nfs.md) 65 | 1. Add Cluster Roles so Argo can start jobs successfully 66 | ``` 67 | kubectl create clusterrolebinding my-cluster-admin-binding --clusterrole=cluster-admin --user=$(gcloud info --format="value(config.account)") 68 | kubectl create clusterrolebinding default-admin2 --clusterrole=cluster-admin --serviceaccount=kubeflow:default 69 | ``` 70 | 1. Install the Seldon Analytics Dashboard 71 | ``` 72 | helm install seldon-core-analytics --name seldon-core-analytics --set grafana_prom_admin_password=password --set persistence.enabled=false --repo https://storage.googleapis.com/seldon-charts --namespace kubeflow 73 | ``` 74 | 1. Port-forward the dashboard once it is running 75 | ``` 76 | kubectl port-forward $(kubectl get pods -n kubeflow -l app=grafana-prom-server -o jsonpath='{.items[0].metadata.name}') -n kubeflow 3000:3000 77 | ``` 78 | 1. Visit http://localhost:3000/dashboard/db/prediction-analytics?refresh=5s&orgId=1 and log in using "admin" and the password you set above when launching with helm. 79 | 80 | # MNIST models 81 | 82 | ## Tensorflow Model 83 | 84 | * [Python training code](models/tf_mnist/train/create_model.py) 85 | * [Python runtime prediction code](models/tf_mnist/runtime/DeepMnist.py) 86 | * [Dockerfile to wrap runtime prediction code to run under Seldon-Core](models/tf_mnist/runtime/Dockerfile). 87 | 88 | ## SKLearn Model 89 | 90 | * [Python training code](models/sk_mnist/train/create_model.py) 91 | * [Python runtime prediction code](models/sk_mnist/runtime/SkMnist.py) 92 | * [Dockerfile to wrap runtime prediction code to run under Seldon-Core](models/sk_mnist/runtime/Dockerfile). 93 | 94 | ## R Model 95 | 96 | * [R training code](models/r_mnist/train/train.R) 97 | * [R runtime prediction code](models/r_mnist/runtime/mnist.R) 98 | * [Dockerfile to wrap runtime prediction code to run under Seldon-Core](models/r_mnist/runtime/Dockerfile). 99 | 100 | # Train the Models 101 | 102 | Follow the steps in [./notebooks/training.ipynb](./notebooks/training.ipynb) to: 103 | 104 | * Run Argo Jobs for each model to: 105 | * Create training images and push them to a repo 106 | * Run training 107 | * Create runtime prediction images and push them to a repo 108 | * Deploy the individual runtime model 109 | 110 | **To push the Docker images to your own repo you will need to set up your Docker credentials as a Kubernetes secret containing a [config.json](https://www.projectatomic.io/blog/2016/03/docker-credentials-store/). To do this, find your Docker home (typically ~/.docker) and run `kubectl create secret generic docker-config --from-file=config.json=${DOCKERHOME}/config.json --type=kubernetes.io/config` to [create a secret](https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/#registry-secret-existing-credentials).** 111 | 112 | # Serve the Models 113 | 114 | Follow the steps in [./notebooks/serving.ipynb](./notebooks/serving.ipynb) to: 115 | 116 | 1. Deploy the single Tensorflow model. 117 | 2. Do a rolling update to an AB test of the Tensorflow model and the sklearn model. 118 | 3. Do a rolling update to a Multi-armed Bandit over all 3 models to direct traffic in real time to the best model.
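Once one of these deployments is live you can sanity-check it with a single REST call through Ambassador. The snippet below is a minimal sketch, not part of the original notebooks: it assumes Ambassador has been port-forwarded to localhost:8003 (the port and the port-forward command are assumptions, not taken from this repo) and uses the Seldon REST endpoint for the `mnist-classifier` deployment defined by the manifests in `k8s_serving/`, with the 784-float input shape from the models' `contract.json`:

```python
# Minimal smoke test for the mnist-classifier SeldonDeployment via Ambassador.
# Assumes something like: kubectl port-forward svc/ambassador -n kubeflow 8003:80
# (the local port 8003 is an assumption - use whatever forwarding you set up).
import json
import urllib.request

ENDPOINT = "http://localhost:8003/seldon/mnist-classifier/api/v0.1/predictions"

# One all-zero 28x28 image, flattened to 784 floats as contract.json specifies.
payload = {"data": {"ndarray": [[0.0] * 784]}}

req = urllib.request.Request(
    ENDPOINT,
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    body = json.loads(resp.read())

# The response is a SeldonMessage (see notebooks/proto/prediction.proto):
# class names arrive in data.names, per-class probabilities in data.ndarray.
print(body["data"]["names"])
print(body["data"]["ndarray"])
```

The serving notebook drives the same endpoint (and its gRPC equivalent via the generated `prediction_pb2` stubs); this standalone call is just a quick way to confirm routing through Ambassador works before stepping through the rolling updates.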
119 | 120 | To ensure the notebook can run successfully install the python dependencies: 121 | 122 | ``` 123 | pip install -r notebooks/requirements.txt 124 | ``` 125 | 126 | If you have [installed the Seldon-Core analytics](#setup) you can view them on the grafana dashboard: 127 | 128 | ![Grafana](grafana.png "Grafana Dashboard") 129 | -------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | 0.1 -------------------------------------------------------------------------------- /grafana.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubeflow/example-seldon/d0e056aaa7ec44073a15337f1727006a7ad07ead/grafana.png -------------------------------------------------------------------------------- /k8s_serving/ab_test_sklearn_tensorflow.json: -------------------------------------------------------------------------------- 1 | { 2 | "apiVersion": "machinelearning.seldon.io/v1alpha2", 3 | "kind": "SeldonDeployment", 4 | "metadata": { 5 | "labels": { 6 | "app": "seldon" 7 | }, 8 | "name": "mnist-classifier" 9 | }, 10 | "spec": { 11 | "annotations": { 12 | "project_name": "kubeflow-seldon", 13 | "deployment_version": "v1", 14 | "seldon.io/rest-connection-timeout": "100" 15 | }, 16 | "name": "mnist-classifier", 17 | "predictors": [ 18 | { 19 | "componentSpecs": [{ 20 | "spec": { 21 | "containers": [ 22 | { 23 | "image": "seldonio/deepmnistclassifier_runtime:0.2", 24 | "name": "tf-model", 25 | "volumeMounts": [ 26 | { 27 | "mountPath": "/data", 28 | "name": "persistent-storage" 29 | } 30 | ] 31 | }, 32 | { 33 | "image": "seldonio/skmnistclassifier_runtime:0.2", 34 | "name": "sk-model", 35 | "volumeMounts": [ 36 | { 37 | "mountPath": "/data", 38 | "name": "persistent-storage" 39 | } 40 | ] 41 | } 42 | ], 43 | "volumes": [ 44 | { 45 | "name": "persistent-storage", 46 | "volumeSource" : { 47 | "persistentVolumeClaim": { 48 | "claimName": "nfs-1" 49 | } 50 | } 51 | } 52 | ] 53 | } 54 | }], 55 | "name": "mnist-classifier", 56 | "replicas": 1, 57 | "annotations": { 58 | "predictor_version": "v1" 59 | }, 60 | "graph": { 61 | "name": "random-ab-test", 62 | "implementation":"RANDOM_ABTEST", 63 | "parameters": [ 64 | { 65 | "name":"ratioA", 66 | "value":"0.5", 67 | "type":"FLOAT" 68 | } 69 | ], 70 | "children": [ 71 | { 72 | "name": "tf-model", 73 | "endpoint":{ 74 | "type":"REST" 75 | }, 76 | "type":"MODEL" 77 | }, 78 | { 79 | "name": "sk-model", 80 | "endpoint":{ 81 | "type":"REST" 82 | }, 83 | "type":"MODEL" 84 | } 85 | ] 86 | } 87 | } 88 | ] 89 | } 90 | } 91 | 92 | 93 | 94 | 95 | -------------------------------------------------------------------------------- /k8s_serving/ambassador-auth-service-config.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | name: example-auth 6 | annotations: 7 | getambassador.io/config: | 8 | 9 | --- 10 | apiVersion: ambassador/v0 11 | kind: Module 12 | name: authentication 13 | config: 14 | auth_service: "example-auth:3000" 15 | path_prefix: "/extauth" 16 | spec: 17 | type: ClusterIP 18 | selector: 19 | app: example-auth 20 | ports: 21 | - port: 3000 22 | name: http-example-auth 23 | targetPort: http-api 24 | -------------------------------------------------------------------------------- /k8s_serving/ambassador-auth-service-setup.yaml: 
-------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | name: example-auth 6 | spec: 7 | type: ClusterIP 8 | selector: 9 | app: example-auth 10 | ports: 11 | - port: 3000 12 | name: http-example-auth 13 | targetPort: http-api 14 | --- 15 | apiVersion: extensions/v1beta1 16 | kind: Deployment 17 | metadata: 18 | name: example-auth 19 | spec: 20 | replicas: 1 21 | strategy: 22 | type: RollingUpdate 23 | template: 24 | metadata: 25 | labels: 26 | app: example-auth 27 | spec: 28 | containers: 29 | - name: example-auth 30 | image: seldonio/ambassador-auth-service:1.1.1 31 | imagePullPolicy: IfNotPresent 32 | ports: 33 | - name: http-api 34 | containerPort: 3000 35 | resources: 36 | limits: 37 | cpu: "0.1" 38 | memory: 100Mi 39 | -------------------------------------------------------------------------------- /k8s_serving/epsilon_greedy.json: -------------------------------------------------------------------------------- 1 | { 2 | "apiVersion": "machinelearning.seldon.io/v1alpha2", 3 | "kind": "SeldonDeployment", 4 | "metadata": { 5 | "labels": { 6 | "app": "seldon" 7 | }, 8 | "name": "mnist-classifier" 9 | }, 10 | "spec": { 11 | "annotations": { 12 | "project_name": "kubeflow-seldon", 13 | "deployment_version": "v1" 14 | }, 15 | "name": "mnist-classifier", 16 | "predictors": [ 17 | { 18 | "componentSpecs": [{ 19 | "spec": { 20 | "containers": [ 21 | { 22 | "image": "seldonio/deepmnistclassifier_runtime:0.2", 23 | "name": "tf-model", 24 | "volumeMounts": [ 25 | { 26 | "mountPath": "/data", 27 | "name": "persistent-storage" 28 | } 29 | ] 30 | }, 31 | { 32 | "image": "seldonio/skmnistclassifier_runtime:0.2", 33 | "name": "sk-model", 34 | "volumeMounts": [ 35 | { 36 | "mountPath": "/data", 37 | "name": "persistent-storage" 38 | } 39 | ] 40 | }, 41 | { 42 | "image": "seldonio/mab_epsilon_greedy:1.1", 43 | "name": "eg-router" 44 | } 45 | ], 46 | "volumes": [ 47 | { 48 | "name": "persistent-storage", 49 | "volumeSource" : { 50 | "persistentVolumeClaim": { 51 | "claimName": "nfs-1" 52 | } 53 | } 54 | } 55 | ] 56 | } 57 | }], 58 | "name": "mnist-classifier", 59 | "replicas": 1, 60 | "annotations": { 61 | "predictor_version": "v1" 62 | }, 63 | "graph": { 64 | "name": "eg-router", 65 | "type":"ROUTER", 66 | "parameters": [ 67 | { 68 | "name": "n_branches", 69 | "value": "2", 70 | "type": "INT" 71 | }, 72 | { 73 | "name": "epsilon", 74 | "value": "0.1", 75 | "type": "FLOAT" 76 | }, 77 | { 78 | "name": "verbose", 79 | "value": "1", 80 | "type": "BOOL" 81 | } 82 | ], 83 | "children": [ 84 | { 85 | "name": "sk-model", 86 | "type": "MODEL", 87 | "endpoint":{ 88 | "type":"REST" 89 | } 90 | }, 91 | { 92 | "name": "tf-model", 93 | "type": "MODEL", 94 | "endpoint":{ 95 | "type":"REST" 96 | } 97 | } 98 | ] 99 | } 100 | } 101 | ] 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /k8s_serving/epsilon_greedy_3way.json: -------------------------------------------------------------------------------- 1 | { 2 | "apiVersion": "machinelearning.seldon.io/v1alpha2", 3 | "kind": "SeldonDeployment", 4 | "metadata": { 5 | "labels": { 6 | "app": "seldon" 7 | }, 8 | "name": "mnist-classifier" 9 | }, 10 | "spec": { 11 | "annotations": { 12 | "project_name": "kubeflow-seldon", 13 | "deployment_version": "v1" 14 | }, 15 | "name": "mnist-classifier", 16 | "predictors": [ 17 | { 18 | "componentSpecs": [{ 19 | "spec": { 20 | "containers": [ 21 | { 22 | "image": 
"seldonio/deepmnistclassifier_runtime:0.2", 23 | "name": "tf-model", 24 | "volumeMounts": [ 25 | { 26 | "mountPath": "/data", 27 | "name": "persistent-storage" 28 | } 29 | ] 30 | }, 31 | { 32 | "image": "seldonio/skmnistclassifier_runtime:0.2", 33 | "name": "sk-model", 34 | "volumeMounts": [ 35 | { 36 | "mountPath": "/data", 37 | "name": "persistent-storage" 38 | } 39 | ] 40 | }, 41 | { 42 | "image": "seldonio/rmnistclassifier_runtime:0.2", 43 | "name": "r-model", 44 | "volumeMounts": [ 45 | { 46 | "mountPath": "/data", 47 | "name": "persistent-storage" 48 | } 49 | ] 50 | }, 51 | { 52 | "image": "seldonio/mab_epsilon_greedy:1.1", 53 | "name": "eg-router" 54 | } 55 | ], 56 | "volumes": [ 57 | { 58 | "name": "persistent-storage", 59 | "volumeSource" : { 60 | "persistentVolumeClaim": { 61 | "claimName": "nfs-1" 62 | } 63 | } 64 | } 65 | ] 66 | } 67 | }], 68 | "name": "mnist-classifier", 69 | "replicas": 1, 70 | "annotations": { 71 | "predictor_version": "v1" 72 | }, 73 | "graph": { 74 | "name": "eg-router", 75 | "type":"ROUTER", 76 | "parameters": [ 77 | { 78 | "name": "n_branches", 79 | "value": "3", 80 | "type": "INT" 81 | }, 82 | { 83 | "name": "epsilon", 84 | "value": "0.2", 85 | "type": "FLOAT" 86 | }, 87 | { 88 | "name": "verbose", 89 | "value": "1", 90 | "type": "BOOL" 91 | } 92 | ], 93 | "children": [ 94 | { 95 | "name": "sk-model", 96 | "type": "MODEL", 97 | "endpoint":{ 98 | "type":"REST" 99 | } 100 | }, 101 | { 102 | "name": "tf-model", 103 | "type": "MODEL", 104 | "endpoint":{ 105 | "type":"REST" 106 | } 107 | }, 108 | { 109 | "name": "r-model", 110 | "type": "MODEL", 111 | "endpoint":{ 112 | "type":"REST" 113 | } 114 | } 115 | ] 116 | } 117 | } 118 | ] 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /k8s_serving/serving_model.json: -------------------------------------------------------------------------------- 1 | { 2 | "apiVersion": "machinelearning.seldon.io/v1alpha2", 3 | "kind": "SeldonDeployment", 4 | "metadata": { 5 | "labels": { 6 | "app": "seldon" 7 | }, 8 | "name": "mnist-classifier" 9 | }, 10 | "spec": { 11 | "annotations": { 12 | "deployment_version": "v1", 13 | "project_name": "MNIST Example", 14 | "seldon.io/engine-separate-pod": "false", 15 | "seldon.io/rest-connection-timeout": "100" 16 | }, 17 | "name": "mnist-classifier", 18 | "predictors": [ 19 | { 20 | "annotations": { 21 | "predictor_version": "v1" 22 | }, 23 | "componentSpecs": [{ 24 | "spec": { 25 | "containers": [ 26 | { 27 | "image": "seldonio/deepmnistclassifier_runtime:0.2", 28 | "imagePullPolicy": "Always", 29 | "name": "tf-model", 30 | "volumeMounts": [ 31 | { 32 | "mountPath": "/data", 33 | "name": "persistent-storage" 34 | } 35 | ] 36 | } 37 | ], 38 | "terminationGracePeriodSeconds": 1, 39 | "volumes": [ 40 | { 41 | "name": "persistent-storage", 42 | "volumeSource" : { 43 | "persistentVolumeClaim": { 44 | "claimName": "nfs-1" 45 | } 46 | } 47 | } 48 | ] 49 | } 50 | }], 51 | "graph": { 52 | "children": [], 53 | "endpoint": { 54 | "type": "REST" 55 | }, 56 | "name": "tf-model", 57 | "type": "MODEL" 58 | }, 59 | "name": "mnist-classifier", 60 | "replicas": 1 61 | } 62 | ] 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /k8s_serving/serving_r_model.json: -------------------------------------------------------------------------------- 1 | { 2 | "apiVersion": "machinelearning.seldon.io/v1alpha2", 3 | "kind": "SeldonDeployment", 4 | "metadata": { 5 | "labels": { 6 | "app": "seldon" 7 | }, 8 | "name": 
"mnist-classifier" 9 | }, 10 | "spec": { 11 | "annotations": { 12 | "deployment_version": "v1", 13 | "project_name": "MNIST Example" 14 | }, 15 | "name": "mnist-classifier", 16 | "predictors": [ 17 | { 18 | "annotations": { 19 | "predictor_version": "v1" 20 | }, 21 | "componentSpecs": [{ 22 | "spec": { 23 | "containers": [ 24 | { 25 | "image": "seldonio/rmnistclassifier_runtime:0.2", 26 | "imagePullPolicy": "Always", 27 | "name": "r-model", 28 | "volumeMounts": [ 29 | { 30 | "mountPath": "/data", 31 | "name": "persistent-storage" 32 | } 33 | ] 34 | } 35 | ], 36 | "terminationGracePeriodSeconds": 1, 37 | "volumes": [ 38 | { 39 | "name": "persistent-storage", 40 | "volumeSource" : { 41 | "persistentVolumeClaim": { 42 | "claimName": "nfs-1" 43 | } 44 | } 45 | } 46 | ] 47 | } 48 | }], 49 | "graph": { 50 | "children": [], 51 | "endpoint": { 52 | "type": "REST" 53 | }, 54 | "name": "r-model", 55 | "type": "MODEL" 56 | }, 57 | "name": "mnist-classifier", 58 | "replicas": 1 59 | } 60 | ] 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /k8s_serving/serving_sk_model.json: -------------------------------------------------------------------------------- 1 | { 2 | "apiVersion": "machinelearning.seldon.io/v1alpha2", 3 | "kind": "SeldonDeployment", 4 | "metadata": { 5 | "labels": { 6 | "app": "seldon" 7 | }, 8 | "name": "mnist-classifier" 9 | }, 10 | "spec": { 11 | "annotations": { 12 | "deployment_version": "v1", 13 | "project_name": "MNIST Example" 14 | }, 15 | "name": "mnist-classifier", 16 | "predictors": [ 17 | { 18 | "annotations": { 19 | "predictor_version": "v1" 20 | }, 21 | "componentSpecs": [{ 22 | "spec": { 23 | "containers": [ 24 | { 25 | "image": "seldonio/skmnistclassifier_runtime:0.2", 26 | "imagePullPolicy": "Always", 27 | "name": "sk-model", 28 | "volumeMounts": [ 29 | { 30 | "mountPath": "/data", 31 | "name": "persistent-storage" 32 | } 33 | ] 34 | } 35 | ], 36 | "terminationGracePeriodSeconds": 1, 37 | "volumes": [ 38 | { 39 | "name": "persistent-storage", 40 | "volumeSource" : { 41 | "persistentVolumeClaim": { 42 | "claimName": "nfs-1" 43 | } 44 | } 45 | } 46 | ] 47 | } 48 | }], 49 | "graph": { 50 | "children": [], 51 | "endpoint": { 52 | "type": "REST" 53 | }, 54 | "name": "sk-model", 55 | "type": "MODEL" 56 | }, 57 | "name": "mnist-classifier", 58 | "replicas": 1 59 | } 60 | ] 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /k8s_train/sklearn_training_job.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: "batch/v1" 2 | kind: "Job" 3 | metadata: 4 | name: "sk-train" 5 | namespace: "default" 6 | spec: 7 | template: 8 | metadata: 9 | name: "sk-train" 10 | spec: 11 | containers: 12 | - 13 | image: "seldonio/skmnistclassifier_trainer:0.1" 14 | name: "sk-train" 15 | volumeMounts: 16 | - 17 | mountPath: "/data" 18 | name: "persistent-storage" 19 | restartPolicy: "Never" 20 | volumes: 21 | - 22 | name: "persistent-storage" 23 | persistentVolumeClaim: 24 | claimName: "ml-data" 25 | -------------------------------------------------------------------------------- /k8s_train/tfJob.json: -------------------------------------------------------------------------------- 1 | { 2 | "apiVersion": "kubeflow.org/v1alpha1", 3 | "kind": "TFJob", 4 | "metadata": { 5 | "name": "mnist-train", 6 | "namespace": "kubeflow-seldon" 7 | }, 8 | "spec": { 9 | "replicaSpecs": [ 10 | { 11 | "replicas": 1, 12 | "template": { 13 | "spec": { 14 | "containers": [ 15 | 
{ 16 | "image": "seldonio/deepmnistclassifier_trainer:0.1", 17 | "name": "tensorflow", 18 | "volumeMounts": [ 19 | { 20 | "mountPath": "/data", 21 | "name": "persistent-storage" 22 | } 23 | ] 24 | } 25 | ], 26 | "restartPolicy": "OnFailure", 27 | "volumes": [ 28 | { 29 | "name": "persistent-storage", 30 | "persistentVolumeClaim": { 31 | "claimName": "ml-data" 32 | } 33 | } 34 | ] 35 | } 36 | }, 37 | "tfReplicaType": "MASTER" 38 | } 39 | ] 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /models/r_mnist/runtime/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM rocker/r-apt:bionic 2 | 3 | RUN apt-get update && \ 4 | apt-get install -y -qq \ 5 | r-cran-plumber \ 6 | r-cran-jsonlite \ 7 | r-cran-optparse \ 8 | r-cran-stringr \ 9 | r-cran-urltools \ 10 | r-cran-caret \ 11 | r-cran-pls \ 12 | curl 13 | 14 | ENV MODEL_NAME mnist.R 15 | ENV API_TYPE REST 16 | ENV SERVICE_TYPE MODEL 17 | ENV PERSISTENCE 0 18 | 19 | RUN mkdir microservice 20 | COPY . /microservice 21 | WORKDIR /microservice 22 | 23 | RUN curl -OL https://raw.githubusercontent.com/SeldonIO/seldon-core/v0.2.7/wrappers/s2i/R/microservice.R > /microservice/microservice.R 24 | 25 | EXPOSE 5000 26 | 27 | CMD Rscript microservice.R --model $MODEL_NAME --api $API_TYPE --service $SERVICE_TYPE --persistence $PERSISTENCE -------------------------------------------------------------------------------- /models/r_mnist/runtime/Makefile: -------------------------------------------------------------------------------- 1 | 2 | seldon_build_image_local: 3 | docker build . -t seldonio/rmnistclassifier_runtime:0.2 4 | 5 | seldon_push_docker_hub: 6 | docker push seldonio/rmnistclassifier_runtime:0.2 7 | -------------------------------------------------------------------------------- /models/r_mnist/runtime/install.R: -------------------------------------------------------------------------------- 1 | install.packages('pls') 2 | 3 | -------------------------------------------------------------------------------- /models/r_mnist/runtime/mnist.R: -------------------------------------------------------------------------------- 1 | library(methods) 2 | 3 | predict.mnist <- function(mnist,newdata=list()) { 4 | cn <- 1:784 5 | for (i in seq_along(cn)){cn[i] <- paste("X",cn[i],sep = "")} 6 | colnames(newdata) <- cn 7 | predict(mnist$model, newdata = newdata, type='prob') 8 | } 9 | 10 | send_feedback.mnist <- function(mnist,request=list(),reward=1,truth=list()) { 11 | } 12 | 13 | new_mnist <- function(filename) { 14 | model <- readRDS(filename) 15 | structure(list(model=model), class = "mnist") 16 | } 17 | 18 | initialise_seldon <- function(params) { 19 | new_mnist("/data/model.Rds") 20 | } -------------------------------------------------------------------------------- /models/r_mnist/train/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM rocker/r-apt:bionic 2 | 3 | RUN apt-get update && \ 4 | apt-get install -y -qq \ 5 | r-cran-caret \ 6 | r-cran-pls \ 7 | r-cran-e1071 8 | 9 | RUN R -e 'install.packages("doParallel")' 10 | 11 | RUN mkdir training 12 | COPY /train.R /training/train.R 13 | COPY /get_data.sh /training/get_data.sh 14 | COPY ./train.sh /training/train.sh 15 | 16 | RUN cd /training && \ 17 | ./get_data.sh 18 | 19 | WORKDIR /training 20 | 21 | CMD ["/training/train.sh"] -------------------------------------------------------------------------------- /models/r_mnist/train/Makefile: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | build_model: 4 | docker build --force-rm=true -t seldonio/rmnistclassifier_trainer:0.1 . 5 | 6 | push_image: 7 | docker push seldonio/rmnistclassifier_trainer:0.1 8 | 9 | -------------------------------------------------------------------------------- /models/r_mnist/train/get_data.sh: -------------------------------------------------------------------------------- 1 | wget http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz 2 | wget http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz 3 | wget http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz 4 | wget http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz 5 | 6 | 7 | gunzip train-images-idx3-ubyte.gz 8 | gunzip train-labels-idx1-ubyte.gz 9 | gunzip t10k-images-idx3-ubyte.gz 10 | gunzip t10k-labels-idx1-ubyte.gz 11 | -------------------------------------------------------------------------------- /models/r_mnist/train/install.R: -------------------------------------------------------------------------------- 1 | install.packages('caret') 2 | install.packages('doParallel') 3 | install.packages('pls') 4 | install.packages('e1071') 5 | 6 | -------------------------------------------------------------------------------- /models/r_mnist/train/train.R: -------------------------------------------------------------------------------- 1 | library(caret) 2 | library(doParallel) 3 | 4 | # Enable parallel processing. 5 | cl <- makeCluster(detectCores()) 6 | registerDoParallel(cl) 7 | 8 | # Load the MNIST digit recognition dataset into R 9 | # http://yann.lecun.com/exdb/mnist/ 10 | # assume you have all 4 files and gunzip'd them 11 | # creates train$n, train$x, train$y and test$n, test$x, test$y 12 | # e.g. train$x is a 60000 x 784 matrix, each row is one digit (28x28) 13 | # call: show_digit(train$x[5,]) to see a digit. 14 | # brendan o'connor - gist.github.com/39760 - anyall.org 15 | load_mnist <- function() { 16 | load_image_file <- function(filename) { 17 | ret = list() 18 | f = file(filename,'rb') 19 | readBin(f,'integer',n=1,size=4,endian='big') 20 | ret$n = readBin(f,'integer',n=1,size=4,endian='big') 21 | nrow = readBin(f,'integer',n=1,size=4,endian='big') 22 | ncol = readBin(f,'integer',n=1,size=4,endian='big') 23 | x = readBin(f,'integer',n=ret$n*nrow*ncol,size=1,signed=F) 24 | ret$x = matrix(x, ncol=nrow*ncol, byrow=T) 25 | close(f) 26 | ret 27 | } 28 | load_label_file <- function(filename) { 29 | f = file(filename,'rb') 30 | readBin(f,'integer',n=1,size=4,endian='big') 31 | n = readBin(f,'integer',n=1,size=4,endian='big') 32 | y = readBin(f,'integer',n=n,size=1,signed=F) 33 | close(f) 34 | y 35 | } 36 | train <<- load_image_file('train-images-idx3-ubyte') 37 | test <<- load_image_file('t10k-images-idx3-ubyte') 38 | 39 | train$y <<- load_label_file('train-labels-idx1-ubyte') 40 | test$y <<- load_label_file('t10k-labels-idx1-ubyte') 41 | } 42 | 43 | show_digit <- function(arr784, col=gray(12:1/12), ...) { 44 | image(matrix(arr784, nrow=28)[,28:1], col=col, ...) 45 | } 46 | 47 | train <- data.frame() 48 | test <- data.frame() 49 | 50 | # Load data. 51 | load_mnist() 52 | 53 | # Normalize: X = (X - min) / (max - min) => X = (X - 0) / (255 - 0) => X = X / 255. 54 | train$x <- train$x / 255 55 | 56 | # Setup training data with digit and pixel values with 60/40 split for train/cv. 
57 | inTrain = data.frame(y=train$y, train$x) 58 | inTrain$y <- as.factor(inTrain$y) 59 | trainIndex = createDataPartition(inTrain$y, p = 0.60,list=FALSE) 60 | training = inTrain[trainIndex,] 61 | cv = inTrain[-trainIndex,] 62 | 63 | # SVM. 95/94. 64 | #fit <- train(y ~ ., data = head(training, 1000), method = 'svmRadial', tuneGrid = data.frame(sigma=0.0107249, C=1)) 65 | fit <- train(y ~ ., data = head(training, 1000), method = 'pls') 66 | results <- predict(fit, newdata = head(cv, 1000), type='prob') 67 | #confusionMatrix(results, head(cv$y, 1000)) 68 | saveRDS(fit, file = "/data/model.Rds", compress = TRUE) 69 | -------------------------------------------------------------------------------- /models/r_mnist/train/train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # exit when any command fails 4 | set -e 5 | 6 | until mountpoint -q /data; do 7 | echo "$(date) - waiting for /data to be mounted..." 8 | sleep 1 9 | done 10 | 11 | ls -l /data 12 | 13 | Rscript train.R 14 | 15 | ls -l /data 16 | -------------------------------------------------------------------------------- /models/sk_mnist/runtime/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.7-slim 2 | COPY . /app 3 | WORKDIR /app 4 | RUN pip install -r requirements.txt 5 | EXPOSE 5000 6 | 7 | # Define environment variable 8 | ENV MODEL_NAME SkMnist 9 | ENV API_TYPE REST 10 | ENV SERVICE_TYPE MODEL 11 | ENV PERSISTENCE 0 12 | 13 | CMD exec seldon-core-microservice $MODEL_NAME $API_TYPE --service-type $SERVICE_TYPE --persistence $PERSISTENCE 14 | -------------------------------------------------------------------------------- /models/sk_mnist/runtime/Makefile: -------------------------------------------------------------------------------- 1 | 2 | seldon_build_image_local: 3 | docker build . 
-t seldonio/skmnistclassifier_runtime:0.2 4 | 5 | seldon_push_docker_hub: 6 | docker push seldonio/skmnistclassifier_runtime:0.2 7 | 8 | -------------------------------------------------------------------------------- /models/sk_mnist/runtime/SkMnist.py: -------------------------------------------------------------------------------- 1 | from sklearn.externals import joblib 2 | 3 | class SkMnist(object): 4 | def __init__(self): 5 | self.class_names = ["class:{}".format(str(i)) for i in range(10)] 6 | self.clf = joblib.load('/data/sk.pkl') 7 | 8 | def predict(self,X,feature_names): 9 | predictions = self.clf.predict_proba(X) 10 | return predictions 11 | 12 | 13 | -------------------------------------------------------------------------------- /models/sk_mnist/runtime/contract.json: -------------------------------------------------------------------------------- 1 | { 2 | "features":[ 3 | { 4 | "name":"x", 5 | "dtype":"FLOAT", 6 | "ftype":"continuous", 7 | "range":[0,1], 8 | "repeat":784 9 | } 10 | ], 11 | "targets":[ 12 | { 13 | "name":"class", 14 | "dtype":"FLOAT", 15 | "ftype":"continuous", 16 | "range":[0,1], 17 | "repeat":10 18 | } 19 | ] 20 | } 21 | 22 | 23 | -------------------------------------------------------------------------------- /models/sk_mnist/runtime/requirements.txt: -------------------------------------------------------------------------------- 1 | scipy>= 0.13.3 2 | scikit-learn>=0.18 3 | seldon-core>=0.2.5 -------------------------------------------------------------------------------- /models/sk_mnist/train/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.7-slim 2 | 3 | RUN apt-get update -y 4 | RUN apt-get install -y python-pip python-dev build-essential 5 | 6 | COPY /requirements.txt /tmp/ 7 | RUN cd /tmp && \ 8 | pip install --no-cache-dir -r requirements.txt 9 | 10 | RUN mkdir training 11 | COPY ./create_model.py /training/create_model.py 12 | COPY ./train.sh /training/train.sh 13 | WORKDIR /training 14 | 15 | CMD ["/training/train.sh"] 16 | -------------------------------------------------------------------------------- /models/sk_mnist/train/Makefile: -------------------------------------------------------------------------------- 1 | 2 | 3 | build_model: 4 | docker build --force-rm=true -t seldonio/skmnistclassifier_trainer:0.2 . 
5 | 6 | push_image: 7 | docker push seldonio/skmnistclassifier_trainer:0.2 8 | 9 | -------------------------------------------------------------------------------- /models/sk_mnist/train/create_model.py: -------------------------------------------------------------------------------- 1 | from sklearn.ensemble import RandomForestClassifier 2 | from sklearn import datasets, metrics 3 | from sklearn.utils import shuffle 4 | from sklearn.datasets import fetch_mldata 5 | from sklearn.externals import joblib 6 | from six.moves import urllib 7 | 8 | if __name__ == '__main__': 9 | try: 10 | mnist = fetch_mldata('MNIST original') 11 | except: 12 | print("Could not download MNIST data from mldata.org, trying alternative...") 13 | 14 | # Alternative method to load MNIST, if mldata.org is down 15 | from scipy.io import loadmat 16 | mnist_alternative_url = "https://github.com/amplab/datascience-sp14/raw/master/lab7/mldata/mnist-original.mat" 17 | mnist_path = "./mnist-original.mat" 18 | response = urllib.request.urlopen(mnist_alternative_url) 19 | with open(mnist_path, "wb") as f: 20 | content = response.read() 21 | f.write(content) 22 | mnist_raw = loadmat(mnist_path) 23 | mnist = { 24 | "data": mnist_raw["data"].T, 25 | "target": mnist_raw["label"][0], 26 | "COL_NAMES": ["label", "data"], 27 | "DESCR": "mldata.org dataset: mnist-original", 28 | } 29 | print("Success!") 30 | 31 | #mnist = fetch_mldata('MNIST original', data_home="./mnist_sklearn") 32 | # To apply a classifier on this data, we need to flatten the image, to 33 | # turn the data in a (samples, feature) matrix: 34 | n_samples = len(mnist['data']) 35 | data = mnist['data'].reshape((n_samples, -1)) 36 | targets = mnist['target'] 37 | 38 | data,targets = shuffle(data,targets) 39 | classifier = RandomForestClassifier(n_estimators=30) 40 | 41 | # We learn the digits on the first half of the digits 42 | classifier.fit(data[:n_samples // 2], targets[:n_samples // 2]) 43 | 44 | # Now predict the value of the digit on the second half: 45 | expected = targets[n_samples // 2:] 46 | test_data = data[n_samples // 2:] 47 | 48 | print(classifier.score(test_data, expected)) 49 | 50 | predicted = classifier.predict(data[n_samples // 2:]) 51 | 52 | print("Classification report for classifier %s:\n%s\n" 53 | % (classifier, metrics.classification_report(expected, predicted))) 54 | print("Confusion matrix:\n%s" % metrics.confusion_matrix(expected, predicted)) 55 | 56 | joblib.dump(classifier, '/data/sk.pkl') 57 | 58 | 59 | -------------------------------------------------------------------------------- /models/sk_mnist/train/requirements.txt: -------------------------------------------------------------------------------- 1 | scipy 2 | scikit-learn>=0.18 3 | six 4 | -------------------------------------------------------------------------------- /models/sk_mnist/train/train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # exit when any command fails 4 | set -e 5 | 6 | until mountpoint -q /data; do 7 | echo "$(date) - wainting for /data to be mounted..." 
8 | sleep 1 9 | done 10 | 11 | ls -l /data 12 | 13 | python -u create_model.py 14 | 15 | ls -l /data 16 | -------------------------------------------------------------------------------- /models/tf_mnist/runtime/DeepMnist.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import logging 3 | logging.basicConfig(format='%(asctime)s.%(msecs)03d %(levelname)s {%(module)s} [%(funcName)s] %(message)s', datefmt='%Y-%m-%d,%H:%M:%S', level=logging.INFO) 4 | logger = logging.getLogger(__name__) 5 | 6 | class DeepMnist(object): 7 | def __init__(self): 8 | self.class_names = ["class:{}".format(str(i)) for i in range(10)] 9 | self.sess = tf.Session() 10 | saver = tf.train.import_meta_graph("/data/deep_mnist_model.meta") 11 | saver.restore(self.sess,tf.train.latest_checkpoint("/data/")) 12 | 13 | graph = tf.get_default_graph() 14 | self.x = graph.get_tensor_by_name("x:0") 15 | self.y = graph.get_tensor_by_name("y:0") 16 | 17 | def predict(self,X,feature_names): 18 | predictions = self.sess.run(self.y,feed_dict={self.x:X}) 19 | return predictions 20 | 21 | 22 | -------------------------------------------------------------------------------- /models/tf_mnist/runtime/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.7-slim 2 | COPY . /app 3 | WORKDIR /app 4 | RUN pip install -r requirements.txt 5 | EXPOSE 5000 6 | 7 | # Define environment variable 8 | ENV MODEL_NAME DeepMnist 9 | ENV API_TYPE REST 10 | ENV SERVICE_TYPE MODEL 11 | ENV PERSISTENCE 0 12 | 13 | CMD exec seldon-core-microservice $MODEL_NAME $API_TYPE --service-type $SERVICE_TYPE --persistence $PERSISTENCE -------------------------------------------------------------------------------- /models/tf_mnist/runtime/Makefile: -------------------------------------------------------------------------------- 1 | 2 | seldon_build_image_local: 3 | docker build . -t seldonio/deepmnistclassifier_runtime:0.2 4 | 5 | seldon_push_docker_hub: 6 | docker push seldonio/deepmnistclassifier_runtime:0.2 -------------------------------------------------------------------------------- /models/tf_mnist/runtime/contract.json: -------------------------------------------------------------------------------- 1 | { 2 | "features":[ 3 | { 4 | "name":"x", 5 | "dtype":"FLOAT", 6 | "ftype":"continuous", 7 | "range":[0,1], 8 | "repeat":784 9 | } 10 | ], 11 | "targets":[ 12 | { 13 | "name":"class", 14 | "dtype":"FLOAT", 15 | "ftype":"continuous", 16 | "range":[0,1], 17 | "repeat":10 18 | } 19 | ] 20 | } 21 | 22 | 23 | -------------------------------------------------------------------------------- /models/tf_mnist/runtime/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow==1.13.1 2 | seldon-core>=0.2.5 -------------------------------------------------------------------------------- /models/tf_mnist/train/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM tensorflow/tensorflow:1.3.0 2 | 3 | RUN mkdir training 4 | COPY ./create_model.py /training/create_model.py 5 | WORKDIR /training 6 | 7 | CMD ["python","-u","create_model.py"] 8 | -------------------------------------------------------------------------------- /models/tf_mnist/train/Makefile: -------------------------------------------------------------------------------- 1 | 2 | 3 | build_model: 4 | docker build --force-rm=true -t seldonio/deepmnistclassifier_trainer:0.1 . 
5 | 6 | push_image: 7 | docker push seldonio/deepmnistclassifier_trainer:0.1 8 | 9 | -------------------------------------------------------------------------------- /models/tf_mnist/train/create_model.py: -------------------------------------------------------------------------------- 1 | from tensorflow.examples.tutorials.mnist import input_data 2 | mnist = input_data.read_data_sets("MNIST_data/", one_hot = True) 3 | import tensorflow as tf 4 | 5 | if __name__ == '__main__': 6 | 7 | x = tf.placeholder(tf.float32, [None,784], name="x") 8 | 9 | W = tf.Variable(tf.zeros([784,10])) 10 | b = tf.Variable(tf.zeros([10])) 11 | 12 | y = tf.nn.softmax(tf.matmul(x,W) + b, name="y") 13 | 14 | y_ = tf.placeholder(tf.float32, [None, 10]) 15 | 16 | 17 | cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1])) 18 | 19 | train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy) 20 | 21 | init = tf.initialize_all_variables() 22 | 23 | sess = tf.Session() 24 | sess.run(init) 25 | 26 | for i in range(1000): 27 | batch_xs, batch_ys = mnist.train.next_batch(100) 28 | sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys}) 29 | 30 | correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1)) 31 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 32 | print(sess.run(accuracy, feed_dict = {x: mnist.test.images, y_:mnist.test.labels})) 33 | 34 | saver = tf.train.Saver() 35 | 36 | saver.save(sess, "/data/deep_mnist_model") 37 | -------------------------------------------------------------------------------- /nfs.md: -------------------------------------------------------------------------------- 1 | # Example NFS Setup 2 | 3 | The steps below are a consolidated set of steps following the guide [here](https://cloud.google.com/community/tutorials/gke-filestore-dynamic-provisioning). 4 | 5 | Set the following variables 6 | 7 | * `FS` : the name of your filestore 8 | * `PROJECT` : Your Google Project 9 | * `ZONE` : Your GCP Zone 10 | 11 | Create a Google Filestore and install the helm chart for nfs-client-provisioner to use it. 
12 | ``` 13 | PROJECT=seldon-demos 14 | FS=mnist-data 15 | ZONE=europe-west1-b 16 | 17 | gcloud beta filestore instances create ${FS} --project=${PROJECT} --location=${ZONE} --tier=STANDARD --file-share=name="volumes",capacity=1TB --network=name="default",reserved-ip-range="10.0.0.0/29" 18 | 19 | FSADDR=$(gcloud beta filestore instances describe ${FS} --project=${PROJECT} --location=${ZONE} --format="value(networks.ipAddresses[0])") 20 | 21 | helm install stable/nfs-client-provisioner --name nfs-cp --set nfs.server=${FSADDR} --set nfs.path=/volumes 22 | kubectl rollout status deploy/nfs-cp-nfs-client-provisioner -n kubeflow 23 | ``` 24 | 25 | To create the NFS claim, save the following and apply it to your Kubernetes cluster: 26 | 27 | ``` 28 | apiVersion: v1 29 | kind: PersistentVolumeClaim 30 | metadata: 31 | name: nfs-1 32 | spec: 33 | accessModes: 34 | - ReadWriteMany 35 | storageClassName: nfs-client 36 | resources: 37 | requests: 38 | storage: 30Gi 39 | ``` 40 | -------------------------------------------------------------------------------- /notebooks/MNIST_data/t10k-images-idx3-ubyte.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubeflow/example-seldon/d0e056aaa7ec44073a15337f1727006a7ad07ead/notebooks/MNIST_data/t10k-images-idx3-ubyte.gz -------------------------------------------------------------------------------- /notebooks/MNIST_data/t10k-labels-idx1-ubyte.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubeflow/example-seldon/d0e056aaa7ec44073a15337f1727006a7ad07ead/notebooks/MNIST_data/t10k-labels-idx1-ubyte.gz -------------------------------------------------------------------------------- /notebooks/MNIST_data/train-images-idx3-ubyte.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubeflow/example-seldon/d0e056aaa7ec44073a15337f1727006a7ad07ead/notebooks/MNIST_data/train-images-idx3-ubyte.gz -------------------------------------------------------------------------------- /notebooks/MNIST_data/train-labels-idx1-ubyte.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubeflow/example-seldon/d0e056aaa7ec44073a15337f1727006a7ad07ead/notebooks/MNIST_data/train-labels-idx1-ubyte.gz -------------------------------------------------------------------------------- /notebooks/Makefile: -------------------------------------------------------------------------------- 1 | SHELL=/bin/bash 2 | 3 | tensorflow/core/framework/tensor.proto: 4 | ./create-protos.sh 5 | 6 | .PHONY: create_protos 7 | create_protos: tensorflow/core/framework/tensor.proto 8 | 9 | .PHONY: clean 10 | clean: 11 | @rm -rfv tensorflow 12 | -------------------------------------------------------------------------------- /notebooks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubeflow/example-seldon/d0e056aaa7ec44073a15337f1727006a7ad07ead/notebooks/__init__.py -------------------------------------------------------------------------------- /notebooks/create-protos.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | release=${1:-"master"} 4 | 5 | echo Downloading proto files for ${release} 6 | 7 | base=https://raw.githubusercontent.com/tensorflow 8 | tensorflow_base=${base}/tensorflow/${release} 9 | 10 |
base_folder=tensorflow/core/framework/
11 | mkdir -p ${base_folder}
12 | 
13 | curl -s ${tensorflow_base}/tensorflow/core/framework/types.proto > ${base_folder}/types.proto
14 | curl -s ${tensorflow_base}/tensorflow/core/framework/resource_handle.proto > ${base_folder}/resource_handle.proto
15 | curl -s ${tensorflow_base}/tensorflow/core/framework/tensor_shape.proto > ${base_folder}/tensor_shape.proto
16 | curl -s ${tensorflow_base}/tensorflow/core/framework/tensor.proto > ${base_folder}/tensor.proto
17 | 
18 | 
--------------------------------------------------------------------------------
/notebooks/mnist.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kubeflow/example-seldon/d0e056aaa7ec44073a15337f1727006a7ad07ead/notebooks/mnist.png
--------------------------------------------------------------------------------
/notebooks/proto/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kubeflow/example-seldon/d0e056aaa7ec44073a15337f1727006a7ad07ead/notebooks/proto/__init__.py
--------------------------------------------------------------------------------
/notebooks/proto/prediction.proto:
--------------------------------------------------------------------------------
1 | syntax = "proto3";
2 | 
3 | import "google/protobuf/struct.proto";
4 | import "tensorflow/core/framework/tensor.proto";
5 | 
6 | package seldon.protos;
7 | 
8 | option java_package = "io.seldon.protos";
9 | option java_outer_classname = "PredictionProtos";
10 | option go_package = "github.com/seldonio/seldon-core/examples/wrappers/go/pkg/api";
11 | 
12 | // [START Messages]
13 | 
14 | message SeldonMessage {
15 | 
16 |   Status status = 1;
17 |   Meta meta = 2;
18 |   oneof data_oneof {
19 |     DefaultData data = 3;
20 |     bytes binData = 4;
21 |     string strData = 5;
22 |   }
23 | }
24 | 
25 | message DefaultData {
26 |   repeated string names = 1;
27 |   oneof data_oneof {
28 |     Tensor tensor = 2;
29 |     google.protobuf.ListValue ndarray = 3;
30 |     tensorflow.TensorProto tftensor = 4;
31 |   }
32 | }
33 | 
34 | message Tensor {
35 |   repeated int32 shape = 1 [packed=true];
36 |   repeated double values = 2 [packed=true];
37 | }
38 | 
39 | message Meta {
40 |   string puid = 1;
41 |   map<string,google.protobuf.Value> tags = 2;
42 |   map<string,int32> routing = 3;
43 |   map<string,string> requestPath = 4;
44 |   repeated Metric metrics = 5;
45 | }
46 | 
47 | message Metric {
48 |   enum MetricType {
49 |     COUNTER = 0;
50 |     GAUGE = 1;
51 |     TIMER = 2;
52 |   }
53 |   string key = 1;
54 |   MetricType type = 2;
55 |   float value = 3;
56 |   map<string,string> tags = 4;
57 | }
58 | 
59 | message SeldonMessageList {
60 |   repeated SeldonMessage seldonMessages = 1;
61 | }
62 | 
63 | message Status {
64 | 
65 |   enum StatusFlag {
66 |     SUCCESS = 0;
67 |     FAILURE = 1;
68 |   }
69 | 
70 |   int32 code = 1;
71 |   string info = 2;
72 |   string reason = 3;
73 |   StatusFlag status = 4;
74 | }
75 | 
76 | message Feedback {
77 |   SeldonMessage request = 1;
78 |   SeldonMessage response = 2;
79 |   float reward = 3;
80 |   SeldonMessage truth = 4;
81 | }
82 | 
83 | message RequestResponse {
84 |   SeldonMessage request = 1;
85 |   SeldonMessage response = 2;
86 | }
87 | 
88 | // [END Messages]
89 | 
90 | 
91 | // [START Services]
92 | 
93 | service Generic {
94 |   rpc TransformInput(SeldonMessage) returns (SeldonMessage) {};
95 |   rpc TransformOutput(SeldonMessage) returns (SeldonMessage) {};
96 |   rpc Route(SeldonMessage) returns (SeldonMessage) {};
97 |   rpc Aggregate(SeldonMessageList) returns (SeldonMessage) {};
98 |   rpc SendFeedback(Feedback) returns 
(SeldonMessage) {};
99 | }
100 | 
101 | service Model {
102 |   rpc Predict(SeldonMessage) returns (SeldonMessage) {};
103 |   rpc SendFeedback(Feedback) returns (SeldonMessage) {};
104 | }
105 | 
106 | service Router {
107 |   rpc Route(SeldonMessage) returns (SeldonMessage) {};
108 |   rpc SendFeedback(Feedback) returns (SeldonMessage) {};
109 | }
110 | 
111 | service Transformer {
112 |   rpc TransformInput(SeldonMessage) returns (SeldonMessage) {};
113 | }
114 | 
115 | service OutputTransformer {
116 |   rpc TransformOutput(SeldonMessage) returns (SeldonMessage) {};
117 | }
118 | 
119 | service Combiner {
120 |   rpc Aggregate(SeldonMessageList) returns (SeldonMessage) {};
121 | }
122 | 
123 | 
124 | service Seldon {
125 |   rpc Predict(SeldonMessage) returns (SeldonMessage) {};
126 |   rpc SendFeedback(Feedback) returns (SeldonMessage) {};
127 | }
128 | 
129 | // [END Services]
--------------------------------------------------------------------------------
/notebooks/requirements.txt:
--------------------------------------------------------------------------------
1 | matplotlib==3.0.3
2 | grpcio==1.20.1
3 | grpcio-tools==1.20.1
4 | graphviz==0.10.1
--------------------------------------------------------------------------------
/notebooks/serving.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "markdown",
5 |    "metadata": {},
6 |    "source": [
7 |     "# Deploying Various MNIST Models on Kubernetes \n",
8 |     "\n",
9 |     "Using:\n",
10 |     "\n",
11 |     " * kubeflow\n",
12 |     " * seldon-core\n",
13 |     " \n",
14 |     " \n",
15 |     "Follow the main README to set up kubeflow and seldon-core. This notebook will show various rolling deployments of the trained models:\n",
16 |     "\n",
17 |     " * Single model\n",
18 |     " * AB Test between 2 models\n",
19 |     " * Multi-Armed Bandit over 3 models\n",
20 |     " \n",
21 |     "### Dependencies\n",
22 |     " \n",
23 |     " * Tensorflow\n",
24 |     " * grpcio package\n",
25 |     " "
26 |    ]
27 |   },
28 |   {
29 |    "cell_type": "markdown",
30 |    "metadata": {},
31 |    "source": [
32 |     "# Setup\n",
33 |     "\n",
34 |     "Set kubectl to use the namespace where you installed kubeflow and seldon. In the README it is kubeflow."
35 |    ]
36 |   },
37 |   {
38 |    "cell_type": "code",
39 |    "execution_count": null,
40 |    "metadata": {},
41 |    "outputs": [],
42 |    "source": [
43 |     "!kubectl config set-context $(kubectl config current-context) --namespace=kubeflow"
44 |    ]
45 |   },
46 |   {
47 |    "cell_type": "code",
48 |    "execution_count": null,
49 |    "metadata": {},
50 |    "outputs": [],
51 |    "source": [
52 |     "!make create_protos"
53 |    ]
54 |   },
55 |   {
56 |    "cell_type": "code",
57 |    "execution_count": null,
58 |    "metadata": {},
59 |    "outputs": [],
60 |    "source": [
61 |     "!python -m grpc.tools.protoc -I. --python_out=. --grpc_python_out=. 
./proto/prediction.proto" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "%matplotlib inline\n", 71 | "import utils\n", 72 | "from visualizer import get_graph\n", 73 | "mnist = utils.download_mnist()" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "\n", 81 | "\n", 82 | "**Ensure you have port forwarded the ambassador reverse proxy**\n", 83 | "\n", 84 | "```bash\n", 85 | "kubectl port-forward $(kubectl get pods -n kubeflow -l service=ambassador -o jsonpath='{.items[0].metadata.name}') -n kubeflow 8002:80\n", 86 | "```" 87 | ] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": {}, 92 | "source": [ 93 | "# Deploy Single Tensorflow Model" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [ 102 | "get_graph(\"../k8s_serving/serving_model.json\",'r')" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "!pygmentize ../k8s_serving/serving_model.json" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "!kubectl apply -f ../k8s_serving/serving_model.json" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "!kubectl get seldondeployments mnist-classifier -o jsonpath='{.status}'" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "utils.predict_rest_mnist(mnist)" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "utils.predict_grpc_mnist(mnist)" 148 | ] 149 | }, 150 | { 151 | "cell_type": "markdown", 152 | "metadata": {}, 153 | "source": [ 154 | "# Start load test" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "metadata": {}, 161 | "outputs": [], 162 | "source": [ 163 | "!kubectl label nodes $(kubectl get nodes -o jsonpath='{.items[0].metadata.name}') role=locust" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": null, 169 | "metadata": {}, 170 | "outputs": [], 171 | "source": [ 172 | "!helm install seldon-core-loadtesting --name loadtest \\\n", 173 | " --namespace kubeflow \\\n", 174 | " --repo https://storage.googleapis.com/seldon-charts \\\n", 175 | " --set locust.script=mnist_rest_locust.py \\\n", 176 | " --set locust.host=http://mnist-classifier:8000 \\\n", 177 | " --set oauth.enabled=false \\\n", 178 | " --set oauth.key=oauth-key \\\n", 179 | " --set oauth.secret=oauth-secret \\\n", 180 | " --set locust.hatchRate=1 \\\n", 181 | " --set locust.clients=1 \\\n", 182 | " --set loadtest.sendFeedback=1 \\\n", 183 | " --set locust.minWait=0 \\\n", 184 | " --set locust.maxWait=0 \\\n", 185 | " --set replicaCount=1 \\\n", 186 | " --set data.size=784\n" 187 | ] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "metadata": {}, 192 | "source": [ 193 | "# Rolling update to AB Test\n", 194 | " Run an AB Test between 2 models:\n", 195 | " * Tensorflow neural network model\n", 196 | " * Scikit-learn random forest.\n", 197 | " " 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": null, 203 | 
"metadata": {}, 204 | "outputs": [], 205 | "source": [ 206 | "get_graph(\"../k8s_serving/ab_test_sklearn_tensorflow.json\",'r')" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": null, 212 | "metadata": {}, 213 | "outputs": [], 214 | "source": [ 215 | "!pygmentize ../k8s_serving/ab_test_sklearn_tensorflow.json" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": null, 221 | "metadata": {}, 222 | "outputs": [], 223 | "source": [ 224 | "!kubectl apply -f ../k8s_serving/ab_test_sklearn_tensorflow.json" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": null, 230 | "metadata": {}, 231 | "outputs": [], 232 | "source": [ 233 | "!kubectl get seldondeployments mnist-classifier -o jsonpath='{.status}'" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "metadata": {}, 240 | "outputs": [], 241 | "source": [ 242 | "utils.predict_rest_mnist(mnist)" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": null, 248 | "metadata": {}, 249 | "outputs": [], 250 | "source": [ 251 | "utils.evaluate_abtest(mnist,100)" 252 | ] 253 | }, 254 | { 255 | "cell_type": "markdown", 256 | "metadata": {}, 257 | "source": [ 258 | "# Rolling Update to Multi-Armed Bandit\n", 259 | "Run a epsilon-greey multi-armed bandit over 3 models:\n", 260 | " * Tensorflow neural network model\n", 261 | " * Scikit-learn random forest model\n", 262 | " * R least-squares model\n", 263 | " " 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": null, 269 | "metadata": {}, 270 | "outputs": [], 271 | "source": [ 272 | "get_graph(\"../k8s_serving/epsilon_greedy_3way.json\",'r')" 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": null, 278 | "metadata": {}, 279 | "outputs": [], 280 | "source": [ 281 | "!pygmentize ../k8s_serving/epsilon_greedy_3way.json" 282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": null, 287 | "metadata": {}, 288 | "outputs": [], 289 | "source": [ 290 | "!kubectl apply -f ../k8s_serving/epsilon_greedy_3way.json" 291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": null, 296 | "metadata": {}, 297 | "outputs": [], 298 | "source": [ 299 | "!kubectl get seldondeployments mnist-classifier -o jsonpath='{.status}'" 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": null, 305 | "metadata": {}, 306 | "outputs": [], 307 | "source": [ 308 | "utils.predict_rest_mnist(mnist)" 309 | ] 310 | }, 311 | { 312 | "cell_type": "code", 313 | "execution_count": null, 314 | "metadata": {}, 315 | "outputs": [], 316 | "source": [ 317 | "utils.evaluate_egreedy(mnist,100)" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": null, 323 | "metadata": {}, 324 | "outputs": [], 325 | "source": [] 326 | } 327 | ], 328 | "metadata": { 329 | "kernelspec": { 330 | "display_name": "Python 3", 331 | "language": "python", 332 | "name": "python3" 333 | }, 334 | "language_info": { 335 | "codemirror_mode": { 336 | "name": "ipython", 337 | "version": 3 338 | }, 339 | "file_extension": ".py", 340 | "mimetype": "text/x-python", 341 | "name": "python", 342 | "nbconvert_exporter": "python", 343 | "pygments_lexer": "ipython3", 344 | "version": "3.6.4" 345 | } 346 | }, 347 | "nbformat": 4, 348 | "nbformat_minor": 1 349 | } 350 | -------------------------------------------------------------------------------- /notebooks/training.ipynb: 
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "markdown",
5 |    "metadata": {},
6 |    "source": [
7 |     "# Train Various Models on MNIST using kubeflow and seldon-core\n",
8 |     "\n",
9 |     "Using:\n",
10 |     "\n",
11 |     " * kubeflow\n",
12 |     " * seldon-core\n",
13 |     " \n",
14 |     "The example will be the MNIST handwritten digit classification task.\n",
15 |     "\n",
16 |     "![MNIST](mnist.png \"MNIST Digits\")\n",
17 |     "\n",
18 |     "### Dependencies\n",
19 |     "\n",
20 |     " * Argo"
21 |    ]
22 |   },
23 |   {
24 |    "cell_type": "markdown",
25 |    "metadata": {},
26 |    "source": [
27 |     "# Setup\n"
28 |    ]
29 |   },
30 |   {
31 |    "cell_type": "code",
32 |    "execution_count": null,
33 |    "metadata": {},
34 |    "outputs": [],
35 |    "source": [
36 |     "!kubectl config set-context $(kubectl config current-context) --namespace=kubeflow"
37 |    ]
38 |   },
39 |   {
40 |    "cell_type": "markdown",
41 |    "metadata": {},
42 |    "source": [
43 |     "# Tensorflow Model\n",
44 |     " A simple neural network in Tensorflow."
45 |    ]
46 |   },
47 |   {
48 |    "cell_type": "markdown",
49 |    "metadata": {},
50 |    "source": [
51 |     "### Training\n",
52 |     " * Create image from source\n",
53 |     " * Run training\n",
54 |     " \n",
55 |     "\n",
56 |     "Run with:\n",
57 |     " * ``` -p build-push-image=true``` to build image and push to repo, needed extra params:\n",
58 |     "   * ``` -p version=``` create ```<version>``` of model\n",
59 |     "   * ``` -p github-user=``` to download example-seldon source from ```<github-user>``` account\n",
60 |     "   * ``` -p github-revision=``` to use the github branch ```<github-revision>```\n",
61 |     "   * ``` -p docker-org=``` to use Docker repo ```<docker-org>``` to push image to. Needs docker credentials in secret as described in README."
62 |    ]
63 |   },
64 |   {
65 |    "cell_type": "code",
66 |    "execution_count": null,
67 |    "metadata": {},
68 |    "outputs": [],
69 |    "source": [
70 |     "!pygmentize ../workflows/training-tf-mnist-workflow.yaml"
71 |    ]
72 |   },
73 |   {
74 |    "cell_type": "code",
75 |    "execution_count": null,
76 |    "metadata": {},
77 |    "outputs": [],
78 |    "source": [
79 |     "!argo submit ../workflows/training-tf-mnist-workflow.yaml -p tfjob-version-hack=1"
80 |    ]
81 |   },
82 |   {
83 |    "cell_type": "code",
84 |    "execution_count": null,
85 |    "metadata": {},
86 |    "outputs": [],
87 |    "source": [
88 |     "!argo list"
89 |    ]
90 |   },
91 |   {
92 |    "cell_type": "markdown",
93 |    "metadata": {},
94 |    "source": [
95 |     "### Runtime Image\n",
96 |     "\n",
97 |     "Run with:\n",
98 |     " * ``` -p build-push-image=true``` to build image and push to repo, needed extra params:\n",
99 |     "   * ``` -p version=``` create ```<version>``` of model\n",
100 |     "   * ``` -p github-user=``` to download example-seldon source from ```<github-user>``` account\n",
101 |     "   * ``` -p github-revision=``` to use the github branch ```<github-revision>```\n",
102 |     "   * ``` -p docker-org=``` to use Docker user ```<docker-org>``` to push image to. 
Needs docker credentials in secret as described in README.\n", 103 | " * ``` -p deploy-model=true``` to deploy model" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "!pygmentize ../workflows/serving-tf-mnist-workflow.yaml" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "metadata": {}, 119 | "outputs": [], 120 | "source": [ 121 | "!argo submit ../workflows/serving-tf-mnist-workflow.yaml" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "!argo list" 131 | ] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "metadata": {}, 136 | "source": [ 137 | "# Sklearn Model\n", 138 | "A Random forest in sklearn." 139 | ] 140 | }, 141 | { 142 | "cell_type": "markdown", 143 | "metadata": {}, 144 | "source": [ 145 | "### Training\n", 146 | "\n", 147 | " * For options see above Tensorflow example" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": null, 153 | "metadata": {}, 154 | "outputs": [], 155 | "source": [ 156 | "!pygmentize ../workflows/training-sk-mnist-workflow.yaml" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": null, 162 | "metadata": {}, 163 | "outputs": [], 164 | "source": [ 165 | "!argo submit ../workflows/training-sk-mnist-workflow.yaml" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [ 174 | "!argo list" 175 | ] 176 | }, 177 | { 178 | "cell_type": "markdown", 179 | "metadata": {}, 180 | "source": [ 181 | "### Runtime Image\n", 182 | " * For options see above Tensorflow example" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "metadata": {}, 189 | "outputs": [], 190 | "source": [ 191 | "!pygmentize ../workflows/serving-sk-mnist-workflow.yaml" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": null, 197 | "metadata": {}, 198 | "outputs": [], 199 | "source": [ 200 | "!argo submit ../workflows/serving-sk-mnist-workflow.yaml" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": null, 206 | "metadata": {}, 207 | "outputs": [], 208 | "source": [ 209 | "!argo list" 210 | ] 211 | }, 212 | { 213 | "cell_type": "markdown", 214 | "metadata": {}, 215 | "source": [ 216 | "# R Model\n", 217 | "A partial least squares model in R." 
218 | ] 219 | }, 220 | { 221 | "cell_type": "markdown", 222 | "metadata": {}, 223 | "source": [ 224 | "### Training\n", 225 | "\n", 226 | " * For options see above Tensorflow example" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": null, 232 | "metadata": {}, 233 | "outputs": [], 234 | "source": [ 235 | "!pygmentize ../workflows/training-r-mnist-workflow.yaml" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": null, 241 | "metadata": {}, 242 | "outputs": [], 243 | "source": [ 244 | "!argo submit ../workflows/training-r-mnist-workflow.yaml" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": null, 250 | "metadata": {}, 251 | "outputs": [], 252 | "source": [ 253 | "!argo list" 254 | ] 255 | }, 256 | { 257 | "cell_type": "markdown", 258 | "metadata": {}, 259 | "source": [ 260 | "### Runtime Image\n", 261 | " * For options see above Tensorflow example" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": null, 267 | "metadata": {}, 268 | "outputs": [], 269 | "source": [ 270 | "!pygmentize ../workflows/serving-r-mnist-workflow.yaml" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": null, 276 | "metadata": {}, 277 | "outputs": [], 278 | "source": [ 279 | "!argo submit ../workflows/serving-r-mnist-workflow.yaml" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": null, 285 | "metadata": {}, 286 | "outputs": [], 287 | "source": [ 288 | "!argo list" 289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": null, 294 | "metadata": {}, 295 | "outputs": [], 296 | "source": [] 297 | } 298 | ], 299 | "metadata": { 300 | "kernelspec": { 301 | "display_name": "Python 3", 302 | "language": "python", 303 | "name": "python3" 304 | }, 305 | "language_info": { 306 | "codemirror_mode": { 307 | "name": "ipython", 308 | "version": 3 309 | }, 310 | "file_extension": ".py", 311 | "mimetype": "text/x-python", 312 | "name": "python", 313 | "nbconvert_exporter": "python", 314 | "pygments_lexer": "ipython3", 315 | "version": "3.6.4" 316 | } 317 | }, 318 | "nbformat": 4, 319 | "nbformat_minor": 1 320 | } 321 | -------------------------------------------------------------------------------- /notebooks/utils.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from requests.auth import HTTPBasicAuth 3 | from random import randint,random 4 | from proto import prediction_pb2 5 | from proto import prediction_pb2_grpc 6 | import grpc 7 | import json 8 | from visualizer import get_graph 9 | from matplotlib import pyplot as plt 10 | import numpy as np 11 | from tensorflow.examples.tutorials.mnist import input_data 12 | from google.protobuf.json_format import MessageToJson 13 | 14 | AMBASSADOR_API_IP="localhost:8002" 15 | 16 | def rest_request(deploymentName,request): 17 | response = requests.post( 18 | "http://"+AMBASSADOR_API_IP+"/seldon/"+deploymentName+"/api/v0.1/predictions", 19 | json=request) 20 | j = response.json() 21 | return j 22 | 23 | def rest_request_auth(deploymentName,data,username,password): 24 | payload = {"data":{"ndarray":data.tolist()}} 25 | response = requests.post( 26 | "http://"+AMBASSADOR_API_IP+"/seldon/"+deploymentName+"/api/v0.1/predictions", 27 | json=payload, 28 | auth=HTTPBasicAuth(username, password)) 29 | print(response.status_code) 30 | return response.json() 31 | 32 | def grpc_request(deploymentName,data): 33 | datadef = prediction_pb2.DefaultData( 34 | names = 
["a","b"], 35 | tensor = prediction_pb2.Tensor( 36 | shape = [1,784], 37 | values = data 38 | ) 39 | ) 40 | request = prediction_pb2.SeldonMessage(data = datadef) 41 | channel = grpc.insecure_channel(AMBASSADOR_API_IP) 42 | stub = prediction_pb2_grpc.SeldonStub(channel) 43 | metadata = [('seldon',deploymentName)] 44 | response = stub.Predict(request=request,metadata=metadata) 45 | return response 46 | 47 | def send_feedback_rest(deploymentName,request,response,reward): 48 | feedback = { 49 | "request": request, 50 | "response": response, 51 | "reward": reward 52 | } 53 | ret = requests.post( 54 | "http://"+AMBASSADOR_API_IP+"/seldon/"+deploymentName+"/api/v0.1/feedback", 55 | json=feedback) 56 | return ret.text 57 | 58 | 59 | def gen_image(arr): 60 | two_d = (np.reshape(arr, (28, 28)) * 255).astype(np.uint8) 61 | plt.imshow(two_d,cmap=plt.cm.gray_r, interpolation='nearest') 62 | return plt 63 | 64 | def download_mnist(): 65 | return input_data.read_data_sets("MNIST_data/", one_hot = True) 66 | 67 | 68 | def predict_rest_mnist(mnist): 69 | batch_xs, batch_ys = mnist.train.next_batch(1) 70 | chosen=0 71 | gen_image(batch_xs[chosen]).show() 72 | data = batch_xs[chosen].reshape((1,784)) 73 | features = ["X"+str(i+1) for i in range (0,784)] 74 | request = {"data":{"names":features,"ndarray":data.tolist()}} 75 | predictions = rest_request("mnist-classifier",request) 76 | print(json.dumps(predictions,indent=2)) 77 | #print("Route:"+json.dumps(predictions["meta"]["routing"],indent=2)) 78 | fpreds = [ '%.2f' % elem for elem in predictions["data"]["ndarray"][0] ] 79 | m = dict(zip(predictions["data"]["names"],fpreds)) 80 | print("Returned probabilities") 81 | print(json.dumps(m,indent=2)) 82 | 83 | 84 | 85 | def predict_grpc_mnist(mnist): 86 | batch_xs, batch_ys = mnist.train.next_batch(1) 87 | chosen=0 88 | gen_image(batch_xs[chosen]).show() 89 | data = batch_xs[chosen].reshape((784)) 90 | resp = grpc_request("mnist-classifier",data) 91 | predictions = MessageToJson(resp) 92 | predictions = json.loads(predictions) 93 | print(json.dumps(predictions,indent=2)) 94 | fpreds = [ '%.2f' % elem for elem in predictions["data"]["tensor"]["values"] ] 95 | m = dict(zip(predictions["data"]["names"],fpreds)) 96 | print("Returned probabilities") 97 | print(json.dumps(m,indent=2)) 98 | 99 | def evaluate_abtest(mnist,sz=100): 100 | batch_xs, batch_ys = mnist.train.next_batch(sz) 101 | routes_history = [] 102 | for idx in range(sz): 103 | if idx % 10 == 0: 104 | print("{}/{}".format(idx,sz)) 105 | data = batch_xs[idx].reshape((1,784)) 106 | request = {"data":{"ndarray":data.tolist()}} 107 | response = rest_request("mnist-classifier",request) 108 | route = response.get("meta").get("routing").get("random-ab-test") 109 | routes_history.append(route) 110 | 111 | plt.figure(figsize=(15,6)) 112 | ax = plt.scatter(range(len(routes_history)),routes_history) 113 | ax.axes.xaxis.set_label_text("Incoming Requests over Time") 114 | ax.axes.yaxis.set_label_text("Selected Branch") 115 | plt.yticks([0,1,2]) 116 | _ = plt.title("Branch Chosen for Incoming Requests") 117 | 118 | 119 | def evaluate_egreedy(mnist,sz=100): 120 | score = [0.0,0.0,0.0] 121 | sz = 100 122 | batch_xs, batch_ys = mnist.train.next_batch(sz) 123 | routes_history = [] 124 | for idx in range(sz): 125 | if idx % 10 == 0: 126 | print("{}/{}".format(idx,sz)) 127 | data = batch_xs[idx].reshape((1,784)) 128 | request = {"data":{"ndarray":data.tolist()}} 129 | response = rest_request("mnist-classifier",request) 130 | route = 
response.get("meta").get("routing").get("eg-router") 131 | proba = response["data"]["ndarray"][0] 132 | predicted = proba.index(max(proba)) 133 | correct = np.argmax(batch_ys[idx]) 134 | if predicted == correct: 135 | score[route] = score[route] + 1 136 | send_feedback_rest("mnist-classifier",request,response,reward=1) 137 | else: 138 | send_feedback_rest("mnist-classifier",request,response,reward=0) 139 | routes_history.append(route) 140 | 141 | plt.figure(figsize=(15,6)) 142 | ax = plt.scatter(range(len(routes_history)),routes_history) 143 | ax.axes.xaxis.set_label_text("Incoming Requests over Time") 144 | ax.axes.yaxis.set_label_text("Selected Branch") 145 | plt.yticks([0,1,2]) 146 | _ = plt.title("Branch Chosen for Incoming Requests") 147 | print(score) 148 | 149 | 150 | -------------------------------------------------------------------------------- /notebooks/visualizer.py: -------------------------------------------------------------------------------- 1 | import graphviz 2 | import json 3 | 4 | def _populate_graph(dot, root, suffix=''): 5 | name = root.get("name") 6 | id = name+suffix 7 | if root.get("implementation"): 8 | dot.node(id, label=name, shape="box", style="filled", color="lightgrey") 9 | else: 10 | dot.node(id, label=name, shape="box") 11 | endpoint_type = root.get("endpoint",{}).get("type") 12 | if endpoint_type is not None: 13 | dot.node(id+'endpoint', label=endpoint_type) 14 | dot.edge(id,id+'endpoint') 15 | for child in root.get("children",[]): 16 | child_id = _populate_graph(dot,child) 17 | dot.edge(id, child_id) 18 | return id 19 | 20 | def get_graph(filename,predictor=0): 21 | deployment = json.load(open(filename,'r')) 22 | predictors = deployment.get("spec").get("predictors") 23 | dot = graphviz.Digraph() 24 | 25 | with dot.subgraph(name="cluster_0") as pdot: 26 | graph = predictors[0].get("graph") 27 | _populate_graph(pdot, graph, suffix='0') 28 | pdot.attr(label="predictor") 29 | 30 | if len(predictors)>1: 31 | with dot.subgraph(name="cluster_1") as cdot: 32 | graph = predictors[1].get("graph") 33 | _populate_graph(cdot, graph, suffix='1') 34 | cdot.attr(label="canary") 35 | 36 | return dot 37 | -------------------------------------------------------------------------------- /scripts/README.md: -------------------------------------------------------------------------------- 1 | # Create MNIST Demo 2 | 3 | 1. You will need all prerequisites (gcloud, kubectl, ks) in your path. 4 | 1. Copy `env-example.sh` to `env.sh` and edit with your own settings 5 | 1. run `create_demo.sh` 6 | 7 | # Delete Demo 8 | 9 | 1. run `delete-demo.sh` - this will delete the GCP resources except the Filestore disk. You will need to delete this manually at present. 
10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /scripts/create_demo.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | set -o pipefail 6 | 7 | create_src() { 8 | mkdir -p ${KUBEFLOW_SRC} 9 | cd ${KUBEFLOW_SRC} 10 | curl https://raw.githubusercontent.com/kubeflow/kubeflow/${KUBEFLOW_TAG}/scripts/download.sh | bash 11 | } 12 | 13 | 14 | launch_kubeflow() { 15 | 16 | KUBEFLOW_REPO=${KUBEFLOW_SRC} ${KUBEFLOW_SRC}/scripts/kfctl.sh init ${KFAPP} --platform gcp --project ${PROJECT} 17 | 18 | cd ${KFAPP} 19 | ${KUBEFLOW_SRC}/scripts/kfctl.sh generate platform 20 | ${KUBEFLOW_SRC}/scripts/kfctl.sh apply platform 21 | ${KUBEFLOW_SRC}/scripts/kfctl.sh generate k8s 22 | ${KUBEFLOW_SRC}/scripts/kfctl.sh apply k8s 23 | 24 | } 25 | 26 | launch_seldon() { 27 | cd ${KUBEFLOW_SRC}/${KFAPP}/ks_app 28 | 29 | ks pkg install kubeflow/seldon 30 | ks generate seldon seldon 31 | ks apply default -c seldon 32 | } 33 | 34 | add_helm() { 35 | kubectl -n kube-system create sa tiller 36 | kubectl create clusterrolebinding tiller --clusterrole cluster-admin --serviceaccount=kube-system:tiller 37 | helm init --service-account tiller 38 | kubectl rollout status deploy/tiller-deploy -n kube-system 39 | } 40 | 41 | add_nfs_disk() { 42 | 43 | set +e 44 | FSADDR=$(gcloud beta filestore instances describe ${FS} --project=${PROJECT} --location=${ZONE} --format="value(networks.ipAddresses[0])") 45 | if [ -z "$FSADDR" ]; then 46 | echo "Creating filestore NFS volume" 47 | gcloud beta filestore instances create ${FS} --project=${PROJECT} --location=${ZONE} --tier=STANDARD --file-share=name="volumes",capacity=1TB --network=name="default",reserved-ip-range="10.0.0.0/29" 48 | fi 49 | set -e 50 | 51 | FSADDR=$(gcloud beta filestore instances describe ${FS} --project=${PROJECT} --location=${ZONE} --format="value(networks.ipAddresses[0])") 52 | 53 | helm install stable/nfs-client-provisioner --name nfs-cp --set nfs.server=${FSADDR} --set nfs.path=/volumes 54 | kubectl rollout status deploy/nfs-cp-nfs-client-provisioner -n kubeflow 55 | 56 | kubectl apply -f ${STARTUP_DIR}/nfs-pvc.yaml -n kubeflow 57 | } 58 | 59 | add_argo_clusterrole() { 60 | kubectl create clusterrolebinding my-cluster-admin-binding --clusterrole=cluster-admin --user=$(gcloud info --format="value(config.account)") 61 | kubectl create clusterrolebinding default-admin2 --clusterrole=cluster-admin --serviceaccount=kubeflow:default 62 | 63 | } 64 | 65 | add_seldon_analytics() { 66 | helm install seldon-core-analytics --name seldon-core-analytics --set grafana_prom_admin_password=password --set persistence.enabled=false --repo https://storage.googleapis.com/seldon-charts --namespace kubeflow 67 | } 68 | 69 | if [ ! -f env.sh ]; then 70 | echo "Create env.sh by copying env-example.sh" 71 | fi 72 | source env.sh 73 | create_src 74 | launch_kubeflow 75 | launch_seldon 76 | add_helm 77 | add_nfs_disk 78 | add_argo_clusterrole 79 | add_seldon_analytics 80 | -------------------------------------------------------------------------------- /scripts/delete-demo.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -o nounset 4 | set -o errexit 5 | set -o pipefail 6 | 7 | 8 | if [ ! 
-f env.sh ]; then 9 | echo "Create env.sh by copying env-example.sh" 10 | fi 11 | source env.sh 12 | 13 | cd ${KUBEFLOW_SRC}/${KFAPP} 14 | ${KUBEFLOW_SRC}/scripts/kfctl.sh delete all 15 | -------------------------------------------------------------------------------- /scripts/env-example.sh: -------------------------------------------------------------------------------- 1 | STARTUP_DIR="$( cd "$( dirname "$0" )" && pwd )" 2 | KFAPP=my-kubeflow 3 | PROJECT=seldon-demos 4 | KUBEFLOW_SRC=${STARTUP_DIR}/kubeflow_src 5 | FS=mnist-data 6 | ZONE=europe-west1-b 7 | # Next two lines are set from values created as discussed in https://www.kubeflow.org/docs/started/getting-started-gke/ 8 | export CLIENT_ID= 9 | export CLIENT_SECRET= 10 | export KUBEFLOW_TAG=v0.5.1 11 | -------------------------------------------------------------------------------- /scripts/nfs-pvc.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: PersistentVolumeClaim 3 | metadata: 4 | name: nfs-1 5 | spec: 6 | accessModes: 7 | - ReadWriteMany 8 | storageClassName: nfs-client 9 | resources: 10 | requests: 11 | storage: 30Gi 12 | -------------------------------------------------------------------------------- /scripts/port-forwards.sh: -------------------------------------------------------------------------------- 1 | 2 | #Argo 3 | kubectl port-forward $(kubectl get pods -n kubeflow -l app=argo-ui -o jsonpath='{.items[0].metadata.name}') -n kubeflow 8001:8001 & 4 | 5 | #Seldon Grafana 6 | kubectl port-forward $(kubectl get pods -n kubeflow -l app=grafana-prom-server -o jsonpath='{.items[0].metadata.name}') -n kubeflow 3000:3000 & 7 | 8 | #Ambassador reverse proxy 9 | kubectl port-forward $(kubectl get pods -n kubeflow -l service=ambassador -o jsonpath='{.items[0].metadata.name}') -n kubeflow 8002:80 & 10 | 11 | #Ambassador admin 12 | kubectl port-forward $(kubectl get pods -n kubeflow -l service=ambassador -o jsonpath='{.items[0].metadata.name}') -n kubeflow 8877:8877 & 13 | 14 | 15 | -------------------------------------------------------------------------------- /scripts/watch-mnist.sh: -------------------------------------------------------------------------------- 1 | watch kubectl get pods -l seldon-app=mnist-classifier 2 | -------------------------------------------------------------------------------- /workflows/serving-r-mnist-workflow.yaml: -------------------------------------------------------------------------------- 1 | # This example demonstrates the use of a git repo as a hard-wired 2 | # input artifact. The argo repo is cloned to its target destination 3 | # at '/src' for the main container to consume. 
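#
# A typical submission (illustrative parameter values, not the defaults below) might be:
#
#   argo submit serving-r-mnist-workflow.yaml -p build-push-image=true \
#     -p version=0.2 -p docker-org=index.docker.io/<your-org> -p deploy-model=true
#
# where <your-org> stands in for your own Docker org.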
4 | apiVersion: argoproj.io/v1alpha1 5 | kind: Workflow 6 | metadata: 7 | generateName: seldon-r-deploy- 8 | spec: 9 | entrypoint: workflow 10 | arguments: 11 | parameters: 12 | - name: version 13 | value: 0.1 14 | - name: github-user 15 | value: kubeflow 16 | - name: github-revision 17 | value: master 18 | - name: docker-org 19 | value: index.docker.io/seldonio 20 | - name: build-push-image 21 | value: false 22 | - name: deploy-model 23 | value: false 24 | volumes: 25 | - name: docker-config 26 | secret: 27 | secretName: docker-config # name of an existing k8s secret 28 | volumeClaimTemplates: 29 | - metadata: 30 | name: workspace 31 | spec: 32 | accessModes: [ "ReadWriteOnce" ] 33 | resources: 34 | requests: 35 | storage: 0.5Gi 36 | templates: 37 | - name: workflow 38 | steps: 39 | - - name: get-source 40 | template: get-source-code 41 | - - name: build-push 42 | template: build-and-push 43 | when: "{{workflow.parameters.build-push-image}} == true" 44 | - - name: serve 45 | template: seldon 46 | when: "{{workflow.parameters.deploy-model}} == true" 47 | - name: get-source-code 48 | inputs: 49 | artifacts: 50 | - name: argo-source 51 | path: /src/example-seldon 52 | git: 53 | repo: https://github.com/{{workflow.parameters.github-user}}/example-seldon.git 54 | revision: "{{workflow.parameters.github-revision}}" 55 | container: 56 | image: alpine:latest 57 | command: [sh, -c] 58 | args: ["cp /src/example-seldon/models/r_mnist/runtime/* /workspace/; ls /workspace/"] 59 | volumeMounts: 60 | - name: workspace 61 | mountPath: /workspace 62 | - name: build-and-push 63 | container: 64 | image: gcr.io/kaniko-project/executor:latest 65 | args: ["--dockerfile","Dockerfile","--destination","{{workflow.parameters.docker-org}}/rmnistclassifier_runtime:{{workflow.parameters.version}}"] 66 | workingDir: /src/example-seldon/models/r_mnist/runtime/ 67 | volumeMounts: 68 | - name: docker-config 69 | mountPath: "/root/.docker/" 70 | - name: workspace 71 | mountPath: /workspace 72 | - name: seldon 73 | resource: #indicates that this is a resource template 74 | action: apply #can be any kubectl action (e.g. create, delete, apply, patch) 75 | #successCondition: ? 76 | manifest: | #put your kubernetes spec here 77 | apiVersion: "machinelearning.seldon.io/v1alpha2" 78 | kind: "SeldonDeployment" 79 | metadata: 80 | labels: 81 | app: "seldon" 82 | name: "mnist-classifier" 83 | spec: 84 | annotations: 85 | deployment_version: "v1" 86 | project_name: "MNIST Example" 87 | name: "mnist-classifier" 88 | predictors: 89 | - 90 | annotations: 91 | predictor_version: "v1" 92 | componentSpecs: 93 | - 94 | spec: 95 | containers: 96 | - 97 | image: "{{workflow.parameters.docker-org}}/rmnistclassifier_runtime:{{workflow.parameters.version}}" 98 | imagePullPolicy: "Always" 99 | name: "mnist-classifier" 100 | volumeMounts: 101 | - 102 | mountPath: "/data" 103 | name: "persistent-storage" 104 | terminationGracePeriodSeconds: 1 105 | volumes: 106 | - 107 | name: "persistent-storage" 108 | volumeSource: 109 | persistentVolumeClaim: 110 | claimName: "nfs-1" 111 | graph: 112 | children: [] 113 | endpoint: 114 | type: "REST" 115 | name: "mnist-classifier" 116 | type: "MODEL" 117 | name: "mnist-classifier" 118 | replicas: 1 119 | -------------------------------------------------------------------------------- /workflows/serving-sk-mnist-workflow.yaml: -------------------------------------------------------------------------------- 1 | # This example demonstrates the use of a git repo as a hard-wired 2 | # input artifact. 
The argo repo is cloned to its target destination 3 | # at '/src' for the main container to consume. 4 | apiVersion: argoproj.io/v1alpha1 5 | kind: Workflow 6 | metadata: 7 | generateName: seldon-sk-deploy- 8 | spec: 9 | entrypoint: workflow 10 | arguments: 11 | parameters: 12 | - name: version 13 | value: 0.1 14 | - name: github-user 15 | value: kubeflow 16 | - name: github-revision 17 | value: master 18 | - name: docker-org 19 | value: index.docker.io/seldonio 20 | - name: build-push-image 21 | value: false 22 | - name: deploy-model 23 | value: false 24 | volumes: 25 | - name: docker-config 26 | secret: 27 | secretName: docker-config # name of an existing k8s secret 28 | volumeClaimTemplates: 29 | - metadata: 30 | name: workspace 31 | spec: 32 | accessModes: [ "ReadWriteOnce" ] 33 | resources: 34 | requests: 35 | storage: 0.5Gi 36 | templates: 37 | - name: workflow 38 | steps: 39 | - - name: get-source 40 | template: get-source-code 41 | - - name: build-push 42 | template: build-and-push 43 | when: "{{workflow.parameters.build-push-image}} == true" 44 | - - name: serve 45 | template: seldon 46 | when: "{{workflow.parameters.deploy-model}} == true" 47 | - name: get-source-code 48 | inputs: 49 | artifacts: 50 | - name: argo-source 51 | path: /src/example-seldon 52 | git: 53 | repo: https://github.com/{{workflow.parameters.github-user}}/example-seldon.git 54 | revision: "{{workflow.parameters.github-revision}}" 55 | container: 56 | image: alpine:latest 57 | command: [sh, -c] 58 | args: ["cp /src/example-seldon/models/sk_mnist/runtime/* /workspace/; ls /workspace/"] 59 | volumeMounts: 60 | - name: workspace 61 | mountPath: /workspace 62 | - name: build-and-push 63 | container: 64 | image: gcr.io/kaniko-project/executor:latest 65 | args: ["--dockerfile","Dockerfile","--destination","{{workflow.parameters.docker-org}}/skmnistclassifier_runtime:{{workflow.parameters.version}}"] 66 | workingDir: /src/example-seldon/models/sk_mnist/runtime/ 67 | volumeMounts: 68 | - name: docker-config 69 | mountPath: "/root/.docker/" 70 | - name: workspace 71 | mountPath: /workspace 72 | - name: seldon 73 | resource: #indicates that this is a resource template 74 | action: apply #can be any kubectl action (e.g. create, delete, apply, patch) 75 | #successCondition: ? 
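      # Note: no successCondition is set here; the step completes as soon as the
      # apply succeeds, whereas the training workflows in this repo gate on
      # `successCondition: status.succeeded == 1` before finishing.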
76 |       manifest: | #put your kubernetes spec here
77 |         apiVersion: "machinelearning.seldon.io/v1alpha2"
78 |         kind: "SeldonDeployment"
79 |         metadata:
80 |           labels:
81 |             app: "seldon"
82 |           name: "mnist-classifier"
83 |         spec:
84 |           annotations:
85 |             deployment_version: "v1"
86 |             project_name: "MNIST Example"
87 |           name: "mnist-classifier"
88 |           predictors:
89 |           - 
90 |             annotations:
91 |               predictor_version: "v1"
92 |             componentSpecs:
93 |             - 
94 |               spec:
95 |                 containers:
96 |                 - 
97 |                   image: "{{workflow.parameters.docker-org}}/skmnistclassifier_runtime:{{workflow.parameters.version}}"
98 |                   imagePullPolicy: "Always"
99 |                   name: "mnist-classifier"
100 |                   volumeMounts:
101 |                   - 
102 |                     mountPath: "/data"
103 |                     name: "persistent-storage"
104 |                 terminationGracePeriodSeconds: 1
105 |                 volumes:
106 |                 - 
107 |                   name: "persistent-storage"
108 |                   volumeSource:
109 |                     persistentVolumeClaim:
110 |                       claimName: "nfs-1"
111 |             graph:
112 |               children: []
113 |               endpoint:
114 |                 type: "REST"
115 |               name: "mnist-classifier"
116 |               type: "MODEL"
117 |             name: "mnist-classifier"
118 |             replicas: 1
119 | 
--------------------------------------------------------------------------------
/workflows/serving-tf-mnist-workflow.md:
--------------------------------------------------------------------------------
1 | # Example Argo Workflow to dockerize the runtime model and deploy it for serving
2 | 
3 | Comments on the [serving-tf-mnist-workflow.yaml](serving-tf-mnist-workflow.yaml)
4 | 
5 | ## Workflow Summary
6 | 
7 | To serve our runtime model, we create:
8 | 
9 | * [```models/tf_mnist/runtime/Dockerfile```](../models/tf_mnist/runtime/Dockerfile) to wrap the model using the seldon-core python wrapper.
10 | * An Argo workflow that:
11 |   * Wraps the runtime model, builds a docker container for it and optionally pushes it to your repo
12 |   * Optionally starts a seldon deployment that will run and expose your model
13 | 
14 | 
15 | ## Workflow parameters
16 | 
17 | * version
18 |   * The version tag for the Docker image
19 | * github-user
20 |   * The github user to use to clone this repo/fork
21 | * github-revision
22 |   * The github revision to use for cloning the repo (can be a branch name)
23 | * docker-org
24 |   * The Docker host and org/user/project to use when pushing an image to the registry
25 | * build-push-image
26 |   * Whether to build and push the image to the docker registry (true/false)
27 | * deploy-model
28 |   * Whether to start a seldon deployment to run and expose your model (true/false)
29 | 
--------------------------------------------------------------------------------
/workflows/serving-tf-mnist-workflow.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: argoproj.io/v1alpha1
2 | kind: Workflow
3 | metadata:
4 |   generateName: seldon-tf-deploy-
5 | spec:
6 |   entrypoint: workflow
7 |   arguments:
8 |     parameters:
9 |     - name: version
10 |       value: 0.1
11 |     - name: github-user
12 |       value: kubeflow
13 |     - name: github-revision
14 |       value: master
15 |     - name: docker-org
16 |       value: index.docker.io/seldonio
17 |     - name: build-push-image
18 |       value: false
19 |     - name: deploy-model
20 |       value: false
21 |   volumes:
22 |   - name: docker-config
23 |     secret:
24 |       secretName: docker-config # name of an existing k8s secret
25 |   volumeClaimTemplates:
26 |   - metadata:
27 |       name: workspace
28 |     spec:
29 |       accessModes: [ "ReadWriteOnce" ]
30 |       resources:
31 |         requests:
32 |           storage: 0.5Gi
33 |   templates:
34 |   - name: workflow
35 |     steps:
36 |     - - name: get-source
37 |         template: get-source-code
38 |     - - name: build-push
39 |         template: 
build-and-push 40 | when: "{{workflow.parameters.build-push-image}} == true" 41 | - - name: serve 42 | template: seldon 43 | when: "{{workflow.parameters.deploy-model}} == true" 44 | - name: get-source-code 45 | inputs: 46 | artifacts: 47 | - name: argo-source 48 | path: /src/example-seldon 49 | git: 50 | repo: https://github.com/{{workflow.parameters.github-user}}/example-seldon.git 51 | revision: "{{workflow.parameters.github-revision}}" 52 | container: 53 | image: alpine:latest 54 | command: [sh, -c] 55 | args: ["cp /src/example-seldon/models/tf_mnist/runtime/* /workspace/; ls /workspace/"] 56 | volumeMounts: 57 | - name: workspace 58 | mountPath: /workspace 59 | - name: build-and-push 60 | container: 61 | image: gcr.io/kaniko-project/executor:latest 62 | args: ["--dockerfile","Dockerfile","--destination","{{workflow.parameters.docker-org}}/deepmnistclassifier_runtime:{{workflow.parameters.version}}"] 63 | workingDir: /src/example-seldon/models/tf_mnist/runtime/ 64 | volumeMounts: 65 | - name: docker-config 66 | mountPath: "/root/.docker/" 67 | - name: workspace 68 | mountPath: /workspace 69 | - name: seldon 70 | resource: #indicates that this is a resource template 71 | action: apply #can be any kubectl action (e.g. create, delete, apply, patch) 72 | #successCondition: ? 73 | manifest: | #put your kubernetes spec here 74 | apiVersion: "machinelearning.seldon.io/v1alpha2" 75 | kind: "SeldonDeployment" 76 | metadata: 77 | labels: 78 | app: "seldon" 79 | name: "mnist-classifier" 80 | spec: 81 | annotations: 82 | deployment_version: "v1" 83 | project_name: "MNIST Example" 84 | name: "mnist-classifier" 85 | predictors: 86 | - 87 | annotations: 88 | predictor_version: "v1" 89 | componentSpecs: 90 | - 91 | spec: 92 | containers: 93 | - 94 | image: "{{workflow.parameters.docker-org}}/deepmnistclassifier_runtime:{{workflow.parameters.version}}" 95 | imagePullPolicy: "Always" 96 | name: "mnist-classifier" 97 | volumeMounts: 98 | - 99 | mountPath: "/data" 100 | name: "persistent-storage" 101 | terminationGracePeriodSeconds: 1 102 | volumes: 103 | - 104 | name: "persistent-storage" 105 | volumeSource: 106 | persistentVolumeClaim: 107 | claimName: "nfs-1" 108 | graph: 109 | children: [] 110 | endpoint: 111 | type: "REST" 112 | name: "mnist-classifier" 113 | type: "MODEL" 114 | name: "mnist-classifier" 115 | replicas: 1 116 | -------------------------------------------------------------------------------- /workflows/training-r-mnist-workflow.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: argoproj.io/v1alpha1 2 | kind: Workflow 3 | metadata: 4 | generateName: kubeflow-r-train- 5 | spec: 6 | entrypoint: workflow 7 | arguments: 8 | parameters: 9 | - name: version 10 | value: 0.1 11 | - name: github-user 12 | value: kubeflow 13 | - name: github-revision 14 | value: master 15 | - name: docker-org 16 | value: seldonio 17 | - name: build-push-image 18 | value: false 19 | volumes: 20 | - name: docker-config 21 | secret: 22 | secretName: docker-config # name of an existing k8s secret 23 | volumeClaimTemplates: 24 | - metadata: 25 | name: workspace 26 | spec: 27 | accessModes: [ "ReadWriteOnce" ] 28 | resources: 29 | requests: 30 | storage: 0.5Gi 31 | templates: 32 | - name: workflow 33 | steps: 34 | - - name: get-source 35 | template: get-source-code 36 | - - name: build-push 37 | template: build-and-push 38 | when: "{{workflow.parameters.build-push-image}} == true" 39 | - - name: train 40 | template: tfjob 41 | - name: get-source-code 42 | inputs: 43 | 
artifacts: 44 | - name: argo-source 45 | path: /src/example-seldon 46 | git: 47 | repo: https://github.com/{{workflow.parameters.github-user}}/example-seldon.git 48 | revision: "{{workflow.parameters.github-revision}}" 49 | container: 50 | image: alpine:latest 51 | command: [sh, -c] 52 | args: ["cp /src/example-seldon/models/r_mnist/train/* /workspace/; ls /workspace/"] 53 | volumeMounts: 54 | - name: workspace 55 | mountPath: /workspace 56 | - name: build-and-push 57 | container: 58 | image: gcr.io/kaniko-project/executor:latest 59 | args: ["--dockerfile","Dockerfile","--destination","{{workflow.parameters.docker-org}}/rmnistclassifier_trainer:{{workflow.parameters.version}}"] 60 | workingDir: /src/example-seldon/models/r_mnist/train/ 61 | volumeMounts: 62 | - name: docker-config 63 | mountPath: "/root/.docker/" 64 | - name: workspace 65 | mountPath: /workspace 66 | - name: tfjob 67 | resource: #indicates that this is a resource template 68 | action: create #can be any kubectl action (e.g. create, delete, apply, patch) 69 | successCondition: status.succeeded == 1 70 | manifest: | #put your kubernetes spec here 71 | apiVersion: "batch/v1" 72 | kind: "Job" 73 | metadata: 74 | name: "r-train" 75 | ownerReferences: 76 | - apiVersion: argoproj.io/v1alpha1 77 | kind: Workflow 78 | controller: true 79 | name: {{workflow.name}} 80 | uid: {{workflow.uid}} 81 | spec: 82 | template: 83 | metadata: 84 | name: "r-train" 85 | spec: 86 | containers: 87 | - 88 | image: "{{workflow.parameters.docker-org}}/rmnistclassifier_trainer:{{workflow.parameters.version}}" 89 | name: "r-train" 90 | volumeMounts: 91 | - 92 | mountPath: "/data" 93 | name: "persistent-storage" 94 | restartPolicy: "Never" 95 | volumes: 96 | - 97 | name: "persistent-storage" 98 | persistentVolumeClaim: 99 | claimName: "nfs-1" 100 | -------------------------------------------------------------------------------- /workflows/training-sk-mnist-workflow.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: argoproj.io/v1alpha1 2 | kind: Workflow 3 | metadata: 4 | generateName: kubeflow-sk-train- 5 | spec: 6 | entrypoint: workflow 7 | arguments: 8 | parameters: 9 | - name: version 10 | value: 0.2 11 | - name: github-user 12 | value: kubeflow 13 | - name: github-revision 14 | value: master 15 | - name: docker-org 16 | value: index.docker.io/seldonio 17 | - name: build-push-image 18 | value: false 19 | volumes: 20 | - name: docker-config 21 | secret: 22 | secretName: docker-config # name of an existing k8s secret 23 | volumeClaimTemplates: 24 | - metadata: 25 | name: workspace 26 | spec: 27 | accessModes: [ "ReadWriteOnce" ] 28 | resources: 29 | requests: 30 | storage: 0.5Gi 31 | templates: 32 | - name: workflow 33 | steps: 34 | - - name: get-source 35 | template: get-source-code 36 | - - name: build-push 37 | template: build-and-push 38 | when: "{{workflow.parameters.build-push-image}} == true" 39 | - - name: train 40 | template: tfjob 41 | - name: get-source-code 42 | inputs: 43 | artifacts: 44 | - name: argo-source 45 | path: /src/example-seldon 46 | git: 47 | repo: https://github.com/{{workflow.parameters.github-user}}/example-seldon.git 48 | revision: "{{workflow.parameters.github-revision}}" 49 | container: 50 | image: alpine:latest 51 | command: [sh, -c] 52 | args: ["cp /src/example-seldon/models/sk_mnist/train/* /workspace/; ls /workspace/"] 53 | volumeMounts: 54 | - name: workspace 55 | mountPath: /workspace 56 | - name: build-and-push 57 | container: 58 | image: 
gcr.io/kaniko-project/executor:latest
59 |       args: ["--dockerfile","Dockerfile","--destination","{{workflow.parameters.docker-org}}/skmnistclassifier_trainer:{{workflow.parameters.version}}"]
60 |       workingDir: /src/example-seldon/models/sk_mnist/train/
61 |       volumeMounts:
62 |         - name: docker-config
63 |           mountPath: "/root/.docker/"
64 |         - name: workspace
65 |           mountPath: /workspace
66 |   - name: tfjob
67 |     resource: #indicates that this is a resource template
68 |       action: create #can be any kubectl action (e.g. create, delete, apply, patch)
69 |       successCondition: status.succeeded == 1
70 |       manifest: | #put your kubernetes spec here
71 |         apiVersion: "batch/v1"
72 |         kind: "Job"
73 |         metadata:
74 |           name: "sk-train"
75 |           ownerReferences:
76 |           - apiVersion: argoproj.io/v1alpha1
77 |             kind: Workflow
78 |             controller: true
79 |             name: {{workflow.name}}
80 |             uid: {{workflow.uid}}
81 |         spec:
82 |           template:
83 |             metadata:
84 |               name: "sk-train"
85 |             spec:
86 |               containers:
87 |               - 
88 |                 image: "{{workflow.parameters.docker-org}}/skmnistclassifier_trainer:{{workflow.parameters.version}}"
89 |                 name: "sk-train"
90 |                 imagePullPolicy: Always
91 |                 volumeMounts:
92 |                 - 
93 |                   mountPath: "/data"
94 |                   name: "persistent-storage"
95 |               restartPolicy: "Never"
96 |               volumes:
97 |               - 
98 |                 name: "persistent-storage"
99 |                 persistentVolumeClaim:
100 |                   claimName: "nfs-1"
101 | 
--------------------------------------------------------------------------------
/workflows/training-tf-mnist-workflow.md:
--------------------------------------------------------------------------------
1 | # Example Argo Workflow to dockerize and Train Model
2 | 
3 | Comments on the [training-tf-mnist-workflow.yaml](training-tf-mnist-workflow.yaml)
4 | 
5 | ## Workflow summary
6 | 
7 | To dockerize our model training and run it, we create:
8 | 
9 | * [```models/tf_mnist/train/Dockerfile```](../models/tf_mnist/train/Dockerfile) to package our Tensorflow training code as a runnable image.
10 | * An Argo workflow [```workflows/training-tf-mnist-workflow.yaml```](training-tf-mnist-workflow.yaml) which:
11 |   * Clones the project from github
12 |   * Builds the training image and pushes it to our repo (using kaniko)
13 |   * Starts a kubeflow TfJob to train the model and save the results to the persistent volume
14 | 
15 | 
16 | ## Workflow parameters
17 | 
18 | * version
19 |   * The version tag for the Docker image
20 | * github-user
21 |   * The github user/org from which to clone this repo/fork
22 | * github-revision
23 |   * The github revision to use for cloning the repo (can be a branch name)
24 | * docker-org
25 |   * The Docker host and org/user/project to use when pushing an image to the registry
26 | * tfjob-version-hack
27 |   * A temporary random integer for the tfjob ID
28 | * build-push-image
29 |   * Whether to build and push the image to the docker registry (true/false)
30 | 
31 | ## Setup For Pushing Images
32 | 
33 | **To push the Docker images to your own repo, you will need to set up your docker credentials as a Kubernetes secret containing a [config.json](https://www.projectatomic.io/blog/2016/03/docker-credentials-store/). 
To do this you can find your docker home (typically ~/.docker) and run `kubectl create secret generic docker-config --from-file=config.json=${DOCKERHOME}/config.json --type=kubernetes.io/config` to [create a secret](https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/#registry-secret-existing-credentials).** 34 | -------------------------------------------------------------------------------- /workflows/training-tf-mnist-workflow.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: argoproj.io/v1alpha1 2 | kind: Workflow 3 | metadata: 4 | generateName: kubeflow-tf-train- 5 | spec: 6 | entrypoint: workflow 7 | arguments: 8 | parameters: 9 | - name: version 10 | value: 0.1 11 | - name: github-user 12 | value: kubeflow 13 | - name: github-revision 14 | value: master 15 | - name: docker-org 16 | value: index.docker.io/seldonio 17 | - name: tfjob-version-hack 18 | value: 1 19 | - name: build-push-image 20 | value: false 21 | volumes: 22 | - name: docker-config 23 | secret: 24 | secretName: docker-config # name of an existing k8s secret 25 | volumeClaimTemplates: 26 | - metadata: 27 | name: workspace 28 | spec: 29 | accessModes: [ "ReadWriteOnce" ] 30 | resources: 31 | requests: 32 | storage: 0.5Gi 33 | templates: 34 | - name: workflow 35 | steps: 36 | - - name: get-source 37 | template: get-source-code 38 | - - name: build-push 39 | template: build-and-push 40 | when: "{{workflow.parameters.build-push-image}} == true" 41 | - - name: train 42 | template: tfjob 43 | - name: get-source-code 44 | inputs: 45 | artifacts: 46 | - name: argo-source 47 | path: /src/example-seldon 48 | git: 49 | repo: https://github.com/{{workflow.parameters.github-user}}/example-seldon.git 50 | revision: "{{workflow.parameters.github-revision}}" 51 | container: 52 | image: alpine:latest 53 | command: [sh, -c] 54 | args: ["cp /src/example-seldon/models/tf_mnist/train/* /workspace/; ls /workspace/"] 55 | volumeMounts: 56 | - name: workspace 57 | mountPath: /workspace 58 | - name: build-and-push 59 | container: 60 | image: gcr.io/kaniko-project/executor:latest 61 | args: ["--dockerfile","Dockerfile","--destination","{{workflow.parameters.docker-org}}/deepmnistclassifier_trainer:{{workflow.parameters.version}}"] 62 | workingDir: /src/example-seldon/models/tf_mnist/train/ 63 | volumeMounts: 64 | - name: docker-config 65 | mountPath: "/root/.docker/" 66 | - name: workspace 67 | mountPath: /workspace 68 | - name: tfjob 69 | resource: #indicates that this is a resource template 70 | action: create #can be any kubectl action (e.g. 
create, delete, apply, patch)
71 |       #successCondition: status.tfReplicaStatuses.Worker.succeeded == 1
72 |       #successCondition: status.conditions.type == Succeeded
73 |       successCondition: status.replicaStatuses.Worker.succeeded == 1
74 |       manifest: | #put your kubernetes spec here
75 |         apiVersion: "kubeflow.org/v1beta1"
76 |         kind: "TFJob"
77 |         metadata:
78 |           name: mnist-train-{{workflow.parameters.tfjob-version-hack}}
79 |           ownerReferences:
80 |           - apiVersion: argoproj.io/v1alpha1
81 |             kind: Workflow
82 |             controller: true
83 |             name: {{workflow.name}}
84 |             uid: {{workflow.uid}}
85 |         spec:
86 |           tfReplicaSpecs:
87 |             Worker:
88 |               replicas: 1
89 |               template:
90 |                 spec:
91 |                   containers:
92 |                   - 
93 |                     image: "{{workflow.parameters.docker-org}}/deepmnistclassifier_trainer:{{workflow.parameters.version}}"
94 |                     name: "tensorflow"
95 |                     volumeMounts:
96 |                     - 
97 |                       mountPath: "/data"
98 |                       name: "persistent-storage"
99 |                   restartPolicy: "OnFailure"
100 |                   volumes:
101 |                   - 
102 |                     name: "persistent-storage"
103 |                     persistentVolumeClaim:
104 |                       claimName: "nfs-1"
--------------------------------------------------------------------------------