├── .gitignore ├── 01 ├── .DS_Store ├── my_model │ ├── saved_model.pb │ └── variables │ │ ├── variables.data-00000-of-00001 │ │ └── variables.index └── simple_dnn_model.py ├── 02 └── setup_commands.md ├── 03 ├── .DS_Store ├── hyperparameter │ ├── Dockerfile │ ├── buid_image.sh │ ├── experiment_katib.py │ └── katib_template.yaml ├── notebook │ ├── .DS_Store │ ├── myNotebook.ipynb │ └── my_model │ │ ├── saved_model.pb │ │ └── variables │ │ ├── variables.data-00000-of-00001 │ │ └── variables.index └── pipelines │ ├── .DS_Store │ ├── build_images.sh │ ├── define_compile_model │ ├── .DS_Store │ ├── Dockerfile │ ├── build_image.sh │ ├── component.yaml │ └── src │ │ └── define_compile_model.py │ ├── load_preprocess_data │ ├── .DS_Store │ ├── Dockerfile │ ├── build_image.sh │ ├── component.yaml │ └── src │ │ └── load_preprocess.py │ ├── pipeline_def_execute.py │ └── train_model │ ├── .DS_Store │ ├── Dockerfile │ ├── build_image.sh │ ├── component.yaml │ └── src │ └── train_model.py ├── 04 └── deploy_kfserving.py ├── 05 └── metadata_example.ipynb ├── LICENSE └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /01/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datastacktv/kubeflow-introduction/088e32171861cd7a4e5f76b3dcda7c69dc18bed8/01/.DS_Store -------------------------------------------------------------------------------- /01/my_model/saved_model.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datastacktv/kubeflow-introduction/088e32171861cd7a4e5f76b3dcda7c69dc18bed8/01/my_model/saved_model.pb -------------------------------------------------------------------------------- /01/my_model/variables/variables.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datastacktv/kubeflow-introduction/088e32171861cd7a4e5f76b3dcda7c69dc18bed8/01/my_model/variables/variables.data-00000-of-00001 -------------------------------------------------------------------------------- /01/my_model/variables/variables.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datastacktv/kubeflow-introduction/088e32171861cd7a4e5f76b3dcda7c69dc18bed8/01/my_model/variables/variables.index -------------------------------------------------------------------------------- /01/simple_dnn_model.py: -------------------------------------------------------------------------------- 1 | #1. 
Load packages 2 | import tensorflow as tf 3 | import numpy as np 4 | #2.Load dataset 5 | fashion_mnist = tf.keras.datasets.fashion_mnist 6 | (train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data() 7 | #3.Preprocess data 8 | train_images = train_images / 255.0 9 | test_images = test_images / 255.0 10 | #4.Define and compile model 11 | model = tf.keras.Sequential([ 12 | tf.keras.layers.Flatten(input_shape=(28, 28)), 13 | tf.keras.layers.Dense(128, activation='relu'), 14 | tf.keras.layers.Dense(10) 15 | ]) 16 | model.compile(optimizer='adam', 17 | loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), 18 | metrics=['accuracy']) 19 | #5.Train model 20 | model.fit(train_images, train_labels, epochs=10) 21 | #6.Evaluate accuracy 22 | test_loss, test_acc = model.evaluate(test_images, test_labels, verbose=2) 23 | print('\nTest accuracy:', test_acc) 24 | #7.Save model 25 | model.save('my_model') 26 | #8.Run predictions 27 | probability_model = tf.keras.Sequential([model, 28 | tf.keras.layers.Softmax()]) 29 | predictions = probability_model.predict(test_images) 30 | print('\nPrediction:',predictions[0]) 31 | -------------------------------------------------------------------------------- /02/setup_commands.md: -------------------------------------------------------------------------------- 1 | # kubeflow v1.1 setup on Google Cloud command and notes 2 | 3 | NOTE: This is not a executable script 4 | 5 | * Enable APIs: 6 | 7 | ```bash 8 | gcloud services enable \ 9 | compute.googleapis.com \ 10 | container.googleapis.com \ 11 | iam.googleapis.com \ 12 | servicemanagement.googleapis.com \ 13 | cloudresourcemanager.googleapis.com \ 14 | ml.googleapis.com \ 15 | cloudbuild.googleapis.com 16 | ``` 17 | 18 | * Anthos Service Mesh Project initialization 19 | 20 | ```bash 21 | curl --request POST \ 22 | --header "Authorization: Bearer $(gcloud auth print-access-token)" \ 23 | --data '' \ 24 | https://meshconfig.googleapis.com/v1alpha1/projects/${PROJECT_ID}:initialize 25 | ``` 26 | 27 | * Install CLIs for mgmt cluster deployment 28 | 29 | ```bash 30 | #kpt 31 | sudo apt-get install google-cloud-sdk-kpt google-cloud-sdk google-cloud-sdk 32 | ``` 33 | 34 | ```bash 35 | #yq 36 | sudo wget https://github.com/mikefarah/yq/releases/download/3.4.1/yq_linux_amd64 -O /usr/bin/yq && sudo chmod +x /usr/bin/yq 37 | ``` 38 | 39 | ```bash 40 | #kustomize 41 | curl -s "https://raw.githubusercontent.com/\ 42 | kubernetes-sigs/kustomize/master/hack/install_kustomize.sh" | bash 43 | sudo mv ./kustomize /usr/bin/kustomize 44 | sudo chmod +x /usr/bin/kustomize 45 | ``` 46 | 47 | * Management cluster deploy 48 | 49 | ```bash 50 | kpt pkg get https://github.com/kubeflow/gcp-blueprints.git/management@v1.1.0 ./ 51 | cd management 52 | make get-pkg 53 | ##--Edit makefile 54 | ## Set values for: 55 | #kpt cfg set ./instance name NAME 56 | #kpt cfg set ./instance location LOCATION 57 | #kpt cfg set ./instance gcloud.core.project PROJECT 58 | #kpt cfg set ./upstream/management name NAME 59 | #kpt cfg set ./upstream/management location LOCATION 60 | #kpt cfg set ./upstream/management gcloud.core.project PROJECT 61 | make set-values 62 | make apply 63 | #Install Cloud Config Connector 64 | ## 65 | make create-ctxt 66 | make apply-kcc 67 | kpt cfg set ./instance managed-project ${PROJECT_ID} 68 | anthoscli apply -f ./instance/managed-project/iam.yaml 69 | ``` 70 | 71 | * Install CLIs for kubeflow deployment 72 | 73 | ```bash 74 | #kustomize 3.2 75 | curl -LO 
https://github.com/kubernetes-sigs/kustomize/releases/download/kustomize%2Fv3.2.1/kustomize_kustomize.v3.2.1_linux_amd64 76 | mv kustomize_kustomize.v3.2.1_linux_amd64 kustomize 77 | chmod +x ./kustomize 78 | sudo mv ./kustomize /usr/local/bin/kustomize 79 | ``` 80 | 81 | ```bash 82 | #istioctl 83 | gcloud projects get-iam-policy ${PROJECT_ID} | grep -B 1 'roles/meshdataplane.serviceAgent' 84 | curl -LO https://storage.googleapis.com/gke-release/asm/istio-1.4.10-asm.18-linux.tar.gz 85 | curl -LO https://storage.googleapis.com/gke-release/asm/istio-1.4.10-asm.18-linux.tar.gz.1.sig 86 | openssl dgst -verify - -signature istio-1.4.10-asm.18-linux.tar.gz.1.sig istio-1.4.10-asm.18-linux.tar.gz <<'EOF' 87 | -----BEGIN PUBLIC KEY----- 88 | .... 89 | -----END PUBLIC KEY----- 90 | EOF 91 | tar xzf istio-1.6.11-asm.1-linux-amd64.tar.gz 92 | cd istio-1.6.11-asm.1 93 | export PATH=$PWD/bin:$PATH 94 | ``` 95 | 96 | * Get and apply blueprints 97 | 98 | ```bash 99 | kpt pkg get https://github.com/kubeflow/gcp-blueprints.git/kubeflow@v1.1.0 ./${KFDIR} 100 | cd kf/kubeflow 101 | make get-pkg 102 | kubectl config use-context mgmt-cluster 103 | kubectl create namespace ${PROJECT_ID} 104 | kubectl config set-context --current --namespace ${PROJECT_ID} 105 | ##--Edit makefile 106 | ## Set values for: 107 | #kpt cfg set ./instance mgmt-ctxt MGMT_NAME 108 | #kpt cfg set ./upstream/manifests/gcp name NAME 109 | #kpt cfg set ./upstream/manifests/gcp gcloud.core.project PROJECT 110 | #kpt cfg set ./upstream/manifests/gcp gcloud.compute.zone ZONE 111 | #kpt cfg set ./upstream/manifests/gcp location LOCATION 112 | #kpt cfg set ./upstream/manifests/gcp log-firewalls false 113 | #kpt cfg set ./upstream/manifests/stacks/gcp name NAME 114 | #kpt cfg set ./upstream/manifests/stacks/gcp gcloud.core.project PROJECT 115 | #kpt cfg set ./instance name NAME 116 | #kpt cfg set ./instance location LOCATION 117 | #kpt cfg set ./instance gcloud.core.project PROJECT 118 | #kpt cfg set ./instance email EMAIL 119 | #Export credentials 120 | export CLIENT_ID= 121 | export CLIENT_SECRET= 122 | #Deploy 123 | ## 124 | make apply 125 | ``` 126 | 127 | * Grant Web App user role 128 | 129 | ```bash 130 | gcloud projects add-iam-policy-binding PROJECT --member=user:EMAIL --role=roles/iap.httpsResourceAccessor 131 | ``` 132 | -------------------------------------------------------------------------------- /03/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datastacktv/kubeflow-introduction/088e32171861cd7a4e5f76b3dcda7c69dc18bed8/03/.DS_Store -------------------------------------------------------------------------------- /03/hyperparameter/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM tensorflow/tensorflow:latest-py3 2 | WORKDIR /katib_exp 3 | COPY experiment_katib.py /katib_exp 4 | -------------------------------------------------------------------------------- /03/hyperparameter/buid_image.sh: -------------------------------------------------------------------------------- 1 | #Developer: change project id 2 | gcloud builds submit --tag gcr.io/${PROJECT_ID}/katib_exp 3 | -------------------------------------------------------------------------------- /03/hyperparameter/experiment_katib.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import tensorflow as tf 3 | import numpy as np 4 | 5 | def main(params): 6 | cell_numbers = params.cells_number 7 | 
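# Trial entry point: train the Fashion-MNIST classifier below, with the Katib-tuned --cells_number argument setting the width of the hidden Dense layer.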
fashion_mnist = tf.keras.datasets.fashion_mnist 8 | (train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data() 9 | train_images = train_images / 255.0 10 | test_images = test_images / 255.0 11 | model = tf.keras.Sequential([ 12 | tf.keras.layers.Flatten(input_shape=(28, 28)), 13 | tf.keras.layers.Dense(cell_numbers, activation='relu'), 14 | tf.keras.layers.Dense(10)]) 15 | 16 | model.compile(optimizer='adam', 17 | loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), 18 | metrics=['accuracy']) 19 | model.fit(train_images, train_labels, epochs=10) 20 | test_loss, test_acc = model.evaluate(test_images, test_labels, verbose=2) 21 | print('accuracy:', test_acc) 22 | model.save('my_model') 23 | 24 | if __name__ == "__main__": 25 | parser = argparse.ArgumentParser(description='katib experiment') 26 | parser.add_argument('--cells_number', type=int, default=128) 27 | params = parser.parse_args() 28 | main(params) -------------------------------------------------------------------------------- /03/hyperparameter/katib_template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: "kubeflow.org/v1alpha3" 2 | kind: Experiment 3 | metadata: 4 | namespace: default-profile 5 | labels: 6 | controller-tools.k8s.io: "1.0" 7 | name: fashion-mnist-cell-tuner 8 | spec: 9 | objective: 10 | type: maximize 11 | goal: 0.99 12 | objectiveMetricName: accuracy 13 | algorithm: 14 | algorithmName: random 15 | parallelTrialCount: 3 16 | maxTrialCount: 12 17 | maxFailedTrialCount: 3 18 | parameters: 19 | - name: --cells_number 20 | parameterType: int 21 | feasibleSpace: 22 | min: "10" 23 | max: "200" 24 | trialTemplate: 25 | goTemplate: 26 | rawTemplate: |- 27 | apiVersion: batch/v1 28 | kind: Job 29 | metadata: 30 | name: {{.Trial}} 31 | namespace: {{.NameSpace}} 32 | spec: 33 | template: 34 | spec: 35 | containers: 36 | - name: {{.Trial}} 37 | image: gcr.io/velascoluis-test/katib_exp 38 | command: 39 | - "python" 40 | - "/katib_exp/experiment_katib.py" 41 | {{- with .HyperParameters}} 42 | {{- range .}} 43 | - "{{.Name}}={{.Value}}" 44 | {{- end}} 45 | {{- end}} 46 | restartPolicy: Never -------------------------------------------------------------------------------- /03/notebook/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datastacktv/kubeflow-introduction/088e32171861cd7a4e5f76b3dcda7c69dc18bed8/03/notebook/.DS_Store -------------------------------------------------------------------------------- /03/notebook/myNotebook.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Example - simple notebook" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import tensorflow as tf\n", 17 | "import numpy as np" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "### Load data" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "fashion_mnist = tf.keras.datasets.fashion_mnist\n", 34 | "(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "### Preprocess data" 42 | ] 43 | }, 44 | { 45 | "cell_type": 
"code", 46 | "execution_count": null, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "train_images = train_images / 255.0\n", 51 | "test_images = test_images / 255.0" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": {}, 57 | "source": [ 58 | "### Create and compile DNN model" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "model = tf.keras.Sequential([\n", 68 | " tf.keras.layers.Flatten(input_shape=(28, 28)),\n", 69 | " tf.keras.layers.Dense(128, activation='relu'),\n", 70 | " tf.keras.layers.Dense(10),\n", 71 | " tf.keras.layers.Softmax()])" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "model.compile(optimizer='adam',\n", 81 | " loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n", 82 | " metrics=['accuracy'])" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "### Train model" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "model.fit(train_images, train_labels, epochs=10)" 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "### Evaluate model" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "test_loss, test_acc = model.evaluate(test_images, test_labels, verbose=2)\n", 115 | "print('\\nTest accuracy:', test_acc)" 116 | ] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "metadata": {}, 121 | "source": [ 122 | "### Save model" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "model.save('my_model')" 132 | ] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | "metadata": {}, 137 | "source": [ 138 | "### Run sample prediction" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "predictions = model.predict(test_images)\n", 148 | "print('\\nPrediction:',predictions[0])" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "### Training @ TFOperator" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 1, 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [ 164 | "gcp_project=''" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": 2, 170 | "metadata": {}, 171 | "outputs": [], 172 | "source": [ 173 | "import tensorflow as tf\n", 174 | "import numpy as np\n", 175 | "import logging\n", 176 | "class mySimpleTrainer(object):\n", 177 | "\n", 178 | " def __init__(self):\n", 179 | " self.model = None\n", 180 | " pass\n", 181 | "\n", 182 | " def load_data_train(self):\n", 183 | " logging.basicConfig(level=logging.INFO)\n", 184 | " logging.info('STEP 1 - Start loading data:')\n", 185 | " fashion_mnist = tf.keras.datasets.fashion_mnist\n", 186 | " (train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()\n", 187 | " logging.info('Loading data finished')\n", 188 | " return (train_images, train_labels, test_images, test_labels)\n", 189 | " \n", 190 | " def prepare_data_train(self, train_images, test_images): \n", 191 | " 
logging.basicConfig(level=logging.INFO)\n", 192 | " logging.info('STEP 2 - Start preparing data:')\n", 193 | " train_images = train_images / 255.0\n", 194 | " test_images = test_images / 255.0\n", 195 | " logging.info('Preparing data finished')\n", 196 | " return (train_images, test_images)\n", 197 | "\n", 198 | " def generate_keras_model(self):\n", 199 | " logging.basicConfig(level=logging.INFO)\n", 200 | " logging.info('STEP 3 - Start generating model:')\n", 201 | " model = tf.keras.Sequential([\n", 202 | " tf.keras.layers.Flatten(input_shape=(28, 28)),\n", 203 | " tf.keras.layers.Dense(128, activation='relu'),\n", 204 | " tf.keras.layers.Dense(10),\n", 205 | " tf.keras.layers.Softmax()])\n", 206 | " logging.info('Generating model finished')\n", 207 | " return model\n", 208 | "\n", 209 | " def train_model(self, model, train_images, train_labels, test_images, test_labels, epochs):\n", 210 | " logging.basicConfig(level=logging.INFO)\n", 211 | " logging.info('STEP 4 - Starting training model:')\n", 212 | " model.compile(optimizer='adam',\n", 213 | " loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n", 214 | " metrics=['accuracy'])\n", 215 | " model.fit(train_images, train_labels, epochs=epochs)\n", 216 | " logging.info('Train model finished')\n", 217 | " \n", 218 | " def launch_train_cycle(self,epochs):\n", 219 | " logging.basicConfig(level=logging.INFO)\n", 220 | " logging.info('Starting E2E cycle:')\n", 221 | " logging.info('Arguments:')\n", 222 | " logging.info('epochs:{}'.format(epochs))\n", 223 | " train_images, train_labels, test_images, test_labels = self.load_data_train()\n", 224 | " train_images, test_images = self.prepare_data_train(train_images, test_images)\n", 225 | " model = self.generate_keras_model()\n", 226 | " self.train_model(model, train_images, train_labels, test_images, test_labels, epochs)\n", 227 | " self.model = model\n", 228 | " logging.info('Train cycle finished')" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": 3, 234 | "metadata": {}, 235 | "outputs": [ 236 | { 237 | "name": "stderr", 238 | "output_type": "stream", 239 | "text": [ 240 | "INFO:root:Starting E2E cycle:\n", 241 | "INFO:root:Arguments:\n", 242 | "INFO:root:epochs:10\n", 243 | "INFO:root:STEP 1 - Start loading data:\n", 244 | "INFO:root:Loading data finished\n", 245 | "INFO:root:STEP 2 - Start preparing data:\n", 246 | "INFO:root:Preparing data finished\n", 247 | "INFO:root:STEP 3 - Start generating model:\n", 248 | "INFO:root:Generating model finished\n", 249 | "INFO:root:STEP 4 - Starting training model:\n" 250 | ] 251 | }, 252 | { 253 | "name": "stdout", 254 | "output_type": "stream", 255 | "text": [ 256 | "Train on 60000 samples\n", 257 | "Epoch 1/10\n", 258 | "60000/60000 [==============================] - 4s 71us/sample - loss: 1.7185 - accuracy: 0.7504\n", 259 | "Epoch 2/10\n", 260 | "60000/60000 [==============================] - 4s 65us/sample - loss: 1.6730 - accuracy: 0.7896\n", 261 | "Epoch 3/10\n", 262 | "60000/60000 [==============================] - 4s 65us/sample - loss: 1.6643 - accuracy: 0.7975\n", 263 | "Epoch 4/10\n", 264 | "60000/60000 [==============================] - 4s 65us/sample - loss: 1.6579 - accuracy: 0.8037\n", 265 | "Epoch 5/10\n", 266 | "60000/60000 [==============================] - 6s 93us/sample - loss: 1.6531 - accuracy: 0.8089\n", 267 | "Epoch 6/10\n", 268 | "60000/60000 [==============================] - 4s 69us/sample - loss: 1.6499 - accuracy: 0.8117\n", 269 | "Epoch 7/10\n", 270 | "60000/60000 
[==============================] - 4s 65us/sample - loss: 1.6464 - accuracy: 0.8149\n", 271 | "Epoch 8/10\n", 272 | "60000/60000 [==============================] - 4s 64us/sample - loss: 1.6440 - accuracy: 0.8180\n", 273 | "Epoch 9/10\n", 274 | "60000/60000 [==============================] - 4s 64us/sample - loss: 1.6428 - accuracy: 0.8182\n", 275 | "Epoch 10/10\n", 276 | "60000/60000 [==============================] - 4s 64us/sample - loss: 1.6406 - accuracy: 0.8204\n" 277 | ] 278 | }, 279 | { 280 | "name": "stderr", 281 | "output_type": "stream", 282 | "text": [ 283 | "INFO:root:Train model finished\n", 284 | "INFO:root:Train cycle finished\n" 285 | ] 286 | } 287 | ], 288 | "source": [ 289 | "#Parameters\n", 290 | "epochs = 10\n", 291 | "#Sequence\n", 292 | "mySimpleTrainer_instance = mySimpleTrainer()\n", 293 | "mySimpleTrainer_instance.launch_train_cycle(epochs)" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": 5, 299 | "metadata": {}, 300 | "outputs": [], 301 | "source": [ 302 | "class mySimpleTrainerWrapper(object):\n", 303 | " \n", 304 | " def __init__(self):\n", 305 | " self.mySimpleTrainer_instance = mySimpleTrainer()\n", 306 | "\n", 307 | " def train(self):\n", 308 | " epochs = 10\n", 309 | " self.mySimpleTrainer_instance.launch_train_cycle(epochs)\n", 310 | "\n", 311 | " def predict(self, X, feature_names=None):\n", 312 | " \"\"\"Predict using the model for given ndarray.\"\"\"\n", 313 | " prediction = self.model.predict(data=X)\n", 314 | " # Do any postprocessing\n", 315 | " return prediction" 316 | ] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "execution_count": null, 321 | "metadata": {}, 322 | "outputs": [], 323 | "source": [ 324 | "##!pip install --user kubeflow-fairing" 325 | ] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": null, 330 | "metadata": {}, 331 | "outputs": [], 332 | "source": [ 333 | "##!pip install --user msrestazure" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": 25, 339 | "metadata": {}, 340 | "outputs": [ 341 | { 342 | "name": "stdout", 343 | "output_type": "stream", 344 | "text": [ 345 | "\n", 346 | "You are running on a Google Compute Engine virtual machine.\n", 347 | "It is recommended that you use service accounts for authentication.\n", 348 | "\n", 349 | "You can run:\n", 350 | "\n", 351 | " $ gcloud config set account `ACCOUNT`\n", 352 | "\n", 353 | "to switch accounts if necessary.\n", 354 | "\n", 355 | "Your credentials may be visible to others with access to this\n", 356 | "virtual machine. Are you sure you want to authenticate with\n", 357 | "your personal account?\n", 358 | "\n", 359 | "Do you want to continue (Y/n)? 
^C\n", 360 | "\n", 361 | "\n", 362 | "Command killed by keyboard interrupt\n", 363 | "\n" 364 | ] 365 | } 366 | ], 367 | "source": [ 368 | "!gcloud auth login --no-launch-browser" 369 | ] 370 | }, 371 | { 372 | "cell_type": "code", 373 | "execution_count": 71, 374 | "metadata": {}, 375 | "outputs": [ 376 | { 377 | "ename": "NameError", 378 | "evalue": "name 'cluster_builder' is not defined", 379 | "output_type": "error", 380 | "traceback": [ 381 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 382 | "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", 383 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0mpreprocessor\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpreprocessor\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 13\u001b[0m context_source=cluster.gcs_context.GCSContextSource())\n\u001b[0;32m---> 14\u001b[0;31m \u001b[0mcluster_builder\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuild\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 15\u001b[0m \u001b[0;31m#fairing.config.set_deployer(name='tfjob', worker_count=1, ps_count=1)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0;31m#train_tfjob = fairing.config.fn(mySimpleTrainerWrapper)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 384 | "\u001b[0;31mNameError\u001b[0m: name 'cluster_builder' is not defined" 385 | ] 386 | } 387 | ], 388 | "source": [ 389 | "import os\n", 390 | "from kubeflow import fairing\n", 391 | "from kubeflow.fairing.builders import cluster\n", 392 | "from kubeflow.fairing import cloud\n", 393 | "os.environ[\"TF_JOB_VERSION\"] = \"v1\"\n", 394 | "BASE_IMAGE='tensorflow/tensorflow:2.1.0'\n", 395 | "DOCKER_REGISTRY='gcr.io/{}/simple-notebook-example-tf'.format(gcp_project)\n", 396 | "fairing.config.set_builder(name='cluster', registry=DOCKER_REGISTRY, \n", 397 | " base_image=BASE_IMAGE,\n", 398 | " pod_spec_mutators=[cloud.gcp.add_gcp_credentials_if_exists],\n", 399 | " push=True,\n", 400 | " preprocessor=preprocessor,\n", 401 | " context_source=cluster.gcs_context.GCSContextSource())\n", 402 | "#fairing.config.set_deployer(name='tfjob', worker_count=1, ps_count=1)\n", 403 | "#train_tfjob = fairing.config.fn(mySimpleTrainerWrapper)" 404 | ] 405 | }, 406 | { 407 | "cell_type": "code", 408 | "execution_count": 65, 409 | "metadata": {}, 410 | "outputs": [ 411 | { 412 | "name": "stderr", 413 | "output_type": "stream", 414 | "text": [ 415 | "INFO:root:Using preprocessor: \n", 416 | "INFO:root:Using builder: \n", 417 | "INFO:root:Using deployer: \n", 418 | "INFO:root:Building image using cluster builder.\n", 419 | "WARNING:root:/home/jovyan/.local/lib/python3.6/site-packages/kubeflow/fairing/__init__.py already exists in Fairing context, skipping...\n", 420 | "INFO:root:Creating docker context: /tmp/fairing_context_5d68_gwo\n", 421 | "WARNING:root:/home/jovyan/.local/lib/python3.6/site-packages/kubeflow/fairing/__init__.py already exists in Fairing context, skipping...\n", 422 | "WARNING:kubeflow.fairing.cloud.gcp:Not able to find gcp credentials secret: user-gcp-sa\n", 423 | "WARNING:kubeflow.fairing.cloud.gcp:Trying workload identity service account: default-editor\n", 424 | "WARNING:kubeflow.fairing.kubernetes.manager:Waiting for fairing-builder-sc4kd-g7555 to start...\n", 425 | 
"WARNING:kubeflow.fairing.kubernetes.manager:Waiting for fairing-builder-sc4kd-g7555 to start...\n", 426 | "WARNING:kubeflow.fairing.kubernetes.manager:Waiting for fairing-builder-sc4kd-g7555 to start...\n", 427 | "INFO:kubeflow.fairing.kubernetes.manager:Pod started running True\n" 428 | ] 429 | }, 430 | { 431 | "name": "stdout", 432 | "output_type": "stream", 433 | "text": [ 434 | "E1108 19:09:38.290112 1 aws_credentials.go:77] while getting AWS credentials NoCredentialProviders: no valid providers in chain. Deprecated.\n", 435 | "\tFor verbose messaging see aws.Config.CredentialsChainVerboseErrors\n", 436 | "\u001b[36mINFO\u001b[0m[0005] Retrieving image manifest tensorflow/tensorflow:2.1.0\n", 437 | "E1108 19:09:38.439988 1 metadata.go:154] while reading 'google-dockercfg' metadata: http status code: 404 while fetching url http://metadata.google.internal./computeMetadata/v1/instance/attributes/google-dockercfg\n", 438 | "E1108 19:09:38.442201 1 metadata.go:166] while reading 'google-dockercfg-url' metadata: http status code: 404 while fetching url http://metadata.google.internal./computeMetadata/v1/instance/attributes/google-dockercfg-url\n", 439 | "\u001b[36mINFO\u001b[0m[0006] Retrieving image manifest tensorflow/tensorflow:2.1.0\n", 440 | "\u001b[36mINFO\u001b[0m[0007] Built cross stage deps: map[]\n", 441 | "\u001b[36mINFO\u001b[0m[0007] Retrieving image manifest tensorflow/tensorflow:2.1.0\n", 442 | "\u001b[36mINFO\u001b[0m[0007] Retrieving image manifest tensorflow/tensorflow:2.1.0\n", 443 | "\u001b[36mINFO\u001b[0m[0008] Executing 0 build triggers\n", 444 | "\u001b[36mINFO\u001b[0m[0008] Checking for cached layer gcr.io/velascoluis-test/simple-notebook-example-tf/fairing-job/cache:15e03feb87f8aa5a1ea325648b4812f1433f9b54efdbb947154010c24f5e1d14...\n", 445 | "\u001b[36mINFO\u001b[0m[0008] No cached layer found for cmd RUN if [ -e requirements.txt ];then pip install --no-cache -r requirements.txt; fi\n", 446 | "\u001b[36mINFO\u001b[0m[0008] Unpacking rootfs as cmd RUN if [ -e requirements.txt ];then pip install --no-cache -r requirements.txt; fi requires it.\n", 447 | "\u001b[36mINFO\u001b[0m[0046] WORKDIR /app/\n", 448 | "\u001b[36mINFO\u001b[0m[0046] cmd: workdir\n", 449 | "\u001b[36mINFO\u001b[0m[0046] Changed working directory to /app/\n", 450 | "\u001b[36mINFO\u001b[0m[0046] Creating directory /app/\n", 451 | "\u001b[36mINFO\u001b[0m[0046] Resolving 1 paths\n", 452 | "\u001b[36mINFO\u001b[0m[0046] Taking snapshot of files...\n", 453 | "\u001b[36mINFO\u001b[0m[0046] ENV FAIRING_RUNTIME 1\n", 454 | "\u001b[36mINFO\u001b[0m[0046] No files changed in this command, skipping snapshotting.\n", 455 | "\u001b[36mINFO\u001b[0m[0046] RUN if [ -e requirements.txt ];then pip install --no-cache -r requirements.txt; fi\n", 456 | "\u001b[36mINFO\u001b[0m[0046] Taking snapshot of full filesystem...\n", 457 | "\u001b[36mINFO\u001b[0m[0059] Resolving 26684 paths\n", 458 | "\u001b[36mINFO\u001b[0m[0064] cmd: /bin/sh\n", 459 | "\u001b[36mINFO\u001b[0m[0064] args: [-c if [ -e requirements.txt ];then pip install --no-cache -r requirements.txt; fi]\n", 460 | "\u001b[36mINFO\u001b[0m[0064] Running: [/bin/sh -c if [ -e requirements.txt ];then pip install --no-cache -r requirements.txt; fi]\n", 461 | "\u001b[36mINFO\u001b[0m[0064] Taking snapshot of full filesystem...\n", 462 | "\u001b[36mINFO\u001b[0m[0064] Resolving 26684 paths\n", 463 | "\u001b[36mINFO\u001b[0m[0067] No files were changed, appending empty layer to config. 
No layer added to image.\n", 464 | "\u001b[36mINFO\u001b[0m[0067] Pushing layer gcr.io/velascoluis-test/simple-notebook-example-tf/fairing-job/cache:15e03feb87f8aa5a1ea325648b4812f1433f9b54efdbb947154010c24f5e1d14 to cache now\n", 465 | "\u001b[36mINFO\u001b[0m[0067] COPY /app/ /app/\n", 466 | "\u001b[36mINFO\u001b[0m[0067] Resolving 10 paths\n", 467 | "\u001b[36mINFO\u001b[0m[0067] Taking snapshot of files...\n", 468 | "\u001b[36mINFO\u001b[0m[0067] Pushing layer gcr.io/velascoluis-test/simple-notebook-example-tf/fairing-job/cache:b511dab0e6f98ec3fc927ef71efb3d0cab404e8aa301b74214d12ee29b64e858 to cache now\n" 469 | ] 470 | }, 471 | { 472 | "name": "stderr", 473 | "output_type": "stream", 474 | "text": [ 475 | "WARNING:kubeflow.fairing.deployers.job.job:The tfjob fairing-tfjob-d4w9t launched.\n", 476 | "WARNING:kubeflow.fairing.kubernetes.manager:Waiting for fairing-tfjob-d4w9t-worker-0 to start...\n", 477 | "WARNING:kubeflow.fairing.kubernetes.manager:Waiting for fairing-tfjob-d4w9t-worker-0 to start...\n", 478 | "WARNING:kubeflow.fairing.kubernetes.manager:Waiting for fairing-tfjob-d4w9t-worker-0 to start...\n", 479 | "ERROR:kubeflow.fairing.kubernetes.manager:Failed to launch fairing-tfjob-d4w9t-worker-0, reason: Error, message: None\n" 480 | ] 481 | }, 482 | { 483 | "name": "stdout", 484 | "output_type": "stream", 485 | "text": [ 486 | "Traceback (most recent call last):\n", 487 | " File \"/app/function_shim.py\", line 78, in \n", 488 | " compare_version(args.python_version)\n", 489 | " File \"/app/function_shim.py\", line 50, in compare_version\n", 490 | " with Python ' + local_python_version + ' in the local environment.')\n", 491 | "RuntimeError: The Python version 2.7 mismatches with Python 3.6 in the local environment.\n" 492 | ] 493 | } 494 | ], 495 | "source": [ 496 | "train_tfjob()" 497 | ] 498 | }, 499 | { 500 | "cell_type": "code", 501 | "execution_count": null, 502 | "metadata": {}, 503 | "outputs": [], 504 | "source": [] 505 | } 506 | ], 507 | "metadata": { 508 | "kernelspec": { 509 | "display_name": "Python 3", 510 | "language": "python", 511 | "name": "python3" 512 | }, 513 | "language_info": { 514 | "codemirror_mode": { 515 | "name": "ipython", 516 | "version": 3 517 | }, 518 | "file_extension": ".py", 519 | "mimetype": "text/x-python", 520 | "name": "python", 521 | "nbconvert_exporter": "python", 522 | "pygments_lexer": "ipython3", 523 | "version": "3.6.9" 524 | } 525 | }, 526 | "nbformat": 4, 527 | "nbformat_minor": 4 528 | } -------------------------------------------------------------------------------- /03/notebook/my_model/saved_model.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datastacktv/kubeflow-introduction/088e32171861cd7a4e5f76b3dcda7c69dc18bed8/03/notebook/my_model/saved_model.pb -------------------------------------------------------------------------------- /03/notebook/my_model/variables/variables.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datastacktv/kubeflow-introduction/088e32171861cd7a4e5f76b3dcda7c69dc18bed8/03/notebook/my_model/variables/variables.data-00000-of-00001 -------------------------------------------------------------------------------- /03/notebook/my_model/variables/variables.index: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/datastacktv/kubeflow-introduction/088e32171861cd7a4e5f76b3dcda7c69dc18bed8/03/notebook/my_model/variables/variables.index -------------------------------------------------------------------------------- /03/pipelines/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datastacktv/kubeflow-introduction/088e32171861cd7a4e5f76b3dcda7c69dc18bed8/03/pipelines/.DS_Store -------------------------------------------------------------------------------- /03/pipelines/build_images.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Script for executing build processes of all components 4 | # If you get a run permission error, line 11 (chmod +x *.sh) makes the component scripts executable 5 | 6 | for d in */ ; 7 | do 8 | if [ "$d" != "src" ] 9 | then 10 | cd "$d" 11 | chmod +x *.sh 12 | ./*.sh 13 | echo "$d built" 14 | cd .. 15 | fi 16 | done 17 | -------------------------------------------------------------------------------- /03/pipelines/define_compile_model/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datastacktv/kubeflow-introduction/088e32171861cd7a4e5f76b3dcda7c69dc18bed8/03/pipelines/define_compile_model/.DS_Store -------------------------------------------------------------------------------- /03/pipelines/define_compile_model/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM tensorflow/tensorflow:latest-py3 2 | WORKDIR /define_compile_model 3 | COPY . /define_compile_model 4 | 5 | -------------------------------------------------------------------------------- /03/pipelines/define_compile_model/build_image.sh: -------------------------------------------------------------------------------- 1 | gcloud builds submit --tag gcr.io/${PROJECT_ID}/define_compile_model -------------------------------------------------------------------------------- /03/pipelines/define_compile_model/component.yaml: -------------------------------------------------------------------------------- 1 | name: kubeflow simple example 2 | description: Define and compile a very simple Keras DNN model 3 | 4 | outputs: 5 | - {name: model_path, type: String, description: DNN model path } 6 | 7 | implementation: 8 | container: 9 | image: gcr.io//define_compile_model:latest 10 | command: [/usr/bin/python3, src/define_compile_model.py] 11 | args: [ 12 | '--model_path', {outputPath: model_path} 13 | ] 14 | -------------------------------------------------------------------------------- /03/pipelines/define_compile_model/src/define_compile_model.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import json 4 | import os 5 | import argparse 6 | def define_compile_model(model_path): 7 | model = tf.keras.Sequential([ 8 | tf.keras.layers.Flatten(input_shape=(28, 28)), 9 | tf.keras.layers.Dense(128, activation='relu'), 10 | tf.keras.layers.Dense(10)]) 11 | model.compile(optimizer='adam', 12 | loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), 13 | metrics=['accuracy']) 14 | if not os.path.exists(os.path.dirname(model_path)): 15 | os.makedirs(os.path.dirname(model_path)) 16 | with open(model_path, 'w') as f: 17 | f.write(model.to_json()) 18 | 19 | def main(params): 20 | define_compile_model(params.model_path) 21 | 22 | if __name__ == "__main__": 23 | parser = 
argparse.ArgumentParser(description='define_compile_model') 24 | parser.add_argument('--model_path', type=str, default='None') 25 | params = parser.parse_args() 26 | main(params) 27 | 28 | 29 | -------------------------------------------------------------------------------- /03/pipelines/load_preprocess_data/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datastacktv/kubeflow-introduction/088e32171861cd7a4e5f76b3dcda7c69dc18bed8/03/pipelines/load_preprocess_data/.DS_Store -------------------------------------------------------------------------------- /03/pipelines/load_preprocess_data/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM tensorflow/tensorflow:latest-py3 2 | WORKDIR /load_preprocess_data 3 | COPY . /load_preprocess_data 4 | 5 | -------------------------------------------------------------------------------- /03/pipelines/load_preprocess_data/build_image.sh: -------------------------------------------------------------------------------- 1 | gcloud builds submit --tag gcr.io/${PROJECT_ID}/load_preprocess_data -------------------------------------------------------------------------------- /03/pipelines/load_preprocess_data/component.yaml: -------------------------------------------------------------------------------- 1 | name: kubeflow simple example 2 | description: Load and prepare data from a Keras dataset 3 | 4 | outputs: 5 | - {name: train_images_data_path, type: String, description: Train images } 6 | - {name: train_labels_data_path, type: String, description: Train labels} 7 | - {name: test_images_data_path, type: String, description: Test images} 8 | - {name: test_labels_data_path, type: String, description: Test labels} 9 | 10 | 11 | 12 | implementation: 13 | container: 14 | image: gcr.io//load_preprocess_data:latest 15 | command: [/usr/bin/python3, src/load_preprocess.py] 16 | args: [ 17 | '--train_images_data_path', {outputPath: train_images_data_path}, 18 | '--train_labels_data_path', {outputPath: train_labels_data_path}, 19 | '--test_images_data_path', {outputPath: test_images_data_path}, 20 | '--test_labels_data_path', {outputPath: test_labels_data_path} 21 | ] 22 | -------------------------------------------------------------------------------- /03/pipelines/load_preprocess_data/src/load_preprocess.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import json 4 | import os 5 | import argparse 6 | def load_preprocess(train_images_data_path,train_labels_data_path,test_images_data_path,test_labels_data_path): 7 | fashion_mnist = tf.keras.datasets.fashion_mnist 8 | (train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data() 9 | train_images = train_images / 255.0 10 | test_images = test_images / 255.0 11 | 12 | if not os.path.exists(os.path.dirname(train_images_data_path)): 13 | os.makedirs(os.path.dirname(train_images_data_path)) 14 | if not os.path.exists(os.path.dirname(train_labels_data_path)): 15 | os.makedirs(os.path.dirname(train_labels_data_path)) 16 | if not os.path.exists(os.path.dirname(test_images_data_path)): 17 | os.makedirs(os.path.dirname(test_images_data_path)) 18 | if not os.path.exists(os.path.dirname(test_labels_data_path)): 19 | os.makedirs(os.path.dirname(test_labels_data_path)) 20 | 21 | 22 | with open(train_images_data_path, 'w') as f: 23 | f.write(json.dumps(train_images.tolist())) 24 | 25 | with 
open(train_labels_data_path, 'w') as f: 26 | f.write(json.dumps(train_labels.tolist())) 27 | 28 | with open(test_images_data_path, 'w') as f: 29 | f.write(json.dumps(test_images.tolist())) 30 | 31 | with open(test_labels_data_path, 'w') as f: 32 | f.write(json.dumps(test_labels.tolist())) 33 | 34 | 35 | def main(params): 36 | load_preprocess(params.train_images_data_path,params.train_labels_data_path,params.test_images_data_path,params.test_labels_data_path) 37 | 38 | 39 | if __name__ == "__main__": 40 | parser = argparse.ArgumentParser(description='load_preprocess') 41 | parser.add_argument('--train_images_data_path', type=str, default='None') 42 | parser.add_argument('--train_labels_data_path', type=str, default='None') 43 | parser.add_argument('--test_images_data_path', type=str, default='None') 44 | parser.add_argument('--test_labels_data_path', type=str, default='None') 45 | params = parser.parse_args() 46 | main(params) -------------------------------------------------------------------------------- /03/pipelines/pipeline_def_execute.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import kfp 3 | import kfp.dsl as dsl 4 | import datetime 5 | import logging 6 | 7 | 8 | # Components load 9 | load_preprocess_data_component = kfp.components.load_component_from_file('load_preprocess_data/component.yaml') 10 | define_compile_model_component = kfp.components.load_component_from_file('define_compile_model/component.yaml') 11 | train_model_component = kfp.components.load_component_from_file('train_model/component.yaml') 12 | 13 | 14 | 15 | # Operations definition 16 | 17 | 18 | def load_preprocess_data_operation(): 19 | return load_preprocess_data_component() 20 | 21 | def define_compile_model_operation(): 22 | return define_compile_model_component() 23 | 24 | 25 | def train_model_operation(model_path, train_images_data_path, train_labels_data_path, test_images_data_path, test_labels_data_path): 26 | return train_model_component(model_path, train_images_data_path, train_labels_data_path, test_images_data_path, test_labels_data_path) 27 | 28 | 29 | 30 | 31 | def main(params): 32 | @dsl.pipeline( 33 | name='kubeflos simple pipeline', 34 | description='Pipeline for training a fashion mnist classifier' 35 | ) 36 | def simple_pipeline(): 37 | now = datetime.datetime.utcnow().strftime("%Y%m%d%H%M%S") 38 | workspace_name = 'simple_pipeline' + now 39 | load_preprocess_data_task = load_preprocess_data_operation() 40 | define_compile_model_task = define_compile_model_operation() 41 | train_model_task = train_model_component(define_compile_model_task.outputs['model_path'], 42 | load_preprocess_data_task.outputs['train_images_data_path'], 43 | load_preprocess_data_task.outputs['train_labels_data_path'], 44 | load_preprocess_data_task.outputs['test_images_data_path'], 45 | load_preprocess_data_task.outputs['test_labels_data_path']) 46 | 47 | # Generate .zip file 48 | pipeline_func = simple_pipeline 49 | pipeline_filename = pipeline_func.__name__ + '.kf_pipeline_containers.zip' 50 | kfp.compiler.Compiler().compile(pipeline_func, pipeline_filename) 51 | 52 | if __name__ == "__main__": 53 | parser = argparse.ArgumentParser(description='Component-based simple build-train pipeline') 54 | params = parser.parse_args() 55 | main(params) -------------------------------------------------------------------------------- /03/pipelines/train_model/.DS_Store: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/datastacktv/kubeflow-introduction/088e32171861cd7a4e5f76b3dcda7c69dc18bed8/03/pipelines/train_model/.DS_Store -------------------------------------------------------------------------------- /03/pipelines/train_model/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM tensorflow/tensorflow:latest-py3 2 | WORKDIR /train_model 3 | COPY . /train_model 4 | 5 | -------------------------------------------------------------------------------- /03/pipelines/train_model/build_image.sh: -------------------------------------------------------------------------------- 1 | gcloud builds submit --tag gcr.io/${PROJECT_ID}/train_model -------------------------------------------------------------------------------- /03/pipelines/train_model/component.yaml: -------------------------------------------------------------------------------- 1 | name: kubeflow simple example 2 | description: Train DNN model 3 | 4 | 5 | inputs: 6 | - {name: model_path, type: String, default: 'None', description: Compiled Keras model (JSON) path} 7 | - {name: train_images_data_path, type: String, default: 'None', description: Train images data path} 8 | - {name: train_labels_data_path, type: String, default: 'None', description: Train labels data path} 9 | - {name: test_images_data_path, type: String, default: 'None', description: Test images data path} 10 | - {name: test_labels_data_path, type: String, default: 'None', description: Test labels data path} 11 | 12 | 13 | outputs: 14 | - {name: trained_model_path, type: String, description: DNN model path } 15 | 16 | implementation: 17 | container: 18 | image: gcr.io//train_model:latest 19 | command: [/usr/bin/python3, src/train_model.py] 20 | args: [ 21 | '--model_path', {inputPath: model_path}, 22 | '--train_images_data_path', {inputPath: train_images_data_path}, 23 | '--train_labels_data_path', {inputPath: train_labels_data_path}, 24 | '--test_images_data_path', {inputPath: test_images_data_path}, 25 | '--test_labels_data_path', {inputPath: test_labels_data_path}, 26 | '--trained_model_path', {outputPath: trained_model_path} 27 | ] 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /03/pipelines/train_model/src/train_model.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import argparse 3 | import numpy as np 4 | import json 5 | import os 6 | 7 | def train_model(model_path, train_images_data_path, train_labels_data_path, test_images_data_path, test_labels_data_path,trained_model_path): 8 | with open(train_images_data_path) as f: 9 | train_images_data_path_json = json.load(f) 10 | train_images = np.array(train_images_data_path_json) 11 | with open(train_labels_data_path) as f: 12 | train_labels_data_path_json = json.load(f) 13 | train_labels = np.array(train_labels_data_path_json) 14 | with open(test_images_data_path) as f: 15 | test_images_data_path_json = json.load(f) 16 | test_images = np.array(test_images_data_path_json) 17 | with open(test_labels_data_path) as f: 18 | test_labels_data_path_json = json.load(f) 19 | test_labels = np.array(test_labels_data_path_json) 20 | 21 | 22 | model_file = open(model_path, 'r') 23 | model_json = model_file.read() 24 | model_file.close() 25 | model = tf.keras.models.model_from_json(model_json) 26 | model.compile(optimizer='adam', loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=['accuracy']) 27 | model.fit(train_images, train_labels, epochs=10) 28 | test_loss, test_acc = model.evaluate(test_images, test_labels, verbose=2) 29 | 
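# Report the held-out accuracy and persist the trained model to the component's trained_model_path output so downstream steps (e.g. 04/deploy_kfserving.py) can serve it.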
print('accuracy:', test_acc) 30 | 31 | 32 | if not os.path.exists(os.path.dirname(trained_model_path)): 33 | os.makedirs(os.path.dirname(trained_model_path)) 34 | model.save(trained_model_path, save_format='tf') 35 | 36 | 37 | def main(params): 38 | train_model(params.model_path,params.train_images_data_path,params.train_labels_data_path,params.test_images_data_path,params.test_labels_data_path,params.trained_model_path ) 39 | 40 | if __name__ == "__main__": 41 | parser = argparse.ArgumentParser(description='train_model') 42 | parser.add_argument('--model_path', type=str, default='None') 43 | parser.add_argument('--train_images_data_path', type=str, default='None') 44 | parser.add_argument('--train_labels_data_path', type=str, default='None') 45 | parser.add_argument('--test_images_data_path', type=str, default='None') 46 | parser.add_argument('--test_labels_data_path', type=str, default='None') 47 | parser.add_argument('--trained_model_path', type=str, default='None') 48 | params = parser.parse_args() 49 | main(params) -------------------------------------------------------------------------------- /04/deploy_kfserving.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import datetime 3 | import logging 4 | from kubernetes import client 5 | from kfserving import KFServingClient 6 | from kfserving import constants 7 | from kfserving import V1alpha2EndpointSpec 8 | from kfserving import V1alpha2PredictorSpec 9 | from kfserving import V1alpha2TensorflowSpec 10 | from kfserving import V1alpha2InferenceServiceSpec 11 | from kfserving import V1alpha2InferenceService 12 | from kubernetes.client import V1ResourceRequirements 13 | 14 | 15 | def deploy_model(namespace,trained_model_path): 16 | logging.basicConfig(level=logging.INFO) 17 | logging.info('Starting deploy model step ..') 18 | logging.info('Input data ..') 19 | logging.info('namespace:{}'.format(namespace)) 20 | logging.info('trained_model_path:{}'.format(trained_model_path)) 21 | 22 | logging.info('STEP: DEPLOY MODEL (1/2) Generating definition..') 23 | api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION 24 | now = datetime.datetime.utcnow().strftime("%Y%m%d%H%M%S") 25 | inference_service_name = 'simple-model'+now 26 | default_endpoint_spec = V1alpha2EndpointSpec( 27 | predictor=V1alpha2PredictorSpec( 28 | tensorflow=V1alpha2TensorflowSpec( 29 | storage_uri=trained_model_path, 30 | resources=V1ResourceRequirements( 31 | requests={'cpu': '100m', 'memory': '1Gi'}, 32 | limits={'cpu': '100m', 'memory': '1Gi'})))) 33 | 34 | isvc = V1alpha2InferenceService(api_version=api_version, 35 | kind=constants.KFSERVING_KIND, 36 | metadata=client.V1ObjectMeta( 37 | name=inference_service_name, 38 | annotations= 39 | { 40 | 'sidecar.istio.io/inject': 'false', 41 | 'autoscaling.knative.dev/target': '1' 42 | }, 43 | namespace=namespace), 44 | spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec)) 45 | 46 | #velascoluis: sidecar is disabled by https://github.com/knative/serving/issues/6829 47 | #Note: make sure trained model path starts with file:// or gs:// 48 | 49 | KFServing = KFServingClient() 50 | logging.info('STEP: DEPLOY MODEL (2/2) Creating inference service..') 51 | KFServing.create(isvc) 52 | logging.info('Inference service ' + inference_service_name + " created 
...") 53 | KFServing.get(inference_service_name, namespace=namespace, watch=True, timeout_seconds=120) 54 | logging.info('Deploy model step finished') 55 | 56 | def main(params): 57 | deploy_model(params.namespace, params.trained_model_path) 58 | 59 | if __name__ == "__main__": 60 | parser = argparse.ArgumentParser(description='Deploy TF model KFServing') 61 | parser.add_argument('--namespace', type=str, default='default') 62 | parser.add_argument('--trained_model_path', type=str, default='default') 63 | params = parser.parse_args() 64 | main(params) 65 | 66 | 67 | -------------------------------------------------------------------------------- /05/metadata_example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Metadata management in kubeflow" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "!pip install kubeflow-metadata --user" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "from kubeflow.metadata import metadata\n", 26 | "from datetime import datetime\n", 27 | "from uuid import uuid4" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 2, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "METADATA_STORE_HOST = \"metadata-grpc-service.kubeflow\" # default DNS of Kubeflow Metadata gRPC serivce.\n", 37 | "METADATA_STORE_PORT = 8080" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 3, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "#Define a workspace\n", 47 | "ws_tf = metadata.Workspace(\n", 48 | " \n", 49 | " store=metadata.Store(grpc_host=METADATA_STORE_HOST, grpc_port=METADATA_STORE_PORT),\n", 50 | " name=\"kubeflow_dnn_tf\",\n", 51 | " description=\"Simple DNN workspace\",\n", 52 | " labels={\"Execution_key_1\": \"value_1\"})" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 4, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "#Create a run inside the workspace\n", 62 | "r = metadata.Run(\n", 63 | " workspace=ws_tf,\n", 64 | " name=\"run-\" + datetime.utcnow().isoformat(\"T\") ,\n", 65 | " description=\"A example run\"\n", 66 | ")" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 5, 72 | "metadata": {}, 73 | "outputs": [ 74 | { 75 | "name": "stdout", 76 | "output_type": "stream", 77 | "text": [ 78 | "An execution was created with id 16\n" 79 | ] 80 | } 81 | ], 82 | "source": [ 83 | "#Create an execution\n", 84 | "exec = metadata.Execution(\n", 85 | " name = \"execution\" + datetime.utcnow().isoformat(\"T\") ,\n", 86 | " workspace=ws_tf,\n", 87 | " run=r,\n", 88 | " description=\"DNN Execution example\",\n", 89 | ")\n", 90 | "print(\"An execution was created with id %s\" % exec.id)" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 6, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "##Run model ...." 
100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 7, 105 | "metadata": {}, 106 | "outputs": [ 107 | { 108 | "name": "stdout", 109 | "output_type": "stream", 110 | "text": [ 111 | "Data set id is 11 with version 'data_set_version_cecc67e2-b908-457d-a627-7123570ea03f'\n" 112 | ] 113 | } 114 | ], 115 | "source": [ 116 | "#Log information about the input data used\n", 117 | "date_set_version = \"data_set_version_\" + str(uuid4())\n", 118 | "data_set = exec.log_input(\n", 119 | " metadata.DataSet(\n", 120 | " description=\"Sample dateset - fashion mnist\",\n", 121 | " name=\"data-exraction\",\n", 122 | " owner=\"luis@luis.com\",\n", 123 | " uri=\"gs://...\",\n", 124 | " version=date_set_version,\n", 125 | " query=\"SELECT * FROM table ...\"))\n", 126 | "print(\"Data set id is {0.id} with version '{0.version}'\".format(data_set))" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 8, 132 | "metadata": {}, 133 | "outputs": [ 134 | { 135 | "name": "stdout", 136 | "output_type": "stream", 137 | "text": [ 138 | "kubeflow.metadata.metadata.Model(workspace=None, name='MNIST Fashion', description='model to recognize classify fashion', owner='luis@luis.com', uri='gs://...', version='model_version_944e639b-db1f-45a5-b13b-c9ec282e7451', model_type='neural network', training_framework={'name': 'tensorflow', 'version': 'v1.0'}, hyperparameters={'layers': [10, 3, 1], 'early_stop': True}, labels={'mylabel': 'l1'}, id=12, create_time='2020-12-11T18:56:39.711894Z', kwargs={})\n", 139 | "\n", 140 | "Model id is 12 and version is model_version_944e639b-db1f-45a5-b13b-c9ec282e7451\n" 141 | ] 142 | } 143 | ], 144 | "source": [ 145 | "#Log information about a trained model\n", 146 | "model_version = \"model_version_\" + str(uuid4())\n", 147 | "model = exec.log_output(\n", 148 | " metadata.Model(\n", 149 | " name=\"MNIST Fashion\",\n", 150 | " description=\"model to recognize classify fashion\",\n", 151 | " owner=\"luis@luis.com\",\n", 152 | " uri=\"gs://...\",\n", 153 | " model_type=\"neural network\",\n", 154 | " training_framework={\n", 155 | " \"name\": \"tensorflow\",\n", 156 | " \"version\": \"v1.0\"\n", 157 | " },\n", 158 | " hyperparameters={\n", 159 | " \n", 160 | " \"layers\": [10, 3, 1],\n", 161 | " \"early_stop\": True\n", 162 | " },\n", 163 | " version=model_version,\n", 164 | " labels={\"mylabel\": \"l1\"}))\n", 165 | "print(model)\n", 166 | "print(\"\\nModel id is {0.id} and version is {0.version}\".format(model))" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": 9, 172 | "metadata": {}, 173 | "outputs": [ 174 | { 175 | "name": "stdout", 176 | "output_type": "stream", 177 | "text": [ 178 | "Metrics id is 13\n" 179 | ] 180 | } 181 | ], 182 | "source": [ 183 | "#Log metrics information about the model\n", 184 | "metrics = exec.log_output(\n", 185 | " metadata.Metrics(\n", 186 | " name=\"Fashion MNIST-evaluation\",\n", 187 | " description=\"validating the Fashion MNIST model to recognize fashion clothes\",\n", 188 | " owner=\"luis@luis.com\",\n", 189 | " uri=\"gs://...\",\n", 190 | " data_set_id=str(data_set.id),\n", 191 | " model_id=str(model.id),\n", 192 | " metrics_type=metadata.Metrics.VALIDATION,\n", 193 | " values={\"accuracy\": 0.95},\n", 194 | " labels={\"mylabel\": \"l1\"}))\n", 195 | "print(\"Metrics id is %s\" % metrics.id)" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": null, 201 | "metadata": {}, 202 | "outputs": [], 203 | "source": [] 204 | } 205 | ], 206 | "metadata": { 207 | 
"kernelspec": { 208 | "display_name": "Python 3", 209 | "language": "python", 210 | "name": "python3" 211 | }, 212 | "language_info": { 213 | "codemirror_mode": { 214 | "name": "ipython", 215 | "version": 3 216 | }, 217 | "file_extension": ".py", 218 | "mimetype": "text/x-python", 219 | "name": "python", 220 | "nbconvert_exporter": "python", 221 | "pygments_lexer": "ipython3", 222 | "version": "3.6.9" 223 | } 224 | }, 225 | "nbformat": 4, 226 | "nbformat_minor": 4 227 | } 228 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 datastack.tv 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Introduction to kubeflow 2 | 3 | This repository contains code examples for the Introduction to [kubeflow](https://www.kubeflow.org) course on [datastack](https://datastack.tv/) 4 | 5 | | **Course** | Kubeflow component | 6 | | ------------- |:-------------:| 7 | | 01 | - | 8 | | 02 | Deployment on GCP | 9 | | 03 | Jupyter notebooks + TFJob Operator katib + fairing + pipelines | 10 | | 04 | KFServing | 11 | | 05 | metadata | 12 | --------------------------------------------------------------------------------