├── LICENSE ├── README.md ├── cloud_run └── twilio_vision │ ├── Dockerfile │ ├── README.md │ └── src │ ├── requirements.txt │ └── whats_that.py ├── datalab └── facets │ ├── README.md │ └── facets_snippets.ipynb └── ml ├── README.md ├── automl └── tables │ ├── kfp_e2e │ ├── README.md │ ├── create_dataset_for_tables │ │ ├── tables_component.py │ │ └── tables_component.yaml │ ├── create_model_for_tables │ │ ├── tables_component.py │ │ ├── tables_component.yaml │ │ ├── tables_eval_component.py │ │ ├── tables_eval_component.yaml │ │ ├── tables_eval_metrics_component.py │ │ └── tables_eval_metrics_component.yaml │ ├── deploy_model_for_tables │ │ ├── convert_oss.py │ │ ├── exported_model_deploy.py │ │ ├── instances.json │ │ ├── model_serve_template.yaml │ │ ├── tables_deploy_component.py │ │ └── tables_deploy_component.yaml │ ├── import_data_from_bigquery │ │ ├── tables_component.py │ │ ├── tables_component.yaml │ │ ├── tables_schema_component.py │ │ └── tables_schema_component.yaml │ ├── tables_containers │ │ └── model-service-launcher │ │ │ ├── Dockerfile │ │ │ └── build.sh │ ├── tables_pipeline_caip.py │ ├── tables_pipeline_caip.py.tar.gz │ ├── tables_pipeline_kf.py │ └── tables_pipeline_kf.py.tar.gz │ ├── model_export │ ├── Dockerfile.template │ ├── automl_tables_model_export_cloud_run.md │ ├── convert_oss.py │ └── instances.json │ └── xai │ ├── README.md │ ├── automl_tables_xai.ipynb │ └── bigquery_examples.md ├── census_train_and_eval ├── README.md ├── config_custom_gpus.yaml ├── hptuning_config.yaml ├── test.json ├── trainer │ ├── __init__.py │ ├── model.py │ └── task.py └── using_tf.estimator.train_and_evaluate.ipynb ├── kubeflow-pipelines ├── README.md ├── README_github_summ.md ├── README_taxidata_examples.md ├── components │ ├── README.md │ ├── automl │ │ ├── container │ │ │ ├── Dockerfile │ │ │ └── build.sh │ │ └── dataset_train │ │ │ └── dataset_model.py │ ├── cmle │ │ ├── containers │ │ │ ├── base │ │ │ │ ├── Dockerfile │ │ │ │ └── build.sh │ │ │ └── cmle_deploy │ │ │ │ ├── Dockerfile │ │ │ │ └── build.sh │ │ └── deploy │ │ │ └── deploy_model.py │ └── older │ │ ├── dataflow │ │ ├── containers │ │ │ ├── base │ │ │ │ ├── Dockerfile │ │ │ │ └── build.sh │ │ │ ├── tfma │ │ │ │ ├── Dockerfile │ │ │ │ └── build.sh │ │ │ └── tft │ │ │ │ ├── Dockerfile │ │ │ │ └── build.sh │ │ ├── taxi_schema │ │ │ └── taxi_schema │ │ │ │ ├── __init__.py │ │ │ │ └── taxi_schema.py │ │ ├── tfma │ │ │ ├── analysis │ │ │ │ └── setup.py │ │ │ ├── model_analysis-taxi.py │ │ │ └── tfma_expers.ipynb │ │ └── tft │ │ │ ├── mcsv_coder.py │ │ │ ├── preprocessing.py │ │ │ ├── preprocessing2.py │ │ │ ├── schema.pbtxt │ │ │ ├── taxi_preprocess_bq.py │ │ │ └── transform │ │ │ └── setup.py │ │ ├── kubeflow │ │ ├── containers │ │ │ ├── launcher │ │ │ │ ├── Dockerfile │ │ │ │ └── build.sh │ │ │ ├── tf-serving-gh │ │ │ │ ├── Dockerfile │ │ │ │ └── build.sh │ │ │ ├── tf-serving │ │ │ │ ├── Dockerfile │ │ │ │ └── build.sh │ │ │ └── trainer │ │ │ │ ├── Dockerfile │ │ │ │ └── build.sh │ │ ├── launcher │ │ │ ├── train.py │ │ │ └── train.template.yaml │ │ ├── taxi_model │ │ │ ├── __init__.py │ │ │ ├── data │ │ │ │ ├── eval │ │ │ │ │ └── data.csv │ │ │ │ └── train │ │ │ │ │ └── data.csv │ │ │ ├── schema.pbtxt │ │ │ ├── setup.py │ │ │ └── trainer │ │ │ │ ├── __init__.py │ │ │ │ ├── model.py │ │ │ │ ├── task.py │ │ │ │ └── taxi.py │ │ ├── tf-serving-gh │ │ │ ├── deploy-tf-serve.py │ │ │ └── tf-serve-template.yaml │ │ └── tf-serving │ │ │ ├── chicago_taxi_client.py │ │ │ ├── deploy-tf-serve.py │ │ │ ├── schema.pbtxt │ │ │ └── 
tf-serve-template.yaml │ │ └── t2t │ │ ├── containers │ │ ├── base │ │ │ ├── Dockerfile │ │ │ └── build.sh │ │ ├── t2t_app │ │ │ ├── Dockerfile │ │ │ └── build.sh │ │ ├── t2t_proc │ │ │ ├── Dockerfile │ │ │ └── build.sh │ │ ├── t2t_train │ │ │ ├── Dockerfile │ │ │ └── build.sh │ │ └── webapp-launcher │ │ │ ├── Dockerfile │ │ │ └── build.sh │ │ ├── t2t-app │ │ └── app │ │ │ ├── ghsumm │ │ │ ├── __init__.py │ │ │ ├── setup.py │ │ │ └── trainer │ │ │ │ ├── __init__.py │ │ │ │ └── problem.py │ │ │ ├── github_issues_sample.csv │ │ │ ├── main.py │ │ │ └── templates │ │ │ └── index.html │ │ ├── t2t-proc │ │ └── ghsumm │ │ │ ├── __init__.py │ │ │ ├── setup.py │ │ │ └── trainer │ │ │ ├── __init__.py │ │ │ └── problem.py │ │ ├── t2t-train │ │ ├── ghsumm │ │ │ ├── __init__.py │ │ │ ├── setup.py │ │ │ └── trainer │ │ │ │ ├── __init__.py │ │ │ │ └── problem.py │ │ └── train_model.py │ │ └── webapp-launcher │ │ ├── deploy-webapp.py │ │ └── t2tapp-template.yaml ├── keras_tuner │ ├── README.md │ ├── components │ │ ├── eval_metrics_component.yaml │ │ ├── kubeflow-resources │ │ │ ├── bikesw_training │ │ │ │ ├── bikes_weather_limited.py │ │ │ │ ├── bw_hptune_standalone.py │ │ │ │ ├── bwmodel │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── model.py │ │ │ │ ├── deploy_tuner.py │ │ │ │ ├── eval_metrics.py │ │ │ │ ├── kchief_deployment_templ.yaml │ │ │ │ └── ktuners_deployment_templ.yaml │ │ │ ├── cloudbuild.yaml │ │ │ ├── containers │ │ │ │ ├── bikesw_training │ │ │ │ │ ├── Dockerfile │ │ │ │ │ ├── build.sh │ │ │ │ │ ├── cloudbuild.yaml │ │ │ │ │ └── copydir.sh │ │ │ │ ├── bikesw_training_hptune │ │ │ │ │ ├── Dockerfile │ │ │ │ │ ├── build.sh │ │ │ │ │ ├── cloudbuild.yaml │ │ │ │ │ └── copydir.sh │ │ │ │ ├── deploy_jobs │ │ │ │ │ ├── Dockerfile │ │ │ │ │ ├── build.sh │ │ │ │ │ ├── cloudbuild.yaml │ │ │ │ │ └── copydir.sh │ │ │ │ └── tf-serving │ │ │ │ │ ├── Dockerfile │ │ │ │ │ ├── build.sh │ │ │ │ │ ├── cloudbuild.yaml │ │ │ │ │ └── copydir.sh │ │ │ └── tf-serving │ │ │ │ ├── deploy-tfserve.py │ │ │ │ └── tf-serve-template.yaml │ │ ├── serve_component.yaml │ │ ├── tfdv │ │ │ ├── Dockerfile │ │ │ ├── requirements.txt │ │ │ ├── tfdv.py │ │ │ └── tfdv_compare.py │ │ ├── tfdv_component.yaml │ │ ├── tfdv_drift_component.yaml │ │ └── train_component.yaml │ ├── example_pipelines │ │ ├── bw_ktune.py │ │ ├── bw_ktune.py.tar.gz │ │ ├── bw_ktune_metrics.py │ │ ├── bw_tfdv.py │ │ ├── bw_train.py │ │ └── bw_train_metrics.py │ └── notebooks │ │ └── metrics_eval_component.ipynb ├── samples │ ├── automl │ │ ├── README.md │ │ ├── dataset_and_train.py │ │ └── dataset_and_train.py.tar.gz │ └── kubeflow-tf │ │ ├── README.md │ │ └── older │ │ ├── README.md │ │ ├── gh_summ.py │ │ ├── gh_summ.py.tar.gz │ │ ├── gh_summ_serve.py │ │ ├── gh_summ_serve.py.tar.gz │ │ ├── pipelines-kubecon.ipynb │ │ ├── workflow1.py │ │ └── workflow2.py └── sbtb │ ├── README.md │ ├── components │ ├── kubeflow-resources │ │ ├── bikesw_training │ │ │ └── bikes_weather.py │ │ ├── containers │ │ │ ├── bikesw_training │ │ │ │ ├── Dockerfile │ │ │ │ └── build.sh │ │ │ └── tf-serving │ │ │ │ ├── Dockerfile │ │ │ │ └── build.sh │ │ └── tf-serving │ │ │ ├── deploy-tfserve.py │ │ │ └── tf-serve-template.yaml │ ├── serve_component.yaml │ └── train_component.yaml │ └── example_pipelines │ └── bw.py ├── notebook_examples ├── TF_linear_regressor.ipynb ├── caipp │ ├── caipp_connect.ipynb │ └── kfp_in_a_notebook.ipynb ├── functions │ ├── hosted_kfp_gcf.ipynb │ ├── main.py │ └── requirements.txt ├── hosted_kfp │ └── event_triggered_kfp_pipeline_bw.ipynb ├── 
keras_linear_regressor.ipynb └── mnist_estimator.ipynb └── vertex_pipelines └── pytorch └── cifar ├── Dockerfile ├── Dockerfile-gpu ├── Dockerfile-gpu-ct ├── LICENSE ├── README.md ├── input.json ├── pytorch-pipeline ├── .gitignore ├── README.md ├── cifar10_datamodule.py ├── cifar10_pytorch.py ├── cifar10_train.py ├── process_test.py ├── pytorch_pipeline │ ├── Dockerfile │ ├── __init__.py │ ├── components │ │ ├── base │ │ │ ├── __init__.py │ │ │ ├── base_component.py │ │ │ └── base_executor.py │ │ └── trainer │ │ │ ├── __init__.py │ │ │ ├── component.py │ │ │ ├── executor.py │ │ │ └── generic_executor.py │ └── examples │ │ ├── __init__.py │ │ └── cifar10 │ │ ├── cifar10_datamodule.py │ │ ├── cifar10_pre_process.py │ │ ├── cifar10_pytorch.py │ │ ├── cifar10_train.py │ │ ├── input.json │ │ └── utils.py └── training_task.py ├── pytorch_cifar10_vertex_pipelines.ipynb ├── requirements.txt └── screenshots ├── pt-profiler.png └── vertex-tensorboard.png /README.md: -------------------------------------------------------------------------------- 1 | 2 | This is a repo for small Google Cloud Platform (GCP) snippets and examples used in blog posts etc. 3 | 4 | Contributions are not currently accepted. This is not an official Google product. -------------------------------------------------------------------------------- /cloud_run/twilio_vision/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # For more information about this base image and dockerfile, see 16 | # https://github.com/GoogleCloudPlatform/python-docker 17 | 18 | FROM python:3.7 19 | 20 | ENV APP_HOME /app 21 | WORKDIR $APP_HOME 22 | # COPY . 23 | 24 | ADD src /app 25 | RUN pip install -r /app/requirements.txt 26 | 27 | CMD gunicorn -w 4 -b :$PORT whats_that:app 28 | -------------------------------------------------------------------------------- /cloud_run/twilio_vision/src/requirements.txt: -------------------------------------------------------------------------------- 1 | google-api-python-client 2 | oauth2client 3 | gunicorn 4 | flask 5 | twilio 6 | requests 7 | -------------------------------------------------------------------------------- /datalab/facets/README.md: -------------------------------------------------------------------------------- 1 | 2 | [To be added.] -------------------------------------------------------------------------------- /datalab/facets/facets_snippets.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "```\n", 8 | "Copyright 2017 Google Inc. 
All rights reserved.\n", 9 | "Licensed under the Apache License, Version 2.0 (the \"License\");\n", 10 | "you may not use this file except in compliance with the License.\n", 11 | "You may obtain a copy of the License at\n", 12 | " http://www.apache.org/licenses/LICENSE-2.0\n", 13 | "Unless required by applicable law or agreed to in writing, software\n", 14 | "distributed under the License is distributed on an \"AS IS\" BASIS,\n", 15 | "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", 16 | "See the License for the specific language governing permissions and\n", 17 | "limitations under the License.\n", 18 | "```" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": { 25 | "collapsed": false 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "import google.datalab.bigquery as bq\n", 30 | "import pandas as pd" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": { 37 | "collapsed": true 38 | }, 39 | "outputs": [], 40 | "source": [ 41 | "%%bq query -n requests\n", 42 | "SELECT *\n", 43 | "FROM `bigquery-public-data.nhtsa_traffic_fatalities.accident_2015` \n", 44 | "LIMIT 10000" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": { 51 | "collapsed": false 52 | }, 53 | "outputs": [], 54 | "source": [ 55 | "df = requests.execute(output_options=bq.QueryOutput.dataframe()).result()" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": { 62 | "collapsed": false 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "from google.datalab.ml import FacetsOverview\n", 67 | "\n", 68 | "FacetsOverview().plot({'data': df})" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "...." 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": { 82 | "collapsed": false 83 | }, 84 | "outputs": [], 85 | "source": [ 86 | "from google.datalab.ml import FacetsDiveview\n", 87 | "\n", 88 | "FacetsDiveview().plot(df)" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": { 95 | "collapsed": true 96 | }, 97 | "outputs": [], 98 | "source": [] 99 | } 100 | ], 101 | "metadata": { 102 | "kernelspec": { 103 | "display_name": "Python 2", 104 | "language": "python", 105 | "name": "python2" 106 | }, 107 | "language_info": { 108 | "codemirror_mode": { 109 | "name": "ipython", 110 | "version": 2 111 | }, 112 | "file_extension": ".py", 113 | "mimetype": "text/x-python", 114 | "name": "python", 115 | "nbconvert_exporter": "python", 116 | "pygments_lexer": "ipython2", 117 | "version": "2.7.12" 118 | } 119 | }, 120 | "nbformat": 4, 121 | "nbformat_minor": 2 122 | } 123 | -------------------------------------------------------------------------------- /ml/README.md: -------------------------------------------------------------------------------- 1 | 2 | This directory contains various ML-related examples. 3 | 4 | (The Cloud Shell tutorials have moved [here](https://github.com/GoogleCloudPlatform/cloud-shell-tutorials/tree/master/ml)). 
5 | -------------------------------------------------------------------------------- /ml/automl/tables/kfp_e2e/create_dataset_for_tables/tables_component.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import NamedTuple 16 | 17 | 18 | def automl_create_dataset_for_tables( 19 | gcp_project_id: str, 20 | gcp_region: str, 21 | dataset_display_name: str, 22 | api_endpoint: str = None, 23 | tables_dataset_metadata: dict = {}, 24 | ) -> NamedTuple('Outputs', [('dataset_path', str), ('create_time', str), ('dataset_id', str)]): 25 | 26 | import sys 27 | import subprocess 28 | subprocess.run([sys.executable, '-m', 'pip', 'install', 'googleapis-common-protos==1.6.0', 29 | '--no-warn-script-location'], 30 | env={'PIP_DISABLE_PIP_VERSION_CHECK': '1'}, check=True) 31 | subprocess.run([sys.executable, '-m', 'pip', 'install', 'google-cloud-automl==0.9.0', 32 | '--quiet', '--no-warn-script-location'], 33 | env={'PIP_DISABLE_PIP_VERSION_CHECK': '1'}, check=True) 34 | 35 | import google 36 | import logging 37 | from google.api_core.client_options import ClientOptions 38 | from google.cloud import automl_v1beta1 as automl 39 | 40 | logging.getLogger().setLevel(logging.INFO) # TODO: make level configurable 41 | # TODO: we could instead check for region 'eu' and use 'eu-automl.googleapis.com:443'endpoint 42 | # in that case, instead of requiring endpoint to be specified. 
43 | if api_endpoint: 44 | client_options = ClientOptions(api_endpoint=api_endpoint) 45 | client = automl.TablesClient(project=gcp_project_id, region=gcp_region, 46 | client_options=client_options) 47 | else: 48 | client = automl.TablesClient(project=gcp_project_id, region=gcp_region) 49 | 50 | try: 51 | # Create a dataset with the given display name 52 | dataset = client.create_dataset(dataset_display_name, metadata=tables_dataset_metadata) 53 | # Log info about the created dataset 54 | logging.info("Dataset name: {}".format(dataset.name)) 55 | logging.info("Dataset id: {}".format(dataset.name.split("/")[-1])) 56 | logging.info("Dataset display name: {}".format(dataset.display_name)) 57 | logging.info("Dataset metadata:") 58 | logging.info("\t{}".format(dataset.tables_dataset_metadata)) 59 | logging.info("Dataset example count: {}".format(dataset.example_count)) 60 | logging.info("Dataset create time:") 61 | logging.info("\tseconds: {}".format(dataset.create_time.seconds)) 62 | logging.info("\tnanos: {}".format(dataset.create_time.nanos)) 63 | print(str(dataset)) 64 | dataset_id = dataset.name.rsplit('/', 1)[-1] 65 | return (dataset.name, str(dataset.create_time), dataset_id) 66 | except google.api_core.exceptions.GoogleAPICallError as e: 67 | logging.warning(e) 68 | raise e 69 | 70 | 71 | if __name__ == '__main__': 72 | import kfp 73 | kfp.components.func_to_container_op(automl_create_dataset_for_tables, 74 | output_component_file='tables_component.yaml', base_image='python:3.7') 75 | -------------------------------------------------------------------------------- /ml/automl/tables/kfp_e2e/deploy_model_for_tables/convert_oss.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # tested with TF1.14 16 | import sys 17 | import tensorflow as tf 18 | 19 | from absl import app 20 | from absl import flags 21 | from tensorflow.core.protobuf import saved_model_pb2 22 | from tensorflow.python.summary import summary 23 | 24 | FLAGS = flags.FLAGS 25 | 26 | flags.DEFINE_string('saved_model', '', 'The location of the saved_model.pb to visualize.') 27 | flags.DEFINE_string('output_dir', '', 28 | 'The location for the Tensorboard log to begin visualization from.') 29 | 30 | def import_to_tensorboard(saved_model, output_dir): 31 | """View an imported saved_model.pb as a graph in Tensorboard. 32 | 33 | Args: 34 | saved_model: The location of the saved_model.pb to visualize. 35 | output_dir: The location for the Tensorboard log to begin visualization from. 36 | 37 | Usage: 38 | Call this function with your model location and desired log directory. 39 | Launch Tensorboard by pointing it to the log directory. 40 | View your imported `.pb` model as a graph. 
41 | """ 42 | with open(saved_model, "rb") as f: 43 | sm = saved_model_pb2.SavedModel() 44 | sm.ParseFromString(f.read()) 45 | if 1 != len(sm.meta_graphs): 46 | print('More than one graph found. Not sure which to write') 47 | sys.exit(1) 48 | graph_def = sm.meta_graphs[0].graph_def 49 | 50 | pb_visual_writer = summary.FileWriter(output_dir) 51 | pb_visual_writer.add_graph(None, graph_def=graph_def) 52 | print("Model Imported. Visualize by running: " 53 | "tensorboard --logdir={}".format(output_dir)) 54 | 55 | 56 | def main(argv): 57 | import_to_tensorboard(FLAGS.saved_model, FLAGS.output_dir) 58 | 59 | 60 | if __name__ == '__main__': 61 | app.run(main) 62 | -------------------------------------------------------------------------------- /ml/automl/tables/kfp_e2e/deploy_model_for_tables/exported_model_deploy.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import argparse 16 | import os 17 | import logging 18 | import subprocess 19 | 20 | 21 | def main(): 22 | parser = argparse.ArgumentParser(description='Serving webapp') 23 | parser.add_argument( 24 | '--model_name', 25 | required=True) 26 | parser.add_argument( 27 | '--image_name', 28 | required=True) 29 | parser.add_argument( 30 | '--namespace', 31 | default='default') 32 | args = parser.parse_args() 33 | 34 | NAMESPACE = 'default' 35 | logging.getLogger().setLevel(logging.INFO) 36 | logging.info('Generating training template.') 37 | 38 | template_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'model_serve_template.yaml') 39 | target_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'model_serve.yaml') 40 | mname = args.model_name.replace('_', '-') 41 | logging.info("using model name: {}, image {}, and namespace: {}".format( 42 | mname, args.image_name, NAMESPACE)) 43 | 44 | with open(template_file, 'r') as f: 45 | with open(target_file, "w") as target: 46 | data = f.read() 47 | changed = data.replace('MODEL_NAME', mname).replace( 48 | 'IMAGE_NAME', args.image_name).replace('NAMESPACE', NAMESPACE) 49 | target.write(changed) 50 | 51 | logging.info('deploying...') 52 | subprocess.call(['kubectl', 'create', '-f', '/ml/model_serve.yaml']) 53 | 54 | # kubectl -n default port-forward svc/ 8080:80 55 | # curl -X POST --data @./instances.json http://localhost:8080/predict 56 | 57 | if __name__ == "__main__": 58 | main() 59 | -------------------------------------------------------------------------------- /ml/automl/tables/kfp_e2e/deploy_model_for_tables/instances.json: -------------------------------------------------------------------------------- 1 | { 2 | "instances": [ 3 | { 4 | "bike_id": "6179", 5 | "day_of_week": "6", 6 | "end_latitude": 51.50379168, 7 | "end_longitude": -0.11282408, 8 | "end_station_id": "154", 9 | "euclidean": 2513.254047872678, 10 | "loc_cross": "POINT(-0.08 51.52)POINT(-0.11 51.5)", 11 | "max": 56.8, 12 
| "min": 50.9, 13 | "prcp": 0, 14 | "ts": 1445624280, 15 | "start_latitude": 51.51615461, 16 | "start_longitude": -0.082422399, 17 | "start_station_id": "217", 18 | "temp": 54, 19 | "dewp": 44 20 | }, 21 | { 22 | "bike_id": "5373", 23 | "day_of_week": "3", 24 | "end_latitude": 51.52059681, 25 | "end_longitude": -0.116688468, 26 | "end_station_id": "68", 27 | "euclidean": 1181.215448450556, 28 | "loc_cross": "POINT(-0.13 51.53)POINT(-0.12 51.52)", 29 | "max": 56.7, 30 | "min": 45.9, 31 | "prcp": 0, 32 | "ts": 1494317220, 33 | "start_latitude": 51.52683806, 34 | "start_longitude": -0.130504336, 35 | "start_station_id": "214", 36 | "temp": 50.5, 37 | "dewp": 37.1 38 | }, 39 | { 40 | "bike_id": "5373", 41 | "day_of_week": "3", 42 | "end_latitude": 51.52059681, 43 | "end_longitude": -0.116688468, 44 | "end_station_id": "68", 45 | "euclidean": 3589.5146210024977, 46 | "loc_cross": "POINT(-0.07 51.52)POINT(-0.12 51.52)", 47 | "max": 44.6, 48 | "min": 34.0, 49 | "prcp": 0, 50 | "ts": 1480407420, 51 | "start_latitude": 51.52388, 52 | "start_longitude": -0.065076, 53 | "start_station_id": "445", 54 | "temp": 38.2, 55 | "dewp": 28.6 56 | } 57 | ] 58 | } 59 | -------------------------------------------------------------------------------- /ml/automl/tables/kfp_e2e/deploy_model_for_tables/model_serve_template.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | labels: 6 | app: MODEL_NAME 7 | name: MODEL_NAME 8 | namespace: NAMESPACE 9 | spec: 10 | ports: 11 | - name: model-serving 12 | port: 80 13 | targetPort: "http-server" 14 | selector: 15 | app: MODEL_NAME 16 | type: ClusterIP 17 | --- 18 | apiVersion: extensions/v1beta1 19 | kind: Deployment 20 | metadata: 21 | labels: 22 | app: MODEL_NAME 23 | name: MODEL_NAME-dep 24 | namespace: NAMESPACE 25 | spec: 26 | replicas: 2 27 | template: 28 | metadata: 29 | labels: 30 | app: MODEL_NAME 31 | version: v1 32 | spec: 33 | containers: 34 | - name: MODEL_NAME 35 | image: IMAGE_NAME 36 | imagePullPolicy: Always 37 | livenessProbe: 38 | initialDelaySeconds: 30 39 | periodSeconds: 30 40 | tcpSocket: 41 | port: 8080 42 | ports: 43 | - name: http-server 44 | containerPort: 8080 45 | resources: 46 | limits: 47 | cpu: "4" 48 | memory: 4Gi 49 | requests: 50 | cpu: "1" 51 | memory: 1Gi 52 | -------------------------------------------------------------------------------- /ml/automl/tables/kfp_e2e/deploy_model_for_tables/tables_deploy_component.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from typing import NamedTuple 16 | 17 | def automl_deploy_tables_model( 18 | gcp_project_id: str, 19 | gcp_region: str, 20 | model_display_name: str, 21 | api_endpoint: str = None, 22 | ) -> NamedTuple('Outputs', [('model_display_name', str), ('status', str)]): 23 | import subprocess 24 | import sys 25 | subprocess.run([sys.executable, '-m', 'pip', 'install', 'googleapis-common-protos==1.6.0', '--no-warn-script-location'], 26 | env={'PIP_DISABLE_PIP_VERSION_CHECK': '1'}, check=True) 27 | subprocess.run([sys.executable, '-m', 'pip', 'install', 'google-cloud-automl==0.9.0', '--quiet', '--no-warn-script-location'], 28 | env={'PIP_DISABLE_PIP_VERSION_CHECK': '1'}, check=True) 29 | 30 | import google 31 | import logging 32 | from google.api_core.client_options import ClientOptions 33 | from google.api_core import exceptions 34 | from google.cloud import automl_v1beta1 as automl 35 | from google.cloud.automl_v1beta1 import enums 36 | 37 | logging.getLogger().setLevel(logging.INFO) # TODO: make level configurable 38 | # TODO: we could instead check for region 'eu' and use 'eu-automl.googleapis.com:443'endpoint 39 | # in that case, instead of requiring endpoint to be specified. 40 | if api_endpoint: 41 | client_options = ClientOptions(api_endpoint=api_endpoint) 42 | client = automl.TablesClient(project=gcp_project_id, region=gcp_region, 43 | client_options=client_options) 44 | else: 45 | client = automl.TablesClient(project=gcp_project_id, region=gcp_region) 46 | 47 | try: 48 | model = client.get_model(model_display_name=model_display_name) 49 | if model.deployment_state == enums.Model.DeploymentState.DEPLOYED: 50 | status = 'deployed' 51 | logging.info('Model {} already deployed'.format(model_display_name)) 52 | else: 53 | logging.info('Deploying model {}'.format(model_display_name)) 54 | response = client.deploy_model(model_display_name=model_display_name) 55 | # synchronous wait 56 | logging.info("Model deployed. {}".format(response.result())) 57 | status = 'deployed' 58 | except exceptions.NotFound as e: 59 | logging.warning(e) 60 | status = 'not_found' 61 | except Exception as e: 62 | logging.warning(e) 63 | status = 'undeployed' 64 | 65 | logging.info('Model status: {}'.format(status)) 66 | return (model_display_name, status) 67 | 68 | 69 | 70 | if __name__ == '__main__': 71 | import kfp 72 | kfp.components.func_to_container_op( 73 | automl_deploy_tables_model, output_component_file='tables_deploy_component.yaml', 74 | base_image='python:3.7') 75 | -------------------------------------------------------------------------------- /ml/automl/tables/kfp_e2e/tables_containers/model-service-launcher/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
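# Launcher image for the exported-model serving step: installs the Cloud SDK
# plus kubectl, then runs exported_model_deploy.py (staged under /ml) to
# create the model's Kubernetes Service and Deployment on the cluster.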
14 | 15 | FROM ubuntu:18.04 16 | 17 | RUN apt-get update \ 18 | && apt-get install -y python3-pip python3-dev \ 19 | && cd /usr/local/bin \ 20 | && ln -s /usr/bin/python3 python \ 21 | && pip3 install --upgrade pip 22 | 23 | RUN apt-get install -y wget unzip git 24 | 25 | RUN pip install --upgrade pip 26 | RUN pip install urllib3 certifi retrying 27 | 28 | # RUN apt-get install --no-install-recommends -y -q ca-certificates python-dev python-setuptools wget unzip 29 | 30 | # RUN pip install pyyaml==3.12 six==1.11.0 requests==2.18.4 31 | 32 | RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \ 33 | unzip -qq google-cloud-sdk.zip -d tools && \ 34 | rm google-cloud-sdk.zip && \ 35 | tools/google-cloud-sdk/install.sh --usage-reporting=false \ 36 | --path-update=false --bash-completion=false \ 37 | --disable-installation-options && \ 38 | tools/google-cloud-sdk/bin/gcloud -q components update \ 39 | gcloud core gsutil && \ 40 | tools/google-cloud-sdk/bin/gcloud -q components install kubectl && \ 41 | tools/google-cloud-sdk/bin/gcloud config set component_manager/disable_update_check true && \ 42 | touch /tools/google-cloud-sdk/lib/third_party/google.py 43 | 44 | 45 | ENV PATH $PATH:/tools/google-cloud-sdk/bin 46 | 47 | ADD build /ml 48 | 49 | ENTRYPOINT ["python", "/ml/exported_model_deploy.py"] 50 | -------------------------------------------------------------------------------- /ml/automl/tables/kfp_e2e/tables_containers/model-service-launcher/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright 2020 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | if [ -z "$1" ] 18 | then 19 | PROJECT_ID=$(gcloud config config-helper --format "value(configuration.properties.core.project)") 20 | else 21 | PROJECT_ID=$1 22 | fi 23 | 24 | mkdir -p ./build 25 | rsync -arvp "../../deploy_model_for_tables"/ ./build/ 26 | 27 | docker build -t model-service-launcher . 
28 | rm -rf ./build 29 | 30 | docker tag model-service-launcher gcr.io/${PROJECT_ID}/model-service-launcher 31 | docker push gcr.io/${PROJECT_ID}/model-service-launcher 32 | -------------------------------------------------------------------------------- /ml/automl/tables/kfp_e2e/tables_pipeline_caip.py.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amygdala/code-snippets/9fa00e5a4ec4ae3eb5a2fa212d1ecefb4009eba7/ml/automl/tables/kfp_e2e/tables_pipeline_caip.py.tar.gz -------------------------------------------------------------------------------- /ml/automl/tables/kfp_e2e/tables_pipeline_kf.py.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amygdala/code-snippets/9fa00e5a4ec4ae3eb5a2fa212d1ecefb4009eba7/ml/automl/tables/kfp_e2e/tables_pipeline_kf.py.tar.gz -------------------------------------------------------------------------------- /ml/automl/tables/model_export/Dockerfile.template: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | FROM gcr.io/cloud-automl-tables-public/model_server 16 | 17 | ADD model-export/tbl/YOUR_RENAMED_DIRECTORY /models/default/0000001 18 | -------------------------------------------------------------------------------- /ml/automl/tables/model_export/convert_oss.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # tested with TF1.14 16 | import sys 17 | import tensorflow as tf 18 | 19 | from absl import app 20 | from absl import flags 21 | from tensorflow.core.protobuf import saved_model_pb2 22 | from tensorflow.python.summary import summary 23 | 24 | FLAGS = flags.FLAGS 25 | 26 | flags.DEFINE_string('saved_model', '', 'The location of the saved_model.pb to visualize.') 27 | flags.DEFINE_string('output_dir', '', 'The location for the Tensorboard log to begin visualization from.') 28 | 29 | def import_to_tensorboard(saved_model, output_dir): 30 | """View an imported saved_model.pb as a graph in Tensorboard. 31 | 32 | Args: 33 | saved_model: The location of the saved_model.pb to visualize. 
34 | output_dir: The location for the Tensorboard log to begin visualization from. 35 | 36 | Usage: 37 | Call this function with your model location and desired log directory. 38 | Launch Tensorboard by pointing it to the log directory. 39 | View your imported `.pb` model as a graph. 40 | """ 41 | with open(saved_model, "rb") as f: 42 | sm = saved_model_pb2.SavedModel() 43 | sm.ParseFromString(f.read()) 44 | if 1 != len(sm.meta_graphs): 45 | print('More than one graph found. Not sure which to write') 46 | sys.exit(1) 47 | graph_def = sm.meta_graphs[0].graph_def 48 | 49 | pb_visual_writer = summary.FileWriter(output_dir) 50 | pb_visual_writer.add_graph(None, graph_def=graph_def) 51 | print("Model Imported. Visualize by running: " 52 | "tensorboard --logdir={}".format(output_dir)) 53 | 54 | 55 | def main(argv): 56 | import_to_tensorboard(FLAGS.saved_model, FLAGS.output_dir) 57 | 58 | 59 | if __name__ == '__main__': 60 | app.run(main) 61 | -------------------------------------------------------------------------------- /ml/automl/tables/model_export/instances.json: -------------------------------------------------------------------------------- 1 | { 2 | "instances": [ 3 | { 4 | "bike_id": "6179", 5 | "day_of_week": "6", 6 | "end_latitude": 51.50379168, 7 | "end_longitude": -0.11282408, 8 | "end_station_id": "154", 9 | "euclidean": 2513.254047872678, 10 | "loc_cross": "POINT(-0.08 51.52)POINT(-0.11 51.5)", 11 | "max": 56.8, 12 | "min": 50.9, 13 | "prcp": 0, 14 | "ts": 1445624280, 15 | "start_latitude": 51.51615461, 16 | "start_longitude": -0.082422399, 17 | "start_station_id": "217", 18 | "temp": 54, 19 | "dewp": 44 20 | }, 21 | { 22 | "bike_id": "5373", 23 | "day_of_week": "3", 24 | "end_latitude": 51.52059681, 25 | "end_longitude": -0.116688468, 26 | "end_station_id": "68", 27 | "euclidean": 1181.215448450556, 28 | "loc_cross": "POINT(-0.13 51.53)POINT(-0.12 51.52)", 29 | "max": 56.7, 30 | "min": 45.9, 31 | "prcp": 0, 32 | "ts": 1494317220, 33 | "start_latitude": 51.52683806, 34 | "start_longitude": -0.130504336, 35 | "start_station_id": "214", 36 | "temp": 50.5, 37 | "dewp": 37.1 38 | }, 39 | { 40 | "bike_id": "5373", 41 | "day_of_week": "3", 42 | "end_latitude": 51.52059681, 43 | "end_longitude": -0.116688468, 44 | "end_station_id": "68", 45 | "euclidean": 3589.5146210024977, 46 | "loc_cross": "POINT(-0.07 51.52)POINT(-0.12 51.52)", 47 | "max": 44.6, 48 | "min": 34.0, 49 | "prcp": 0, 50 | "ts": 1480407420, 51 | "start_latitude": 51.52388, 52 | "start_longitude": -0.065076, 53 | "start_station_id": "445", 54 | "temp": 38.2, 55 | "dewp": 28.6 56 | } 57 | ] 58 | } 59 | -------------------------------------------------------------------------------- /ml/automl/tables/xai/README.md: -------------------------------------------------------------------------------- 1 | 2 | # AutoML Tables examples 3 | 4 | This directory contains a notebook that shows examples of using the [AutoML Tables](https://cloud.google.com/automl-tables/docs/) client library. For these examples, we’ll use data that is essentially a join of two public datasets stored in [BigQuery](https://cloud.google.com/bigquery/): [London Bike rentals](https://console.cloud.google.com/bigquery?p=bigquery-public-data&d=london_bicycles&page=dataset) and [NOAA weather data](https://console.cloud.google.com/bigquery?p=bigquery-public-data&d=noaa_gsod&page=dataset), with some additional processing to clean up outliers and derive additional GIS and day-of-week fields. 
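
As a rough sketch of the kind of Tables client-library call these examples revolve around, here is a prediction request that also asks for *local feature importance*. The project, model name, and abbreviated input row below are all hypothetical, and the `feature_importance` argument assumes a reasonably recent `google-cloud-automl` release:

```python
from google.cloud import automl_v1beta1 as automl

client = automl.TablesClient(project='your-project', region='us-central1')

# Hypothetical, abbreviated input row; a real request must supply every
# column the model was trained on (see instances.json for a full row).
inputs = {'day_of_week': '3', 'euclidean': 1181.2, 'max': 56.7, 'min': 45.9,
          'prcp': 0, 'temp': 50.5, 'dewp': 37.1}

# feature_importance=True requests local feature importance values
# alongside the prediction itself.
response = client.predict(model_display_name='bikes_weather_model',
                          inputs=inputs, feature_importance=True)

for payload in response.payload:
    print('predicted duration:', payload.tables.value)
    for col in payload.tables.tables_model_column_info:
        print(col.column_display_name, col.feature_importance)
```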
5 | 6 | The [automl_tables_xai.ipynb notebook](automl_tables_xai.ipynb) shows how to create a custom [AutoML Tables](https://cloud.google.com/automl-tables/docs/) model to predict duration of London bike rentals given information about local weather as well as info about the rental trip. It walks through examples of using the Tables client libraries for creating a dataset, training a custom model, deploying the model, and using it to make predictions; and shows how you can programmatically request *local feature importance* explanations. 7 | 8 | AutoML Tables allows you to [export a model's test dataset to BigQuery](https://cloud.google.com/automl-tables/docs/evaluate#downloading_your_test_dataset_to) after training. The [bigquery_examples.md](bigquery_examples.md) file shows some examples of how you can use BigQuery to analyze this dataset. -------------------------------------------------------------------------------- /ml/automl/tables/xai/bigquery_examples.md: -------------------------------------------------------------------------------- 1 | 2 | # Examples of inspecting the "London bikes and weather" test dataset in BigQuery 3 | 4 | 5 | AutoML Tables allows you to [export a model's test dataset to BigQuery](https://cloud.google.com/automl-tables/docs/evaluate#downloading_your_test_dataset_to) after training. This makes it easy to do some additional poking around in a sample of the dataset— even if it didn't originally reside in BigQuery. This can be helpful, for example, if your model's explanations of predictions suggest some interesting characteristics of the data. 6 | (See the "Use your trained model to make predictions and see explanations of the results" section of [automl_tables_xai.ipynb](automl_tables_xai.ipynb) for an example of requesting a prediction explanation). 7 | 8 | Here are a few example queries for the "bikes and weather" dataset used in 9 | [automl_tables_xai.ipynb](automl_tables_xai.ipynb). 10 | In the following, replace `your-project` and `your-dataset` with the appropriate values. (The exported table should be named `evaluated_examples`, but if not, edit that value as well.) 11 | 12 | 1. Find the average predicted and actual ride durations for the day of the week (in this dataset, 1 & 7 are weekends). 13 | 14 | ```sql 15 | SELECT day_of_week, avg(predicted_duration[offset(0)].tables.value) as ad, avg(duration) as adur 16 | FROM `your-project.your-dataset.evaluated_examples` 17 | where euclidean > 0 group by day_of_week 18 | order by adur desc 19 | limit 10000 20 | ``` 21 | 22 | 2. Find the average predicted and actual ride durations for those rides where the max temperature was > 70F or < 40F. 23 | 24 | ```sql 25 | SELECT max, avg(predicted_duration[offset(0)].tables.value) as ad, avg(duration) as adur 26 | FROM `your-project.your-dataset.evaluated_examples` 27 | where euclidean > 0 and (max > 70 or max < 40) group by max 28 | order by adur desc 29 | limit 10000 30 | ``` 31 | 32 | 3. Show the starting stations for rides as ordered by greatest standard deviation in prediction accuracy. 
33 | 34 | ```sql 35 | SELECT start_station_id, stddev(predicted_duration[offset(0)].tables.value - duration) as sd, avg(predicted_duration[offset(0)].tables.value - duration) as ad 36 | FROM `your-project.your-dataset.evaluated_examples` 37 | where euclidean > 0 group by start_station_id 38 | order by sd desc 39 | limit 1000 40 | ``` -------------------------------------------------------------------------------- /ml/census_train_and_eval/config_custom_gpus.yaml: -------------------------------------------------------------------------------- 1 | trainingInput: 2 | scaleTier: CUSTOM 3 | masterType: standard_p100 4 | workerType: standard_p100 5 | parameterServerType: standard 6 | workerCount: 3 7 | parameterServerCount: 3 8 | -------------------------------------------------------------------------------- /ml/census_train_and_eval/hptuning_config.yaml: -------------------------------------------------------------------------------- 1 | trainingInput: 2 | hyperparameters: 3 | goal: MAXIMIZE 4 | hyperparameterMetricTag: accuracy 5 | maxTrials: 6 6 | maxParallelTrials: 2 7 | params: 8 | - parameterName: first-layer-size 9 | type: INTEGER 10 | minValue: 50 11 | maxValue: 400 12 | scaleType: UNIT_LINEAR_SCALE 13 | - parameterName: num-layers 14 | type: INTEGER 15 | minValue: 1 16 | maxValue: 10 17 | scaleType: UNIT_LINEAR_SCALE 18 | - parameterName: scale-factor 19 | type: DOUBLE 20 | minValue: 0.1 21 | maxValue: 0.9 22 | scaleType: UNIT_REVERSE_LOG_SCALE 23 | -------------------------------------------------------------------------------- /ml/census_train_and_eval/test.json: -------------------------------------------------------------------------------- 1 | {"age": 25, "workclass": " Private", "education": " 11th", "education_num": 7, "marital_status": " Never-married", "occupation": " Machine-op-inspct", "relationship": " Own-child", "race": " Black", "gender": " Male", "capital_gain": 0, "capital_loss": 0, "hours_per_week": 40, "native_country": " United-States"} 2 | -------------------------------------------------------------------------------- /ml/census_train_and_eval/trainer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amygdala/code-snippets/9fa00e5a4ec4ae3eb5a2fa212d1ecefb4009eba7/ml/census_train_and_eval/trainer/__init__.py -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Kubeflow Pipelines examples 3 | 4 | [Kubeflow](https://www.kubeflow.org/) is an OSS project to support a machine learning stack on Kubernetes, to make deployments of ML workflows on Kubernetes simple, portable and scalable. 5 | 6 | [**Kubeflow Pipelines**](https://github.com/kubeflow/pipelines) is a new component of Kubeflow that makes it easy to compose, deploy and manage end-to-end machine learning workflows. The Kubeflow Pipelines documentation is [here](https://www.kubeflow.org/docs/guides/pipelines/). 7 | 8 | This directory tree contains code for several different groups of Kubeflow Pipelines examples. 9 | The examples highlight how Kubeflow and Kubeflow Pipelines can help support portability, composability and reproducibility, scalability, and visualization and collaboration in your ML lifecycle; and make it easier to support hybrid ML solutions. 
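
The examples below share a common shape: step implementations are packaged as container images or component specs, wired together with the KFP SDK, and compiled to an archive for upload. A minimal sketch of that pattern follows (the component filenames echo ones used in this repo, but the parameter and output names are invented for illustration):

```python
import kfp
from kfp import components, dsl

# Load prebuilt component definitions from their YAML specs.
train_op = components.load_component_from_file('train_component.yaml')
serve_op = components.load_component_from_file('serve_component.yaml')

@dsl.pipeline(name='bikes-weather', description='Train a model, then serve it.')
def bw_pipeline(working_dir: str, epochs: int = 2):
    train = train_op(working_dir=working_dir, epochs=epochs)
    serve_op(model_path=train.outputs['model_path'])  # invented output name

if __name__ == '__main__':
    # Compile to an archive that can be uploaded via the Pipelines UI or SDK.
    kfp.compiler.Compiler().compile(bw_pipeline, 'bw_pipeline.py.tar.gz')
```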
10 | 11 | - A pipeline that [implements an AutoML Tables end-to-end workflow](https://github.com/amygdala/code-snippets/tree/master/ml/automl/tables/kfp_e2e). 12 | - [Distributed Keras Tuner + KFP example](./keras_tuner) 13 | - A pipeline that shows how you can make calls to the AutoML Vision API to build a pipeline that creates an AutoML *dataset* and then trains a model on that dataset: [samples/automl/README.md](./samples/automl/README.md). 14 | - [Example pipeline](./sbtb) for Scale by the Bay workshop (2019) 15 | 16 | ## Deprecated examples 17 | 18 | These examples are not currently maintained and most likely don't work properly. 19 | 20 | - [README_taxidata_examples.md](./README_taxidata_examples.md) 21 | - [README_github_summ.md](README_github_summ.md): going forward, the current version of this example lives here: https://github.com/kubeflow/examples/tree/master/github_issue_summarization/pipelines. 22 | 23 | 24 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Workflow Components 3 | 4 | This directory contains the definitions of the Argo workflow steps used in the example workflows. For each step, you can find both the code and the Dockerfile used to build the step's container. 5 | 6 | To make it easy to run the examples, we're using prebuilt Docker containers, but if you want to change anything about a step, you can rebuild and use your own container instead. Just edit the workflow definition under [`samples`](../samples) to point to your own container instead. 7 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/automl/container/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
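# Pipeline-step image for the AutoML dataset/train component: installs the
# google-cloud-automl client library and the Cloud SDK, then runs
# dataset_model.py (staged under /ml by build.sh).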
14 | 15 | FROM ubuntu:18.04 16 | 17 | RUN apt-get update \ 18 | && apt-get install -y python3-pip python3-dev \ 19 | && cd /usr/local/bin \ 20 | && ln -s /usr/bin/python3 python \ 21 | && pip3 install --upgrade pip 22 | 23 | 24 | RUN apt-get install -y wget unzip git 25 | 26 | 27 | RUN pip install google-cloud-automl 28 | 29 | 30 | RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \ 31 | unzip -qq google-cloud-sdk.zip -d tools && \ 32 | rm google-cloud-sdk.zip && \ 33 | tools/google-cloud-sdk/install.sh --usage-reporting=false \ 34 | --path-update=false --bash-completion=false \ 35 | --disable-installation-options && \ 36 | tools/google-cloud-sdk/bin/gcloud -q components update \ 37 | gcloud core gsutil && \ 38 | tools/google-cloud-sdk/bin/gcloud config set component_manager/disable_update_check true && \ 39 | touch /tools/google-cloud-sdk/lib/third_party/google.py 40 | 41 | ADD build /ml 42 | 43 | ENV PATH $PATH:/tools/node/bin:/tools/google-cloud-sdk/bin 44 | 45 | ENTRYPOINT ["python", "/ml/dataset_model.py"] 46 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/automl/container/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright 2019 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | if [ -z "$1" ] 18 | then 19 | PROJECT_ID=$(gcloud config config-helper --format "value(configuration.properties.core.project)") 20 | else 21 | PROJECT_ID=$1 22 | fi 23 | 24 | mkdir -p ./build 25 | rsync -arvp "../dataset_train"/ ./build/ 26 | 27 | docker build -t automl-pipeline . 28 | rm -rf ./build 29 | 30 | docker tag automl-pipeline gcr.io/${PROJECT_ID}/automl-pipeline 31 | docker push gcr.io/${PROJECT_ID}/automl-pipeline -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/cmle/containers/base/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
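# Base image shared by the CMLE (Cloud ML Engine) step containers: installs
# TensorFlow 1.10 and the Cloud SDK, and stages the deploy scripts under /ml.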
14 | 15 | FROM ubuntu:16.04 16 | 17 | RUN apt-get update -y 18 | 19 | RUN apt-get install --no-install-recommends -y -q ca-certificates python-dev python-setuptools \ 20 | wget unzip git 21 | 22 | RUN easy_install pip 23 | 24 | RUN pip install tensorflow==1.10 25 | RUN pip install pyyaml==3.12 six==1.11.0 26 | 27 | RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \ 28 | unzip -qq google-cloud-sdk.zip -d tools && \ 29 | rm google-cloud-sdk.zip && \ 30 | tools/google-cloud-sdk/install.sh --usage-reporting=false \ 31 | --path-update=false --bash-completion=false \ 32 | --disable-installation-options && \ 33 | tools/google-cloud-sdk/bin/gcloud -q components update \ 34 | gcloud core gsutil && \ 35 | tools/google-cloud-sdk/bin/gcloud config set component_manager/disable_update_check true && \ 36 | touch /tools/google-cloud-sdk/lib/third_party/google.py 37 | 38 | ADD build /ml 39 | 40 | ENV PATH $PATH:/tools/node/bin:/tools/google-cloud-sdk/bin 41 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/cmle/containers/base/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright 2018 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | mkdir -p ./build 18 | rsync -arvp "../../deploy"/ ./build/ 19 | 20 | docker build -t ml-pipeline-cmle-base . 21 | rm -rf ./build 22 | 23 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/cmle/containers/cmle_deploy/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | FROM ml-pipeline-cmle-base 16 | 17 | ENTRYPOINT ["python", "/ml/deploy_model.py"] 18 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/cmle/containers/cmle_deploy/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright 2018 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | if [ -z "$1" ] 18 | then 19 | PROJECT_ID=$(gcloud config config-helper --format "value(configuration.properties.core.project)") 20 | else 21 | PROJECT_ID=$1 22 | fi 23 | 24 | # build base image 25 | pushd ../base 26 | ./build.sh 27 | popd 28 | 29 | docker build -t ml-pipeline-cmle-op . 30 | docker tag ml-pipeline-cmle-op gcr.io/${PROJECT_ID}/ml-pipeline-cmle-op 31 | docker push gcr.io/${PROJECT_ID}/ml-pipeline-cmle-op 32 | 33 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/cmle/deploy/deploy_model.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Deploy a TF model to CMLE.""" 16 | 17 | import argparse 18 | import os 19 | import subprocess 20 | import time 21 | 22 | from tensorflow.python.lib.io import file_io 23 | 24 | def main(argv=None): 25 | parser = argparse.ArgumentParser(description='ML Trainer') 26 | parser.add_argument( 27 | '--project', 28 | help='The GCS project to use', 29 | required=True) 30 | parser.add_argument( 31 | '--gcs-path', 32 | help='The GCS path to the trained model. The path should end with "../export/".', 33 | required=True) 34 | parser.add_argument( 35 | '--version-name', 36 | help='The model version name.', 37 | required=True) 38 | 39 | parser.add_argument( 40 | '--model-name', 41 | help='The model name.', 42 | default='taxifare') 43 | 44 | parser.add_argument( 45 | '--region', 46 | help='The model region.', 47 | default='us-central1' 48 | ) 49 | 50 | args = parser.parse_args() 51 | 52 | # Make sure the model dir exists before proceeding, as sometimes it takes a few seconds to become 53 | # available after training completes. 54 | retries = 0 55 | sleeptime = 5 56 | while retries < 20: 57 | try: 58 | model_location = os.path.join(args.gcs_path, file_io.list_directory(args.gcs_path)[-1]) 59 | print("model location: %s" % model_location) 60 | break 61 | except Exception as e: 62 | print(e) 63 | print("Sleeping %s seconds to wait for GCS files..." 
% sleeptime) 64 | time.sleep(sleeptime) 65 | retries += 1 66 | sleeptime *= 2 67 | if retries >= 20: 68 | print("could not get model location subdir from %s, exiting" % args.gcs_path) 69 | exit(1) 70 | 71 | # Create the model resource (this call fails harmlessly if it already exists), then create the new version. 72 | model_create_command = ['gcloud', 'ml-engine', 'models', 'create', args.model_name, '--regions', 73 | args.region, '--project', args.project] 74 | print(model_create_command) 75 | result = subprocess.call(model_create_command) 76 | print(result) 77 | 78 | proper_version_name = args.version_name.replace('-', '_') 79 | print("using version name: %s" % proper_version_name) 80 | 81 | model_deploy_command = ['gcloud', 'ml-engine', 'versions', 'create', proper_version_name, 82 | '--model', args.model_name, '--runtime-version', '1.10', '--project', args.project, 83 | '--origin', model_location 84 | ] 85 | print(model_deploy_command) 86 | result2 = subprocess.call(model_deploy_command) 87 | print(result2) 88 | 89 | # Example invocation (illustrative placeholders): 90 | # python deploy_model.py --project <gcp-project> --gcs-path gs://<bucket>/<job-dir>/export/ --version-name v1 91 | 92 | if __name__ == "__main__": 93 | main() 94 | 95 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/dataflow/containers/base/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | FROM ubuntu:18.04 16 | 17 | RUN apt-get update -y 18 | RUN apt-get -y install build-essential python-pip python2.7 19 | 20 | RUN apt-get install --no-install-recommends -y -q ca-certificates python-dev python-setuptools \ 21 | wget unzip git 22 | 23 | # RUN easy_install pip 24 | 25 | RUN pip install --upgrade pip 26 | RUN pip install tensorflow==1.11 27 | RUN pip install pyyaml==3.12 six==1.11.0 28 | # RUN pip install pyyaml six 29 | 30 | RUN pip install tensorflow-transform==0.11.0 31 | 32 | RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \ 33 | unzip -qq google-cloud-sdk.zip -d tools && \ 34 | rm google-cloud-sdk.zip && \ 35 | tools/google-cloud-sdk/install.sh --usage-reporting=false \ 36 | --path-update=false --bash-completion=false \ 37 | --disable-installation-options && \ 38 | tools/google-cloud-sdk/bin/gcloud -q components update \ 39 | gcloud core gsutil && \ 40 | tools/google-cloud-sdk/bin/gcloud config set component_manager/disable_update_check true && \ 41 | touch /tools/google-cloud-sdk/lib/third_party/google.py 42 | 43 | ADD build /ml 44 | 45 | ENV PATH $PATH:/tools/node/bin:/tools/google-cloud-sdk/bin 46 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/dataflow/containers/base/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright 2018 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | mkdir -p ./build 18 | rsync -arvp "../../tft"/ ./build/ 19 | rsync -arvp "../../tfma"/ ./build/ 20 | rsync -arvp "../../taxi_schema"/ ./build/ 21 | rsync -arvp "../../taxi_schema"/ ./build/transform/ 22 | rsync -arvp "../../taxi_schema"/ ./build/analysis/ 23 | 24 | docker build -t ml-pipeline-dataflow-base . 25 | rm -rf ./build 26 | 27 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/dataflow/containers/tfma/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | FROM ml-pipeline-dataflow-base 16 | 17 | RUN apt-get update -y && \ 18 | apt-get install --no-install-recommends -y -q build-essential && \ 19 | pip install tensorflow-model-analysis==0.9.2 && \ 20 | pip install ipywidgets==7.2.1 && \ 21 | apt-get --purge autoremove -y build-essential 22 | 23 | WORKDIR /ml 24 | 25 | ENTRYPOINT ["python", "/ml/model_analysis-taxi.py"] 26 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/dataflow/containers/tfma/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright 2018 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | if [ -z "$1" ] 18 | then 19 | PROJECT_ID=$(gcloud config config-helper --format "value(configuration.properties.core.project)") 20 | else 21 | PROJECT_ID=$1 22 | fi 23 | 24 | # build base image 25 | pushd ../base 26 | ./build.sh 27 | popd 28 | 29 | docker build -f Dockerfile -t gcr.io/${PROJECT_ID}/ml-pipeline-dataflow-tfma-taxi . 
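# The image above is built directly under its gcr.io/${PROJECT_ID} tag, so it can be pushed without a separate 'docker tag' step.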
30 | docker push gcr.io/${PROJECT_ID}/ml-pipeline-dataflow-tfma-taxi 31 | 32 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/dataflow/containers/tft/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | FROM ml-pipeline-dataflow-base 16 | 17 | RUN apt-get update -y && \ 18 | apt-get install --no-install-recommends -y -q build-essential && \ 19 | pip install tensorflow-model-analysis==0.9.2 && \ 20 | pip install ipywidgets==7.2.1 && \ 21 | apt-get --purge autoremove -y build-essential 22 | 23 | WORKDIR /ml 24 | 25 | ENTRYPOINT ["python", "/ml/taxi_preprocess_bq.py"] 26 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/dataflow/containers/tft/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright 2018 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | if [ -z "$1" ] 18 | then 19 | PROJECT_ID=$(gcloud config config-helper --format "value(configuration.properties.core.project)") 20 | else 21 | PROJECT_ID=$1 22 | fi 23 | 24 | # build base image 25 | pushd ../base 26 | ./build.sh 27 | popd 28 | 29 | docker build -f Dockerfile -t ml-pipeline-dataflow-tftbq-taxi . 30 | docker tag ml-pipeline-dataflow-tftbq-taxi gcr.io/${PROJECT_ID}/ml-pipeline-dataflow-tftbq-taxi 31 | docker push gcr.io/${PROJECT_ID}/ml-pipeline-dataflow-tftbq-taxi 32 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/dataflow/taxi_schema/taxi_schema/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amygdala/code-snippets/9fa00e5a4ec4ae3eb5a2fa212d1ecefb4009eba7/ml/kubeflow-pipelines/components/older/dataflow/taxi_schema/taxi_schema/__init__.py -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/dataflow/tfma/analysis/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Setup dependencies for deployment of the analyzer.""" 16 | 17 | import setuptools 18 | 19 | if __name__ == '__main__': 20 | setuptools.setup(name='taxi_schema', version='1.0', 21 | packages=setuptools.find_packages(), 22 | install_requires=[ 23 | 'tensorflow==1.15.4', 24 | 'tensorflow-model-analysis==0.9.2', 25 | 'tensorflow-serving-api==1.9.0', 26 | 'tensorflow-transform==0.11.0']) 27 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/dataflow/tft/transform/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Setup dependencies for cloud deployment.""" 16 | import setuptools 17 | 18 | if __name__ == '__main__': 19 | setuptools.setup(name='taxi_schema', version='1.0', 20 | packages=setuptools.find_packages(), 21 | install_requires=[ 22 | 'jupyter==1.0', 23 | 'tensorflow==1.15.4', 24 | 'tensorflow-model-analysis==0.9.2', 25 | 'tensorflow-serving-api==1.9.0', 26 | 'tensorflow-transform==0.11.0']) 27 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/kubeflow/containers/launcher/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | FROM ubuntu:16.04 16 | 17 | RUN apt-get update -y 18 | 19 | RUN apt-get install --no-install-recommends -y -q ca-certificates python-dev python-setuptools wget unzip git 20 | 21 | RUN easy_install pip 22 | 23 | RUN pip install pyyaml==3.12 six==1.11.0 requests==2.18.4 tensorflow==1.10.0 \ 24 | kubernetes google-api-python-client retrying 25 | RUN pip install google-cloud-storage 26 | 27 | RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \ 28 | unzip -qq google-cloud-sdk.zip -d tools && \ 29 | rm google-cloud-sdk.zip && \ 30 | tools/google-cloud-sdk/install.sh --usage-reporting=false \ 31 | --path-update=false --bash-completion=false \ 32 | --disable-installation-options && \ 33 | tools/google-cloud-sdk/bin/gcloud -q components update \ 34 | gcloud core gsutil && \ 35 | tools/google-cloud-sdk/bin/gcloud -q components install kubectl && \ 36 | tools/google-cloud-sdk/bin/gcloud config set component_manager/disable_update_check true && \ 37 | touch /tools/google-cloud-sdk/lib/third_party/google.py 38 | 39 | RUN wget -nv https://github.com/ksonnet/ksonnet/releases/download/v0.13.1/ks_0.13.1_linux_amd64.tar.gz && \ 40 | tar -xvzf ks_0.13.1_linux_amd64.tar.gz && \ 41 | mkdir -p /tools/ks/bin && \ 42 | cp ./ks_0.13.1_linux_amd64/ks /tools/ks/bin && \ 43 | rm ks_0.13.1_linux_amd64.tar.gz && \ 44 | rm -r ks_0.13.1_linux_amd64 45 | 46 | RUN wget https://github.com/kubeflow/tf-operator/archive/v0.4.0-rc.1.zip && \ 47 | unzip v0.4.0-rc.1.zip && \ 48 | mv tf-operator-0.4.0-rc.1 tf-operator 49 | 50 | ENV PYTHONPATH $PYTHONPATH:/tf-operator 51 | 52 | RUN wget https://github.com/kubeflow/testing/archive/master.zip && \ 53 | unzip master.zip && \ 54 | mv testing-master testing 55 | 56 | ENV PYTHONPATH $PYTHONPATH:/testing/py 57 | 58 | ENV PATH $PATH:/tools/google-cloud-sdk/bin:/tools/ks/bin 59 | 60 | ADD build /ml 61 | 62 | ENTRYPOINT ["python", "/ml/train.py"] 63 | 64 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/kubeflow/containers/launcher/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright 2018 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | if [ -z "$1" ] 18 | then 19 | PROJECT_ID=$(gcloud config config-helper --format "value(configuration.properties.core.project)") 20 | else 21 | PROJECT_ID=$1 22 | fi 23 | 24 | mkdir -p ./build 25 | rsync -arvp "../../launcher"/ ./build/ 26 | 27 | docker build -t ml-pipeline-kubeflow-tf-taxi . 
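# The launcher sources staged under ./build are now baked into the image; remove the staging dir, then tag and push so the pipeline cluster can pull the image.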
28 | rm -rf ./build 29 | 30 | docker tag ml-pipeline-kubeflow-tf-taxi gcr.io/${PROJECT_ID}/ml-pipeline-kubeflow-tf-taxi 31 | docker push gcr.io/${PROJECT_ID}/ml-pipeline-kubeflow-tf-taxi 32 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/kubeflow/containers/tf-serving-gh/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | FROM ubuntu:16.04 16 | 17 | RUN apt-get update -y 18 | 19 | RUN apt-get install --no-install-recommends -y -q ca-certificates python-dev python-setuptools wget unzip 20 | 21 | RUN easy_install pip 22 | 23 | RUN pip install pyyaml==3.12 six==1.11.0 requests==2.18.4 tensorflow==1.11.0 24 | 25 | RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \ 26 | unzip -qq google-cloud-sdk.zip -d tools && \ 27 | rm google-cloud-sdk.zip && \ 28 | tools/google-cloud-sdk/install.sh --usage-reporting=false \ 29 | --path-update=false --bash-completion=false \ 30 | --disable-installation-options && \ 31 | tools/google-cloud-sdk/bin/gcloud -q components update \ 32 | gcloud core gsutil && \ 33 | tools/google-cloud-sdk/bin/gcloud -q components install kubectl && \ 34 | tools/google-cloud-sdk/bin/gcloud config set component_manager/disable_update_check true && \ 35 | touch /tools/google-cloud-sdk/lib/third_party/google.py 36 | 37 | RUN wget -nv https://github.com/ksonnet/ksonnet/releases/download/v0.11.0/ks_0.11.0_linux_amd64.tar.gz && \ 38 | tar -xvzf ks_0.11.0_linux_amd64.tar.gz && \ 39 | mkdir -p /tools/ks/bin && \ 40 | cp ./ks_0.11.0_linux_amd64/ks /tools/ks/bin && \ 41 | rm ks_0.11.0_linux_amd64.tar.gz && \ 42 | rm -r ks_0.11.0_linux_amd64 43 | 44 | ENV PATH $PATH:/tools/google-cloud-sdk/bin:/tools/ks/bin 45 | 46 | ADD build /ml 47 | 48 | ENTRYPOINT ["python", "/ml/deploy-tf-serve.py"] 49 | 50 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/kubeflow/containers/tf-serving-gh/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright 2018 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | 17 | if [ -z "$1" ] 18 | then 19 | PROJECT_ID=$(gcloud config config-helper --format "value(configuration.properties.core.project)") 20 | else 21 | PROJECT_ID=$1 22 | fi 23 | 24 | mkdir -p ./build 25 | rsync -arvp "../../tf-serving-gh"/ ./build/ 26 | 27 | docker build -t ml-pipeline-kubeflow-tfserve . 28 | rm -rf ./build 29 | 30 | docker tag ml-pipeline-kubeflow-tfserve gcr.io/${PROJECT_ID}/ml-pipeline-kubeflow-tfserve 31 | docker push gcr.io/${PROJECT_ID}/ml-pipeline-kubeflow-tfserve 32 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/kubeflow/containers/tf-serving/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | FROM ubuntu:16.04 16 | 17 | RUN apt-get update -y 18 | 19 | RUN apt-get install --no-install-recommends -y -q ca-certificates python-dev python-setuptools wget unzip 20 | 21 | RUN easy_install pip 22 | 23 | RUN pip install pyyaml==3.12 six==1.11.0 requests==2.18.4 tensorflow==1.11.0 24 | 25 | RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \ 26 | unzip -qq google-cloud-sdk.zip -d tools && \ 27 | rm google-cloud-sdk.zip && \ 28 | tools/google-cloud-sdk/install.sh --usage-reporting=false \ 29 | --path-update=false --bash-completion=false \ 30 | --disable-installation-options && \ 31 | tools/google-cloud-sdk/bin/gcloud -q components update \ 32 | gcloud core gsutil && \ 33 | tools/google-cloud-sdk/bin/gcloud -q components install kubectl && \ 34 | tools/google-cloud-sdk/bin/gcloud config set component_manager/disable_update_check true && \ 35 | touch /tools/google-cloud-sdk/lib/third_party/google.py 36 | 37 | RUN wget -nv https://github.com/ksonnet/ksonnet/releases/download/v0.11.0/ks_0.11.0_linux_amd64.tar.gz && \ 38 | tar -xvzf ks_0.11.0_linux_amd64.tar.gz && \ 39 | mkdir -p /tools/ks/bin && \ 40 | cp ./ks_0.11.0_linux_amd64/ks /tools/ks/bin && \ 41 | rm ks_0.11.0_linux_amd64.tar.gz && \ 42 | rm -r ks_0.11.0_linux_amd64 43 | 44 | ENV PATH $PATH:/tools/google-cloud-sdk/bin:/tools/ks/bin 45 | 46 | ADD build /ml 47 | 48 | ENTRYPOINT ["python", "/ml/deploy-tf-serve.py"] 49 | 50 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/kubeflow/containers/tf-serving/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright 2018 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | if [ -z "$1" ] 18 | then 19 | PROJECT_ID=$(gcloud config config-helper --format "value(configuration.properties.core.project)") 20 | else 21 | PROJECT_ID=$1 22 | fi 23 | 24 | mkdir -p ./build 25 | rsync -arvp "../../tf-serving"/ ./build/ 26 | 27 | docker build -t ml-pipeline-kubeflow-tfserve-taxi . 28 | rm -rf ./build 29 | 30 | docker tag ml-pipeline-kubeflow-tfserve-taxi gcr.io/${PROJECT_ID}/ml-pipeline-kubeflow-tfserve-taxi 31 | docker push gcr.io/${PROJECT_ID}/ml-pipeline-kubeflow-tfserve-taxi 32 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/kubeflow/containers/trainer/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | FROM tensorflow/tensorflow:1.11.0 16 | 17 | RUN apt-get update -y && \ 18 | apt-get install --no-install-recommends -y -q build-essential && \ 19 | pip install pyyaml==3.12 six==1.11.0 \ 20 | tensorflow-transform==0.11.0 \ 21 | tensorflow-model-analysis==0.9.2 && \ 22 | apt-get --purge autoremove -y build-essential 23 | 24 | ADD build /ml 25 | WORKDIR /ml 26 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/kubeflow/containers/trainer/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright 2018 Google Inc. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | if [ -z "$1" ] 18 | then 19 | PROJECT_ID=$(gcloud config config-helper --format "value(configuration.properties.core.project)") 20 | else 21 | PROJECT_ID=$1 22 | fi 23 | 24 | mkdir -p ./build 25 | rsync -arvp "../../taxi_model"/ ./build/ 26 | 27 | docker build -f Dockerfile -t ml-pipeline-kubeflow-trainer-taxi . 
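# The taxi model sources staged under ./build are now baked into the image; remove the staging dir, then tag and push the trainer image to GCR.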
28 | rm -rf ./build 29 | 30 | docker tag ml-pipeline-kubeflow-trainer-taxi gcr.io/${PROJECT_ID}/ml-pipeline-kubeflow-trainer-taxi 31 | docker push gcr.io/${PROJECT_ID}/ml-pipeline-kubeflow-trainer-taxi 32 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/kubeflow/launcher/train.template.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | apiVersion: kubeflow.org/v1beta1 16 | kind: TFJob 17 | metadata: 18 | generateName: tfjob 19 | namespace: default 20 | spec: 21 | tfReplicaSpecs: 22 | PS: 23 | replicas: 1 24 | restartPolicy: OnFailure 25 | template: 26 | spec: 27 | containers: 28 | - name: tensorflow 29 | image: gcr.io/google-samples/ml-pipeline-kubeflow-trainer-taxi 30 | command: 31 | - python 32 | - -m 33 | - trainer.task 34 | env: 35 | - name: GOOGLE_APPLICATION_CREDENTIALS 36 | value: "/etc/secrets/user-gcp-sa.json" 37 | volumeMounts: 38 | - name: sa 39 | mountPath: "/etc/secrets" 40 | readOnly: true 41 | volumes: 42 | - name: sa 43 | secret: 44 | secretName: user-gcp-sa 45 | Worker: 46 | replicas: 1 47 | restartPolicy: OnFailure 48 | template: 49 | spec: 50 | containers: 51 | - name: tensorflow 52 | image: gcr.io/google-samples/ml-pipeline-kubeflow-trainer-taxi 53 | command: 54 | - python 55 | - -m 56 | - trainer.task 57 | env: 58 | - name: GOOGLE_APPLICATION_CREDENTIALS 59 | value: "/etc/secrets/user-gcp-sa.json" 60 | volumeMounts: 61 | - name: sa 62 | mountPath: "/etc/secrets" 63 | readOnly: true 64 | volumes: 65 | - name: sa 66 | secret: 67 | secretName: user-gcp-sa 68 | Master: 69 | replicas: 1 70 | restartPolicy: OnFailure 71 | template: 72 | spec: 73 | containers: 74 | - name: tensorflow 75 | image: gcr.io/google-samples/ml-pipeline-kubeflow-trainer-taxi 76 | command: 77 | - python 78 | - -m 79 | - trainer.task 80 | env: 81 | - name: GOOGLE_APPLICATION_CREDENTIALS 82 | value: "/etc/secrets/user-gcp-sa.json" 83 | volumeMounts: 84 | - name: sa 85 | mountPath: "/etc/secrets" 86 | readOnly: true 87 | volumes: 88 | - name: sa 89 | secret: 90 | secretName: user-gcp-sa 91 | 92 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/kubeflow/taxi_model/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/kubeflow/taxi_model/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | from setuptools import setup, find_packages 17 | 18 | 19 | setup( 20 | name='trainer', 21 | version='1.0.0', 22 | packages=find_packages(), 23 | description='Classifier', 24 | author='Google', 25 | keywords=[ 26 | ], 27 | license="Apache Software License", 28 | long_description=""" 29 | """, 30 | install_requires=[ 31 | 'tensorflow==1.15.4', 32 | ], 33 | package_data={ 34 | }, 35 | data_files=[], 36 | ) 37 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/kubeflow/taxi_model/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/kubeflow/tf-serving-gh/tf-serve-template.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | annotations: 6 | getambassador.io/config: |- 7 | --- 8 | apiVersion: ambassador/v0 9 | kind: Mapping 10 | name: tfserving-predict-mapping-MODEL_NAME 11 | prefix: tfserving/models/MODEL_NAME/ 12 | rewrite: /v1/models/MODEL_NAME:predict 13 | method: POST 14 | service: MODEL_NAME.kubeflow:8500 15 | labels: 16 | app: MODEL_NAME 17 | name: MODEL_NAME 18 | namespace: KUBEFLOW_NAMESPACE 19 | spec: 20 | ports: 21 | - name: grpc-tf-serving 22 | port: 9000 23 | targetPort: 9000 24 | - name: tf-serving-builtin-http 25 | port: 8500 26 | targetPort: 8500 27 | selector: 28 | app: MODEL_NAME 29 | type: ClusterIP 30 | --- 31 | apiVersion: extensions/v1beta1 32 | kind: Deployment 33 | metadata: 34 | labels: 35 | app: MODEL_NAME 36 | name: MODEL_NAME 37 | namespace: KUBEFLOW_NAMESPACE 38 | spec: 39 | replicas: 1 40 | template: 41 | metadata: 42 | labels: 43 | app: MODEL_NAME 44 | version: v1 45 | spec: 46 | containers: 47 | - args: 48 | - --port=9000 49 | - --rest_api_port=8500 50 | - --model_name=MODEL_NAME 51 | - --model_base_path=MODEL_PATH 52 | command: 53 | - /usr/bin/tensorflow_model_server 54 | image: tensorflow/serving 55 | imagePullPolicy: IfNotPresent 56 | livenessProbe: 57 | initialDelaySeconds: 30 58 | periodSeconds: 30 59 | tcpSocket: 60 | port: 9000 61 | name: MODEL_NAME 62 | ports: 63 | - containerPort: 9000 64 | - containerPort: 8500 65 | resources: 66 | limits: 67 | cpu: "4" 68 | memory: 4Gi 69 | requests: 70 | cpu: "1" 71 | memory: 1Gi 72 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/kubeflow/tf-serving/tf-serve-template.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | labels: 6 | app: MODEL_NAME 7 | name: MODEL_NAME 8 | namespace: KUBEFLOW_NAMESPACE 9 | spec: 10 | ports: 11 | - name: grpc-tf-serving 12 | port: 9000 13 | targetPort: 9000 14 | - name: tf-serving-builtin-http 15 | port: 8500 16 | targetPort: 8500 17 | selector: 18 | app: MODEL_NAME 19 | # type: LoadBalancer 20 | type: ClusterIP 21 | --- 22 | apiVersion: extensions/v1beta1 23 | kind: Deployment 24 | metadata: 25 | labels: 26 | app: MODEL_NAME 27 | name: MODEL_NAME 28 | namespace: KUBEFLOW_NAMESPACE 29 | spec: 30 | replicas: 1 31 | template: 32 | metadata: 33 | labels: 34 | app: MODEL_NAME 35 | version: v1 36 | spec: 37 | containers: 38 | - args: 39 | - --port=9000 40 | - --rest_api_port=8500 41 | - --model_name=MODEL_NAME 42 | - --model_base_path=MODEL_PATH 43 | command: 44 | - /usr/bin/tensorflow_model_server 45 | image: tensorflow/serving 46 | imagePullPolicy: IfNotPresent 47 | livenessProbe: 48 | initialDelaySeconds: 30 49 | periodSeconds: 30 50 | tcpSocket: 51 | port: 9000 52 | name: MODEL_NAME 53 | ports: 54 | - containerPort: 9000 55 | - containerPort: 8500 56 | resources: 57 | limits: 58 | cpu: "4" 59 | memory: 4Gi 60 | requests: 61 | cpu: "1" 62 | memory: 1Gi 63 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/containers/base/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. 
All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | FROM tensorflow/tensorflow:1.12.0-gpu 16 | 17 | RUN apt-get update -y 18 | 19 | RUN apt-get install --no-install-recommends -y -q ca-certificates python-dev python-setuptools \ 20 | wget unzip git 21 | 22 | RUN easy_install pip 23 | 24 | RUN pip install tensorflow-probability==0.5 25 | RUN pip install tensor2tensor==1.11.0 26 | RUN pip install tensorflow_hub==0.1.1 27 | RUN pip install pyyaml==3.12 six==1.11.0 28 | 29 | RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \ 30 | unzip -qq google-cloud-sdk.zip -d /tools && \ 31 | rm google-cloud-sdk.zip && \ 32 | /tools/google-cloud-sdk/install.sh --usage-reporting=false \ 33 | --path-update=false --bash-completion=false \ 34 | --disable-installation-options && \ 35 | /tools/google-cloud-sdk/bin/gcloud -q components update \ 36 | gcloud core gsutil && \ 37 | /tools/google-cloud-sdk/bin/gcloud config set component_manager/disable_update_check true && \ 38 | touch /tools/google-cloud-sdk/lib/third_party/google.py 39 | 40 | ADD build /ml 41 | 42 | ENV PATH $PATH:/tools/node/bin:/tools/google-cloud-sdk/bin 43 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/containers/base/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright 2018 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | mkdir -p ./build 18 | rsync -arvp "../../t2t-train"/ ./build/ 19 | 20 | docker build -t ml-pipeline-t2t-base . 21 | rm -rf ./build 22 | 23 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/containers/t2t_app/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | FROM tensorflow/tensorflow:1.12.0 16 | 17 | RUN apt-get update -y 18 | 19 | RUN apt-get install --no-install-recommends -y -q ca-certificates python-dev python-setuptools \ 20 | wget unzip git 21 | 22 | RUN easy_install pip 23 | 24 | RUN pip install tensorflow-probability==0.5 25 | RUN pip install tensor2tensor==1.11.0 26 | RUN pip install tensorflow-serving-api 27 | RUN pip install gunicorn 28 | RUN pip install pyyaml==3.12 six==1.11.0 29 | RUN pip install pandas 30 | 31 | RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \ 32 | unzip -qq google-cloud-sdk.zip -d /tools && \ 33 | rm google-cloud-sdk.zip && \ 34 | /tools/google-cloud-sdk/install.sh --usage-reporting=false \ 35 | --path-update=false --bash-completion=false \ 36 | --disable-installation-options && \ 37 | /tools/google-cloud-sdk/bin/gcloud -q components update \ 38 | gcloud core gsutil && \ 39 | /tools/google-cloud-sdk/bin/gcloud config set component_manager/disable_update_check true && \ 40 | touch /tools/google-cloud-sdk/lib/third_party/google.py 41 | 42 | ADD build /ml 43 | 44 | ENV PATH $PATH:/tools/node/bin:/tools/google-cloud-sdk/bin 45 | 46 | WORKDIR /ml/app 47 | 48 | CMD gunicorn -w 4 -b :8080 main:app 49 | 50 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/containers/t2t_app/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright 2018 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | if [ -z "$1" ] 18 | then 19 | PROJECT_ID=$(gcloud config config-helper --format "value(configuration.properties.core.project)") 20 | else 21 | PROJECT_ID=$1 22 | fi 23 | 24 | mkdir -p ./build 25 | rsync -arvp "../../t2t-app"/ ./build/ 26 | 27 | 28 | docker build -t ml-pipeline-t2tapp . 29 | docker tag ml-pipeline-t2tapp gcr.io/${PROJECT_ID}/ml-pipeline-t2tapp 30 | docker push gcr.io/${PROJECT_ID}/ml-pipeline-t2tapp 31 | 32 | rm -rf ./build -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/containers/t2t_proc/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | FROM tensorflow/tensorflow:1.12.0 16 | 17 | RUN apt-get update -y 18 | 19 | RUN apt-get install --no-install-recommends -y -q ca-certificates python-dev python-setuptools \ 20 | wget unzip git 21 | 22 | RUN easy_install pip 23 | 24 | RUN pip install tensorflow-probability==0.5 25 | RUN pip install tensor2tensor==1.11.0 26 | RUN pip install pyyaml==3.12 six==1.11.0 27 | RUN pip install google-cloud-storage 28 | 29 | 30 | RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \ 31 | unzip -qq google-cloud-sdk.zip -d /tools && \ 32 | rm google-cloud-sdk.zip && \ 33 | /tools/google-cloud-sdk/install.sh --usage-reporting=false \ 34 | --path-update=false --bash-completion=false \ 35 | --disable-installation-options && \ 36 | /tools/google-cloud-sdk/bin/gcloud -q components update \ 37 | gcloud core gsutil && \ 38 | /tools/google-cloud-sdk/bin/gcloud config set component_manager/disable_update_check true && \ 39 | touch /tools/google-cloud-sdk/lib/third_party/google.py 40 | 41 | ADD build /ml 42 | 43 | ENV PATH $PATH:/tools/node/bin:/tools/google-cloud-sdk/bin 44 | 45 | 46 | WORKDIR /ml 47 | 48 | RUN mkdir -p /ml/gh_data 49 | RUN mkdir -p /ml/gh_data/tmp 50 | 51 | ENTRYPOINT ["python", "/ml/datagen.py"] 52 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/containers/t2t_proc/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright 2018 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | if [ -z "$1" ] 18 | then 19 | PROJECT_ID=$(gcloud config config-helper --format "value(configuration.properties.core.project)") 20 | else 21 | PROJECT_ID=$1 22 | fi 23 | 24 | mkdir -p ./build 25 | rsync -arvp "../../t2t-proc"/ ./build/ 26 | 27 | docker build -t ml-pipeline-t2tproc . 28 | docker tag ml-pipeline-t2tproc gcr.io/${PROJECT_ID}/ml-pipeline-t2tproc 29 | docker push gcr.io/${PROJECT_ID}/ml-pipeline-t2tproc 30 | 31 | rm -rf ./build -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/containers/t2t_train/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | FROM ml-pipeline-t2t-base 16 | 17 | ENTRYPOINT ["python", "/ml/train_model.py"] 18 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/containers/t2t_train/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright 2018 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | if [ -z "$1" ] 18 | then 19 | PROJECT_ID=$(gcloud config config-helper --format "value(configuration.properties.core.project)") 20 | else 21 | PROJECT_ID=$1 22 | fi 23 | 24 | # build base image 25 | pushd ../base 26 | ./build.sh 27 | popd 28 | 29 | docker build -t ml-pipeline-t2ttrain . 30 | docker tag ml-pipeline-t2ttrain gcr.io/${PROJECT_ID}/ml-pipeline-t2ttrain 31 | docker push gcr.io/${PROJECT_ID}/ml-pipeline-t2ttrain 32 | 33 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/containers/webapp-launcher/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | FROM ubuntu:16.04 16 | 17 | RUN apt-get update -y 18 | 19 | RUN apt-get install --no-install-recommends -y -q ca-certificates python-dev python-setuptools wget unzip 20 | 21 | RUN easy_install pip 22 | 23 | RUN pip install pyyaml==3.12 six==1.11.0 requests==2.18.4 tensorflow==1.12.0 24 | 25 | RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \ 26 | unzip -qq google-cloud-sdk.zip -d tools && \ 27 | rm google-cloud-sdk.zip && \ 28 | tools/google-cloud-sdk/install.sh --usage-reporting=false \ 29 | --path-update=false --bash-completion=false \ 30 | --disable-installation-options && \ 31 | tools/google-cloud-sdk/bin/gcloud -q components update \ 32 | gcloud core gsutil && \ 33 | tools/google-cloud-sdk/bin/gcloud -q components install kubectl && \ 34 | tools/google-cloud-sdk/bin/gcloud config set component_manager/disable_update_check true && \ 35 | touch /tools/google-cloud-sdk/lib/third_party/google.py 36 | 37 | RUN wget -nv https://github.com/ksonnet/ksonnet/releases/download/v0.11.0/ks_0.11.0_linux_amd64.tar.gz && \ 38 | tar -xvzf ks_0.11.0_linux_amd64.tar.gz && \ 39 | mkdir -p /tools/ks/bin && \ 40 | cp ./ks_0.11.0_linux_amd64/ks /tools/ks/bin && \ 41 | rm ks_0.11.0_linux_amd64.tar.gz && \ 42 | rm -r ks_0.11.0_linux_amd64 43 | 44 | ENV PATH $PATH:/tools/google-cloud-sdk/bin:/tools/ks/bin 45 | 46 | ADD build /ml 47 | 48 | ENTRYPOINT ["python", "/ml/deploy-webapp.py"] 49 | 50 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/containers/webapp-launcher/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright 2018 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | if [ -z "$1" ] 18 | then 19 | PROJECT_ID=$(gcloud config config-helper --format "value(configuration.properties.core.project)") 20 | else 21 | PROJECT_ID=$1 22 | fi 23 | 24 | mkdir -p ./build 25 | rsync -arvp "../../webapp-launcher"/ ./build/ 26 | 27 | docker build -t ml-pipeline-webapp-launcher . 
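# Same pattern as the other component builds: remove the staging dir, then tag and push the webapp-launcher image.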
28 | rm -rf ./build 29 | 30 | docker tag ml-pipeline-webapp-launcher gcr.io/${PROJECT_ID}/ml-pipeline-webapp-launcher 31 | docker push gcr.io/${PROJECT_ID}/ml-pipeline-webapp-launcher 32 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/t2t-app/app/ghsumm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amygdala/code-snippets/9fa00e5a4ec4ae3eb5a2fa212d1ecefb4009eba7/ml/kubeflow-pipelines/components/older/t2t/t2t-app/app/ghsumm/__init__.py -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/t2t-app/app/ghsumm/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages 2 | from setuptools import setup 3 | 4 | REQUIRED_PACKAGES = [ 5 | 'tensor2tensor' 6 | ] 7 | 8 | setup( 9 | name='ghsumm', 10 | version='0.1', 11 | author='Google', 12 | author_email='training-feedback@cloud.google.com', 13 | install_requires=REQUIRED_PACKAGES, 14 | packages=find_packages(), 15 | include_package_data=True, 16 | description='GitHub Problem', 17 | requires=[] 18 | ) 19 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/t2t-app/app/ghsumm/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | from . import problem 2 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/t2t-app/app/ghsumm/trainer/problem.py: -------------------------------------------------------------------------------- 1 | import csv 2 | 3 | from tensor2tensor.utils import registry 4 | from tensor2tensor.data_generators import problem 5 | from tensor2tensor.data_generators import text_problems 6 | 7 | 8 | @registry.register_problem 9 | class GhProblem(text_problems.Text2TextProblem): 10 | """... predict GH issue title from body...""" 11 | 12 | @property 13 | def approx_vocab_size(self): 14 | return 2**13 # ~8k 15 | 16 | @property 17 | def is_generate_per_split(self): 18 | # False: generate_data will shard the data into TRAIN and EVAL for us.
19 | return False 20 | 21 | @property 22 | def max_subtoken_length(self): 23 | return 4 24 | 25 | @property 26 | def dataset_splits(self): 27 | """Splits of data to produce and number of output shards for each.""" 28 | # 10% evaluation data 29 | return [{ 30 | "split": problem.DatasetSplit.TRAIN, 31 | "shards": 90, 32 | }, { 33 | "split": problem.DatasetSplit.EVAL, 34 | "shards": 10, 35 | }] 36 | 37 | def generate_samples(self, data_dir, tmp_dir, dataset_split): #pylint: disable=unused-argument 38 | with open('gh_data/github_issues.csv') as csvfile: 39 | ireader = csv.reader((line.replace('\0', '') for line in csvfile), delimiter=',' 40 | # quotechar='|' 41 | ) 42 | NUM_ROWS = 1500000 43 | i = 0 44 | for row in ireader: 45 | if i >= NUM_ROWS: 46 | break 47 | yield { 48 | "inputs": row[2], # body 49 | "targets": row[1] # issue title 50 | } 51 | i += 1 52 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/t2t-proc/ghsumm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amygdala/code-snippets/9fa00e5a4ec4ae3eb5a2fa212d1ecefb4009eba7/ml/kubeflow-pipelines/components/older/t2t/t2t-proc/ghsumm/__init__.py -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/t2t-proc/ghsumm/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages 2 | from setuptools import setup 3 | 4 | REQUIRED_PACKAGES = [ 5 | 'tensor2tensor' 6 | ] 7 | 8 | setup( 9 | name='ghsumm', 10 | version='0.1', 11 | author='Google', 12 | author_email='training-feedback@cloud.google.com', 13 | install_requires=REQUIRED_PACKAGES, 14 | packages=find_packages(), 15 | include_package_data=True, 16 | description='GitHub Problem', 17 | requires=[] 18 | ) 19 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/t2t-proc/ghsumm/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | from . import problem 2 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/t2t-proc/ghsumm/trainer/problem.py: -------------------------------------------------------------------------------- 1 | import csv 2 | 3 | from tensor2tensor.utils import registry 4 | from tensor2tensor.data_generators import problem 5 | from tensor2tensor.data_generators import text_problems 6 | 7 | 8 | @registry.register_problem 9 | class GhProblem(text_problems.Text2TextProblem): 10 | """... predict GH issue title from body...""" 11 | 12 | @property 13 | def approx_vocab_size(self): 14 | return 2**13 # ~8k 15 | 16 | @property 17 | def is_generate_per_split(self): 18 | # False: generate_data will shard the data into TRAIN and EVAL for us.
19 | return False 20 | 21 | @property 22 | def max_subtoken_length(self): 23 | return 4 24 | 25 | @property 26 | def dataset_splits(self): 27 | """Splits of data to produce and number of output shards for each.""" 28 | # 10% evaluation data 29 | return [{ 30 | "split": problem.DatasetSplit.TRAIN, 31 | "shards": 90, 32 | }, { 33 | "split": problem.DatasetSplit.EVAL, 34 | "shards": 10, 35 | }] 36 | 37 | def generate_samples(self, data_dir, tmp_dir, dataset_split): #pylint: disable=unused-argument 38 | with open('/ml/gh_data/github_issues.csv') as csvfile: 39 | ireader = csv.reader((line.replace('\0', '') for line in csvfile), delimiter=',' 40 | # quotechar='|' 41 | ) 42 | NUM_ROWS = 50000 43 | i = 0 44 | for row in ireader: 45 | if i >= NUM_ROWS: 46 | break 47 | yield { 48 | "inputs": row[2], # body 49 | "targets": row[1] # issue title 50 | } 51 | i += 1 52 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/t2t-train/ghsumm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amygdala/code-snippets/9fa00e5a4ec4ae3eb5a2fa212d1ecefb4009eba7/ml/kubeflow-pipelines/components/older/t2t/t2t-train/ghsumm/__init__.py -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/t2t-train/ghsumm/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages 2 | from setuptools import setup 3 | 4 | REQUIRED_PACKAGES = [ 5 | 'tensor2tensor' 6 | ] 7 | 8 | setup( 9 | name='ghsumm', 10 | version='0.1', 11 | author='Google', 12 | author_email='training-feedback@cloud.google.com', 13 | install_requires=REQUIRED_PACKAGES, 14 | packages=find_packages(), 15 | include_package_data=True, 16 | description='GitHub issue summarization problem', 17 | requires=[] 18 | ) 19 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/t2t-train/ghsumm/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | from . import problem 2 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/t2t-train/ghsumm/trainer/problem.py: -------------------------------------------------------------------------------- 1 | import csv 2 | 3 | from tensor2tensor.utils import registry 4 | from tensor2tensor.data_generators import problem 5 | from tensor2tensor.data_generators import text_problems 6 | 7 | 8 | @registry.register_problem 9 | class GhProblem(text_problems.Text2TextProblem): 10 | """... predict GH issue title from body...""" 11 | 12 | @property 13 | def approx_vocab_size(self): 14 | return 2**13 # ~8k 15 | 16 | @property 17 | def is_generate_per_split(self): 18 | # False: generate_data will shard the data into TRAIN and EVAL for us. 
19 | return False 20 | 21 | @property 22 | def max_subtoken_length(self): 23 | return 4 24 | 25 | @property 26 | def dataset_splits(self): 27 | """Splits of data to produce and number of output shards for each.""" 28 | # 10% evaluation data 29 | return [{ 30 | "split": problem.DatasetSplit.TRAIN, 31 | "shards": 90, 32 | }, { 33 | "split": problem.DatasetSplit.EVAL, 34 | "shards": 10, 35 | }] 36 | 37 | def generate_samples(self, data_dir, tmp_dir, dataset_split): #pylint: disable=unused-argument 38 | with open('gh_data/github_issues.csv') as csvfile: 39 | ireader = csv.reader((line.replace('\0', '') for line in csvfile), delimiter=',' 40 | # quotechar='|' 41 | ) 42 | NUM_ROWS = 6000000 43 | i = 0 44 | for row in ireader: 45 | if i >= NUM_ROWS: 46 | break 47 | yield { 48 | "inputs": row[2], # body 49 | "targets": row[1] # issue title 50 | } 51 | i += 1 52 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/t2t-train/train_model.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """...""" 16 | 17 | import argparse 18 | import json 19 | import subprocess 20 | 21 | 22 | def main(): 23 | parser = argparse.ArgumentParser(description='ML Trainer') 24 | parser.add_argument( 25 | '--model-dir', 26 | help='...', 27 | required=True) 28 | parser.add_argument( 29 | '--data-dir', 30 | help='...', 31 | required=True) 32 | parser.add_argument( 33 | '--checkpoint-dir', 34 | help='...', 35 | required=True) 36 | parser.add_argument( 37 | '--train-steps', 38 | help='...', 39 | required=True) 40 | parser.add_argument( 41 | '--deploy-webapp', 42 | help='...', 43 | required=True) 44 | 45 | args = parser.parse_args() 46 | 47 | # Create metadata.json file for visualization. 48 | metadata = { 49 | 'outputs' : [{ 50 | 'type': 'tensorboard', 51 | 'source': args.model_dir, 52 | }] 53 | } 54 | with open('/mlpipeline-ui-metadata.json', 'w') as f: 55 | json.dump(metadata, f) 56 | 57 | problem = 'gh_problem' 58 | data_dir = args.data_dir 59 | print("data dir: %s" % data_dir) 60 | # copy the model starting point 61 | model_startpoint = args.checkpoint_dir 62 | print("model_startpoint: %s" % model_startpoint) 63 | model_dir = args.model_dir 64 | print("model_dir: %s" % model_dir) 65 | model_copy_command = ['gsutil', '-m', 'cp', '-r', model_startpoint, model_dir 66 | ] 67 | print(model_copy_command) 68 | result1 = subprocess.call(model_copy_command) 69 | print(result1) 70 | 71 | print('training steps (total): %s' % args.train_steps) 72 | 73 | # Then run the training for N steps from there. 
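# For reference, the list assembled below is equivalent to a CLI invocation
# along these lines (bucket paths and step count illustrative, not from a real run):
#   t2t-trainer --data_dir=gs://BUCKET/data --t2t_usr_dir=/ml/ghsumm/trainer \
#     --problem=gh_problem --model=transformer --hparams_set=transformer_prepend \
#     --output_dir=gs://BUCKET/model --job-dir=gs://BUCKET/model \
#     --train_steps=10000 --eval_throttle_seconds=240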
74 | model_train_command = ['t2t-trainer', '--data_dir', data_dir, 75 | '--t2t_usr_dir', '/ml/ghsumm/trainer', 76 | '--problem', problem, 77 | '--model', 'transformer', '--hparams_set', 'transformer_prepend', '--output_dir', model_dir, 78 | '--job-dir', model_dir, 79 | '--train_steps', args.train_steps, '--eval_throttle_seconds', '240', 80 | ] 81 | print(model_train_command) 82 | result2 = subprocess.call(model_train_command) 83 | print(result2) 84 | 85 | # then export the model... 86 | 87 | model_export_command = ['t2t-exporter', '--model', 'transformer', 88 | '--hparams_set', 'transformer_prepend', 89 | '--problem', problem, 90 | '--t2t_usr_dir', '/ml/ghsumm/trainer', '--data_dir', data_dir, '--output_dir', model_dir] 91 | print(model_export_command) 92 | result3 = subprocess.call(model_export_command) 93 | print(result3) 94 | 95 | print("deploy-webapp arg: %s" % args.deploy_webapp) 96 | with open('/tmp/output', 'w') as f: 97 | f.write(args.deploy_webapp) 98 | 99 | if __name__ == "__main__": 100 | main() 101 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/webapp-launcher/deploy-webapp.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | import argparse 17 | import os 18 | import logging 19 | import subprocess 20 | import requests 21 | 22 | 23 | def main(): 24 | parser = argparse.ArgumentParser(description='Serving webapp') 25 | parser.add_argument( 26 | '--model_name', 27 | help='...', 28 | required=True) 29 | parser.add_argument( 30 | '--github_token', 31 | help='...', 32 | required=True) 33 | 34 | parser.add_argument('--cluster', type=str, 35 | help='GKE cluster set up for kubeflow. If set, zone must be provided. ' + 36 | 'If not set, assuming this runs in a GKE container and current ' + 37 | 'cluster is used.') 38 | parser.add_argument('--zone', type=str, help='zone of the kubeflow cluster.') 39 | args = parser.parse_args() 40 | 41 | KUBEFLOW_NAMESPACE = 'kubeflow' 42 | 43 | print("using model name: %s and namespace: %s" % (args.model_name, KUBEFLOW_NAMESPACE)) 44 | 45 | logging.getLogger().setLevel(logging.INFO) 46 | args_dict = vars(args) 47 | 48 | if args.cluster and args.zone: 49 | cluster = args_dict.pop('cluster') #pylint: disable=unused-variable 50 | zone = args_dict.pop('zone') #pylint: disable=unused-variable 51 | else: 52 | # Get cluster name and zone from metadata 53 | metadata_server = "http://metadata/computeMetadata/v1/instance/" 54 | metadata_flavor = {'Metadata-Flavor' : 'Google'} 55 | cluster = requests.get(metadata_server + "attributes/cluster-name", 56 | headers=metadata_flavor).text 57 | zone = requests.get(metadata_server + "zone", 58 | headers=metadata_flavor).text.split('/')[-1] 59 | 60 | # logging.info('Getting credentials for GKE cluster %s.' 
% cluster) 61 | # subprocess.call(['gcloud', 'container', 'clusters', 'get-credentials', cluster, 62 | # '--zone', zone]) 63 | 64 | logging.info('Generating training template.') 65 | 66 | template_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), 't2tapp-template.yaml') 67 | target_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), 't2tapp.yaml') 68 | 69 | with open(template_file, 'r') as f: 70 | with open(target_file, "w") as target: 71 | data = f.read() 72 | changed = data.replace('MODEL_NAME', args.model_name) 73 | changed1 = changed.replace('KUBEFLOW_NAMESPACE', KUBEFLOW_NAMESPACE).replace( 74 | 'GITHUB_TOKEN', args.github_token).replace( 75 | 'DATA_DIR', 'gs://aju-dev-demos-codelabs/kubecon/t2t_data_gh_all/') 76 | target.write(changed1) 77 | 78 | 79 | logging.info('deploying web app.') 80 | subprocess.call(['kubectl', 'create', '-f', '/ml/t2tapp.yaml']) 81 | 82 | 83 | if __name__ == "__main__": 84 | main() 85 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/webapp-launcher/t2tapp-template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | annotations: 5 | getambassador.io/config: |- 6 | --- 7 | apiVersion: ambassador/v0 8 | kind: Mapping 9 | name: webapp-MODEL_NAME 10 | prefix: /webapp/ 11 | rewrite: / 12 | timeout_ms: 1200000 13 | service: MODEL_NAME-webappsvc.KUBEFLOW_NAMESPACE:80 14 | name: MODEL_NAME-webappsvc 15 | labels: 16 | app: ghsumm 17 | role: frontend 18 | spec: 19 | type: ClusterIP 20 | ports: 21 | - port: 80 22 | targetPort: "http-server" 23 | selector: 24 | app: ghsumm 25 | role: frontend 26 | 27 | --- 28 | 29 | apiVersion: extensions/v1beta1 30 | kind: Deployment 31 | metadata: 32 | name: MODEL_NAME-webapp 33 | spec: 34 | replicas: 1 35 | template: 36 | metadata: 37 | labels: 38 | app: ghsumm 39 | role: frontend 40 | spec: 41 | containers: 42 | - name: MODEL_NAME-webapp 43 | image: gcr.io/google-samples/ml-pipeline-t2tapp 44 | # resources: 45 | # limits: 46 | # nvidia.com/gpu: 1 47 | imagePullPolicy: Always 48 | env: 49 | - name: TFSERVING_HOST 50 | value: MODEL_NAME.KUBEFLOW_NAMESPACE 51 | - name: TF_SERVABLE_NAME 52 | value: MODEL_NAME 53 | - name: GH_TOKEN 54 | value: GITHUB_TOKEN 55 | - name: DATADIR 56 | value: DATA_DIR 57 | ports: 58 | - name: http-server 59 | containerPort: 8080 60 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/eval_metrics_component.yaml: -------------------------------------------------------------------------------- 1 | name: Eval metrics 2 | inputs: 3 | - {name: metrics, type: String} 4 | - {name: thresholds, type: String} 5 | outputs: 6 | - {name: deploy, type: String} 7 | implementation: 8 | container: 9 | image: gcr.io/deeplearning-platform-release/tf2-cpu.2-3:latest 10 | command: 11 | - python3 12 | - -u 13 | - -c 14 | - | 15 | def eval_metrics( 16 | metrics, 17 | thresholds 18 | ): 19 | 20 | import json 21 | import logging 22 | 23 | def regression_threshold_check(metrics_info): 24 | # ... 
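# metrics_info is expected to map metric names to lists of per-epoch values,
# e.g. {"mae": [2.1, 1.4, 1.2]} (illustrative); the loop below compares the
# most recent value of each thresholded metric against its bound.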
25 | for k, v in thresholds_dict.items(): 26 | logging.info('k {}, v {}'.format(k, v)) 27 | if k in ['root_mean_squared_error', 'mae']: 28 | if metrics_info[k][-1] > v: 29 | logging.info('{} > {}; returning False'.format(metrics_info[k][-1], v)) 30 | return ('False', ) 31 | return ('deploy', ) 32 | 33 | logging.getLogger().setLevel(logging.INFO) # TODO: make level configurable 34 | 35 | thresholds_dict = json.loads(thresholds) 36 | logging.info('thresholds dict: {}'.format(thresholds_dict)) 37 | logging.info('metrics: %s', metrics) 38 | metrics_dict = json.loads(metrics) 39 | 40 | logging.info("got metrics info: %s", metrics_dict) 41 | res = regression_threshold_check(metrics_dict) 42 | logging.info('deploy decision: %s', res) 43 | return res 44 | 45 | def _serialize_str(str_value: str) -> str: 46 | if not isinstance(str_value, str): 47 | raise TypeError('Value "{}" has type "{}" instead of str.'.format(str(str_value), str(type(str_value)))) 48 | return str_value 49 | 50 | import argparse 51 | _parser = argparse.ArgumentParser(prog='Eval metrics', description='') 52 | _parser.add_argument("--metrics", dest="metrics", type=str, required=True, default=argparse.SUPPRESS) 53 | _parser.add_argument("--thresholds", dest="thresholds", type=str, required=True, default=argparse.SUPPRESS) 54 | _parser.add_argument("----output-paths", dest="_output_paths", type=str, nargs=1) 55 | _parsed_args = vars(_parser.parse_args()) 56 | _output_files = _parsed_args.pop("_output_paths", []) 57 | 58 | _outputs = eval_metrics(**_parsed_args) 59 | 60 | _output_serializers = [ 61 | _serialize_str, 62 | 63 | ] 64 | 65 | import os 66 | for idx, output_file in enumerate(_output_files): 67 | try: 68 | os.makedirs(os.path.dirname(output_file)) 69 | except OSError: 70 | pass 71 | with open(output_file, 'w') as f: 72 | f.write(_output_serializers[idx](_outputs[idx])) 73 | args: 74 | - --metrics 75 | - {inputValue: metrics} 76 | - --thresholds 77 | - {inputValue: thresholds} 78 | - '----output-paths' 79 | - {outputPath: deploy} 80 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/bikesw_training/bwmodel/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amygdala/code-snippets/9fa00e5a4ec4ae3eb5a2fa212d1ecefb4009eba7/ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/bikesw_training/bwmodel/__init__.py -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/bikesw_training/eval_metrics.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
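# Usage sketch, with hypothetical values: calling
#   eval_metrics(metrics='{"mae": [1.9, 1.4]}', thresholds='{"mae": 2.0}')
# returns ('deploy',) since the latest MAE is under its threshold, while a
# latest value above 2.0 would return ('False',).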
14 | 15 | from typing import NamedTuple 16 | # from kfp.components import InputPath, OutputPath 17 | 18 | 19 | # An example of how the model eval info could be used to make decisions about whether or not 20 | # to deploy the model. 21 | def eval_metrics( 22 | metrics: str, 23 | thresholds: str 24 | ) -> NamedTuple('Outputs', [('deploy', str)]): 25 | 26 | import json 27 | import logging 28 | 29 | def regression_threshold_check(metrics_info): 30 | # ... 31 | for k, v in thresholds_dict.items(): 32 | logging.info('k {}, v {}'.format(k, v)) 33 | if k in ['root_mean_squared_error', 'mae']: 34 | if metrics_info[k][-1] > v: 35 | logging.info('{} > {}; returning False'.format(metrics_info[k][-1], v)) 36 | return ('False', ) 37 | return ('deploy', ) 38 | 39 | logging.getLogger().setLevel(logging.INFO) # TODO: make level configurable 40 | 41 | thresholds_dict = json.loads(thresholds) 42 | logging.info('thresholds dict: {}'.format(thresholds_dict)) 43 | logging.info('metrics: %s', metrics) 44 | metrics_dict = json.loads(metrics) 45 | 46 | logging.info("got metrics info: %s", metrics_dict) 47 | res = regression_threshold_check(metrics_dict) 48 | logging.info('deploy decision: %s', res) 49 | return res 50 | 51 | 52 | if __name__ == '__main__': 53 | import kfp 54 | kfp.components.func_to_container_op(eval_metrics, 55 | output_component_file='../../eval_metrics_component.yaml', base_image='gcr.io/deeplearning-platform-release/tf2-cpu.2-3:latest') 56 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/bikesw_training/kchief_deployment_templ.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | labels: 6 | app: KTUNER_CHIEF 7 | apptype: ktuner-chief 8 | name: KTUNER_CHIEF 9 | namespace: NAMESPACE 10 | spec: 11 | ports: 12 | - name: grpc 13 | port: 9000 14 | targetPort: 9000 15 | selector: 16 | app: KTUNER_CHIEF 17 | type: ClusterIP 18 | --- 19 | apiVersion: batch/v1 20 | kind: Job 21 | metadata: 22 | labels: 23 | app: KTUNER_CHIEF 24 | apptype: ktuner-chief 25 | name: KTUNER_CHIEF-dep 26 | namespace: NAMESPACE 27 | spec: 28 | # replicas: 1 29 | template: 30 | metadata: 31 | labels: 32 | app: KTUNER_CHIEF 33 | apptype: ktuner-chief 34 | version: v1 35 | spec: 36 | containers: 37 | - args: 38 | - --epochs=EPOCHS 39 | - --tuner-dir=TUNER_DIR 40 | - --tuner-proj=TUNER_PROJ 41 | - --tuner-num=TUNER_NUM 42 | - --max-trials=MAX_TRIALS 43 | - --executions-per-trial=EXECS_PER_TRIAL 44 | - --num-best-hps=NUM_BEST_HPS 45 | - --respath=RES_PATH 46 | - --bucket-name=BUCKET_NAME 47 | image: gcr.io/google-samples/ml-pipeline-bikes-tuner 48 | env: 49 | - name: KERASTUNER_TUNER_ID 50 | value: chief 51 | - name: KERASTUNER_ORACLE_IP 52 | valueFrom: 53 | fieldRef: 54 | fieldPath: status.podIP 55 | - name: KERASTUNER_ORACLE_PORT 56 | value: "9000" 57 | imagePullPolicy: Always 58 | name: ktuner-chief 59 | ports: 60 | - name: tuner-port 61 | containerPort: 9000 62 | resources: 63 | limits: 64 | cpu: 1 65 | memory: 2Gi 66 | restartPolicy: Never 67 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/bikesw_training/ktuners_deployment_templ.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: batch/v1 3 | kind: Job 4 | metadata: 5 | labels: 6 | app: ktuner-tuner 7 | name: KTUNER_DEP_NAME 8 | 
namespace: NAMESPACE 9 | spec: 10 | # replicas: 1 11 | template: 12 | metadata: 13 | labels: 14 | app: ktuner-tuner 15 | version: v1 16 | spec: 17 | containers: 18 | - args: 19 | - --epochs=EPOCHS 20 | - --tuner-dir=TUNER_DIR 21 | - --tuner-proj=TUNER_PROJ 22 | - --tuner-num=TUNER_NUM 23 | - --max-trials=MAX_TRIALS 24 | - --executions-per-trial=EXECS_PER_TRIAL 25 | - --num-best-hps=NUM_BEST_HPS 26 | - --respath=RES_PATH 27 | - --bucket-name=BUCKET_NAME 28 | image: gcr.io/google-samples/ml-pipeline-bikes-tuner 29 | env: 30 | - name: KERASTUNER_TUNER_ID 31 | value: KTUNER_ID 32 | - name: KERASTUNER_ORACLE_IP 33 | value: KTUNER_CHIEF 34 | - name: KERASTUNER_ORACLE_PORT 35 | value: "9000" 36 | imagePullPolicy: Always 37 | name: ktuner-tuner 38 | ports: 39 | - name: tuner-port 40 | containerPort: 9000 41 | resources: 42 | limits: {nvidia.com/gpu: 1} 43 | restartPolicy: Never 44 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | steps: 16 | 17 | # bw-pl-bikes-train 18 | - name: 'bash' 19 | args: ['./copydir.sh'] 20 | id: copy1 21 | dir: 'ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/bikesw_training' 22 | 23 | - name: 'gcr.io/cloud-builders/docker' 24 | args: ['build', '-t', 'gcr.io/$PROJECT_ID/bw-pl-bikes-train:$SHORT_SHA', '.'] 25 | dir: 'ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/bikesw_training' 26 | waitFor: ['copy1'] 27 | 28 | - name: 'gcr.io/cloud-builders/docker' 29 | args: ['push', 'gcr.io/$PROJECT_ID/bw-pl-bikes-train:$SHORT_SHA'] 30 | 31 | # ml-pipeline-bikes-tuner 32 | - name: 'bash' 33 | args: ['./copydir.sh'] 34 | id: copy2 35 | dir: 'ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/bikesw_training_hptune' 36 | waitFor: ['-'] # The '-' indicates that this step begins immediately. 
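# Each image here follows the same copy -> build -> push pattern; the waitFor
# entries let the four image builds run in parallel, with each docker build
# gated only on its own copy step. A manual run would look something like
#   gcloud builds submit --config=cloudbuild.yaml --substitutions=SHORT_SHA=localtest .
# since SHORT_SHA is only auto-populated by build triggers.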
37 | 38 | - name: 'gcr.io/cloud-builders/docker' 39 | args: ['build', '-t', 'gcr.io/$PROJECT_ID/ml-pipeline-bikes-tuner:$SHORT_SHA', '.'] 40 | dir: 'ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/bikesw_training_hptune' 41 | waitFor: ['copy2'] 42 | 43 | - name: 'gcr.io/cloud-builders/docker' 44 | args: ['push', 'gcr.io/$PROJECT_ID/ml-pipeline-bikes-tuner:$SHORT_SHA'] 45 | 46 | # ml-pipeline-bikes-dep 47 | - name: 'bash' 48 | args: ['./copydir.sh'] 49 | id: copy3 50 | dir: 'ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/deploy_jobs' 51 | waitFor: ['-'] 52 | 53 | - name: 'gcr.io/cloud-builders/docker' 54 | args: ['build', '-t', 'gcr.io/$PROJECT_ID/ml-pipeline-bikes-dep:$SHORT_SHA', '.'] 55 | dir: 'ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/deploy_jobs' 56 | waitFor: ['copy3'] 57 | 58 | - name: 'gcr.io/cloud-builders/docker' 59 | args: ['push', 'gcr.io/$PROJECT_ID/ml-pipeline-bikes-dep:$SHORT_SHA'] 60 | 61 | # bw-pipeline-tfserve 62 | - name: 'bash' 63 | args: ['./copydir.sh'] 64 | id: copy4 65 | dir: 'ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/tf-serving' 66 | waitFor: ['-'] 67 | 68 | - name: 'gcr.io/cloud-builders/docker' 69 | args: ['build', '-t', 'gcr.io/$PROJECT_ID/bw-pipeline-tfserve:$SHORT_SHA', '.'] 70 | dir: 'ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/tf-serving' 71 | waitFor: ['copy4'] 72 | 73 | - name: 'gcr.io/cloud-builders/docker' 74 | args: ['push', 'gcr.io/$PROJECT_ID/bw-pipeline-tfserve:$SHORT_SHA'] 75 | 76 | timeout: 2000s 77 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/bikesw_training/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # FROM tensorflow/tensorflow:2.1.0-gpu-py3 16 | FROM tensorflow/tensorflow:2.3.0-gpu 17 | 18 | 19 | RUN pip install --upgrade pip 20 | RUN pip install pathlib2 21 | 22 | 23 | ADD build /ml 24 | 25 | ENTRYPOINT ["python", "/ml/bikes_weather_limited.py"] 26 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/bikesw_training/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright 2019 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | if [ -z "$1" ] 18 | then 19 | PROJECT_ID=$(gcloud config config-helper --format "value(configuration.properties.core.project)") 20 | else 21 | PROJECT_ID=$1 22 | fi 23 | 24 | mkdir -p ./build 25 | rsync -arvp "../../bikesw_training"/ ./build/ 26 | 27 | docker build -t bw-pl-bikes-train . 28 | rm -rf ./build 29 | 30 | docker tag bw-pl-bikes-train gcr.io/${PROJECT_ID}/bw-pl-bikes-train 31 | docker push gcr.io/${PROJECT_ID}/bw-pl-bikes-train 32 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/bikesw_training/cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | steps: 16 | 17 | - name: 'bash' 18 | args: ['./copydir.sh'] 19 | id: 'copydir' 20 | dir: 'ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/bikesw_training' 21 | 22 | - name: 'gcr.io/cloud-builders/docker' 23 | args: ['build', '-t', 'gcr.io/$PROJECT_ID/bw-pl-bikes-train:$SHORT_SHA', '.'] 24 | dir: 'ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/bikesw_training' 25 | 26 | - name: 'gcr.io/cloud-builders/docker' 27 | args: ['push', 'gcr.io/$PROJECT_ID/bw-pl-bikes-train:$SHORT_SHA'] 28 | 29 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/bikesw_training/copydir.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
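# Stages the shared bikesw_training sources into ./build so they fall inside
# this container's Docker build context (docker build cannot ADD files from
# outside the current directory tree).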
14 | 15 | 16 | mkdir -p ./build 17 | cp -pr ../../bikesw_training/* ./build/ 18 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/bikesw_training_hptune/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # FROM tensorflow/tensorflow:2.1.0-gpu-py3 16 | FROM tensorflow/tensorflow:2.3.0-gpu 17 | 18 | RUN pip install --upgrade pip 19 | RUN pip install keras-tuner google-cloud-storage 20 | 21 | 22 | ADD build /ml 23 | 24 | ENTRYPOINT ["python", "/ml/bw_hptune_standalone.py"] 25 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/bikesw_training_hptune/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright 2019 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | if [ -z "$1" ] 18 | then 19 | PROJECT_ID=$(gcloud config config-helper --format "value(configuration.properties.core.project)") 20 | else 21 | PROJECT_ID=$1 22 | fi 23 | 24 | mkdir -p ./build 25 | rsync -arvp "../../bikesw_training/"/ ./build/ 26 | 27 | docker build -t ml-pipeline-bikes-tuner . 28 | rm -rf ./build 29 | 30 | docker tag ml-pipeline-bikes-tuner gcr.io/${PROJECT_ID}/ml-pipeline-bikes-tuner 31 | docker push gcr.io/${PROJECT_ID}/ml-pipeline-bikes-tuner 32 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/bikesw_training_hptune/cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | steps: 16 | 17 | # ml-pipeline-bikes-tuner 18 | - name: 'bash' 19 | args: ['./copydir.sh'] 20 | id: copy2 21 | dir: 'ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/bikesw_training_hptune' 22 | waitFor: ['-'] 23 | 24 | - name: 'gcr.io/cloud-builders/docker' 25 | args: ['build', '-t', 'gcr.io/$PROJECT_ID/ml-pipeline-bikes-tuner:$SHORT_SHA', '.'] 26 | dir: 'ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/bikesw_training_hptune' 27 | waitFor: ['copy2'] 28 | 29 | - name: 'gcr.io/cloud-builders/docker' 30 | args: ['push', 'gcr.io/$PROJECT_ID/ml-pipeline-bikes-tuner:$SHORT_SHA'] 31 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/bikesw_training_hptune/copydir.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | mkdir -p ./build 17 | cp -pr ../../bikesw_training/* ./build/ 18 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/deploy_jobs/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
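# Image for the tuner-deployment pipeline step: Python 3 plus the Cloud SDK
# (gcloud, gsutil, kubectl), so that deploy_tuner.py can create the tuner Jobs
# on the cluster and access results in GCS.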
14 | 15 | FROM ubuntu:20.04 16 | 17 | RUN apt-get update \ 18 | && apt-get install -y python3-pip python3-dev wget unzip \ 19 | && cd /usr/local/bin \ 20 | && ln -s /usr/bin/python3 python \ 21 | && pip3 install --upgrade pip 22 | 23 | # RUN apt-get install -y wget unzip git 24 | 25 | RUN pip install --upgrade pip 26 | RUN pip install urllib3 certifi retrying 27 | RUN pip install google-cloud-storage 28 | RUN pip install --upgrade six 29 | 30 | 31 | RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \ 32 | unzip -qq google-cloud-sdk.zip -d tools && \ 33 | rm google-cloud-sdk.zip && \ 34 | tools/google-cloud-sdk/install.sh --usage-reporting=false \ 35 | --path-update=false --bash-completion=false \ 36 | --disable-installation-options && \ 37 | tools/google-cloud-sdk/bin/gcloud -q components update \ 38 | gcloud core gsutil && \ 39 | tools/google-cloud-sdk/bin/gcloud -q components install kubectl && \ 40 | tools/google-cloud-sdk/bin/gcloud config set component_manager/disable_update_check true && \ 41 | touch /tools/google-cloud-sdk/lib/third_party/google.py 42 | 43 | 44 | ENV PATH $PATH:/tools/google-cloud-sdk/bin 45 | 46 | ADD build /ml 47 | 48 | ENTRYPOINT ["python", "/ml/deploy_tuner.py"] 49 | 50 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/deploy_jobs/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright 2020 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | if [ -z "$1" ] 18 | then 19 | PROJECT_ID=$(gcloud config config-helper --format "value(configuration.properties.core.project)") 20 | else 21 | PROJECT_ID=$1 22 | fi 23 | 24 | mkdir -p ./build 25 | rsync -arvp "../../bikesw_training/"/ ./build/ 26 | 27 | docker build -t ml-pipeline-bikes-dep . 28 | rm -rf ./build 29 | 30 | docker tag ml-pipeline-bikes-dep gcr.io/${PROJECT_ID}/ml-pipeline-bikes-dep 31 | docker push gcr.io/${PROJECT_ID}/ml-pipeline-bikes-dep 32 | 33 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/deploy_jobs/cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | steps: 16 | 17 | # ml-pipeline-bikes-dep 18 | - name: 'bash' 19 | args: ['./copydir.sh'] 20 | id: copy3 21 | dir: 'ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/deploy_jobs' 22 | waitFor: ['-'] 23 | 24 | - name: 'gcr.io/cloud-builders/docker' 25 | args: ['build', '-t', 'gcr.io/$PROJECT_ID/ml-pipeline-bikes-dep:$SHORT_SHA', '.'] 26 | dir: 'ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/deploy_jobs' 27 | waitFor: ['copy3'] 28 | 29 | - name: 'gcr.io/cloud-builders/docker' 30 | args: ['push', 'gcr.io/$PROJECT_ID/ml-pipeline-bikes-dep:$SHORT_SHA'] -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/deploy_jobs/copydir.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | mkdir -p ./build 17 | cp -pr ../../bikesw_training/* ./build/ 18 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/tf-serving/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
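# Image for the TF-Serving deployment step: layers the Cloud SDK and kubectl on
# a TensorFlow base image so that deploy-tfserve.py can render the serving
# template and apply it to the cluster.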
14 | 15 | FROM tensorflow/tensorflow:2.1.0-gpu-py3 16 | 17 | RUN apt-get update -y 18 | 19 | RUN apt-get install --no-install-recommends -y -q ca-certificates python-dev python-setuptools wget unzip 20 | 21 | RUN easy_install pip 22 | 23 | RUN pip install pyyaml==3.12 six requests==2.18.4 24 | 25 | RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \ 26 | unzip -qq google-cloud-sdk.zip -d tools && \ 27 | rm google-cloud-sdk.zip && \ 28 | tools/google-cloud-sdk/install.sh --usage-reporting=false \ 29 | --path-update=false --bash-completion=false \ 30 | --disable-installation-options && \ 31 | tools/google-cloud-sdk/bin/gcloud -q components update \ 32 | gcloud core gsutil && \ 33 | tools/google-cloud-sdk/bin/gcloud -q components install kubectl && \ 34 | tools/google-cloud-sdk/bin/gcloud config set component_manager/disable_update_check true && \ 35 | touch /tools/google-cloud-sdk/lib/third_party/google.py 36 | 37 | 38 | ENV PATH $PATH:/tools/google-cloud-sdk/bin 39 | 40 | ADD build /ml 41 | 42 | ENTRYPOINT ["python", "/ml/deploy-tfserve.py"] 43 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/tf-serving/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright 2018 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | if [ -z "$1" ] 18 | then 19 | PROJECT_ID=$(gcloud config config-helper --format "value(configuration.properties.core.project)") 20 | else 21 | PROJECT_ID=$1 22 | fi 23 | 24 | mkdir -p ./build 25 | rsync -arvp "../../tf-serving"/ ./build/ 26 | 27 | docker build -t bw-pipeline-tfserve . 28 | rm -rf ./build 29 | 30 | docker tag bw-pipeline-tfserve gcr.io/${PROJECT_ID}/bw-pipeline-tfserve 31 | docker push gcr.io/${PROJECT_ID}/bw-pipeline-tfserve 32 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/tf-serving/cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
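# Standalone build config for just the bw-pipeline-tfserve image; the
# aggregate kubeflow-resources/cloudbuild.yaml runs these same steps as part of
# its parallel multi-image build.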
14 | 15 | steps: 16 | 17 | - name: 'bash' 18 | args: ['./copydir.sh'] 19 | id: copy4 20 | dir: 'ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/tf-serving' 21 | waitFor: ['-'] 22 | 23 | - name: 'gcr.io/cloud-builders/docker' 24 | args: ['build', '-t', 'gcr.io/$PROJECT_ID/bw-pipeline-tfserve:$SHORT_SHA', '.'] 25 | dir: 'ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/tf-serving' 26 | waitFor: ['copy4'] 27 | 28 | - name: 'gcr.io/cloud-builders/docker' 29 | args: ['push', 'gcr.io/$PROJECT_ID/bw-pipeline-tfserve:$SHORT_SHA'] 30 | 31 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/tf-serving/copydir.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | mkdir -p ./build 17 | cp -pr ../../tf-serving/* ./build/ 18 | 19 | 20 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/tf-serving/tf-serve-template.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | labels: 6 | app: SERVICE_NAME 7 | apptype: tf-serving 8 | name: SERVICE_NAME 9 | namespace: KUBEFLOW_NAMESPACE 10 | spec: 11 | ports: 12 | - name: grpc-tf-serving 13 | port: 9000 14 | targetPort: 9000 15 | - name: tf-serving-builtin-http 16 | port: 8500 17 | targetPort: 8500 18 | selector: 19 | app: SERVICE_NAME 20 | type: ClusterIP 21 | --- 22 | apiVersion: apps/v1 23 | kind: Deployment 24 | metadata: 25 | labels: 26 | app: SERVICE_NAME 27 | apptype: tf-serving 28 | name: SERVICE_NAME 29 | namespace: KUBEFLOW_NAMESPACE 30 | spec: 31 | replicas: 1 32 | selector: 33 | matchLabels: 34 | app: SERVICE_NAME 35 | template: 36 | metadata: 37 | labels: 38 | app: SERVICE_NAME 39 | version: v1 40 | spec: 41 | containers: 42 | - args: 43 | - --port=9000 44 | - --rest_api_port=8500 45 | - --model_name=MODEL_NAME 46 | - --model_base_path=MODEL_PATH 47 | - --enable_batching 48 | command: 49 | - /usr/bin/tensorflow_model_server 50 | image: tensorflow/serving:2.3.0-rc0 51 | imagePullPolicy: Always 52 | livenessProbe: 53 | initialDelaySeconds: 30 54 | periodSeconds: 30 55 | tcpSocket: 56 | port: 9000 57 | name: MODEL_NAME 58 | ports: 59 | - containerPort: 9000 60 | - containerPort: 8500 61 | resources: 62 | limits: 63 | cpu: "4" 64 | memory: 4Gi 65 | requests: 66 | cpu: "1" 67 | memory: 1Gi 68 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/serve_component.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | 
# you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | name: Serve TF model 16 | description: | 17 | A Kubeflow Pipeline component to deploy a tf-serving service 18 | metadata: 19 | labels: 20 | add-pod-env: 'true' 21 | inputs: 22 | - name: model_name 23 | type: String 24 | - name: model_path 25 | type: GCSPath 26 | - name: namespace 27 | type: String 28 | implementation: 29 | container: 30 | image: gcr.io/google-samples/bw-pipeline-tfserve:aad15ad 31 | args: [ 32 | --model_name, {inputValue: model_name}, 33 | --model_path, {inputValue: model_path}, 34 | --namespace, {inputValue: namespace} 35 | ] 36 | env: 37 | KFP_POD_NAME: "{{pod.name}}" 38 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/tfdv/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | FROM gcr.io/deeplearning-platform-release/tf2-cpu.2-3:latest 16 | 17 | ADD requirements.txt / 18 | # ADD tfdv.py / 19 | RUN pip install -U tensorflow-data-validation 20 | RUN pip download tensorflow_data_validation --no-deps --platform manylinux2010_x86_64 --only-binary=:all: 21 | RUN pip install -U "apache-beam[gcp]" 22 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/tfdv/requirements.txt: -------------------------------------------------------------------------------- 1 | ipython==7.16.1 2 | ipython-genutils==0.2.0 3 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/tfdv/tfdv.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
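# Wiring sketch (values illustrative): once func_to_container_op (below) has
# written tfdv_component.yaml, a pipeline can use this component via e.g.
#   tfdv_op = kfp.components.load_component_from_file('tfdv_component.yaml')
#   tfdv_op(input_data='gs://BUCKET/data/*.csv', output_path='gs://BUCKET/stats/train.pb',
#           job_name='tfdv1', use_dataflow='false', project_id=PROJECT, region=REGION,
#           gcs_temp_location='gs://BUCKET/tmp', gcs_staging_location='gs://BUCKET/staging')
# With use_dataflow='true', whl_location must also point to a downloaded TFDV wheel.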
14 | 15 | from typing import NamedTuple 16 | 17 | 18 | def generate_tfdv_stats(input_data: str, output_path: str, job_name: str, use_dataflow: str, 19 | project_id: str, region:str, gcs_temp_location: str, gcs_staging_location: str, 20 | whl_location: str = '', requirements_file: str = 'requirements.txt' 21 | ) -> NamedTuple('Outputs', [('stats_path', str)]): 22 | 23 | import logging 24 | import time 25 | 26 | import tensorflow_data_validation as tfdv 27 | import tensorflow_data_validation.statistics.stats_impl 28 | from apache_beam.options.pipeline_options import PipelineOptions, GoogleCloudOptions, StandardOptions, SetupOptions 29 | 30 | # pip download tensorflow_data_validation --no-deps --platform manylinux2010_x86_64 --only-binary=:all: 31 | # CHANGE the whl_location arg if your download resulted in a different filename. 32 | 33 | logging.getLogger().setLevel(logging.INFO) 34 | logging.info("output path: %s", output_path) 35 | logging.info("Building pipeline options") 36 | # Create and set your PipelineOptions. 37 | options = PipelineOptions() 38 | 39 | if use_dataflow == 'true': 40 | logging.info("using Dataflow") 41 | if not whl_location: 42 | logging.warning('tfdv whl file required with dataflow runner.') 43 | exit(1) 44 | # For Cloud execution, set the Cloud Platform project, job_name, 45 | # staging location, temp_location and specify DataflowRunner. 46 | google_cloud_options = options.view_as(GoogleCloudOptions) 47 | google_cloud_options.project = project_id 48 | google_cloud_options.job_name = '{}-{}'.format(job_name, str(int(time.time()))) 49 | google_cloud_options.staging_location = gcs_staging_location 50 | google_cloud_options.temp_location = gcs_temp_location 51 | google_cloud_options.region = region 52 | options.view_as(StandardOptions).runner = 'DataflowRunner' 53 | 54 | setup_options = options.view_as(SetupOptions) 55 | # whl_location should point to the downloaded tfdv wheel file. 56 | setup_options.extra_packages = [whl_location] 57 | setup_options.requirements_file = requirements_file 58 | 59 | tfdv.generate_statistics_from_csv( 60 | data_location=input_data, output_path=output_path, 61 | pipeline_options=options) 62 | 63 | return (output_path, ) 64 | 65 | 66 | if __name__ == '__main__': 67 | import kfp 68 | kfp.components.func_to_container_op(generate_tfdv_stats, 69 | output_component_file='../tfdv_component.yaml', 70 | base_image='gcr.io/google-samples/tfdv-tests:v1') 71 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/tfdv/tfdv_compare.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
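# Drift-check sketch: given stats for an older and a newer data window, the
# component infers a schema from the older stats, sets a Jensen-Shannon
# divergence threshold on the 'duration' feature, and returns 'true' (retrain)
# when the measured divergence exceeds that threshold; passing
# stats_older_path='none' forces 'true', e.g. for a first run.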
14 | 15 | from typing import NamedTuple 16 | 17 | 18 | def tfdv_detect_drift( 19 | stats_older_path: str, stats_new_path: str 20 | ) -> NamedTuple('Outputs', [('drift', str)]): 21 | 22 | import logging 23 | import time 24 | 25 | import tensorflow_data_validation as tfdv 26 | import tensorflow_data_validation.statistics.stats_impl 27 | 28 | logging.getLogger().setLevel(logging.INFO) 29 | logging.info('stats_older_path: %s', stats_older_path) 30 | logging.info('stats_new_path: %s', stats_new_path) 31 | 32 | if stats_older_path == 'none': 33 | return ('true', ) 34 | 35 | stats1 = tfdv.load_statistics(stats_older_path) 36 | stats2 = tfdv.load_statistics(stats_new_path) 37 | 38 | schema1 = tfdv.infer_schema(statistics=stats1) 39 | tfdv.get_feature(schema1, 'duration').drift_comparator.jensen_shannon_divergence.threshold = 0.01 40 | drift_anomalies = tfdv.validate_statistics( 41 | statistics=stats2, schema=schema1, previous_statistics=stats1) 42 | logging.info('drift analysis results: %s', drift_anomalies.drift_skew_info) 43 | 44 | from google.protobuf.json_format import MessageToDict 45 | d = MessageToDict(drift_anomalies) 46 | val = d['driftSkewInfo'][0]['driftMeasurements'][0]['value'] 47 | thresh = d['driftSkewInfo'][0]['driftMeasurements'][0]['threshold'] 48 | logging.info('value %s and threshold %s', val, thresh) 49 | res = 'true' 50 | if val < thresh: 51 | res = 'false' 52 | logging.info('train decision: %s', res) 53 | return (res, ) 54 | 55 | 56 | if __name__ == '__main__': 57 | import kfp 58 | kfp.components.func_to_container_op(tfdv_detect_drift, 59 | output_component_file='../tfdv_drift_component.yaml', 60 | base_image='gcr.io/google-samples/tfdv-tests:v1') 61 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/train_component.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
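# Loaded by the example pipelines via
#   comp.load_component_from_file('../components/train_component.yaml')
# (see bw_train.py below); the train_output_path output feeds the serve step.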
14 | 15 | name: Train bikes_weather model 16 | description: | 17 | A Kubeflow Pipeline component to train a Keras model 18 | on the 'bikes and weather' dataset 19 | metadata: 20 | labels: 21 | add-pod-env: 'true' 22 | inputs: 23 | - name: epochs 24 | type: Integer 25 | default: 1 26 | - name: steps_per_epoch 27 | type: Integer 28 | default: -1 29 | - name: data_dir 30 | type: String 31 | - name: workdir 32 | type: String 33 | - name: tb_dir 34 | type: String 35 | - name: hp_idx 36 | type: Integer 37 | - name: hptune_results 38 | type: String 39 | outputs: 40 | - name: train_output_path 41 | type: GCSPath 42 | - name: metrics_output_path 43 | type: String 44 | - name: MLPipeline UI metadata 45 | type: UI metadata 46 | implementation: 47 | container: 48 | image: gcr.io/google-samples/bw-pl-bikes-train:v2 49 | args: [ 50 | --data-dir, {inputValue: data_dir}, 51 | --epochs, {inputValue: epochs}, 52 | --steps-per-epoch, {inputValue: steps_per_epoch}, 53 | --workdir, {inputValue: workdir}, 54 | --tb-dir, {inputValue: tb_dir}, 55 | --train-output-path, {outputPath: train_output_path}, 56 | --metrics-output-path, {outputPath: metrics_output_path}, 57 | --hp-idx, {inputValue: hp_idx}, 58 | --hptune-results, {inputValue: hptune_results} 59 | ] 60 | env: 61 | KFP_POD_NAME: "{{pod.name}}" 62 | fileOutputs: 63 | MLPipeline UI metadata: /mlpipeline-ui-metadata.json 64 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/example_pipelines/bw_ktune.py.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amygdala/code-snippets/9fa00e5a4ec4ae3eb5a2fa212d1ecefb4009eba7/ml/kubeflow-pipelines/keras_tuner/example_pipelines/bw_ktune.py.tar.gz -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/example_pipelines/bw_train.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
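# (Illustrative sketch, not part of the original file): besides compiling to an
# archive via the __main__ block at the bottom, the pipeline can be submitted
# directly with the KFP SDK. The host URL and argument values below are
# placeholders, not project defaults:
#
#   import kfp
#   client = kfp.Client(host='https://<YOUR_KFP_HOST>')
#   client.create_run_from_pipeline_func(
#       bikes_weather, arguments={'working_dir': 'gs://<YOUR_BUCKET>/path'})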
14 | 15 | 16 | import kfp.dsl as dsl 17 | import kfp.gcp as gcp 18 | import kfp.components as comp 19 | from kfp.dsl.types import GCSPath, String 20 | 21 | 22 | train_op = comp.load_component_from_file( 23 | '../components/train_component.yaml' 24 | ) 25 | serve_op = comp.load_component_from_file( 26 | '../components/serve_component.yaml' 27 | ) 28 | 29 | tb_op = comp.load_component_from_url( 30 | 'https://raw.githubusercontent.com/kubeflow/pipelines/master/components/tensorflow/tensorboard/prepare_tensorboard/component.yaml' # pylint: disable=line-too-long 31 | ) 32 | 33 | 34 | @dsl.pipeline( 35 | name='bikes_weather', 36 | description='Model bike rental duration given weather' 37 | ) 38 | def bikes_weather( #pylint: disable=unused-argument 39 | train_epochs: int = 5, 40 | working_dir: str = 'gs://YOUR/GCS/PATH', # for the full training jobs 41 | data_dir: str = 'gs://aju-dev-demos-codelabs/bikes_weather/', 42 | steps_per_epoch: int = -1 , # if -1, don't override normal calcs based on dataset size 43 | num_best_hps_list: list = [0], 44 | hptune_params: str = '[{"num_hidden_layers": %s, "learning_rate": %s, "hidden_size": %s}]' % (3, 1e-2, 64) 45 | ): 46 | 47 | 48 | # create TensorBoard viz for the parent directory of all training runs, so that we can 49 | # compare them. 50 | tb_viz = tb_op( 51 | log_dir_uri='%s/%s' % (working_dir, dsl.RUN_ID_PLACEHOLDER) 52 | ) 53 | 54 | with dsl.ParallelFor(num_best_hps_list) as idx: # start the full training runs in parallel 55 | 56 | train = train_op( 57 | data_dir=data_dir, 58 | workdir='%s/%s' % (tb_viz.outputs['log_dir_uri'], idx), 59 | tb_dir=tb_viz.outputs['log_dir_uri'], 60 | epochs=train_epochs, steps_per_epoch=steps_per_epoch, 61 | hp_idx=idx, 62 | hptune_results=hptune_params 63 | ) 64 | 65 | serve = serve_op( 66 | model_path=train.outputs['train_output_path'], 67 | model_name='bikesw', 68 | namespace='default' 69 | ) 70 | train.set_gpu_limit(2) 71 | 72 | 73 | if __name__ == '__main__': 74 | import kfp.compiler as compiler 75 | compiler.Compiler().compile(bikes_weather, __file__ + '.tar.gz') 76 | 77 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/example_pipelines/bw_train_metrics.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | 16 | import kfp.dsl as dsl 17 | import kfp.gcp as gcp 18 | import kfp.components as comp 19 | from kfp.dsl.types import GCSPath, String 20 | 21 | 22 | train_op = comp.load_component_from_file( 23 | '../components/train_component.yaml' 24 | ) 25 | serve_op = comp.load_component_from_file( 26 | '../components/serve_component.yaml' 27 | ) 28 | 29 | eval_metrics_op = comp.load_component_from_file( 30 | '../components/eval_metrics_component.yaml') 31 | 32 | tb_op = comp.load_component_from_url( 33 | 'https://raw.githubusercontent.com/kubeflow/pipelines/master/components/tensorflow/tensorboard/prepare_tensorboard/component.yaml' # pylint: disable=line-too-long 34 | ) 35 | 36 | 37 | @dsl.pipeline( 38 | name='bikes_weather', 39 | description='Model bike rental duration given weather' 40 | ) 41 | def bikes_weather_metrics( #pylint: disable=unused-argument 42 | train_epochs: int = 5, 43 | working_dir: str = 'gs://YOUR/GCS/PATH', # for the full training jobs 44 | data_dir: str = 'gs://aju-dev-demos-codelabs/bikes_weather/', 45 | steps_per_epoch: int = -1, # if -1, don't override normal calcs based on dataset size 46 | num_best_hps_list: list = [0], 47 | hptune_params: str = '[{"num_hidden_layers": %s, "learning_rate": %s, "hidden_size": %s}]' % (3, 1e-2, 64), 48 | thresholds: str = '{"root_mean_squared_error": 2000}' 49 | ): 50 | 51 | 52 | # create TensorBoard viz for the parent directory of all training runs, so that we can 53 | # compare them. 54 | tb_viz = tb_op( 55 | log_dir_uri='%s/%s' % (working_dir, dsl.RUN_ID_PLACEHOLDER) 56 | ) 57 | 58 | with dsl.ParallelFor(num_best_hps_list) as idx: # start the full training runs in parallel 59 | 60 | train = train_op( 61 | data_dir=data_dir, 62 | workdir='%s/%s' % (tb_viz.outputs['log_dir_uri'], idx), 63 | tb_dir=tb_viz.outputs['log_dir_uri'], 64 | epochs=train_epochs, steps_per_epoch=steps_per_epoch, 65 | hp_idx=idx, 66 | hptune_results=hptune_params 67 | ) 68 | 69 | eval_metrics = eval_metrics_op( 70 | thresholds=thresholds, 71 | metrics=train.outputs['metrics_output_path'], 72 | ) 73 | 74 | with dsl.Condition(eval_metrics.outputs['deploy'] == 'deploy'): 75 | serve = serve_op( 76 | model_path=train.outputs['train_output_path'], 77 | model_name='bikesw', 78 | namespace='default' 79 | ) 80 | train.set_gpu_limit(2) 81 | 82 | 83 | if __name__ == '__main__': 84 | import kfp.compiler as compiler 85 | compiler.Compiler().compile(bikes_weather_metrics, __file__ + '.tar.gz') 86 | 87 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/samples/automl/README.md: -------------------------------------------------------------------------------- 1 | 2 | The pipeline in this directory shows how you can make calls to the AutoML Vision API to build a pipeline that creates an AutoML *dataset* and then trains a model on that dataset. 3 | 4 | This pipeline requires a GKE installation of Kubeflow, e.g. via the 5 | ['click to deploy' web app](https://deploy.kubeflow.cloud/#/deploy). 6 | Once Kubeflow is installed on your GKE cluster, to run this pipeline, you'll need to visit the [IAM panel in the GCP Cloud Console](https://console.cloud.google.com/iam-admin/iam), find the Kubeflow-created service account 7 | `<deployment-name>-user@<project-id>.iam.gserviceaccount.com`, and add permissions to make that account an `AutoML Admin`. This will give the Kubeflow Pipeline steps permission to call the AutoML APIs.
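For example, the same role grant can be made from the command line. This is a sketch; the deployment name and project ID below are placeholders to replace with your own values:

```sh
DEPLOYMENT_NAME=my-kubeflow   # hypothetical Kubeflow deployment name
PROJECT_ID=my-project         # hypothetical GCP project ID

# Grant the Kubeflow-created service account the AutoML Admin role.
gcloud projects add-iam-policy-binding ${PROJECT_ID} \
  --member "serviceAccount:${DEPLOYMENT_NAME}-user@${PROJECT_ID}.iam.gserviceaccount.com" \
  --role "roles/automl.admin"
```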
8 | 9 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/samples/automl/dataset_and_train.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | import kfp.dsl as dsl 17 | import kfp.gcp as gcp 18 | 19 | DATASET_OP = 'dataset' 20 | MODEL_OP = 'model' 21 | 22 | @dsl.pipeline( 23 | name='automl1', 24 | description='Create AutoML dataset and train model' 25 | ) 26 | def automl1( #pylint: disable=unused-argument 27 | # There's now a more succinct way to define the pipeline params 28 | project_id: dsl.PipelineParam = dsl.PipelineParam(name='project-id', value='YOUR_PROJECT_HERE'), 29 | compute_region: dsl.PipelineParam = dsl.PipelineParam(name='compute-region', value='YOUR_REGION_HERE'), 30 | dataset_name: dsl.PipelineParam = dsl.PipelineParam(name='dataset-name', value='YOUR_DATASETNAME_HERE'), 31 | model_name: dsl.PipelineParam = dsl.PipelineParam(name='model-name', value='YOUR_MODELNAME_HERE'), 32 | csv_path: dsl.PipelineParam = dsl.PipelineParam(name='csv-path', value='YOUR_DATASET_PATH') 33 | ): 34 | 35 | 36 | dataset = dsl.ContainerOp( 37 | name='dataset', 38 | image='gcr.io/google-samples/automl-pipeline', 39 | arguments=["--project_id", project_id, "--operation", DATASET_OP, 40 | "--compute_region", compute_region, 41 | "--dataset_name", dataset_name, 42 | "--csv_path", csv_path], 43 | file_outputs={'dataset_id': '/dataset_id.txt', 'csv_path': '/csv_path.txt'} 44 | 45 | ).apply(gcp.use_gcp_secret('user-gcp-sa')) 46 | 47 | model = dsl.ContainerOp( 48 | name='model', 49 | image='gcr.io/google-samples/automl-pipeline', 50 | arguments=["--project_id", project_id, "--operation", MODEL_OP, 51 | "--compute_region", compute_region, 52 | "--model_name", model_name, 53 | "--csv_path", dataset.outputs['csv_path'], 54 | "--dataset_id", dataset.outputs['dataset_id']] 55 | ).apply(gcp.use_gcp_secret('user-gcp-sa')) 56 | 57 | model.after(dataset) 58 | 59 | 60 | 61 | if __name__ == '__main__': 62 | import kfp.compiler as compiler 63 | compiler.Compiler().compile(automl1, __file__ + '.tar.gz') 64 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/samples/automl/dataset_and_train.py.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amygdala/code-snippets/9fa00e5a4ec4ae3eb5a2fa212d1ecefb4009eba7/ml/kubeflow-pipelines/samples/automl/dataset_and_train.py.tar.gz -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/samples/kubeflow-tf/README.md: -------------------------------------------------------------------------------- 1 | 2 | The example pipelines that were in this directory have been moved to the [`older`](./older) subdirectory. 3 | They are not currently maintained and are probably out of date. 
4 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/samples/kubeflow-tf/older/gh_summ.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | import kfp.dsl as dsl 17 | import kfp.gcp as gcp 18 | 19 | 20 | @dsl.pipeline( 21 | name='Github issue summarization', 22 | description='Demonstrate Tensor2Tensor-based training and TF-Serving' 23 | ) 24 | def gh_summ( #pylint: disable=unused-argument 25 | train_steps: dsl.PipelineParam = dsl.PipelineParam(name='train-steps', value=2019300), 26 | project: dsl.PipelineParam = dsl.PipelineParam(name='project', value='YOUR_PROJECT_HERE'), 27 | github_token: dsl.PipelineParam = dsl.PipelineParam( 28 | name='github-token', value='YOUR_GITHUB_TOKEN_HERE'), 29 | working_dir: dsl.PipelineParam = dsl.PipelineParam(name='working-dir', value='YOUR_GCS_DIR_HERE'), 30 | checkpoint_dir: dsl.PipelineParam = dsl.PipelineParam( 31 | name='checkpoint-dir', 32 | value='gs://aju-dev-demos-codelabs/kubecon/model_output_tbase.bak2019000'), 33 | deploy_webapp: dsl.PipelineParam = dsl.PipelineParam(name='deploy-webapp', value='true'), 34 | data_dir: dsl.PipelineParam = dsl.PipelineParam( 35 | name='data-dir', value='gs://aju-dev-demos-codelabs/kubecon/t2t_data_gh_all/')): 36 | 37 | 38 | train = dsl.ContainerOp( 39 | name='train', 40 | image='gcr.io/google-samples/ml-pipeline-t2ttrain', 41 | arguments=["--data-dir", data_dir, 42 | "--checkpoint-dir", checkpoint_dir, 43 | "--model-dir", '%s/%s/model_output' % (working_dir, '{{workflow.name}}'), 44 | "--train-steps", train_steps, "--deploy-webapp", deploy_webapp], 45 | file_outputs={'output': '/tmp/output'} 46 | 47 | ).apply(gcp.use_gcp_secret('user-gcp-sa')) 48 | 49 | serve = dsl.ContainerOp( 50 | name='serve', 51 | image='gcr.io/google-samples/ml-pipeline-kubeflow-tfserve', 52 | arguments=["--model_name", 'ghsumm-%s' % ('{{workflow.name}}',), 53 | "--model_path", '%s/%s/model_output/export' % (working_dir, '{{workflow.name}}') 54 | ] 55 | ) 56 | serve.after(train) 57 | train.set_gpu_limit(4) 58 | 59 | with dsl.Condition(train.output == 'true'): 60 | webapp = dsl.ContainerOp( 61 | name='webapp', 62 | image='gcr.io/google-samples/ml-pipeline-webapp-launcher', 63 | arguments=["--model_name", 'ghsumm-%s' % ('{{workflow.name}}',), 64 | "--github_token", github_token] 65 | 66 | ) 67 | webapp.after(serve) 68 | 69 | 70 | if __name__ == '__main__': 71 | import kfp.compiler as compiler 72 | compiler.Compiler().compile(gh_summ, __file__ + '.tar.gz') 73 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/samples/kubeflow-tf/older/gh_summ.py.tar.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/amygdala/code-snippets/9fa00e5a4ec4ae3eb5a2fa212d1ecefb4009eba7/ml/kubeflow-pipelines/samples/kubeflow-tf/older/gh_summ.py.tar.gz -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/samples/kubeflow-tf/older/gh_summ_serve.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | import kfp.dsl as dsl 17 | 18 | @dsl.pipeline( 19 | name='Github issue summarization', 20 | description='Demonstrate Tensor2Tensor-based training and TF-Serving' 21 | ) 22 | def gh_summ( 23 | github_token: dsl.PipelineParam = dsl.PipelineParam( 24 | name='github-token', value='YOUR_GITHUB_TOKEN_HERE'), 25 | ): 26 | 27 | 28 | serve = dsl.ContainerOp( 29 | name='serve', 30 | image='gcr.io/google-samples/ml-pipeline-kubeflow-tfserve', 31 | arguments=["--model_name", 'ghsumm-%s' % ('{{workflow.name}}',), 32 | "--model_path", 33 | 'gs://aju-dev-demos-codelabs/kubecon/example_t2t_model/model_output/export' 34 | ] 35 | ) 36 | 37 | webapp = dsl.ContainerOp( 38 | name='webapp', 39 | image='gcr.io/google-samples/ml-pipeline-webapp-launcher', 40 | arguments=["--model_name", 'ghsumm-%s' % ('{{workflow.name}}',), 41 | "--github_token", github_token] 42 | 43 | ) 44 | webapp.after(serve) 45 | 46 | 47 | if __name__ == '__main__': 48 | import kfp.compiler as compiler 49 | compiler.Compiler().compile(gh_summ, __file__ + '.tar.gz') 50 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/samples/kubeflow-tf/older/gh_summ_serve.py.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amygdala/code-snippets/9fa00e5a4ec4ae3eb5a2fa212d1ecefb4009eba7/ml/kubeflow-pipelines/samples/kubeflow-tf/older/gh_summ_serve.py.tar.gz -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/sbtb/components/kubeflow-resources/containers/bikesw_training/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
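# (Note, not part of the original file): this image is built and pushed by the
# adjacent build.sh script, e.g. `./build.sh <PROJECT_ID>`; if the project ID
# argument is omitted, build.sh defaults to the current gcloud project.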
14 | 15 | FROM tensorflow/tensorflow:2.0.0-gpu-py3 16 | 17 | RUN pip install --upgrade pip 18 | RUN pip install pathlib2 19 | 20 | 21 | ADD build /ml 22 | 23 | ENTRYPOINT ["python", "/ml/bikes_weather.py"] 24 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/sbtb/components/kubeflow-resources/containers/bikesw_training/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright 2019 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | if [ -z "$1" ] 18 | then 19 | PROJECT_ID=$(gcloud config config-helper --format "value(configuration.properties.core.project)") 20 | else 21 | PROJECT_ID=$1 22 | fi 23 | 24 | mkdir -p ./build 25 | rsync -arvp "../../bikesw_training"/ ./build/ 26 | 27 | docker build -t ml-pipeline-bikes-train . 28 | rm -rf ./build 29 | 30 | docker tag ml-pipeline-bikes-train gcr.io/${PROJECT_ID}/ml-pipeline-bikes-train 31 | docker push gcr.io/${PROJECT_ID}/ml-pipeline-bikes-train 32 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/sbtb/components/kubeflow-resources/containers/tf-serving/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | FROM ubuntu:16.04 16 | 17 | RUN apt-get update -y 18 | 19 | RUN apt-get install --no-install-recommends -y -q ca-certificates python-dev python-setuptools wget unzip 20 | 21 | RUN easy_install pip 22 | 23 | RUN pip install pyyaml==3.12 six requests==2.18.4 tensorflow==2.0.0 24 | 25 | RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \ 26 | unzip -qq google-cloud-sdk.zip -d tools && \ 27 | rm google-cloud-sdk.zip && \ 28 | tools/google-cloud-sdk/install.sh --usage-reporting=false \ 29 | --path-update=false --bash-completion=false \ 30 | --disable-installation-options && \ 31 | tools/google-cloud-sdk/bin/gcloud -q components update \ 32 | gcloud core gsutil && \ 33 | tools/google-cloud-sdk/bin/gcloud -q components install kubectl && \ 34 | tools/google-cloud-sdk/bin/gcloud config set component_manager/disable_update_check true && \ 35 | touch /tools/google-cloud-sdk/lib/third_party/google.py 36 | 37 | # RUN wget -nv https://github.com/ksonnet/ksonnet/releases/download/v0.11.0/ks_0.11.0_linux_amd64.tar.gz && \ 38 | # tar -xvzf ks_0.11.0_linux_amd64.tar.gz && \ 39 | # mkdir -p /tools/ks/bin && \ 40 | # cp ./ks_0.11.0_linux_amd64/ks /tools/ks/bin && \ 41 | # rm ks_0.11.0_linux_amd64.tar.gz && \ 42 | # rm -r ks_0.11.0_linux_amd64 43 | 44 | ENV PATH $PATH:/tools/google-cloud-sdk/bin:/tools/ks/bin 45 | 46 | ADD build /ml 47 | 48 | ENTRYPOINT ["python", "/ml/deploy-tfserve.py"] 49 | 50 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/sbtb/components/kubeflow-resources/containers/tf-serving/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright 2018 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | if [ -z "$1" ] 18 | then 19 | PROJECT_ID=$(gcloud config config-helper --format "value(configuration.properties.core.project)") 20 | else 21 | PROJECT_ID=$1 22 | fi 23 | 24 | mkdir -p ./build 25 | rsync -arvp "../../tf-serving"/ ./build/ 26 | 27 | docker build -t ml-pipeline-tfserve . 
28 | rm -rf ./build 29 | 30 | docker tag ml-pipeline-tfserve gcr.io/${PROJECT_ID}/ml-pipeline-tfserve 31 | docker push gcr.io/${PROJECT_ID}/ml-pipeline-tfserve 32 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/sbtb/components/kubeflow-resources/tf-serving/tf-serve-template.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | annotations: 6 | getambassador.io/config: |- 7 | --- 8 | apiVersion: ambassador/v0 9 | kind: Mapping 10 | name: tfserving-predict-mapping-MODEL_NAME 11 | prefix: tfserving/models/MODEL_NAME/ 12 | rewrite: /v1/models/MODEL_NAME:predict 13 | method: POST 14 | service: MODEL_NAME.kubeflow:8500 15 | labels: 16 | app: MODEL_NAME 17 | name: SERVICE_NAME 18 | namespace: KUBEFLOW_NAMESPACE 19 | spec: 20 | ports: 21 | - name: grpc-tf-serving 22 | port: 9000 23 | targetPort: 9000 24 | - name: tf-serving-builtin-http 25 | port: 8500 26 | targetPort: 8500 27 | selector: 28 | app: SERVICE_NAME 29 | type: LoadBalancer 30 | --- 31 | apiVersion: extensions/v1beta1 32 | kind: Deployment 33 | metadata: 34 | labels: 35 | app: SERVICE_NAME 36 | name: SERVICE_NAME 37 | namespace: KUBEFLOW_NAMESPACE 38 | spec: 39 | replicas: 1 40 | template: 41 | metadata: 42 | labels: 43 | app: SERVICE_NAME 44 | version: v1 45 | spec: 46 | volumes: 47 | - name: gcp-credentials-user-gcp-sa 48 | secret: 49 | secretName: user-gcp-sa 50 | containers: 51 | - args: 52 | - --port=9000 53 | - --rest_api_port=8500 54 | - --model_name=MODEL_NAME 55 | - --model_base_path=MODEL_PATH 56 | command: 57 | - /usr/bin/tensorflow_model_server 58 | image: tensorflow/serving 59 | env: 60 | - name: GOOGLE_APPLICATION_CREDENTIALS 61 | value: /secret/gcp-credentials/user-gcp-sa.json 62 | - name: CLOUDSDK_AUTH_CREDENTIAL_FILE_OVERRIDE 63 | value: /secret/gcp-credentials/user-gcp-sa.json 64 | volumeMounts: 65 | - mountPath: /secret/gcp-credentials 66 | name: gcp-credentials-user-gcp-sa 67 | imagePullPolicy: IfNotPresent 68 | livenessProbe: 69 | initialDelaySeconds: 30 70 | periodSeconds: 30 71 | tcpSocket: 72 | port: 9000 73 | name: MODEL_NAME 74 | ports: 75 | - containerPort: 9000 76 | - containerPort: 8500 77 | resources: 78 | limits: 79 | cpu: "4" 80 | memory: 4Gi 81 | requests: 82 | cpu: "1" 83 | memory: 1Gi 84 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/sbtb/components/serve_component.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
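# (Illustrative sketch, not part of the original file): loading and invoking
# this component from a pipeline definition; the argument values below are
# placeholders:
#
#   serve_op = kfp.components.load_component_from_file('serve_component.yaml')
#   serve = serve_op(model_path='gs://<YOUR_BUCKET>/path/to/model', model_name='bikesw')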
14 | 15 | name: Serve TF model 16 | description: | 17 | A Kubeflow Pipeline component to deploy a tf-serving service 18 | metadata: 19 | labels: 20 | add-pod-env: 'true' 21 | inputs: 22 | - name: model_name 23 | type: String 24 | - name: model_path 25 | type: GCSPath 26 | implementation: 27 | container: 28 | image: gcr.io/google-samples/ml-pipeline-tfserve:v2 29 | args: [ 30 | --model_name, {inputValue: model_name}, 31 | --model_path, {inputValue: model_path}, 32 | ] 33 | env: 34 | KFP_POD_NAME: "{{pod.name}}" 35 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/sbtb/components/train_component.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | name: Train bikes_weather model 16 | description: | 17 | A Kubeflow Pipeline component to train a Keras model 18 | on the 'bikes and weather' dataset 19 | metadata: 20 | labels: 21 | add-pod-env: 'true' 22 | inputs: 23 | - name: epochs 24 | type: Integer 25 | default: 1 26 | - name: steps_per_epoch 27 | type: Integer 28 | default: -1 29 | - name: data_dir 30 | type: GCSPath 31 | - name: workdir 32 | type: GCSPath 33 | - name: load_checkpoint 34 | type: GCSPath 35 | outputs: 36 | - name: train_output_path 37 | type: GCSPath 38 | implementation: 39 | container: 40 | image: gcr.io/google-samples/ml-pipeline-bikes-train:v2 41 | args: [ 42 | --data-dir, {inputValue: data_dir}, 43 | --epochs, {inputValue: epochs}, 44 | --steps-per-epoch, {inputValue: steps_per_epoch}, 45 | --workdir, {inputValue: workdir}, 46 | --load-checkpoint, {inputValue: load_checkpoint}, 47 | --train-output-path, {outputPath: train_output_path} 48 | ] 49 | env: 50 | KFP_POD_NAME: "{{pod.name}}" 51 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/sbtb/example_pipelines/bw.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | 15 | 16 | import kfp.dsl as dsl 17 | import kfp.gcp as gcp 18 | import kfp.components as comp 19 | from kfp.dsl.types import GCSPath, String 20 | 21 | 22 | COPY_ACTION = 'copy_data' 23 | TRAIN_ACTION = 'train' 24 | WORKSPACE_NAME = 'ws_gh_summ' 25 | DATASET = 'dataset' 26 | MODEL = 'model' 27 | 28 | train_op = comp.load_component_from_url( 29 | 'https://raw.githubusercontent.com/amygdala/code-snippets/master/ml/kubeflow-pipelines/sbtb/components/train_component.yaml' # pylint: disable=line-too-long 30 | ) 31 | serve_op = comp.load_component_from_url( 32 | 'https://raw.githubusercontent.com/amygdala/code-snippets/master/ml/kubeflow-pipelines/sbtb/components/serve_component.yaml' # pylint: disable=line-too-long 33 | ) 34 | 35 | 36 | @dsl.pipeline( 37 | name='bikes_weather', 38 | description='Model bike rental duration given weather' 39 | ) 40 | def bikes_weather( #pylint: disable=unused-argument 41 | working_dir: GCSPath = 'gs://YOUR_GCS_DIR_HERE', 42 | data_dir: GCSPath = 'gs://aju-dev-demos-codelabs/bikes_weather/', 43 | epochs: 'Integer' = 1, 44 | steps_per_epoch: 'Integer' = -1, # if -1, don't override normal calcs based on dataset size 45 | load_checkpoint: String = '' 46 | ): 47 | 48 | 49 | train = train_op( 50 | data_dir=data_dir, 51 | workdir='%s/%s' % (working_dir, dsl.RUN_ID_PLACEHOLDER), 52 | epochs=epochs, steps_per_epoch=steps_per_epoch, 53 | load_checkpoint=load_checkpoint 54 | ).apply(gcp.use_gcp_secret('user-gcp-sa')) 55 | 56 | 57 | serve = serve_op( 58 | model_path=train.outputs['train_output_path'], 59 | model_name='bikesw' 60 | ).apply(gcp.use_gcp_secret('user-gcp-sa')) 61 | 62 | train.set_gpu_limit(1) 63 | 64 | if __name__ == '__main__': 65 | import kfp.compiler as compiler 66 | compiler.Compiler().compile(bikes_weather, __file__ + '.tar.gz') 67 | -------------------------------------------------------------------------------- /ml/notebook_examples/functions/main.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import datetime 3 | 4 | import time 5 | 6 | import kfp 7 | import kfp.compiler as compiler 8 | import kfp.dsl as dsl 9 | 10 | import requests 11 | 12 | # TODO: replace with your own host URL 13 | # HOST = 'https://<YOUR_PIPELINES_HOST>.pipelines.googleusercontent.com' 14 | HOST = 'https://7c7f7f3e3d11e1d4-dot-us-central2.pipelines.googleusercontent.com' 15 | 16 | @dsl.pipeline( 17 | name='Sequential', 18 | description='A pipeline with two sequential steps.'
19 | ) 20 | def sequential_pipeline(filename='gs://ml-pipeline-playground/shakespeare1.txt'): 21 | """A pipeline with two sequential steps.""" 22 | op1 = dsl.ContainerOp( 23 | name='filechange', 24 | image='library/bash:4.4.23', 25 | command=['sh', '-c'], 26 | arguments=['echo "%s" > /tmp/results.txt' % filename], 27 | file_outputs={'newfile': '/tmp/results.txt'}) 28 | op2 = dsl.ContainerOp( 29 | name='echo', 30 | image='library/bash:4.4.23', 31 | command=['sh', '-c'], 32 | arguments=['echo "%s"' % op1.outputs['newfile']] 33 | ) 34 | 35 | def get_access_token(): 36 | url = 'http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/token' 37 | r = requests.get(url, headers={'Metadata-Flavor': 'Google'}) 38 | r.raise_for_status() 39 | access_token = r.json()['access_token'] 40 | return access_token 41 | 42 | def hosted_kfp_test(data, context): 43 | logging.info('Event ID: {}'.format(context.event_id)) 44 | logging.info('Event type: {}'.format(context.event_type)) 45 | logging.info('Data: {}'.format(data)) 46 | logging.info('Bucket: {}'.format(data['bucket'])) 47 | logging.info('File: {}'.format(data['name'])) 48 | file_uri = 'gs://%s/%s' % (data['bucket'], data['name']) 49 | logging.info('Using file uri: %s', file_uri) 50 | 51 | logging.info('Metageneration: {}'.format(data['metageneration'])) 52 | logging.info('Created: {}'.format(data['timeCreated'])) 53 | logging.info('Updated: {}'.format(data['updated'])) 54 | 55 | token = get_access_token() 56 | logging.info('attempting to launch pipeline run.') 57 | ts = int(datetime.datetime.utcnow().timestamp() * 100000) 58 | client = kfp.Client(host=HOST, existing_token=token) 59 | compiler.Compiler().compile(sequential_pipeline, '/tmp/sequential.tar.gz') 60 | exp = client.create_experiment(name='gcstriggered') # this is a 'get or create' op 61 | res = client.run_pipeline(exp.id, 'sequential_' + str(ts), '/tmp/sequential.tar.gz', 62 | params={'filename': file_uri}) 63 | logging.info(res) 64 | -------------------------------------------------------------------------------- /ml/notebook_examples/functions/requirements.txt: -------------------------------------------------------------------------------- 1 | kfp 2 | -------------------------------------------------------------------------------- /ml/notebook_examples/keras_linear_regressor.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Copyright 2017 Google Inc. All Rights Reserved.\n", 8 | "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at\n", 9 | "\n", 10 | "http://www.apache.org/licenses/LICENSE-2.0\n", 11 | "\n", 12 | "Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License." 
13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": { 19 | "collapsed": true 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "import tensorflow as tf\n", 24 | "import keras\n", 25 | "from sklearn.preprocessing import StandardScaler" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": { 32 | "collapsed": true 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "import numpy as np\n", 37 | "\n", 38 | "X_train = np.linspace(0, 80, 100).reshape(-1, 1)\n", 39 | "# print(X_train)\n", 40 | "Y_train = 5 * X_train\n", 41 | "# print(Y_train)\n", 42 | "\n", 43 | "X_test = np.linspace(0, 80, 20).reshape(-1, 1)\n", 44 | "#print(X_test)\n", 45 | "Y_test = 5 * X_test\n", 46 | "#print(Y_test)\n" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": { 53 | "collapsed": true 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "sc = StandardScaler()\n", 58 | "x = sc.fit_transform(X_train)\n", 59 | "y = sc.fit_transform(Y_train)\n", 60 | "xt = sc.fit_transform(X_test)\n", 61 | "yt = sc.fit_transform(Y_test)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "from keras.layers import Input, Dense\n", 71 | "from keras.models import Model\n", 72 | "\n", 73 | "inputs = Input(shape=(1,))\n", 74 | "preds = Dense(1,activation='linear')(inputs)\n", 75 | "\n", 76 | "model = Model(inputs=inputs,outputs=preds)\n", 77 | "sgd=keras.optimizers.SGD()\n", 78 | "model.compile(optimizer=sgd ,loss='mse',metrics=['mse'])\n", 79 | "\n", 80 | "model.fit(x,y, batch_size=1, epochs=30, shuffle=False)\n" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "score = model.evaluate(xt, yt, batch_size=16)\n", 90 | "print(\"\\nScore: %s\" % score)" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": { 97 | "collapsed": true 98 | }, 99 | "outputs": [], 100 | "source": [] 101 | } 102 | ], 103 | "metadata": { 104 | 105 | }, 106 | "nbformat": 4, 107 | "nbformat_minor": 2 108 | } 109 | -------------------------------------------------------------------------------- /ml/vertex_pipelines/pytorch/cifar/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | FROM pytorch/pytorch:latest 16 | 17 | COPY requirements.txt requirements.txt 18 | 19 | RUN pip3 install -r requirements.txt 20 | 21 | ADD pytorch-pipeline /workspace/pytorch-pipeline 22 | 23 | ENV PYTHONPATH /workspace/pytorch-pipeline 24 | 25 | WORKDIR /workspace/pytorch-pipeline 26 | 27 | ENTRYPOINT /bin/bash 28 | -------------------------------------------------------------------------------- /ml/vertex_pipelines/pytorch/cifar/Dockerfile-gpu: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | FROM pytorch/pytorch:1.8.1-cuda10.2-cudnn7-runtime 16 | 17 | COPY requirements.txt requirements.txt 18 | 19 | RUN pip3 install -U pip 20 | RUN pip3 install -r requirements.txt 21 | RUN pip3 install -U google-cloud-aiplatform[tensorboard] 22 | 23 | ADD pytorch-pipeline /workspace/pytorch-pipeline 24 | 25 | ENV PYTHONPATH /workspace/pytorch-pipeline 26 | 27 | WORKDIR /workspace/pytorch-pipeline 28 | 29 | ENTRYPOINT /bin/bash 30 | -------------------------------------------------------------------------------- /ml/vertex_pipelines/pytorch/cifar/Dockerfile-gpu-ct: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
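# (Illustrative sketch, not part of the original file): a typical build-and-push
# sequence for this image; the image name and project ID are placeholders:
#
#   docker build -f Dockerfile-gpu-ct -t gcr.io/<YOUR_PROJECT>/pytorch-cifar-gpu-ct .
#   docker push gcr.io/<YOUR_PROJECT>/pytorch-cifar-gpu-ct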
14 | 15 | FROM pytorch/pytorch:1.8.1-cuda10.2-cudnn7-runtime 16 | 17 | COPY requirements.txt requirements.txt 18 | RUN pip3 install -U pip 19 | RUN pip3 install -r requirements.txt 20 | RUN pip3 install gcsfs 21 | RUN pip3 install google-cloud-storage 22 | RUN pip3 install -U google-cloud-aiplatform[tensorboard] 23 | 24 | 25 | RUN apt-get update -y 26 | RUN apt-get install --no-install-recommends -y -q ca-certificates wget unzip 27 | 28 | 29 | RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \ 30 | unzip -qq google-cloud-sdk.zip -d tools && \ 31 | rm google-cloud-sdk.zip && \ 32 | tools/google-cloud-sdk/install.sh --usage-reporting=false \ 33 | --path-update=false --bash-completion=false \ 34 | --disable-installation-options && \ 35 | tools/google-cloud-sdk/bin/gcloud -q components update \ 36 | gcloud core gsutil && \ 37 | tools/google-cloud-sdk/bin/gcloud -q components install kubectl && \ 38 | tools/google-cloud-sdk/bin/gcloud config set component_manager/disable_update_check true 39 | 40 | 41 | ENV PATH $PATH:/tools/google-cloud-sdk/bin 42 | 43 | ADD pytorch-pipeline /workspace/pytorch-pipeline 44 | 45 | ENV PYTHONPATH /workspace/pytorch-pipeline 46 | 47 | WORKDIR /workspace/pytorch-pipeline 48 | 49 | #ENTRYPOINT /bin/bash 50 | ENTRYPOINT ["python", "/workspace/pytorch-pipeline/training_task.py"] 51 | 52 | 53 | -------------------------------------------------------------------------------- /ml/vertex_pipelines/pytorch/cifar/input.json: -------------------------------------------------------------------------------- 1 | { 2 | "instances": [ 3 | { 4 | "data": 5 | "iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAIAAAD8GO2jAAAAB3RJTUUH2AoYFB0lQuPgxAAAB/ZJREFUSImVVlmPplURruWcd+ttpnt6mZ4JdsMwC0OUxAXcgka58Xf4O/wnmsi1N8QQ3EJMBA1ookNUZJh9pqe3r7/9Pe97tiovPiBR4cK6q0rqqarnpE49+OOf/FwksMHClsZaa5kIAJTIWGuZGQFSTEFFM+Q+uaRdiDknEWXIIfg+SMqQBVUg5jRzLgQfg/cp9yLG2FIyISmSISJEYkYkRCBEVFUFIGNQNQMIE+REJABQlmbzwpL33cNHJ4pGowJjBiBiYkNGETJmIcPEbJgsISEiAgACExHRwlNVAFEARRJiQCRCRMw5A8jm5vmqssxEzAAAAERIhITEzMxskBIDEBljmBmJEUAVgBAXCUxIGgu2WYQIiYmVRCTnfHJyOhgAALFhEUkxSc6AiIilIYk5oVJVs7HABlQFEYmYiEABERdDWKbVxpJ6wkwozMS8IBNVbOdUhRa9ICIiMrNlXl9tGks1g9n/0vrpYOpcIlOJKjIQMSEQLQbAqjAltdK3ZXU+MomKKBITKaOQNRxSJlJmQEOsZASt4YqgKcuMbK7vra4tmydH45AxCUkWUGUmIhBkjdlSpjSvDAoxMCcJiYQJgVlQVNUYQ4pIWSHX1rDQfDQ3wlVZz5M3qzWWl9bWVuuDo9F0FoCKJJSTEBKyBYAkXvsuBi4ay4qsZJTAEoAAKBtSAQaqyWyfr1fr8vxK9frPft+snt945gY6MAVl1bSzbrfPX37w4GTURuG673OM6lXJUFnX81HyEY0qEBVFSczeZwRRyIAIxKrAhNYYYo25G89O1y5eqBuyUzVLdanOqfaVtS9cuXg4mI3mCZar0cSNughEOefJ1K1s7gmRiIgqIhZFwaRIklJSwRhzH2Xug+Rec3fzle89/9z+vI32tDW9DylKWVYoqSnM/qX1TReHoxkKdCG2vdqlZm//SquVA4OaQHSxHIhYFLYoTIxCRBpyCJLEZK2rld2Ts64pytKw6UMKQTULlIrgqgovLNslU6+s2LYL47NxJIOIrevs2nIGAM0xZkJUQkVEQFVAJMNcWfAee0/ex3nXr2yUdVkZEMxJCHJOkJTUiK0z1WCq5vLOhpul5MO4cz4BFHMiY4mwYATN3mfDEZCQRCWFLvSTFHJjq42V5cOn0+ngxJI1IIAgAImo8H3u3PTC5jKxqOjhwcFkNN7evFg0ZZNy2/ddG9EUzWpz+fK21XT38fHMS++70cnw+PBgcHbYzmYXN7dv3rh+94P3bn/08OpL3zQiwsyICIrD0eTDf97Z3t7cvbSTxLz7h/cOjoY3rl27eGm7rJraSm2Ladu58TyuF+dWmjA8Onp8PByM3NTNu861MzedfXwy+OjWB/OzietS1zkDACklY0wWIq5SxsdPzgaDdmd3d2NjS6kaj4cpzJLIdOae2XtOtRhOxqcHT6Sb9167oKHtU4gpqo8UEgefummLfdrZ2d2/8pwJMagKEc3m/Z//covZeJ/LAre2Ni+N3Zf2rxCo72YPHj46OX7gulTZ8z6lGB0lr2gzFwm4S+CShkw+Ux91aXWdiu75F29u7W6TZFEFY8zt2w/+dusftjAh+aUVu31xbWW1RiIiOxhMh2PXLG/0XkeTcdvNfewAxRpEiSAxS4h+nrtRdkPpxqzh4rN7ezevU2lNVZUpJUScTV1RVM1SNRpPs+Sub+ft7J13/mqp8j75GPso1tqyymhy3dQ76+cub13647vvPzm8P5q1s9ksdC52wYLO/Wj/tR9u7O60zpuQOwLWLGvLSzeuP39ha+vw2B0cpV//5oOP7zxy0wTaLjVLq01p+zanfr0xIiG2848P7r9/9vbdu/fmcyeqokjcFNb64C7t7V598Rp
XVkQMQOn79ODO/ZmbXHvhquty26XRcNp3XUrBGj23tqKpf/T44Wg0dO2MIU5GZ851WbKqLFaaABAQJIKxVJWvfP/Vje2N+byvSjauk9Tnwdnkws65BHrv/vF4Mu19Gzo/ODsanB02je1no8OnR94nBDZMIspcVFUhEvvgJAsAEChI70J+/stf+fp3vqsFVDUxkZnOpr7rysasrC23nR48Ob59+1+D4yfT4aR1o5TblDxnZCoKu9Q0K0mi9z7nHGMWiSr6ySlGUElVU/7gR69d2N0ejWdVAYxoQpLxZOY7V1fnfvfbt3/15lvDwZFmDyLEwkZqWwBVkiEESXGW1AMqAqYcATKA4CfXG5PItevPfvvVlyPlqkDIgiLmwb1x3zoCfeuXf3rjF2+enT5c6AuwAIAgmDMoCCAoJgFBVABFRABUXfysgAgCCMa+/K1vrK8vHY9aa1Uyioh5+GgwHg5Oj4+nQzcbtQUjoaiAqgAgAgEiYFYQAEFUVAIFgEWZT9ARMaW0vr310le/lsUTRAJEECYwhydPh2fj4XDST52IEJBgVgJQBABAAFDUTx3FRegz+1TcgEi4evXq3v61KF3JJAqkwKqEhOPJJOVcVLi0YgEEAPU/UT5Dg8+PLyQLXb3yQl2ek0CYrWZCZQQ2IYSUovdhOnwqbsiYPwXSL8D6XFM09Yf3Tn/6+hsuj0kKUsqSVdT43q+vb8SYUz9uuzMQ0IXswv9udvGk/xtfECVY3rpz9Pen3uc5JVskSTHmLMYWtm07a4uyWPLcSE4A6f9sH1QUGXKIgt77vgAMSVJKKSXjWuecMyaDGoQSlAEzgH4x3Z9XQJUlQHvq53OVXmk5GSuaAeXfrDxOutzfcVMAAAAASUVORK5CYII=" 6 | } 7 | ] 8 | } -------------------------------------------------------------------------------- /ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/README.md: -------------------------------------------------------------------------------- 1 | # pytorch-pipeline -------------------------------------------------------------------------------- /ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/process_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import subprocess 16 | import logging 17 | from pathlib import Path 18 | 19 | import torchvision 20 | import webdataset as wds 21 | from sklearn.model_selection import train_test_split 22 | 23 | logging.getLogger().setLevel(logging.INFO) 24 | # logging.info("Dataset path is: %s", cifar_dataset.path) 25 | output_pth = "output/processing" 26 | 27 | Path(output_pth).mkdir(parents=True, exist_ok=True) 28 | 29 | trainset = torchvision.datasets.CIFAR10( 30 | root="./", train=True, download=True 31 | ) 32 | testset = torchvision.datasets.CIFAR10( 33 | root="./", train=False, download=True 34 | ) 35 | 36 | Path(output_pth + "/train").mkdir(parents=True, exist_ok=True) 37 | Path(output_pth + "/val").mkdir(parents=True, exist_ok=True) 38 | Path(output_pth + "/test").mkdir(parents=True, exist_ok=True) 39 | 40 | random_seed = 25 41 | y = trainset.targets 42 | trainset, valset, y_train, y_val = train_test_split( 43 | trainset, 44 | y, 45 | stratify=y, 46 | shuffle=True, 47 | test_size=0.2, 48 | random_state=random_seed, 49 | ) 50 | 51 | for name in [(trainset, "train"), (valset, "val"), (testset, "test")]: 52 | with wds.ShardWriter( 53 | output_pth + "/" + str(name[1]) + "/" + str(name[1]) + "-%d.tar", 54 | maxcount=1000, 55 | ) as sink: 56 | for index, (image, cls) in enumerate(name[0]): 57 | sink.write( 58 | {"__key__": "%06d" % index, "ppm": image, "cls": cls} 59 | ) 60 | 61 | entry_point = ["ls", "-R", output_pth] 62 | run_code = subprocess.run(entry_point, stdout=subprocess.PIPE) 63 | print(run_code.stdout) 64 | -------------------------------------------------------------------------------- /ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/pytorch_pipeline/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM pytorch/pytorch:latest 2 | 3 | COPY 
requirements.txt requirements.txt 4 | 5 | RUN apt-get update 6 | 7 | RUN apt-get install -y git 8 | 9 | RUN git clone -b trainer-code-revamp https://github.com/jagadeeshi2i/pytorch-pipeline 10 | 11 | # RUN git clone -b jagadeeshi2i-patch-7 https://github.com/jagadeeshi2i/pytorch-pipeline 12 | 13 | RUN pip3 install -r requirements.txt 14 | 15 | ENV PYTHONPATH /workspace/pytorch-pipeline 16 | 17 | WORKDIR /workspace/pytorch-pipeline 18 | 19 | ENTRYPOINT /bin/bash -------------------------------------------------------------------------------- /ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/pytorch_pipeline/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amygdala/code-snippets/9fa00e5a4ec4ae3eb5a2fa212d1ecefb4009eba7/ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/pytorch_pipeline/__init__.py -------------------------------------------------------------------------------- /ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/pytorch_pipeline/components/base/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amygdala/code-snippets/9fa00e5a4ec4ae3eb5a2fa212d1ecefb4009eba7/ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/pytorch_pipeline/components/base/__init__.py -------------------------------------------------------------------------------- /ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/pytorch_pipeline/components/base/base_component.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from six import with_metaclass 3 | 4 | class BaseComponent(with_metaclass(abc.ABCMeta, object)): 5 | def __init__(self): 6 | pass 7 | 8 | @classmethod 9 | def _validate_component_class(cls): 10 | # TODO: Spec validation to be done here 11 | pass -------------------------------------------------------------------------------- /ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/pytorch_pipeline/components/base/base_executor.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from six import with_metaclass 3 | 4 | 5 | class BaseExecutor(with_metaclass(abc.ABCMeta, object)): 6 | 7 | def __init__(self): 8 | pass 9 | 10 | @abc.abstractmethod 11 | def Do(self, model_class, data_module_class=None, data_module_args=None, module_file_args=None): 12 | pass -------------------------------------------------------------------------------- /ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/pytorch_pipeline/components/trainer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amygdala/code-snippets/9fa00e5a4ec4ae3eb5a2fa212d1ecefb4009eba7/ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/pytorch_pipeline/components/trainer/__init__.py -------------------------------------------------------------------------------- /ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/pytorch_pipeline/components/trainer/component.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import inspect 16 | import importlib 17 | from typing import Optional, Dict 18 | from pytorch_pipeline.components.base.base_component import BaseComponent 19 | from pytorch_pipeline.components.trainer.generic_executor import GenericExecutor 20 | from pytorch_pipeline.components.trainer.executor import Executor 21 | 22 | class Trainer(BaseComponent): 23 | def __init__(self, 24 | module_file: Optional = None, 25 | data_module_file: Optional = None, 26 | trainer_fn: Optional = None, 27 | run_fn: Optional = None, 28 | data_module_args: Optional[Dict] = None, 29 | module_file_args: Optional[Dict] = None, 30 | trainer_args: Optional[Dict] = None 31 | ): 32 | super(Trainer, self).__init__() 33 | if [bool(module_file), bool(run_fn), bool(trainer_fn)].count(True) != 1: 34 | raise ValueError( 35 | "Exactly one of 'module_file', 'trainer_fn', or 'run_fn' must be " 36 | "supplied.") 37 | 38 | if module_file and data_module_file: 39 | # Both module file and data module file are present 40 | 41 | model_class = None 42 | data_module_class = None 43 | 44 | class_module = importlib.import_module(module_file.split(".")[0]) 45 | data_module = importlib.import_module(data_module_file.split(".")[0]) 46 | 47 | for cls in inspect.getmembers(class_module, lambda member: inspect.isclass( 48 | member) and member.__module__ == class_module.__name__): 49 | model_class = cls[1] 50 | 51 | for cls in inspect.getmembers(data_module, lambda member: inspect.isclass( 52 | member) and member.__module__ == data_module.__name__): 53 | data_module_class = cls[1] 54 | 55 | print(model_class, data_module_class) 56 | 57 | Executor().Do( 58 | model_class=model_class, 59 | data_module_class=data_module_class, 60 | data_module_args=data_module_args, 61 | module_file_args=module_file_args, 62 | trainer_args=trainer_args 63 | ) 64 | # 65 | # elif run_fn: 66 | # GenericExecutor().Do() 67 | # elif trainer_fn: 68 | # Executor().Do() 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/pytorch_pipeline/components/trainer/executor.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.

import os

import pytorch_lightning as pl
import torch

from pytorch_pipeline.components.trainer.generic_executor import GenericExecutor


class Executor(GenericExecutor):
    def __init__(self):
        super(Executor, self).__init__()

    def Do(
        self,
        model_class,
        data_module_class=None,
        data_module_args=None,
        module_file_args=None,
        trainer_args=None,
    ):
        if data_module_class:
            # Instantiate and prepare the LightningDataModule.
            dm = data_module_class(**data_module_args if data_module_args else {})
            dm.prepare_data()
            dm.setup(stage="fit")

            # `module_file_args` is an argparse.ArgumentParser; its parsed
            # arguments are forwarded to both the model and the trainer.
            parser = module_file_args
            args = vars(parser.parse_args())
            model = model_class(**args if args else {})

            trainer = pl.Trainer.from_argparse_args(parser, **trainer_args)

            trainer.fit(model, dm)
            trainer.test()

            model_save_path = args.get("checkpoint_dir", "/tmp")
            model_name = args.get("model_name", "model_state_dict.pth")

            model_save_path = os.path.join(model_save_path, model_name)
            # Save the state dict only from the rank-0 process so distributed
            # runs don't write the file multiple times.
            if trainer.global_rank == 0:
                print("Saving model to {}".format(model_save_path))
                torch.save(model.state_dict(), model_save_path)
--------------------------------------------------------------------------------
/ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/pytorch_pipeline/components/trainer/generic_executor.py:
--------------------------------------------------------------------------------
# Copyright 2021 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
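
"""Generic executor stub: `Do` is a placeholder for the (not yet implemented)
`run_fn`/`trainer_fn` entry points; `Executor` specializes it for the
PyTorch Lightning path."""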

from pytorch_pipeline.components.base.base_executor import BaseExecutor


class GenericExecutor(BaseExecutor):

    def Do(self, model_class, data_module_class=None, data_module_args=None, module_file_args=None):
        # TODO: Code to train a pretrained model
        pass

    def _GetFnArgs(self):
        pass
--------------------------------------------------------------------------------
/ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/pytorch_pipeline/examples/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amygdala/code-snippets/9fa00e5a4ec4ae3eb5a2fa212d1ecefb4009eba7/ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/pytorch_pipeline/examples/__init__.py
--------------------------------------------------------------------------------
/ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/pytorch_pipeline/examples/cifar10/cifar10_pre_process.py:
--------------------------------------------------------------------------------
import subprocess
from argparse import ArgumentParser
from pathlib import Path

import torchvision
import webdataset as wds
from sklearn.model_selection import train_test_split

if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument(
        "--output_data",
        type=str,
    )

    args = vars(parser.parse_args())
    output_path = args["output_data"]

    Path(output_path).mkdir(parents=True, exist_ok=True)

    # Download the CIFAR-10 train and test splits.
    trainset = torchvision.datasets.CIFAR10(root="./", train=True, download=True)
    testset = torchvision.datasets.CIFAR10(root="./", train=False, download=True)

    Path(output_path + "/train").mkdir(parents=True, exist_ok=True)
    Path(output_path + "/val").mkdir(parents=True, exist_ok=True)
    Path(output_path + "/test").mkdir(parents=True, exist_ok=True)

    # Hold out 20% of the training data as a stratified validation split.
    random_seed = 25
    y = trainset.targets
    trainset, valset, y_train, y_val = train_test_split(
        trainset, y, stratify=y, shuffle=True, test_size=0.2, random_state=random_seed
    )

    # Write each split as sharded WebDataset tar archives (1000 samples per shard).
    for dataset, split in [(trainset, "train"), (valset, "val"), (testset, "test")]:
        with wds.ShardWriter(
            output_path + "/" + split + "/" + split + "-%d.tar", maxcount=1000
        ) as sink:
            for index, (image, cls) in enumerate(dataset):
                sink.write({"__key__": "%06d" % index, "ppm": image, "cls": cls})

    # List the generated shards for logging purposes.
    entry_point = ["ls", "-R", output_path]
    run_code = subprocess.run(entry_point, stdout=subprocess.PIPE)
    print(run_code.stdout.decode("utf-8"))
--------------------------------------------------------------------------------
/ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/pytorch_pipeline/examples/cifar10/cifar10_pytorch.py:
--------------------------------------------------------------------------------
import os
from argparse import ArgumentParser

import pytorch_lightning as pl
from pytorch_lightning.callbacks import (
    EarlyStopping,
    LearningRateMonitor,
    ModelCheckpoint,
)
from pytorch_lightning.loggers import TensorBoardLogger

from pytorch_pipeline.components.mar.mar_generation import MarGeneration
from pytorch_pipeline.components.trainer.component import Trainer


# Argument parser for user-defined paths
parser = ArgumentParser()

parser.add_argument(
    "--tensorboard_root",
    type=str,
    default="output/tensorboard",
    help="Tensorboard Root path (default: output/tensorboard)",
)
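
# (PyTorch Lightning trainer flags such as --max_epochs are added to this
# parser further down via pl.Trainer.add_argparse_args.)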

parser.add_argument(
    "--checkpoint_dir",
    type=str,
    default="output/train/models",
    help="Path to save model checkpoints (default: output/train/models)",
)

parser.add_argument(
    "--dataset_path",
    type=str,
    default="output/processing",
    help="Cifar10 Dataset path (default: output/processing)",
)

parser.add_argument(
    "--model_name",
    type=str,
    default="resnet.pth",
    help="Name of the model to be saved as (default: resnet.pth)",
)

parser.add_argument(
    "--minio_path",
    type=str,
    default="tensorboard/version_0",
    help="Path to upload files to minio (default: tensorboard/version_0)",
)

parser = pl.Trainer.add_argparse_args(parent_parser=parser)

args = vars(parser.parse_args())


# Enable the TensorBoard logger and the LearningRateMonitor, EarlyStopping,
# and ModelCheckpoint callbacks.
lr_logger = LearningRateMonitor()
tboard = TensorBoardLogger(args["tensorboard_root"])
early_stopping = EarlyStopping(monitor="val_loss", mode="min", patience=5, verbose=True)
checkpoint_callback = ModelCheckpoint(
    dirpath=args["checkpoint_dir"],
    filename="cifar10_{epoch:02d}",
    save_top_k=1,
    verbose=True,
    monitor="val_loss",
    mode="min",
)

# Default to a single epoch if --max_epochs is not given on the command line.
if not args["max_epochs"]:
    max_epochs = 1
else:
    max_epochs = args["max_epochs"]


# Set the trainer-specific arguments
trainer_args = {
    "logger": tboard,
    "checkpoint_callback": True,
    "max_epochs": max_epochs,
    "callbacks": [lr_logger, early_stopping, checkpoint_callback],
}


# Set the datamodule-specific arguments
data_module_args = {"train_glob": args["dataset_path"]}


# Initiate the training process
trainer = Trainer(
    module_file="cifar10_train.py",
    data_module_file="cifar10_datamodule.py",
    module_file_args=parser,
    data_module_args=data_module_args,
    trainer_args=trainer_args,
)


# Generate a TorchServe model archive (.mar) from the trained model.
mar_config = {
    "MODEL_NAME": "cifar10_test",
    "MODEL_FILE": "pytorch_pipeline/examples/cifar10/cifar10_train.py",
    "HANDLER": "image_classifier",
    "SERIALIZED_FILE": os.path.join(args["checkpoint_dir"], args["model_name"]),
    "VERSION": "1",
    "EXPORT_PATH": args["checkpoint_dir"],
    "CONFIG_PROPERTIES": "https://kubeflow-dataset.s3.us-east-2.amazonaws.com/config.properties",
}


MarGeneration(mar_config=mar_config).generate_mar_file()
--------------------------------------------------------------------------------
/ml/vertex_pipelines/pytorch/cifar/requirements.txt:
--------------------------------------------------------------------------------
boto3
pytorch_lightning
image
matplotlib
torch
pyarrow
scikit-learn
transformers
torchtext
webdataset
torchvision
pandas
numpy
s3fs
wget
torch-model-archiver

--------------------------------------------------------------------------------
/ml/vertex_pipelines/pytorch/cifar/screenshots/pt-profiler.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amygdala/code-snippets/9fa00e5a4ec4ae3eb5a2fa212d1ecefb4009eba7/ml/vertex_pipelines/pytorch/cifar/screenshots/pt-profiler.png
--------------------------------------------------------------------------------
/ml/vertex_pipelines/pytorch/cifar/screenshots/vertex-tensorboard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amygdala/code-snippets/9fa00e5a4ec4ae3eb5a2fa212d1ecefb4009eba7/ml/vertex_pipelines/pytorch/cifar/screenshots/vertex-tensorboard.png
--------------------------------------------------------------------------------
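
A minimal sketch for sanity-checking the WebDataset shards written by
cifar10_pre_process.py above, assuming the script was run with
--output_data=output/processing (the shard filename below is illustrative,
not part of the repo):

import webdataset as wds

# Read a few samples back out of the first training shard; with .decode("pil"),
# "ppm" entries decode to PIL images and "cls" entries to integer labels.
dataset = wds.WebDataset("output/processing/train/train-0.tar").decode("pil")
for i, sample in enumerate(dataset):
    print(sample["__key__"], sample["cls"], sample["ppm"].size)
    if i == 2:
        break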