├── LICENSE ├── README.md ├── cloud_run └── twilio_vision │ ├── Dockerfile │ ├── README.md │ └── src │ ├── requirements.txt │ └── whats_that.py ├── datalab └── facets │ ├── README.md │ └── facets_snippets.ipynb └── ml ├── README.md ├── automl └── tables │ ├── kfp_e2e │ ├── README.md │ ├── create_dataset_for_tables │ │ ├── tables_component.py │ │ └── tables_component.yaml │ ├── create_model_for_tables │ │ ├── tables_component.py │ │ ├── tables_component.yaml │ │ ├── tables_eval_component.py │ │ ├── tables_eval_component.yaml │ │ ├── tables_eval_metrics_component.py │ │ └── tables_eval_metrics_component.yaml │ ├── deploy_model_for_tables │ │ ├── convert_oss.py │ │ ├── exported_model_deploy.py │ │ ├── instances.json │ │ ├── model_serve_template.yaml │ │ ├── tables_deploy_component.py │ │ └── tables_deploy_component.yaml │ ├── import_data_from_bigquery │ │ ├── tables_component.py │ │ ├── tables_component.yaml │ │ ├── tables_schema_component.py │ │ └── tables_schema_component.yaml │ ├── tables_containers │ │ └── model-service-launcher │ │ │ ├── Dockerfile │ │ │ └── build.sh │ ├── tables_pipeline_caip.py │ ├── tables_pipeline_caip.py.tar.gz │ ├── tables_pipeline_kf.py │ └── tables_pipeline_kf.py.tar.gz │ ├── model_export │ ├── Dockerfile.template │ ├── automl_tables_model_export_cloud_run.md │ ├── convert_oss.py │ └── instances.json │ └── xai │ ├── README.md │ ├── automl_tables_xai.ipynb │ └── bigquery_examples.md ├── census_train_and_eval ├── README.md ├── config_custom_gpus.yaml ├── hptuning_config.yaml ├── test.json ├── trainer │ ├── __init__.py │ ├── model.py │ └── task.py └── using_tf.estimator.train_and_evaluate.ipynb ├── kubeflow-pipelines ├── README.md ├── README_github_summ.md ├── README_taxidata_examples.md ├── components │ ├── README.md │ ├── automl │ │ ├── container │ │ │ ├── Dockerfile │ │ │ └── build.sh │ │ └── dataset_train │ │ │ └── dataset_model.py │ ├── cmle │ │ ├── containers │ │ │ ├── base │ │ │ │ ├── Dockerfile │ │ │ │ └── build.sh │ │ │ └── cmle_deploy │ │ │ │ ├── Dockerfile │ │ │ │ └── build.sh │ │ └── deploy │ │ │ └── deploy_model.py │ └── older │ │ ├── dataflow │ │ ├── containers │ │ │ ├── base │ │ │ │ ├── Dockerfile │ │ │ │ └── build.sh │ │ │ ├── tfma │ │ │ │ ├── Dockerfile │ │ │ │ └── build.sh │ │ │ └── tft │ │ │ │ ├── Dockerfile │ │ │ │ └── build.sh │ │ ├── taxi_schema │ │ │ └── taxi_schema │ │ │ │ ├── __init__.py │ │ │ │ └── taxi_schema.py │ │ ├── tfma │ │ │ ├── analysis │ │ │ │ └── setup.py │ │ │ ├── model_analysis-taxi.py │ │ │ └── tfma_expers.ipynb │ │ └── tft │ │ │ ├── mcsv_coder.py │ │ │ ├── preprocessing.py │ │ │ ├── preprocessing2.py │ │ │ ├── schema.pbtxt │ │ │ ├── taxi_preprocess_bq.py │ │ │ └── transform │ │ │ └── setup.py │ │ ├── kubeflow │ │ ├── containers │ │ │ ├── launcher │ │ │ │ ├── Dockerfile │ │ │ │ └── build.sh │ │ │ ├── tf-serving-gh │ │ │ │ ├── Dockerfile │ │ │ │ └── build.sh │ │ │ ├── tf-serving │ │ │ │ ├── Dockerfile │ │ │ │ └── build.sh │ │ │ └── trainer │ │ │ │ ├── Dockerfile │ │ │ │ └── build.sh │ │ ├── launcher │ │ │ ├── train.py │ │ │ └── train.template.yaml │ │ ├── taxi_model │ │ │ ├── __init__.py │ │ │ ├── data │ │ │ │ ├── eval │ │ │ │ │ └── data.csv │ │ │ │ └── train │ │ │ │ │ └── data.csv │ │ │ ├── schema.pbtxt │ │ │ ├── setup.py │ │ │ └── trainer │ │ │ │ ├── __init__.py │ │ │ │ ├── model.py │ │ │ │ ├── task.py │ │ │ │ └── taxi.py │ │ ├── tf-serving-gh │ │ │ ├── deploy-tf-serve.py │ │ │ └── tf-serve-template.yaml │ │ └── tf-serving │ │ │ ├── chicago_taxi_client.py │ │ │ ├── deploy-tf-serve.py │ │ │ ├── schema.pbtxt │ │ │ └── 
tf-serve-template.yaml │ │ └── t2t │ │ ├── containers │ │ ├── base │ │ │ ├── Dockerfile │ │ │ └── build.sh │ │ ├── t2t_app │ │ │ ├── Dockerfile │ │ │ └── build.sh │ │ ├── t2t_proc │ │ │ ├── Dockerfile │ │ │ └── build.sh │ │ ├── t2t_train │ │ │ ├── Dockerfile │ │ │ └── build.sh │ │ └── webapp-launcher │ │ │ ├── Dockerfile │ │ │ └── build.sh │ │ ├── t2t-app │ │ └── app │ │ │ ├── ghsumm │ │ │ ├── __init__.py │ │ │ ├── setup.py │ │ │ └── trainer │ │ │ │ ├── __init__.py │ │ │ │ └── problem.py │ │ │ ├── github_issues_sample.csv │ │ │ ├── main.py │ │ │ └── templates │ │ │ └── index.html │ │ ├── t2t-proc │ │ └── ghsumm │ │ │ ├── __init__.py │ │ │ ├── setup.py │ │ │ └── trainer │ │ │ ├── __init__.py │ │ │ └── problem.py │ │ ├── t2t-train │ │ ├── ghsumm │ │ │ ├── __init__.py │ │ │ ├── setup.py │ │ │ └── trainer │ │ │ │ ├── __init__.py │ │ │ │ └── problem.py │ │ └── train_model.py │ │ └── webapp-launcher │ │ ├── deploy-webapp.py │ │ └── t2tapp-template.yaml ├── keras_tuner │ ├── README.md │ ├── components │ │ ├── eval_metrics_component.yaml │ │ ├── kubeflow-resources │ │ │ ├── bikesw_training │ │ │ │ ├── bikes_weather_limited.py │ │ │ │ ├── bw_hptune_standalone.py │ │ │ │ ├── bwmodel │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── model.py │ │ │ │ ├── deploy_tuner.py │ │ │ │ ├── eval_metrics.py │ │ │ │ ├── kchief_deployment_templ.yaml │ │ │ │ └── ktuners_deployment_templ.yaml │ │ │ ├── cloudbuild.yaml │ │ │ ├── containers │ │ │ │ ├── bikesw_training │ │ │ │ │ ├── Dockerfile │ │ │ │ │ ├── build.sh │ │ │ │ │ ├── cloudbuild.yaml │ │ │ │ │ └── copydir.sh │ │ │ │ ├── bikesw_training_hptune │ │ │ │ │ ├── Dockerfile │ │ │ │ │ ├── build.sh │ │ │ │ │ ├── cloudbuild.yaml │ │ │ │ │ └── copydir.sh │ │ │ │ ├── deploy_jobs │ │ │ │ │ ├── Dockerfile │ │ │ │ │ ├── build.sh │ │ │ │ │ ├── cloudbuild.yaml │ │ │ │ │ └── copydir.sh │ │ │ │ └── tf-serving │ │ │ │ │ ├── Dockerfile │ │ │ │ │ ├── build.sh │ │ │ │ │ ├── cloudbuild.yaml │ │ │ │ │ └── copydir.sh │ │ │ └── tf-serving │ │ │ │ ├── deploy-tfserve.py │ │ │ │ └── tf-serve-template.yaml │ │ ├── serve_component.yaml │ │ ├── tfdv │ │ │ ├── Dockerfile │ │ │ ├── requirements.txt │ │ │ ├── tfdv.py │ │ │ └── tfdv_compare.py │ │ ├── tfdv_component.yaml │ │ ├── tfdv_drift_component.yaml │ │ └── train_component.yaml │ ├── example_pipelines │ │ ├── bw_ktune.py │ │ ├── bw_ktune.py.tar.gz │ │ ├── bw_ktune_metrics.py │ │ ├── bw_tfdv.py │ │ ├── bw_train.py │ │ └── bw_train_metrics.py │ └── notebooks │ │ └── metrics_eval_component.ipynb ├── samples │ ├── automl │ │ ├── README.md │ │ ├── dataset_and_train.py │ │ └── dataset_and_train.py.tar.gz │ └── kubeflow-tf │ │ ├── README.md │ │ └── older │ │ ├── README.md │ │ ├── gh_summ.py │ │ ├── gh_summ.py.tar.gz │ │ ├── gh_summ_serve.py │ │ ├── gh_summ_serve.py.tar.gz │ │ ├── pipelines-kubecon.ipynb │ │ ├── workflow1.py │ │ └── workflow2.py └── sbtb │ ├── README.md │ ├── components │ ├── kubeflow-resources │ │ ├── bikesw_training │ │ │ └── bikes_weather.py │ │ ├── containers │ │ │ ├── bikesw_training │ │ │ │ ├── Dockerfile │ │ │ │ └── build.sh │ │ │ └── tf-serving │ │ │ │ ├── Dockerfile │ │ │ │ └── build.sh │ │ └── tf-serving │ │ │ ├── deploy-tfserve.py │ │ │ └── tf-serve-template.yaml │ ├── serve_component.yaml │ └── train_component.yaml │ └── example_pipelines │ └── bw.py ├── notebook_examples ├── TF_linear_regressor.ipynb ├── caipp │ ├── caipp_connect.ipynb │ └── kfp_in_a_notebook.ipynb ├── functions │ ├── hosted_kfp_gcf.ipynb │ ├── main.py │ └── requirements.txt ├── hosted_kfp │ └── event_triggered_kfp_pipeline_bw.ipynb ├── 
keras_linear_regressor.ipynb └── mnist_estimator.ipynb └── vertex_pipelines └── pytorch └── cifar ├── Dockerfile ├── Dockerfile-gpu ├── Dockerfile-gpu-ct ├── LICENSE ├── README.md ├── input.json ├── pytorch-pipeline ├── .gitignore ├── README.md ├── cifar10_datamodule.py ├── cifar10_pytorch.py ├── cifar10_train.py ├── process_test.py ├── pytorch_pipeline │ ├── Dockerfile │ ├── __init__.py │ ├── components │ │ ├── base │ │ │ ├── __init__.py │ │ │ ├── base_component.py │ │ │ └── base_executor.py │ │ └── trainer │ │ │ ├── __init__.py │ │ │ ├── component.py │ │ │ ├── executor.py │ │ │ └── generic_executor.py │ └── examples │ │ ├── __init__.py │ │ └── cifar10 │ │ ├── cifar10_datamodule.py │ │ ├── cifar10_pre_process.py │ │ ├── cifar10_pytorch.py │ │ ├── cifar10_train.py │ │ ├── input.json │ │ └── utils.py └── training_task.py ├── pytorch_cifar10_vertex_pipelines.ipynb ├── requirements.txt └── screenshots ├── pt-profiler.png └── vertex-tensorboard.png /README.md: -------------------------------------------------------------------------------- 1 | 2 | This is a repo for small Google Cloud Platform (GCP) snippets and examples used in blog posts etc. 3 | 4 | Contributions are not currently accepted. This is not an official Google product. -------------------------------------------------------------------------------- /cloud_run/twilio_vision/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # For more information about this base image and dockerfile, see 16 | # https://github.com/GoogleCloudPlatform/python-docker 17 | 18 | FROM python:3.7 19 | 20 | ENV APP_HOME /app 21 | WORKDIR $APP_HOME 22 | # COPY . 23 | 24 | ADD src /app 25 | RUN pip install -r /app/requirements.txt 26 | 27 | CMD gunicorn -w 4 -b :$PORT whats_that:app 28 | -------------------------------------------------------------------------------- /cloud_run/twilio_vision/src/requirements.txt: -------------------------------------------------------------------------------- 1 | google-api-python-client 2 | oauth2client 3 | gunicorn 4 | flask 5 | twilio 6 | requests 7 | -------------------------------------------------------------------------------- /datalab/facets/README.md: -------------------------------------------------------------------------------- 1 | 2 | [To be added.] -------------------------------------------------------------------------------- /datalab/facets/facets_snippets.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "```\n", 8 | "Copyright 2017 Google Inc. 
All rights reserved.\n", 9 | "Licensed under the Apache License, Version 2.0 (the \"License\");\n", 10 | "you may not use this file except in compliance with the License.\n", 11 | "You may obtain a copy of the License at\n", 12 | " http://www.apache.org/licenses/LICENSE-2.0\n", 13 | "Unless required by applicable law or agreed to in writing, software\n", 14 | "distributed under the License is distributed on an \"AS IS\" BASIS,\n", 15 | "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", 16 | "See the License for the specific language governing permissions and\n", 17 | "limitations under the License.\n", 18 | "```" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": { 25 | "collapsed": false 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "import google.datalab.bigquery as bq\n", 30 | "import pandas as pd" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": { 37 | "collapsed": true 38 | }, 39 | "outputs": [], 40 | "source": [ 41 | "%%bq query -n requests\n", 42 | "SELECT *\n", 43 | "FROM `bigquery-public-data.nhtsa_traffic_fatalities.accident_2015` \n", 44 | "LIMIT 10000" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": { 51 | "collapsed": false 52 | }, 53 | "outputs": [], 54 | "source": [ 55 | "df = requests.execute(output_options=bq.QueryOutput.dataframe()).result()" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": { 62 | "collapsed": false 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "from google.datalab.ml import FacetsOverview\n", 67 | "\n", 68 | "FacetsOverview().plot({'data': df})" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "...." 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": { 82 | "collapsed": false 83 | }, 84 | "outputs": [], 85 | "source": [ 86 | "from google.datalab.ml import FacetsDiveview\n", 87 | "\n", 88 | "FacetsDiveview().plot(df)" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": { 95 | "collapsed": true 96 | }, 97 | "outputs": [], 98 | "source": [] 99 | } 100 | ], 101 | "metadata": { 102 | "kernelspec": { 103 | "display_name": "Python 2", 104 | "language": "python", 105 | "name": "python2" 106 | }, 107 | "language_info": { 108 | "codemirror_mode": { 109 | "name": "ipython", 110 | "version": 2 111 | }, 112 | "file_extension": ".py", 113 | "mimetype": "text/x-python", 114 | "name": "python", 115 | "nbconvert_exporter": "python", 116 | "pygments_lexer": "ipython2", 117 | "version": "2.7.12" 118 | } 119 | }, 120 | "nbformat": 4, 121 | "nbformat_minor": 2 122 | } 123 | -------------------------------------------------------------------------------- /ml/README.md: -------------------------------------------------------------------------------- 1 | 2 | This directory contains various ML-related examples. 3 | 4 | (The Cloud Shell tutorials have moved [here](https://github.com/GoogleCloudPlatform/cloud-shell-tutorials/tree/master/ml)). 
5 | -------------------------------------------------------------------------------- /ml/automl/tables/kfp_e2e/create_dataset_for_tables/tables_component.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import NamedTuple 16 | 17 | 18 | def automl_create_dataset_for_tables( 19 | gcp_project_id: str, 20 | gcp_region: str, 21 | dataset_display_name: str, 22 | api_endpoint: str = None, 23 | tables_dataset_metadata: dict = {}, 24 | ) -> NamedTuple('Outputs', [('dataset_path', str), ('create_time', str), ('dataset_id', str)]): 25 | 26 | import sys 27 | import subprocess 28 | subprocess.run([sys.executable, '-m', 'pip', 'install', 'googleapis-common-protos==1.6.0', 29 | '--no-warn-script-location'], 30 | env={'PIP_DISABLE_PIP_VERSION_CHECK': '1'}, check=True) 31 | subprocess.run([sys.executable, '-m', 'pip', 'install', 'google-cloud-automl==0.9.0', 32 | '--quiet', '--no-warn-script-location'], 33 | env={'PIP_DISABLE_PIP_VERSION_CHECK': '1'}, check=True) 34 | 35 | import google 36 | import logging 37 | from google.api_core.client_options import ClientOptions 38 | from google.cloud import automl_v1beta1 as automl 39 | 40 | logging.getLogger().setLevel(logging.INFO) # TODO: make level configurable 41 | # TODO: we could instead check for region 'eu' and use 'eu-automl.googleapis.com:443'endpoint 42 | # in that case, instead of requiring endpoint to be specified. 
43 | if api_endpoint: 44 | client_options = ClientOptions(api_endpoint=api_endpoint) 45 | client = automl.TablesClient(project=gcp_project_id, region=gcp_region, 46 | client_options=client_options) 47 | else: 48 | client = automl.TablesClient(project=gcp_project_id, region=gcp_region) 49 | 50 | try: 51 | # Create a dataset with the given display name 52 | dataset = client.create_dataset(dataset_display_name, metadata=tables_dataset_metadata) 53 | # Log info about the created dataset 54 | logging.info("Dataset name: {}".format(dataset.name)) 55 | logging.info("Dataset id: {}".format(dataset.name.split("/")[-1])) 56 | logging.info("Dataset display name: {}".format(dataset.display_name)) 57 | logging.info("Dataset metadata:") 58 | logging.info("\t{}".format(dataset.tables_dataset_metadata)) 59 | logging.info("Dataset example count: {}".format(dataset.example_count)) 60 | logging.info("Dataset create time:") 61 | logging.info("\tseconds: {}".format(dataset.create_time.seconds)) 62 | logging.info("\tnanos: {}".format(dataset.create_time.nanos)) 63 | print(str(dataset)) 64 | dataset_id = dataset.name.rsplit('/', 1)[-1] 65 | return (dataset.name, str(dataset.create_time), dataset_id) 66 | except google.api_core.exceptions.GoogleAPICallError as e: 67 | logging.warning(e) 68 | raise e 69 | 70 | 71 | if __name__ == '__main__': 72 | import kfp 73 | kfp.components.func_to_container_op(automl_create_dataset_for_tables, 74 | output_component_file='tables_component.yaml', base_image='python:3.7') 75 | -------------------------------------------------------------------------------- /ml/automl/tables/kfp_e2e/deploy_model_for_tables/convert_oss.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # tested with TF1.14 16 | import sys 17 | import tensorflow as tf 18 | 19 | from absl import app 20 | from absl import flags 21 | from tensorflow.core.protobuf import saved_model_pb2 22 | from tensorflow.python.summary import summary 23 | 24 | FLAGS = flags.FLAGS 25 | 26 | flags.DEFINE_string('saved_model', '', 'The location of the saved_model.pb to visualize.') 27 | flags.DEFINE_string('output_dir', '', 28 | 'The location for the Tensorboard log to begin visualization from.') 29 | 30 | def import_to_tensorboard(saved_model, output_dir): 31 | """View an imported saved_model.pb as a graph in Tensorboard. 32 | 33 | Args: 34 | saved_model: The location of the saved_model.pb to visualize. 35 | output_dir: The location for the Tensorboard log to begin visualization from. 36 | 37 | Usage: 38 | Call this function with your model location and desired log directory. 39 | Launch Tensorboard by pointing it to the log directory. 40 | View your imported `.pb` model as a graph. 
41 | """ 42 | with open(saved_model, "rb") as f: 43 | sm = saved_model_pb2.SavedModel() 44 | sm.ParseFromString(f.read()) 45 | if 1 != len(sm.meta_graphs): 46 | print('More than one graph found. Not sure which to write') 47 | sys.exit(1) 48 | graph_def = sm.meta_graphs[0].graph_def 49 | 50 | pb_visual_writer = summary.FileWriter(output_dir) 51 | pb_visual_writer.add_graph(None, graph_def=graph_def) 52 | print("Model Imported. Visualize by running: " 53 | "tensorboard --logdir={}".format(output_dir)) 54 | 55 | 56 | def main(argv): 57 | import_to_tensorboard(FLAGS.saved_model, FLAGS.output_dir) 58 | 59 | 60 | if __name__ == '__main__': 61 | app.run(main) 62 | -------------------------------------------------------------------------------- /ml/automl/tables/kfp_e2e/deploy_model_for_tables/exported_model_deploy.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import argparse 16 | import os 17 | import logging 18 | import subprocess 19 | 20 | 21 | def main(): 22 | parser = argparse.ArgumentParser(description='Serving webapp') 23 | parser.add_argument( 24 | '--model_name', 25 | required=True) 26 | parser.add_argument( 27 | '--image_name', 28 | required=True) 29 | parser.add_argument( 30 | '--namespace', 31 | default='default') 32 | args = parser.parse_args() 33 | 34 | NAMESPACE = 'default' 35 | logging.getLogger().setLevel(logging.INFO) 36 | logging.info('Generating training template.') 37 | 38 | template_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'model_serve_template.yaml') 39 | target_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'model_serve.yaml') 40 | mname = args.model_name.replace('_', '-') 41 | logging.info("using model name: {}, image {}, and namespace: {}".format( 42 | mname, args.image_name, NAMESPACE)) 43 | 44 | with open(template_file, 'r') as f: 45 | with open(target_file, "w") as target: 46 | data = f.read() 47 | changed = data.replace('MODEL_NAME', mname).replace( 48 | 'IMAGE_NAME', args.image_name).replace('NAMESPACE', NAMESPACE) 49 | target.write(changed) 50 | 51 | logging.info('deploying...') 52 | subprocess.call(['kubectl', 'create', '-f', '/ml/model_serve.yaml']) 53 | 54 | # kubectl -n default port-forward svc/ 8080:80 55 | # curl -X POST --data @./instances.json http://localhost:8080/predict 56 | 57 | if __name__ == "__main__": 58 | main() 59 | -------------------------------------------------------------------------------- /ml/automl/tables/kfp_e2e/deploy_model_for_tables/instances.json: -------------------------------------------------------------------------------- 1 | { 2 | "instances": [ 3 | { 4 | "bike_id": "6179", 5 | "day_of_week": "6", 6 | "end_latitude": 51.50379168, 7 | "end_longitude": -0.11282408, 8 | "end_station_id": "154", 9 | "euclidean": 2513.254047872678, 10 | "loc_cross": "POINT(-0.08 51.52)POINT(-0.11 51.5)", 11 | "max": 56.8, 12 
| "min": 50.9, 13 | "prcp": 0, 14 | "ts": 1445624280, 15 | "start_latitude": 51.51615461, 16 | "start_longitude": -0.082422399, 17 | "start_station_id": "217", 18 | "temp": 54, 19 | "dewp": 44 20 | }, 21 | { 22 | "bike_id": "5373", 23 | "day_of_week": "3", 24 | "end_latitude": 51.52059681, 25 | "end_longitude": -0.116688468, 26 | "end_station_id": "68", 27 | "euclidean": 1181.215448450556, 28 | "loc_cross": "POINT(-0.13 51.53)POINT(-0.12 51.52)", 29 | "max": 56.7, 30 | "min": 45.9, 31 | "prcp": 0, 32 | "ts": 1494317220, 33 | "start_latitude": 51.52683806, 34 | "start_longitude": -0.130504336, 35 | "start_station_id": "214", 36 | "temp": 50.5, 37 | "dewp": 37.1 38 | }, 39 | { 40 | "bike_id": "5373", 41 | "day_of_week": "3", 42 | "end_latitude": 51.52059681, 43 | "end_longitude": -0.116688468, 44 | "end_station_id": "68", 45 | "euclidean": 3589.5146210024977, 46 | "loc_cross": "POINT(-0.07 51.52)POINT(-0.12 51.52)", 47 | "max": 44.6, 48 | "min": 34.0, 49 | "prcp": 0, 50 | "ts": 1480407420, 51 | "start_latitude": 51.52388, 52 | "start_longitude": -0.065076, 53 | "start_station_id": "445", 54 | "temp": 38.2, 55 | "dewp": 28.6 56 | } 57 | ] 58 | } 59 | -------------------------------------------------------------------------------- /ml/automl/tables/kfp_e2e/deploy_model_for_tables/model_serve_template.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | labels: 6 | app: MODEL_NAME 7 | name: MODEL_NAME 8 | namespace: NAMESPACE 9 | spec: 10 | ports: 11 | - name: model-serving 12 | port: 80 13 | targetPort: "http-server" 14 | selector: 15 | app: MODEL_NAME 16 | type: ClusterIP 17 | --- 18 | apiVersion: extensions/v1beta1 19 | kind: Deployment 20 | metadata: 21 | labels: 22 | app: MODEL_NAME 23 | name: MODEL_NAME-dep 24 | namespace: NAMESPACE 25 | spec: 26 | replicas: 2 27 | template: 28 | metadata: 29 | labels: 30 | app: MODEL_NAME 31 | version: v1 32 | spec: 33 | containers: 34 | - name: MODEL_NAME 35 | image: IMAGE_NAME 36 | imagePullPolicy: Always 37 | livenessProbe: 38 | initialDelaySeconds: 30 39 | periodSeconds: 30 40 | tcpSocket: 41 | port: 8080 42 | ports: 43 | - name: http-server 44 | containerPort: 8080 45 | resources: 46 | limits: 47 | cpu: "4" 48 | memory: 4Gi 49 | requests: 50 | cpu: "1" 51 | memory: 1Gi 52 | -------------------------------------------------------------------------------- /ml/automl/tables/kfp_e2e/deploy_model_for_tables/tables_deploy_component.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from typing import NamedTuple 16 | 17 | def automl_deploy_tables_model( 18 | gcp_project_id: str, 19 | gcp_region: str, 20 | model_display_name: str, 21 | api_endpoint: str = None, 22 | ) -> NamedTuple('Outputs', [('model_display_name', str), ('status', str)]): 23 | import subprocess 24 | import sys 25 | subprocess.run([sys.executable, '-m', 'pip', 'install', 'googleapis-common-protos==1.6.0', '--no-warn-script-location'], 26 | env={'PIP_DISABLE_PIP_VERSION_CHECK': '1'}, check=True) 27 | subprocess.run([sys.executable, '-m', 'pip', 'install', 'google-cloud-automl==0.9.0', '--quiet', '--no-warn-script-location'], 28 | env={'PIP_DISABLE_PIP_VERSION_CHECK': '1'}, check=True) 29 | 30 | import google 31 | import logging 32 | from google.api_core.client_options import ClientOptions 33 | from google.api_core import exceptions 34 | from google.cloud import automl_v1beta1 as automl 35 | from google.cloud.automl_v1beta1 import enums 36 | 37 | logging.getLogger().setLevel(logging.INFO) # TODO: make level configurable 38 | # TODO: we could instead check for region 'eu' and use 'eu-automl.googleapis.com:443'endpoint 39 | # in that case, instead of requiring endpoint to be specified. 40 | if api_endpoint: 41 | client_options = ClientOptions(api_endpoint=api_endpoint) 42 | client = automl.TablesClient(project=gcp_project_id, region=gcp_region, 43 | client_options=client_options) 44 | else: 45 | client = automl.TablesClient(project=gcp_project_id, region=gcp_region) 46 | 47 | try: 48 | model = client.get_model(model_display_name=model_display_name) 49 | if model.deployment_state == enums.Model.DeploymentState.DEPLOYED: 50 | status = 'deployed' 51 | logging.info('Model {} already deployed'.format(model_display_name)) 52 | else: 53 | logging.info('Deploying model {}'.format(model_display_name)) 54 | response = client.deploy_model(model_display_name=model_display_name) 55 | # synchronous wait 56 | logging.info("Model deployed. {}".format(response.result())) 57 | status = 'deployed' 58 | except exceptions.NotFound as e: 59 | logging.warning(e) 60 | status = 'not_found' 61 | except Exception as e: 62 | logging.warning(e) 63 | status = 'undeployed' 64 | 65 | logging.info('Model status: {}'.format(status)) 66 | return (model_display_name, status) 67 | 68 | 69 | 70 | if __name__ == '__main__': 71 | import kfp 72 | kfp.components.func_to_container_op( 73 | automl_deploy_tables_model, output_component_file='tables_deploy_component.yaml', 74 | base_image='python:3.7') 75 | -------------------------------------------------------------------------------- /ml/automl/tables/kfp_e2e/tables_containers/model-service-launcher/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
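# Launcher image for the exported-model serving step: installs the Cloud SDK
# plus kubectl, then runs exported_model_deploy.py (staged under /ml) to
# create the model's Kubernetes Service and Deployment on the cluster.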
14 | 15 | FROM ubuntu:18.04 16 | 17 | RUN apt-get update \ 18 | && apt-get install -y python3-pip python3-dev \ 19 | && cd /usr/local/bin \ 20 | && ln -s /usr/bin/python3 python \ 21 | && pip3 install --upgrade pip 22 | 23 | RUN apt-get install -y wget unzip git 24 | 25 | RUN pip install --upgrade pip 26 | RUN pip install urllib3 certifi retrying 27 | 28 | # RUN apt-get install --no-install-recommends -y -q ca-certificates python-dev python-setuptools wget unzip 29 | 30 | # RUN pip install pyyaml==3.12 six==1.11.0 requests==2.18.4 31 | 32 | RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \ 33 | unzip -qq google-cloud-sdk.zip -d tools && \ 34 | rm google-cloud-sdk.zip && \ 35 | tools/google-cloud-sdk/install.sh --usage-reporting=false \ 36 | --path-update=false --bash-completion=false \ 37 | --disable-installation-options && \ 38 | tools/google-cloud-sdk/bin/gcloud -q components update \ 39 | gcloud core gsutil && \ 40 | tools/google-cloud-sdk/bin/gcloud -q components install kubectl && \ 41 | tools/google-cloud-sdk/bin/gcloud config set component_manager/disable_update_check true && \ 42 | touch /tools/google-cloud-sdk/lib/third_party/google.py 43 | 44 | 45 | ENV PATH $PATH:/tools/google-cloud-sdk/bin 46 | 47 | ADD build /ml 48 | 49 | ENTRYPOINT ["python", "/ml/exported_model_deploy.py"] 50 | -------------------------------------------------------------------------------- /ml/automl/tables/kfp_e2e/tables_containers/model-service-launcher/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright 2020 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | if [ -z "$1" ] 18 | then 19 | PROJECT_ID=$(gcloud config config-helper --format "value(configuration.properties.core.project)") 20 | else 21 | PROJECT_ID=$1 22 | fi 23 | 24 | mkdir -p ./build 25 | rsync -arvp "../../deploy_model_for_tables"/ ./build/ 26 | 27 | docker build -t model-service-launcher . 
28 | rm -rf ./build 29 | 30 | docker tag model-service-launcher gcr.io/${PROJECT_ID}/model-service-launcher 31 | docker push gcr.io/${PROJECT_ID}/model-service-launcher 32 | -------------------------------------------------------------------------------- /ml/automl/tables/kfp_e2e/tables_pipeline_caip.py.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amygdala/code-snippets/9fa00e5a4ec4ae3eb5a2fa212d1ecefb4009eba7/ml/automl/tables/kfp_e2e/tables_pipeline_caip.py.tar.gz -------------------------------------------------------------------------------- /ml/automl/tables/kfp_e2e/tables_pipeline_kf.py.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amygdala/code-snippets/9fa00e5a4ec4ae3eb5a2fa212d1ecefb4009eba7/ml/automl/tables/kfp_e2e/tables_pipeline_kf.py.tar.gz -------------------------------------------------------------------------------- /ml/automl/tables/model_export/Dockerfile.template: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | FROM gcr.io/cloud-automl-tables-public/model_server 16 | 17 | ADD model-export/tbl/YOUR_RENAMED_DIRECTORY /models/default/0000001 18 | -------------------------------------------------------------------------------- /ml/automl/tables/model_export/convert_oss.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # tested with TF1.14 16 | import sys 17 | import tensorflow as tf 18 | 19 | from absl import app 20 | from absl import flags 21 | from tensorflow.core.protobuf import saved_model_pb2 22 | from tensorflow.python.summary import summary 23 | 24 | FLAGS = flags.FLAGS 25 | 26 | flags.DEFINE_string('saved_model', '', 'The location of the saved_model.pb to visualize.') 27 | flags.DEFINE_string('output_dir', '', 'The location for the Tensorboard log to begin visualization from.') 28 | 29 | def import_to_tensorboard(saved_model, output_dir): 30 | """View an imported saved_model.pb as a graph in Tensorboard. 31 | 32 | Args: 33 | saved_model: The location of the saved_model.pb to visualize. 
34 | output_dir: The location for the Tensorboard log to begin visualization from. 35 | 36 | Usage: 37 | Call this function with your model location and desired log directory. 38 | Launch Tensorboard by pointing it to the log directory. 39 | View your imported `.pb` model as a graph. 40 | """ 41 | with open(saved_model, "rb") as f: 42 | sm = saved_model_pb2.SavedModel() 43 | sm.ParseFromString(f.read()) 44 | if 1 != len(sm.meta_graphs): 45 | print('More than one graph found. Not sure which to write') 46 | sys.exit(1) 47 | graph_def = sm.meta_graphs[0].graph_def 48 | 49 | pb_visual_writer = summary.FileWriter(output_dir) 50 | pb_visual_writer.add_graph(None, graph_def=graph_def) 51 | print("Model Imported. Visualize by running: " 52 | "tensorboard --logdir={}".format(output_dir)) 53 | 54 | 55 | def main(argv): 56 | import_to_tensorboard(FLAGS.saved_model, FLAGS.output_dir) 57 | 58 | 59 | if __name__ == '__main__': 60 | app.run(main) 61 | -------------------------------------------------------------------------------- /ml/automl/tables/model_export/instances.json: -------------------------------------------------------------------------------- 1 | { 2 | "instances": [ 3 | { 4 | "bike_id": "6179", 5 | "day_of_week": "6", 6 | "end_latitude": 51.50379168, 7 | "end_longitude": -0.11282408, 8 | "end_station_id": "154", 9 | "euclidean": 2513.254047872678, 10 | "loc_cross": "POINT(-0.08 51.52)POINT(-0.11 51.5)", 11 | "max": 56.8, 12 | "min": 50.9, 13 | "prcp": 0, 14 | "ts": 1445624280, 15 | "start_latitude": 51.51615461, 16 | "start_longitude": -0.082422399, 17 | "start_station_id": "217", 18 | "temp": 54, 19 | "dewp": 44 20 | }, 21 | { 22 | "bike_id": "5373", 23 | "day_of_week": "3", 24 | "end_latitude": 51.52059681, 25 | "end_longitude": -0.116688468, 26 | "end_station_id": "68", 27 | "euclidean": 1181.215448450556, 28 | "loc_cross": "POINT(-0.13 51.53)POINT(-0.12 51.52)", 29 | "max": 56.7, 30 | "min": 45.9, 31 | "prcp": 0, 32 | "ts": 1494317220, 33 | "start_latitude": 51.52683806, 34 | "start_longitude": -0.130504336, 35 | "start_station_id": "214", 36 | "temp": 50.5, 37 | "dewp": 37.1 38 | }, 39 | { 40 | "bike_id": "5373", 41 | "day_of_week": "3", 42 | "end_latitude": 51.52059681, 43 | "end_longitude": -0.116688468, 44 | "end_station_id": "68", 45 | "euclidean": 3589.5146210024977, 46 | "loc_cross": "POINT(-0.07 51.52)POINT(-0.12 51.52)", 47 | "max": 44.6, 48 | "min": 34.0, 49 | "prcp": 0, 50 | "ts": 1480407420, 51 | "start_latitude": 51.52388, 52 | "start_longitude": -0.065076, 53 | "start_station_id": "445", 54 | "temp": 38.2, 55 | "dewp": 28.6 56 | } 57 | ] 58 | } 59 | -------------------------------------------------------------------------------- /ml/automl/tables/xai/README.md: -------------------------------------------------------------------------------- 1 | 2 | # AutoML Tables examples 3 | 4 | This directory contains a notebook that shows examples of using the [AutoML Tables](https://cloud.google.com/automl-tables/docs/) client library. For these examples, we’ll use data that is essentially a join of two public datasets stored in [BigQuery](https://cloud.google.com/bigquery/): [London Bike rentals](https://console.cloud.google.com/bigquery?p=bigquery-public-data&d=london_bicycles&page=dataset) and [NOAA weather data](https://console.cloud.google.com/bigquery?p=bigquery-public-data&d=noaa_gsod&page=dataset), with some additional processing to clean up outliers and derive additional GIS and day-of-week fields. 
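
As a rough sketch of the kind of Tables client-library call these examples revolve around, here is a prediction request that also asks for *local feature importance*. The project, model name, and abbreviated input row below are all hypothetical, and the `feature_importance` argument assumes a reasonably recent `google-cloud-automl` release:

```python
from google.cloud import automl_v1beta1 as automl

client = automl.TablesClient(project='your-project', region='us-central1')

# Hypothetical, abbreviated input row; a real request must supply every
# column the model was trained on (see instances.json for a full row).
inputs = {'day_of_week': '3', 'euclidean': 1181.2, 'max': 56.7, 'min': 45.9,
          'prcp': 0, 'temp': 50.5, 'dewp': 37.1}

# feature_importance=True requests local feature importance values
# alongside the prediction itself.
response = client.predict(model_display_name='bikes_weather_model',
                          inputs=inputs, feature_importance=True)

for payload in response.payload:
    print('predicted duration:', payload.tables.value)
    for col in payload.tables.tables_model_column_info:
        print(col.column_display_name, col.feature_importance)
```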
5 | 6 | The [automl_tables_xai.ipynb notebook](automl_tables_xai.ipynb) shows how to create a custom [AutoML Tables](https://cloud.google.com/automl-tables/docs/) model to predict duration of London bike rentals given information about local weather as well as info about the rental trip. It walks through examples of using the Tables client libraries for creating a dataset, training a custom model, deploying the model, and using it to make predictions; and shows how you can programmatically request *local feature importance* explanations. 7 | 8 | AutoML Tables allows you to [export a model's test dataset to BigQuery](https://cloud.google.com/automl-tables/docs/evaluate#downloading_your_test_dataset_to) after training. The [bigquery_examples.md](bigquery_examples.md) file shows some examples of how you can use BigQuery to analyze this dataset. -------------------------------------------------------------------------------- /ml/automl/tables/xai/bigquery_examples.md: -------------------------------------------------------------------------------- 1 | 2 | # Examples of inspecting the "London bikes and weather" test dataset in BigQuery 3 | 4 | 5 | AutoML Tables allows you to [export a model's test dataset to BigQuery](https://cloud.google.com/automl-tables/docs/evaluate#downloading_your_test_dataset_to) after training. This makes it easy to do some additional poking around in a sample of the dataset— even if it didn't originally reside in BigQuery. This can be helpful, for example, if your model's explanations of predictions suggest some interesting characteristics of the data. 6 | (See the "Use your trained model to make predictions and see explanations of the results" section of [automl_tables_xai.ipynb](automl_tables_xai.ipynb) for an example of requesting a prediction explanation). 7 | 8 | Here are a few example queries for the "bikes and weather" dataset used in 9 | [automl_tables_xai.ipynb](automl_tables_xai.ipynb). 10 | In the following, replace `your-project` and `your-dataset` with the appropriate values. (The exported table should be named `evaluated_examples`, but if not, edit that value as well.) 11 | 12 | 1. Find the average predicted and actual ride durations for the day of the week (in this dataset, 1 & 7 are weekends). 13 | 14 | ```sql 15 | SELECT day_of_week, avg(predicted_duration[offset(0)].tables.value) as ad, avg(duration) as adur 16 | FROM `your-project.your-dataset.evaluated_examples` 17 | where euclidean > 0 group by day_of_week 18 | order by adur desc 19 | limit 10000 20 | ``` 21 | 22 | 2. Find the average predicted and actual ride durations for those rides where the max temperature was > 70F or < 40F. 23 | 24 | ```sql 25 | SELECT max, avg(predicted_duration[offset(0)].tables.value) as ad, avg(duration) as adur 26 | FROM `your-project.your-dataset.evaluated_examples` 27 | where euclidean > 0 and (max > 70 or max < 40) group by max 28 | order by adur desc 29 | limit 10000 30 | ``` 31 | 32 | 3. Show the starting stations for rides as ordered by greatest standard deviation in prediction accuracy. 
33 | 34 | ```sql 35 | SELECT start_station_id, stddev(predicted_duration[offset(0)].tables.value - duration) as sd, avg(predicted_duration[offset(0)].tables.value - duration) as ad 36 | FROM `your-project.your-dataset.evaluated_examples` 37 | where euclidean > 0 group by start_station_id 38 | order by sd desc 39 | limit 1000 40 | ``` -------------------------------------------------------------------------------- /ml/census_train_and_eval/config_custom_gpus.yaml: -------------------------------------------------------------------------------- 1 | trainingInput: 2 | scaleTier: CUSTOM 3 | masterType: standard_p100 4 | workerType: standard_p100 5 | parameterServerType: standard 6 | workerCount: 3 7 | parameterServerCount: 3 8 | -------------------------------------------------------------------------------- /ml/census_train_and_eval/hptuning_config.yaml: -------------------------------------------------------------------------------- 1 | trainingInput: 2 | hyperparameters: 3 | goal: MAXIMIZE 4 | hyperparameterMetricTag: accuracy 5 | maxTrials: 6 6 | maxParallelTrials: 2 7 | params: 8 | - parameterName: first-layer-size 9 | type: INTEGER 10 | minValue: 50 11 | maxValue: 400 12 | scaleType: UNIT_LINEAR_SCALE 13 | - parameterName: num-layers 14 | type: INTEGER 15 | minValue: 1 16 | maxValue: 10 17 | scaleType: UNIT_LINEAR_SCALE 18 | - parameterName: scale-factor 19 | type: DOUBLE 20 | minValue: 0.1 21 | maxValue: 0.9 22 | scaleType: UNIT_REVERSE_LOG_SCALE 23 | -------------------------------------------------------------------------------- /ml/census_train_and_eval/test.json: -------------------------------------------------------------------------------- 1 | {"age": 25, "workclass": " Private", "education": " 11th", "education_num": 7, "marital_status": " Never-married", "occupation": " Machine-op-inspct", "relationship": " Own-child", "race": " Black", "gender": " Male", "capital_gain": 0, "capital_loss": 0, "hours_per_week": 40, "native_country": " United-States"} 2 | -------------------------------------------------------------------------------- /ml/census_train_and_eval/trainer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amygdala/code-snippets/9fa00e5a4ec4ae3eb5a2fa212d1ecefb4009eba7/ml/census_train_and_eval/trainer/__init__.py -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Kubeflow Pipelines examples 3 | 4 | [Kubeflow](https://www.kubeflow.org/) is an OSS project to support a machine learning stack on Kubernetes, to make deployments of ML workflows on Kubernetes simple, portable and scalable. 5 | 6 | [**Kubeflow Pipelines**](https://github.com/kubeflow/pipelines) is a new component of Kubeflow that makes it easy to compose, deploy and manage end-to-end machine learning workflows. The Kubeflow Pipelines documentation is [here](https://www.kubeflow.org/docs/guides/pipelines/). 7 | 8 | This directory tree contains code for several different groups of Kubeflow Pipelines examples. 9 | The examples highlight how Kubeflow and Kubeflow Pipelines can help support portability, composability and reproducibility, scalability, and visualization and collaboration in your ML lifecycle; and make it easier to support hybrid ML solutions. 
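
The examples below share a common shape: step implementations are packaged as container images or component specs, wired together with the KFP SDK, and compiled to an archive for upload. A minimal sketch of that pattern follows (the component filenames echo ones used in this repo, but the parameter and output names are invented for illustration):

```python
import kfp
from kfp import components, dsl

# Load prebuilt component definitions from their YAML specs.
train_op = components.load_component_from_file('train_component.yaml')
serve_op = components.load_component_from_file('serve_component.yaml')

@dsl.pipeline(name='bikes-weather', description='Train a model, then serve it.')
def bw_pipeline(working_dir: str, epochs: int = 2):
    train = train_op(working_dir=working_dir, epochs=epochs)
    serve_op(model_path=train.outputs['model_path'])  # invented output name

if __name__ == '__main__':
    # Compile to an archive that can be uploaded via the Pipelines UI or SDK.
    kfp.compiler.Compiler().compile(bw_pipeline, 'bw_pipeline.py.tar.gz')
```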
10 | 11 | - A pipeline that [implements an AutoML Tables end-to-end workflow](https://github.com/amygdala/code-snippets/tree/master/ml/automl/tables/kfp_e2e). 12 | - [Distributed Keras Tuner + KFP example](./keras_tuner) 13 | - A pipeline that shows how you can make calls to the AutoML Vision API to build a pipeline that creates an AutoML *dataset* and then trains a model on that dataset: [samples/automl/README.md](./samples/automl/README.md). 14 | - [Example pipeline](./sbtb) for Scale by the Bay workshop (2019) 15 | 16 | ## Deprecated examples 17 | 18 | These examples are not currently maintained and most likely don't work properly. 19 | 20 | - [README_taxidata_examples.md](./README_taxidata_examples.md) 21 | - [README_github_summ.md](README_github_summ.md): going forward, the current version of this example lives here: https://github.com/kubeflow/examples/tree/master/github_issue_summarization/pipelines. 22 | 23 | 24 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Workflow Components 3 | 4 | This directory contains the definitions of the Argo workflow steps used in the example workflows. For each step, you can find both the code and the Dockerfile used to build the step's container. 5 | 6 | To make it easy to run the examples, we're using prebuilt Docker containers, but if you want to change anything about a step, you can rebuild and use your own container instead. Just edit the workflow definition under [`samples`](../samples) to point to your own container instead. 7 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/automl/container/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
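# Pipeline-step image for the AutoML dataset/train component: installs the
# google-cloud-automl client library and the Cloud SDK, then runs
# dataset_model.py (staged under /ml by build.sh).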
14 | 15 | FROM ubuntu:18.04 16 | 17 | RUN apt-get update \ 18 | && apt-get install -y python3-pip python3-dev \ 19 | && cd /usr/local/bin \ 20 | && ln -s /usr/bin/python3 python \ 21 | && pip3 install --upgrade pip 22 | 23 | 24 | RUN apt-get install -y wget unzip git 25 | 26 | 27 | RUN pip install google-cloud-automl 28 | 29 | 30 | RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \ 31 | unzip -qq google-cloud-sdk.zip -d tools && \ 32 | rm google-cloud-sdk.zip && \ 33 | tools/google-cloud-sdk/install.sh --usage-reporting=false \ 34 | --path-update=false --bash-completion=false \ 35 | --disable-installation-options && \ 36 | tools/google-cloud-sdk/bin/gcloud -q components update \ 37 | gcloud core gsutil && \ 38 | tools/google-cloud-sdk/bin/gcloud config set component_manager/disable_update_check true && \ 39 | touch /tools/google-cloud-sdk/lib/third_party/google.py 40 | 41 | ADD build /ml 42 | 43 | ENV PATH $PATH:/tools/node/bin:/tools/google-cloud-sdk/bin 44 | 45 | ENTRYPOINT ["python", "/ml/dataset_model.py"] 46 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/automl/container/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright 2019 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | if [ -z "$1" ] 18 | then 19 | PROJECT_ID=$(gcloud config config-helper --format "value(configuration.properties.core.project)") 20 | else 21 | PROJECT_ID=$1 22 | fi 23 | 24 | mkdir -p ./build 25 | rsync -arvp "../dataset_train"/ ./build/ 26 | 27 | docker build -t automl-pipeline . 28 | rm -rf ./build 29 | 30 | docker tag automl-pipeline gcr.io/${PROJECT_ID}/automl-pipeline 31 | docker push gcr.io/${PROJECT_ID}/automl-pipeline -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/cmle/containers/base/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
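# Base image shared by the CMLE (Cloud ML Engine) step containers: installs
# TensorFlow 1.10 and the Cloud SDK, and stages the deploy scripts under /ml.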
14 | 15 | FROM ubuntu:16.04 16 | 17 | RUN apt-get update -y 18 | 19 | RUN apt-get install --no-install-recommends -y -q ca-certificates python-dev python-setuptools \ 20 | wget unzip git 21 | 22 | RUN easy_install pip 23 | 24 | RUN pip install tensorflow==1.10 25 | RUN pip install pyyaml==3.12 six==1.11.0 26 | 27 | RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \ 28 | unzip -qq google-cloud-sdk.zip -d tools && \ 29 | rm google-cloud-sdk.zip && \ 30 | tools/google-cloud-sdk/install.sh --usage-reporting=false \ 31 | --path-update=false --bash-completion=false \ 32 | --disable-installation-options && \ 33 | tools/google-cloud-sdk/bin/gcloud -q components update \ 34 | gcloud core gsutil && \ 35 | tools/google-cloud-sdk/bin/gcloud config set component_manager/disable_update_check true && \ 36 | touch /tools/google-cloud-sdk/lib/third_party/google.py 37 | 38 | ADD build /ml 39 | 40 | ENV PATH $PATH:/tools/node/bin:/tools/google-cloud-sdk/bin 41 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/cmle/containers/base/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright 2018 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | mkdir -p ./build 18 | rsync -arvp "../../deploy"/ ./build/ 19 | 20 | docker build -t ml-pipeline-cmle-base . 21 | rm -rf ./build 22 | 23 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/cmle/containers/cmle_deploy/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | FROM ml-pipeline-cmle-base 16 | 17 | ENTRYPOINT ["python", "/ml/deploy_model.py"] 18 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/cmle/containers/cmle_deploy/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright 2018 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | if [ -z "$1" ] 18 | then 19 | PROJECT_ID=$(gcloud config config-helper --format "value(configuration.properties.core.project)") 20 | else 21 | PROJECT_ID=$1 22 | fi 23 | 24 | # build base image 25 | pushd ../base 26 | ./build.sh 27 | popd 28 | 29 | docker build -t ml-pipeline-cmle-op . 30 | docker tag ml-pipeline-cmle-op gcr.io/${PROJECT_ID}/ml-pipeline-cmle-op 31 | docker push gcr.io/${PROJECT_ID}/ml-pipeline-cmle-op 32 | 33 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/cmle/deploy/deploy_model.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Deploy a TF model to CMLE.""" 16 | 17 | import argparse 18 | import os 19 | import subprocess 20 | import time 21 | 22 | from tensorflow.python.lib.io import file_io 23 | 24 | def main(argv=None): 25 | parser = argparse.ArgumentParser(description='ML Trainer') 26 | parser.add_argument( 27 | '--project', 28 | help='The GCS project to use', 29 | required=True) 30 | parser.add_argument( 31 | '--gcs-path', 32 | help='The GCS path to the trained model. The path should end with "../export/".', 33 | required=True) 34 | parser.add_argument( 35 | '--version-name', 36 | help='The model version name.', 37 | required=True) 38 | 39 | parser.add_argument( 40 | '--model-name', 41 | help='The model name.', 42 | default='taxifare') 43 | 44 | parser.add_argument( 45 | '--region', 46 | help='The model region.', 47 | default='us-central1' 48 | ) 49 | 50 | args = parser.parse_args() 51 | 52 | # Make sure the model dir exists before proceeding, as sometimes it takes a few seconds to become 53 | # available after training completes. 54 | retries = 0 55 | sleeptime = 5 56 | while retries < 20: 57 | try: 58 | model_location = os.path.join(args.gcs_path, file_io.list_directory(args.gcs_path)[-1]) 59 | print("model location: %s" % model_location) 60 | break 61 | except Exception as e: 62 | print(e) 63 | print("Sleeping %s seconds to wait for GCS files..." 
% sleeptime) 64 | time.sleep(sleeptime) 65 | retries += 1 66 | sleeptime *= 2 67 | if retries >= 20: 68 | print("could not get model location subdir from %s, exiting" % args.gcs_path) 69 | exit(1) 70 | 71 | # Create the model resource (this call fails harmlessly if it already exists), then create the new version. 72 | model_create_command = ['gcloud', 'ml-engine', 'models', 'create', args.model_name, '--regions', 73 | args.region, '--project', args.project] 74 | print(model_create_command) 75 | result = subprocess.call(model_create_command) 76 | print(result) 77 | 78 | proper_version_name = args.version_name.replace('-', '_') 79 | print("using version name: %s" % proper_version_name) 80 | 81 | model_deploy_command = ['gcloud', 'ml-engine', 'versions', 'create', proper_version_name, 82 | '--model', args.model_name, '--runtime-version', '1.10', '--project', args.project, 83 | '--origin', model_location 84 | ] 85 | print(model_deploy_command) 86 | result2 = subprocess.call(model_deploy_command) 87 | print(result2) 88 | 89 | # Example invocation (illustrative placeholders): 90 | # python deploy_model.py --project <gcp-project> --gcs-path gs://<bucket>/<job-dir>/export/ --version-name v1 91 | 92 | if __name__ == "__main__": 93 | main() 94 | 95 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/dataflow/containers/base/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | FROM ubuntu:18.04 16 | 17 | RUN apt-get update -y 18 | RUN apt-get -y install build-essential python-pip python2.7 19 | 20 | RUN apt-get install --no-install-recommends -y -q ca-certificates python-dev python-setuptools \ 21 | wget unzip git 22 | 23 | # RUN easy_install pip 24 | 25 | RUN pip install --upgrade pip 26 | RUN pip install tensorflow==1.11 27 | RUN pip install pyyaml==3.12 six==1.11.0 28 | # RUN pip install pyyaml six 29 | 30 | RUN pip install tensorflow-transform==0.11.0 31 | 32 | RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \ 33 | unzip -qq google-cloud-sdk.zip -d tools && \ 34 | rm google-cloud-sdk.zip && \ 35 | tools/google-cloud-sdk/install.sh --usage-reporting=false \ 36 | --path-update=false --bash-completion=false \ 37 | --disable-installation-options && \ 38 | tools/google-cloud-sdk/bin/gcloud -q components update \ 39 | gcloud core gsutil && \ 40 | tools/google-cloud-sdk/bin/gcloud config set component_manager/disable_update_check true && \ 41 | touch /tools/google-cloud-sdk/lib/third_party/google.py 42 | 43 | ADD build /ml 44 | 45 | ENV PATH $PATH:/tools/node/bin:/tools/google-cloud-sdk/bin 46 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/dataflow/containers/base/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright 2018 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | mkdir -p ./build 18 | rsync -arvp "../../tft"/ ./build/ 19 | rsync -arvp "../../tfma"/ ./build/ 20 | rsync -arvp "../../taxi_schema"/ ./build/ 21 | rsync -arvp "../../taxi_schema"/ ./build/transform/ 22 | rsync -arvp "../../taxi_schema"/ ./build/analysis/ 23 | 24 | docker build -t ml-pipeline-dataflow-base . 25 | rm -rf ./build 26 | 27 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/dataflow/containers/tfma/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | FROM ml-pipeline-dataflow-base 16 | 17 | RUN apt-get update -y && \ 18 | apt-get install --no-install-recommends -y -q build-essential && \ 19 | pip install tensorflow-model-analysis==0.9.2 && \ 20 | pip install ipywidgets==7.2.1 && \ 21 | apt-get --purge autoremove -y build-essential 22 | 23 | WORKDIR /ml 24 | 25 | ENTRYPOINT ["python", "/ml/model_analysis-taxi.py"] 26 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/dataflow/containers/tfma/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright 2018 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | if [ -z "$1" ] 18 | then 19 | PROJECT_ID=$(gcloud config config-helper --format "value(configuration.properties.core.project)") 20 | else 21 | PROJECT_ID=$1 22 | fi 23 | 24 | # build base image 25 | pushd ../base 26 | ./build.sh 27 | popd 28 | 29 | docker build -f Dockerfile -t gcr.io/${PROJECT_ID}/ml-pipeline-dataflow-tfma-taxi . 
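# The image above is built directly under its gcr.io/${PROJECT_ID} tag, so it can be pushed without a separate 'docker tag' step.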
30 | docker push gcr.io/${PROJECT_ID}/ml-pipeline-dataflow-tfma-taxi 31 | 32 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/dataflow/containers/tft/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | FROM ml-pipeline-dataflow-base 16 | 17 | RUN apt-get update -y && \ 18 | apt-get install --no-install-recommends -y -q build-essential && \ 19 | pip install tensorflow-model-analysis==0.9.2 && \ 20 | pip install ipywidgets==7.2.1 && \ 21 | apt-get --purge autoremove -y build-essential 22 | 23 | WORKDIR /ml 24 | 25 | ENTRYPOINT ["python", "/ml/taxi_preprocess_bq.py"] 26 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/dataflow/containers/tft/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright 2018 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | if [ -z "$1" ] 18 | then 19 | PROJECT_ID=$(gcloud config config-helper --format "value(configuration.properties.core.project)") 20 | else 21 | PROJECT_ID=$1 22 | fi 23 | 24 | # build base image 25 | pushd ../base 26 | ./build.sh 27 | popd 28 | 29 | docker build -f Dockerfile -t ml-pipeline-dataflow-tftbq-taxi . 30 | docker tag ml-pipeline-dataflow-tftbq-taxi gcr.io/${PROJECT_ID}/ml-pipeline-dataflow-tftbq-taxi 31 | docker push gcr.io/${PROJECT_ID}/ml-pipeline-dataflow-tftbq-taxi 32 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/dataflow/taxi_schema/taxi_schema/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amygdala/code-snippets/9fa00e5a4ec4ae3eb5a2fa212d1ecefb4009eba7/ml/kubeflow-pipelines/components/older/dataflow/taxi_schema/taxi_schema/__init__.py -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/dataflow/tfma/analysis/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Setup dependencies for deployment of the analyzer.""" 16 | 17 | import setuptools 18 | 19 | if __name__ == '__main__': 20 | setuptools.setup(name='taxi_schema', version='1.0', 21 | packages=setuptools.find_packages(), 22 | install_requires=[ 23 | 'tensorflow==1.15.4', 24 | 'tensorflow-model-analysis==0.9.2', 25 | 'tensorflow-serving-api==1.9.0', 26 | 'tensorflow-transform==0.11.0']) 27 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/dataflow/tft/transform/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Setup dependencies for cloud deployment.""" 16 | import setuptools 17 | 18 | if __name__ == '__main__': 19 | setuptools.setup(name='taxi_schema', version='1.0', 20 | packages=setuptools.find_packages(), 21 | install_requires=[ 22 | 'jupyter==1.0', 23 | 'tensorflow==1.15.4', 24 | 'tensorflow-model-analysis==0.9.2', 25 | 'tensorflow-serving-api==1.9.0', 26 | 'tensorflow-transform==0.11.0']) 27 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/kubeflow/containers/launcher/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | FROM ubuntu:16.04 16 | 17 | RUN apt-get update -y 18 | 19 | RUN apt-get install --no-install-recommends -y -q ca-certificates python-dev python-setuptools wget unzip git 20 | 21 | RUN easy_install pip 22 | 23 | RUN pip install pyyaml==3.12 six==1.11.0 requests==2.18.4 tensorflow==1.10.0 \ 24 | kubernetes google-api-python-client retrying 25 | RUN pip install google-cloud-storage 26 | 27 | RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \ 28 | unzip -qq google-cloud-sdk.zip -d tools && \ 29 | rm google-cloud-sdk.zip && \ 30 | tools/google-cloud-sdk/install.sh --usage-reporting=false \ 31 | --path-update=false --bash-completion=false \ 32 | --disable-installation-options && \ 33 | tools/google-cloud-sdk/bin/gcloud -q components update \ 34 | gcloud core gsutil && \ 35 | tools/google-cloud-sdk/bin/gcloud -q components install kubectl && \ 36 | tools/google-cloud-sdk/bin/gcloud config set component_manager/disable_update_check true && \ 37 | touch /tools/google-cloud-sdk/lib/third_party/google.py 38 | 39 | RUN wget -nv https://github.com/ksonnet/ksonnet/releases/download/v0.13.1/ks_0.13.1_linux_amd64.tar.gz && \ 40 | tar -xvzf ks_0.13.1_linux_amd64.tar.gz && \ 41 | mkdir -p /tools/ks/bin && \ 42 | cp ./ks_0.13.1_linux_amd64/ks /tools/ks/bin && \ 43 | rm ks_0.13.1_linux_amd64.tar.gz && \ 44 | rm -r ks_0.13.1_linux_amd64 45 | 46 | RUN wget https://github.com/kubeflow/tf-operator/archive/v0.4.0-rc.1.zip && \ 47 | unzip v0.4.0-rc.1.zip && \ 48 | mv tf-operator-0.4.0-rc.1 tf-operator 49 | 50 | ENV PYTHONPATH $PYTHONPATH:/tf-operator 51 | 52 | RUN wget https://github.com/kubeflow/testing/archive/master.zip && \ 53 | unzip master.zip && \ 54 | mv testing-master testing 55 | 56 | ENV PYTHONPATH $PYTHONPATH:/testing/py 57 | 58 | ENV PATH $PATH:/tools/google-cloud-sdk/bin:/tools/ks/bin 59 | 60 | ADD build /ml 61 | 62 | ENTRYPOINT ["python", "/ml/train.py"] 63 | 64 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/kubeflow/containers/launcher/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright 2018 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | if [ -z "$1" ] 18 | then 19 | PROJECT_ID=$(gcloud config config-helper --format "value(configuration.properties.core.project)") 20 | else 21 | PROJECT_ID=$1 22 | fi 23 | 24 | mkdir -p ./build 25 | rsync -arvp "../../launcher"/ ./build/ 26 | 27 | docker build -t ml-pipeline-kubeflow-tf-taxi . 
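# The launcher sources staged under ./build are now baked into the image; remove the staging dir, then tag and push so the pipeline cluster can pull the image.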
28 | rm -rf ./build 29 | 30 | docker tag ml-pipeline-kubeflow-tf-taxi gcr.io/${PROJECT_ID}/ml-pipeline-kubeflow-tf-taxi 31 | docker push gcr.io/${PROJECT_ID}/ml-pipeline-kubeflow-tf-taxi 32 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/kubeflow/containers/tf-serving-gh/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | FROM ubuntu:16.04 16 | 17 | RUN apt-get update -y 18 | 19 | RUN apt-get install --no-install-recommends -y -q ca-certificates python-dev python-setuptools wget unzip 20 | 21 | RUN easy_install pip 22 | 23 | RUN pip install pyyaml==3.12 six==1.11.0 requests==2.18.4 tensorflow==1.11.0 24 | 25 | RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \ 26 | unzip -qq google-cloud-sdk.zip -d tools && \ 27 | rm google-cloud-sdk.zip && \ 28 | tools/google-cloud-sdk/install.sh --usage-reporting=false \ 29 | --path-update=false --bash-completion=false \ 30 | --disable-installation-options && \ 31 | tools/google-cloud-sdk/bin/gcloud -q components update \ 32 | gcloud core gsutil && \ 33 | tools/google-cloud-sdk/bin/gcloud -q components install kubectl && \ 34 | tools/google-cloud-sdk/bin/gcloud config set component_manager/disable_update_check true && \ 35 | touch /tools/google-cloud-sdk/lib/third_party/google.py 36 | 37 | RUN wget -nv https://github.com/ksonnet/ksonnet/releases/download/v0.11.0/ks_0.11.0_linux_amd64.tar.gz && \ 38 | tar -xvzf ks_0.11.0_linux_amd64.tar.gz && \ 39 | mkdir -p /tools/ks/bin && \ 40 | cp ./ks_0.11.0_linux_amd64/ks /tools/ks/bin && \ 41 | rm ks_0.11.0_linux_amd64.tar.gz && \ 42 | rm -r ks_0.11.0_linux_amd64 43 | 44 | ENV PATH $PATH:/tools/google-cloud-sdk/bin:/tools/ks/bin 45 | 46 | ADD build /ml 47 | 48 | ENTRYPOINT ["python", "/ml/deploy-tf-serve.py"] 49 | 50 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/kubeflow/containers/tf-serving-gh/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright 2018 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | 17 | if [ -z "$1" ] 18 | then 19 | PROJECT_ID=$(gcloud config config-helper --format "value(configuration.properties.core.project)") 20 | else 21 | PROJECT_ID=$1 22 | fi 23 | 24 | mkdir -p ./build 25 | rsync -arvp "../../tf-serving-gh"/ ./build/ 26 | 27 | docker build -t ml-pipeline-kubeflow-tfserve . 28 | rm -rf ./build 29 | 30 | docker tag ml-pipeline-kubeflow-tfserve gcr.io/${PROJECT_ID}/ml-pipeline-kubeflow-tfserve 31 | docker push gcr.io/${PROJECT_ID}/ml-pipeline-kubeflow-tfserve 32 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/kubeflow/containers/tf-serving/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | FROM ubuntu:16.04 16 | 17 | RUN apt-get update -y 18 | 19 | RUN apt-get install --no-install-recommends -y -q ca-certificates python-dev python-setuptools wget unzip 20 | 21 | RUN easy_install pip 22 | 23 | RUN pip install pyyaml==3.12 six==1.11.0 requests==2.18.4 tensorflow==1.11.0 24 | 25 | RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \ 26 | unzip -qq google-cloud-sdk.zip -d tools && \ 27 | rm google-cloud-sdk.zip && \ 28 | tools/google-cloud-sdk/install.sh --usage-reporting=false \ 29 | --path-update=false --bash-completion=false \ 30 | --disable-installation-options && \ 31 | tools/google-cloud-sdk/bin/gcloud -q components update \ 32 | gcloud core gsutil && \ 33 | tools/google-cloud-sdk/bin/gcloud -q components install kubectl && \ 34 | tools/google-cloud-sdk/bin/gcloud config set component_manager/disable_update_check true && \ 35 | touch /tools/google-cloud-sdk/lib/third_party/google.py 36 | 37 | RUN wget -nv https://github.com/ksonnet/ksonnet/releases/download/v0.11.0/ks_0.11.0_linux_amd64.tar.gz && \ 38 | tar -xvzf ks_0.11.0_linux_amd64.tar.gz && \ 39 | mkdir -p /tools/ks/bin && \ 40 | cp ./ks_0.11.0_linux_amd64/ks /tools/ks/bin && \ 41 | rm ks_0.11.0_linux_amd64.tar.gz && \ 42 | rm -r ks_0.11.0_linux_amd64 43 | 44 | ENV PATH $PATH:/tools/google-cloud-sdk/bin:/tools/ks/bin 45 | 46 | ADD build /ml 47 | 48 | ENTRYPOINT ["python", "/ml/deploy-tf-serve.py"] 49 | 50 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/kubeflow/containers/tf-serving/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright 2018 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | if [ -z "$1" ] 18 | then 19 | PROJECT_ID=$(gcloud config config-helper --format "value(configuration.properties.core.project)") 20 | else 21 | PROJECT_ID=$1 22 | fi 23 | 24 | mkdir -p ./build 25 | rsync -arvp "../../tf-serving"/ ./build/ 26 | 27 | docker build -t ml-pipeline-kubeflow-tfserve-taxi . 28 | rm -rf ./build 29 | 30 | docker tag ml-pipeline-kubeflow-tfserve-taxi gcr.io/${PROJECT_ID}/ml-pipeline-kubeflow-tfserve-taxi 31 | docker push gcr.io/${PROJECT_ID}/ml-pipeline-kubeflow-tfserve-taxi 32 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/kubeflow/containers/trainer/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | FROM tensorflow/tensorflow:1.11.0 16 | 17 | RUN apt-get update -y && \ 18 | apt-get install --no-install-recommends -y -q build-essential && \ 19 | pip install pyyaml==3.12 six==1.11.0 \ 20 | tensorflow-transform==0.11.0 \ 21 | tensorflow-model-analysis==0.9.2 && \ 22 | apt-get --purge autoremove -y build-essential 23 | 24 | ADD build /ml 25 | WORKDIR /ml 26 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/kubeflow/containers/trainer/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright 2018 Google Inc. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | if [ -z "$1" ] 18 | then 19 | PROJECT_ID=$(gcloud config config-helper --format "value(configuration.properties.core.project)") 20 | else 21 | PROJECT_ID=$1 22 | fi 23 | 24 | mkdir -p ./build 25 | rsync -arvp "../../taxi_model"/ ./build/ 26 | 27 | docker build -f Dockerfile -t ml-pipeline-kubeflow-trainer-taxi . 
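# The taxi model sources staged under ./build are now baked into the image; remove the staging dir, then tag and push the trainer image to GCR.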
28 | rm -rf ./build 29 | 30 | docker tag ml-pipeline-kubeflow-trainer-taxi gcr.io/${PROJECT_ID}/ml-pipeline-kubeflow-trainer-taxi 31 | docker push gcr.io/${PROJECT_ID}/ml-pipeline-kubeflow-trainer-taxi 32 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/kubeflow/launcher/train.template.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | apiVersion: kubeflow.org/v1beta1 16 | kind: TFJob 17 | metadata: 18 | generateName: tfjob 19 | namespace: default 20 | spec: 21 | tfReplicaSpecs: 22 | PS: 23 | replicas: 1 24 | restartPolicy: OnFailure 25 | template: 26 | spec: 27 | containers: 28 | - name: tensorflow 29 | image: gcr.io/google-samples/ml-pipeline-kubeflow-trainer-taxi 30 | command: 31 | - python 32 | - -m 33 | - trainer.task 34 | env: 35 | - name: GOOGLE_APPLICATION_CREDENTIALS 36 | value: "/etc/secrets/user-gcp-sa.json" 37 | volumeMounts: 38 | - name: sa 39 | mountPath: "/etc/secrets" 40 | readOnly: true 41 | volumes: 42 | - name: sa 43 | secret: 44 | secretName: user-gcp-sa 45 | Worker: 46 | replicas: 1 47 | restartPolicy: OnFailure 48 | template: 49 | spec: 50 | containers: 51 | - name: tensorflow 52 | image: gcr.io/google-samples/ml-pipeline-kubeflow-trainer-taxi 53 | command: 54 | - python 55 | - -m 56 | - trainer.task 57 | env: 58 | - name: GOOGLE_APPLICATION_CREDENTIALS 59 | value: "/etc/secrets/user-gcp-sa.json" 60 | volumeMounts: 61 | - name: sa 62 | mountPath: "/etc/secrets" 63 | readOnly: true 64 | volumes: 65 | - name: sa 66 | secret: 67 | secretName: user-gcp-sa 68 | Master: 69 | replicas: 1 70 | restartPolicy: OnFailure 71 | template: 72 | spec: 73 | containers: 74 | - name: tensorflow 75 | image: gcr.io/google-samples/ml-pipeline-kubeflow-trainer-taxi 76 | command: 77 | - python 78 | - -m 79 | - trainer.task 80 | env: 81 | - name: GOOGLE_APPLICATION_CREDENTIALS 82 | value: "/etc/secrets/user-gcp-sa.json" 83 | volumeMounts: 84 | - name: sa 85 | mountPath: "/etc/secrets" 86 | readOnly: true 87 | volumes: 88 | - name: sa 89 | secret: 90 | secretName: user-gcp-sa 91 | 92 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/kubeflow/taxi_model/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/kubeflow/taxi_model/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | from setuptools import setup, find_packages 17 | 18 | 19 | setup( 20 | name='trainer', 21 | version='1.0.0', 22 | packages=find_packages(), 23 | description='Classifier', 24 | author='Google', 25 | keywords=[ 26 | ], 27 | license="Apache Software License", 28 | long_description=""" 29 | """, 30 | install_requires=[ 31 | 'tensorflow==1.15.4', 32 | ], 33 | package_data={ 34 | }, 35 | data_files=[], 36 | ) 37 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/kubeflow/taxi_model/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/kubeflow/tf-serving-gh/tf-serve-template.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | annotations: 6 | getambassador.io/config: |- 7 | --- 8 | apiVersion: ambassador/v0 9 | kind: Mapping 10 | name: tfserving-predict-mapping-MODEL_NAME 11 | prefix: tfserving/models/MODEL_NAME/ 12 | rewrite: /v1/models/MODEL_NAME:predict 13 | method: POST 14 | service: MODEL_NAME.kubeflow:8500 15 | labels: 16 | app: MODEL_NAME 17 | name: MODEL_NAME 18 | namespace: KUBEFLOW_NAMESPACE 19 | spec: 20 | ports: 21 | - name: grpc-tf-serving 22 | port: 9000 23 | targetPort: 9000 24 | - name: tf-serving-builtin-http 25 | port: 8500 26 | targetPort: 8500 27 | selector: 28 | app: MODEL_NAME 29 | type: ClusterIP 30 | --- 31 | apiVersion: extensions/v1beta1 32 | kind: Deployment 33 | metadata: 34 | labels: 35 | app: MODEL_NAME 36 | name: MODEL_NAME 37 | namespace: KUBEFLOW_NAMESPACE 38 | spec: 39 | replicas: 1 40 | template: 41 | metadata: 42 | labels: 43 | app: MODEL_NAME 44 | version: v1 45 | spec: 46 | containers: 47 | - args: 48 | - --port=9000 49 | - --rest_api_port=8500 50 | - --model_name=MODEL_NAME 51 | - --model_base_path=MODEL_PATH 52 | command: 53 | - /usr/bin/tensorflow_model_server 54 | image: tensorflow/serving 55 | imagePullPolicy: IfNotPresent 56 | livenessProbe: 57 | initialDelaySeconds: 30 58 | periodSeconds: 30 59 | tcpSocket: 60 | port: 9000 61 | name: MODEL_NAME 62 | ports: 63 | - containerPort: 9000 64 | - containerPort: 8500 65 | resources: 66 | limits: 67 | cpu: "4" 68 | memory: 4Gi 69 | requests: 70 | cpu: "1" 71 | memory: 1Gi 72 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/kubeflow/tf-serving/tf-serve-template.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | labels: 6 | app: MODEL_NAME 7 | name: MODEL_NAME 8 | namespace: KUBEFLOW_NAMESPACE 9 | spec: 10 | ports: 11 | - name: grpc-tf-serving 12 | port: 9000 13 | targetPort: 9000 14 | - name: tf-serving-builtin-http 15 | port: 8500 16 | targetPort: 8500 17 | selector: 18 | app: MODEL_NAME 19 | # type: LoadBalancer 20 | type: ClusterIP 21 | --- 22 | apiVersion: extensions/v1beta1 23 | kind: Deployment 24 | metadata: 25 | labels: 26 | app: MODEL_NAME 27 | name: MODEL_NAME 28 | namespace: KUBEFLOW_NAMESPACE 29 | spec: 30 | replicas: 1 31 | template: 32 | metadata: 33 | labels: 34 | app: MODEL_NAME 35 | version: v1 36 | spec: 37 | containers: 38 | - args: 39 | - --port=9000 40 | - --rest_api_port=8500 41 | - --model_name=MODEL_NAME 42 | - --model_base_path=MODEL_PATH 43 | command: 44 | - /usr/bin/tensorflow_model_server 45 | image: tensorflow/serving 46 | imagePullPolicy: IfNotPresent 47 | livenessProbe: 48 | initialDelaySeconds: 30 49 | periodSeconds: 30 50 | tcpSocket: 51 | port: 9000 52 | name: MODEL_NAME 53 | ports: 54 | - containerPort: 9000 55 | - containerPort: 8500 56 | resources: 57 | limits: 58 | cpu: "4" 59 | memory: 4Gi 60 | requests: 61 | cpu: "1" 62 | memory: 1Gi 63 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/containers/base/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. 
All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | FROM tensorflow/tensorflow:1.12.0-gpu 16 | 17 | RUN apt-get update -y 18 | 19 | RUN apt-get install --no-install-recommends -y -q ca-certificates python-dev python-setuptools \ 20 | wget unzip git 21 | 22 | RUN easy_install pip 23 | 24 | RUN pip install tensorflow-probability==0.5 25 | RUN pip install tensor2tensor==1.11.0 26 | RUN pip install tensorflow_hub==0.1.1 27 | RUN pip install pyyaml==3.12 six==1.11.0 28 | 29 | RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \ 30 | unzip -qq google-cloud-sdk.zip -d /tools && \ 31 | rm google-cloud-sdk.zip && \ 32 | /tools/google-cloud-sdk/install.sh --usage-reporting=false \ 33 | --path-update=false --bash-completion=false \ 34 | --disable-installation-options && \ 35 | /tools/google-cloud-sdk/bin/gcloud -q components update \ 36 | gcloud core gsutil && \ 37 | /tools/google-cloud-sdk/bin/gcloud config set component_manager/disable_update_check true && \ 38 | touch /tools/google-cloud-sdk/lib/third_party/google.py 39 | 40 | ADD build /ml 41 | 42 | ENV PATH $PATH:/tools/node/bin:/tools/google-cloud-sdk/bin 43 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/containers/base/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright 2018 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | mkdir -p ./build 18 | rsync -arvp "../../t2t-train"/ ./build/ 19 | 20 | docker build -t ml-pipeline-t2t-base . 21 | rm -rf ./build 22 | 23 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/containers/t2t_app/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | FROM tensorflow/tensorflow:1.12.0 16 | 17 | RUN apt-get update -y 18 | 19 | RUN apt-get install --no-install-recommends -y -q ca-certificates python-dev python-setuptools \ 20 | wget unzip git 21 | 22 | RUN easy_install pip 23 | 24 | RUN pip install tensorflow-probability==0.5 25 | RUN pip install tensor2tensor==1.11.0 26 | RUN pip install tensorflow-serving-api 27 | RUN pip install gunicorn 28 | RUN pip install pyyaml==3.12 six==1.11.0 29 | RUN pip install pandas 30 | 31 | RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \ 32 | unzip -qq google-cloud-sdk.zip -d /tools && \ 33 | rm google-cloud-sdk.zip && \ 34 | /tools/google-cloud-sdk/install.sh --usage-reporting=false \ 35 | --path-update=false --bash-completion=false \ 36 | --disable-installation-options && \ 37 | /tools/google-cloud-sdk/bin/gcloud -q components update \ 38 | gcloud core gsutil && \ 39 | /tools/google-cloud-sdk/bin/gcloud config set component_manager/disable_update_check true && \ 40 | touch /tools/google-cloud-sdk/lib/third_party/google.py 41 | 42 | ADD build /ml 43 | 44 | ENV PATH $PATH:/tools/node/bin:/tools/google-cloud-sdk/bin 45 | 46 | WORKDIR /ml/app 47 | 48 | CMD gunicorn -w 4 -b :8080 main:app 49 | 50 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/containers/t2t_app/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright 2018 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | if [ -z "$1" ] 18 | then 19 | PROJECT_ID=$(gcloud config config-helper --format "value(configuration.properties.core.project)") 20 | else 21 | PROJECT_ID=$1 22 | fi 23 | 24 | mkdir -p ./build 25 | rsync -arvp "../../t2t-app"/ ./build/ 26 | 27 | 28 | docker build -t ml-pipeline-t2tapp . 29 | docker tag ml-pipeline-t2tapp gcr.io/${PROJECT_ID}/ml-pipeline-t2tapp 30 | docker push gcr.io/${PROJECT_ID}/ml-pipeline-t2tapp 31 | 32 | rm -rf ./build -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/containers/t2t_proc/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | FROM tensorflow/tensorflow:1.12.0 16 | 17 | RUN apt-get update -y 18 | 19 | RUN apt-get install --no-install-recommends -y -q ca-certificates python-dev python-setuptools \ 20 | wget unzip git 21 | 22 | RUN easy_install pip 23 | 24 | RUN pip install tensorflow-probability==0.5 25 | RUN pip install tensor2tensor==1.11.0 26 | RUN pip install pyyaml==3.12 six==1.11.0 27 | RUN pip install google-cloud-storage 28 | 29 | 30 | RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \ 31 | unzip -qq google-cloud-sdk.zip -d /tools && \ 32 | rm google-cloud-sdk.zip && \ 33 | /tools/google-cloud-sdk/install.sh --usage-reporting=false \ 34 | --path-update=false --bash-completion=false \ 35 | --disable-installation-options && \ 36 | /tools/google-cloud-sdk/bin/gcloud -q components update \ 37 | gcloud core gsutil && \ 38 | /tools/google-cloud-sdk/bin/gcloud config set component_manager/disable_update_check true && \ 39 | touch /tools/google-cloud-sdk/lib/third_party/google.py 40 | 41 | ADD build /ml 42 | 43 | ENV PATH $PATH:/tools/node/bin:/tools/google-cloud-sdk/bin 44 | 45 | 46 | WORKDIR /ml 47 | 48 | RUN mkdir -p /ml/gh_data 49 | RUN mkdir -p /ml/gh_data/tmp 50 | 51 | ENTRYPOINT ["python", "/ml/datagen.py"] 52 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/containers/t2t_proc/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright 2018 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | if [ -z "$1" ] 18 | then 19 | PROJECT_ID=$(gcloud config config-helper --format "value(configuration.properties.core.project)") 20 | else 21 | PROJECT_ID=$1 22 | fi 23 | 24 | mkdir -p ./build 25 | rsync -arvp "../../t2t-proc"/ ./build/ 26 | 27 | docker build -t ml-pipeline-t2tproc . 28 | docker tag ml-pipeline-t2tproc gcr.io/${PROJECT_ID}/ml-pipeline-t2tproc 29 | docker push gcr.io/${PROJECT_ID}/ml-pipeline-t2tproc 30 | 31 | rm -rf ./build -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/containers/t2t_train/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | FROM ml-pipeline-t2t-base 16 | 17 | ENTRYPOINT ["python", "/ml/train_model.py"] 18 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/containers/t2t_train/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright 2018 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | if [ -z "$1" ] 18 | then 19 | PROJECT_ID=$(gcloud config config-helper --format "value(configuration.properties.core.project)") 20 | else 21 | PROJECT_ID=$1 22 | fi 23 | 24 | # build base image 25 | pushd ../base 26 | ./build.sh 27 | popd 28 | 29 | docker build -t ml-pipeline-t2ttrain . 30 | docker tag ml-pipeline-t2ttrain gcr.io/${PROJECT_ID}/ml-pipeline-t2ttrain 31 | docker push gcr.io/${PROJECT_ID}/ml-pipeline-t2ttrain 32 | 33 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/containers/webapp-launcher/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | FROM ubuntu:16.04 16 | 17 | RUN apt-get update -y 18 | 19 | RUN apt-get install --no-install-recommends -y -q ca-certificates python-dev python-setuptools wget unzip 20 | 21 | RUN easy_install pip 22 | 23 | RUN pip install pyyaml==3.12 six==1.11.0 requests==2.18.4 tensorflow==1.12.0 24 | 25 | RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \ 26 | unzip -qq google-cloud-sdk.zip -d tools && \ 27 | rm google-cloud-sdk.zip && \ 28 | tools/google-cloud-sdk/install.sh --usage-reporting=false \ 29 | --path-update=false --bash-completion=false \ 30 | --disable-installation-options && \ 31 | tools/google-cloud-sdk/bin/gcloud -q components update \ 32 | gcloud core gsutil && \ 33 | tools/google-cloud-sdk/bin/gcloud -q components install kubectl && \ 34 | tools/google-cloud-sdk/bin/gcloud config set component_manager/disable_update_check true && \ 35 | touch /tools/google-cloud-sdk/lib/third_party/google.py 36 | 37 | RUN wget -nv https://github.com/ksonnet/ksonnet/releases/download/v0.11.0/ks_0.11.0_linux_amd64.tar.gz && \ 38 | tar -xvzf ks_0.11.0_linux_amd64.tar.gz && \ 39 | mkdir -p /tools/ks/bin && \ 40 | cp ./ks_0.11.0_linux_amd64/ks /tools/ks/bin && \ 41 | rm ks_0.11.0_linux_amd64.tar.gz && \ 42 | rm -r ks_0.11.0_linux_amd64 43 | 44 | ENV PATH $PATH:/tools/google-cloud-sdk/bin:/tools/ks/bin 45 | 46 | ADD build /ml 47 | 48 | ENTRYPOINT ["python", "/ml/deploy-webapp.py"] 49 | 50 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/containers/webapp-launcher/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright 2018 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | if [ -z "$1" ] 18 | then 19 | PROJECT_ID=$(gcloud config config-helper --format "value(configuration.properties.core.project)") 20 | else 21 | PROJECT_ID=$1 22 | fi 23 | 24 | mkdir -p ./build 25 | rsync -arvp "../../webapp-launcher"/ ./build/ 26 | 27 | docker build -t ml-pipeline-webapp-launcher . 
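# Same pattern as the other component builds: remove the staging dir, then tag and push the webapp-launcher image.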
28 | rm -rf ./build 29 | 30 | docker tag ml-pipeline-webapp-launcher gcr.io/${PROJECT_ID}/ml-pipeline-webapp-launcher 31 | docker push gcr.io/${PROJECT_ID}/ml-pipeline-webapp-launcher 32 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/t2t-app/app/ghsumm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amygdala/code-snippets/9fa00e5a4ec4ae3eb5a2fa212d1ecefb4009eba7/ml/kubeflow-pipelines/components/older/t2t/t2t-app/app/ghsumm/__init__.py -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/t2t-app/app/ghsumm/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages 2 | from setuptools import setup 3 | 4 | REQUIRED_PACKAGES = [ 5 | 'tensor2tensor' 6 | ] 7 | 8 | setup( 9 | name='ghsumm', 10 | version='0.1', 11 | author='Google', 12 | author_email='training-feedback@cloud.google.com', 13 | install_requires=REQUIRED_PACKAGES, 14 | packages=find_packages(), 15 | include_package_data=True, 16 | description='GitHub Problem', 17 | requires=[] 18 | ) 19 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/t2t-app/app/ghsumm/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | from . import problem 2 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/t2t-app/app/ghsumm/trainer/problem.py: -------------------------------------------------------------------------------- 1 | import csv 2 | 3 | from tensor2tensor.utils import registry 4 | from tensor2tensor.data_generators import problem 5 | from tensor2tensor.data_generators import text_problems 6 | 7 | 8 | @registry.register_problem 9 | class GhProblem(text_problems.Text2TextProblem): 10 | """... predict GH issue title from body...""" 11 | 12 | @property 13 | def approx_vocab_size(self): 14 | return 2**13 # ~8k 15 | 16 | @property 17 | def is_generate_per_split(self): 18 | # False: generate_data will shard the data into TRAIN and EVAL for us.
19 | return False 20 | 21 | @property 22 | def max_subtoken_length(self): 23 | return 4 24 | 25 | @property 26 | def dataset_splits(self): 27 | """Splits of data to produce and number of output shards for each.""" 28 | # 10% evaluation data 29 | return [{ 30 | "split": problem.DatasetSplit.TRAIN, 31 | "shards": 90, 32 | }, { 33 | "split": problem.DatasetSplit.EVAL, 34 | "shards": 10, 35 | }] 36 | 37 | def generate_samples(self, data_dir, tmp_dir, dataset_split): #pylint: disable=unused-argument 38 | with open('gh_data/github_issues.csv') as csvfile: 39 | ireader = csv.reader((line.replace('\0', '') for line in csvfile), delimiter=',' 40 | # quotechar='|' 41 | ) 42 | NUM_ROWS = 1500000 43 | i = 0 44 | for row in ireader: 45 | if i >= NUM_ROWS: 46 | break 47 | yield { 48 | "inputs": row[2], # body 49 | "targets": row[1] # issue title 50 | } 51 | i += 1 52 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/t2t-proc/ghsumm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amygdala/code-snippets/9fa00e5a4ec4ae3eb5a2fa212d1ecefb4009eba7/ml/kubeflow-pipelines/components/older/t2t/t2t-proc/ghsumm/__init__.py -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/t2t-proc/ghsumm/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages 2 | from setuptools import setup 3 | 4 | REQUIRED_PACKAGES = [ 5 | 'tensor2tensor' 6 | ] 7 | 8 | setup( 9 | name='ghsumm', 10 | version='0.1', 11 | author='Google', 12 | author_email='training-feedback@cloud.google.com', 13 | install_requires=REQUIRED_PACKAGES, 14 | packages=find_packages(), 15 | include_package_data=True, 16 | description='GitHub Problem', 17 | requires=[] 18 | ) 19 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/t2t-proc/ghsumm/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | from . import problem 2 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/t2t-proc/ghsumm/trainer/problem.py: -------------------------------------------------------------------------------- 1 | import csv 2 | 3 | from tensor2tensor.utils import registry 4 | from tensor2tensor.data_generators import problem 5 | from tensor2tensor.data_generators import text_problems 6 | 7 | 8 | @registry.register_problem 9 | class GhProblem(text_problems.Text2TextProblem): 10 | """... predict GH issue title from body...""" 11 | 12 | @property 13 | def approx_vocab_size(self): 14 | return 2**13 # ~8k 15 | 16 | @property 17 | def is_generate_per_split(self): 18 | # False: generate_data will shard the data into TRAIN and EVAL for us.
19 | return False 20 | 21 | @property 22 | def max_subtoken_length(self): 23 | return 4 24 | 25 | @property 26 | def dataset_splits(self): 27 | """Splits of data to produce and number of output shards for each.""" 28 | # 10% evaluation data 29 | return [{ 30 | "split": problem.DatasetSplit.TRAIN, 31 | "shards": 90, 32 | }, { 33 | "split": problem.DatasetSplit.EVAL, 34 | "shards": 10, 35 | }] 36 | 37 | def generate_samples(self, data_dir, tmp_dir, dataset_split): #pylint: disable=unused-argument 38 | with open('/ml/gh_data/github_issues.csv') as csvfile: 39 | ireader = csv.reader((line.replace('\0', '') for line in csvfile), delimiter=',' 40 | # quotechar='|' 41 | ) 42 | NUM_ROWS = 50000 43 | i = 0 44 | for row in ireader: 45 | if i >= NUM_ROWS: 46 | break 47 | yield { 48 | "inputs": row[2], # body 49 | "targets": row[1] # issue title 50 | } 51 | i += 1 52 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/t2t-train/ghsumm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amygdala/code-snippets/9fa00e5a4ec4ae3eb5a2fa212d1ecefb4009eba7/ml/kubeflow-pipelines/components/older/t2t/t2t-train/ghsumm/__init__.py -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/t2t-train/ghsumm/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages 2 | from setuptools import setup 3 | 4 | REQUIRED_PACKAGES = [ 5 | 'tensor2tensor' 6 | ] 7 | 8 | setup( 9 | name='ghsumm', 10 | version='0.1', 11 | author='Google', 12 | author_email='training-feedback@cloud.google.com', 13 | install_requires=REQUIRED_PACKAGES, 14 | packages=find_packages(), 15 | include_package_data=True, 16 | description='GitHub issue summarization problem', 17 | requires=[] 18 | ) 19 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/t2t-train/ghsumm/trainer/__init__.py: -------------------------------------------------------------------------------- 1 | from . import problem 2 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/t2t-train/ghsumm/trainer/problem.py: -------------------------------------------------------------------------------- 1 | import csv 2 | 3 | from tensor2tensor.utils import registry 4 | from tensor2tensor.data_generators import problem 5 | from tensor2tensor.data_generators import text_problems 6 | 7 | 8 | @registry.register_problem 9 | class GhProblem(text_problems.Text2TextProblem): 10 | """... predict GH issue title from body...""" 11 | 12 | @property 13 | def approx_vocab_size(self): 14 | return 2**13 # ~8k 15 | 16 | @property 17 | def is_generate_per_split(self): 18 | # False: generate_data will shard the data into TRAIN and EVAL for us. 
19 | return False 20 | 21 | @property 22 | def max_subtoken_length(self): 23 | return 4 24 | 25 | @property 26 | def dataset_splits(self): 27 | """Splits of data to produce and number of output shards for each.""" 28 | # 10% evaluation data 29 | return [{ 30 | "split": problem.DatasetSplit.TRAIN, 31 | "shards": 90, 32 | }, { 33 | "split": problem.DatasetSplit.EVAL, 34 | "shards": 10, 35 | }] 36 | 37 | def generate_samples(self, data_dir, tmp_dir, dataset_split): #pylint: disable=unused-argument 38 | with open('gh_data/github_issues.csv') as csvfile: 39 | ireader = csv.reader((line.replace('\0', '') for line in csvfile), delimiter=',' 40 | # quotechar='|' 41 | ) 42 | NUM_ROWS = 6000000 43 | i = 0 44 | for row in ireader: 45 | if i >= NUM_ROWS: 46 | break 47 | yield { 48 | "inputs": row[2], # body 49 | "targets": row[1] # issue title 50 | } 51 | i += 1 52 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/t2t-train/train_model.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """...""" 16 | 17 | import argparse 18 | import json 19 | import subprocess 20 | 21 | 22 | def main(): 23 | parser = argparse.ArgumentParser(description='ML Trainer') 24 | parser.add_argument( 25 | '--model-dir', 26 | help='...', 27 | required=True) 28 | parser.add_argument( 29 | '--data-dir', 30 | help='...', 31 | required=True) 32 | parser.add_argument( 33 | '--checkpoint-dir', 34 | help='...', 35 | required=True) 36 | parser.add_argument( 37 | '--train-steps', 38 | help='...', 39 | required=True) 40 | parser.add_argument( 41 | '--deploy-webapp', 42 | help='...', 43 | required=True) 44 | 45 | args = parser.parse_args() 46 | 47 | # Create metadata.json file for visualization. 48 | metadata = { 49 | 'outputs' : [{ 50 | 'type': 'tensorboard', 51 | 'source': args.model_dir, 52 | }] 53 | } 54 | with open('/mlpipeline-ui-metadata.json', 'w') as f: 55 | json.dump(metadata, f) 56 | 57 | problem = 'gh_problem' 58 | data_dir = args.data_dir 59 | print("data dir: %s" % data_dir) 60 | # copy the model starting point 61 | model_startpoint = args.checkpoint_dir 62 | print("model_startpoint: %s" % model_startpoint) 63 | model_dir = args.model_dir 64 | print("model_dir: %s" % model_dir) 65 | model_copy_command = ['gsutil', '-m', 'cp', '-r', model_startpoint, model_dir 66 | ] 67 | print(model_copy_command) 68 | result1 = subprocess.call(model_copy_command) 69 | print(result1) 70 | 71 | print('training steps (total): %s' % args.train_steps) 72 | 73 | # Then run the training for N steps from there. 
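# For reference, the list assembled below is equivalent to a CLI invocation
# along these lines (bucket paths and step count illustrative, not from a real run):
#   t2t-trainer --data_dir=gs://BUCKET/data --t2t_usr_dir=/ml/ghsumm/trainer \
#     --problem=gh_problem --model=transformer --hparams_set=transformer_prepend \
#     --output_dir=gs://BUCKET/model --job-dir=gs://BUCKET/model \
#     --train_steps=10000 --eval_throttle_seconds=240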
74 | model_train_command = ['t2t-trainer', '--data_dir', data_dir, 75 | '--t2t_usr_dir', '/ml/ghsumm/trainer', 76 | '--problem', problem, 77 | '--model', 'transformer', '--hparams_set', 'transformer_prepend', '--output_dir', model_dir, 78 | '--job-dir', model_dir, 79 | '--train_steps', args.train_steps, '--eval_throttle_seconds', '240', 80 | ] 81 | print(model_train_command) 82 | result2 = subprocess.call(model_train_command) 83 | print(result2) 84 | 85 | # then export the model... 86 | 87 | model_export_command = ['t2t-exporter', '--model', 'transformer', 88 | '--hparams_set', 'transformer_prepend', 89 | '--problem', problem, 90 | '--t2t_usr_dir', '/ml/ghsumm/trainer', '--data_dir', data_dir, '--output_dir', model_dir] 91 | print(model_export_command) 92 | result3 = subprocess.call(model_export_command) 93 | print(result3) 94 | 95 | print("deploy-webapp arg: %s" % args.deploy_webapp) 96 | with open('/tmp/output', 'w') as f: 97 | f.write(args.deploy_webapp) 98 | 99 | if __name__ == "__main__": 100 | main() 101 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/webapp-launcher/deploy-webapp.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | import argparse 17 | import os 18 | import logging 19 | import subprocess 20 | import requests 21 | 22 | 23 | def main(): 24 | parser = argparse.ArgumentParser(description='Serving webapp') 25 | parser.add_argument( 26 | '--model_name', 27 | help='...', 28 | required=True) 29 | parser.add_argument( 30 | '--github_token', 31 | help='...', 32 | required=True) 33 | 34 | parser.add_argument('--cluster', type=str, 35 | help='GKE cluster set up for kubeflow. If set, zone must be provided. ' + 36 | 'If not set, assuming this runs in a GKE container and current ' + 37 | 'cluster is used.') 38 | parser.add_argument('--zone', type=str, help='zone of the kubeflow cluster.') 39 | args = parser.parse_args() 40 | 41 | KUBEFLOW_NAMESPACE = 'kubeflow' 42 | 43 | print("using model name: %s and namespace: %s" % (args.model_name, KUBEFLOW_NAMESPACE)) 44 | 45 | logging.getLogger().setLevel(logging.INFO) 46 | args_dict = vars(args) 47 | 48 | if args.cluster and args.zone: 49 | cluster = args_dict.pop('cluster') #pylint: disable=unused-variable 50 | zone = args_dict.pop('zone') #pylint: disable=unused-variable 51 | else: 52 | # Get cluster name and zone from metadata 53 | metadata_server = "http://metadata/computeMetadata/v1/instance/" 54 | metadata_flavor = {'Metadata-Flavor' : 'Google'} 55 | cluster = requests.get(metadata_server + "attributes/cluster-name", 56 | headers=metadata_flavor).text 57 | zone = requests.get(metadata_server + "zone", 58 | headers=metadata_flavor).text.split('/')[-1] 59 | 60 | # logging.info('Getting credentials for GKE cluster %s.' 
% cluster) 61 | # subprocess.call(['gcloud', 'container', 'clusters', 'get-credentials', cluster, 62 | # '--zone', zone]) 63 | 64 | logging.info('Generating training template.') 65 | 66 | template_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), 't2tapp-template.yaml') 67 | target_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), 't2tapp.yaml') 68 | 69 | with open(template_file, 'r') as f: 70 | with open(target_file, "w") as target: 71 | data = f.read() 72 | changed = data.replace('MODEL_NAME', args.model_name) 73 | changed1 = changed.replace('KUBEFLOW_NAMESPACE', KUBEFLOW_NAMESPACE).replace( 74 | 'GITHUB_TOKEN', args.github_token).replace( 75 | 'DATA_DIR', 'gs://aju-dev-demos-codelabs/kubecon/t2t_data_gh_all/') 76 | target.write(changed1) 77 | 78 | 79 | logging.info('deploying web app.') 80 | subprocess.call(['kubectl', 'create', '-f', '/ml/t2tapp.yaml']) 81 | 82 | 83 | if __name__ == "__main__": 84 | main() 85 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/components/older/t2t/webapp-launcher/t2tapp-template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | annotations: 5 | getambassador.io/config: |- 6 | --- 7 | apiVersion: ambassador/v0 8 | kind: Mapping 9 | name: webapp-MODEL_NAME 10 | prefix: /webapp/ 11 | rewrite: / 12 | timeout_ms: 1200000 13 | service: MODEL_NAME-webappsvc.KUBEFLOW_NAMESPACE:80 14 | name: MODEL_NAME-webappsvc 15 | labels: 16 | app: ghsumm 17 | role: frontend 18 | spec: 19 | type: ClusterIP 20 | ports: 21 | - port: 80 22 | targetPort: "http-server" 23 | selector: 24 | app: ghsumm 25 | role: frontend 26 | 27 | --- 28 | 29 | apiVersion: extensions/v1beta1 30 | kind: Deployment 31 | metadata: 32 | name: MODEL_NAME-webapp 33 | spec: 34 | replicas: 1 35 | template: 36 | metadata: 37 | labels: 38 | app: ghsumm 39 | role: frontend 40 | spec: 41 | containers: 42 | - name: MODEL_NAME-webapp 43 | image: gcr.io/google-samples/ml-pipeline-t2tapp 44 | # resources: 45 | # limits: 46 | # nvidia.com/gpu: 1 47 | imagePullPolicy: Always 48 | env: 49 | - name: TFSERVING_HOST 50 | value: MODEL_NAME.KUBEFLOW_NAMESPACE 51 | - name: TF_SERVABLE_NAME 52 | value: MODEL_NAME 53 | - name: GH_TOKEN 54 | value: GITHUB_TOKEN 55 | - name: DATADIR 56 | value: DATA_DIR 57 | ports: 58 | - name: http-server 59 | containerPort: 8080 60 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/eval_metrics_component.yaml: -------------------------------------------------------------------------------- 1 | name: Eval metrics 2 | inputs: 3 | - {name: metrics, type: String} 4 | - {name: thresholds, type: String} 5 | outputs: 6 | - {name: deploy, type: String} 7 | implementation: 8 | container: 9 | image: gcr.io/deeplearning-platform-release/tf2-cpu.2-3:latest 10 | command: 11 | - python3 12 | - -u 13 | - -c 14 | - | 15 | def eval_metrics( 16 | metrics, 17 | thresholds 18 | ): 19 | 20 | import json 21 | import logging 22 | 23 | def regression_threshold_check(metrics_info): 24 | # ... 
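# metrics_info is expected to map metric names to lists of per-epoch values,
# e.g. {"mae": [2.1, 1.4, 1.2]} (illustrative); the loop below compares the
# most recent value of each thresholded metric against its bound.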
25 | for k, v in thresholds_dict.items(): 26 | logging.info('k {}, v {}'.format(k, v)) 27 | if k in ['root_mean_squared_error', 'mae']: 28 | if metrics_info[k][-1] > v: 29 | logging.info('{} > {}; returning False'.format(metrics_info[k][-1], v)) 30 | return ('False', ) 31 | return ('deploy', ) 32 | 33 | logging.getLogger().setLevel(logging.INFO) # TODO: make level configurable 34 | 35 | thresholds_dict = json.loads(thresholds) 36 | logging.info('thresholds dict: {}'.format(thresholds_dict)) 37 | logging.info('metrics: %s', metrics) 38 | metrics_dict = json.loads(metrics) 39 | 40 | logging.info("got metrics info: %s", metrics_dict) 41 | res = regression_threshold_check(metrics_dict) 42 | logging.info('deploy decision: %s', res) 43 | return res 44 | 45 | def _serialize_str(str_value: str) -> str: 46 | if not isinstance(str_value, str): 47 | raise TypeError('Value "{}" has type "{}" instead of str.'.format(str(str_value), str(type(str_value)))) 48 | return str_value 49 | 50 | import argparse 51 | _parser = argparse.ArgumentParser(prog='Eval metrics', description='') 52 | _parser.add_argument("--metrics", dest="metrics", type=str, required=True, default=argparse.SUPPRESS) 53 | _parser.add_argument("--thresholds", dest="thresholds", type=str, required=True, default=argparse.SUPPRESS) 54 | _parser.add_argument("----output-paths", dest="_output_paths", type=str, nargs=1) 55 | _parsed_args = vars(_parser.parse_args()) 56 | _output_files = _parsed_args.pop("_output_paths", []) 57 | 58 | _outputs = eval_metrics(**_parsed_args) 59 | 60 | _output_serializers = [ 61 | _serialize_str, 62 | 63 | ] 64 | 65 | import os 66 | for idx, output_file in enumerate(_output_files): 67 | try: 68 | os.makedirs(os.path.dirname(output_file)) 69 | except OSError: 70 | pass 71 | with open(output_file, 'w') as f: 72 | f.write(_output_serializers[idx](_outputs[idx])) 73 | args: 74 | - --metrics 75 | - {inputValue: metrics} 76 | - --thresholds 77 | - {inputValue: thresholds} 78 | - '----output-paths' 79 | - {outputPath: deploy} 80 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/bikesw_training/bwmodel/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amygdala/code-snippets/9fa00e5a4ec4ae3eb5a2fa212d1ecefb4009eba7/ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/bikesw_training/bwmodel/__init__.py -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/bikesw_training/eval_metrics.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
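# Usage sketch, with hypothetical values: calling
#   eval_metrics(metrics='{"mae": [1.9, 1.4]}', thresholds='{"mae": 2.0}')
# returns ('deploy',) since the latest MAE is under its threshold, while a
# latest value above 2.0 would return ('False',).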
14 | 15 | from typing import NamedTuple 16 | # from kfp.components import InputPath, OutputPath 17 | 18 | 19 | # An example of how the model eval info could be used to make decisions about whether or not 20 | # to deploy the model. 21 | def eval_metrics( 22 | metrics: str, 23 | thresholds: str 24 | ) -> NamedTuple('Outputs', [('deploy', str)]): 25 | 26 | import json 27 | import logging 28 | 29 | def regression_threshold_check(metrics_info): 30 | # ... 31 | for k, v in thresholds_dict.items(): 32 | logging.info('k {}, v {}'.format(k, v)) 33 | if k in ['root_mean_squared_error', 'mae']: 34 | if metrics_info[k][-1] > v: 35 | logging.info('{} > {}; returning False'.format(metrics_info[k][-1], v)) 36 | return ('False', ) 37 | return ('deploy', ) 38 | 39 | logging.getLogger().setLevel(logging.INFO) # TODO: make level configurable 40 | 41 | thresholds_dict = json.loads(thresholds) 42 | logging.info('thresholds dict: {}'.format(thresholds_dict)) 43 | logging.info('metrics: %s', metrics) 44 | metrics_dict = json.loads(metrics) 45 | 46 | logging.info("got metrics info: %s", metrics_dict) 47 | res = regression_threshold_check(metrics_dict) 48 | logging.info('deploy decision: %s', res) 49 | return res 50 | 51 | 52 | if __name__ == '__main__': 53 | import kfp 54 | kfp.components.func_to_container_op(eval_metrics, 55 | output_component_file='../../eval_metrics_component.yaml', base_image='gcr.io/deeplearning-platform-release/tf2-cpu.2-3:latest') 56 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/bikesw_training/kchief_deployment_templ.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | labels: 6 | app: KTUNER_CHIEF 7 | apptype: ktuner-chief 8 | name: KTUNER_CHIEF 9 | namespace: NAMESPACE 10 | spec: 11 | ports: 12 | - name: grpc 13 | port: 9000 14 | targetPort: 9000 15 | selector: 16 | app: KTUNER_CHIEF 17 | type: ClusterIP 18 | --- 19 | apiVersion: batch/v1 20 | kind: Job 21 | metadata: 22 | labels: 23 | app: KTUNER_CHIEF 24 | apptype: ktuner-chief 25 | name: KTUNER_CHIEF-dep 26 | namespace: NAMESPACE 27 | spec: 28 | # replicas: 1 29 | template: 30 | metadata: 31 | labels: 32 | app: KTUNER_CHIEF 33 | apptype: ktuner-chief 34 | version: v1 35 | spec: 36 | containers: 37 | - args: 38 | - --epochs=EPOCHS 39 | - --tuner-dir=TUNER_DIR 40 | - --tuner-proj=TUNER_PROJ 41 | - --tuner-num=TUNER_NUM 42 | - --max-trials=MAX_TRIALS 43 | - --executions-per-trial=EXECS_PER_TRIAL 44 | - --num-best-hps=NUM_BEST_HPS 45 | - --respath=RES_PATH 46 | - --bucket-name=BUCKET_NAME 47 | image: gcr.io/google-samples/ml-pipeline-bikes-tuner 48 | env: 49 | - name: KERASTUNER_TUNER_ID 50 | value: chief 51 | - name: KERASTUNER_ORACLE_IP 52 | valueFrom: 53 | fieldRef: 54 | fieldPath: status.podIP 55 | - name: KERASTUNER_ORACLE_PORT 56 | value: "9000" 57 | imagePullPolicy: Always 58 | name: ktuner-chief 59 | ports: 60 | - name: tuner-port 61 | containerPort: 9000 62 | resources: 63 | limits: 64 | cpu: 1 65 | memory: 2Gi 66 | restartPolicy: Never 67 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/bikesw_training/ktuners_deployment_templ.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: batch/v1 3 | kind: Job 4 | metadata: 5 | labels: 6 | app: ktuner-tuner 7 | name: KTUNER_DEP_NAME 8 | 
namespace: NAMESPACE 9 | spec: 10 | # replicas: 1 11 | template: 12 | metadata: 13 | labels: 14 | app: ktuner-tuner 15 | version: v1 16 | spec: 17 | containers: 18 | - args: 19 | - --epochs=EPOCHS 20 | - --tuner-dir=TUNER_DIR 21 | - --tuner-proj=TUNER_PROJ 22 | - --tuner-num=TUNER_NUM 23 | - --max-trials=MAX_TRIALS 24 | - --executions-per-trial=EXECS_PER_TRIAL 25 | - --num-best-hps=NUM_BEST_HPS 26 | - --respath=RES_PATH 27 | - --bucket-name=BUCKET_NAME 28 | image: gcr.io/google-samples/ml-pipeline-bikes-tuner 29 | env: 30 | - name: KERASTUNER_TUNER_ID 31 | value: KTUNER_ID 32 | - name: KERASTUNER_ORACLE_IP 33 | value: KTUNER_CHIEF 34 | - name: KERASTUNER_ORACLE_PORT 35 | value: "9000" 36 | imagePullPolicy: Always 37 | name: ktuner-tuner 38 | ports: 39 | - name: tuner-port 40 | containerPort: 9000 41 | resources: 42 | limits: {nvidia.com/gpu: 1} 43 | restartPolicy: Never 44 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | steps: 16 | 17 | # bw-pl-bikes-train 18 | - name: 'bash' 19 | args: ['./copydir.sh'] 20 | id: copy1 21 | dir: 'ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/bikesw_training' 22 | 23 | - name: 'gcr.io/cloud-builders/docker' 24 | args: ['build', '-t', 'gcr.io/$PROJECT_ID/bw-pl-bikes-train:$SHORT_SHA', '.'] 25 | dir: 'ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/bikesw_training' 26 | waitFor: ['copy1'] 27 | 28 | - name: 'gcr.io/cloud-builders/docker' 29 | args: ['push', 'gcr.io/$PROJECT_ID/bw-pl-bikes-train:$SHORT_SHA'] 30 | 31 | # ml-pipeline-bikes-tuner 32 | - name: 'bash' 33 | args: ['./copydir.sh'] 34 | id: copy2 35 | dir: 'ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/bikesw_training_hptune' 36 | waitFor: ['-'] # The '-' indicates that this step begins immediately. 
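# Each image here follows the same copy -> build -> push pattern; the waitFor
# entries let the four image builds run in parallel, with each docker build
# gated only on its own copy step. A manual run would look something like
#   gcloud builds submit --config=cloudbuild.yaml --substitutions=SHORT_SHA=localtest .
# since SHORT_SHA is only auto-populated by build triggers.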
37 | 38 | - name: 'gcr.io/cloud-builders/docker' 39 | args: ['build', '-t', 'gcr.io/$PROJECT_ID/ml-pipeline-bikes-tuner:$SHORT_SHA', '.'] 40 | dir: 'ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/bikesw_training_hptune' 41 | waitFor: ['copy2'] 42 | 43 | - name: 'gcr.io/cloud-builders/docker' 44 | args: ['push', 'gcr.io/$PROJECT_ID/ml-pipeline-bikes-tuner:$SHORT_SHA'] 45 | 46 | # ml-pipeline-bikes-dep 47 | - name: 'bash' 48 | args: ['./copydir.sh'] 49 | id: copy3 50 | dir: 'ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/deploy_jobs' 51 | waitFor: ['-'] 52 | 53 | - name: 'gcr.io/cloud-builders/docker' 54 | args: ['build', '-t', 'gcr.io/$PROJECT_ID/ml-pipeline-bikes-dep:$SHORT_SHA', '.'] 55 | dir: 'ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/deploy_jobs' 56 | waitFor: ['copy3'] 57 | 58 | - name: 'gcr.io/cloud-builders/docker' 59 | args: ['push', 'gcr.io/$PROJECT_ID/ml-pipeline-bikes-dep:$SHORT_SHA'] 60 | 61 | # bw-pipeline-tfserve 62 | - name: 'bash' 63 | args: ['./copydir.sh'] 64 | id: copy4 65 | dir: 'ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/tf-serving' 66 | waitFor: ['-'] 67 | 68 | - name: 'gcr.io/cloud-builders/docker' 69 | args: ['build', '-t', 'gcr.io/$PROJECT_ID/bw-pipeline-tfserve:$SHORT_SHA', '.'] 70 | dir: 'ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/tf-serving' 71 | waitFor: ['copy4'] 72 | 73 | - name: 'gcr.io/cloud-builders/docker' 74 | args: ['push', 'gcr.io/$PROJECT_ID/bw-pipeline-tfserve:$SHORT_SHA'] 75 | 76 | timeout: 2000s 77 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/bikesw_training/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # FROM tensorflow/tensorflow:2.1.0-gpu-py3 16 | FROM tensorflow/tensorflow:2.3.0-gpu 17 | 18 | 19 | RUN pip install --upgrade pip 20 | RUN pip install pathlib2 21 | 22 | 23 | ADD build /ml 24 | 25 | ENTRYPOINT ["python", "/ml/bikes_weather_limited.py"] 26 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/bikesw_training/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright 2019 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | if [ -z "$1" ] 18 | then 19 | PROJECT_ID=$(gcloud config config-helper --format "value(configuration.properties.core.project)") 20 | else 21 | PROJECT_ID=$1 22 | fi 23 | 24 | mkdir -p ./build 25 | rsync -arvp "../../bikesw_training"/ ./build/ 26 | 27 | docker build -t bw-pl-bikes-train . 28 | rm -rf ./build 29 | 30 | docker tag bw-pl-bikes-train gcr.io/${PROJECT_ID}/bw-pl-bikes-train 31 | docker push gcr.io/${PROJECT_ID}/bw-pl-bikes-train 32 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/bikesw_training/cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | steps: 16 | 17 | - name: 'bash' 18 | args: ['./copydir.sh'] 19 | id: 'copydir' 20 | dir: 'ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/bikesw_training' 21 | 22 | - name: 'gcr.io/cloud-builders/docker' 23 | args: ['build', '-t', 'gcr.io/$PROJECT_ID/bw-pl-bikes-train:$SHORT_SHA', '.'] 24 | dir: 'ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/bikesw_training' 25 | 26 | - name: 'gcr.io/cloud-builders/docker' 27 | args: ['push', 'gcr.io/$PROJECT_ID/bw-pl-bikes-train:$SHORT_SHA'] 28 | 29 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/bikesw_training/copydir.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
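# Stages the shared bikesw_training sources into ./build so they fall inside
# this container's Docker build context (docker build cannot ADD files from
# outside the current directory tree).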
14 | 15 | 16 | mkdir -p ./build 17 | cp -pr ../../bikesw_training/* ./build/ 18 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/bikesw_training_hptune/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # FROM tensorflow/tensorflow:2.1.0-gpu-py3 16 | FROM tensorflow/tensorflow:2.3.0-gpu 17 | 18 | RUN pip install --upgrade pip 19 | RUN pip install keras-tuner google-cloud-storage 20 | 21 | 22 | ADD build /ml 23 | 24 | ENTRYPOINT ["python", "/ml/bw_hptune_standalone.py"] 25 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/bikesw_training_hptune/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright 2019 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | if [ -z "$1" ] 18 | then 19 | PROJECT_ID=$(gcloud config config-helper --format "value(configuration.properties.core.project)") 20 | else 21 | PROJECT_ID=$1 22 | fi 23 | 24 | mkdir -p ./build 25 | rsync -arvp "../../bikesw_training/"/ ./build/ 26 | 27 | docker build -t ml-pipeline-bikes-tuner . 28 | rm -rf ./build 29 | 30 | docker tag ml-pipeline-bikes-tuner gcr.io/${PROJECT_ID}/ml-pipeline-bikes-tuner 31 | docker push gcr.io/${PROJECT_ID}/ml-pipeline-bikes-tuner 32 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/bikesw_training_hptune/cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | steps: 16 | 17 | # ml-pipeline-bikes-tuner 18 | - name: 'bash' 19 | args: ['./copydir.sh'] 20 | id: copy2 21 | dir: 'ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/bikesw_training_hptune' 22 | waitFor: ['-'] 23 | 24 | - name: 'gcr.io/cloud-builders/docker' 25 | args: ['build', '-t', 'gcr.io/$PROJECT_ID/ml-pipeline-bikes-tuner:$SHORT_SHA', '.'] 26 | dir: 'ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/bikesw_training_hptune' 27 | waitFor: ['copy2'] 28 | 29 | - name: 'gcr.io/cloud-builders/docker' 30 | args: ['push', 'gcr.io/$PROJECT_ID/ml-pipeline-bikes-tuner:$SHORT_SHA'] 31 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/bikesw_training_hptune/copydir.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | mkdir -p ./build 17 | cp -pr ../../bikesw_training/* ./build/ 18 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/deploy_jobs/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
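# Image for the tuner-deployment pipeline step: Python 3 plus the Cloud SDK
# (gcloud, gsutil, kubectl), so that deploy_tuner.py can create the tuner Jobs
# on the cluster and access results in GCS.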
14 | 15 | FROM ubuntu:20.04 16 | 17 | RUN apt-get update \ 18 | && apt-get install -y python3-pip python3-dev wget unzip \ 19 | && cd /usr/local/bin \ 20 | && ln -s /usr/bin/python3 python \ 21 | && pip3 install --upgrade pip 22 | 23 | # RUN apt-get install -y wget unzip git 24 | 25 | RUN pip install --upgrade pip 26 | RUN pip install urllib3 certifi retrying 27 | RUN pip install google-cloud-storage 28 | RUN pip install --upgrade six 29 | 30 | 31 | RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \ 32 | unzip -qq google-cloud-sdk.zip -d tools && \ 33 | rm google-cloud-sdk.zip && \ 34 | tools/google-cloud-sdk/install.sh --usage-reporting=false \ 35 | --path-update=false --bash-completion=false \ 36 | --disable-installation-options && \ 37 | tools/google-cloud-sdk/bin/gcloud -q components update \ 38 | gcloud core gsutil && \ 39 | tools/google-cloud-sdk/bin/gcloud -q components install kubectl && \ 40 | tools/google-cloud-sdk/bin/gcloud config set component_manager/disable_update_check true && \ 41 | touch /tools/google-cloud-sdk/lib/third_party/google.py 42 | 43 | 44 | ENV PATH $PATH:/tools/google-cloud-sdk/bin 45 | 46 | ADD build /ml 47 | 48 | ENTRYPOINT ["python", "/ml/deploy_tuner.py"] 49 | 50 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/deploy_jobs/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright 2020 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | if [ -z "$1" ] 18 | then 19 | PROJECT_ID=$(gcloud config config-helper --format "value(configuration.properties.core.project)") 20 | else 21 | PROJECT_ID=$1 22 | fi 23 | 24 | mkdir -p ./build 25 | rsync -arvp "../../bikesw_training/"/ ./build/ 26 | 27 | docker build -t ml-pipeline-bikes-dep . 28 | rm -rf ./build 29 | 30 | docker tag ml-pipeline-bikes-dep gcr.io/${PROJECT_ID}/ml-pipeline-bikes-dep 31 | docker push gcr.io/${PROJECT_ID}/ml-pipeline-bikes-dep 32 | 33 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/deploy_jobs/cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | steps: 16 | 17 | # ml-pipeline-bikes-dep 18 | - name: 'bash' 19 | args: ['./copydir.sh'] 20 | id: copy3 21 | dir: 'ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/deploy_jobs' 22 | waitFor: ['-'] 23 | 24 | - name: 'gcr.io/cloud-builders/docker' 25 | args: ['build', '-t', 'gcr.io/$PROJECT_ID/ml-pipeline-bikes-dep:$SHORT_SHA', '.'] 26 | dir: 'ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/deploy_jobs' 27 | waitFor: ['copy3'] 28 | 29 | - name: 'gcr.io/cloud-builders/docker' 30 | args: ['push', 'gcr.io/$PROJECT_ID/ml-pipeline-bikes-dep:$SHORT_SHA'] -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/deploy_jobs/copydir.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | mkdir -p ./build 17 | cp -pr ../../bikesw_training/* ./build/ 18 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/tf-serving/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
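# Image for the TF-Serving deployment step: layers the Cloud SDK and kubectl on
# a TensorFlow base image so that deploy-tfserve.py can render the serving
# template and apply it to the cluster.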
14 | 15 | FROM tensorflow/tensorflow:2.1.0-gpu-py3 16 | 17 | RUN apt-get update -y 18 | 19 | RUN apt-get install --no-install-recommends -y -q ca-certificates python-dev python-setuptools wget unzip 20 | 21 | RUN easy_install pip 22 | 23 | RUN pip install pyyaml==3.12 six requests==2.18.4 24 | 25 | RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \ 26 | unzip -qq google-cloud-sdk.zip -d tools && \ 27 | rm google-cloud-sdk.zip && \ 28 | tools/google-cloud-sdk/install.sh --usage-reporting=false \ 29 | --path-update=false --bash-completion=false \ 30 | --disable-installation-options && \ 31 | tools/google-cloud-sdk/bin/gcloud -q components update \ 32 | gcloud core gsutil && \ 33 | tools/google-cloud-sdk/bin/gcloud -q components install kubectl && \ 34 | tools/google-cloud-sdk/bin/gcloud config set component_manager/disable_update_check true && \ 35 | touch /tools/google-cloud-sdk/lib/third_party/google.py 36 | 37 | 38 | ENV PATH $PATH:/tools/google-cloud-sdk/bin 39 | 40 | ADD build /ml 41 | 42 | ENTRYPOINT ["python", "/ml/deploy-tfserve.py"] 43 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/tf-serving/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright 2018 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | if [ -z "$1" ] 18 | then 19 | PROJECT_ID=$(gcloud config config-helper --format "value(configuration.properties.core.project)") 20 | else 21 | PROJECT_ID=$1 22 | fi 23 | 24 | mkdir -p ./build 25 | rsync -arvp "../../tf-serving"/ ./build/ 26 | 27 | docker build -t bw-pipeline-tfserve . 28 | rm -rf ./build 29 | 30 | docker tag bw-pipeline-tfserve gcr.io/${PROJECT_ID}/bw-pipeline-tfserve 31 | docker push gcr.io/${PROJECT_ID}/bw-pipeline-tfserve 32 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/tf-serving/cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
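# Standalone build config for just the bw-pipeline-tfserve image; the
# aggregate kubeflow-resources/cloudbuild.yaml runs these same steps as part of
# its parallel multi-image build.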
14 | 15 | steps: 16 | 17 | - name: 'bash' 18 | args: ['./copydir.sh'] 19 | id: copy4 20 | dir: 'ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/tf-serving' 21 | waitFor: ['-'] 22 | 23 | - name: 'gcr.io/cloud-builders/docker' 24 | args: ['build', '-t', 'gcr.io/$PROJECT_ID/bw-pipeline-tfserve:$SHORT_SHA', '.'] 25 | dir: 'ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/tf-serving' 26 | waitFor: ['copy4'] 27 | 28 | - name: 'gcr.io/cloud-builders/docker' 29 | args: ['push', 'gcr.io/$PROJECT_ID/bw-pipeline-tfserve:$SHORT_SHA'] 30 | 31 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/containers/tf-serving/copydir.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | mkdir -p ./build 17 | cp -pr ../../tf-serving/* ./build/ 18 | 19 | 20 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/kubeflow-resources/tf-serving/tf-serve-template.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | labels: 6 | app: SERVICE_NAME 7 | apptype: tf-serving 8 | name: SERVICE_NAME 9 | namespace: KUBEFLOW_NAMESPACE 10 | spec: 11 | ports: 12 | - name: grpc-tf-serving 13 | port: 9000 14 | targetPort: 9000 15 | - name: tf-serving-builtin-http 16 | port: 8500 17 | targetPort: 8500 18 | selector: 19 | app: SERVICE_NAME 20 | type: ClusterIP 21 | --- 22 | apiVersion: apps/v1 23 | kind: Deployment 24 | metadata: 25 | labels: 26 | app: SERVICE_NAME 27 | apptype: tf-serving 28 | name: SERVICE_NAME 29 | namespace: KUBEFLOW_NAMESPACE 30 | spec: 31 | replicas: 1 32 | selector: 33 | matchLabels: 34 | app: SERVICE_NAME 35 | template: 36 | metadata: 37 | labels: 38 | app: SERVICE_NAME 39 | version: v1 40 | spec: 41 | containers: 42 | - args: 43 | - --port=9000 44 | - --rest_api_port=8500 45 | - --model_name=MODEL_NAME 46 | - --model_base_path=MODEL_PATH 47 | - --enable_batching 48 | command: 49 | - /usr/bin/tensorflow_model_server 50 | image: tensorflow/serving:2.3.0-rc0 51 | imagePullPolicy: Always 52 | livenessProbe: 53 | initialDelaySeconds: 30 54 | periodSeconds: 30 55 | tcpSocket: 56 | port: 9000 57 | name: MODEL_NAME 58 | ports: 59 | - containerPort: 9000 60 | - containerPort: 8500 61 | resources: 62 | limits: 63 | cpu: "4" 64 | memory: 4Gi 65 | requests: 66 | cpu: "1" 67 | memory: 1Gi 68 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/serve_component.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | 
# you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | name: Serve TF model 16 | description: | 17 | A Kubeflow Pipeline component to deploy a tf-serving service 18 | metadata: 19 | labels: 20 | add-pod-env: 'true' 21 | inputs: 22 | - name: model_name 23 | type: String 24 | - name: model_path 25 | type: GCSPath 26 | - name: namespace 27 | type: String 28 | implementation: 29 | container: 30 | image: gcr.io/google-samples/bw-pipeline-tfserve:aad15ad 31 | args: [ 32 | --model_name, {inputValue: model_name}, 33 | --model_path, {inputValue: model_path}, 34 | --namespace, {inputValue: namespace} 35 | ] 36 | env: 37 | KFP_POD_NAME: "{{pod.name}}" 38 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/tfdv/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | FROM gcr.io/deeplearning-platform-release/tf2-cpu.2-3:latest 16 | 17 | ADD requirements.txt / 18 | # ADD tfdv.py / 19 | RUN pip install -U tensorflow-data-validation 20 | RUN pip download tensorflow_data_validation --no-deps --platform manylinux2010_x86_64 --only-binary=:all: 21 | RUN pip install -U "apache-beam[gcp]" 22 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/tfdv/requirements.txt: -------------------------------------------------------------------------------- 1 | ipython==7.16.1 2 | ipython-genutils==0.2.0 3 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/tfdv/tfdv.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
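# Wiring sketch (values illustrative): once func_to_container_op (below) has
# written tfdv_component.yaml, a pipeline can use this component via e.g.
#   tfdv_op = kfp.components.load_component_from_file('tfdv_component.yaml')
#   tfdv_op(input_data='gs://BUCKET/data/*.csv', output_path='gs://BUCKET/stats/train.pb',
#           job_name='tfdv1', use_dataflow='false', project_id=PROJECT, region=REGION,
#           gcs_temp_location='gs://BUCKET/tmp', gcs_staging_location='gs://BUCKET/staging')
# With use_dataflow='true', whl_location must also point to a downloaded TFDV wheel.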
14 | 15 | from typing import NamedTuple 16 | 17 | 18 | def generate_tfdv_stats(input_data: str, output_path: str, job_name: str, use_dataflow: str, 19 | project_id: str, region:str, gcs_temp_location: str, gcs_staging_location: str, 20 | whl_location: str = '', requirements_file: str = 'requirements.txt' 21 | ) -> NamedTuple('Outputs', [('stats_path', str)]): 22 | 23 | import logging 24 | import time 25 | 26 | import tensorflow_data_validation as tfdv 27 | import tensorflow_data_validation.statistics.stats_impl 28 | from apache_beam.options.pipeline_options import PipelineOptions, GoogleCloudOptions, StandardOptions, SetupOptions 29 | 30 | # pip download tensorflow_data_validation --no-deps --platform manylinux2010_x86_64 --only-binary=:all: 31 | # CHANGE the whl_location arg if your download resulted in a different filename. 32 | 33 | logging.getLogger().setLevel(logging.INFO) 34 | logging.info("output path: %s", output_path) 35 | logging.info("Building pipeline options") 36 | # Create and set your PipelineOptions. 37 | options = PipelineOptions() 38 | 39 | if use_dataflow == 'true': 40 | logging.info("using Dataflow") 41 | if not whl_location: 42 | logging.warning('tfdv whl file required with dataflow runner.') 43 | exit(1) 44 | # For Cloud execution, set the Cloud Platform project, job_name, 45 | # staging location, temp_location and specify DataflowRunner. 46 | google_cloud_options = options.view_as(GoogleCloudOptions) 47 | google_cloud_options.project = project_id 48 | google_cloud_options.job_name = '{}-{}'.format(job_name, str(int(time.time()))) 49 | google_cloud_options.staging_location = gcs_staging_location 50 | google_cloud_options.temp_location = gcs_temp_location 51 | google_cloud_options.region = region 52 | options.view_as(StandardOptions).runner = 'DataflowRunner' 53 | 54 | setup_options = options.view_as(SetupOptions) 55 | # whl_location should point to the downloaded tfdv wheel file. 56 | setup_options.extra_packages = [whl_location] 57 | setup_options.requirements_file = requirements_file 58 | 59 | tfdv.generate_statistics_from_csv( 60 | data_location=input_data, output_path=output_path, 61 | pipeline_options=options) 62 | 63 | return (output_path, ) 64 | 65 | 66 | if __name__ == '__main__': 67 | import kfp 68 | kfp.components.func_to_container_op(generate_tfdv_stats, 69 | output_component_file='../tfdv_component.yaml', 70 | base_image='gcr.io/google-samples/tfdv-tests:v1') 71 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/tfdv/tfdv_compare.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
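# Drift-check sketch: given stats for an older and a newer data window, the
# component infers a schema from the older stats, sets a Jensen-Shannon
# divergence threshold on the 'duration' feature, and returns 'true' (retrain)
# when the measured divergence exceeds that threshold; passing
# stats_older_path='none' forces 'true', e.g. for a first run.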
14 | 15 | from typing import NamedTuple 16 | 17 | 18 | def tfdv_detect_drift( 19 | stats_older_path: str, stats_new_path: str 20 | ) -> NamedTuple('Outputs', [('drift', str)]): 21 | 22 | import logging 23 | import time 24 | 25 | import tensorflow_data_validation as tfdv 26 | import tensorflow_data_validation.statistics.stats_impl 27 | 28 | logging.getLogger().setLevel(logging.INFO) 29 | logging.info('stats_older_path: %s', stats_older_path) 30 | logging.info('stats_new_path: %s', stats_new_path) 31 | 32 | if stats_older_path == 'none': 33 | return ('true', ) 34 | 35 | stats1 = tfdv.load_statistics(stats_older_path) 36 | stats2 = tfdv.load_statistics(stats_new_path) 37 | 38 | schema1 = tfdv.infer_schema(statistics=stats1) 39 | tfdv.get_feature(schema1, 'duration').drift_comparator.jensen_shannon_divergence.threshold = 0.01 40 | drift_anomalies = tfdv.validate_statistics( 41 | statistics=stats2, schema=schema1, previous_statistics=stats1) 42 | logging.info('drift analysis results: %s', drift_anomalies.drift_skew_info) 43 | 44 | from google.protobuf.json_format import MessageToDict 45 | d = MessageToDict(drift_anomalies) 46 | val = d['driftSkewInfo'][0]['driftMeasurements'][0]['value'] 47 | thresh = d['driftSkewInfo'][0]['driftMeasurements'][0]['threshold'] 48 | logging.info('value %s and threshold %s', val, thresh) 49 | res = 'true' 50 | if val < thresh: 51 | res = 'false' 52 | logging.info('train decision: %s', res) 53 | return (res, ) 54 | 55 | 56 | if __name__ == '__main__': 57 | import kfp 58 | kfp.components.func_to_container_op(tfdv_detect_drift, 59 | output_component_file='../tfdv_drift_component.yaml', 60 | base_image='gcr.io/google-samples/tfdv-tests:v1') 61 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/components/train_component.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
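# Loaded by the example pipelines via
#   comp.load_component_from_file('../components/train_component.yaml')
# (see bw_train.py below); the train_output_path output feeds the serve step.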
14 | 15 | name: Train bikes_weather model 16 | description: | 17 | A Kubeflow Pipeline component to train a Keras model 18 | on the 'bikes and weather' dataset 19 | metadata: 20 | labels: 21 | add-pod-env: 'true' 22 | inputs: 23 | - name: epochs 24 | type: Integer 25 | default: 1 26 | - name: steps_per_epoch 27 | type: Integer 28 | default: -1 29 | - name: data_dir 30 | type: String 31 | - name: workdir 32 | type: String 33 | - name: tb_dir 34 | type: String 35 | - name: hp_idx 36 | type: Integer 37 | - name: hptune_results 38 | type: String 39 | outputs: 40 | - name: train_output_path 41 | type: GCSPath 42 | - name: metrics_output_path 43 | type: String 44 | - name: MLPipeline UI metadata 45 | type: UI metadata 46 | implementation: 47 | container: 48 | image: gcr.io/google-samples/bw-pl-bikes-train:v2 49 | args: [ 50 | --data-dir, {inputValue: data_dir}, 51 | --epochs, {inputValue: epochs}, 52 | --steps-per-epoch, {inputValue: steps_per_epoch}, 53 | --workdir, {inputValue: workdir}, 54 | --tb-dir, {inputValue: tb_dir}, 55 | --train-output-path, {outputPath: train_output_path}, 56 | --metrics-output-path, {outputPath: metrics_output_path}, 57 | --hp-idx, {inputValue: hp_idx}, 58 | --hptune-results, {inputValue: hptune_results} 59 | ] 60 | env: 61 | KFP_POD_NAME: "{{pod.name}}" 62 | fileOutputs: 63 | MLPipeline UI metadata: /mlpipeline-ui-metadata.json 64 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/example_pipelines/bw_ktune.py.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amygdala/code-snippets/9fa00e5a4ec4ae3eb5a2fa212d1ecefb4009eba7/ml/kubeflow-pipelines/keras_tuner/example_pipelines/bw_ktune.py.tar.gz -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/example_pipelines/bw_train.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
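# (Illustrative sketch, not part of the original file): besides compiling to an
# archive via the __main__ block at the bottom, the pipeline can be submitted
# directly with the KFP SDK. The host URL and argument values below are
# placeholders, not project defaults:
#
#   import kfp
#   client = kfp.Client(host='https://<YOUR_KFP_HOST>')
#   client.create_run_from_pipeline_func(
#       bikes_weather, arguments={'working_dir': 'gs://<YOUR_BUCKET>/path'})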
14 | 15 | 16 | import kfp.dsl as dsl 17 | import kfp.gcp as gcp 18 | import kfp.components as comp 19 | from kfp.dsl.types import GCSPath, String 20 | 21 | 22 | train_op = comp.load_component_from_file( 23 | '../components/train_component.yaml' 24 | ) 25 | serve_op = comp.load_component_from_file( 26 | '../components/serve_component.yaml' 27 | ) 28 | 29 | tb_op = comp.load_component_from_url( 30 | 'https://raw.githubusercontent.com/kubeflow/pipelines/master/components/tensorflow/tensorboard/prepare_tensorboard/component.yaml' # pylint: disable=line-too-long 31 | ) 32 | 33 | 34 | @dsl.pipeline( 35 | name='bikes_weather', 36 | description='Model bike rental duration given weather' 37 | ) 38 | def bikes_weather( #pylint: disable=unused-argument 39 | train_epochs: int = 5, 40 | working_dir: str = 'gs://YOUR/GCS/PATH', # for the full training jobs 41 | data_dir: str = 'gs://aju-dev-demos-codelabs/bikes_weather/', 42 | steps_per_epoch: int = -1 , # if -1, don't override normal calcs based on dataset size 43 | num_best_hps_list: list = [0], 44 | hptune_params: str = '[{"num_hidden_layers": %s, "learning_rate": %s, "hidden_size": %s}]' % (3, 1e-2, 64) 45 | ): 46 | 47 | 48 | # create TensorBoard viz for the parent directory of all training runs, so that we can 49 | # compare them. 50 | tb_viz = tb_op( 51 | log_dir_uri='%s/%s' % (working_dir, dsl.RUN_ID_PLACEHOLDER) 52 | ) 53 | 54 | with dsl.ParallelFor(num_best_hps_list) as idx: # start the full training runs in parallel 55 | 56 | train = train_op( 57 | data_dir=data_dir, 58 | workdir='%s/%s' % (tb_viz.outputs['log_dir_uri'], idx), 59 | tb_dir=tb_viz.outputs['log_dir_uri'], 60 | epochs=train_epochs, steps_per_epoch=steps_per_epoch, 61 | hp_idx=idx, 62 | hptune_results=hptune_params 63 | ) 64 | 65 | serve = serve_op( 66 | model_path=train.outputs['train_output_path'], 67 | model_name='bikesw', 68 | namespace='default' 69 | ) 70 | train.set_gpu_limit(2) 71 | 72 | 73 | if __name__ == '__main__': 74 | import kfp.compiler as compiler 75 | compiler.Compiler().compile(bikes_weather, __file__ + '.tar.gz') 76 | 77 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/keras_tuner/example_pipelines/bw_train_metrics.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | 16 | import kfp.dsl as dsl 17 | import kfp.gcp as gcp 18 | import kfp.components as comp 19 | from kfp.dsl.types import GCSPath, String 20 | 21 | 22 | train_op = comp.load_component_from_file( 23 | '../components/train_component.yaml' 24 | ) 25 | serve_op = comp.load_component_from_file( 26 | '../components/serve_component.yaml' 27 | ) 28 | 29 | eval_metrics_op = comp.load_component_from_file( 30 | '../components/eval_metrics_component.yaml') 31 | 32 | tb_op = comp.load_component_from_url( 33 | 'https://raw.githubusercontent.com/kubeflow/pipelines/master/components/tensorflow/tensorboard/prepare_tensorboard/component.yaml' # pylint: disable=line-too-long 34 | ) 35 | 36 | 37 | @dsl.pipeline( 38 | name='bikes_weather', 39 | description='Model bike rental duration given weather' 40 | ) 41 | def bikes_weather_metrics( #pylint: disable=unused-argument 42 | train_epochs: int = 5, 43 | working_dir: str = 'gs://YOUR/GCS/PATH', # for the full training jobs 44 | data_dir: str = 'gs://aju-dev-demos-codelabs/bikes_weather/', 45 | steps_per_epoch: int = -1, # if -1, don't override normal calcs based on dataset size 46 | num_best_hps_list: list = [0], 47 | hptune_params: str = '[{"num_hidden_layers": %s, "learning_rate": %s, "hidden_size": %s}]' % (3, 1e-2, 64), 48 | thresholds: str = '{"root_mean_squared_error": 2000}' 49 | ): 50 | 51 | 52 | # create TensorBoard viz for the parent directory of all training runs, so that we can 53 | # compare them. 54 | tb_viz = tb_op( 55 | log_dir_uri='%s/%s' % (working_dir, dsl.RUN_ID_PLACEHOLDER) 56 | ) 57 | 58 | with dsl.ParallelFor(num_best_hps_list) as idx: # start the full training runs in parallel 59 | 60 | train = train_op( 61 | data_dir=data_dir, 62 | workdir='%s/%s' % (tb_viz.outputs['log_dir_uri'], idx), 63 | tb_dir=tb_viz.outputs['log_dir_uri'], 64 | epochs=train_epochs, steps_per_epoch=steps_per_epoch, 65 | hp_idx=idx, 66 | hptune_results=hptune_params 67 | ) 68 | 69 | eval_metrics = eval_metrics_op( 70 | thresholds=thresholds, 71 | metrics=train.outputs['metrics_output_path'], 72 | ) 73 | 74 | with dsl.Condition(eval_metrics.outputs['deploy'] == 'deploy'): 75 | serve = serve_op( 76 | model_path=train.outputs['train_output_path'], 77 | model_name='bikesw', 78 | namespace='default' 79 | ) 80 | train.set_gpu_limit(2) 81 | 82 | 83 | if __name__ == '__main__': 84 | import kfp.compiler as compiler 85 | compiler.Compiler().compile(bikes_weather_metrics, __file__ + '.tar.gz') 86 | 87 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/samples/automl/README.md: -------------------------------------------------------------------------------- 1 | 2 | The pipeline in this directory shows how you can make calls to the AutoML Vision API to build a pipeline that creates an AutoML *dataset* and then trains a model on that dataset. 3 | 4 | This pipeline requires a GKE installation of Kubeflow, e.g. via the 5 | ['click to deploy' web app](https://deploy.kubeflow.cloud/#/deploy). 6 | Once Kubeflow is installed on your GKE cluster, to run this pipeline, you'll need to visit the [IAM panel in the GCP Cloud Console](https://console.cloud.google.com/iam-admin/iam), find the Kubeflow-created service account 7 | `<deployment-name>-user@<project-id>.iam.gserviceaccount.com`, and add permissions to make that account an `AutoML Admin`. This will give the Kubeflow Pipeline steps permission to call the AutoML APIs.
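For example, the same role grant can be made from the command line. This is a sketch; the deployment name and project ID below are placeholders to replace with your own values:

```sh
DEPLOYMENT_NAME=my-kubeflow   # hypothetical Kubeflow deployment name
PROJECT_ID=my-project         # hypothetical GCP project ID

# Grant the Kubeflow-created service account the AutoML Admin role.
gcloud projects add-iam-policy-binding ${PROJECT_ID} \
  --member "serviceAccount:${DEPLOYMENT_NAME}-user@${PROJECT_ID}.iam.gserviceaccount.com" \
  --role "roles/automl.admin"
```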
8 | 9 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/samples/automl/dataset_and_train.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | import kfp.dsl as dsl 17 | import kfp.gcp as gcp 18 | 19 | DATASET_OP = 'dataset' 20 | MODEL_OP = 'model' 21 | 22 | @dsl.pipeline( 23 | name='automl1', 24 | description='Create AutoML dataset and train model' 25 | ) 26 | def automl1( #pylint: disable=unused-argument 27 | # There's now a more succinct way to define the pipeline params 28 | project_id: dsl.PipelineParam = dsl.PipelineParam(name='project-id', value='YOUR_PROJECT_HERE'), 29 | compute_region: dsl.PipelineParam = dsl.PipelineParam(name='compute-region', value='YOUR_REGION_HERE'), 30 | dataset_name: dsl.PipelineParam = dsl.PipelineParam(name='dataset-name', value='YOUR_DATASETNAME_HERE'), 31 | model_name: dsl.PipelineParam = dsl.PipelineParam(name='model-name', value='YOUR_MODELNAME_HERE'), 32 | csv_path: dsl.PipelineParam = dsl.PipelineParam(name='csv-path', value='YOUR_DATASET_PATH') 33 | ): 34 | 35 | 36 | dataset = dsl.ContainerOp( 37 | name='dataset', 38 | image='gcr.io/google-samples/automl-pipeline', 39 | arguments=["--project_id", project_id, "--operation", DATASET_OP, 40 | "--compute_region", compute_region, 41 | "--dataset_name", dataset_name, 42 | "--csv_path", csv_path], 43 | file_outputs={'dataset_id': '/dataset_id.txt', 'csv_path': '/csv_path.txt'} 44 | 45 | ).apply(gcp.use_gcp_secret('user-gcp-sa')) 46 | 47 | model = dsl.ContainerOp( 48 | name='model', 49 | image='gcr.io/google-samples/automl-pipeline', 50 | arguments=["--project_id", project_id, "--operation", MODEL_OP, 51 | "--compute_region", compute_region, 52 | "--model_name", model_name, 53 | "--csv_path", dataset.outputs['csv_path'], 54 | "--dataset_id", dataset.outputs['dataset_id']] 55 | ).apply(gcp.use_gcp_secret('user-gcp-sa')) 56 | 57 | model.after(dataset) 58 | 59 | 60 | 61 | if __name__ == '__main__': 62 | import kfp.compiler as compiler 63 | compiler.Compiler().compile(automl1, __file__ + '.tar.gz') 64 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/samples/automl/dataset_and_train.py.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amygdala/code-snippets/9fa00e5a4ec4ae3eb5a2fa212d1ecefb4009eba7/ml/kubeflow-pipelines/samples/automl/dataset_and_train.py.tar.gz -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/samples/kubeflow-tf/README.md: -------------------------------------------------------------------------------- 1 | 2 | The example pipelines that were in this directory have been moved to the [`older`](./older) subdirectory. 3 | They are not currently maintained and are probably out of date. 
4 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/samples/kubeflow-tf/older/gh_summ.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | import kfp.dsl as dsl 17 | import kfp.gcp as gcp 18 | 19 | 20 | @dsl.pipeline( 21 | name='Github issue summarization', 22 | description='Demonstrate Tensor2Tensor-based training and TF-Serving' 23 | ) 24 | def gh_summ( #pylint: disable=unused-argument 25 | train_steps: dsl.PipelineParam = dsl.PipelineParam(name='train-steps', value=2019300), 26 | project: dsl.PipelineParam = dsl.PipelineParam(name='project', value='YOUR_PROJECT_HERE'), 27 | github_token: dsl.PipelineParam = dsl.PipelineParam( 28 | name='github-token', value='YOUR_GITHUB_TOKEN_HERE'), 29 | working_dir: dsl.PipelineParam = dsl.PipelineParam(name='working-dir', value='YOUR_GCS_DIR_HERE'), 30 | checkpoint_dir: dsl.PipelineParam = dsl.PipelineParam( 31 | name='checkpoint-dir', 32 | value='gs://aju-dev-demos-codelabs/kubecon/model_output_tbase.bak2019000'), 33 | deploy_webapp: dsl.PipelineParam = dsl.PipelineParam(name='deploy-webapp', value='true'), 34 | data_dir: dsl.PipelineParam = dsl.PipelineParam( 35 | name='data-dir', value='gs://aju-dev-demos-codelabs/kubecon/t2t_data_gh_all/')): 36 | 37 | 38 | train = dsl.ContainerOp( 39 | name='train', 40 | image='gcr.io/google-samples/ml-pipeline-t2ttrain', 41 | arguments=["--data-dir", data_dir, 42 | "--checkpoint-dir", checkpoint_dir, 43 | "--model-dir", '%s/%s/model_output' % (working_dir, '{{workflow.name}}'), 44 | "--train-steps", train_steps, "--deploy-webapp", deploy_webapp], 45 | file_outputs={'output': '/tmp/output'} 46 | 47 | ).apply(gcp.use_gcp_secret('user-gcp-sa')) 48 | 49 | serve = dsl.ContainerOp( 50 | name='serve', 51 | image='gcr.io/google-samples/ml-pipeline-kubeflow-tfserve', 52 | arguments=["--model_name", 'ghsumm-%s' % ('{{workflow.name}}',), 53 | "--model_path", '%s/%s/model_output/export' % (working_dir, '{{workflow.name}}') 54 | ] 55 | ) 56 | serve.after(train) 57 | train.set_gpu_limit(4) 58 | 59 | with dsl.Condition(train.output == 'true'): 60 | webapp = dsl.ContainerOp( 61 | name='webapp', 62 | image='gcr.io/google-samples/ml-pipeline-webapp-launcher', 63 | arguments=["--model_name", 'ghsumm-%s' % ('{{workflow.name}}',), 64 | "--github_token", github_token] 65 | 66 | ) 67 | webapp.after(serve) 68 | 69 | 70 | if __name__ == '__main__': 71 | import kfp.compiler as compiler 72 | compiler.Compiler().compile(gh_summ, __file__ + '.tar.gz') 73 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/samples/kubeflow-tf/older/gh_summ.py.tar.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/amygdala/code-snippets/9fa00e5a4ec4ae3eb5a2fa212d1ecefb4009eba7/ml/kubeflow-pipelines/samples/kubeflow-tf/older/gh_summ.py.tar.gz -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/samples/kubeflow-tf/older/gh_summ_serve.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | import kfp.dsl as dsl 17 | 18 | @dsl.pipeline( 19 | name='Github issue summarization', 20 | description='Demonstrate Tensor2Tensor-based training and TF-Serving' 21 | ) 22 | def gh_summ( 23 | github_token: dsl.PipelineParam = dsl.PipelineParam( 24 | name='github-token', value='YOUR_GITHUB_TOKEN_HERE'), 25 | ): 26 | 27 | 28 | serve = dsl.ContainerOp( 29 | name='serve', 30 | image='gcr.io/google-samples/ml-pipeline-kubeflow-tfserve', 31 | arguments=["--model_name", 'ghsumm-%s' % ('{{workflow.name}}',), 32 | "--model_path", 33 | 'gs://aju-dev-demos-codelabs/kubecon/example_t2t_model/model_output/export' 34 | ] 35 | ) 36 | 37 | webapp = dsl.ContainerOp( 38 | name='webapp', 39 | image='gcr.io/google-samples/ml-pipeline-webapp-launcher', 40 | arguments=["--model_name", 'ghsumm-%s' % ('{{workflow.name}}',), 41 | "--github_token", github_token] 42 | 43 | ) 44 | webapp.after(serve) 45 | 46 | 47 | if __name__ == '__main__': 48 | import kfp.compiler as compiler 49 | compiler.Compiler().compile(gh_summ, __file__ + '.tar.gz') 50 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/samples/kubeflow-tf/older/gh_summ_serve.py.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amygdala/code-snippets/9fa00e5a4ec4ae3eb5a2fa212d1ecefb4009eba7/ml/kubeflow-pipelines/samples/kubeflow-tf/older/gh_summ_serve.py.tar.gz -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/sbtb/components/kubeflow-resources/containers/bikesw_training/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
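# (Note, not part of the original file): this image is built and pushed by the
# adjacent build.sh script, e.g. `./build.sh <PROJECT_ID>`; if the project ID
# argument is omitted, build.sh defaults to the current gcloud project.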
14 | 15 | FROM tensorflow/tensorflow:2.0.0-gpu-py3 16 | 17 | RUN pip install --upgrade pip 18 | RUN pip install pathlib2 19 | 20 | 21 | ADD build /ml 22 | 23 | ENTRYPOINT ["python", "/ml/bikes_weather.py"] 24 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/sbtb/components/kubeflow-resources/containers/bikesw_training/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright 2019 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | if [ -z "$1" ] 18 | then 19 | PROJECT_ID=$(gcloud config config-helper --format "value(configuration.properties.core.project)") 20 | else 21 | PROJECT_ID=$1 22 | fi 23 | 24 | mkdir -p ./build 25 | rsync -arvp "../../bikesw_training"/ ./build/ 26 | 27 | docker build -t ml-pipeline-bikes-train . 28 | rm -rf ./build 29 | 30 | docker tag ml-pipeline-bikes-train gcr.io/${PROJECT_ID}/ml-pipeline-bikes-train 31 | docker push gcr.io/${PROJECT_ID}/ml-pipeline-bikes-train 32 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/sbtb/components/kubeflow-resources/containers/tf-serving/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | FROM ubuntu:16.04 16 | 17 | RUN apt-get update -y 18 | 19 | RUN apt-get install --no-install-recommends -y -q ca-certificates python-dev python-setuptools wget unzip 20 | 21 | RUN easy_install pip 22 | 23 | RUN pip install pyyaml==3.12 six requests==2.18.4 tensorflow==2.0.0 24 | 25 | RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \ 26 | unzip -qq google-cloud-sdk.zip -d tools && \ 27 | rm google-cloud-sdk.zip && \ 28 | tools/google-cloud-sdk/install.sh --usage-reporting=false \ 29 | --path-update=false --bash-completion=false \ 30 | --disable-installation-options && \ 31 | tools/google-cloud-sdk/bin/gcloud -q components update \ 32 | gcloud core gsutil && \ 33 | tools/google-cloud-sdk/bin/gcloud -q components install kubectl && \ 34 | tools/google-cloud-sdk/bin/gcloud config set component_manager/disable_update_check true && \ 35 | touch /tools/google-cloud-sdk/lib/third_party/google.py 36 | 37 | # RUN wget -nv https://github.com/ksonnet/ksonnet/releases/download/v0.11.0/ks_0.11.0_linux_amd64.tar.gz && \ 38 | # tar -xvzf ks_0.11.0_linux_amd64.tar.gz && \ 39 | # mkdir -p /tools/ks/bin && \ 40 | # cp ./ks_0.11.0_linux_amd64/ks /tools/ks/bin && \ 41 | # rm ks_0.11.0_linux_amd64.tar.gz && \ 42 | # rm -r ks_0.11.0_linux_amd64 43 | 44 | ENV PATH $PATH:/tools/google-cloud-sdk/bin:/tools/ks/bin 45 | 46 | ADD build /ml 47 | 48 | ENTRYPOINT ["python", "/ml/deploy-tfserve.py"] 49 | 50 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/sbtb/components/kubeflow-resources/containers/tf-serving/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright 2018 Google Inc. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | if [ -z "$1" ] 18 | then 19 | PROJECT_ID=$(gcloud config config-helper --format "value(configuration.properties.core.project)") 20 | else 21 | PROJECT_ID=$1 22 | fi 23 | 24 | mkdir -p ./build 25 | rsync -arvp "../../tf-serving"/ ./build/ 26 | 27 | docker build -t ml-pipeline-tfserve . 
28 | rm -rf ./build 29 | 30 | docker tag ml-pipeline-tfserve gcr.io/${PROJECT_ID}/ml-pipeline-tfserve 31 | docker push gcr.io/${PROJECT_ID}/ml-pipeline-tfserve 32 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/sbtb/components/kubeflow-resources/tf-serving/tf-serve-template.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | annotations: 6 | getambassador.io/config: |- 7 | --- 8 | apiVersion: ambassador/v0 9 | kind: Mapping 10 | name: tfserving-predict-mapping-MODEL_NAME 11 | prefix: tfserving/models/MODEL_NAME/ 12 | rewrite: /v1/models/MODEL_NAME:predict 13 | method: POST 14 | service: MODEL_NAME.kubeflow:8500 15 | labels: 16 | app: MODEL_NAME 17 | name: SERVICE_NAME 18 | namespace: KUBEFLOW_NAMESPACE 19 | spec: 20 | ports: 21 | - name: grpc-tf-serving 22 | port: 9000 23 | targetPort: 9000 24 | - name: tf-serving-builtin-http 25 | port: 8500 26 | targetPort: 8500 27 | selector: 28 | app: SERVICE_NAME 29 | type: LoadBalancer 30 | --- 31 | apiVersion: extensions/v1beta1 32 | kind: Deployment 33 | metadata: 34 | labels: 35 | app: SERVICE_NAME 36 | name: SERVICE_NAME 37 | namespace: KUBEFLOW_NAMESPACE 38 | spec: 39 | replicas: 1 40 | template: 41 | metadata: 42 | labels: 43 | app: SERVICE_NAME 44 | version: v1 45 | spec: 46 | volumes: 47 | - name: gcp-credentials-user-gcp-sa 48 | secret: 49 | secretName: user-gcp-sa 50 | containers: 51 | - args: 52 | - --port=9000 53 | - --rest_api_port=8500 54 | - --model_name=MODEL_NAME 55 | - --model_base_path=MODEL_PATH 56 | command: 57 | - /usr/bin/tensorflow_model_server 58 | image: tensorflow/serving 59 | env: 60 | - name: GOOGLE_APPLICATION_CREDENTIALS 61 | value: /secret/gcp-credentials/user-gcp-sa.json 62 | - name: CLOUDSDK_AUTH_CREDENTIAL_FILE_OVERRIDE 63 | value: /secret/gcp-credentials/user-gcp-sa.json 64 | volumeMounts: 65 | - mountPath: /secret/gcp-credentials 66 | name: gcp-credentials-user-gcp-sa 67 | imagePullPolicy: IfNotPresent 68 | livenessProbe: 69 | initialDelaySeconds: 30 70 | periodSeconds: 30 71 | tcpSocket: 72 | port: 9000 73 | name: MODEL_NAME 74 | ports: 75 | - containerPort: 9000 76 | - containerPort: 8500 77 | resources: 78 | limits: 79 | cpu: "4" 80 | memory: 4Gi 81 | requests: 82 | cpu: "1" 83 | memory: 1Gi 84 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/sbtb/components/serve_component.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
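# (Illustrative sketch, not part of the original file): loading and invoking
# this component from a pipeline definition; the argument values below are
# placeholders:
#
#   serve_op = kfp.components.load_component_from_file('serve_component.yaml')
#   serve = serve_op(model_path='gs://<YOUR_BUCKET>/path/to/model', model_name='bikesw')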
14 | 15 | name: Serve TF model 16 | description: | 17 | A Kubeflow Pipeline component to deploy a tf-serving service 18 | metadata: 19 | labels: 20 | add-pod-env: 'true' 21 | inputs: 22 | - name: model_name 23 | type: String 24 | - name: model_path 25 | type: GCSPath 26 | implementation: 27 | container: 28 | image: gcr.io/google-samples/ml-pipeline-tfserve:v2 29 | args: [ 30 | --model_name, {inputValue: model_name}, 31 | --model_path, {inputValue: model_path}, 32 | ] 33 | env: 34 | KFP_POD_NAME: "{{pod.name}}" 35 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/sbtb/components/train_component.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | name: Train bikes_weather model 16 | description: | 17 | A Kubeflow Pipeline component to train a Keras model 18 | on the 'bikes and weather' dataset 19 | metadata: 20 | labels: 21 | add-pod-env: 'true' 22 | inputs: 23 | - name: epochs 24 | type: Integer 25 | default: 1 26 | - name: steps_per_epoch 27 | type: Integer 28 | default: -1 29 | - name: data_dir 30 | type: GCSPath 31 | - name: workdir 32 | type: GCSPath 33 | - name: load_checkpoint 34 | type: GCSPath 35 | outputs: 36 | - name: train_output_path 37 | type: GCSPath 38 | implementation: 39 | container: 40 | image: gcr.io/google-samples/ml-pipeline-bikes-train:v2 41 | args: [ 42 | --data-dir, {inputValue: data_dir}, 43 | --epochs, {inputValue: epochs}, 44 | --steps-per-epoch, {inputValue: steps_per_epoch}, 45 | --workdir, {inputValue: workdir}, 46 | --load-checkpoint, {inputValue: load_checkpoint}, 47 | --train-output-path, {outputPath: train_output_path} 48 | ] 49 | env: 50 | KFP_POD_NAME: "{{pod.name}}" 51 | -------------------------------------------------------------------------------- /ml/kubeflow-pipelines/sbtb/example_pipelines/bw.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | 15 | 16 | import kfp.dsl as dsl 17 | import kfp.gcp as gcp 18 | import kfp.components as comp 19 | from kfp.dsl.types import GCSPath, String 20 | 21 | 22 | COPY_ACTION = 'copy_data' 23 | TRAIN_ACTION = 'train' 24 | WORKSPACE_NAME = 'ws_gh_summ' 25 | DATASET = 'dataset' 26 | MODEL = 'model' 27 | 28 | train_op = comp.load_component_from_url( 29 | 'https://raw.githubusercontent.com/amygdala/code-snippets/master/ml/kubeflow-pipelines/sbtb/components/train_component.yaml' # pylint: disable=line-too-long 30 | ) 31 | serve_op = comp.load_component_from_url( 32 | 'https://raw.githubusercontent.com/amygdala/code-snippets/master/ml/kubeflow-pipelines/sbtb/components/serve_component.yaml' # pylint: disable=line-too-long 33 | ) 34 | 35 | 36 | @dsl.pipeline( 37 | name='bikes_weather', 38 | description='Model bike rental duration given weather' 39 | ) 40 | def bikes_weather( #pylint: disable=unused-argument 41 | working_dir: GCSPath = 'gs://YOUR_GCS_DIR_HERE', 42 | data_dir: GCSPath = 'gs://aju-dev-demos-codelabs/bikes_weather/', 43 | epochs: 'Integer' = 1, 44 | steps_per_epoch: 'Integer' = -1, # if -1, don't override normal calcs based on dataset size 45 | load_checkpoint: String = '' 46 | ): 47 | 48 | 49 | train = train_op( 50 | data_dir=data_dir, 51 | workdir='%s/%s' % (working_dir, dsl.RUN_ID_PLACEHOLDER), 52 | epochs=epochs, steps_per_epoch=steps_per_epoch, 53 | load_checkpoint=load_checkpoint 54 | ).apply(gcp.use_gcp_secret('user-gcp-sa')) 55 | 56 | 57 | serve = serve_op( 58 | model_path=train.outputs['train_output_path'], 59 | model_name='bikesw' 60 | ).apply(gcp.use_gcp_secret('user-gcp-sa')) 61 | 62 | train.set_gpu_limit(1) 63 | 64 | if __name__ == '__main__': 65 | import kfp.compiler as compiler 66 | compiler.Compiler().compile(bikes_weather, __file__ + '.tar.gz') 67 | -------------------------------------------------------------------------------- /ml/notebook_examples/functions/main.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import datetime 3 | 4 | import time 5 | 6 | import kfp 7 | import kfp.compiler as compiler 8 | import kfp.dsl as dsl 9 | 10 | import requests 11 | 12 | # TODO: replace with your own host URL 13 | # HOST = 'https://<YOUR_PIPELINES_HOST>.pipelines.googleusercontent.com' 14 | HOST = 'https://7c7f7f3e3d11e1d4-dot-us-central2.pipelines.googleusercontent.com' 15 | 16 | @dsl.pipeline( 17 | name='Sequential', 18 | description='A pipeline with two sequential steps.'
19 | ) 20 | def sequential_pipeline(filename='gs://ml-pipeline-playground/shakespeare1.txt'): 21 | """A pipeline with two sequential steps.""" 22 | op1 = dsl.ContainerOp( 23 | name='filechange', 24 | image='library/bash:4.4.23', 25 | command=['sh', '-c'], 26 | arguments=['echo "%s" > /tmp/results.txt' % filename], 27 | file_outputs={'newfile': '/tmp/results.txt'}) 28 | op2 = dsl.ContainerOp( 29 | name='echo', 30 | image='library/bash:4.4.23', 31 | command=['sh', '-c'], 32 | arguments=['echo "%s"' % op1.outputs['newfile']] 33 | ) 34 | 35 | def get_access_token(): 36 | url = 'http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/token' 37 | r = requests.get(url, headers={'Metadata-Flavor': 'Google'}) 38 | r.raise_for_status() 39 | access_token = r.json()['access_token'] 40 | return access_token 41 | 42 | def hosted_kfp_test(data, context): 43 | logging.info('Event ID: {}'.format(context.event_id)) 44 | logging.info('Event type: {}'.format(context.event_type)) 45 | logging.info('Data: {}'.format(data)) 46 | logging.info('Bucket: {}'.format(data['bucket'])) 47 | logging.info('File: {}'.format(data['name'])) 48 | file_uri = 'gs://%s/%s' % (data['bucket'], data['name']) 49 | logging.info('Using file uri: %s', file_uri) 50 | 51 | logging.info('Metageneration: {}'.format(data['metageneration'])) 52 | logging.info('Created: {}'.format(data['timeCreated'])) 53 | logging.info('Updated: {}'.format(data['updated'])) 54 | 55 | token = get_access_token() 56 | logging.info('attempting to launch pipeline run.') 57 | ts = int(datetime.datetime.utcnow().timestamp() * 100000) 58 | client = kfp.Client(host=HOST, existing_token=token) 59 | compiler.Compiler().compile(sequential_pipeline, '/tmp/sequential.tar.gz') 60 | exp = client.create_experiment(name='gcstriggered') # this is a 'get or create' op 61 | res = client.run_pipeline(exp.id, 'sequential_' + str(ts), '/tmp/sequential.tar.gz', 62 | params={'filename': file_uri}) 63 | logging.info(res) 64 | -------------------------------------------------------------------------------- /ml/notebook_examples/functions/requirements.txt: -------------------------------------------------------------------------------- 1 | kfp 2 | -------------------------------------------------------------------------------- /ml/notebook_examples/keras_linear_regressor.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Copyright 2017 Google Inc. All Rights Reserved.\n", 8 | "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at\n", 9 | "\n", 10 | "http://www.apache.org/licenses/LICENSE-2.0\n", 11 | "\n", 12 | "Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License." 
13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": { 19 | "collapsed": true 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "import tensorflow as tf\n", 24 | "import keras\n", 25 | "from sklearn.preprocessing import StandardScaler" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": { 32 | "collapsed": true 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "import numpy as np\n", 37 | "\n", 38 | "X_train = np.linspace(0, 80, 100).reshape(-1, 1)\n", 39 | "# print(X_train)\n", 40 | "Y_train = 5 * X_train\n", 41 | "# print(Y_train)\n", 42 | "\n", 43 | "X_test = np.linspace(0, 80, 20).reshape(-1, 1)\n", 44 | "#print(X_test)\n", 45 | "Y_test = 5 * X_test\n", 46 | "#print(Y_test)\n" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": { 53 | "collapsed": true 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "sc = StandardScaler()\n", 58 | "x = sc.fit_transform(X_train)\n", 59 | "y = sc.fit_transform(Y_train)\n", 60 | "xt = sc.fit_transform(X_test)\n", 61 | "yt = sc.fit_transform(Y_test)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "from keras.layers import Input, Dense\n", 71 | "from keras.models import Model\n", 72 | "\n", 73 | "inputs = Input(shape=(1,))\n", 74 | "preds = Dense(1,activation='linear')(inputs)\n", 75 | "\n", 76 | "model = Model(inputs=inputs,outputs=preds)\n", 77 | "sgd=keras.optimizers.SGD()\n", 78 | "model.compile(optimizer=sgd ,loss='mse',metrics=['mse'])\n", 79 | "\n", 80 | "model.fit(x,y, batch_size=1, epochs=30, shuffle=False)\n" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "score = model.evaluate(xt, yt, batch_size=16)\n", 90 | "print(\"\\nScore: %s\" % score)" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": { 97 | "collapsed": true 98 | }, 99 | "outputs": [], 100 | "source": [] 101 | } 102 | ], 103 | "metadata": { 104 | 105 | }, 106 | "nbformat": 4, 107 | "nbformat_minor": 2 108 | } 109 | -------------------------------------------------------------------------------- /ml/vertex_pipelines/pytorch/cifar/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | FROM pytorch/pytorch:latest 16 | 17 | COPY requirements.txt requirements.txt 18 | 19 | RUN pip3 install -r requirements.txt 20 | 21 | ADD pytorch-pipeline /workspace/pytorch-pipeline 22 | 23 | ENV PYTHONPATH /workspace/pytorch-pipeline 24 | 25 | WORKDIR /workspace/pytorch-pipeline 26 | 27 | ENTRYPOINT /bin/bash 28 | -------------------------------------------------------------------------------- /ml/vertex_pipelines/pytorch/cifar/Dockerfile-gpu: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | FROM pytorch/pytorch:1.8.1-cuda10.2-cudnn7-runtime 16 | 17 | COPY requirements.txt requirements.txt 18 | 19 | RUN pip3 install -U pip 20 | RUN pip3 install -r requirements.txt 21 | RUN pip3 install -U google-cloud-aiplatform[tensorboard] 22 | 23 | ADD pytorch-pipeline /workspace/pytorch-pipeline 24 | 25 | ENV PYTHONPATH /workspace/pytorch-pipeline 26 | 27 | WORKDIR /workspace/pytorch-pipeline 28 | 29 | ENTRYPOINT /bin/bash 30 | -------------------------------------------------------------------------------- /ml/vertex_pipelines/pytorch/cifar/Dockerfile-gpu-ct: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
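# (Illustrative sketch, not part of the original file): a typical build-and-push
# sequence for this image; the image name and project ID are placeholders:
#
#   docker build -f Dockerfile-gpu-ct -t gcr.io/<YOUR_PROJECT>/pytorch-cifar-gpu-ct .
#   docker push gcr.io/<YOUR_PROJECT>/pytorch-cifar-gpu-ct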
14 | 15 | FROM pytorch/pytorch:1.8.1-cuda10.2-cudnn7-runtime 16 | 17 | COPY requirements.txt requirements.txt 18 | RUN pip3 install -U pip 19 | RUN pip3 install -r requirements.txt 20 | RUN pip3 install gcsfs 21 | RUN pip3 install google-cloud-storage 22 | RUN pip3 install -U google-cloud-aiplatform[tensorboard] 23 | 24 | 25 | RUN apt-get update -y 26 | RUN apt-get install --no-install-recommends -y -q ca-certificates wget unzip 27 | 28 | 29 | RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \ 30 | unzip -qq google-cloud-sdk.zip -d tools && \ 31 | rm google-cloud-sdk.zip && \ 32 | tools/google-cloud-sdk/install.sh --usage-reporting=false \ 33 | --path-update=false --bash-completion=false \ 34 | --disable-installation-options && \ 35 | tools/google-cloud-sdk/bin/gcloud -q components update \ 36 | gcloud core gsutil && \ 37 | tools/google-cloud-sdk/bin/gcloud -q components install kubectl && \ 38 | tools/google-cloud-sdk/bin/gcloud config set component_manager/disable_update_check true 39 | 40 | 41 | ENV PATH $PATH:/tools/google-cloud-sdk/bin 42 | 43 | ADD pytorch-pipeline /workspace/pytorch-pipeline 44 | 45 | ENV PYTHONPATH /workspace/pytorch-pipeline 46 | 47 | WORKDIR /workspace/pytorch-pipeline 48 | 49 | #ENTRYPOINT /bin/bash 50 | ENTRYPOINT ["python", "/workspace/pytorch-pipeline/training_task.py"] 51 | 52 | 53 | -------------------------------------------------------------------------------- /ml/vertex_pipelines/pytorch/cifar/input.json: -------------------------------------------------------------------------------- 1 | { 2 | "instances": [ 3 | { 4 | "data": 5 | "iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAIAAAD8GO2jAAAAB3RJTUUH2AoYFB0lQuPgxAAAB/ZJREFUSImVVlmPplURruWcd+ttpnt6mZ4JdsMwC0OUxAXcgka58Xf4O/wnmsi1N8QQ3EJMBA1ookNUZJh9pqe3r7/9Pe97tiovPiBR4cK6q0rqqarnpE49+OOf/FwksMHClsZaa5kIAJTIWGuZGQFSTEFFM+Q+uaRdiDknEWXIIfg+SMqQBVUg5jRzLgQfg/cp9yLG2FIyISmSISJEYkYkRCBEVFUFIGNQNQMIE+REJABQlmbzwpL33cNHJ4pGowJjBiBiYkNGETJmIcPEbJgsISEiAgACExHRwlNVAFEARRJiQCRCRMw5A8jm5vmqssxEzAAAAERIhITEzMxskBIDEBljmBmJEUAVgBAXCUxIGgu2WYQIiYmVRCTnfHJyOhgAALFhEUkxSc6AiIilIYk5oVJVs7HABlQFEYmYiEABERdDWKbVxpJ6wkwozMS8IBNVbOdUhRa9ICIiMrNlXl9tGks1g9n/0vrpYOpcIlOJKjIQMSEQLQbAqjAltdK3ZXU+MomKKBITKaOQNRxSJlJmQEOsZASt4YqgKcuMbK7vra4tmydH45AxCUkWUGUmIhBkjdlSpjSvDAoxMCcJiYQJgVlQVNUYQ4pIWSHX1rDQfDQ3wlVZz5M3qzWWl9bWVuuDo9F0FoCKJJSTEBKyBYAkXvsuBi4ay4qsZJTAEoAAKBtSAQaqyWyfr1fr8vxK9frPft+snt945gY6MAVl1bSzbrfPX37w4GTURuG673OM6lXJUFnX81HyEY0qEBVFSczeZwRRyIAIxKrAhNYYYo25G89O1y5eqBuyUzVLdanOqfaVtS9cuXg4mI3mCZar0cSNughEOefJ1K1s7gmRiIgqIhZFwaRIklJSwRhzH2Xug+Rec3fzle89/9z+vI32tDW9DylKWVYoqSnM/qX1TReHoxkKdCG2vdqlZm//SquVA4OaQHSxHIhYFLYoTIxCRBpyCJLEZK2rld2Ts64pytKw6UMKQTULlIrgqgovLNslU6+s2LYL47NxJIOIrevs2nIGAM0xZkJUQkVEQFVAJMNcWfAee0/ex3nXr2yUdVkZEMxJCHJOkJTUiK0z1WCq5vLOhpul5MO4cz4BFHMiY4mwYATN3mfDEZCQRCWFLvSTFHJjq42V5cOn0+ngxJI1IIAgAImo8H3u3PTC5jKxqOjhwcFkNN7evFg0ZZNy2/ddG9EUzWpz+fK21XT38fHMS++70cnw+PBgcHbYzmYXN7dv3rh+94P3bn/08OpL3zQiwsyICIrD0eTDf97Z3t7cvbSTxLz7h/cOjoY3rl27eGm7rJraSm2Ladu58TyuF+dWmjA8Onp8PByM3NTNu861MzedfXwy+OjWB/OzietS1zkDACklY0wWIq5SxsdPzgaDdmd3d2NjS6kaj4cpzJLIdOae2XtOtRhOxqcHT6Sb9167oKHtU4gpqo8UEgefummLfdrZ2d2/8pwJMagKEc3m/Z//covZeJ/LAre2Ni+N3Zf2rxCo72YPHj46OX7gulTZ8z6lGB0lr2gzFwm4S+CShkw+Ux91aXWdiu75F29u7W6TZFEFY8zt2w/+dusftjAh+aUVu31xbWW1RiIiOxhMh2PXLG/0XkeTcdvNfewAxRpEiSAxS4h+nrtRdkPpxqzh4rN7ezevU2lNVZUpJUScTV1RVM1SNRpPs+Sub+ft7J13/mqp8j75GPso1tqyymhy3dQ76+cub13647vvPzm8P5q1s9ksdC52wYLO/Wj/tR9u7O60zpuQOwLWLGvLSzeuP39ha+vw2B0cpV//5oOP7zxy0wTaLjVLq01p+zanfr0xIiG2848P7r9/9vbdu/fmcyeqokjcFNb64C7t7V598Rp
XVkQMQOn79ODO/ZmbXHvhquty26XRcNp3XUrBGj23tqKpf/T44Wg0dO2MIU5GZ851WbKqLFaaABAQJIKxVJWvfP/Vje2N+byvSjauk9Tnwdnkws65BHrv/vF4Mu19Gzo/ODsanB02je1no8OnR94nBDZMIspcVFUhEvvgJAsAEChI70J+/stf+fp3vqsFVDUxkZnOpr7rysasrC23nR48Ob59+1+D4yfT4aR1o5TblDxnZCoKu9Q0K0mi9z7nHGMWiSr6ySlGUElVU/7gR69d2N0ejWdVAYxoQpLxZOY7V1fnfvfbt3/15lvDwZFmDyLEwkZqWwBVkiEESXGW1AMqAqYcATKA4CfXG5PItevPfvvVlyPlqkDIgiLmwb1x3zoCfeuXf3rjF2+enT5c6AuwAIAgmDMoCCAoJgFBVABFRABUXfysgAgCCMa+/K1vrK8vHY9aa1Uyioh5+GgwHg5Oj4+nQzcbtQUjoaiAqgAgAgEiYFYQAEFUVAIFgEWZT9ARMaW0vr310le/lsUTRAJEECYwhydPh2fj4XDST52IEJBgVgJQBABAAFDUTx3FRegz+1TcgEi4evXq3v61KF3JJAqkwKqEhOPJJOVcVLi0YgEEAPU/UT5Dg8+PLyQLXb3yQl2ek0CYrWZCZQQ2IYSUovdhOnwqbsiYPwXSL8D6XFM09Yf3Tn/6+hsuj0kKUsqSVdT43q+vb8SYUz9uuzMQ0IXswv9udvGk/xtfECVY3rpz9Pen3uc5JVskSTHmLMYWtm07a4uyWPLcSE4A6f9sH1QUGXKIgt77vgAMSVJKKSXjWuecMyaDGoQSlAEzgH4x3Z9XQJUlQHvq53OVXmk5GSuaAeXfrDxOutzfcVMAAAAASUVORK5CYII=" 6 | } 7 | ] 8 | } -------------------------------------------------------------------------------- /ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/README.md: -------------------------------------------------------------------------------- 1 | # pytorch-pipeline -------------------------------------------------------------------------------- /ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/process_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import subprocess 16 | import logging 17 | from pathlib import Path 18 | 19 | import torchvision 20 | import webdataset as wds 21 | from sklearn.model_selection import train_test_split 22 | 23 | logging.getLogger().setLevel(logging.INFO) 24 | # logging.info("Dataset path is: %s", cifar_dataset.path) 25 | output_pth = "output/processing" 26 | 27 | Path(output_pth).mkdir(parents=True, exist_ok=True) 28 | 29 | trainset = torchvision.datasets.CIFAR10( 30 | root="./", train=True, download=True 31 | ) 32 | testset = torchvision.datasets.CIFAR10( 33 | root="./", train=False, download=True 34 | ) 35 | 36 | Path(output_pth + "/train").mkdir(parents=True, exist_ok=True) 37 | Path(output_pth + "/val").mkdir(parents=True, exist_ok=True) 38 | Path(output_pth + "/test").mkdir(parents=True, exist_ok=True) 39 | 40 | random_seed = 25 41 | y = trainset.targets 42 | trainset, valset, y_train, y_val = train_test_split( 43 | trainset, 44 | y, 45 | stratify=y, 46 | shuffle=True, 47 | test_size=0.2, 48 | random_state=random_seed, 49 | ) 50 | 51 | for name in [(trainset, "train"), (valset, "val"), (testset, "test")]: 52 | with wds.ShardWriter( 53 | output_pth + "/" + str(name[1]) + "/" + str(name[1]) + "-%d.tar", 54 | maxcount=1000, 55 | ) as sink: 56 | for index, (image, cls) in enumerate(name[0]): 57 | sink.write( 58 | {"__key__": "%06d" % index, "ppm": image, "cls": cls} 59 | ) 60 | 61 | entry_point = ["ls", "-R", output_pth] 62 | run_code = subprocess.run(entry_point, stdout=subprocess.PIPE) 63 | print(run_code.stdout) 64 | -------------------------------------------------------------------------------- /ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/pytorch_pipeline/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM pytorch/pytorch:latest 2 | 3 | COPY 
requirements.txt requirements.txt 4 | 5 | RUN apt-get update 6 | 7 | RUN apt-get install -y git 8 | 9 | RUN git clone -b trainer-code-revamp https://github.com/jagadeeshi2i/pytorch-pipeline 10 | 11 | # RUN git clone -b jagadeeshi2i-patch-7 https://github.com/jagadeeshi2i/pytorch-pipeline 12 | 13 | RUN pip3 install -r requirements.txt 14 | 15 | ENV PYTHONPATH /workspace/pytorch-pipeline 16 | 17 | WORKDIR /workspace/pytorch-pipeline 18 | 19 | ENTRYPOINT /bin/bash -------------------------------------------------------------------------------- /ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/pytorch_pipeline/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amygdala/code-snippets/9fa00e5a4ec4ae3eb5a2fa212d1ecefb4009eba7/ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/pytorch_pipeline/__init__.py -------------------------------------------------------------------------------- /ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/pytorch_pipeline/components/base/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amygdala/code-snippets/9fa00e5a4ec4ae3eb5a2fa212d1ecefb4009eba7/ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/pytorch_pipeline/components/base/__init__.py -------------------------------------------------------------------------------- /ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/pytorch_pipeline/components/base/base_component.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from six import with_metaclass 3 | 4 | class BaseComponent(with_metaclass(abc.ABCMeta, object)): 5 | def __init__(self): 6 | pass 7 | 8 | @classmethod 9 | def _validate_component_class(cls): 10 | # TODO: Spec validation to be done here 11 | pass -------------------------------------------------------------------------------- /ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/pytorch_pipeline/components/base/base_executor.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from six import with_metaclass 3 | 4 | 5 | class BaseExecutor(with_metaclass(abc.ABCMeta, object)): 6 | 7 | def __init__(self): 8 | pass 9 | 10 | @abc.abstractmethod 11 | def Do(self, model_class, data_module_class=None, data_module_args=None, module_file_args=None): 12 | pass -------------------------------------------------------------------------------- /ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/pytorch_pipeline/components/trainer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amygdala/code-snippets/9fa00e5a4ec4ae3eb5a2fa212d1ecefb4009eba7/ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/pytorch_pipeline/components/trainer/__init__.py -------------------------------------------------------------------------------- /ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/pytorch_pipeline/components/trainer/component.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import inspect 16 | import importlib 17 | from typing import Optional, Dict 18 | from pytorch_pipeline.components.base.base_component import BaseComponent 19 | from pytorch_pipeline.components.trainer.generic_executor import GenericExecutor 20 | from pytorch_pipeline.components.trainer.executor import Executor 21 | 22 | class Trainer(BaseComponent): 23 | def __init__(self, 24 | module_file: Optional = None, 25 | data_module_file: Optional = None, 26 | trainer_fn: Optional = None, 27 | run_fn: Optional = None, 28 | data_module_args: Optional[Dict] = None, 29 | module_file_args: Optional[Dict] = None, 30 | trainer_args: Optional[Dict] = None 31 | ): 32 | super(Trainer, self).__init__() 33 | if [bool(module_file), bool(run_fn), bool(trainer_fn)].count(True) != 1: 34 | raise ValueError( 35 | "Exactly one of 'module_file', 'trainer_fn', or 'run_fn' must be " 36 | "supplied.") 37 | 38 | if module_file and data_module_file: 39 | # Both module file and data module file are present 40 | 41 | model_class = None 42 | data_module_class = None 43 | 44 | class_module = importlib.import_module(module_file.split(".")[0]) 45 | data_module = importlib.import_module(data_module_file.split(".")[0]) 46 | 47 | for cls in inspect.getmembers(class_module, lambda member: inspect.isclass( 48 | member) and member.__module__ == class_module.__name__): 49 | model_class = cls[1] 50 | 51 | for cls in inspect.getmembers(data_module, lambda member: inspect.isclass( 52 | member) and member.__module__ == data_module.__name__): 53 | data_module_class = cls[1] 54 | 55 | print(model_class, data_module_class) 56 | 57 | Executor().Do( 58 | model_class=model_class, 59 | data_module_class=data_module_class, 60 | data_module_args=data_module_args, 61 | module_file_args=module_file_args, 62 | trainer_args=trainer_args 63 | ) 64 | # 65 | # elif run_fn: 66 | # GenericExecutor().Do() 67 | # elif trainer_fn: 68 | # Executor().Do() 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/pytorch_pipeline/components/trainer/executor.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.

import os

import pytorch_lightning as pl
import torch

from pytorch_pipeline.components.trainer.generic_executor import GenericExecutor


class Executor(GenericExecutor):
    def __init__(self):
        super(Executor, self).__init__()

    def Do(
        self,
        model_class,
        data_module_class=None,
        data_module_args=None,
        module_file_args=None,
        trainer_args=None,
    ):
        if data_module_class:
            # Instantiate and prepare the LightningDataModule.
            dm = data_module_class(**data_module_args if data_module_args else {})
            dm.prepare_data()
            dm.setup(stage="fit")

            # `module_file_args` is an argparse.ArgumentParser; its parsed
            # arguments are forwarded to both the model and the trainer.
            parser = module_file_args
            args = vars(parser.parse_args())
            model = model_class(**args if args else {})

            trainer = pl.Trainer.from_argparse_args(parser, **trainer_args)

            trainer.fit(model, dm)
            trainer.test()

            model_save_path = args.get("checkpoint_dir", "/tmp")
            model_name = args.get("model_name", "model_state_dict.pth")

            model_save_path = os.path.join(model_save_path, model_name)
            # Save the state dict only from the rank-0 process so distributed
            # runs don't write the file multiple times.
            if trainer.global_rank == 0:
                print("Saving model to {}".format(model_save_path))
                torch.save(model.state_dict(), model_save_path)
--------------------------------------------------------------------------------
/ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/pytorch_pipeline/components/trainer/generic_executor.py:
--------------------------------------------------------------------------------
# Copyright 2021 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
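
"""Generic executor stub: `Do` is a placeholder for the (not yet implemented)
`run_fn`/`trainer_fn` entry points; `Executor` specializes it for the
PyTorch Lightning path."""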

from pytorch_pipeline.components.base.base_executor import BaseExecutor


class GenericExecutor(BaseExecutor):

    def Do(self, model_class, data_module_class=None, data_module_args=None, module_file_args=None):
        # TODO: Code to train a pretrained model
        pass

    def _GetFnArgs(self):
        pass
--------------------------------------------------------------------------------
/ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/pytorch_pipeline/examples/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amygdala/code-snippets/9fa00e5a4ec4ae3eb5a2fa212d1ecefb4009eba7/ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/pytorch_pipeline/examples/__init__.py
--------------------------------------------------------------------------------
/ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/pytorch_pipeline/examples/cifar10/cifar10_pre_process.py:
--------------------------------------------------------------------------------
import subprocess
from argparse import ArgumentParser
from pathlib import Path

import torchvision
import webdataset as wds
from sklearn.model_selection import train_test_split

if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument(
        "--output_data",
        type=str,
    )

    args = vars(parser.parse_args())
    output_path = args["output_data"]

    Path(output_path).mkdir(parents=True, exist_ok=True)

    # Download the CIFAR-10 train and test splits.
    trainset = torchvision.datasets.CIFAR10(root="./", train=True, download=True)
    testset = torchvision.datasets.CIFAR10(root="./", train=False, download=True)

    Path(output_path + "/train").mkdir(parents=True, exist_ok=True)
    Path(output_path + "/val").mkdir(parents=True, exist_ok=True)
    Path(output_path + "/test").mkdir(parents=True, exist_ok=True)

    # Hold out 20% of the training data as a stratified validation split.
    random_seed = 25
    y = trainset.targets
    trainset, valset, y_train, y_val = train_test_split(
        trainset, y, stratify=y, shuffle=True, test_size=0.2, random_state=random_seed
    )

    # Write each split as sharded WebDataset tar archives (1000 samples per shard).
    for dataset, split in [(trainset, "train"), (valset, "val"), (testset, "test")]:
        with wds.ShardWriter(
            output_path + "/" + split + "/" + split + "-%d.tar", maxcount=1000
        ) as sink:
            for index, (image, cls) in enumerate(dataset):
                sink.write({"__key__": "%06d" % index, "ppm": image, "cls": cls})

    # List the generated shards for logging purposes.
    entry_point = ["ls", "-R", output_path]
    run_code = subprocess.run(entry_point, stdout=subprocess.PIPE)
    print(run_code.stdout.decode("utf-8"))
--------------------------------------------------------------------------------
/ml/vertex_pipelines/pytorch/cifar/pytorch-pipeline/pytorch_pipeline/examples/cifar10/cifar10_pytorch.py:
--------------------------------------------------------------------------------
import os
from argparse import ArgumentParser

import pytorch_lightning as pl
from pytorch_lightning.callbacks import (
    EarlyStopping,
    LearningRateMonitor,
    ModelCheckpoint,
)
from pytorch_lightning.loggers import TensorBoardLogger

from pytorch_pipeline.components.mar.mar_generation import MarGeneration
from pytorch_pipeline.components.trainer.component import Trainer


# Argument parser for user-defined paths
parser = ArgumentParser()

parser.add_argument(
    "--tensorboard_root",
    type=str,
    default="output/tensorboard",
    help="Tensorboard Root path (default: output/tensorboard)",
)
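
# (PyTorch Lightning trainer flags such as --max_epochs are added to this
# parser further down via pl.Trainer.add_argparse_args.)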

parser.add_argument(
    "--checkpoint_dir",
    type=str,
    default="output/train/models",
    help="Path to save model checkpoints (default: output/train/models)",
)

parser.add_argument(
    "--dataset_path",
    type=str,
    default="output/processing",
    help="Cifar10 Dataset path (default: output/processing)",
)

parser.add_argument(
    "--model_name",
    type=str,
    default="resnet.pth",
    help="Name of the model to be saved as (default: resnet.pth)",
)

parser.add_argument(
    "--minio_path",
    type=str,
    default="tensorboard/version_0",
    help="Path to upload files to minio (default: tensorboard/version_0)",
)

parser = pl.Trainer.add_argparse_args(parent_parser=parser)

args = vars(parser.parse_args())


# Enable the TensorBoard logger and the LearningRateMonitor, EarlyStopping,
# and ModelCheckpoint callbacks.
lr_logger = LearningRateMonitor()
tboard = TensorBoardLogger(args["tensorboard_root"])
early_stopping = EarlyStopping(monitor="val_loss", mode="min", patience=5, verbose=True)
checkpoint_callback = ModelCheckpoint(
    dirpath=args["checkpoint_dir"],
    filename="cifar10_{epoch:02d}",
    save_top_k=1,
    verbose=True,
    monitor="val_loss",
    mode="min",
)

# Default to a single epoch if --max_epochs is not given on the command line.
if not args["max_epochs"]:
    max_epochs = 1
else:
    max_epochs = args["max_epochs"]


# Set the trainer-specific arguments
trainer_args = {
    "logger": tboard,
    "checkpoint_callback": True,
    "max_epochs": max_epochs,
    "callbacks": [lr_logger, early_stopping, checkpoint_callback],
}


# Set the datamodule-specific arguments
data_module_args = {"train_glob": args["dataset_path"]}


# Initiate the training process
trainer = Trainer(
    module_file="cifar10_train.py",
    data_module_file="cifar10_datamodule.py",
    module_file_args=parser,
    data_module_args=data_module_args,
    trainer_args=trainer_args,
)


# Generate a TorchServe model archive (.mar) from the trained model.
mar_config = {
    "MODEL_NAME": "cifar10_test",
    "MODEL_FILE": "pytorch_pipeline/examples/cifar10/cifar10_train.py",
    "HANDLER": "image_classifier",
    "SERIALIZED_FILE": os.path.join(args["checkpoint_dir"], args["model_name"]),
    "VERSION": "1",
    "EXPORT_PATH": args["checkpoint_dir"],
    "CONFIG_PROPERTIES": "https://kubeflow-dataset.s3.us-east-2.amazonaws.com/config.properties",
}


MarGeneration(mar_config=mar_config).generate_mar_file()
--------------------------------------------------------------------------------
/ml/vertex_pipelines/pytorch/cifar/requirements.txt:
--------------------------------------------------------------------------------
boto3
pytorch_lightning
image
matplotlib
torch
pyarrow
scikit-learn
transformers
torchtext
webdataset
torchvision
pandas
numpy
s3fs
wget
torch-model-archiver

--------------------------------------------------------------------------------
/ml/vertex_pipelines/pytorch/cifar/screenshots/pt-profiler.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amygdala/code-snippets/9fa00e5a4ec4ae3eb5a2fa212d1ecefb4009eba7/ml/vertex_pipelines/pytorch/cifar/screenshots/pt-profiler.png
--------------------------------------------------------------------------------
/ml/vertex_pipelines/pytorch/cifar/screenshots/vertex-tensorboard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/amygdala/code-snippets/9fa00e5a4ec4ae3eb5a2fa212d1ecefb4009eba7/ml/vertex_pipelines/pytorch/cifar/screenshots/vertex-tensorboard.png
--------------------------------------------------------------------------------
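
A minimal sketch for sanity-checking the WebDataset shards written by
cifar10_pre_process.py above, assuming the script was run with
--output_data=output/processing (the shard filename below is illustrative,
not part of the repo):

import webdataset as wds

# Read a few samples back out of the first training shard; with .decode("pil"),
# "ppm" entries decode to PIL images and "cls" entries to integer labels.
dataset = wds.WebDataset("output/processing/train/train-0.tar").decode("pil")
for i, sample in enumerate(dataset):
    print(sample["__key__"], sample["cls"], sample["ppm"].size)
    if i == 2:
        break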