├── .dockerignore ├── parameters.yaml ├── requirements.txt ├── forecast_peython_wiki ├── preprocess │ ├── Dockerfile │ ├── requirements.txt │ └── main.py ├── train_forecast │ ├── Dockerfile │ ├── requirements.txt │ └── main.py ├── parameters.yaml └── deployment │ └── pipline.py ├── entrypoint.sh ├── Dockerfile ├── .github └── workflows │ ├── test-action-compile-deploy-run.yaml │ └── versioning_pipeline_action.yaml ├── action.yml ├── .gitignore ├── example_pipeline.py ├── README.md ├── main.py └── client.py /.dockerignore: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /parameters.yaml: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | kfp==0.2 -------------------------------------------------------------------------------- /forecast_peython_wiki/preprocess/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.7-stretch 2 | 3 | COPY . . 4 | 5 | RUN pip install -r requirements.txt 6 | -------------------------------------------------------------------------------- /forecast_peython_wiki/train_forecast/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.7-stretch 2 | 3 | COPY . . 4 | 5 | RUN pip install -r requirements.txt -------------------------------------------------------------------------------- /forecast_peython_wiki/preprocess/requirements.txt: -------------------------------------------------------------------------------- 1 | click==7.0 2 | wget==3.2 3 | google-cloud-storage==1.25 4 | google==2.0.* 5 | pandas==1.0.* -------------------------------------------------------------------------------- /entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "${INPUT_ENCODED_GOOGLE_APPLICATION_CREDENTIALS}" | base64 -d > ${INPUT_GOOGLE_APPLICATION_CREDENTIALS} 4 | python /main.py -------------------------------------------------------------------------------- /forecast_peython_wiki/train_forecast/requirements.txt: -------------------------------------------------------------------------------- 1 | click==7.0 2 | wget==3.2 3 | google-cloud-storage==1.25 4 | google==2.0.* 5 | pandas==1.0.* 6 | fbprophet==0.5 7 | holidays==0.9.12 -------------------------------------------------------------------------------- /forecast_peython_wiki/parameters.yaml: -------------------------------------------------------------------------------- 1 | gcp_bucket: 2 | github_action 3 | project: 4 | kubeflow-github-267119 5 | train_data: 6 | train_data.csv 7 | forecast_data: 8 | forecat_data.csv -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.7-stretch 2 | 3 | LABEL "com.github.actions.name"="Submit Kubeflow Pipeline From GitHub" 4 | LABEL "com.github.actions.icon"="upload-cloud" 5 | LABEL "com.github.actions.color"="purple" 6 | 7 | COPY . . 
8 | 9 | RUN chmod +x /entrypoint.sh 10 | 11 | RUN pip install -r requirements.txt 12 | 13 | ENTRYPOINT ["/entrypoint.sh"] 14 | -------------------------------------------------------------------------------- /.github/workflows/test-action-compile-deploy-run.yaml: -------------------------------------------------------------------------------- 1 | name: Compile, Deploy and Run on Kubeflow 2 | on: [push] 3 | 4 | # Set environmental variables 5 | 6 | jobs: 7 | build: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: checkout files in repo 11 | uses: actions/checkout@master 12 | 13 | 14 | - name: Submit Kubeflow pipeline 15 | id: kubeflow 16 | uses: NikeNano/kubeflow-github-action@master 17 | with: 18 | KUBEFLOW_URL: ${{ secrets.KUBEFLOW_URL }} 19 | ENCODED_GOOGLE_APPLICATION_CREDENTIALS: ${{ secrets.GKE_KEY }} 20 | GOOGLE_APPLICATION_CREDENTIALS: /tmp/gcloud-sa.json 21 | CLIENT_ID: ${{ secrets.CLIENT_ID }} 22 | PIPELINE_CODE_PATH: "example_pipeline.py" 23 | PIPELINE_FUNCTION_NAME: "flipcoin_pipeline" 24 | PIPELINE_PARAMETERS_PATH: "parameters.yaml" 25 | EXPERIMENT_NAME: "Default" 26 | RUN_PIPELINE: True 27 | VERSION_GITHUB_SHA: False 28 | -------------------------------------------------------------------------------- /forecast_peython_wiki/preprocess/main.py: -------------------------------------------------------------------------------- 1 | import click 2 | import wget 3 | import logging 4 | 5 | from google.cloud import storage 6 | 7 | 8 | def upload_blob(bucket_name: str, source_file_name: str, destination_blob_name: str): 9 | """Function to upload to gcp bucket 10 | 11 | Arguments: 12 | bucket_name {str} -- The name of the bucket. 13 | source_file_name {str} -- The name of the source file that should be uploaded. 14 | destination_blob_name {str} -- The name of the file in the bucket. 15 | """ 16 | storage_client = storage.Client() 17 | bucket = storage_client.bucket(bucket_name) 18 | blob = bucket.blob(destination_blob_name) 19 | blob.upload_from_filename(source_file_name) 20 | logging.info( 21 | "File {} uploaded to {}.".format( 22 | source_file_name, destination_blob_name 23 | ) 24 | ) 25 | 26 | 27 | @click.command() 28 | @click.option("--url", default="https://raw.githubusercontent.com/facebook/prophet/master/examples/example_wp_log_peyton_manning.csv", 29 | help="the file of interest", required=False) 30 | @click.option("--bucket", required=True, help="The name of the gcp bucket") 31 | @click.option("--destination_blob_name", default="raw_data.csv", help="The raw data filename", required=True) 32 | def main(url: str, bucket: str, destination_blob_name: str): 33 | filename = wget.download(url) 34 | upload_blob(bucket_name=bucket, source_file_name=filename, destination_blob_name=destination_blob_name) 35 | logging.info("File extracted and uploaded to bucket") 36 | 37 | 38 | if __name__ == "__main__": 39 | main() 40 | -------------------------------------------------------------------------------- /action.yml: -------------------------------------------------------------------------------- 1 | name: Manage Kubeflow Pipelines on GCP. 2 | description: Build, deploy and run a Kubeflow Pipeline on Google Cloud Platform. 3 | author: Niklas Hansson 4 | inputs: 5 | KUBEFLOW_URL: 6 | description: The endpoint where your Kubeflow UI is running. 7 | required: true 8 | CLIENT_ID: 9 | description: The IAP client id, which was specified when the kubeflow deployment where setup using IAP. 
10 | required: true 11 | PIPELINE_CODE_PATH: 12 | description: The full path name including the filename of the python file that describes the pipeline you want to run on Kubeflow. This should be relative to the root of the GitHub repository where the Action is triggered. 13 | required: true 14 | PIPELINE_FUNCTION_NAME: 15 | description: The name of the pipeline function; this name will be the name of the pipeline in the Kubeflow UI. 16 | required: true 17 | ENCODED_GOOGLE_APPLICATION_CREDENTIALS: 18 | description: The base64 encoded Google credentials 19 | required: true 20 | GOOGLE_APPLICATION_CREDENTIALS: 21 | description: The path to the decoded Google credentials 22 | required: true 23 | EXPERIMENT_NAME: 24 | description: The name of the Kubeflow experiment within which the pipeline should run 25 | required: false 26 | PIPELINE_NAMESPACE: 27 | description: The namespace in which the pipeline should run 28 | required: false 29 | RUN_PIPELINE: 30 | description: Whether the GitHub Action should also trigger a run of the pipeline 31 | required: false 32 | VERSION_GITHUB_SHA: 33 | description: Whether the pipeline and its containers should be versioned with the GitHub SHA 34 | required: false 35 | outputs: 36 | WORKFLOW_URL: 37 | description: URL that links to the pipeline in Kubeflow 38 | branding: 39 | color: 'purple' 40 | icon: 'upload-cloud' 41 | runs: 42 | using: 'docker' 43 | image: 'Dockerfile' 44 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g.
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ -------------------------------------------------------------------------------- /.github/workflows/versioning_pipeline_action.yaml: -------------------------------------------------------------------------------- 1 | name: Compile, Deploy and Run versioned pipeline on Kubeflow 2 | on: [push] 3 | 4 | # Set environmental variables 5 | env: 6 | GKE_PROJECT: ${{ secrets.GKE_PROJECT }} 7 | 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: checkout files in repo 13 | uses: actions/checkout@master 14 | 15 | - uses: GoogleCloudPlatform/github-actions/setup-gcloud@master 16 | with: 17 | version: '270.0.0' 18 | service_account_email: ${{ secrets.GKE_EMAIL }} 19 | service_account_key: ${{ secrets.GKE_KEY }} 20 | 21 | - run: | 22 | gcloud auth configure-docker 23 | 24 | - name: Build preprocessing image 25 | env: 26 | IMAGE_NAME: pre_image 27 | run: | 28 | docker build -t gcr.io/$GKE_PROJECT/$IMAGE_NAME:$GITHUB_SHA \ 29 | --build-arg GITHUB_SHA="$GITHUB_SHA" \ 30 | --build-arg GITHUB_REF="$GITHUB_REF" forecast_peython_wiki/preprocess/. 31 | 32 | - name: Publish preprocessing image 33 | env: 34 | IMAGE_NAME: pre_image 35 | run: | 36 | echo gcr.io/$GKE_PROJECT/$IMAGE_NAME:$GITHUB_SHA 37 | docker push gcr.io/$GKE_PROJECT/$IMAGE_NAME:$GITHUB_SHA 38 | 39 | 40 | - name: Build train forecast image 41 | env: 42 | IMAGE_NAME: train_forecast_image 43 | run: | 44 | docker build -t gcr.io/$GKE_PROJECT/$IMAGE_NAME:$GITHUB_SHA \ 45 | --build-arg GITHUB_SHA="$GITHUB_SHA" \ 46 | --build-arg GITHUB_REF="$GITHUB_REF" forecast_peython_wiki/train_forecast/. 47 | 48 | 49 | - name: Publish train forecast image 50 | env: 51 | IMAGE_NAME: train_forecast_image 52 | run: | 53 | echo gcr.io/$GKE_PROJECT/$IMAGE_NAME:$GITHUB_SHA 54 | docker push gcr.io/$GKE_PROJECT/$IMAGE_NAME:$GITHUB_SHA 55 | 56 | 57 | - name: Submit Kubeflow pipeline 58 | id: kubeflow 59 | uses: NikeNano/kubeflow-github-action@master 60 | with: 61 | KUBEFLOW_URL: ${{ secrets.KUBEFLOW_URL }} 62 | ENCODED_GOOGLE_APPLICATION_CREDENTIALS: ${{ secrets.GKE_KEY }} 63 | GOOGLE_APPLICATION_CREDENTIALS: /tmp/gcloud-sa.json 64 | CLIENT_ID: ${{ secrets.CLIENT_ID }} 65 | PIPELINE_CODE_PATH: "forecast_peython_wiki/deployment/pipline.py" 66 | PIPELINE_FUNCTION_NAME: "pipeline" 67 | PIPELINE_PARAMETERS_PATH: "forecast_peython_wiki/parameters.yaml" 68 | EXPERIMENT_NAME: "Default" 69 | RUN_PIPELINE: True 70 | VERSION_GITHUB_SHA: True 71 | -------------------------------------------------------------------------------- /example_pipeline.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright 2019 Google LLC 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import kfp 17 | from kfp import dsl 18 | 19 | 20 | def random_num_op(low, high): 21 | """Generate a random number between low and high.""" 22 | return dsl.ContainerOp( 23 | name='Generate random number', 24 | image='python:alpine3.6', 25 | command=['sh', '-c'], 26 | arguments=['python -c "import random; print(random.randint($0, $1))" | tee $2', str(low), str(high), '/tmp/output'], 27 | file_outputs={'output': '/tmp/output'} 28 | ) 29 | 30 | 31 | def flip_coin_op(): 32 | """Flip a coin and output heads or tails randomly.""" 33 | return dsl.ContainerOp( 34 | name='Flip coin', 35 | image='python:alpine3.6', 36 | command=['sh', '-c'], 37 | arguments=['python -c "import random; result = \'heads\' if random.randint(0,1) == 0 ' 38 | 'else \'tails\'; print(result)" | tee /tmp/output'], 39 | file_outputs={'output': '/tmp/output'} 40 | ) 41 | 42 | 43 | def print_op(msg): 44 | """Print a message.""" 45 | return dsl.ContainerOp( 46 | name='Print', 47 | image='alpine:3.6', 48 | command=['echo', msg], 49 | ) 50 | 51 | 52 | @dsl.pipeline( 53 | name='Conditional execution pipeline', 54 | description='Shows how to use dsl.Condition().' 55 | ) 56 | def flipcoin_pipeline(): 57 | flip = flip_coin_op() 58 | with dsl.Condition(flip.output == 'heads'): 59 | random_num_head = random_num_op(0, 9) 60 | with dsl.Condition(random_num_head.output > 5): 61 | print_op('heads and %s > 5!' % random_num_head.output) 62 | with dsl.Condition(random_num_head.output <= 5): 63 | print_op('heads and %s <= 5!' % random_num_head.output) 64 | 65 | with dsl.Condition(flip.output == 'tails'): 66 | random_num_tail = random_num_op(10, 19) 67 | with dsl.Condition(random_num_tail.output > 15): 68 | print_op('tails and %s > 15!' % random_num_tail.output) 69 | with dsl.Condition(random_num_tail.output <= 15): 70 | print_op('tails and %s <= 15!' % random_num_tail.output) 71 | -------------------------------------------------------------------------------- /forecast_peython_wiki/deployment/pipline.py: -------------------------------------------------------------------------------- 1 | import kfp 2 | import datetime 3 | import os 4 | import click 5 | import logging 6 | import kfp 7 | import kfp.dsl as dsl 8 | import kfp.gcp as gcp 9 | 10 | def pipeline(github_sha :str): 11 | """Returns the pipeline function with the github_sha used for the versioning of the containers and enviroment of the containers as well. 12 | 13 | 14 | Keyword Arguments: 15 | env {str} -- The enviroment for which the pipeline is made for (default: {"develop"}) 16 | github_sha {str} --The github sha used for the versioning 17 | """ 18 | @kfp.dsl.pipeline( 19 | name="Example pipeline github action", 20 | description="This pipeline show how you can version the pipeline components using the githash" 21 | ) 22 | def timeseries_pipeline(gcp_bucket: str, project: str, train_data :str="train.csv", forecast_data: str="forecast.csv"): 23 | """The kfp pipeline function. 
24 | 25 | Arguments: 26 | gcp_bucket {str} -- The google bucket 27 | project {str} -- The gcp project where the data should be stored 28 | 29 | Keyword Arguments: 30 | train_data {str} -- The name of the train file that is uploaded to the bucket (default: {"train.csv"}) 31 | forecast_date {str} -- The name of the forecast file uploaded to the bucket (default: {"forecast.csv"}) 32 | """ 33 | pre_image = f"gcr.io/{project}/pre_image:{github_sha}" 34 | train_forecast_image = f"gcr.io/{project}/train_forecast_image:{github_sha}" 35 | operations = {} 36 | operations['preprocess'] = dsl.ContainerOp( 37 | name='Preprocess', 38 | image=pre_image, 39 | command=['python3'], 40 | arguments=["main.py", 41 | "--url", "https://raw.githubusercontent.com/facebook/prophet/master/examples/example_wp_log_peyton_manning.csv", 42 | "--bucket", gcp_bucket, 43 | "--destination_blob_name", train_data 44 | ] 45 | ).set_image_pull_policy('Always') 46 | 47 | operations['train_forecast'] = dsl.ContainerOp( 48 | name='Forecast', 49 | image=train_forecast_image, 50 | command=['python3'], 51 | arguments=["main.py", 52 | "--bucket", gcp_bucket, 53 | "--source_blob_name", train_data, 54 | "--forecast_blob_name", forecast_data 55 | ] 56 | ).set_image_pull_policy('Always') 57 | operations["train_forecast"].after(operations["preprocess"]) 58 | 59 | 60 | for _,operation in operations.items(): 61 | operation.apply(gcp.use_gcp_secret('user-gcp-sa')) 62 | dsl.get_pipeline_conf() 63 | 64 | return operations 65 | 66 | return timeseries_pipeline -------------------------------------------------------------------------------- /forecast_peython_wiki/train_forecast/main.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import tempfile 3 | import click 4 | import logging 5 | import os 6 | 7 | from fbprophet import Prophet 8 | from google.cloud import storage 9 | 10 | def download_blob(bucket_name: str, source_blob_name: str, destination_file_name: str): 11 | """Function to download file from gcp bucet 12 | 13 | Arguments: 14 | bucket_name {str} -- The name of the bucket from which the data should be downloaded. 15 | source_blob_name {str} -- The name of the file in the bucket which should be downloaded. 16 | destination_file_name {str} -- The local file pat for the downloaded file. 17 | """ 18 | 19 | storage_client = storage.Client() 20 | bucket = storage_client.bucket(bucket_name) 21 | blob = bucket.blob(source_blob_name) 22 | blob.download_to_filename(destination_file_name) 23 | 24 | logging.info( 25 | "Blob {} downloaded to {}.".format( 26 | source_blob_name, destination_file_name 27 | ) 28 | ) 29 | 30 | 31 | def upload_blob(bucket_name: str, source_file_name: str, destination_blob_name: str): 32 | """Function to upload file to bucket. 33 | 34 | Arguments: 35 | bucket_name {str} -- The name of the bucket on gcp. 36 | source_file_name {str} -- The filepath to the file that should be uploaded. 37 | destination_blob_name {str} -- The name of the file in the bucket. 
38 | """ 39 | storage_client = storage.Client() 40 | bucket = storage_client.bucket(bucket_name) 41 | blob = bucket.blob(destination_blob_name) 42 | blob.upload_from_filename(source_file_name) 43 | logging.info( 44 | "File {} uploaded to {}.".format( 45 | source_file_name, destination_blob_name 46 | ) 47 | ) 48 | 49 | 50 | @click.command() 51 | @click.option("--bucket", required=True, help="The name of the gcp bucket") 52 | @click.option("--source_blob_name", default="raw_data.csv", help="The raw file to download", required=True) 53 | @click.option("--forecast_blob_name", default="raw_data.csv", help="The forecast to upload", required=True) 54 | def main(bucket: str, source_blob_name :str, forecast_blob_name:str): 55 | with tempfile.TemporaryDirectory() as tmpdirname: 56 | local_file = os.path.join(tmpdirname,"tmp.csv") 57 | download_blob(bucket_name=bucket, source_blob_name=source_blob_name, destination_file_name=local_file) 58 | df = pd.read_csv(local_file) 59 | # Train the model 60 | m = Prophet() 61 | logging.info("Starting training of the prophet model") 62 | m.fit(df) 63 | logging.info("The Propeht model is trained") 64 | future = m.make_future_dataframe(periods=365) 65 | forecast = m.predict(future) 66 | with tempfile.TemporaryDirectory() as tmpdirname: 67 | forecast_file = os.path.join(tmpdirname, "forecast.csv") 68 | forecast.to_csv(forecast_file) 69 | upload_blob(bucket_name=bucket, source_file_name=forecast_file, destination_blob_name=forecast_blob_name) 70 | logging.info("The model training is done and forecasting is done") 71 | 72 | 73 | if __name__ == "__main__": 74 | main() 75 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # This action Submits Kubeflow Pipelines to Kubeflow cluster running on Google Cloud Platform. 2 | 3 | The purpose of this action is to allow for automated deployments of [Kubeflow Pipelines](https://github.com/kubeflow/pipelines) on Google Cloud Platform (GCP). The action will collect the pipeline from a python file and compile it before uploading it to Kubeflow. The Kubeflow deployment must be using [IAP](https://www.kubeflow.org/docs/gke/deploy/monitor-iap-setup/) on GCP to work. 
4 | 5 | # Usage 6 | 7 | ## Example workflows that use this action 8 | 9 | 10 | To compile a pipeline and upload it to Kubeflow: 11 | 12 | ```yaml 13 | name: Compile and Deploy Kubeflow pipeline 14 | on: [push] 15 | 16 | # Set environmental variables 17 | 18 | jobs: 19 | build: 20 | runs-on: ubuntu-18.04 21 | steps: 22 | - name: checkout files in repo 23 | uses: actions/checkout@master 24 | 25 | 26 | - name: Submit Kubeflow pipeline 27 | id: kubeflow 28 | uses: NikeNano/kubeflow-github-action@master 29 | with: 30 | KUBEFLOW_URL: ${{ secrets.KUBEFLOW_URL }} 31 | ENCODED_GOOGLE_APPLICATION_CREDENTIALS: ${{ secrets.GKE_KEY }} 32 | GOOGLE_APPLICATION_CREDENTIALS: /tmp/gcloud-sa.json 33 | CLIENT_ID: ${{ secrets.CLIENT_ID }} 34 | PIPELINE_CODE_PATH: "example_pipeline.py" 35 | PIPELINE_FUNCTION_NAME: "flipcoin_pipeline" 36 | PIPELINE_PARAMETERS_PATH: "parameters.yaml" 37 | EXPERIMENT_NAME: "Default" 38 | RUN_PIPELINE: False 39 | VERSION_GITHUB_SHA: False 40 | 41 | ``` 42 | 43 | If you would also like to run it, use the following: 44 | 45 | ```yaml 46 | name: Compile, Deploy and Run on Kubeflow 47 | on: [push] 48 | 49 | # Set environmental variables 50 | 51 | jobs: 52 | build: 53 | runs-on: ubuntu-18.04 54 | steps: 55 | - name: checkout files in repo 56 | uses: actions/checkout@master 57 | 58 | 59 | - name: Submit Kubeflow pipeline 60 | id: kubeflow 61 | uses: NikeNano/kubeflow-github-action@master 62 | with: 63 | KUBEFLOW_URL: ${{ secrets.KUBEFLOW_URL }} 64 | ENCODED_GOOGLE_APPLICATION_CREDENTIALS: ${{ secrets.GKE_KEY }} 65 | GOOGLE_APPLICATION_CREDENTIALS: /tmp/gcloud-sa.json 66 | CLIENT_ID: ${{ secrets.CLIENT_ID }} 67 | PIPELINE_CODE_PATH: "example_pipeline.py" 68 | PIPELINE_FUNCTION_NAME: "flipcoin_pipeline" 69 | PIPELINE_PARAMETERS_PATH: "parameters.yaml" 70 | EXPERIMENT_NAME: "Default" 71 | RUN_PIPELINE: True 72 | VERSION_GITHUB_SHA: False 73 | 74 | ``` 75 | The repo also contains an example where the containers in the pipeline are versioned with the GitHub hash in order to improve operations and error tracking. However, this requires the pipeline function to be wrapped in a function with one argument: 76 | 77 | ```python 78 | 79 | def pipeline(github_sha :str): 80 | ...
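    # Illustrative sketch (not the exact code from this repo): the wrapper builds
    # the kfp pipeline function and returns it, so that main.py can call
    # pipeline(github_sha=...) and compile the returned function, e.g.
    #
    #     @kfp.dsl.pipeline(name="Example pipeline github action")
    #     def timeseries_pipeline(gcp_bucket: str, project: str):
    #         ...
    #     return timeseries_pipeline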
81 | 82 | ``` 83 | 84 | The containers are versioned with the hash: 85 | 86 | 87 | ```python 88 | pre_image = f"gcr.io/{project}/pre_image:{github_sha}" 89 | train_forecast_image = f"gcr.io/{project}/train_forecast_image:{github_sha}" 90 | 91 | ``` 92 | 93 | For an example, see [here](https://github.com/NikeNano/kubeflow-github-action/blob/master/forecast_peython_wiki/deployment/pipline.py). 94 | 95 | ## Mandatory inputs 96 | 97 | 1) KUBEFLOW_URL: The URL to your Kubeflow deployment 98 | 2) GKE_KEY: Service account with access to Kubeflow and rights to deploy, see [here](http://amygdala.github.io/kubeflow/ml/2019/08/22/remote-deploy.html) for an example; the credentials need to be base64 encoded: 99 | 100 | ``` bash 101 | cat path-to-key.json | base64 102 | ``` 103 | 3) GOOGLE_APPLICATION_CREDENTIALS: The path where you would like to store the decoded secrets (decoded from GKE_KEY) 104 | 4) CLIENT_ID: The IAP client id 105 | 5) PIPELINE_CODE_PATH: The full path to the python file containing the pipeline 106 | 6) PIPELINE_FUNCTION_NAME: The name of the pipeline function in the PIPELINE_CODE_PATH file 107 | 7) PIPELINE_PARAMETERS_PATH: The path to the pipeline parameters file 108 | 8) EXPERIMENT_NAME: The name of the Kubeflow experiment within which the pipeline should run 109 | 9) RUN_PIPELINE: If you would like to also run the pipeline, set this to "True" 110 | 10) VERSION_GITHUB_SHA: Set to "True" if the pipeline containers are versioned with the GitHub hash 111 | 112 | 113 | # Future work 114 | 115 | Add support for scheduling pipeline runs as well. Coming soon! 116 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | import kfp 4 | import kfp.compiler as compiler 5 | import click 6 | import importlib.util 7 | import logging 8 | import sys 9 | from datetime import datetime 10 | 11 | 12 | logging.basicConfig(stream=sys.stdout, level=logging.INFO) 13 | 14 | 15 | def load_function(pipeline_function_name: str, full_path_to_pipeline: str) -> object: 16 | """Function to load a python function from a filepath and filename 17 | 18 | Arguments: 19 | pipeline_function_name {str} -- The name of the pipeline function 20 | full_path_to_pipeline {str} -- The full path name including the filename of the python file that 21 | describes the pipeline you want to run on Kubeflow 22 | 23 | Returns: 24 | object -- The loaded pipeline function 25 | """ 26 | logging.info( 27 | f"Loading the pipeline function from: {full_path_to_pipeline}") 28 | logging.info( 29 | f"The name of the pipeline function is: {pipeline_function_name}") 30 | spec = importlib.util.spec_from_file_location( 31 | pipeline_function_name, full_path_to_pipeline) 32 | foo = importlib.util.module_from_spec(spec) 33 | spec.loader.exec_module(foo) 34 | pipeline_func = getattr(foo, pipeline_function_name) 35 | logging.info("Successfully loaded the pipeline function.") 36 | return pipeline_func 37 | 38 | 39 | def pipeline_compile(pipeline_function: object) -> str: 40 | """Function to compile the pipeline. The pipeline is compiled to a zip file.
41 | 42 | Arguments: 43 | pipeline_function {object} -- The kubeflow pipeline function 44 | 45 | Returns: 46 | str -- The name of the compiled kubeflow pipeline 47 | """ 48 | pipeline_name_zip = pipeline_function.__name__ + ".zip" 49 | compiler.Compiler().compile(pipeline_function, pipeline_name_zip) 50 | logging.info("The pipeline function is compiled.") 51 | return pipeline_name_zip 52 | 53 | 54 | def upload_pipeline(pipeline_name_zip: str, pipeline_name: str, kubeflow_url: str, client_id: str): 55 | """Function to upload the pipeline to kubeflow. 56 | 57 | Arguments: 58 | pipeline_name_zip {str} -- The name of the compiled pipeline package. 59 | pipeline_name {str} -- The name of the pipeline function. This will be the name in the kubeflow UI. 60 | """ 61 | client = kfp.Client( 62 | host=kubeflow_url, 63 | client_id=client_id, 64 | ) 65 | client.upload_pipeline( 66 | pipeline_package_path=pipeline_name_zip, 67 | pipeline_name=pipeline_name) 68 | return client 69 | 70 | 71 | def find_pipeline_id(pipeline_name: str, client: kfp.Client, page_size: int = 100, page_token: str = "") -> str: 72 | """Function to find the pipeline id of a pipeline. 73 | 74 | Arguments: 75 | pipeline_name {str} -- The name of the pipeline of interest 76 | client {kfp.Client} -- The kfp client 77 | page_size {int} -- The number of pipelines to collect per API request 78 | 79 | Keyword Arguments: 80 | page_token {str} -- The page token to use for the API request (default: {""}) 81 | 82 | Returns: 83 | str -- The pipeline id, or None if there is no match 84 | """ 85 | while True: 86 | pipelines = client.list_pipelines( 87 | page_size=page_size, page_token=page_token) 88 | for pipeline in pipelines.pipelines: 89 | if pipeline.name == pipeline_name: 90 | logging.info(f"The pipeline id is: {pipeline.id}") 91 | return pipeline.id 92 | # Need to know where to start the next iteration from 93 | page_token = pipelines.next_page_token 94 | # If there is no next token, break 95 | if not page_token: 96 | logging.info( 97 | f"Could not find the pipeline, is the name: {pipeline_name} correct?") 98 | break 99 | 100 | 101 | def find_experiment_id(experiment_name: str, client: kfp.Client, page_size: int = 100, page_token: str = "") -> str: 102 | """Function to return the experiment id 103 | 104 | Arguments: 105 | experiment_name {str} -- The experiment name 106 | client {kfp.Client} -- The kfp client 107 | 108 | Returns: 109 | str -- The experiment id 110 | """ 111 | while True: 112 | experiments = client.list_experiments( 113 | page_size=page_size, page_token=page_token) 114 | for experiment in experiments.experiments: 115 | if experiment.name == experiment_name: 116 | logging.info("Successfully collected the experiment id") 117 | return experiment.id 118 | # Need to know where to start the next iteration from 119 | page_token = experiments.next_page_token 120 | # If there is no next token, break 121 | if not page_token: 122 | logging.info( 123 | f"Could not find the experiment id, is the experiment name: {experiment_name} correct?") 124 | break 125 | 126 | 127 | def read_pipeline_params(pipeline_paramters_path: str) -> dict: 128 | """Read the pipeline parameters from a yaml file and return them as a dict.""" 129 | pipeline_params = {} 130 | with open(pipeline_paramters_path) as f: 131 | try: 132 | pipeline_params = yaml.safe_load(f) 133 | logging.info(f"The pipeline parameters are: {pipeline_params}") 134 | except yaml.YAMLError as exc: 135 | logging.info("The yaml parameters could not be loaded correctly.") 136 | raise ValueError( 137 | "The yaml parameters could not be loaded correctly.") 138 | logging.info(f"The parameters are: {pipeline_params}") 139 | return pipeline_params 140 | 141 | 142 | def run_pipeline(client: kfp.Client, pipeline_name: str, pipeline_id: str, pipeline_paramters_path: str): 143 | experiment_id = find_experiment_id( 144 | experiment_name=os.environ["INPUT_EXPERIMENT_NAME"], client=client) 145 | if not experiment_id: 146 | raise ValueError("Failed to find experiment with the name: {}".format( 147 | os.environ["INPUT_EXPERIMENT_NAME"])) 148 | logging.info(f"The experiment id is: {experiment_id}") 149 | namespace = None 150 | if (os.getenv("INPUT_PIPELINE_NAMESPACE") != None) and (str.isspace(os.getenv("INPUT_PIPELINE_NAMESPACE")) == False) and os.getenv("INPUT_PIPELINE_NAMESPACE"): 151 | namespace = os.environ["INPUT_PIPELINE_NAMESPACE"] 152 | logging.info(f"The namespace that will be used is: {namespace}") 153 | # [TODO] What would be a good way to name the jobs? 154 | job_name = pipeline_name + datetime.now().strftime("%Y_%m_%d_%H_%M_%S") 155 | logging.info(f"The job name is: {job_name}") 156 | 157 | pipeline_params = read_pipeline_params( 158 | pipeline_paramters_path=pipeline_paramters_path) 159 | pipeline_params = pipeline_params if pipeline_params is not None else {} 160 | logging.info( 161 | f"experiment_id: {experiment_id}, job_name:{job_name}, pipeline_params:{pipeline_params}, pipeline_id:{pipeline_id}, namespace:{namespace}") 162 | client.run_pipeline( 163 | experiment_id=experiment_id, 164 | job_name=job_name, 165 | # The parameters are read from a yaml file; people seem to prefer that to json.
166 | params=pipeline_params, 167 | pipeline_id=pipeline_id, 168 | namespace=namespace) 169 | logging.info( 170 | "Successfully started the pipeline, head over to kubeflow to check it out") 171 | 172 | 173 | def main(): 174 | logging.info( 175 | "Started the process to compile and upload the pipeline to kubeflow.") 176 | os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = os.environ["INPUT_GOOGLE_APPLICATION_CREDENTIALS"] 177 | pipeline_function = load_function(pipeline_function_name=os.environ['INPUT_PIPELINE_FUNCTION_NAME'], 178 | full_path_to_pipeline=os.environ['INPUT_PIPELINE_CODE_PATH']) 179 | logging.info("The value of the VERSION_GITHUB_SHA is: {}".format( 180 | os.environ["INPUT_VERSION_GITHUB_SHA"])) 181 | if os.environ["INPUT_VERSION_GITHUB_SHA"] == "true": 182 | logging.info("Versioned pipeline components") 183 | pipeline_function = pipeline_function( 184 | github_sha=os.environ["GITHUB_SHA"]) 185 | pipeline_name_zip = pipeline_compile(pipeline_function=pipeline_function) 186 | pipeline_name = os.environ['INPUT_PIPELINE_FUNCTION_NAME'] + \ 187 | "_" + os.environ["GITHUB_SHA"] 188 | client = upload_pipeline(pipeline_name_zip=pipeline_name_zip, 189 | pipeline_name=pipeline_name, 190 | kubeflow_url=os.environ['INPUT_KUBEFLOW_URL'], 191 | client_id=os.environ["INPUT_CLIENT_ID"]) 192 | logging.info(os.getenv("INPUT_RUN_PIPELINE")) 193 | logging.info(os.environ["INPUT_EXPERIMENT_NAME"]) 194 | if os.getenv("INPUT_RUN_PIPELINE") == "true" and os.environ["INPUT_EXPERIMENT_NAME"]: 195 | logging.info("Started the process to run the pipeline on kubeflow.") 196 | pipeline_id = find_pipeline_id(pipeline_name=pipeline_name, 197 | client=client) 198 | run_pipeline(pipeline_name=pipeline_name, 199 | pipeline_id=pipeline_id, 200 | client=client, 201 | pipeline_paramters_path=os.environ["INPUT_PIPELINE_PARAMETERS_PATH"]) 202 | 203 | 204 | if __name__ == "__main__": 205 | main() 206 | -------------------------------------------------------------------------------- /client.py: -------------------------------------------------------------------------------- 1 | import time 2 | import logging 3 | import json 4 | import os 5 | import re 6 | import tarfile 7 | import tempfile 8 | import warnings 9 | import yaml 10 | import zipfile 11 | import string 12 | import random 13 | import kfp 14 | import kfp_server_api 15 | 16 | from datetime import datetime 17 | from typing import Mapping, Callable 18 | from kfp.compiler import compiler 19 | from kfp.compiler._k8s_helper import sanitize_k8s_name 20 | from kfp._auth import get_auth_token, get_gcp_access_token 21 | 22 | 23 | def _add_generated_apis(target_struct, api_module, api_client): 24 | '''Initializes a hierarchical API object based on the generated API module. 
25 | PipelineServiceApi.create_pipeline becomes target_struct.pipelines.create_pipeline 26 | ''' 27 | Struct = type('Struct', (), {}) 28 | 29 | def camel_case_to_snake_case(name): 30 | import re 31 | return re.sub('([a-z0-9])([A-Z])', r'\1_\2', name).lower() 32 | 33 | for api_name in dir(api_module): 34 | if not api_name.endswith('ServiceApi'): 35 | continue 36 | 37 | short_api_name = camel_case_to_snake_case( 38 | api_name[0:-len('ServiceApi')]) + 's' 39 | api_struct = Struct() 40 | setattr(target_struct, short_api_name, api_struct) 41 | service_api = getattr(api_module.api, api_name) 42 | initialized_service_api = service_api(api_client) 43 | for member_name in dir(initialized_service_api): 44 | if member_name.startswith('_') or member_name.endswith('_with_http_info'): 45 | continue 46 | 47 | bound_member = getattr(initialized_service_api, member_name) 48 | setattr(api_struct, member_name, bound_member) 49 | models_struct = Struct() 50 | for member_name in dir(api_module.models): 51 | if not member_name[0].islower(): 52 | setattr(models_struct, member_name, getattr( 53 | api_module.models, member_name)) 54 | target_struct.api_models = models_struct 55 | 56 | 57 | KF_PIPELINES_ENDPOINT_ENV = 'KF_PIPELINES_ENDPOINT' 58 | KF_PIPELINES_UI_ENDPOINT_ENV = 'KF_PIPELINES_UI_ENDPOINT' 59 | KF_PIPELINES_DEFAULT_EXPERIMENT_NAME = 'KF_PIPELINES_DEFAULT_EXPERIMENT_NAME' 60 | KF_PIPELINES_OVERRIDE_EXPERIMENT_NAME = 'KF_PIPELINES_OVERRIDE_EXPERIMENT_NAME' 61 | 62 | 63 | class Client(object): 64 | """ API Client for KubeFlow Pipeline. 65 | """ 66 | 67 | # in-cluster DNS name of the pipeline service 68 | IN_CLUSTER_DNS_NAME = 'ml-pipeline.{}.svc.cluster.local:8888' 69 | KUBE_PROXY_PATH = 'api/v1/namespaces/{}/services/ml-pipeline:http/proxy/' 70 | 71 | # TODO: Wrap the configurations for different authentication methods. 72 | def __init__(self, host=None, client_id=None, namespace='kubeflow', other_client_id=None, other_client_secret=None): 73 | """Create a new instance of kfp client. 74 | Args: 75 | host: the host name to use to talk to Kubeflow Pipelines. If not set, the in-cluster 76 | service DNS name will be used, which only works if the current environment is a pod 77 | in the same cluster (such as a Jupyter instance spawned by Kubeflow's 78 | JupyterHub). If you have a different connection to cluster, such as a kubectl 79 | proxy connection, then set it to something like "127.0.0.1:8080/pipeline. 80 | If you connect to an IAP enabled cluster, set it to 81 | https://.endpoints..cloud.goog/pipeline". 82 | client_id: The client ID used by Identity-Aware Proxy. 83 | namespace: the namespace where the kubeflow pipeline system is run. 84 | other_client_id: The client ID used to obtain the auth codes and refresh tokens. 85 | Reference: https://cloud.google.com/iap/docs/authentication-howto#authenticating_from_a_desktop_app. 86 | other_client_secret: The client secret used to obtain the auth codes and refresh tokens. 
87 | """ 88 | host = host or os.environ.get(KF_PIPELINES_ENDPOINT_ENV) 89 | self._uihost = os.environ.get(KF_PIPELINES_UI_ENDPOINT_ENV, host) 90 | config = self._load_config( 91 | host, client_id, namespace, other_client_id, other_client_secret) 92 | api_client = kfp_server_api.api_client.ApiClient(config) 93 | _add_generated_apis(self, kfp_server_api, api_client) 94 | 95 | self._run_api = kfp_server_api.api.run_service_api.RunServiceApi( 96 | api_client) 97 | self._job_api = kfp_server_api.api.job_service_api.JobServiceApi( 98 | api_client) 99 | self._experiment_api = kfp_server_api.api.experiment_service_api.ExperimentServiceApi( 100 | api_client) 101 | self._pipelines_api = kfp_server_api.api.pipeline_service_api.PipelineServiceApi( 102 | api_client) 103 | self._upload_api = kfp_server_api.api.PipelineUploadServiceApi( 104 | api_client) 105 | 106 | def _load_config(self, host, client_id, namespace, other_client_id, other_client_secret): 107 | config = kfp_server_api.configuration.Configuration() 108 | if host: 109 | config.host = host 110 | 111 | token = None 112 | 113 | # Obtain the tokens if it is inverse proxy or IAP. 114 | if self._is_inverse_proxy_host(host): 115 | token = get_gcp_access_token() 116 | if self._is_iap_host(host, client_id): 117 | token = get_auth_token( 118 | client_id, other_client_id, other_client_secret) 119 | 120 | if token: 121 | config.api_key['authorization'] = token 122 | config.api_key_prefix['authorization'] = 'Bearer' 123 | return config 124 | 125 | if host: 126 | # if host is explicitly set with auth token, it's probably a port forward address. 127 | return config 128 | 129 | import kubernetes as k8s 130 | in_cluster = True 131 | try: 132 | k8s.config.load_incluster_config() 133 | except: 134 | in_cluster = False 135 | pass 136 | 137 | if in_cluster: 138 | config.host = Client.IN_CLUSTER_DNS_NAME.format(namespace) 139 | return config 140 | 141 | try: 142 | k8s.config.load_kube_config(client_configuration=config) 143 | except: 144 | print('Failed to load kube config.') 145 | return config 146 | 147 | if config.host: 148 | config.host = config.host + '/' + \ 149 | Client.KUBE_PROXY_PATH.format(namespace) 150 | return config 151 | 152 | def _is_iap_host(self, host, client_id): 153 | if host and client_id: 154 | if re.match(r'\S+.endpoints.\S+.cloud.goog/{0,1}$', host): 155 | warnings.warn( 156 | 'Suffix /pipeline is not ignorable for IAP host.') 157 | return re.match(r'\S+.endpoints.\S+.cloud.goog/pipeline', host) 158 | return False 159 | 160 | def _is_inverse_proxy_host(self, host): 161 | if host: 162 | return re.match(r'\S+.googleusercontent.com/{0,1}$', host) 163 | return False 164 | 165 | def _is_ipython(self): 166 | """Returns whether we are running in notebook.""" 167 | try: 168 | import IPython 169 | ipy = IPython.get_ipython() 170 | if ipy is None: 171 | return False 172 | except ImportError: 173 | return False 174 | 175 | return True 176 | 177 | def _get_url_prefix(self): 178 | if self._uihost: 179 | # User's own connection. 180 | if self._uihost.startswith('http://') or self._uihost.startswith('https://'): 181 | return self._uihost 182 | else: 183 | return 'http://' + self._uihost 184 | 185 | # In-cluster pod. We could use relative URL. 186 | return '/pipeline' 187 | 188 | def create_experiment(self, name, description=None): 189 | """Create a new experiment. 190 | Args: 191 | name: the name of the experiment. 192 | description: description of the experiment 193 | Returns: 194 | An Experiment object. Most important field is id. 
195 | """ 196 | 197 | experiment = None 198 | try: 199 | experiment = self.get_experiment(experiment_name=name) 200 | except: 201 | # Ignore error if the experiment does not exist. 202 | pass 203 | 204 | if not experiment: 205 | logging.info('Creating experiment {}.'.format(name)) 206 | experiment = kfp_server_api.models.ApiExperiment( 207 | name=name, description=description) 208 | experiment = self._experiment_api.create_experiment( 209 | body=experiment) 210 | 211 | if self._is_ipython(): 212 | import IPython 213 | html = \ 214 | ('Experiment link here' 215 | % (self._get_url_prefix(), experiment.id)) 216 | IPython.display.display(IPython.display.HTML(html)) 217 | return experiment 218 | 219 | def list_experiments(self, page_token='', page_size=10, sort_by=''): 220 | """List experiments. 221 | Args: 222 | page_token: token for starting of the page. 223 | page_size: size of the page. 224 | sort_by: can be '[field_name]', '[field_name] des'. For example, 'name des'. 225 | Returns: 226 | A response object including a list of experiments and next page token. 227 | """ 228 | response = self._experiment_api.list_experiment( 229 | page_token=page_token, page_size=page_size, sort_by=sort_by) 230 | return response 231 | 232 | def get_experiment(self, experiment_id=None, experiment_name=None): 233 | """Get details of an experiment 234 | Either experiment_id or experiment_name is required 235 | Args: 236 | experiment_id: id of the experiment. (Optional) 237 | experiment_name: name of the experiment. (Optional) 238 | Returns: 239 | A response object including details of a experiment. 240 | Throws: 241 | Exception if experiment is not found or None of the arguments is provided 242 | """ 243 | if experiment_id is None and experiment_name is None: 244 | raise ValueError( 245 | 'Either experiment_id or experiment_name is required') 246 | if experiment_id is not None: 247 | return self._experiment_api.get_experiment(id=experiment_id) 248 | next_page_token = '' 249 | while next_page_token is not None: 250 | list_experiments_response = self.list_experiments( 251 | page_size=100, page_token=next_page_token) 252 | next_page_token = list_experiments_response.next_page_token 253 | for experiment in list_experiments_response.experiments: 254 | if experiment.name == experiment_name: 255 | return self._experiment_api.get_experiment(id=experiment.id) 256 | raise ValueError( 257 | 'No experiment is found with name {}.'.format(experiment_name)) 258 | 259 | def _extract_pipeline_yaml(self, package_file): 260 | def _choose_pipeline_yaml_file(file_list) -> str: 261 | yaml_files = [file for file in file_list if file.endswith('.yaml')] 262 | if len(yaml_files) == 0: 263 | raise ValueError( 264 | 'Invalid package. Missing pipeline yaml file in the package.') 265 | 266 | if 'pipeline.yaml' in yaml_files: 267 | return 'pipeline.yaml' 268 | else: 269 | if len(yaml_files) == 1: 270 | return yaml_files[0] 271 | raise ValueError( 272 | 'Invalid package. 
There is no pipeline.yaml file and there are multiple yaml files.') 273 | 274 | if package_file.endswith('.tar.gz') or package_file.endswith('.tgz'): 275 | with tarfile.open(package_file, "r:gz") as tar: 276 | file_names = [member.name for member in tar if member.isfile()] 277 | pipeline_yaml_file = _choose_pipeline_yaml_file(file_names) 278 | with tar.extractfile(tar.getmember(pipeline_yaml_file)) as f: 279 | return yaml.safe_load(f) 280 | elif package_file.endswith('.zip'): 281 | with zipfile.ZipFile(package_file, 'r') as zip: 282 | pipeline_yaml_file = _choose_pipeline_yaml_file(zip.namelist()) 283 | with zip.open(pipeline_yaml_file) as f: 284 | return yaml.safe_load(f) 285 | elif package_file.endswith('.yaml') or package_file.endswith('.yml'): 286 | with open(package_file, 'r') as f: 287 | return yaml.safe_load(f) 288 | else: 289 | raise ValueError('The package_file ' + package_file + 290 | ' should ends with one of the following formats: [.tar.gz, .tgz, .zip, .yaml, .yml]') 291 | 292 | def list_pipelines(self, page_token='', page_size=10, sort_by=''): 293 | """List pipelines. 294 | Args: 295 | page_token: token for starting of the page. 296 | page_size: size of the page. 297 | sort_by: one of 'field_name', 'field_name des'. For example, 'name des'. 298 | Returns: 299 | A response object including a list of pipelines and next page token. 300 | """ 301 | return self._pipelines_api.list_pipelines(page_token=page_token, page_size=page_size, sort_by=sort_by) 302 | 303 | # TODO: provide default namespace, similar to kubectl default namespaces. 304 | def run_pipeline(self, experiment_id, job_name, pipeline_package_path=None, params={}, pipeline_id=None, namespace=None): 305 | """Run a specified pipeline. 306 | Args: 307 | experiment_id: The string id of an experiment. 308 | job_name: name of the job. 309 | pipeline_package_path: local path of the pipeline package(the filename should end with one of the following .tar.gz, .tgz, .zip, .yaml, .yml). 310 | params: a dictionary with key (string) as param name and value (string) as as param value. 311 | pipeline_id: the string ID of a pipeline. 312 | namespace: kubernetes namespace where the pipeline runs are created. 313 | For single user deployment, leave it as None; 314 | For multi user, input a namespace where the user is authorized 315 | Returns: 316 | A run object. Most important field is id. 
317 | """ 318 | 319 | pipeline_json_string = None 320 | if pipeline_package_path: 321 | pipeline_obj = self._extract_pipeline_yaml(pipeline_package_path) 322 | pipeline_json_string = json.dumps(pipeline_obj) 323 | api_params = [kfp_server_api.ApiParameter( 324 | name=sanitize_k8s_name(name=k, allow_capital_underscore=True), 325 | value=str(v)) for k, v in params.items()] 326 | resource_references = [] 327 | 328 | key = kfp_server_api.models.ApiResourceKey(id=experiment_id, 329 | type=kfp_server_api.models.ApiResourceType.EXPERIMENT) 330 | reference = kfp_server_api.models.ApiResourceReference(key=key, 331 | relationship=kfp_server_api.models.ApiRelationship.OWNER) 332 | resource_references.append(reference) 333 | if namespace is not None: 334 | key = kfp_server_api.models.ApiResourceKey(id=namespace, 335 | type=kfp_server_api.models.ApiResourceType.NAMESPACE) 336 | reference = kfp_server_api.models.ApiResourceReference(key=key, 337 | name=namespace, 338 | relationship=kfp_server_api.models.ApiRelationship.OWNER) 339 | resource_references.append(reference) 340 | spec = kfp_server_api.models.ApiPipelineSpec( 341 | pipeline_id=pipeline_id, 342 | workflow_manifest=pipeline_json_string, 343 | parameters=api_params) 344 | run_body = kfp_server_api.models.ApiRun( 345 | pipeline_spec=spec, resource_references=resource_references, name=job_name) 346 | 347 | response = self._run_api.create_run(body=run_body) 348 | 349 | if self._is_ipython(): 350 | import IPython 351 | html = ('Run link here' 352 | % (self._get_url_prefix(), response.run.id)) 353 | IPython.display.display(IPython.display.HTML(html)) 354 | return response.run 355 | 356 | def schedule_pipeline(self, experiment_id, job_name, pipeline_package_path=None, params={}, pipeline_id=None, namespace=None): 357 | """Schedule pipeline on kubeflow to run based upon a cron job 358 | 359 | Arguments: 360 | experiment_id {[type]} -- The expriment within which we would like kubeflow 361 | job_name {[type]} -- The name of the scheduled job 362 | 363 | Keyword Arguments: 364 | pipeline_package_path {[type]} -- The path to the pipeline package (default: {None}) 365 | params {dict} -- The pipeline parameters (default: {{}}) 366 | pipeline_id {[type]} -- The id of the pipeline which should run on schedule (default: {None}) 367 | namespace {[type]} -- The name space with which the pipeline should run (default: {None}) 368 | """ 369 | 370 | pipeline_json_string = None 371 | if pipeline_package_path: 372 | pipeline_obj = self._extract_pipeline_yaml(pipeline_package_path) 373 | pipeline_json_string = json.dumps(pipeline_obj) 374 | api_params = [kfp_server_api.ApiParameter( 375 | name=sanitize_k8s_name(name=k, allow_capital_underscore=True), 376 | value=str(v)) for k, v in params.items()] 377 | resource_references = [] 378 | 379 | key = kfp_server_api.models.ApiResourceKey(id=experiment_id, 380 | type=kfp_server_api.models.ApiResourceType.EXPERIMENT) 381 | reference = kfp_server_api.models.ApiResourceReference(key=key, 382 | relationship=kfp_server_api.models.ApiRelationship.OWNER) 383 | resource_references.append(reference) 384 | if namespace is not None: 385 | key = kfp_server_api.models.ApiResourceKey(id=namespace, 386 | type=kfp_server_api.models.ApiResourceType.NAMESPACE) 387 | reference = kfp_server_api.models.ApiResourceReference(key=key, 388 | name=namespace, 389 | relationship=kfp_server_api.models.ApiRelationship.OWNER) 390 | resource_references.append(reference) 391 | spec = kfp_server_api.models.ApiPipelineSpec( 392 | pipeline_id=pipeline_id, 393 | 
workflow_manifest=pipeline_json_string, 394 | parameters=api_params) 395 | 396 | trigger = kfp_server_api.models.api_cron_schedule.ApiCronSchedule( 397 | cron="0 0 9 ? * 2-6") 398 | job_id = ''.join(random.choices( 399 | string.ascii_uppercase + string.digits, k=10)) 400 | schedule_body = kfp_server_api.models.ApiJob( 401 | id=job_id, 402 | name="TestScheduling", 403 | description="Schedule the pipeline using the API", 404 | pipeline_spec=spec, 405 | resource_references=resource_references, 406 | max_concurrency=10, 407 | trigger=trigger, 408 | enabled=True, 409 | ) 410 | 411 | def create_run_from_pipeline_func(self, pipeline_func: Callable, arguments: Mapping[str, str], run_name=None, experiment_name=None, pipeline_conf: kfp.dsl.PipelineConf = None, namespace=None): 412 | '''Runs pipeline on KFP-enabled Kubernetes cluster. 413 | This command compiles the pipeline function, creates or gets an experiment and submits the pipeline for execution. 414 | Args: 415 | pipeline_func: A function that describes a pipeline by calling components and composing them into execution graph. 416 | arguments: Arguments to the pipeline function provided as a dict. 417 | run_name: Optional. Name of the run to be shown in the UI. 418 | experiment_name: Optional. Name of the experiment to add the run to. 419 | namespace: kubernetes namespace where the pipeline runs are created. 420 | For single user deployment, leave it as None; 421 | For multi user, input a namespace where the user is authorized 422 | ''' 423 | # TODO: Check arguments against the pipeline function 424 | pipeline_name = pipeline_func.__name__ 425 | run_name = run_name or pipeline_name + ' ' + \ 426 | datetime.now().strftime('%Y-%m-%d %H-%M-%S') 427 | try: 428 | (_, pipeline_package_path) = tempfile.mkstemp(suffix='.zip') 429 | compiler.Compiler().compile(pipeline_func, pipeline_package_path, 430 | pipeline_conf=pipeline_conf) 431 | return self.create_run_from_pipeline_package(pipeline_package_path, arguments, run_name, experiment_name, namespace) 432 | finally: 433 | os.remove(pipeline_package_path) 434 | 435 | def create_run_from_pipeline_package(self, pipeline_file: str, arguments: Mapping[str, str], run_name=None, experiment_name=None, namespace=None): 436 | '''Runs pipeline on KFP-enabled Kubernetes cluster. 437 | This command compiles the pipeline function, creates or gets an experiment and submits the pipeline for execution. 438 | Args: 439 | pipeline_file: A compiled pipeline package file. 440 | arguments: Arguments to the pipeline function provided as a dict. 441 | run_name: Optional. Name of the run to be shown in the UI. 442 | experiment_name: Optional. Name of the experiment to add the run to. 443 | namespace: kubernetes namespace where the pipeline runs are created. 
444 | For single user deployment, leave it as None; 445 | For multi user, input a namespace where the user is authorized 446 | ''' 447 | 448 | class RunPipelineResult: 449 | def __init__(self, client, run_info): 450 | self._client = client 451 | self.run_info = run_info 452 | self.run_id = run_info.id 453 | 454 | def wait_for_run_completion(self, timeout=None): 455 | timeout = timeout or datetime.datetime.max - datetime.datetime.min 456 | return self._client.wait_for_run_completion(self.run_id, timeout) 457 | 458 | def __repr__(self): 459 | return 'RunPipelineResult(run_id={})'.format(self.run_id) 460 | 461 | # TODO: Check arguments against the pipeline function 462 | pipeline_name = os.path.basename(pipeline_file) 463 | experiment_name = experiment_name or os.environ.get( 464 | KF_PIPELINES_DEFAULT_EXPERIMENT_NAME, None) 465 | overridden_experiment_name = os.environ.get( 466 | KF_PIPELINES_OVERRIDE_EXPERIMENT_NAME, experiment_name) 467 | if overridden_experiment_name != experiment_name: 468 | import warnings 469 | warnings.warn('Changing experiment name from "{}" to "{}".'.format( 470 | experiment_name, overridden_experiment_name)) 471 | experiment_name = overridden_experiment_name or 'Default' 472 | run_name = run_name or pipeline_name + ' ' + \ 473 | datetime.now().strftime('%Y-%m-%d %H-%M-%S') 474 | experiment = self.create_experiment(name=experiment_name) 475 | run_info = self.run_pipeline( 476 | experiment.id, run_name, pipeline_file, arguments, namespace=namespace) 477 | return RunPipelineResult(self, run_info) 478 | 479 | def list_runs(self, page_token='', page_size=10, sort_by='', experiment_id=None): 480 | """List runs. 481 | Args: 482 | page_token: token for starting of the page. 483 | page_size: size of the page. 484 | sort_by: one of 'field_name', 'field_name des'. For example, 'name des'. 485 | experiment_id: experiment id to filter upon 486 | Returns: 487 | A response object including a list of experiments and next page token. 488 | """ 489 | if experiment_id is not None: 490 | response = self._run_api.list_runs(page_token=page_token, page_size=page_size, sort_by=sort_by, 491 | resource_reference_key_type=kfp_server_api.models.api_resource_type.ApiResourceType.EXPERIMENT, resource_reference_key_id=experiment_id) 492 | else: 493 | response = self._run_api.list_runs( 494 | page_token=page_token, page_size=page_size, sort_by=sort_by) 495 | return response 496 | 497 | def get_run(self, run_id): 498 | """Get run details. 499 | Args: 500 | id of the run. 501 | Returns: 502 | A response object including details of a run. 503 | Throws: 504 | Exception if run is not found. 505 | """ 506 | return self._run_api.get_run(run_id=run_id) 507 | 508 | def wait_for_run_completion(self, run_id, timeout): 509 | """Wait for a run to complete. 510 | Args: 511 | run_id: run id, returned from run_pipeline. 512 | timeout: timeout in seconds. 
513 | Returns: 514 | A run detail object: Most important fields are run and pipeline_runtime 515 | """ 516 | status = 'Running:' 517 | start_time = datetime.now() 518 | while status is None or status.lower() not in ['succeeded', 'failed', 'skipped', 'error']: 519 | get_run_response = self._run_api.get_run(run_id=run_id) 520 | status = get_run_response.run.status 521 | elapsed_time = (datetime.now() - start_time).seconds 522 | logging.info('Waiting for the job to complete...') 523 | if elapsed_time > timeout: 524 | raise TimeoutError('Run timeout') 525 | time.sleep(5) 526 | return get_run_response 527 | 528 | def _get_workflow_json(self, run_id): 529 | """Get the workflow json. 530 | Args: 531 | run_id: run id, returned from run_pipeline. 532 | Returns: 533 | workflow: json workflow 534 | """ 535 | get_run_response = self._run_api.get_run(run_id=run_id) 536 | workflow = get_run_response.pipeline_runtime.workflow_manifest 537 | workflow_json = json.loads(workflow) 538 | return workflow_json 539 | 540 | def upload_pipeline(self, pipeline_package_path, pipeline_name=None): 541 | """Uploads the pipeline to the Kubeflow Pipelines cluster. 542 | Args: 543 | pipeline_package_path: Local path to the pipeline package. 544 | pipeline_name: Optional. Name of the pipeline to be shown in the UI. 545 | Returns: 546 | Server response object containing pipleine id and other information. 547 | """ 548 | 549 | response = self._upload_api.upload_pipeline( 550 | pipeline_package_path, name=pipeline_name) 551 | if self._is_ipython(): 552 | import IPython 553 | html = 'Pipeline link here' % ( 554 | self._get_url_prefix(), response.id) 555 | IPython.display.display(IPython.display.HTML(html)) 556 | return response 557 | 558 | def get_pipeline(self, pipeline_id): 559 | """Get pipeline details. 560 | Args: 561 | id of the pipeline. 562 | Returns: 563 | A response object including details of a pipeline. 564 | Throws: 565 | Exception if pipeline is not found. 566 | """ 567 | return self._pipelines_api.get_pipeline(id=pipeline_id) 568 | --------------------------------------------------------------------------------