├── .dockerignore ├── parameters.yaml ├── requirements.txt ├── forecast_peython_wiki ├── preprocess │ ├── Dockerfile │ ├── requirements.txt │ └── main.py ├── train_forecast │ ├── Dockerfile │ ├── requirements.txt │ └── main.py ├── parameters.yaml └── deployment │ └── pipline.py ├── entrypoint.sh ├── Dockerfile ├── .github └── workflows │ ├── test-action-compile-deploy-run.yaml │ └── versioning_pipeline_action.yaml ├── action.yml ├── .gitignore ├── example_pipeline.py ├── README.md ├── main.py └── client.py /.dockerignore: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /parameters.yaml: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | kfp==0.2 -------------------------------------------------------------------------------- /forecast_peython_wiki/preprocess/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.7-stretch 2 | 3 | COPY . . 4 | 5 | RUN pip install -r requirements.txt 6 | -------------------------------------------------------------------------------- /forecast_peython_wiki/train_forecast/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.7-stretch 2 | 3 | COPY . . 4 | 5 | RUN pip install -r requirements.txt -------------------------------------------------------------------------------- /forecast_peython_wiki/preprocess/requirements.txt: -------------------------------------------------------------------------------- 1 | click==7.0 2 | wget==3.2 3 | google-cloud-storage==1.25 4 | google==2.0.* 5 | pandas==1.0.* -------------------------------------------------------------------------------- /entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "${INPUT_ENCODED_GOOGLE_APPLICATION_CREDENTIALS}" | base64 -d > ${INPUT_GOOGLE_APPLICATION_CREDENTIALS} 4 | python /main.py -------------------------------------------------------------------------------- /forecast_peython_wiki/train_forecast/requirements.txt: -------------------------------------------------------------------------------- 1 | click==7.0 2 | wget==3.2 3 | google-cloud-storage==1.25 4 | google==2.0.* 5 | pandas==1.0.* 6 | fbprophet==0.5 7 | holidays==0.9.12 -------------------------------------------------------------------------------- /forecast_peython_wiki/parameters.yaml: -------------------------------------------------------------------------------- 1 | gcp_bucket: 2 | github_action 3 | project: 4 | kubeflow-github-267119 5 | train_data: 6 | train_data.csv 7 | forecast_data: 8 | forecat_data.csv -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.7-stretch 2 | 3 | LABEL "com.github.actions.name"="Submit Kubeflow Pipeline From GitHub" 4 | LABEL "com.github.actions.icon"="upload-cloud" 5 | LABEL "com.github.actions.color"="purple" 6 | 7 | COPY . . 
8 | 9 | RUN chmod +x /entrypoint.sh 10 | 11 | RUN pip install -r requirements.txt 12 | 13 | ENTRYPOINT ["/entrypoint.sh"] 14 | -------------------------------------------------------------------------------- /.github/workflows/test-action-compile-deploy-run.yaml: -------------------------------------------------------------------------------- 1 | name: Compile, Deploy and Run on Kubeflow 2 | on: [push] 3 | 4 | # Set environmental variables 5 | 6 | jobs: 7 | build: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: checkout files in repo 11 | uses: actions/checkout@master 12 | 13 | 14 | - name: Submit Kubeflow pipeline 15 | id: kubeflow 16 | uses: NikeNano/kubeflow-github-action@master 17 | with: 18 | KUBEFLOW_URL: ${{ secrets.KUBEFLOW_URL }} 19 | ENCODED_GOOGLE_APPLICATION_CREDENTIALS: ${{ secrets.GKE_KEY }} 20 | GOOGLE_APPLICATION_CREDENTIALS: /tmp/gcloud-sa.json 21 | CLIENT_ID: ${{ secrets.CLIENT_ID }} 22 | PIPELINE_CODE_PATH: "example_pipeline.py" 23 | PIPELINE_FUNCTION_NAME: "flipcoin_pipeline" 24 | PIPELINE_PARAMETERS_PATH: "parameters.yaml" 25 | EXPERIMENT_NAME: "Default" 26 | RUN_PIPELINE: True 27 | VERSION_GITHUB_SHA: False 28 | -------------------------------------------------------------------------------- /forecast_peython_wiki/preprocess/main.py: -------------------------------------------------------------------------------- 1 | import click 2 | import wget 3 | import logging 4 | 5 | from google.cloud import storage 6 | 7 | 8 | def upload_blob(bucket_name: str, source_file_name: str, destination_blob_name: str): 9 | """Function to upload to gcp bucket 10 | 11 | Arguments: 12 | bucket_name {str} -- The name of the bucket. 13 | source_file_name {str} -- The name of the source file that should be uploaded. 14 | destination_blob_name {str} -- The name of the file in the bucket. 15 | """ 16 | storage_client = storage.Client() 17 | bucket = storage_client.bucket(bucket_name) 18 | blob = bucket.blob(destination_blob_name) 19 | blob.upload_from_filename(source_file_name) 20 | logging.info( 21 | "File {} uploaded to {}.".format( 22 | source_file_name, destination_blob_name 23 | ) 24 | ) 25 | 26 | 27 | @click.command() 28 | @click.option("--url", default="https://raw.githubusercontent.com/facebook/prophet/master/examples/example_wp_log_peyton_manning.csv", 29 | help="the file of interest", required=False) 30 | @click.option("--bucket", required=True, help="The name of the gcp bucket") 31 | @click.option("--destination_blob_name", default="raw_data.csv", help="The raw data filename", required=True) 32 | def main(url: str, bucket: str, destination_blob_name: str): 33 | filename = wget.download(url) 34 | upload_blob(bucket_name=bucket, source_file_name=filename, destination_blob_name=destination_blob_name) 35 | logging.info("File extracted and uploaded to bucket") 36 | 37 | 38 | if __name__ == "__main__": 39 | main() 40 | -------------------------------------------------------------------------------- /action.yml: -------------------------------------------------------------------------------- 1 | name: Manage Kubeflow Pipelines on GCP. 2 | description: Build, deploy and run a Kubeflow Pipeline on Google Cloud Platform. 3 | author: Niklas Hansson 4 | inputs: 5 | KUBEFLOW_URL: 6 | description: The endpoint where your Kubeflow UI is running. 7 | required: true 8 | CLIENT_ID: 9 | description: The IAP client id, which was specified when the kubeflow deployment where setup using IAP. 
10 | required: true 11 | PIPELINE_CODE_PATH: 12 | description: The full path name including the filename of the python file that describes the pipeline you want to run on Kubeflow. This should be relative to the root of the GitHub repository where the Action is triggered. 13 | required: true 14 | PIPELINE_FUNCTION_NAME: 15 | description: The name of the pipeline function; this name will be the name of the pipeline in the Kubeflow UI. 16 | required: true 17 | ENCODED_GOOGLE_APPLICATION_CREDENTIALS: 18 | description: The base64 encoded Google credentials 19 | required: true 20 | GOOGLE_APPLICATION_CREDENTIALS: 21 | description: The path to the decoded Google credentials 22 | required: true 23 | EXPERIMENT_NAME: 24 | description: The name of the Kubeflow experiment within which the pipeline should run 25 | required: false 26 | PIPELINE_NAMESPACE: 27 | description: The namespace in which the pipeline should run 28 | required: false 29 | RUN_PIPELINE: 30 | description: Whether the GitHub Action should also trigger a run of the pipeline 31 | required: false 32 | VERSION_GITHUB_SHA: 33 | description: Whether the pipeline and its containers should be versioned with the GitHub SHA 34 | required: false 35 | outputs: 36 | WORKFLOW_URL: 37 | description: URL that links to the pipeline in Kubeflow 38 | branding: 39 | color: 'purple' 40 | icon: 'upload-cloud' 41 | runs: 42 | using: 'docker' 43 | image: 'Dockerfile' 44 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g.
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ -------------------------------------------------------------------------------- /.github/workflows/versioning_pipeline_action.yaml: -------------------------------------------------------------------------------- 1 | name: Compile, Deploy and Run versioned pipeline on Kubeflow 2 | on: [push] 3 | 4 | # Set environmental variables 5 | env: 6 | GKE_PROJECT: ${{ secrets.GKE_PROJECT }} 7 | 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: checkout files in repo 13 | uses: actions/checkout@master 14 | 15 | - uses: GoogleCloudPlatform/github-actions/setup-gcloud@master 16 | with: 17 | version: '270.0.0' 18 | service_account_email: ${{ secrets.GKE_EMAIL }} 19 | service_account_key: ${{ secrets.GKE_KEY }} 20 | 21 | - run: | 22 | gcloud auth configure-docker 23 | 24 | - name: Build preprocessing image 25 | env: 26 | IMAGE_NAME: pre_image 27 | run: | 28 | docker build -t gcr.io/$GKE_PROJECT/$IMAGE_NAME:$GITHUB_SHA \ 29 | --build-arg GITHUB_SHA="$GITHUB_SHA" \ 30 | --build-arg GITHUB_REF="$GITHUB_REF" forecast_peython_wiki/preprocess/. 31 | 32 | - name: Publish preprocessing image 33 | env: 34 | IMAGE_NAME: pre_image 35 | run: | 36 | echo gcr.io/$GKE_PROJECT/$IMAGE_NAME:$GITHUB_SHA 37 | docker push gcr.io/$GKE_PROJECT/$IMAGE_NAME:$GITHUB_SHA 38 | 39 | 40 | - name: Build train forecast image 41 | env: 42 | IMAGE_NAME: train_forecast_image 43 | run: | 44 | docker build -t gcr.io/$GKE_PROJECT/$IMAGE_NAME:$GITHUB_SHA \ 45 | --build-arg GITHUB_SHA="$GITHUB_SHA" \ 46 | --build-arg GITHUB_REF="$GITHUB_REF" forecast_peython_wiki/train_forecast/. 47 | 48 | 49 | - name: Publish train forecast image 50 | env: 51 | IMAGE_NAME: train_forecast_image 52 | run: | 53 | echo gcr.io/$GKE_PROJECT/$IMAGE_NAME:$GITHUB_SHA 54 | docker push gcr.io/$GKE_PROJECT/$IMAGE_NAME:$GITHUB_SHA 55 | 56 | 57 | - name: Submit Kubeflow pipeline 58 | id: kubeflow 59 | uses: NikeNano/kubeflow-github-action@master 60 | with: 61 | KUBEFLOW_URL: ${{ secrets.KUBEFLOW_URL }} 62 | ENCODED_GOOGLE_APPLICATION_CREDENTIALS: ${{ secrets.GKE_KEY }} 63 | GOOGLE_APPLICATION_CREDENTIALS: /tmp/gcloud-sa.json 64 | CLIENT_ID: ${{ secrets.CLIENT_ID }} 65 | PIPELINE_CODE_PATH: "forecast_peython_wiki/deployment/pipline.py" 66 | PIPELINE_FUNCTION_NAME: "pipeline" 67 | PIPELINE_PARAMETERS_PATH: "forecast_peython_wiki/parameters.yaml" 68 | EXPERIMENT_NAME: "Default" 69 | RUN_PIPELINE: True 70 | VERSION_GITHUB_SHA: True 71 | -------------------------------------------------------------------------------- /example_pipeline.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright 2019 Google LLC 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import kfp 17 | from kfp import dsl 18 | 19 | 20 | def random_num_op(low, high): 21 | """Generate a random number between low and high.""" 22 | return dsl.ContainerOp( 23 | name='Generate random number', 24 | image='python:alpine3.6', 25 | command=['sh', '-c'], 26 | arguments=['python -c "import random; print(random.randint($0, $1))" | tee $2', str(low), str(high), '/tmp/output'], 27 | file_outputs={'output': '/tmp/output'} 28 | ) 29 | 30 | 31 | def flip_coin_op(): 32 | """Flip a coin and output heads or tails randomly.""" 33 | return dsl.ContainerOp( 34 | name='Flip coin', 35 | image='python:alpine3.6', 36 | command=['sh', '-c'], 37 | arguments=['python -c "import random; result = \'heads\' if random.randint(0,1) == 0 ' 38 | 'else \'tails\'; print(result)" | tee /tmp/output'], 39 | file_outputs={'output': '/tmp/output'} 40 | ) 41 | 42 | 43 | def print_op(msg): 44 | """Print a message.""" 45 | return dsl.ContainerOp( 46 | name='Print', 47 | image='alpine:3.6', 48 | command=['echo', msg], 49 | ) 50 | 51 | 52 | @dsl.pipeline( 53 | name='Conditional execution pipeline', 54 | description='Shows how to use dsl.Condition().' 55 | ) 56 | def flipcoin_pipeline(): 57 | flip = flip_coin_op() 58 | with dsl.Condition(flip.output == 'heads'): 59 | random_num_head = random_num_op(0, 9) 60 | with dsl.Condition(random_num_head.output > 5): 61 | print_op('heads and %s > 5!' % random_num_head.output) 62 | with dsl.Condition(random_num_head.output <= 5): 63 | print_op('heads and %s <= 5!' % random_num_head.output) 64 | 65 | with dsl.Condition(flip.output == 'tails'): 66 | random_num_tail = random_num_op(10, 19) 67 | with dsl.Condition(random_num_tail.output > 15): 68 | print_op('tails and %s > 15!' % random_num_tail.output) 69 | with dsl.Condition(random_num_tail.output <= 15): 70 | print_op('tails and %s <= 15!' % random_num_tail.output) 71 | -------------------------------------------------------------------------------- /forecast_peython_wiki/deployment/pipline.py: -------------------------------------------------------------------------------- 1 | import kfp 2 | import datetime 3 | import os 4 | import click 5 | import logging 6 | import kfp 7 | import kfp.dsl as dsl 8 | import kfp.gcp as gcp 9 | 10 | def pipeline(github_sha :str): 11 | """Returns the pipeline function with the github_sha used for the versioning of the containers and enviroment of the containers as well. 12 | 13 | 14 | Keyword Arguments: 15 | env {str} -- The enviroment for which the pipeline is made for (default: {"develop"}) 16 | github_sha {str} --The github sha used for the versioning 17 | """ 18 | @kfp.dsl.pipeline( 19 | name="Example pipeline github action", 20 | description="This pipeline show how you can version the pipeline components using the githash" 21 | ) 22 | def timeseries_pipeline(gcp_bucket: str, project: str, train_data :str="train.csv", forecast_data: str="forecast.csv"): 23 | """The kfp pipeline function. 
24 | 25 | Arguments: 26 | gcp_bucket {str} -- The google bucket 27 | project {str} -- The gcp project where the data should be stored 28 | 29 | Keyword Arguments: 30 | train_data {str} -- The name of the train file that is uploaded to the bucket (default: {"train.csv"}) 31 | forecast_date {str} -- The name of the forecast file uploaded to the bucket (default: {"forecast.csv"}) 32 | """ 33 | pre_image = f"gcr.io/{project}/pre_image:{github_sha}" 34 | train_forecast_image = f"gcr.io/{project}/train_forecast_image:{github_sha}" 35 | operations = {} 36 | operations['preprocess'] = dsl.ContainerOp( 37 | name='Preprocess', 38 | image=pre_image, 39 | command=['python3'], 40 | arguments=["main.py", 41 | "--url", "https://raw.githubusercontent.com/facebook/prophet/master/examples/example_wp_log_peyton_manning.csv", 42 | "--bucket", gcp_bucket, 43 | "--destination_blob_name", train_data 44 | ] 45 | ).set_image_pull_policy('Always') 46 | 47 | operations['train_forecast'] = dsl.ContainerOp( 48 | name='Forecast', 49 | image=train_forecast_image, 50 | command=['python3'], 51 | arguments=["main.py", 52 | "--bucket", gcp_bucket, 53 | "--source_blob_name", train_data, 54 | "--forecast_blob_name", forecast_data 55 | ] 56 | ).set_image_pull_policy('Always') 57 | operations["train_forecast"].after(operations["preprocess"]) 58 | 59 | 60 | for _,operation in operations.items(): 61 | operation.apply(gcp.use_gcp_secret('user-gcp-sa')) 62 | dsl.get_pipeline_conf() 63 | 64 | return operations 65 | 66 | return timeseries_pipeline -------------------------------------------------------------------------------- /forecast_peython_wiki/train_forecast/main.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import tempfile 3 | import click 4 | import logging 5 | import os 6 | 7 | from fbprophet import Prophet 8 | from google.cloud import storage 9 | 10 | def download_blob(bucket_name: str, source_blob_name: str, destination_file_name: str): 11 | """Function to download file from gcp bucet 12 | 13 | Arguments: 14 | bucket_name {str} -- The name of the bucket from which the data should be downloaded. 15 | source_blob_name {str} -- The name of the file in the bucket which should be downloaded. 16 | destination_file_name {str} -- The local file pat for the downloaded file. 17 | """ 18 | 19 | storage_client = storage.Client() 20 | bucket = storage_client.bucket(bucket_name) 21 | blob = bucket.blob(source_blob_name) 22 | blob.download_to_filename(destination_file_name) 23 | 24 | logging.info( 25 | "Blob {} downloaded to {}.".format( 26 | source_blob_name, destination_file_name 27 | ) 28 | ) 29 | 30 | 31 | def upload_blob(bucket_name: str, source_file_name: str, destination_blob_name: str): 32 | """Function to upload file to bucket. 33 | 34 | Arguments: 35 | bucket_name {str} -- The name of the bucket on gcp. 36 | source_file_name {str} -- The filepath to the file that should be uploaded. 37 | destination_blob_name {str} -- The name of the file in the bucket. 
38 | """ 39 | storage_client = storage.Client() 40 | bucket = storage_client.bucket(bucket_name) 41 | blob = bucket.blob(destination_blob_name) 42 | blob.upload_from_filename(source_file_name) 43 | logging.info( 44 | "File {} uploaded to {}.".format( 45 | source_file_name, destination_blob_name 46 | ) 47 | ) 48 | 49 | 50 | @click.command() 51 | @click.option("--bucket", required=True, help="The name of the gcp bucket") 52 | @click.option("--source_blob_name", default="raw_data.csv", help="The raw file to download", required=True) 53 | @click.option("--forecast_blob_name", default="raw_data.csv", help="The forecast to upload", required=True) 54 | def main(bucket: str, source_blob_name :str, forecast_blob_name:str): 55 | with tempfile.TemporaryDirectory() as tmpdirname: 56 | local_file = os.path.join(tmpdirname,"tmp.csv") 57 | download_blob(bucket_name=bucket, source_blob_name=source_blob_name, destination_file_name=local_file) 58 | df = pd.read_csv(local_file) 59 | # Train the model 60 | m = Prophet() 61 | logging.info("Starting training of the prophet model") 62 | m.fit(df) 63 | logging.info("The Propeht model is trained") 64 | future = m.make_future_dataframe(periods=365) 65 | forecast = m.predict(future) 66 | with tempfile.TemporaryDirectory() as tmpdirname: 67 | forecast_file = os.path.join(tmpdirname, "forecast.csv") 68 | forecast.to_csv(forecast_file) 69 | upload_blob(bucket_name=bucket, source_file_name=forecast_file, destination_blob_name=forecast_blob_name) 70 | logging.info("The model training is done and forecasting is done") 71 | 72 | 73 | if __name__ == "__main__": 74 | main() 75 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # This action Submits Kubeflow Pipelines to Kubeflow cluster running on Google Cloud Platform. 2 | 3 | The purpose of this action is to allow for automated deployments of [Kubeflow Pipelines](https://github.com/kubeflow/pipelines) on Google Cloud Platform (GCP). The action will collect the pipeline from a python file and compile it before uploading it to Kubeflow. The Kubeflow deployment must be using [IAP](https://www.kubeflow.org/docs/gke/deploy/monitor-iap-setup/) on GCP to work. 
4 | 5 | # Usage 6 | 7 | ## Example workflows that use this action 8 | 9 | 10 | To compile a pipeline and upload it to Kubeflow: 11 | 12 | ```yaml 13 | name: Compile and Deploy Kubeflow pipeline 14 | on: [push] 15 | 16 | # Set environmental variables 17 | 18 | jobs: 19 | build: 20 | runs-on: ubuntu-18.04 21 | steps: 22 | - name: checkout files in repo 23 | uses: actions/checkout@master 24 | 25 | 26 | - name: Submit Kubeflow pipeline 27 | id: kubeflow 28 | uses: NikeNano/kubeflow-github-action@master 29 | with: 30 | KUBEFLOW_URL: ${{ secrets.KUBEFLOW_URL }} 31 | ENCODED_GOOGLE_APPLICATION_CREDENTIALS: ${{ secrets.GKE_KEY }} 32 | GOOGLE_APPLICATION_CREDENTIALS: /tmp/gcloud-sa.json 33 | CLIENT_ID: ${{ secrets.CLIENT_ID }} 34 | PIPELINE_CODE_PATH: "example_pipeline.py" 35 | PIPELINE_FUNCTION_NAME: "flipcoin_pipeline" 36 | PIPELINE_PARAMETERS_PATH: "parameters.yaml" 37 | EXPERIMENT_NAME: "Default" 38 | RUN_PIPELINE: False 39 | VERSION_GITHUB_SHA: False 40 | 41 | ``` 42 | 43 | If you would also like to run it, use the following: 44 | 45 | ```yaml 46 | name: Compile, Deploy and Run on Kubeflow 47 | on: [push] 48 | 49 | # Set environmental variables 50 | 51 | jobs: 52 | build: 53 | runs-on: ubuntu-18.04 54 | steps: 55 | - name: checkout files in repo 56 | uses: actions/checkout@master 57 | 58 | 59 | - name: Submit Kubeflow pipeline 60 | id: kubeflow 61 | uses: NikeNano/kubeflow-github-action@master 62 | with: 63 | KUBEFLOW_URL: ${{ secrets.KUBEFLOW_URL }} 64 | ENCODED_GOOGLE_APPLICATION_CREDENTIALS: ${{ secrets.GKE_KEY }} 65 | GOOGLE_APPLICATION_CREDENTIALS: /tmp/gcloud-sa.json 66 | CLIENT_ID: ${{ secrets.CLIENT_ID }} 67 | PIPELINE_CODE_PATH: "example_pipeline.py" 68 | PIPELINE_FUNCTION_NAME: "flipcoin_pipeline" 69 | PIPELINE_PARAMETERS_PATH: "parameters.yaml" 70 | EXPERIMENT_NAME: "Default" 71 | RUN_PIPELINE: True 72 | VERSION_GITHUB_SHA: False 73 | 74 | ``` 75 | The repo also contains an example where the containers in the pipeline are versioned with the GitHub hash in order to improve operations and error tracking. However, this requires the pipeline function to be wrapped in a function with one argument: 76 | 77 | ```python 78 | 79 | def pipeline(github_sha :str): 80 | ...
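    # Illustrative sketch (not the exact code from this repo): the wrapper builds
    # the kfp pipeline function and returns it, so that main.py can call
    # pipeline(github_sha=...) and compile the returned function, e.g.
    #
    #     @kfp.dsl.pipeline(name="Example pipeline github action")
    #     def timeseries_pipeline(gcp_bucket: str, project: str):
    #         ...
    #     return timeseries_pipeline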
81 | 82 | ``` 83 | 84 | The containers are versioned with the hash: 85 | 86 | 87 | ```python 88 | pre_image = f"gcr.io/{project}/pre_image:{github_sha}" 89 | train_forecast_image = f"gcr.io/{project}/train_forecast_image:{github_sha}" 90 | 91 | ``` 92 | 93 | For an example, see [here](https://github.com/NikeNano/kubeflow-github-action/blob/master/forecast_peython_wiki/deployment/pipline.py). 94 | 95 | ## Mandatory inputs 96 | 97 | 1) KUBEFLOW_URL: The URL to your Kubeflow deployment 98 | 2) GKE_KEY: Service account with access to Kubeflow and rights to deploy, see [here](http://amygdala.github.io/kubeflow/ml/2019/08/22/remote-deploy.html) for an example; the credentials need to be base64 encoded: 99 | 100 | ``` bash 101 | cat path-to-key.json | base64 102 | ``` 103 | 3) GOOGLE_APPLICATION_CREDENTIALS: The path where you would like to store the decoded secrets (decoded from GKE_KEY) 104 | 4) CLIENT_ID: The IAP client id 105 | 5) PIPELINE_CODE_PATH: The full path to the python file containing the pipeline 106 | 6) PIPELINE_FUNCTION_NAME: The name of the pipeline function in the PIPELINE_CODE_PATH file 107 | 7) PIPELINE_PARAMETERS_PATH: The path to the pipeline parameters file 108 | 8) EXPERIMENT_NAME: The name of the Kubeflow experiment within which the pipeline should run 109 | 9) RUN_PIPELINE: If you would like to also run the pipeline, set this to "True" 110 | 10) VERSION_GITHUB_SHA: Set to "True" if the pipeline containers are versioned with the GitHub hash 111 | 112 | 113 | # Future work 114 | 115 | Add support for scheduling pipeline runs as well. Coming soon! 116 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | import kfp 4 | import kfp.compiler as compiler 5 | import click 6 | import importlib.util 7 | import logging 8 | import sys 9 | from datetime import datetime 10 | 11 | 12 | logging.basicConfig(stream=sys.stdout, level=logging.INFO) 13 | 14 | 15 | def load_function(pipeline_function_name: str, full_path_to_pipeline: str) -> object: 16 | """Function to load a python function from a filepath and filename 17 | 18 | Arguments: 19 | pipeline_function_name {str} -- The name of the pipeline function 20 | full_path_to_pipeline {str} -- The full path name including the filename of the python file that 21 | describes the pipeline you want to run on Kubeflow 22 | 23 | Returns: 24 | object -- The loaded pipeline function 25 | """ 26 | logging.info( 27 | f"Loading the pipeline function from: {full_path_to_pipeline}") 28 | logging.info( 29 | f"The name of the pipeline function is: {pipeline_function_name}") 30 | spec = importlib.util.spec_from_file_location( 31 | pipeline_function_name, full_path_to_pipeline) 32 | foo = importlib.util.module_from_spec(spec) 33 | spec.loader.exec_module(foo) 34 | pipeline_func = getattr(foo, pipeline_function_name) 35 | logging.info("Successfully loaded the pipeline function.") 36 | return pipeline_func 37 | 38 | 39 | def pipeline_compile(pipeline_function: object) -> str: 40 | """Function to compile the pipeline. The pipeline is compiled to a zip file.
41 | 42 | Arguments: 43 | pipeline_function {object} -- The kubeflow pipeline function 44 | 45 | Returns: 46 | str -- The name of the compiled kubeflow pipeline 47 | """ 48 | pipeline_name_zip = pipeline_function.__name__ + ".zip" 49 | compiler.Compiler().compile(pipeline_function, pipeline_name_zip) 50 | logging.info("The pipeline function is compiled.") 51 | return pipeline_name_zip 52 | 53 | 54 | def upload_pipeline(pipeline_name_zip: str, pipeline_name: str, kubeflow_url: str, client_id: str): 55 | """Function to upload the pipeline to kubeflow. 56 | 57 | Arguments: 58 | pipeline_name_zip {str} -- The name of the compiled pipeline package. 59 | pipeline_name {str} -- The name of the pipeline function. This will be the name in the kubeflow UI. 60 | """ 61 | client = kfp.Client( 62 | host=kubeflow_url, 63 | client_id=client_id, 64 | ) 65 | client.upload_pipeline( 66 | pipeline_package_path=pipeline_name_zip, 67 | pipeline_name=pipeline_name) 68 | return client 69 | 70 | 71 | def find_pipeline_id(pipeline_name: str, client: kfp.Client, page_size: int = 100, page_token: str = "") -> str: 72 | """Function to find the pipeline id of a pipeline. 73 | 74 | Arguments: 75 | pipeline_name {str} -- The name of the pipeline of interest 76 | client {kfp.Client} -- The kfp client 77 | page_size {int} -- The number of pipelines to collect per API request 78 | 79 | Keyword Arguments: 80 | page_token {str} -- The page token to use for the API request (default: {""}) 81 | 82 | Returns: 83 | str -- The pipeline id, or None if there is no match 84 | """ 85 | while True: 86 | pipelines = client.list_pipelines( 87 | page_size=page_size, page_token=page_token) 88 | for pipeline in pipelines.pipelines: 89 | if pipeline.name == pipeline_name: 90 | logging.info(f"The pipeline id is: {pipeline.id}") 91 | return pipeline.id 92 | # Need to know where to start the next iteration from 93 | page_token = pipelines.next_page_token 94 | # If there is no next token, break 95 | if not page_token: 96 | logging.info( 97 | f"Could not find the pipeline, is the name: {pipeline_name} correct?") 98 | break 99 | 100 | 101 | def find_experiment_id(experiment_name: str, client: kfp.Client, page_size: int = 100, page_token: str = "") -> str: 102 | """Function to return the experiment id 103 | 104 | Arguments: 105 | experiment_name {str} -- The experiment name 106 | client {kfp.Client} -- The kfp client 107 | 108 | Returns: 109 | str -- The experiment id 110 | """ 111 | while True: 112 | experiments = client.list_experiments( 113 | page_size=page_size, page_token=page_token) 114 | for experiment in experiments.experiments: 115 | if experiment.name == experiment_name: 116 | logging.info("Successfully collected the experiment id") 117 | return experiment.id 118 | # Need to know where to start the next iteration from 119 | page_token = experiments.next_page_token 120 | # If there is no next token, break 121 | if not page_token: 122 | logging.info( 123 | f"Could not find the experiment id, is the experiment name: {experiment_name} correct?") 124 | break 125 | 126 | 127 | def read_pipeline_params(pipeline_paramters_path: str) -> dict: 128 | """Read the pipeline parameters from a yaml file and return them as a dict.""" 129 | pipeline_params = {} 130 | with open(pipeline_paramters_path) as f: 131 | try: 132 | pipeline_params = yaml.safe_load(f) 133 | logging.info(f"The pipeline parameters are: {pipeline_params}") 134 | except yaml.YAMLError as exc: 135 | logging.info("The yaml parameters could not be loaded correctly.") 136 | raise ValueError( 137 | "The yaml parameters could not be loaded correctly.") 138 | logging.info(f"The parameters are: {pipeline_params}") 139 | return pipeline_params 140 | 141 | 142 | def run_pipeline(client: kfp.Client, pipeline_name: str, pipeline_id: str, pipeline_paramters_path: str): 143 | experiment_id = find_experiment_id( 144 | experiment_name=os.environ["INPUT_EXPERIMENT_NAME"], client=client) 145 | if not experiment_id: 146 | raise ValueError("Failed to find experiment with the name: {}".format( 147 | os.environ["INPUT_EXPERIMENT_NAME"])) 148 | logging.info(f"The experiment id is: {experiment_id}") 149 | namespace = None 150 | if (os.getenv("INPUT_PIPELINE_NAMESPACE") != None) and (str.isspace(os.getenv("INPUT_PIPELINE_NAMESPACE")) == False) and os.getenv("INPUT_PIPELINE_NAMESPACE"): 151 | namespace = os.environ["INPUT_PIPELINE_NAMESPACE"] 152 | logging.info(f"The namespace that will be used is: {namespace}") 153 | # [TODO] What would be a good way to name the jobs? 154 | job_name = pipeline_name + datetime.now().strftime("%Y_%m_%d_%H_%M_%S") 155 | logging.info(f"The job name is: {job_name}") 156 | 157 | pipeline_params = read_pipeline_params( 158 | pipeline_paramters_path=pipeline_paramters_path) 159 | pipeline_params = pipeline_params if pipeline_params is not None else {} 160 | logging.info( 161 | f"experiment_id: {experiment_id}, job_name:{job_name}, pipeline_params:{pipeline_params}, pipeline_id:{pipeline_id}, namespace:{namespace}") 162 | client.run_pipeline( 163 | experiment_id=experiment_id, 164 | job_name=job_name, 165 | # The parameters are read from a yaml file; people seem to prefer that to json.
166 | params=pipeline_params, 167 | pipeline_id=pipeline_id, 168 | namespace=namespace) 169 | logging.info( 170 | "Successfully started the pipeline, head over to kubeflow to check it out") 171 | 172 | 173 | def main(): 174 | logging.info( 175 | "Started the process to compile and upload the pipeline to kubeflow.") 176 | os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = os.environ["INPUT_GOOGLE_APPLICATION_CREDENTIALS"] 177 | pipeline_function = load_function(pipeline_function_name=os.environ['INPUT_PIPELINE_FUNCTION_NAME'], 178 | full_path_to_pipeline=os.environ['INPUT_PIPELINE_CODE_PATH']) 179 | logging.info("The value of the VERSION_GITHUB_SHA is: {}".format( 180 | os.environ["INPUT_VERSION_GITHUB_SHA"])) 181 | if os.environ["INPUT_VERSION_GITHUB_SHA"] == "true": 182 | logging.info("Versioned pipeline components") 183 | pipeline_function = pipeline_function( 184 | github_sha=os.environ["GITHUB_SHA"]) 185 | pipeline_name_zip = pipeline_compile(pipeline_function=pipeline_function) 186 | pipeline_name = os.environ['INPUT_PIPELINE_FUNCTION_NAME'] + \ 187 | "_" + os.environ["GITHUB_SHA"] 188 | client = upload_pipeline(pipeline_name_zip=pipeline_name_zip, 189 | pipeline_name=pipeline_name, 190 | kubeflow_url=os.environ['INPUT_KUBEFLOW_URL'], 191 | client_id=os.environ["INPUT_CLIENT_ID"]) 192 | logging.info(os.getenv("INPUT_RUN_PIPELINE")) 193 | logging.info(os.environ["INPUT_EXPERIMENT_NAME"]) 194 | if os.getenv("INPUT_RUN_PIPELINE") == "true" and os.environ["INPUT_EXPERIMENT_NAME"]: 195 | logging.info("Started the process to run the pipeline on kubeflow.") 196 | pipeline_id = find_pipeline_id(pipeline_name=pipeline_name, 197 | client=client) 198 | run_pipeline(pipeline_name=pipeline_name, 199 | pipeline_id=pipeline_id, 200 | client=client, 201 | pipeline_paramters_path=os.environ["INPUT_PIPELINE_PARAMETERS_PATH"]) 202 | 203 | 204 | if __name__ == "__main__": 205 | main() 206 | -------------------------------------------------------------------------------- /client.py: -------------------------------------------------------------------------------- 1 | import time 2 | import logging 3 | import json 4 | import os 5 | import re 6 | import tarfile 7 | import tempfile 8 | import warnings 9 | import yaml 10 | import zipfile 11 | import string 12 | import random 13 | import kfp 14 | import kfp_server_api 15 | 16 | from datetime import datetime 17 | from typing import Mapping, Callable 18 | from kfp.compiler import compiler 19 | from kfp.compiler._k8s_helper import sanitize_k8s_name 20 | from kfp._auth import get_auth_token, get_gcp_access_token 21 | 22 | 23 | def _add_generated_apis(target_struct, api_module, api_client): 24 | '''Initializes a hierarchical API object based on the generated API module. 
25 | PipelineServiceApi.create_pipeline becomes target_struct.pipelines.create_pipeline 26 | ''' 27 | Struct = type('Struct', (), {}) 28 | 29 | def camel_case_to_snake_case(name): 30 | import re 31 | return re.sub('([a-z0-9])([A-Z])', r'\1_\2', name).lower() 32 | 33 | for api_name in dir(api_module): 34 | if not api_name.endswith('ServiceApi'): 35 | continue 36 | 37 | short_api_name = camel_case_to_snake_case( 38 | api_name[0:-len('ServiceApi')]) + 's' 39 | api_struct = Struct() 40 | setattr(target_struct, short_api_name, api_struct) 41 | service_api = getattr(api_module.api, api_name) 42 | initialized_service_api = service_api(api_client) 43 | for member_name in dir(initialized_service_api): 44 | if member_name.startswith('_') or member_name.endswith('_with_http_info'): 45 | continue 46 | 47 | bound_member = getattr(initialized_service_api, member_name) 48 | setattr(api_struct, member_name, bound_member) 49 | models_struct = Struct() 50 | for member_name in dir(api_module.models): 51 | if not member_name[0].islower(): 52 | setattr(models_struct, member_name, getattr( 53 | api_module.models, member_name)) 54 | target_struct.api_models = models_struct 55 | 56 | 57 | KF_PIPELINES_ENDPOINT_ENV = 'KF_PIPELINES_ENDPOINT' 58 | KF_PIPELINES_UI_ENDPOINT_ENV = 'KF_PIPELINES_UI_ENDPOINT' 59 | KF_PIPELINES_DEFAULT_EXPERIMENT_NAME = 'KF_PIPELINES_DEFAULT_EXPERIMENT_NAME' 60 | KF_PIPELINES_OVERRIDE_EXPERIMENT_NAME = 'KF_PIPELINES_OVERRIDE_EXPERIMENT_NAME' 61 | 62 | 63 | class Client(object): 64 | """ API Client for KubeFlow Pipeline. 65 | """ 66 | 67 | # in-cluster DNS name of the pipeline service 68 | IN_CLUSTER_DNS_NAME = 'ml-pipeline.{}.svc.cluster.local:8888' 69 | KUBE_PROXY_PATH = 'api/v1/namespaces/{}/services/ml-pipeline:http/proxy/' 70 | 71 | # TODO: Wrap the configurations for different authentication methods. 72 | def __init__(self, host=None, client_id=None, namespace='kubeflow', other_client_id=None, other_client_secret=None): 73 | """Create a new instance of kfp client. 74 | Args: 75 | host: the host name to use to talk to Kubeflow Pipelines. If not set, the in-cluster 76 | service DNS name will be used, which only works if the current environment is a pod 77 | in the same cluster (such as a Jupyter instance spawned by Kubeflow's 78 | JupyterHub). If you have a different connection to cluster, such as a kubectl 79 | proxy connection, then set it to something like "127.0.0.1:8080/pipeline. 80 | If you connect to an IAP enabled cluster, set it to 81 | https://.endpoints..cloud.goog/pipeline". 82 | client_id: The client ID used by Identity-Aware Proxy. 83 | namespace: the namespace where the kubeflow pipeline system is run. 84 | other_client_id: The client ID used to obtain the auth codes and refresh tokens. 85 | Reference: https://cloud.google.com/iap/docs/authentication-howto#authenticating_from_a_desktop_app. 86 | other_client_secret: The client secret used to obtain the auth codes and refresh tokens. 
87 | """ 88 | host = host or os.environ.get(KF_PIPELINES_ENDPOINT_ENV) 89 | self._uihost = os.environ.get(KF_PIPELINES_UI_ENDPOINT_ENV, host) 90 | config = self._load_config( 91 | host, client_id, namespace, other_client_id, other_client_secret) 92 | api_client = kfp_server_api.api_client.ApiClient(config) 93 | _add_generated_apis(self, kfp_server_api, api_client) 94 | 95 | self._run_api = kfp_server_api.api.run_service_api.RunServiceApi( 96 | api_client) 97 | self._job_api = kfp_server_api.api.job_service_api.JobServiceApi( 98 | api_client) 99 | self._experiment_api = kfp_server_api.api.experiment_service_api.ExperimentServiceApi( 100 | api_client) 101 | self._pipelines_api = kfp_server_api.api.pipeline_service_api.PipelineServiceApi( 102 | api_client) 103 | self._upload_api = kfp_server_api.api.PipelineUploadServiceApi( 104 | api_client) 105 | 106 | def _load_config(self, host, client_id, namespace, other_client_id, other_client_secret): 107 | config = kfp_server_api.configuration.Configuration() 108 | if host: 109 | config.host = host 110 | 111 | token = None 112 | 113 | # Obtain the tokens if it is inverse proxy or IAP. 114 | if self._is_inverse_proxy_host(host): 115 | token = get_gcp_access_token() 116 | if self._is_iap_host(host, client_id): 117 | token = get_auth_token( 118 | client_id, other_client_id, other_client_secret) 119 | 120 | if token: 121 | config.api_key['authorization'] = token 122 | config.api_key_prefix['authorization'] = 'Bearer' 123 | return config 124 | 125 | if host: 126 | # if host is explicitly set with auth token, it's probably a port forward address. 127 | return config 128 | 129 | import kubernetes as k8s 130 | in_cluster = True 131 | try: 132 | k8s.config.load_incluster_config() 133 | except: 134 | in_cluster = False 135 | pass 136 | 137 | if in_cluster: 138 | config.host = Client.IN_CLUSTER_DNS_NAME.format(namespace) 139 | return config 140 | 141 | try: 142 | k8s.config.load_kube_config(client_configuration=config) 143 | except: 144 | print('Failed to load kube config.') 145 | return config 146 | 147 | if config.host: 148 | config.host = config.host + '/' + \ 149 | Client.KUBE_PROXY_PATH.format(namespace) 150 | return config 151 | 152 | def _is_iap_host(self, host, client_id): 153 | if host and client_id: 154 | if re.match(r'\S+.endpoints.\S+.cloud.goog/{0,1}$', host): 155 | warnings.warn( 156 | 'Suffix /pipeline is not ignorable for IAP host.') 157 | return re.match(r'\S+.endpoints.\S+.cloud.goog/pipeline', host) 158 | return False 159 | 160 | def _is_inverse_proxy_host(self, host): 161 | if host: 162 | return re.match(r'\S+.googleusercontent.com/{0,1}$', host) 163 | return False 164 | 165 | def _is_ipython(self): 166 | """Returns whether we are running in notebook.""" 167 | try: 168 | import IPython 169 | ipy = IPython.get_ipython() 170 | if ipy is None: 171 | return False 172 | except ImportError: 173 | return False 174 | 175 | return True 176 | 177 | def _get_url_prefix(self): 178 | if self._uihost: 179 | # User's own connection. 180 | if self._uihost.startswith('http://') or self._uihost.startswith('https://'): 181 | return self._uihost 182 | else: 183 | return 'http://' + self._uihost 184 | 185 | # In-cluster pod. We could use relative URL. 186 | return '/pipeline' 187 | 188 | def create_experiment(self, name, description=None): 189 | """Create a new experiment. 190 | Args: 191 | name: the name of the experiment. 192 | description: description of the experiment 193 | Returns: 194 | An Experiment object. Most important field is id. 
195 | """ 196 | 197 | experiment = None 198 | try: 199 | experiment = self.get_experiment(experiment_name=name) 200 | except: 201 | # Ignore error if the experiment does not exist. 202 | pass 203 | 204 | if not experiment: 205 | logging.info('Creating experiment {}.'.format(name)) 206 | experiment = kfp_server_api.models.ApiExperiment( 207 | name=name, description=description) 208 | experiment = self._experiment_api.create_experiment( 209 | body=experiment) 210 | 211 | if self._is_ipython(): 212 | import IPython 213 | html = \ 214 | ('Experiment link here' 215 | % (self._get_url_prefix(), experiment.id)) 216 | IPython.display.display(IPython.display.HTML(html)) 217 | return experiment 218 | 219 | def list_experiments(self, page_token='', page_size=10, sort_by=''): 220 | """List experiments. 221 | Args: 222 | page_token: token for starting of the page. 223 | page_size: size of the page. 224 | sort_by: can be '[field_name]', '[field_name] des'. For example, 'name des'. 225 | Returns: 226 | A response object including a list of experiments and next page token. 227 | """ 228 | response = self._experiment_api.list_experiment( 229 | page_token=page_token, page_size=page_size, sort_by=sort_by) 230 | return response 231 | 232 | def get_experiment(self, experiment_id=None, experiment_name=None): 233 | """Get details of an experiment 234 | Either experiment_id or experiment_name is required 235 | Args: 236 | experiment_id: id of the experiment. (Optional) 237 | experiment_name: name of the experiment. (Optional) 238 | Returns: 239 | A response object including details of a experiment. 240 | Throws: 241 | Exception if experiment is not found or None of the arguments is provided 242 | """ 243 | if experiment_id is None and experiment_name is None: 244 | raise ValueError( 245 | 'Either experiment_id or experiment_name is required') 246 | if experiment_id is not None: 247 | return self._experiment_api.get_experiment(id=experiment_id) 248 | next_page_token = '' 249 | while next_page_token is not None: 250 | list_experiments_response = self.list_experiments( 251 | page_size=100, page_token=next_page_token) 252 | next_page_token = list_experiments_response.next_page_token 253 | for experiment in list_experiments_response.experiments: 254 | if experiment.name == experiment_name: 255 | return self._experiment_api.get_experiment(id=experiment.id) 256 | raise ValueError( 257 | 'No experiment is found with name {}.'.format(experiment_name)) 258 | 259 | def _extract_pipeline_yaml(self, package_file): 260 | def _choose_pipeline_yaml_file(file_list) -> str: 261 | yaml_files = [file for file in file_list if file.endswith('.yaml')] 262 | if len(yaml_files) == 0: 263 | raise ValueError( 264 | 'Invalid package. Missing pipeline yaml file in the package.') 265 | 266 | if 'pipeline.yaml' in yaml_files: 267 | return 'pipeline.yaml' 268 | else: 269 | if len(yaml_files) == 1: 270 | return yaml_files[0] 271 | raise ValueError( 272 | 'Invalid package. 
There is no pipeline.yaml file and there are multiple yaml files.') 273 | 274 | if package_file.endswith('.tar.gz') or package_file.endswith('.tgz'): 275 | with tarfile.open(package_file, "r:gz") as tar: 276 | file_names = [member.name for member in tar if member.isfile()] 277 | pipeline_yaml_file = _choose_pipeline_yaml_file(file_names) 278 | with tar.extractfile(tar.getmember(pipeline_yaml_file)) as f: 279 | return yaml.safe_load(f) 280 | elif package_file.endswith('.zip'): 281 | with zipfile.ZipFile(package_file, 'r') as zip: 282 | pipeline_yaml_file = _choose_pipeline_yaml_file(zip.namelist()) 283 | with zip.open(pipeline_yaml_file) as f: 284 | return yaml.safe_load(f) 285 | elif package_file.endswith('.yaml') or package_file.endswith('.yml'): 286 | with open(package_file, 'r') as f: 287 | return yaml.safe_load(f) 288 | else: 289 | raise ValueError('The package_file ' + package_file + 290 | ' should ends with one of the following formats: [.tar.gz, .tgz, .zip, .yaml, .yml]') 291 | 292 | def list_pipelines(self, page_token='', page_size=10, sort_by=''): 293 | """List pipelines. 294 | Args: 295 | page_token: token for starting of the page. 296 | page_size: size of the page. 297 | sort_by: one of 'field_name', 'field_name des'. For example, 'name des'. 298 | Returns: 299 | A response object including a list of pipelines and next page token. 300 | """ 301 | return self._pipelines_api.list_pipelines(page_token=page_token, page_size=page_size, sort_by=sort_by) 302 | 303 | # TODO: provide default namespace, similar to kubectl default namespaces. 304 | def run_pipeline(self, experiment_id, job_name, pipeline_package_path=None, params={}, pipeline_id=None, namespace=None): 305 | """Run a specified pipeline. 306 | Args: 307 | experiment_id: The string id of an experiment. 308 | job_name: name of the job. 309 | pipeline_package_path: local path of the pipeline package(the filename should end with one of the following .tar.gz, .tgz, .zip, .yaml, .yml). 310 | params: a dictionary with key (string) as param name and value (string) as as param value. 311 | pipeline_id: the string ID of a pipeline. 312 | namespace: kubernetes namespace where the pipeline runs are created. 313 | For single user deployment, leave it as None; 314 | For multi user, input a namespace where the user is authorized 315 | Returns: 316 | A run object. Most important field is id. 
317 | """ 318 | 319 | pipeline_json_string = None 320 | if pipeline_package_path: 321 | pipeline_obj = self._extract_pipeline_yaml(pipeline_package_path) 322 | pipeline_json_string = json.dumps(pipeline_obj) 323 | api_params = [kfp_server_api.ApiParameter( 324 | name=sanitize_k8s_name(name=k, allow_capital_underscore=True), 325 | value=str(v)) for k, v in params.items()] 326 | resource_references = [] 327 | 328 | key = kfp_server_api.models.ApiResourceKey(id=experiment_id, 329 | type=kfp_server_api.models.ApiResourceType.EXPERIMENT) 330 | reference = kfp_server_api.models.ApiResourceReference(key=key, 331 | relationship=kfp_server_api.models.ApiRelationship.OWNER) 332 | resource_references.append(reference) 333 | if namespace is not None: 334 | key = kfp_server_api.models.ApiResourceKey(id=namespace, 335 | type=kfp_server_api.models.ApiResourceType.NAMESPACE) 336 | reference = kfp_server_api.models.ApiResourceReference(key=key, 337 | name=namespace, 338 | relationship=kfp_server_api.models.ApiRelationship.OWNER) 339 | resource_references.append(reference) 340 | spec = kfp_server_api.models.ApiPipelineSpec( 341 | pipeline_id=pipeline_id, 342 | workflow_manifest=pipeline_json_string, 343 | parameters=api_params) 344 | run_body = kfp_server_api.models.ApiRun( 345 | pipeline_spec=spec, resource_references=resource_references, name=job_name) 346 | 347 | response = self._run_api.create_run(body=run_body) 348 | 349 | if self._is_ipython(): 350 | import IPython 351 | html = ('Run link here' 352 | % (self._get_url_prefix(), response.run.id)) 353 | IPython.display.display(IPython.display.HTML(html)) 354 | return response.run 355 | 356 | def schedule_pipeline(self, experiment_id, job_name, pipeline_package_path=None, params={}, pipeline_id=None, namespace=None): 357 | """Schedule pipeline on kubeflow to run based upon a cron job 358 | 359 | Arguments: 360 | experiment_id {[type]} -- The expriment within which we would like kubeflow 361 | job_name {[type]} -- The name of the scheduled job 362 | 363 | Keyword Arguments: 364 | pipeline_package_path {[type]} -- The path to the pipeline package (default: {None}) 365 | params {dict} -- The pipeline parameters (default: {{}}) 366 | pipeline_id {[type]} -- The id of the pipeline which should run on schedule (default: {None}) 367 | namespace {[type]} -- The name space with which the pipeline should run (default: {None}) 368 | """ 369 | 370 | pipeline_json_string = None 371 | if pipeline_package_path: 372 | pipeline_obj = self._extract_pipeline_yaml(pipeline_package_path) 373 | pipeline_json_string = json.dumps(pipeline_obj) 374 | api_params = [kfp_server_api.ApiParameter( 375 | name=sanitize_k8s_name(name=k, allow_capital_underscore=True), 376 | value=str(v)) for k, v in params.items()] 377 | resource_references = [] 378 | 379 | key = kfp_server_api.models.ApiResourceKey(id=experiment_id, 380 | type=kfp_server_api.models.ApiResourceType.EXPERIMENT) 381 | reference = kfp_server_api.models.ApiResourceReference(key=key, 382 | relationship=kfp_server_api.models.ApiRelationship.OWNER) 383 | resource_references.append(reference) 384 | if namespace is not None: 385 | key = kfp_server_api.models.ApiResourceKey(id=namespace, 386 | type=kfp_server_api.models.ApiResourceType.NAMESPACE) 387 | reference = kfp_server_api.models.ApiResourceReference(key=key, 388 | name=namespace, 389 | relationship=kfp_server_api.models.ApiRelationship.OWNER) 390 | resource_references.append(reference) 391 | spec = kfp_server_api.models.ApiPipelineSpec( 392 | pipeline_id=pipeline_id, 393 | 
workflow_manifest=pipeline_json_string, 394 | parameters=api_params) 395 | 396 | trigger = kfp_server_api.models.api_cron_schedule.ApiCronSchedule( 397 | cron="0 0 9 ? * 2-6") 398 | job_id = ''.join(random.choices( 399 | string.ascii_uppercase + string.digits, k=10)) 400 | schedule_body = kfp_server_api.models.ApiJob( 401 | id=job_id, 402 | name="TestScheduling", 403 | description="Schedule the pipeline using the API", 404 | pipeline_spec=spec, 405 | resource_references=resource_references, 406 | max_concurrency=10, 407 | trigger=trigger, 408 | enabled=True, 409 | ) 410 | 411 | def create_run_from_pipeline_func(self, pipeline_func: Callable, arguments: Mapping[str, str], run_name=None, experiment_name=None, pipeline_conf: kfp.dsl.PipelineConf = None, namespace=None): 412 | '''Runs pipeline on KFP-enabled Kubernetes cluster. 413 | This command compiles the pipeline function, creates or gets an experiment and submits the pipeline for execution. 414 | Args: 415 | pipeline_func: A function that describes a pipeline by calling components and composing them into execution graph. 416 | arguments: Arguments to the pipeline function provided as a dict. 417 | run_name: Optional. Name of the run to be shown in the UI. 418 | experiment_name: Optional. Name of the experiment to add the run to. 419 | namespace: kubernetes namespace where the pipeline runs are created. 420 | For single user deployment, leave it as None; 421 | For multi user, input a namespace where the user is authorized 422 | ''' 423 | # TODO: Check arguments against the pipeline function 424 | pipeline_name = pipeline_func.__name__ 425 | run_name = run_name or pipeline_name + ' ' + \ 426 | datetime.now().strftime('%Y-%m-%d %H-%M-%S') 427 | try: 428 | (_, pipeline_package_path) = tempfile.mkstemp(suffix='.zip') 429 | compiler.Compiler().compile(pipeline_func, pipeline_package_path, 430 | pipeline_conf=pipeline_conf) 431 | return self.create_run_from_pipeline_package(pipeline_package_path, arguments, run_name, experiment_name, namespace) 432 | finally: 433 | os.remove(pipeline_package_path) 434 | 435 | def create_run_from_pipeline_package(self, pipeline_file: str, arguments: Mapping[str, str], run_name=None, experiment_name=None, namespace=None): 436 | '''Runs pipeline on KFP-enabled Kubernetes cluster. 437 | This command compiles the pipeline function, creates or gets an experiment and submits the pipeline for execution. 438 | Args: 439 | pipeline_file: A compiled pipeline package file. 440 | arguments: Arguments to the pipeline function provided as a dict. 441 | run_name: Optional. Name of the run to be shown in the UI. 442 | experiment_name: Optional. Name of the experiment to add the run to. 443 | namespace: kubernetes namespace where the pipeline runs are created. 
444 | For single user deployment, leave it as None; 445 | For multi user, input a namespace where the user is authorized 446 | ''' 447 | 448 | class RunPipelineResult: 449 | def __init__(self, client, run_info): 450 | self._client = client 451 | self.run_info = run_info 452 | self.run_id = run_info.id 453 | 454 | def wait_for_run_completion(self, timeout=None): 455 | timeout = timeout or datetime.datetime.max - datetime.datetime.min 456 | return self._client.wait_for_run_completion(self.run_id, timeout) 457 | 458 | def __repr__(self): 459 | return 'RunPipelineResult(run_id={})'.format(self.run_id) 460 | 461 | # TODO: Check arguments against the pipeline function 462 | pipeline_name = os.path.basename(pipeline_file) 463 | experiment_name = experiment_name or os.environ.get( 464 | KF_PIPELINES_DEFAULT_EXPERIMENT_NAME, None) 465 | overridden_experiment_name = os.environ.get( 466 | KF_PIPELINES_OVERRIDE_EXPERIMENT_NAME, experiment_name) 467 | if overridden_experiment_name != experiment_name: 468 | import warnings 469 | warnings.warn('Changing experiment name from "{}" to "{}".'.format( 470 | experiment_name, overridden_experiment_name)) 471 | experiment_name = overridden_experiment_name or 'Default' 472 | run_name = run_name or pipeline_name + ' ' + \ 473 | datetime.now().strftime('%Y-%m-%d %H-%M-%S') 474 | experiment = self.create_experiment(name=experiment_name) 475 | run_info = self.run_pipeline( 476 | experiment.id, run_name, pipeline_file, arguments, namespace=namespace) 477 | return RunPipelineResult(self, run_info) 478 | 479 | def list_runs(self, page_token='', page_size=10, sort_by='', experiment_id=None): 480 | """List runs. 481 | Args: 482 | page_token: token for starting of the page. 483 | page_size: size of the page. 484 | sort_by: one of 'field_name', 'field_name des'. For example, 'name des'. 485 | experiment_id: experiment id to filter upon 486 | Returns: 487 | A response object including a list of experiments and next page token. 488 | """ 489 | if experiment_id is not None: 490 | response = self._run_api.list_runs(page_token=page_token, page_size=page_size, sort_by=sort_by, 491 | resource_reference_key_type=kfp_server_api.models.api_resource_type.ApiResourceType.EXPERIMENT, resource_reference_key_id=experiment_id) 492 | else: 493 | response = self._run_api.list_runs( 494 | page_token=page_token, page_size=page_size, sort_by=sort_by) 495 | return response 496 | 497 | def get_run(self, run_id): 498 | """Get run details. 499 | Args: 500 | id of the run. 501 | Returns: 502 | A response object including details of a run. 503 | Throws: 504 | Exception if run is not found. 505 | """ 506 | return self._run_api.get_run(run_id=run_id) 507 | 508 | def wait_for_run_completion(self, run_id, timeout): 509 | """Wait for a run to complete. 510 | Args: 511 | run_id: run id, returned from run_pipeline. 512 | timeout: timeout in seconds. 
513 | Returns: 514 | A run detail object: Most important fields are run and pipeline_runtime 515 | """ 516 | status = 'Running:' 517 | start_time = datetime.now() 518 | while status is None or status.lower() not in ['succeeded', 'failed', 'skipped', 'error']: 519 | get_run_response = self._run_api.get_run(run_id=run_id) 520 | status = get_run_response.run.status 521 | elapsed_time = (datetime.now() - start_time).seconds 522 | logging.info('Waiting for the job to complete...') 523 | if elapsed_time > timeout: 524 | raise TimeoutError('Run timeout') 525 | time.sleep(5) 526 | return get_run_response 527 | 528 | def _get_workflow_json(self, run_id): 529 | """Get the workflow json. 530 | Args: 531 | run_id: run id, returned from run_pipeline. 532 | Returns: 533 | workflow: json workflow 534 | """ 535 | get_run_response = self._run_api.get_run(run_id=run_id) 536 | workflow = get_run_response.pipeline_runtime.workflow_manifest 537 | workflow_json = json.loads(workflow) 538 | return workflow_json 539 | 540 | def upload_pipeline(self, pipeline_package_path, pipeline_name=None): 541 | """Uploads the pipeline to the Kubeflow Pipelines cluster. 542 | Args: 543 | pipeline_package_path: Local path to the pipeline package. 544 | pipeline_name: Optional. Name of the pipeline to be shown in the UI. 545 | Returns: 546 | Server response object containing pipleine id and other information. 547 | """ 548 | 549 | response = self._upload_api.upload_pipeline( 550 | pipeline_package_path, name=pipeline_name) 551 | if self._is_ipython(): 552 | import IPython 553 | html = 'Pipeline link here' % ( 554 | self._get_url_prefix(), response.id) 555 | IPython.display.display(IPython.display.HTML(html)) 556 | return response 557 | 558 | def get_pipeline(self, pipeline_id): 559 | """Get pipeline details. 560 | Args: 561 | id of the pipeline. 562 | Returns: 563 | A response object including details of a pipeline. 564 | Throws: 565 | Exception if pipeline is not found. 566 | """ 567 | return self._pipelines_api.get_pipeline(id=pipeline_id) 568 | --------------------------------------------------------------------------------