├── .gitignore ├── LICENSE ├── README.md ├── examples └── basic_pipeline │ ├── notebook_step_1.py │ ├── notebook_step_2.py │ └── pipeline.ipynb ├── kfn ├── __init__.py ├── imports.py ├── injected_code.py ├── kf_notebook_runner.py ├── lib.py ├── nb_component_builder.py └── test │ ├── __init__.py │ ├── kf_notebook_runner_test.py │ ├── lib.py │ ├── nb_component_builder_test.py │ └── notebooks_source.py ├── requirements.txt ├── setup.py └── test.py /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints/ 2 | __pycache__/ 3 | *.pyc 4 | /dist/ 5 | build/ 6 | .settings/ 7 | .project 8 | .classpath 9 | .vscode/ 10 | .venv/ 11 | .env 12 | kfn.egg-info/ 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 bartgras 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # kf-notebook-component 2 | 3 | Kubeflow notebook component is a library that wraps a Jupyter Notebook into a Kubeflow component. 4 | 5 | ### Install: 6 | 7 | `pip install kfp` 8 | 9 | ### Read introductory article 10 | 11 | [Medium](https://medium.com/@bartgras/execute-jupyter-notebook-as-a-pipeline-step-4dba8c45aebf) 12 | 13 | ### Examples 14 | 15 | Head to `/examples` to see example pipelines. -------------------------------------------------------------------------------- /examples/basic_pipeline/notebook_step_1.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # jupyter: 3 | # jupytext: 4 | # formats: ipynb,py:percent 5 | # text_representation: 6 | # extension: .py 7 | # format_name: percent 8 | # format_version: '1.3' 9 | # jupytext_version: 1.6.0 10 | # kernelspec: 11 | # display_name: Python 3 12 | # language: python 13 | # name: python3 14 | # --- 15 | 16 | # %% tags=["parameters"] 17 | a = 11 18 | b = 10 19 | 20 | # %% 21 | print(a) 22 | 23 | # %% [markdown] 24 | # ### Titanic dataset features summary 25 | 26 | # %% 27 | import pandas as pd 28 | train_df = pd.read_csv('https://raw.githubusercontent.com/kubeflow-kale/examples/master/titanic-ml-dataset/data/train.csv') 29 | 30 | # %% 31 | train_df.describe() 32 | 33 | # %% [markdown] 34 | # #### Passengers age 35 | 36 | # %% 37 | _ = train_df['Age'].hist() 38 | 39 | # %% 40 | import numpy as np 41 | 42 | outputs = {'d': a+b, 'e': a/b} 43 | artifacts = {'x': np.random.randint(0, 100, 20)} 44 | metrics =
{'accuracy': np.random.randint(0, 100), 'recall': np.random.randint(0, 100)} 45 | -------------------------------------------------------------------------------- /examples/basic_pipeline/notebook_step_2.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # jupyter: 3 | # jupytext: 4 | # formats: ipynb,py:percent 5 | # text_representation: 6 | # extension: .py 7 | # format_name: percent 8 | # format_version: '1.3' 9 | # jupytext_version: 1.6.0 10 | # kernelspec: 11 | # display_name: Python 3 12 | # language: python 13 | # name: python3 14 | # --- 15 | 16 | # %% tags=["parameters"] 17 | x = None 18 | d = None 19 | 20 | # %% 21 | import numpy as np 22 | 23 | x 24 | 25 | # %% 26 | d 27 | -------------------------------------------------------------------------------- /examples/basic_pipeline/pipeline.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "language_info": { 4 | "codemirror_mode": { 5 | "name": "ipython", 6 | "version": 3 7 | }, 8 | "file_extension": ".py", 9 | "mimetype": "text/x-python", 10 | "name": "python", 11 | "nbconvert_exporter": "python", 12 | "pygments_lexer": "ipython3", 13 | "version": "3.7.5-final" 14 | }, 15 | "orig_nbformat": 2, 16 | "kernelspec": { 17 | "name": "python3", 18 | "display_name": "Python 3.7.5 64-bit ('.venv': venv)", 19 | "metadata": { 20 | "interpreter": { 21 | "hash": "fe3cdae4d9e5fcdfb30e08c92255820dd67d52d5b3cb5d054ff2d9e0daa778ac" 22 | } 23 | } 24 | } 25 | }, 26 | "nbformat": 4, 27 | "nbformat_minor": 2, 28 | "cells": [ 29 | { 30 | "source": [ 31 | "## Example notebook component pipeline\n", 32 | "\n", 33 | "First component:\n", 34 | "\n", 35 | "* gets `a`, `b`, `c` inputs from pipeline input.\n", 36 | "\n", 37 | "* executes notebook `notebook_step_1.py` that uses those variables and outputs variable `d` and artifact `x`\n", 38 | "\n", 39 | "Second component:\n", 40 | "\n", 41 | "* ingests variable `d` and
artifact `x` from previous step\n" 42 | ], 43 | "cell_type": "markdown", 44 | "metadata": {} 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 1, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "from kfn import NbComponentBuilder\n", 53 | "import kfp" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "base_image = 'asia.gcr.io/ppb-services/tensorflow-1.14.0-py3-jupytext-papermill:latest'\n", 63 | "packages_to_install = ['pandas', 'matplotlib']" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 2, 69 | "metadata": {}, 70 | "outputs": [ 71 | { 72 | "output_type": "stream", 73 | "name": "stdout", 74 | "text": [ 75 | "SIGNATURE: (c: int, a: int = '1', b: int = '2')\n" 76 | ] 77 | } 78 | ], 79 | "source": [ 80 | "nb = NbComponentBuilder(\n", 81 | " 'Task 1', \n", 82 | " inject_notebook_path='notebook_step_1.py', \n", 83 | " remove_nb_inputs=True)\n", 84 | "\n", 85 | "nb.add_input_param('a', int, 1)\n", 86 | "nb.add_input_param('b', int, 2)\n", 87 | "nb.add_input_param('c', int, None)\n", 88 | "nb.add_output_param('d', int)\n", 89 | "nb.add_output_artifact('x')\n", 90 | "first_op = nb.build_op(\n", 91 | " base_image=base_image, \n", 92 | " packages_to_install=packages_to_install)" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "nb = NbComponentBuilder(\n", 102 | " 'Task 2', \n", 103 | " inject_notebook_path='notebook_step_2.py')\n", 104 | "\n", 105 | "nb.add_input_artifact('x')\n", 106 | "nb.add_input_param('d', int)\n", 107 | "second_op = nb.build_op(\n", 108 | " base_image=base_image, \n", 109 | " packages_to_install=packages_to_install)" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "# Define pipeline\n", 119 | 
"@kfp.dsl.pipeline(name='Notebook pipeline example')\n", 120 | "def notebook_run_pipeline(\n", 121 | " a='a', b='b', c='c'\n", 122 | "):\n", 123 | " op1 = first_op(a=a, b=b, c=c) \n", 124 | " second_op(x=op1.outputs['x'], d=op1.outputs['d']) \n", 125 | "\n", 126 | "# Run\n", 127 | "arguments = {'a': 20, 'b': 30, 'c': 40}\n", 128 | "kfp.Client().create_run_from_pipeline_func(notebook_run_pipeline, arguments=arguments)" 129 | ] 130 | } 131 | ] 132 | } -------------------------------------------------------------------------------- /kfn/__init__.py: -------------------------------------------------------------------------------- 1 | from kfn.nb_component_builder import NbComponentBuilder -------------------------------------------------------------------------------- /kfn/imports.py: -------------------------------------------------------------------------------- 1 | # Imports required by kf_notebook_runner 2 | # Moved into separate file because this file has to be read separately and 3 | # injected into kubeflow component 4 | 5 | import os 6 | import re 7 | import pickle 8 | from jupytext.cli import jupytext 9 | import papermill 10 | from nbconvert import HTMLExporter 11 | from traitlets.config import Config 12 | from collections import namedtuple 13 | import json 14 | -------------------------------------------------------------------------------- /kfn/injected_code.py: -------------------------------------------------------------------------------- 1 | #from kfn.kf_notebook_runner import KFNotebookRunner 2 | 3 | notebook_injected_artifacts = ''' 4 | 5 | import pickle 6 | 7 | if '_input_artifacts' in locals(): 8 | exec('_in_artifacts = %s' % _input_artifacts) 9 | for artif in _in_artifacts: 10 | with open(artif[1], 'rb') as f: 11 | try: 12 | exec('%s = pickle.loads(f.read())' % artif[0] ) 13 | except pickle.UnpicklingError: 14 | with open(artif[1], 'r') as f: 15 | exec('%s = f.read()' % artif[0] ) 16 | 17 | ''' 18 | 19 | notebook_injected_code = ''' 20 | 21 | # %% 
22 | import pickle 23 | 24 | if 'outputs' in locals() and type(outputs) == dict: 25 | with open('/tmp/outputs.pickle', 'wb') as f: 26 | f.write(pickle.dumps(outputs)) 27 | 28 | if '_output_artifacts' in locals(): 29 | exec('_artifacts = %s' % _output_artifacts) 30 | for artif in _artifacts: 31 | with open(artif[1], 'wb') as f: 32 | f.write(pickle.dumps(artifacts[artif[0]])) 33 | 34 | if 'metrics' in locals() and type(metrics) == dict: 35 | with open('/tmp/metrics.pickle', 'wb') as f: 36 | f.write(pickle.dumps(metrics)) 37 | 38 | ''' 39 | 40 | def exec_nb(_locals, input_params, output_params, use_injected_nb_source_code=False, remote_notebook_path=None, 41 | output_artifacts=None, input_artifacts=None, remove_nb_inputs=False): 42 | import json 43 | import pickle 44 | import random 45 | import string 46 | from collections import namedtuple 47 | 48 | random_filename = ''.join(random.choice(string.ascii_lowercase) 49 | for i in range(8)) 50 | tmp_notebook = '/tmp/%s.py' % random_filename 51 | 52 | params_dict = {} 53 | for param_name in input_params: 54 | params_dict[param_name] = _locals[param_name] 55 | 56 | if output_artifacts: 57 | params_dict.update({'_output_artifacts': str(output_artifacts)}) 58 | 59 | if input_artifacts: 60 | params_dict.update({'_input_artifacts': str(input_artifacts)}) 61 | 62 | if use_injected_nb_source_code: 63 | with open(tmp_notebook, 'w') as f: 64 | # Note: Variable `notebook_source` is being injected earlier 65 | f.write(notebook_source) 66 | 67 | if remote_notebook_path: 68 | # TODO: Download from GS and save to /tmp/notebook.py 69 | raise('Not implemented') 70 | 71 | nr = KFNotebookRunner( 72 | local_py_name=tmp_notebook, 73 | inject_params=params_dict, 74 | remove_nb_inputs=remove_nb_inputs) 75 | nr.run() 76 | 77 | metadata = { 78 | 'outputs' : [{ 79 | 'type': 'web-app', 80 | 'storage': 'inline', 81 | 'source': nr.notebook_html_output 82 | }] 83 | } 84 | 85 | metrics_list = [] 86 | 87 | if nr.metrics: 88 | if type(nr.metrics) != 
class KFNotebookRunner:
    """Run a jupytext "percent" script as a notebook and collect its results.

    The script is copied with extra cells injected (artifact loading after the
    ``parameters`` cell, result pickling at the end), converted to ``.ipynb``
    with jupytext, executed with papermill and rendered to HTML with nbconvert.
    All intermediate files are written next to the input ``.py`` file.

    NOTE: the source of this class is injected verbatim into the Kubeflow
    component together with ``imports.py`` and ``injected_code.py``, so it may
    only rely on names those two modules provide.
    """

    # Result files written by the code appended to the notebook
    # (`notebook_injected_code` hard-codes the same paths).
    _OUTPUTS_PICKLE = '/tmp/outputs.pickle'
    _METRICS_PICKLE = '/tmp/metrics.pickle'

    def __init__(self, local_py_name, inject_params=None, remove_nb_inputs=False, kernel_name='python3'):
        """
        Converts (jupytext format of "Notebook paired with percent script") .py file to .ipynb,
        executes it and generates separate output in HTML format. All converted/generated files
        will be written to the same directory as input py file.

        Parameters:
        -----------
        - local_py_name: Path to py file that will be used for run.

        - inject_params: Parameters that will be injected to notebook. Follow papermill
                         (https://papermill.readthedocs.io/en/latest/usage-parameterize.html)
                         documentation how to do that. Defaults to no parameters.

        - remove_nb_inputs: By default HTML output will contain both code and
                         output cells. Setting `remove_nb_inputs` to True will remove code cells.

        - kernel_name: Name of the Jupyter kernel papermill executes the notebook with.
        """
        directory, py_filename = os.path.split(local_py_name)
        # Strip only a trailing '.py'; joining with os.path.join keeps relative
        # paths relative (a bare 'nb.py' must not become '/nb').
        stem = py_filename[:-len('.py')] if py_filename.endswith('.py') else py_filename
        self.path_prefix = os.path.join(directory, stem)

        # A fresh dict per instance: a shared `{}` default would leak
        # parameters between runners.
        self.inject_params = {} if inject_params is None else inject_params
        self.remove_nb_inputs = remove_nb_inputs
        self.kernel_name = kernel_name

        self._notebook_html_output = ''
        self._outputs = {}
        self._metrics = {}
        self._component_return_stuct = None

    def run(self):
        """Execute the whole pipeline: inject, convert, run, render, collect."""
        # Results are exchanged through fixed file names; drop leftovers from a
        # previous run so they cannot be mistaken for this run's results.
        for stale in (self._OUTPUTS_PICKLE, self._METRICS_PICKLE):
            if os.path.exists(stale):
                os.remove(stale)

        self.inject_saving_outputs()
        self.convert_and_run_in_notebook()
        self.nb_html_convert()
        self.build_component_output()
        self.build_component_metrics()

    def inject_saving_outputs(self):
        """Write ``<prefix>_inject_output.py``: the script plus the injected cells.

        Raises an exception (``ValueError``) when no cell carries the
        ``parameters`` tag; nothing is written in that case.
        """
        with open(self.path_prefix + '.py', 'r') as f_in:
            file_content = f_in.read()

        if not re.search(r'tags=\["parameters"', file_content):
            raise ValueError('Notebook is missing "parameters" tag set on one of it\'s cells')

        def add_artifacts_cell(match):
            # Rebuild the parameters cell unchanged and add a new cell with the
            # artifact-loading code right after it. A function replacement
            # avoids re.sub interpreting backslashes in the injected code.
            return '# %% tags=["parameters"{tags}]{cell}\n# %%\n{code}\n\n# %%'.format(
                tags=match.group(1),
                cell=match.group(2),
                code=notebook_injected_artifacts)

        # `[^\]\n]*` keeps the tag list on the header line; a greedy `.*` with
        # re.S would run to the last `]` in the file and misplace the cell.
        file_content = re.sub(
            r'# %% tags=\["parameters"([^\]\n]*)\](.+?)# %%',
            add_artifacts_cell,
            file_content,
            count=1,
            flags=re.S)

        with open(self.path_prefix + '_inject_output.py', 'w') as f_out:
            f_out.write(file_content)
            f_out.write(notebook_injected_code)

    def convert_and_run_in_notebook(self):
        """Convert the injected script to ``.ipynb`` and execute it with papermill."""
        jupytext([self.path_prefix + '_inject_output.py', '--to', 'ipynb'])

        papermill.execute_notebook(
            '%s_inject_output.ipynb' % self.path_prefix,
            '%s_inject_output_out.ipynb' % self.path_prefix,
            parameters=self.inject_params,
            kernel_name=self.kernel_name
        )

    def nb_html_convert(self):
        """Render the executed notebook to HTML and keep it on the instance."""
        c = Config()
        if self.remove_nb_inputs:
            c.HTMLExporter.exclude_input_prompt = True
            c.HTMLExporter.exclude_input = True
            c.HTMLExporter.exclude_output_prompt = True

        html_exporter = HTMLExporter(config=c)
        html_exporter.template_name = 'classic'
        body, _ = html_exporter.from_filename("%s_inject_output_out.ipynb" % self.path_prefix)

        self._notebook_html_output = body

    @staticmethod
    def _load_pickle(fname, default):
        """Return the unpickled content of `fname`, or `default` if it is missing."""
        if not os.path.exists(fname):
            return default
        # NOTE: pickle is only safe here because the file is produced by the
        # notebook this runner has just executed.
        with open(fname, 'rb') as f:
            return pickle.load(f)

    def build_component_output(self):
        """Load the `outputs` dict the notebook saved, if any."""
        self._outputs = self._load_pickle(self._OUTPUTS_PICKLE, self._outputs)

    def build_component_metrics(self):
        """Load the `metrics` dict the notebook saved, if any."""
        self._metrics = self._load_pickle(self._METRICS_PICKLE, self._metrics)

    @property
    def outputs(self):
        return self._outputs

    @property
    def metrics(self):
        return self._metrics

    @property
    def notebook_html_output(self):
        return self._notebook_html_output
class NbComponentBuilder:
    """Builds a Kubeflow component that executes code inside a Jupyter Notebook.

    The builder collects input/output parameters and artifacts, generates the
    source of a wrapper function whose signature exposes them, and turns that
    function into a container op with the notebook runner code attached as
    `extra_code`.
    """

    def __init__(self, op_name, inject_notebook_path=None, remote_notebook_path=None,
                 remove_nb_inputs=False):
        '''
        Parameters:
        -----------
        - op_name - Component name. It is lower-cased and every character other
            than letters, digits and `_` (spaces included) is dropped, because the
            result is used as the generated function's name.
        - inject_notebook_path - path to notebook .py file (jupytext format of "Notebook
            paired with percent script")
        - remote_notebook_path - path to Google/AWS storage from which notebook will be fetched
        - remove_nb_inputs - if True, generated notebook HTML output won't have notebook input cells
        '''
        assert inject_notebook_path or remote_notebook_path, \
            'You need to provide either path to google storage or local filename path ' + \
            'of the notebook that will be injected into component'
        assert not (inject_notebook_path and remote_notebook_path), \
            'Choose either notebook source or path, can\'t do both.'

        self.op_name = re.sub(r'[^a-zA-Z0-9_]+', '', op_name.replace(' ', '-').lower())
        self.input_params = []
        self.output_params = []
        self.input_artifacts = []
        self.output_artifacts = []
        self.inject_notebook_path = inject_notebook_path
        self.remote_notebook_path = remote_notebook_path
        self.remove_nb_inputs = remove_nb_inputs

        # Code shipped with the component: runner imports, the injected helper
        # module and the runner class itself.
        self.extra_code_builder = ExtraCodeBuilder()
        imports_source = pkgutil.get_data(__name__, "imports.py")
        injected_code_source = pkgutil.get_data(__name__, "injected_code.py")

        for code in [imports_source.decode(), injected_code_source.decode(), KFNotebookRunner]:
            self.extra_code_builder.add_code(code)

        if self.inject_notebook_path:
            self.extra_code_builder.inject_notebook(self.inject_notebook_path)

    def add_input_param(self, param_name, param_type, default_value=None):
        """Register an input parameter; `default_value=None` means "no default"."""
        self.input_params.append({
            'param_name': param_name,
            'param_type': param_type,
            'default_value': default_value
        })

    def add_output_param(self, param_name, param_type):
        """Register an output parameter returned in the component's named tuple."""
        self.output_params.append({
            'param_name': param_name,
            'param_type': param_type
        })

    def add_input_artifact(self, name):
        """Register an input artifact (becomes an `InputPath()` argument)."""
        self.input_artifacts.append(name)

    def add_output_artifact(self, name):
        """Register an output artifact (becomes an `OutputPath(str)` argument)."""
        self.output_artifacts.append(name)

    def build_component_function_source(self):
        """Return the source code of the component's wrapper function.

        Argument order is: output artifacts, input artifacts, input parameters
        without a default, input parameters with a default.
        """
        def has_default(p):
            # `is not None` (not truthiness) so that 0, '' and False survive.
            return p.get('default_value') is not None

        def input_param_to_str(p):
            s = '%s: %s' % (p['param_name'], p['param_type'].__name__)
            if has_default(p):
                # repr() so that string defaults are emitted as quoted literals.
                s += ' = %r' % (p['default_value'],)
            return s

        def artifact_pairs(names):
            # ('name', name): the artifact name plus the path argument value.
            return ', '.join("('{a}', {a})".format(a=a) for a in names)

        body_lines = [
            'input_params = {}'.format(self._notebook_inputs_params()),
            'output_params = {!r}'.format([p['param_name'] for p in self.output_params]),
            'use_injected_nb_source_code = {}'.format(bool(self.inject_notebook_path)),
            'remote_notebook_path = {!r}'.format(self.remote_notebook_path or ''),
            'output_artifacts = [{}]'.format(artifact_pairs(self.output_artifacts)),
            'input_artifacts = [{}]'.format(artifact_pairs(self.input_artifacts)),
            'remove_nb_inputs = {}'.format(bool(self.remove_nb_inputs)),
            '',
            'return exec_nb(locals(), input_params, output_params, use_injected_nb_source_code,',
            '               remote_notebook_path, output_artifacts, input_artifacts, remove_nb_inputs)',
        ]
        func_body = '\n'.join('    ' + line if line else line for line in body_lines) + '\n'

        args = ['%s: OutputPath(str)' % p for p in self.output_artifacts]
        args += ['%s: InputPath()' % p for p in self.input_artifacts]
        # Parameters with defaults must follow those without.
        args += [input_param_to_str(p) for p in self.input_params if not has_default(p)]
        args += [input_param_to_str(p) for p in self.input_params if has_default(p)]
        args_str = ', '.join(args)

        # Output artifacts don't have to be put here, they are being
        # outputted by adding OutputPath param on function's input
        tuple_params = ["('%s', %s)" % (p['param_name'], p['param_type'].__name__)
                        for p in self.output_params]

        return_str = "NamedTuple('TaskOutput', [('mlpipeline_ui_metadata', 'UI_metadata'), " + \
            "('mlpipeline_metrics', 'Metrics'), %s])"
        return_str = return_str % ', '.join(tuple_params)

        func_source = 'from kfp.components import InputPath, OutputPath\n'
        func_source += 'from typing import NamedTuple\n\n'
        func_source += f'def {self.op_name}({args_str}) -> {return_str}:\n{func_body}'
        return func_source

    def build_component_function(self):
        '''`kfp` module uses `inspect.getsource()` method which won't work unless
        function's source code is loaded from a file'''
        function_source = self.build_component_function_source()
        return convert_source_to_func(function_source, self.op_name)

    def build_op(self, base_image, packages_to_install=None, *args, **kwargs):
        """Create the container op; extra arguments go to `func_to_container_op`."""
        task_op = comp.func_to_container_op(
            self.build_component_function(),
            *args,
            base_image=base_image,
            # Fresh list per call instead of a shared mutable default.
            packages_to_install=list(packages_to_install or []),
            extra_code=self.extra_code_builder.get_code,
            **kwargs
        )
        return task_op

    def _notebook_inputs_params(self):
        'Returns inputs list formatted as a string'
        input_names = [i['param_name'] for i in self.input_params] + self.input_artifacts
        return '[' + ', '.join(["\'%s\'" % n for n in input_names]) + ']'
https://raw.githubusercontent.com/bartgras/kf-notebook-component/ac611588d19b118a00b7471cb2e61336c242e698/kfn/test/__init__.py -------------------------------------------------------------------------------- /kfn/test/kf_notebook_runner_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import os 3 | import re 4 | 5 | from kfn.kf_notebook_runner import KFNotebookRunner 6 | from kfn.test.notebooks_source import notebook_source, invalid_notebook_source 7 | from kfn.test.lib import get_tmp_notebook 8 | 9 | try: 10 | kernel = os.environ['NOTEBOOK_KERNEL_NAME'] 11 | except: 12 | raise Exception('Set environment variable that points to your ' + \ 13 | 'Jupyter kernel that will execute the notebook. ' + \ 14 | 'Example: NOTEBOOK_KERNEL_NAME=') 15 | 16 | class KFNotebookRunnerTestCase(unittest.TestCase): 17 | def test_executes_without_params(self): 18 | nb_file = get_tmp_notebook(notebook_source) 19 | runner = KFNotebookRunner(nb_file.name, kernel_name=kernel) 20 | runner.run() 21 | 22 | self.assertTrue(runner.notebook_html_output) 23 | self.assertEqual(runner.outputs, {'a': 11}) 24 | self.assertEqual(runner.metrics, {'accuracy': 1}) 25 | 26 | nb_pref = re.sub('.py$', '', nb_file.name) 27 | self.assertTrue(os.path.exists(nb_pref + '_inject_output.ipynb')) 28 | self.assertTrue(os.path.exists(nb_pref + '_inject_output_out.ipynb')) 29 | nb_file.close() 30 | 31 | def test_injectind_and_overwriting_inputs(self): 32 | nb_file = get_tmp_notebook(notebook_source) 33 | runner = KFNotebookRunner(nb_file.name, inject_params={'a': 1}, kernel_name=kernel) 34 | runner.run() 35 | 36 | self.assertEqual(runner.outputs['a'], 1) 37 | nb_file.close() 38 | 39 | def test_enabled_notebook_inputs(self): 40 | nb_file = get_tmp_notebook(notebook_source) 41 | runner = KFNotebookRunner(nb_file.name, kernel_name=kernel) 42 | runner.run() 43 | 44 | self.assertRegex(runner.notebook_html_output, r'.*TEST-COMMENT.*') 45 | nb_file.close() 46 | 
def get_tmp_notebook(source):
    """Write `source` to a temporary ``.py`` file and return the open file object.

    The file is deleted when the returned object is closed, so callers must
    keep it alive while they use ``.name`` and call ``.close()`` afterwards.
    """
    # Write through the handle we already hold: re-opening a still-open
    # NamedTemporaryFile by name does not work on every platform.
    tf = tempfile.NamedTemporaryFile(mode='w+', suffix='.py')
    tf.write(source)
    # Flush so code that opens the file by name sees the full content.
    tf.flush()
    return tf
op1() -> NamedTuple('TaskOutput', [('mlpipeline_ui_metadata', 'UI_metadata'), ('mlpipeline_metrics', 'Metrics'), ]):" 21 | self.assertTrue(annotation in func_source) 22 | 23 | def test_raise_notebook_not_found(self): 24 | with self.assertRaises(FileNotFoundError): 25 | NbComponentBuilder('op1', inject_notebook_path='/wrong/filename/path.py') 26 | 27 | def test_build_function(self): 28 | nb_file = get_tmp_notebook(notebook_source) 29 | builder = NbComponentBuilder('op1', inject_notebook_path=nb_file.name) 30 | x = builder.build_component_function() 31 | self.assertEquals(x.__name__, 'op1') 32 | 33 | def test_build_function_with_input_output_params(self): 34 | nb_file = get_tmp_notebook(notebook_source) 35 | builder = NbComponentBuilder('op1', inject_notebook_path=nb_file.name) 36 | builder.add_input_param('a', int, default_value=1) 37 | builder.add_output_param('x', float) 38 | func = builder.build_component_function() 39 | self.assertEqual(func.__annotations__['a'], int) 40 | self.assertEqual(list(func.__annotations__['return'].__annotations__.keys()), 41 | ['mlpipeline_ui_metadata', 'mlpipeline_metrics', 'x']) 42 | self.assertEqual(func.__annotations__['return'].__annotations__['x'], float) 43 | 44 | def test_build_function_with_input_output_artifacts(self): 45 | nb_file = get_tmp_notebook(notebook_source) 46 | builder = NbComponentBuilder('op1', inject_notebook_path=nb_file.name) 47 | builder.add_input_artifact('a_in') 48 | builder.add_output_artifact('a_out') 49 | func = builder.build_component_function() 50 | self.assertEqual(type(func.__annotations__['a_in']), type(InputPath())) 51 | self.assertEqual(type(func.__annotations__['a_out']), type(OutputPath())) 52 | -------------------------------------------------------------------------------- /kfn/test/notebooks_source.py: -------------------------------------------------------------------------------- 1 | 2 | invalid_notebook_source = '''# --- 3 | # jupyter: 4 | # jupytext: 5 | # formats: ipynb,py:percent 6 | # 
text_representation: 7 | # extension: .py 8 | # format_name: percent 9 | # format_version: '1.3' 10 | # jupytext_version: 1.6.0 11 | # kernelspec: 12 | # display_name: Python 3 13 | # language: python 14 | # name: python3 15 | # --- 16 | 17 | # %% 18 | # Nothing here 19 | 20 | ''' 21 | 22 | notebook_source = '''# --- 23 | # jupyter: 24 | # jupytext: 25 | # formats: ipynb,py:percent 26 | # text_representation: 27 | # extension: .py 28 | # format_name: percent 29 | # format_version: '1.3' 30 | # jupytext_version: 1.6.0 31 | # kernelspec: 32 | # display_name: Python 3 33 | # language: python 34 | # name: python3 35 | # --- 36 | 37 | # %% tags=["parameters"] 38 | a = 11 39 | b = 10 40 | 41 | # %% 42 | # TEST-COMMENT 43 | outputs = {'a': a} 44 | metrics = {'accuracy': 1} 45 | 46 | ''' 47 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | nbconvert==0.5.1 2 | papermill==2.2.2 3 | jupytext==1.7.1 4 | kfp==1.1.1 -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup(name='kfn', 4 | version='0.1', 5 | description='Kubeflow notebook component builder', 6 | url='https://github.com/bartgras/kf-notebook-component', 7 | author='Bart Grasza', 8 | author_email='bartgras@protonmail.com', 9 | license='MIT', 10 | packages=['kfn', 'kfn.test'], 11 | install_requires=[ 12 | 'kfp', 13 | 'papermill', 14 | 'jupytext', 15 | 'nbconvert' 16 | ], 17 | zip_safe=False) -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # jupyter: 3 | # jupytext 4 | 5 | 6 | 7 | something 8 | --------------------------------------------------------------------------------