├── .bumpversion.cfg ├── .editorconfig ├── .gitattributes ├── .github ├── PULL_REQUEST_TEMPLATE └── workflows │ └── build.yml ├── .gitignore ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── code-of-conduct.md ├── docs ├── dev │ └── release.md └── source │ └── images │ └── kfp-pipeline-example.png ├── etc ├── docker-scripts │ ├── __init__.py │ └── bootstrapper.py ├── pip.conf ├── requirements-elyra.txt └── tests │ ├── resources │ ├── test-archive.tgz │ ├── test-bad-archive.tgz │ ├── test-bad-archiveB.tgz │ ├── test-bad-notebookA.ipynb │ ├── test-bad-notebookB.ipynb │ ├── test-bad-requirements-elyra.txt │ ├── test-notebookA.ipynb │ └── test-requirements-elyra.txt │ └── test_bootstrapper.py ├── kfp_notebook ├── __init__.py ├── pipeline │ ├── __init__.py │ └── _notebook_op.py └── tests │ └── test_notebook_op.py ├── setup.cfg ├── setup.py ├── test-requirements.txt └── tox.ini /.bumpversion.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 0.27.0.dev0 3 | commit = False 4 | tag = False 5 | parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<build>\d+))? 6 | serialize = 7 | {major}.{minor}.{patch}.{release}{build} 8 | {major}.{minor}.{patch} 9 | 10 | [bumpversion:part:release] 11 | optional_value = prod 12 | first_value = dev 13 | values = 14 | dev 15 | prod 16 | 17 | [bumpversion:file:setup.py] 18 | 19 | [bumpversion:file:kfp_notebook/__init__.py] 20 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2018-2021 Elyra Authors 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | # http://editorconfig.org 18 | 19 | root = true 20 | 21 | [*] 22 | indent_style = space 23 | indent_size = 4 24 | trim_trailing_whitespace = true 25 | insert_final_newline = true 26 | charset = utf-8 27 | end_of_line = lf 28 | 29 | [*.bat] 30 | indent_style = tab 31 | end_of_line = crlf 32 | 33 | [LICENSE] 34 | insert_final_newline = false 35 | 36 | [Makefile] 37 | indent_style = tab 38 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2018-2021 Elyra Authors 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License.
15 | # 16 | 17 | # Set the default behavior to have all files normalized to Unix-style 18 | # line endings upon check-in. 19 | * text=auto 20 | 21 | # Declare files that will always have CRLF line endings on checkout. 22 | *.bat text eol=crlf 23 | 24 | # Denote all files that are truly binary and should not be modified. 25 | *.dll binary 26 | *.exp binary 27 | *.lib binary 28 | *.pdb binary 29 | *.exe binary 30 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Developer's Certificate of Origin 1.1 5 | 6 | By making a contribution to this project, I certify that: 7 | 8 | (a) The contribution was created in whole or in part by me and I 9 | have the right to submit it under the Apache License 2.0; or 10 | 11 | (b) The contribution is based upon previous work that, to the best 12 | of my knowledge, is covered under an appropriate open source 13 | license and I have the right under that license to submit that 14 | work with modifications, whether created in whole or in part 15 | by me, under the same open source license (unless I am 16 | permitted to submit under a different license), as indicated 17 | in the file; or 18 | 19 | (c) The contribution was provided directly to me by some other 20 | person who certified (a), (b) or (c) and I have not modified 21 | it. 22 | 23 | (d) I understand and agree that this project and the contribution 24 | are public and that a record of the contribution (including all 25 | personal information I submit with it, including my sign-off) is 26 | maintained indefinitely and may be redistributed consistent with 27 | this project or the open source license(s) involved. 28 | 29 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Elyra KFP Notebook Operator Tests 2 | on: 3 | push: 4 | branches: '*' 5 | pull_request: 6 | branches: '*' 7 | schedule: 8 | # once a day at 3 am (UTC) (7 pm (PST)) 9 | - cron: '0 3 * * *' 10 | 11 | jobs: 12 | build: 13 | runs-on: ${{ matrix.os }} 14 | strategy: 15 | fail-fast: false 16 | matrix: 17 | os: [ubuntu-latest] 18 | python-version: [ '3.6', '3.7', '3.8' ] 19 | steps: 20 | - name: Checkout 21 | uses: actions/checkout@v1 22 | - name: Set up Python ${{ matrix.python-version }} 23 | uses: actions/setup-python@v1 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | architecture: 'x64' 27 | - name: Display dependency info 28 | run: | 29 | python --version 30 | node --version 31 | npm --version 32 | pip --version 33 | - name: Install Python dependencies 34 | run: | 35 | pip install --upgrade pip wheel flake8 36 | pip install -r etc/requirements-elyra.txt && pip install -r test-requirements.txt 37 | - name: Build 38 | run: | 39 | make install 40 | - name: Test with pytest 41 | run: | 42 | make test 43 | - name: Collect logs 44 | if: failure() 45 | run: cat /tmp/jupyterlab-debug-*.log 46 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2018-2021 Elyra Authors 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | # Byte-compiled / optimized / DLL files 18 | __pycache__/ 19 | *.py[cod] 20 | *$py.class 21 | 22 | # C extensions 23 | *.so 24 | 25 | # Distribution / packaging 26 | .Python 27 | env/ 28 | build/ 29 | develop-eggs/ 30 | dist/ 31 | downloads/ 32 | eggs/ 33 | .eggs/ 34 | lib/ 35 | lib64/ 36 | parts/ 37 | sdist/ 38 | var/ 39 | wheels/ 40 | *.egg-info/ 41 | .installed.cfg 42 | *.egg 43 | 44 | # PyInstaller 45 | # Usually these files are written by a python script from a template 46 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 47 | *.manifest 48 | *.spec 49 | 50 | # Installer logs 51 | pip-log.txt 52 | pip-delete-this-directory.txt 53 | 54 | # Unit test / coverage reports 55 | htmlcov/ 56 | .tox/ 57 | .coverage 58 | .coverage.* 59 | .cache 60 | nosetests.xml 61 | coverage.xml 62 | *.cover 63 | .hypothesis/ 64 | .pytest_cache/ 65 | 66 | # Translations 67 | *.mo 68 | *.pot 69 | 70 | # Django stuff: 71 | *.log 72 | local_settings.py 73 | 74 | # Flask stuff: 75 | instance/ 76 | .webassets-cache 77 | 78 | # Scrapy stuff: 79 | .scrapy 80 | 81 | # Sphinx documentation 82 | docs/_build/ 83 | 84 | # PyBuilder 85 | target/ 86 | 87 | # Jupyter Notebook 88 | .ipynb_checkpoints 89 | 90 | # pyenv 91 | .python-version 92 | 93 | # celery beat schedule file 94 | celerybeat-schedule 95 | 96 | # SageMath parsed files 97 | *.sage.py 98 | 99 | # dotenv 100 | .env 101 | 102 | # virtualenv 103 | .venv 104 | venv/ 105 | ENV/ 106 | 107 | # Spyder project settings 108 | .spyderproject 109 | .spyproject 110 | 111 | # Rope project settings 112 | .ropeproject 113 | 114 | # mkdocs documentation 115 | /site 116 | 117 | # mypy 118 | .mypy_cache/ 119 | 120 | 121 | # PyBuilder 122 | target/ 123 | 124 | .DS_Store 125 | 126 | # PyCharm 127 | .idea/ 128 | *.iml 129 | 130 | # Build-related 131 | .image-* 132 | 133 | ## VisualStudioCode ### 134 | .vscode/* 135 | 136 | ### VisualStudioCode Patch ### 137 | # Ignore all local history of files 138 | .history 139 | .ionide 140 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | 18 | 19 | # Contributing 20 | 21 | Welcome to Elyra! If you are interested in contributing to the [Elyra code repo](README.md) 22 | then check out the [Contributor's Guide](https://github.com/elyra-ai/community/blob/master/contributing.md) and 23 | the [Code of Conduct](https://github.com/elyra-ai/community/blob/master/code-of-conduct.md). 24 | 25 | The [Elyra community repo](https://github.com/elyra-ai/community) contains information on how the community 26 | is organized and other information that is pertinent to contributing. 27 | 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions.
8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2018-2021 Elyra Authors 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | include CONTRIBUTING.md 17 | include LICENSE 18 | include README.md 19 | 20 | recursive-include tests * 21 | recursive-exclude * __pycache__ 22 | recursive-exclude * *.py[co] 23 | 24 | recursive-include *.md conf.py Makefile make.bat *.jpg *.png *.gif *.yml *.md *.sh 25 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2018-2021 Elyra Authors 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | .PHONY: clean clean-test clean-pyc clean-build help 17 | .DEFAULT_GOAL := help 18 | 19 | define PRINT_HELP_PYSCRIPT 20 | import re, sys 21 | 22 | for line in sys.stdin: 23 | match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line) 24 | if match: 25 | target, help = match.groups() 26 | print("%-20s %s" % (target, help)) 27 | endef 28 | export PRINT_HELP_PYSCRIPT 29 | 30 | WHEEL_FILES:=$(shell find . -type f ! -path "./build/*" ! 
-path "./etc/*" ! -path "./docs/*" ! -path "./.git/*" ! -path "./.idea/*" ! -path "./dist/*" ! -path "./.image-*" ) 31 | WHEEL_FILE := dist/kfp-notebook*.whl 32 | TAR_FILE := dist/kfp-notebook*.tar.gz 33 | TAG := dev 34 | 35 | help: 36 | @python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST) 37 | 38 | clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts 39 | 40 | clean-build: 41 | rm -fr build/ 42 | rm -fr dist/ 43 | rm -fr .eggs/ 44 | find . -name '*.egg-info' -exec rm -fr {} + 45 | find . -name '*.egg' -exec rm -f {} + 46 | 47 | clean-pyc: 48 | find . -name '*.pyc' -exec rm -f {} + 49 | find . -name '*.pyo' -exec rm -f {} + 50 | find . -name '*~' -exec rm -f {} + 51 | find . -name '__pycache__' -exec rm -fr {} + 52 | 53 | clean-test: 54 | rm -fr .tox/ 55 | rm -f .coverage 56 | rm -fr htmlcov/ 57 | rm -fr .pytest_cache 58 | 59 | lint: test-dependencies ## check style with flake8 60 | flake8 kfp_notebook etc 61 | 62 | test-dependencies: 63 | @pip install -q -r test-requirements.txt 64 | 65 | test: test-dependencies test-start-minio ## run tests quickly with the default Python 66 | coverage run -m pytest -v; r=$$?; $(MAKE) test-stop-minio; exit $$r 67 | 68 | test-start-minio: ## start test_minio container (dev testing) 69 | docker run --name test_minio -d -p 9000:9000 minio/minio server /data 70 | 71 | test-stop-minio: ## stop test_minio container (dev testing) 72 | @-docker rm -f test_minio >/dev/null 2>&1 73 | 74 | test-all: ## run tests on every Python version with tox 75 | tox 76 | 77 | coverage: test ## check code coverage quickly with the default Python 78 | coverage report -m 79 | 80 | release: dist ## package and upload a release 81 | twine upload dist/* 82 | 83 | $(WHEEL_FILE): $(WHEEL_FILES) 84 | python setup.py bdist_wheel 85 | 86 | bdist: 87 | @make $(WHEEL_FILE) 88 | 89 | sdist: 90 | python setup.py sdist 91 | 92 | dist: clean lint ## builds source and wheel package 93 | @make sdist 94 | @make bdist 95 | ls -l dist 96 | 97 | install: clean dist ## install the package to the active Python's site-packages 98 | pip install --upgrade dist/*.whl 99 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 18 | 19 | # March 2022: This repository is no longer maintained 20 | 21 | The code for this project was merged into the https://github.com/elyra-ai/elyra repository. 22 | 23 | --- 24 | 25 | `kfp-notebook` implements the Kubeflow Pipelines operator `NotebookOp`, which supports processing of notebooks, Python scripts, and R scripts in pipelines. 26 | 27 | ## Building kfp-notebook 28 | 29 | ```bash 30 | make clean install 31 | ``` 32 | 33 | ## Usage 34 | 35 | The example below can easily be added to a Python script or Jupyter notebook for testing purposes.
36 | 37 | ```python 38 | import os 39 | import kfp 40 | from kfp_notebook.pipeline import NotebookOp 41 | from kubernetes.client.models import V1EnvVar 42 | 43 | # KubeFlow Pipelines API Endpoint 44 | kfp_url = 'http://dataplatform.ibm.com:32488/pipeline' 45 | 46 | # S3 Object Storage 47 | cos_endpoint = 'http://s3.us-south.cloud-object-storage.appdomain.cloud' 48 | cos_bucket = 'test-bucket' 49 | cos_username = 'test' 50 | cos_password = 'test123' 51 | cos_directory = 'test-directory' 52 | cos_dependencies_archive = 'test-archive.tar.gz' 53 | 54 | # Inputs and Outputs 55 | inputs = [] 56 | outputs = [] 57 | 58 | # Container Image 59 | image = 'tensorflow/tensorflow:latest' 60 | 61 | def run_notebook_op(op_name, notebook_path): 62 | 63 | notebook_op = NotebookOp(name=op_name, 64 | notebook=notebook_path, 65 | cos_endpoint=cos_endpoint, 66 | cos_bucket=cos_bucket, 67 | cos_directory=cos_directory, 68 | cos_dependencies_archive=cos_dependencies_archive, 69 | pipeline_outputs=outputs, 70 | pipeline_inputs=inputs, 71 | image=image) 72 | 73 | notebook_op.container.add_env_variable(V1EnvVar(name='AWS_ACCESS_KEY_ID', value=cos_username)) 74 | notebook_op.container.add_env_variable(V1EnvVar(name='AWS_SECRET_ACCESS_KEY', value=cos_password)) 75 | notebook_op.container.set_image_pull_policy('Always') 76 | 77 | return notebook_op 78 | 79 | def demo_pipeline(): 80 | stats_op = run_notebook_op('stats', 'generate-community-overview') 81 | contributions_op = run_notebook_op('contributions', 'generate-community-contributions') 82 | run_notebook_op('overview', 'overview').after(stats_op, contributions_op) 83 | 84 | # Compile the new pipeline 85 | kfp.compiler.Compiler().compile(demo_pipeline, 'pipelines/pipeline.tar.gz') 86 | 87 | # Upload the compiled pipeline 88 | client = kfp.Client(host=kfp_url) 89 | pipeline_info = client.upload_pipeline('pipelines/pipeline.tar.gz', pipeline_name='pipeline-demo') 90 | 91 | # Create a new experiment 92 | experiment = client.create_experiment(name='demo-experiment') 93 | 94 | # Create a new run associated with experiment and our uploaded pipeline 95 | run = client.run_pipeline(experiment.id, 'demo-run', pipeline_id=pipeline_info.id) 96 | 97 | ``` 98 | 99 | ## Generated Kubeflow Pipelines 100 | 101 | ![Kubeflow Pipeline Example](docs/source/images/kfp-pipeline-example.png) 102 | -------------------------------------------------------------------------------- /code-of-conduct.md: -------------------------------------------------------------------------------- 1 | 18 | 19 | # Elyra Community Code of Conduct 20 | 21 | Please refer to our [Elyra Community Code of Conduct](https://github.com/elyra-ai/community/blob/master/code-of-conduct.md). 22 | -------------------------------------------------------------------------------- /docs/dev/release.md: -------------------------------------------------------------------------------- 1 | 18 | 19 | # Making a release 20 | 21 | We use bumpversion (more specifically, its actively maintained fork bump2version) to help with 22 | the version updates during the release steps. 23 | 24 | * Update the release version (e.g. 0.13.0) 25 | 26 | ```bash 27 | bump2version release 28 | git commit -a -m"KFP Notebook release 0.13.0" 29 | git tag v0.13.0 30 | ``` 31 | 32 | Note: Use `bump2version suffix` when releasing from a `dev` suffixed version.
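* (Optional) Verify the bump before tagging — a minimal sketch; it assumes the version string in `kfp_notebook/__init__.py` (one of the files `.bumpversion.cfg` rewrites) is exposed as the conventional `__version__` attribute, which this snapshot does not show:

```bash
# Show the version bump2version now tracks
grep current_version .bumpversion.cfg

# Confirm the package metadata and the module agree
python setup.py --version
python -c "import kfp_notebook; print(kfp_notebook.__version__)"
```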
33 | 34 | * Build the release artifacts 35 | 36 | ```bash 37 | make clean dist 38 | twine upload --sign dist/* 39 | ``` 40 | 41 | * Prepare for the next development iteration 42 | 43 | ```bash 44 | bump2version minor 45 | git commit -a -m"Prepare for next development iteration" 46 | ``` 47 | 48 | * Publishing conda-forge package 49 | - https://github.com/conda-forge/kfp-notebook-feedstock 50 | -------------------------------------------------------------------------------- /docs/source/images/kfp-pipeline-example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elyra-ai/kfp-notebook/ea86cf83def1aeee40eb3354772eef49dd3a7e53/docs/source/images/kfp-pipeline-example.png -------------------------------------------------------------------------------- /etc/docker-scripts/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2018-2021 Elyra Authors 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | from .bootstrapper import * 19 | -------------------------------------------------------------------------------- /etc/docker-scripts/bootstrapper.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2018-2021 Elyra Authors 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | import glob 17 | import json 18 | import logging 19 | import os 20 | import subprocess 21 | import sys 22 | import time 23 | 24 | from abc import ABC, abstractmethod 25 | from packaging import version 26 | from pathlib import Path 27 | from tempfile import TemporaryFile 28 | from typing import Optional, Any, Type, TypeVar 29 | from urllib.parse import urljoin 30 | from urllib.parse import urlparse 31 | from urllib.parse import urlunparse 32 | 33 | # Inputs and Outputs separator character. If updated, 34 | # same-named variable in _notebook_op.py must be updated! 35 | INOUT_SEPARATOR = ';' 36 | 37 | # Setup forward reference for type hint on return from class factory method.
See 38 | # https://stackoverflow.com/questions/39205527/can-you-annotate-return-type-when-value-is-instance-of-cls/39205612#39205612 39 | F = TypeVar('F', bound='FileOpBase') 40 | 41 | logger = logging.getLogger('elyra') 42 | enable_pipeline_info = os.getenv('ELYRA_ENABLE_PIPELINE_INFO', 'true').lower() == 'true' 43 | pipeline_name = None # global used in formatted logging 44 | operation_name = None # global used in formatted logging 45 | 46 | 47 | class FileOpBase(ABC): 48 | """Abstract base class for file-based operations""" 49 | filepath = None 50 | cos_client = None 51 | cos_bucket = None 52 | 53 | @classmethod 54 | def get_instance(cls: Type[F], **kwargs: Any) -> F: 55 | """Creates an appropriate subclass instance based on the extension of the filepath (-f) argument""" 56 | filepath = kwargs['filepath'] 57 | if '.ipynb' in filepath: 58 | return NotebookFileOp(**kwargs) 59 | elif '.py' in filepath: 60 | return PythonFileOp(**kwargs) 61 | elif '.r' in filepath: 62 | return RFileOp(**kwargs) 63 | else: 64 | raise ValueError('Unsupported file type: {}'.format(filepath)) 65 | 66 | def __init__(self, **kwargs: Any) -> None: 67 | """Initializes the FileOpBase instance""" 68 | import minio 69 | 70 | self.filepath = kwargs['filepath'] 71 | self.input_params = kwargs or [] 72 | self.cos_endpoint = urlparse(self.input_params.get('cos-endpoint')) 73 | self.cos_bucket = self.input_params.get('cos-bucket') 74 | 75 | # Infer secure from the endpoint's scheme. 76 | self.secure = self.cos_endpoint.scheme == 'https' 77 | 78 | self.cos_client = minio.Minio(self.cos_endpoint.netloc, 79 | access_key=os.getenv('AWS_ACCESS_KEY_ID'), 80 | secret_key=os.getenv('AWS_SECRET_ACCESS_KEY'), 81 | secure=self.secure) 82 | 83 | @abstractmethod 84 | def execute(self) -> None: 85 | """Execute the operation relative to derived class""" 86 | raise NotImplementedError("Method 'execute()' must be implemented by subclasses!") 87 | 88 | def process_dependencies(self) -> None: 89 | """Process dependencies 90 | 91 | If a dependency archive is present, it will be downloaded from object storage 92 | and expanded into the local directory. 93 | 94 | This method can be overridden by subclasses, although overrides should first 95 | call the superclass method. 96 | """ 97 | OpUtil.log_operation_info('processing dependencies') 98 | t0 = time.time() 99 | archive_file = self.input_params.get('cos-dependencies-archive') 100 | 101 | self.get_file_from_object_storage(archive_file) 102 | 103 | inputs = self.input_params.get('inputs') 104 | if inputs: 105 | input_list = inputs.split(INOUT_SEPARATOR) 106 | for file in input_list: 107 | self.get_file_from_object_storage(file.strip()) 108 | 109 | subprocess.call(['tar', '-zxvf', archive_file]) 110 | duration = time.time() - t0 111 | OpUtil.log_operation_info("dependencies processed", duration) 112 | 113 | def process_outputs(self) -> None: 114 | """Process outputs 115 | 116 | If outputs have been specified, it will upload the appropriate files to object storage 117 | 118 | This method can be overridden by subclasses, although overrides should first 119 | call the superclass method. 
120 | """ 121 | OpUtil.log_operation_info('processing outputs') 122 | t0 = time.time() 123 | outputs = self.input_params.get('outputs') 124 | if outputs: 125 | output_list = outputs.split(INOUT_SEPARATOR) 126 | for file in output_list: 127 | self.process_output_file(file.strip()) 128 | duration = time.time() - t0 129 | OpUtil.log_operation_info('outputs processed', duration) 130 | 131 | def process_metrics_and_metadata(self) -> None: 132 | """Process metrics and metadata 133 | 134 | This method exposes metrics/metadata that the processed 135 | notebook | script produces in the KFP UI. 136 | 137 | This method should not be overridden by subclasses. 138 | """ 139 | 140 | OpUtil.log_operation_info('processing metrics and metadata') 141 | t0 = time.time() 142 | 143 | # Location where the KFP specific output files will be stored 144 | # in the environment where the bootsrapper is running. 145 | # Defaults to '/tmp' if not specified. 146 | output_path = Path(os.getenv('ELYRA_WRITABLE_CONTAINER_DIR', '/tmp')) 147 | 148 | # verify that output_path exists, is a directory 149 | # and writable by creating a temporary file in that location 150 | try: 151 | with TemporaryFile(mode='w', dir=output_path) as t: 152 | t.write('can write') 153 | except Exception: 154 | # output_path doesn't meet the requirements 155 | # treat this as a non-fatal error and log a warning 156 | logger.warning('Cannot create files in "{}".' 157 | .format(output_path)) 158 | OpUtil.log_operation_info('Aborted metrics and metadata processing', 159 | time.time() - t0) 160 | return 161 | 162 | # Name of the proprietary KFP UI metadata file. 163 | # Notebooks | scripts might (but don't have to) produce this file 164 | # as documented in 165 | # https://www.kubeflow.org/docs/pipelines/sdk/output-viewer/ 166 | # Each NotebookOp must declare this as an output file or 167 | # the KFP UI won't pick up the information. 168 | kfp_ui_metadata_filename = 'mlpipeline-ui-metadata.json' 169 | 170 | # Name of the proprietary KFP metadata file. 171 | # Notebooks | scripts might (but don't have to) produce this file 172 | # as documented in 173 | # https://www.kubeflow.org/docs/pipelines/sdk/pipelines-metrics/ 174 | # Each NotebookOp must declare this as an output file or 175 | # the KFP UI won't pick up the information. 176 | kfp_metrics_filename = 'mlpipeline-metrics.json' 177 | 178 | # If the notebook | Python script produced one of the files 179 | # copy it to the target location where KFP is looking for it. 180 | for filename in [kfp_ui_metadata_filename, kfp_metrics_filename]: 181 | try: 182 | src = Path('.') / filename 183 | logger.debug('Processing {} ...'.format(src)) 184 | # try to load the file, if one was created by the 185 | # notebook or script 186 | with open(src, 'r') as f: 187 | metadata = json.load(f) 188 | 189 | # the file exists and contains valid JSON 190 | logger.debug('File content: {}'.format(json.dumps(metadata))) 191 | 192 | target = output_path / filename 193 | # try to save the file in the destination location 194 | with open(target, 'w') as f: 195 | json.dump(metadata, f) 196 | except FileNotFoundError: 197 | # The script | notebook didn't produce the file 198 | # we are looking for. This is not an error condition 199 | # that needs to be handled. 200 | logger.debug('{} produced no file named {}' 201 | .format(self.filepath, 202 | src)) 203 | except ValueError as ve: 204 | # The file content could not be parsed. Log a warning 205 | # and treat this as a non-fatal error. 
logger.warning('Ignoring incompatible {} produced by {}: {} {}'. 207 | format(str(src), 208 | self.filepath, 209 | ve, 210 | str(ve))) 211 | except Exception as ex: 212 | # Something is wrong with the user-generated metadata file. 213 | # Log a warning and treat this as a non-fatal error. 214 | logger.warning('Error processing {} produced by {}: {} {}'. 215 | format(str(src), 216 | self.filepath, 217 | ex, 218 | str(ex))) 219 | 220 | # 221 | # Augment kfp_ui_metadata_filename with Elyra-specific information: 222 | # - link to object storage where input and output artifacts are 223 | # stored 224 | ui_metadata_output = output_path / kfp_ui_metadata_filename 225 | try: 226 | # re-load the file 227 | with open(ui_metadata_output, 'r') as f: 228 | metadata = json.load(f) 229 | except Exception: 230 | # ignore all errors 231 | metadata = {} 232 | 233 | # Ensure the 'outputs' property exists and is of the correct type 234 | if metadata.get('outputs', None) is None or\ 235 | not isinstance(metadata['outputs'], list): 236 | metadata['outputs'] = [] 237 | 238 | # Define HREF for COS bucket: 239 | # <cos-endpoint>/<cos-bucket>/<cos-directory>/ 240 | bucket_url =\ 241 | urljoin(urlunparse(self.cos_endpoint), 242 | '{}/{}/' 243 | .format(self.cos_bucket, 244 | self.input_params.get('cos-directory', ''))) 245 | 246 | # add Elyra metadata to 'outputs' 247 | metadata['outputs'].append({ 248 | 'storage': 'inline', 249 | 'source': '## Inputs for {}\n' 250 | '[{}]({})' 251 | .format(self.filepath, 252 | self.input_params['cos-dependencies-archive'], 253 | bucket_url), 254 | 'type': 'markdown' 255 | }) 256 | 257 | # print the content of the augmented metadata file 258 | logger.debug('Output UI metadata: {}'.format(json.dumps(metadata))) 259 | 260 | logger.debug('Saving UI metadata file as {} ...' 261 | .format(ui_metadata_output)) 262 | 263 | # Save [updated] KFP UI metadata file 264 | with open(ui_metadata_output, 'w') as f: 265 | json.dump(metadata, f) 266 | 267 | duration = time.time() - t0 268 | OpUtil.log_operation_info('metrics and metadata processed', duration) 269 | 270 | def get_object_storage_filename(self, filename: str) -> str: 271 | """Function to prepend the cloud storage working dir to the file name 272 | 273 | :param filename: the local file 274 | :return: the full path of the object storage file 275 | """ 276 | return os.path.join(self.input_params.get('cos-directory', ''), filename) 277 | 278 | def get_file_from_object_storage(self, file_to_get: str) -> None: 279 | """Utility function to get files from object storage 280 | 281 | :param file_to_get: filename 282 | """ 283 | 284 | object_to_get = self.get_object_storage_filename(file_to_get) 285 | t0 = time.time() 286 | self.cos_client.fget_object(bucket_name=self.cos_bucket, 287 | object_name=object_to_get, 288 | file_path=file_to_get) 289 | duration = time.time() - t0 290 | OpUtil.log_operation_info(f"downloaded {file_to_get} from bucket: {self.cos_bucket}, object: {object_to_get}", 291 | duration) 292 | 293 | def put_file_to_object_storage(self, file_to_upload: str, object_name: Optional[str] = None) -> None: 294 | """Utility function to put files into object storage 295 | 296 | :param file_to_upload: filename 297 | :param object_name: remote filename (used to rename) 298 | """ 299 | 300 | object_to_upload = object_name 301 | if not object_to_upload: 302 | object_to_upload = file_to_upload 303 | 304 | object_to_upload = self.get_object_storage_filename(object_to_upload) 305 | t0 = time.time() 306 | self.cos_client.fput_object(bucket_name=self.cos_bucket, 307 |
object_name=object_to_upload, 308 | file_path=file_to_upload) 309 | duration = time.time() - t0 310 | OpUtil.log_operation_info(f"uploaded {file_to_upload} to bucket: {self.cos_bucket} object: {object_to_upload}", 311 | duration) 312 | 313 | def has_wildcard(self, filename): 314 | wildcards = ['*', '?'] 315 | return bool(any(c in filename for c in wildcards)) 316 | 317 | def process_output_file(self, output_file): 318 | """Puts the file to object storage. Handles wildcards and directories. """ 319 | 320 | matched_files = [output_file] 321 | if self.has_wildcard(output_file):  # explode the wildcarded file 322 | matched_files = glob.glob(output_file) 323 | 324 | for matched_file in matched_files: 325 | if os.path.isdir(matched_file): 326 | for file in os.listdir(matched_file): 327 | self.process_output_file(os.path.join(matched_file, file)) 328 | else: 329 | self.put_file_to_object_storage(matched_file) 330 | 331 | 332 | class NotebookFileOp(FileOpBase): 333 | """Perform Notebook File Operation""" 334 | 335 | def execute(self) -> None: 336 | """Execute the Notebook and upload results to object storage""" 337 | notebook = os.path.basename(self.filepath) 338 | notebook_name = notebook.replace('.ipynb', '') 339 | notebook_output = notebook_name + '-output.ipynb' 340 | notebook_html = notebook_name + '.html' 341 | 342 | try: 343 | OpUtil.log_operation_info(f"executing notebook using 'papermill {notebook} {notebook_output}'") 344 | t0 = time.time() 345 | # Include kernel selection in execution time 346 | kernel_name = NotebookFileOp.find_best_kernel(notebook) 347 | 348 | import papermill 349 | papermill.execute_notebook(notebook, notebook_output, kernel_name=kernel_name) 350 | duration = time.time() - t0 351 | OpUtil.log_operation_info("notebook execution completed", duration) 352 | 353 | NotebookFileOp.convert_notebook_to_html(notebook_output, notebook_html) 354 | self.put_file_to_object_storage(notebook_output, notebook) 355 | self.put_file_to_object_storage(notebook_html) 356 | self.process_outputs() 357 | except Exception as ex: 358 | # log in case of errors 359 | logger.error("Unexpected error: {}".format(sys.exc_info()[0])) 360 | 361 | NotebookFileOp.convert_notebook_to_html(notebook_output, notebook_html) 362 | self.put_file_to_object_storage(notebook_output, notebook) 363 | self.put_file_to_object_storage(notebook_html) 364 | raise ex 365 | 366 | @staticmethod 367 | def convert_notebook_to_html(notebook_file: str, html_file: str) -> str: 368 | """Function to convert a Jupyter notebook file (.ipynb) into an html file 369 | 370 | :param notebook_file: name of the notebook file to convert 371 | :param html_file: name of what the html output file should be 372 | :return: html_file: the converted notebook in html format 373 | """ 374 | import nbconvert 375 | import nbformat 376 | 377 | OpUtil.log_operation_info(f"converting from {notebook_file} to {html_file}") 378 | t0 = time.time() 379 | nb = nbformat.read(notebook_file, as_version=4) 380 | html_exporter = nbconvert.HTMLExporter() 381 | data, resources = html_exporter.from_notebook_node(nb) 382 | with open(html_file, "w") as f: 383 | f.write(data) 384 | f.close() 385 | 386 | duration = time.time() - t0 387 | OpUtil.log_operation_info(f"{notebook_file} converted to {html_file}", duration) 388 | return html_file 389 | 390 | @staticmethod 391 | def find_best_kernel(notebook_file: str) -> str: 392 | """Determines the best kernel to use via the following algorithm: 393 | 394 | 1. Loads notebook and gets kernel_name and kernel_language from NB metadata. 395 | 2.
Gets the list of configured kernels using KernelSpecManager. 396 | 3. If notebook kernel_name is in list, use that, else 397 | 4. If not found, load each configured kernel.json file and find a language match. 398 | 5. On first match, log info message regarding the switch and use that kernel. 399 | 6. If no language match is found, revert to notebook kernel and log warning message. 400 | """ 401 | import json 402 | import nbformat 403 | from jupyter_client.kernelspec import KernelSpecManager 404 | 405 | nb = nbformat.read(notebook_file, 4) 406 | 407 | nb_kspec = nb.metadata.kernelspec 408 | nb_kernel_name = nb_kspec.get('name') 409 | nb_kernel_lang = nb_kspec.get('language') 410 | 411 | kernel_specs = KernelSpecManager().find_kernel_specs() 412 | 413 | # see if we have a direct match... 414 | if nb_kernel_name in kernel_specs.keys(): 415 | return nb_kernel_name 416 | 417 | # no match found for kernel, try matching language... 418 | for name, file in kernel_specs.items(): 419 | # load file (JSON) and pick out language, if match, use first found 420 | with open(os.path.join(file, 'kernel.json')) as f: 421 | kspec = json.load(f) 422 | if kspec.get('language').lower() == nb_kernel_lang.lower(): 423 | matched_kernel = os.path.basename(file) 424 | logger.info(f"Matched kernel by language ({nb_kernel_lang}), using kernel " 425 | f"'{matched_kernel}' instead of the missing kernel '{nb_kernel_name}'.") 426 | return matched_kernel 427 | 428 | # no match found for language, return notebook kernel and let execution fail 429 | logger.warning(f"Reverting back to missing notebook kernel '{nb_kernel_name}' since no " 430 | f"language match ({nb_kernel_lang}) was found in current kernel specifications.") 431 | return nb_kernel_name 432 | 433 | 434 | class PythonFileOp(FileOpBase): 435 | """Perform Python File Operation""" 436 | 437 | def execute(self) -> None: 438 | """Execute the Python script and upload results to object storage""" 439 | python_script = os.path.basename(self.filepath) 440 | python_script_name = python_script.replace('.py', '') 441 | python_script_output = python_script_name + '.log' 442 | 443 | try: 444 | OpUtil.log_operation_info(f"executing python script using " 445 | f"'python3 {python_script}' to '{python_script_output}'") 446 | t0 = time.time() 447 | with open(python_script_output, "w") as log_file: 448 | subprocess.run(['python3', python_script], stdout=log_file, stderr=subprocess.STDOUT, check=True) 449 | 450 | duration = time.time() - t0 451 | OpUtil.log_operation_info("python script execution completed", duration) 452 | 453 | self.put_file_to_object_storage(python_script_output, python_script_output) 454 | self.process_outputs() 455 | except Exception as ex: 456 | # log in case of errors 457 | logger.error("Unexpected error: {}".format(sys.exc_info()[0])) 458 | logger.error("Error details: {}".format(ex)) 459 | 460 | self.put_file_to_object_storage(python_script_output, python_script_output) 461 | raise ex 462 | 463 | 464 | class RFileOp(FileOpBase): 465 | """Perform R File Operation""" 466 | 467 | def execute(self) -> None: 468 | """Execute the R script and upload results to object storage""" 469 | r_script = os.path.basename(self.filepath) 470 | r_script_name = r_script.replace('.r', '') 471 | r_script_output = r_script_name + '.log' 472 | 473 | try: 474 | OpUtil.log_operation_info(f"executing R script using " 475 | f"'Rscript {r_script}' to '{r_script_output}'") 476 | t0 = time.time() 477 | with open(r_script_output, "w") as log_file: 478 | subprocess.run(['Rscript', 
r_script], stdout=log_file, stderr=subprocess.STDOUT, check=True) 479 | 480 | duration = time.time() - t0 481 | OpUtil.log_operation_info("R script execution completed", duration) 482 | 483 | self.put_file_to_object_storage(r_script_output, r_script_output) 484 | self.process_outputs() 485 | except Exception as ex: 486 | # log in case of errors 487 | logger.error("Unexpected error: {}".format(sys.exc_info()[0])) 488 | logger.error("Error details: {}".format(ex)) 489 | 490 | self.put_file_to_object_storage(r_script_output, r_script_output) 491 | raise ex 492 | 493 | 494 | class OpUtil(object): 495 | """Utility functions for preparing file execution.""" 496 | @classmethod 497 | def package_install(cls, user_volume_path) -> None: 498 | OpUtil.log_operation_info("Installing packages") 499 | t0 = time.time() 500 | elyra_packages = cls.package_list_to_dict("requirements-elyra.txt") 501 | current_packages = cls.package_list_to_dict("requirements-current.txt") 502 | to_install_list = [] 503 | 504 | for package, ver in elyra_packages.items(): 505 | if package in current_packages: 506 | if "git+" in current_packages[package]: 507 | logger.warning(f"WARNING: Source package {package} found already installed from " 508 | f"{current_packages[package]}. This may conflict with the required " 509 | f"version: {ver}. Skipping...") 510 | elif isinstance(version.parse(current_packages[package]), version.LegacyVersion): 511 | logger.warning(f"WARNING: Package {package} found with unsupported Legacy version " 512 | f"scheme {current_packages[package]} already installed. Skipping...") 513 | elif version.parse(ver) > version.parse(current_packages[package]): 514 | logger.info(f"Updating {package} package from version {current_packages[package]} to {ver}...") 515 | to_install_list.append(package + '==' + ver) 516 | elif version.parse(ver) < version.parse(current_packages[package]): 517 | logger.info(f"Newer {package} package with version {current_packages[package]} " 518 | f"already installed. Skipping...") 519 | else: 520 | logger.info(f"Package not found.
Installing {package} package with version {ver}...") 521 | to_install_list.append(package + '==' + ver) 522 | 523 | if to_install_list: 524 | if user_volume_path: 525 | to_install_list.insert(0, '--target=' + user_volume_path) 526 | to_install_list.append('--no-cache-dir') 527 | 528 | subprocess.run([sys.executable, '-m', 'pip', 'install'] + to_install_list, check=True) 529 | 530 | if user_volume_path: 531 | os.environ["PIP_CONFIG_FILE"] = user_volume_path + "/pip.conf" 532 | 533 | subprocess.run([sys.executable, '-m', 'pip', 'freeze']) 534 | duration = time.time() - t0 535 | OpUtil.log_operation_info("Packages installed", duration) 536 | 537 | @classmethod 538 | def package_list_to_dict(cls, filename: str) -> dict: 539 | package_dict = {} 540 | with open(filename) as fh: 541 | for line in fh: 542 | if line[0] != '#': 543 | if " @ " in line: 544 | package_name, package_version = line.strip('\n').split(sep=" @ ") 545 | elif "===" in line: 546 | package_name, package_version = line.strip('\n').split(sep="===") 547 | else: 548 | package_name, package_version = line.strip('\n').split(sep="==") 549 | 550 | package_dict[package_name] = package_version 551 | 552 | return package_dict 553 | 554 | @classmethod 555 | def parse_arguments(cls, args) -> dict: 556 | import argparse 557 | global pipeline_name, operation_name 558 | 559 | logger.debug("Parsing Arguments.....") 560 | parser = argparse.ArgumentParser() 561 | parser.add_argument('-e', '--cos-endpoint', dest="cos-endpoint", help='Cloud object storage endpoint', 562 | required=True) 563 | parser.add_argument('-b', '--cos-bucket', dest="cos-bucket", help='Cloud object storage bucket to use', 564 | required=True) 565 | parser.add_argument('-d', '--cos-directory', dest="cos-directory", 566 | help='Working directory in cloud object storage bucket to use', required=True) 567 | parser.add_argument('-t', '--cos-dependencies-archive', dest="cos-dependencies-archive", 568 | help='Archive containing notebook and dependency artifacts', required=True) 569 | parser.add_argument('-f', '--file', dest="filepath", help='File to execute', required=True) 570 | parser.add_argument('-o', '--outputs', dest="outputs", help='Files to output to object store', required=False) 571 | parser.add_argument('-i', '--inputs', dest="inputs", help='Files to pull in from parent node', required=False) 572 | parser.add_argument('-p', '--user-volume-path', dest="user-volume-path", 573 | help='Directory in Volume to install python libraries into', required=False) 574 | parsed_args = vars(parser.parse_args(args)) 575 | 576 | # cos-directory is the pipeline name, set as global 577 | pipeline_name = parsed_args.get('cos-directory') 578 | # operation/node name is the basename of the non-suffixed filepath, set as global 579 | operation_name = os.path.basename(os.path.splitext(parsed_args.get('filepath'))[0]) 580 | 581 | return parsed_args 582 | 583 | @classmethod 584 | def log_operation_info(cls, action_clause: str, duration_secs: Optional[float] = None) -> None: 585 | """Produces a formatted log INFO message used entirely for support purposes. 586 | 587 | This method is intended to be called for any entries that should be captured across aggregated 588 | log files to identify steps within a given pipeline and each of its operations. As a result, 589 | calls to this method should produce single-line entries in the log (no embedded newlines). 590 | Each entry is prefixed with the pipeline name. 591 | 592 | General logging should NOT use this method but use logger.<level>() statements directly.
593 | 594 | :param action_clause: str representing the action that is being logged 595 | :param duration_secs: optional float value representing the duration of the action being logged 596 | """ 597 | global pipeline_name, operation_name 598 | if enable_pipeline_info: 599 | duration_clause = f"({duration_secs:.3f} secs)" if duration_secs else "" 600 | logger.info(f"'{pipeline_name}':'{operation_name}' - {action_clause} {duration_clause}") 601 | 602 | 603 | def main(): 604 | # Configure logger format, level 605 | logging.basicConfig(format='[%(levelname)1.1s %(asctime)s.%(msecs).03d] %(message)s', 606 | datefmt='%H:%M:%S', 607 | level=logging.DEBUG) 608 | # Set up packages and gather arguments 609 | input_params = OpUtil.parse_arguments(sys.argv[1:]) 610 | OpUtil.log_operation_info("starting operation") 611 | t0 = time.time() 612 | OpUtil.package_install(user_volume_path=input_params.get('user-volume-path')) 613 | 614 | # Create the appropriate instance, process dependencies and execute the operation 615 | file_op = FileOpBase.get_instance(**input_params) 616 | 617 | file_op.process_dependencies() 618 | 619 | file_op.execute() 620 | 621 | # Process notebook | script metrics and KFP UI metadata 622 | file_op.process_metrics_and_metadata() 623 | 624 | duration = time.time() - t0 625 | OpUtil.log_operation_info("operation completed", duration) 626 | 627 | 628 | if __name__ == '__main__': 629 | main() 630 | -------------------------------------------------------------------------------- /etc/pip.conf: -------------------------------------------------------------------------------- 1 | [global] 2 | target=/opt/app-root/src/jupyter-work-dir/python3/ 3 | -------------------------------------------------------------------------------- /etc/requirements-elyra.txt: -------------------------------------------------------------------------------- 1 | # This is a comprehensive list of python dependencies that Elyra requires to execute Jupyter notebooks. 2 | ipykernel==5.3.0 3 | ipython==7.15.0 4 | ipython-genutils==0.2.0 5 | jupyter-client==6.1.6 6 | jupyter-core==4.6.3 7 | minio==6.0.2 8 | nbclient==0.4.1 9 | nbconvert==5.6.1 10 | nbformat==5.0.7 11 | papermill==2.1.2 12 | pyzmq==19.0.1 13 | prompt-toolkit==3.0.5 14 | requests==2.25.1 15 | tornado==6.0.4 16 | traitlets==4.3.3 17 | urllib3==1.26.5 18 | # 19 | # These excluded packages are transitive dependencies of the included python packages.
20 | #ansiwrap==0.8.4 21 | #appdirs==1.4.4 22 | #appnope==0.1.0 23 | #async-generator==1.10 24 | #attrs==19.3.0 25 | #backcall==0.2.0 26 | #black==19.10b0 27 | #bleach==3.1.5 28 | #certifi==2020.4.5.2 29 | #chardet==3.0.4 30 | #click==7.1.2 31 | #configparser==5.0.0 32 | #decorator==4.4.2 33 | #defusedxml==0.6.0 34 | #entrypoints==0.3 35 | #idna==2.9 36 | #importlib-metadata==1.6.1 37 | #jedi==0.17.0 38 | #Jinja2==2.11.2 39 | #jsonschema==3.2.0 40 | #MarkupSafe==1.1.1 41 | #mistune==0.8.4 42 | #nest-asyncio==1.3.3 43 | #packaging==20.4 44 | #pandocfilters==1.4.2 45 | #parso==0.7.0 46 | #pathspec==0.8.0 47 | #pexpect==4.8.0 48 | #pickleshare==0.7.5 49 | #ptyprocess==0.6.0 50 | #Pygments==2.6.1 51 | #pyparsing==2.4.7 52 | #pyrsistent==0.16.0 53 | #python-dateutil==2.8.1 54 | #pytz==2020.1 55 | #PyYAML==5.3.1 56 | #regex==2020.6.8 57 | #six==1.15.0 58 | #tenacity==6.2.0 59 | #testpath==0.4.4 60 | #textwrap3==0.9.2 61 | #toml==0.10.1 62 | #tqdm==4.46.1 63 | #typed-ast==1.4.1 64 | #wcwidth==0.2.4 65 | #webencodings==0.5.1 66 | #zipp==3.1.0 67 | # 68 | -------------------------------------------------------------------------------- /etc/tests/resources/test-archive.tgz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elyra-ai/kfp-notebook/ea86cf83def1aeee40eb3354772eef49dd3a7e53/etc/tests/resources/test-archive.tgz -------------------------------------------------------------------------------- /etc/tests/resources/test-bad-archive.tgz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elyra-ai/kfp-notebook/ea86cf83def1aeee40eb3354772eef49dd3a7e53/etc/tests/resources/test-bad-archive.tgz -------------------------------------------------------------------------------- /etc/tests/resources/test-bad-archiveB.tgz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elyra-ai/kfp-notebook/ea86cf83def1aeee40eb3354772eef49dd3a7e53/etc/tests/resources/test-bad-archiveB.tgz -------------------------------------------------------------------------------- /etc/tests/resources/test-bad-notebookA.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "malformed", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "logA = open(\"A.txt\", \"r\").read()\n", 10 | "print(logA)" 11 | ] 12 | } 13 | ], 14 | "metadata": { 15 | "kernelspec": { 16 | "display_name": "Python 3", 17 | "language": "python", 18 | "name": "python3" 19 | }, 20 | "language_info": { 21 | "codemirror_mode": { 22 | "name": "ipython", 23 | "version": 3 24 | }, 25 | "file_extension": ".py", 26 | "mimetype": "text/x-python", 27 | "name": "python", 28 | "nbconvert_exporter": "python", 29 | "pygments_lexer": "ipython3", 30 | "version": "3.7.0" 31 | } 32 | }, 33 | "nbformat": 4, 34 | "nbformat_minor": 4 35 | } 36 | -------------------------------------------------------------------------------- /etc/tests/resources/test-bad-notebookB.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "logA = open(\"A.txt\", \"r\").read()\n", 10 | "print(logA)" 11 | ] 12 | } 13 | ], 14 | "metadata": { 15 | "kernelspec": { 16 | "display_name": "Python 3", 17 | "language": "python", 18 | "name": 
"python3" 19 | }, 20 | "language_info": { 21 | "codemirror_mode": { 22 | "name": "ipython", 23 | "version": 3 24 | }, 25 | "file_extension": ".py", 26 | "mimetype": "text/x-python", 27 | "name": "python", 28 | "nbconvert_exporter": "python", 29 | "pygments_lexer": "ipython3", 30 | "version": "3.7.0" 31 | } 32 | }, 33 | "nbformat": 4, 34 | "nbformat_minor": 4 35 | } 36 | -------------------------------------------------------------------------------- /etc/tests/resources/test-bad-requirements-elyra.txt: -------------------------------------------------------------------------------- 1 | ipykernel==5.3.0 2 | ipython==7.15.0 3 | ipython-genutils==0.2.0 4 | papermill==1.2.2 5 | jupyter-client=4.0.1 6 | #jedi==0.17.0 7 | #Jinja2==2.11.2 8 | #jsonschema==3.2.0 9 | -------------------------------------------------------------------------------- /etc/tests/resources/test-notebookA.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "TEST_ENV_VAR1: None\n" 13 | ] 14 | }, 15 | { 16 | "data": { 17 | "text/plain": [ 18 | "'test-file/test,file/test,file-copy.txt'" 19 | ] 20 | }, 21 | "execution_count": 1, 22 | "metadata": {}, 23 | "output_type": "execute_result" 24 | } 25 | ], 26 | "source": [ 27 | "import os\n", 28 | "from shutil import copyfile\n", 29 | "\n", 30 | "print(\"TEST_ENV_VAR1: {}\".format(os.getenv(\"TEST_ENV_VAR1\")))\n", 31 | "\n", 32 | "os.makedirs(\"test-file/test,file\", exist_ok=True)\n", 33 | "copyfile('test-file.txt', 'test-file/test-file-copy.txt')\n", 34 | "copyfile('test,file.txt', 'test-file/test,file/test,file-copy.txt')" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [] 43 | } 44 | ], 45 | "metadata": { 46 | "kernelspec": { 47 | "display_name": "Python 3", 48 | "language": "python", 49 | "name": "python3" 50 | }, 51 | "language_info": { 52 | "codemirror_mode": { 53 | "name": "ipython", 54 | "version": 3 55 | }, 56 | "file_extension": ".py", 57 | "mimetype": "text/x-python", 58 | "name": "python", 59 | "nbconvert_exporter": "python", 60 | "pygments_lexer": "ipython3", 61 | "version": "3.7.0" 62 | } 63 | }, 64 | "nbformat": 4, 65 | "nbformat_minor": 4 66 | } 67 | -------------------------------------------------------------------------------- /etc/tests/resources/test-requirements-elyra.txt: -------------------------------------------------------------------------------- 1 | ipykernel==5.3.0 2 | ipython==7.15.0 3 | ipython-genutils==0.2.0 4 | #jedi==0.17.0 5 | #Jinja2==2.11.2 6 | #jsonschema==3.2.0 7 | jupyter-client==6.1.6 8 | jupyter-core==4.6.3 9 | minio==6.0.2 10 | nbclient==0.4.1 11 | nbconvert==5.6.1 12 | nbformat==5.0.7 13 | papermill==2.1.2 14 | prompt-toolkit==3.0.5 15 | pyzmq==19.0.1 16 | requests==2.25.1 17 | text-extensions-for-pandas @ git+https://github.com/frreiss/text-extensions-for-pandas@a0dcb9196c6de6a2f58194cc49e48a25a18d099d 18 | tornado==6.0.4 19 | traitlets==4.3.3 20 | urllib3==1.26.5 21 | zebra===0.1.32 22 | -------------------------------------------------------------------------------- /etc/tests/test_bootstrapper.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2018-2021 Elyra Authors 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use 
this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | import json 18 | import hashlib 19 | import logging 20 | import minio 21 | import nbformat 22 | import os 23 | import papermill 24 | import pytest 25 | import mock 26 | import sys 27 | 28 | from pathlib import Path 29 | 30 | from tempfile import TemporaryFile 31 | sys.path.append('etc/docker-scripts/') 32 | import bootstrapper 33 | 34 | 35 | # To run this test from an IDE: 36 | # 1. set PYTHONPATH='`path-to-repo`/etc/docker-scripts' and working directory to `path-to-repo` 37 | # 2. Manually launch test_minio container: docker run --name test_minio -d -p 9000:9000 minio/minio server /data 38 | # (this is located in Makefile) 39 | # 40 | # NOTE: Any changes to etc/tests/resources/test-notebookA.ipynb require an 41 | # update of etc/tests/resources/test-archive.tgz using the command below: 42 | # tar -cvzf test-archive.tgz test-notebookA.ipynb 43 | 44 | 45 | MINIO_HOST_PORT = os.getenv("MINIO_HOST_PORT", "127.0.0.1:9000") 46 | 47 | 48 | @pytest.fixture(scope='function') 49 | def s3_setup(): 50 | bucket_name = "test-bucket" 51 | cos_client = minio.Minio(MINIO_HOST_PORT, 52 | access_key="minioadmin", 53 | secret_key="minioadmin", 54 | secure=False) 55 | cos_client.make_bucket(bucket_name) 56 | 57 | yield cos_client 58 | 59 | cleanup_files = cos_client.list_objects(bucket_name, recursive=True) 60 | for file in cleanup_files: 61 | cos_client.remove_object(bucket_name, file.object_name) 62 | cos_client.remove_bucket(bucket_name) 63 | 64 | 65 | def main_method_setup_execution(monkeypatch, s3_setup, tmpdir, argument_dict): 66 | """Primary body for main method testing...""" 67 | monkeypatch.setattr(bootstrapper.OpUtil, 'parse_arguments', lambda x: argument_dict) 68 | monkeypatch.setattr(bootstrapper.OpUtil, 'package_install', mock.Mock(return_value=True)) 69 | 70 | monkeypatch.setenv("AWS_ACCESS_KEY_ID", "minioadmin") 71 | monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "minioadmin") 72 | monkeypatch.setenv("TEST_ENV_VAR1", "test_env_var1") 73 | 74 | s3_setup.fput_object(bucket_name=argument_dict['cos-bucket'], 75 | object_name="test-directory/test-file.txt", 76 | file_path="etc/tests/resources/test-requirements-elyra.txt") 77 | s3_setup.fput_object(bucket_name=argument_dict['cos-bucket'], 78 | object_name="test-directory/test,file.txt", 79 | file_path="etc/tests/resources/test-bad-requirements-elyra.txt") 80 | s3_setup.fput_object(bucket_name=argument_dict['cos-bucket'], 81 | object_name="test-directory/test-archive.tgz", 82 | file_path="etc/tests/resources/test-archive.tgz") 83 | 84 | with tmpdir.as_cwd(): 85 | bootstrapper.main() 86 | test_file_list = ['test-archive.tgz', 87 | 'test-file.txt', 88 | 'test,file.txt', 89 | 'test-file/test-file-copy.txt', 90 | 'test-file/test,file/test,file-copy.txt', 91 | 'test-notebookA.ipynb', 92 | 'test-notebookA-output.ipynb', 93 | 'test-notebookA.html'] 94 | # Ensure working directory has all the files. 
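# (i.e. the extracted archive, the declared inputs, the generated outputs and the executed-notebook artifacts)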
95 | for file in test_file_list:
96 | assert os.path.isfile(file)
97 | # Ensure upload directory has all the files EXCEPT the output notebook
98 | # since it is uploaded as the input notebook (test-notebookA.ipynb)
99 | # (which is included in the archive at start).
100 | for file in test_file_list:
101 | if file != 'test-notebookA-output.ipynb':
102 | assert s3_setup.stat_object(bucket_name=argument_dict['cos-bucket'],
103 | object_name="test-directory/" + file)
104 | if file == "test-notebookA.html":
105 | with open("test-notebookA.html") as html_file:
106 | assert 'TEST_ENV_VAR1: test_env_var1' in html_file.read()
107 |
108 |
109 | def _get_operation_instance(monkeypatch, s3_setup):
110 | config = {
111 | 'cos-endpoint': 'http://' + MINIO_HOST_PORT,
112 | 'cos-user': 'minioadmin',
113 | 'cos-password': 'minioadmin',
114 | 'cos-bucket': 'test-bucket',
115 | 'filepath': 'untitled.ipynb'
116 | }
117 |
118 | op = bootstrapper.FileOpBase.get_instance(**config)
119 |
120 | # use the same minio instance used by the test
121 | # to avoid access denied errors when two minio
122 | # instances exist
123 | monkeypatch.setattr(op, "cos_client", s3_setup)
124 |
125 | return op
126 |
127 |
128 | def test_main_method(monkeypatch, s3_setup, tmpdir):
129 | argument_dict = {'cos-endpoint': 'http://' + MINIO_HOST_PORT,
130 | 'cos-bucket': 'test-bucket',
131 | 'cos-directory': 'test-directory',
132 | 'cos-dependencies-archive': 'test-archive.tgz',
133 | 'filepath': 'etc/tests/resources/test-notebookA.ipynb',
134 | 'inputs': 'test-file.txt;test,file.txt',
135 | 'outputs': 'test-file/test-file-copy.txt;test-file/test,file/test,file-copy.txt',
136 | 'user-volume-path': None}
137 | main_method_setup_execution(monkeypatch, s3_setup, tmpdir, argument_dict)
138 |
139 |
140 | def test_main_method_with_wildcard_outputs(monkeypatch, s3_setup, tmpdir):
141 | argument_dict = {'cos-endpoint': 'http://' + MINIO_HOST_PORT,
142 | 'cos-bucket': 'test-bucket',
143 | 'cos-directory': 'test-directory',
144 | 'cos-dependencies-archive': 'test-archive.tgz',
145 | 'filepath': 'etc/tests/resources/test-notebookA.ipynb',
146 | 'inputs': 'test-file.txt;test,file.txt',
147 | 'outputs': 'test-file/*',
148 | 'user-volume-path': None}
149 | main_method_setup_execution(monkeypatch, s3_setup, tmpdir, argument_dict)
150 |
151 |
152 | def test_main_method_with_dir_outputs(monkeypatch, s3_setup, tmpdir):
153 | argument_dict = {'cos-endpoint': 'http://' + MINIO_HOST_PORT,
154 | 'cos-bucket': 'test-bucket',
155 | 'cos-directory': 'test-directory',
156 | 'cos-dependencies-archive': 'test-archive.tgz',
157 | 'filepath': 'etc/tests/resources/test-notebookA.ipynb',
158 | 'inputs': 'test-file.txt;test,file.txt',
159 | 'outputs': 'test-file', # this is the directory that contains the outputs
160 | 'user-volume-path': None}
161 | main_method_setup_execution(monkeypatch, s3_setup, tmpdir, argument_dict)
162 |
163 |
164 | def is_writable_dir(path):
165 | """Helper method that determines whether 'path' is a writable directory
166 | """
167 | try:
168 | with TemporaryFile(mode='w', dir=path) as t:
169 | t.write('1')
170 | return True
171 | except Exception:
172 | return False
173 |
174 |
175 | def remove_file(filename, fail_ok=True):
176 | """Removes filename. If fail_ok is False an AssertionError is raised
177 | if removal failed for any reason, e.g.
filenotfound 178 | """ 179 | try: 180 | os.remove(filename) 181 | except OSError as ose: 182 | if fail_ok is False: 183 | raise AssertionError('Cannot remove {}: {} {}' 184 | .format(filename, 185 | str(ose), 186 | ose)) 187 | 188 | 189 | def test_process_metrics_method_not_writable_dir(monkeypatch, s3_setup, tmpdir): 190 | """Test for process_metrics_and_metadata 191 | 192 | Validates that the method can handle output directory that is not writable 193 | """ 194 | 195 | # remove "default" output file if it already exists 196 | output_metadata_file = Path('/tmp') / 'mlpipeline-ui-metadata.json' 197 | remove_file(output_metadata_file) 198 | 199 | try: 200 | monkeypatch.setenv('ELYRA_WRITABLE_CONTAINER_DIR', '/good/time/to/fail') 201 | argument_dict = {'cos-endpoint': 'http://' + MINIO_HOST_PORT, 202 | 'cos-bucket': 'test-bucket', 203 | 'cos-directory': 'test-directory', 204 | 'cos-dependencies-archive': 'test-archive.tgz', 205 | 'filepath': 'etc/tests/resources/test-notebookA.ipynb', 206 | 'inputs': 'test-file.txt;test,file.txt', 207 | 'outputs': 'test-file/test-file-copy.txt;test-file/test,file/test,file-copy.txt', 208 | 'user-volume-path': None} 209 | main_method_setup_execution(monkeypatch, s3_setup, tmpdir, argument_dict) 210 | except Exception as ex: 211 | print('Writable dir test failed: {} {}'.format(str(ex), ex)) 212 | assert False 213 | 214 | assert output_metadata_file.exists() is False 215 | 216 | 217 | def test_process_metrics_method_no_metadata_file(monkeypatch, s3_setup, tmpdir): 218 | """Test for process_metrics_and_metadata 219 | 220 | Verifies that the method produces a valid KFP UI metadata file if 221 | the node's script | notebook did not generate this metadata file. 222 | """ 223 | argument_dict = {'cos-endpoint': 'http://' + MINIO_HOST_PORT, 224 | 'cos-bucket': 'test-bucket', 225 | 'cos-directory': 'test-directory', 226 | 'cos-dependencies-archive': 'test-archive.tgz', 227 | 'filepath': 'etc/tests/resources/test-notebookA.ipynb', 228 | 'inputs': 'test-file.txt;test,file.txt', 229 | 'outputs': 'test-file/test-file-copy.txt;test-file/test,file/test,file-copy.txt', 230 | 'user-volume-path': None} 231 | 232 | output_path = Path(tmpdir) 233 | # metadata file name and location 234 | metadata_file = output_path / 'mlpipeline-ui-metadata.json' 235 | # remove file if it already exists 236 | remove_file(metadata_file) 237 | 238 | # override the default output directory to make this test platform 239 | # independent 240 | monkeypatch.setenv('ELYRA_WRITABLE_CONTAINER_DIR', str(tmpdir)) 241 | main_method_setup_execution(monkeypatch, s3_setup, tmpdir, argument_dict) 242 | 243 | # process_metrics should have generated a file named mlpipeline-ui-metadata.json 244 | # in tmpdir 245 | 246 | try: 247 | with open(metadata_file, 'r') as f: 248 | metadata = json.load(f) 249 | assert metadata.get('outputs') is not None 250 | assert isinstance(metadata['outputs'], list) 251 | assert len(metadata['outputs']) == 1 252 | assert metadata['outputs'][0]['storage'] == 'inline' 253 | assert metadata['outputs'][0]['type'] == 'markdown' 254 | assert '{}/{}/{}'.format(argument_dict['cos-endpoint'], 255 | argument_dict['cos-bucket'], 256 | argument_dict['cos-directory']) \ 257 | in metadata['outputs'][0]['source'] 258 | assert argument_dict['cos-dependencies-archive']\ 259 | in metadata['outputs'][0]['source'] 260 | except AssertionError: 261 | raise 262 | except Exception as ex: 263 | # Potential reasons for failures: 264 | # file not found, invalid JSON 265 | print('Validation of "{}" failed: 
{}'.format(str(ex), ex)) 266 | assert False 267 | 268 | 269 | def test_process_metrics_method_valid_metadata_file(monkeypatch, s3_setup, tmpdir): 270 | """Test for process_metrics_and_metadata 271 | 272 | Verifies that the method produces a valid KFP UI metadata file if 273 | the node's script | notebook generated this metadata file. 274 | """ 275 | argument_dict = {'cos-endpoint': 'http://' + MINIO_HOST_PORT, 276 | 'cos-bucket': 'test-bucket', 277 | 'cos-directory': 'test-directory', 278 | 'cos-dependencies-archive': 'test-archive.tgz', 279 | 'filepath': 'etc/tests/resources/test-notebookA.ipynb', 280 | 'inputs': 'test-file.txt;test,file.txt', 281 | 'outputs': 'test-file/test-file-copy.txt;test-file/test,file/test,file-copy.txt', 282 | 'user-volume-path': None} 283 | 284 | output_path = Path(tmpdir) 285 | # metadata file name and location 286 | input_metadata_file = 'mlpipeline-ui-metadata.json' 287 | output_metadata_file = output_path / input_metadata_file 288 | # remove output_metadata_file if it already exists 289 | remove_file(output_metadata_file) 290 | 291 | # 292 | # Simulate some custom metadata that the script | notebook produced 293 | # 294 | custom_metadata = { 295 | 'some_property': 'some property value', 296 | 'outputs': [ 297 | { 298 | 'source': 'gs://project/bucket/file.md', 299 | 'type': 'markdown' 300 | } 301 | ] 302 | } 303 | 304 | with tmpdir.as_cwd(): 305 | with open(input_metadata_file, 'w') as f: 306 | json.dump(custom_metadata, f) 307 | # override the default output directory to make this test platform 308 | # independent 309 | monkeypatch.setenv('ELYRA_WRITABLE_CONTAINER_DIR', str(tmpdir)) 310 | main_method_setup_execution(monkeypatch, s3_setup, tmpdir, argument_dict) 311 | 312 | # output_metadata_file should now exist 313 | 314 | try: 315 | with open(output_metadata_file, 'r') as f: 316 | metadata = json.load(f) 317 | assert metadata.get('some_property') is not None 318 | assert metadata['some_property'] == custom_metadata['some_property'] 319 | assert metadata.get('outputs') is not None 320 | assert isinstance(metadata['outputs'], list) 321 | assert len(metadata['outputs']) == 2 322 | for output in metadata['outputs']: 323 | if output.get('storage') is not None: 324 | assert output['storage'] == 'inline' 325 | assert output['type'] == 'markdown' 326 | assert '{}/{}/{}'.format(argument_dict['cos-endpoint'], 327 | argument_dict['cos-bucket'], 328 | argument_dict['cos-directory']) \ 329 | in output['source'] 330 | assert argument_dict['cos-dependencies-archive']\ 331 | in output['source'] 332 | else: 333 | assert output['type'] ==\ 334 | custom_metadata['outputs'][0]['type'] 335 | assert output['source'] ==\ 336 | custom_metadata['outputs'][0]['source'] 337 | except AssertionError: 338 | raise 339 | except Exception as ex: 340 | # Potential reasons for failures: 341 | # file not found, invalid JSON 342 | print('Validation of "{}" failed: {}'.format(str(ex), ex)) 343 | assert False 344 | 345 | 346 | def test_process_metrics_method_invalid_metadata_file(monkeypatch, s3_setup, tmpdir): 347 | """Test for process_metrics_and_metadata 348 | 349 | Verifies that the method produces a valid KFP UI metadata file if 350 | the node's script | notebook generated an invalid metadata file. 
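In this case the existing file is replaced outright, since its content cannot be merged.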
351 | """ 352 | argument_dict = {'cos-endpoint': 'http://' + MINIO_HOST_PORT, 353 | 'cos-bucket': 'test-bucket', 354 | 'cos-directory': 'test-directory', 355 | 'cos-dependencies-archive': 'test-archive.tgz', 356 | 'filepath': 'etc/tests/resources/test-notebookA.ipynb', 357 | 'inputs': 'test-file.txt;test,file.txt', 358 | 'outputs': 'test-file/test-file-copy.txt;test-file/test,file/test,file-copy.txt', 359 | 'user-volume-path': None} 360 | 361 | output_path = Path(tmpdir) 362 | # metadata file name and location 363 | input_metadata_file = 'mlpipeline-ui-metadata.json' 364 | output_metadata_file = output_path / input_metadata_file 365 | # remove output_metadata_file if it already exists 366 | remove_file(output_metadata_file) 367 | 368 | # 369 | # Populate the metadata file with some custom data that's not JSON 370 | # 371 | 372 | with tmpdir.as_cwd(): 373 | with open(input_metadata_file, 'w') as f: 374 | f.write('I am not a valid JSON data structure') 375 | f.write('1,2,3,4,5,6,7') 376 | 377 | # override the default output directory to make this test platform 378 | # independent 379 | monkeypatch.setenv('ELYRA_WRITABLE_CONTAINER_DIR', str(tmpdir)) 380 | main_method_setup_execution(monkeypatch, s3_setup, tmpdir, argument_dict) 381 | 382 | # process_metrics replaces the existing metadata file 383 | # because its content cannot be merged 384 | 385 | try: 386 | with open(output_metadata_file, 'r') as f: 387 | metadata = json.load(f) 388 | assert metadata.get('outputs') is not None 389 | assert isinstance(metadata['outputs'], list) 390 | assert len(metadata['outputs']) == 1 391 | assert metadata['outputs'][0]['storage'] == 'inline' 392 | assert metadata['outputs'][0]['type'] == 'markdown' 393 | assert '{}/{}/{}'.format(argument_dict['cos-endpoint'], 394 | argument_dict['cos-bucket'], 395 | argument_dict['cos-directory']) \ 396 | in metadata['outputs'][0]['source'] 397 | assert argument_dict['cos-dependencies-archive']\ 398 | in metadata['outputs'][0]['source'] 399 | except AssertionError: 400 | raise 401 | except Exception as ex: 402 | # Potential reasons for failures: 403 | # file not found, invalid JSON 404 | print('Validation of "{}" failed: {}'.format(str(ex), ex)) 405 | assert False 406 | 407 | 408 | def test_fail_bad_endpoint_main_method(monkeypatch, tmpdir): 409 | argument_dict = {'cos-endpoint': MINIO_HOST_PORT, 410 | 'cos-bucket': 'test-bucket', 411 | 'cos-directory': 'test-directory', 412 | 'cos-dependencies-archive': 'test-archive.tgz', 413 | 'filepath': 'etc/tests/resources/test-notebookA.ipynb', 414 | 'inputs': 'test-file.txt', 415 | 'outputs': 'test-file/test-file-copy.txt', 416 | 'user-volume-path': None} 417 | monkeypatch.setattr(bootstrapper.OpUtil, "parse_arguments", lambda x: argument_dict) 418 | monkeypatch.setattr(bootstrapper.OpUtil, 'package_install', mock.Mock(return_value=True)) 419 | 420 | mocked_func = mock.Mock(return_value="default", side_effect=['test-archive.tgz', 421 | 'test-file.txt', 422 | 'test-notebookA-output.ipynb', 423 | 'test-notebookA.html', 424 | 'test-file.txt']) 425 | monkeypatch.setattr(bootstrapper.FileOpBase, "get_object_storage_filename", mocked_func) 426 | 427 | monkeypatch.setenv("AWS_ACCESS_KEY_ID", "minioadmin") 428 | monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "minioadmin") 429 | 430 | with tmpdir.as_cwd(): 431 | with pytest.raises(minio.error.InvalidEndpointError): 432 | bootstrapper.main() 433 | 434 | 435 | def test_fail_bad_notebook_main_method(monkeypatch, s3_setup, tmpdir): 436 | argument_dict = {'cos-endpoint': 'http://' + 
MINIO_HOST_PORT, 437 | 'cos-bucket': 'test-bucket', 438 | 'cos-directory': 'test-directory', 439 | 'cos-dependencies-archive': 'test-bad-archiveB.tgz', 440 | 'filepath': 'etc/tests/resources/test-bad-notebookB.ipynb', 441 | 'inputs': 'test-file.txt', 442 | 'outputs': 'test-file/test-copy-file.txt', 443 | 'user-volume-path': None} 444 | 445 | monkeypatch.setattr(bootstrapper.OpUtil, "parse_arguments", lambda x: argument_dict) 446 | monkeypatch.setattr(bootstrapper.OpUtil, 'package_install', mock.Mock(return_value=True)) 447 | 448 | mocked_func = mock.Mock(return_value="default", side_effect=['test-bad-archiveB.tgz', 449 | 'test-file.txt', 450 | 'test-bad-notebookB-output.ipynb', 451 | 'test-bad-notebookB.html', 452 | 'test-file.txt']) 453 | monkeypatch.setattr(bootstrapper.FileOpBase, "get_object_storage_filename", mocked_func) 454 | 455 | monkeypatch.setenv("AWS_ACCESS_KEY_ID", "minioadmin") 456 | monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "minioadmin") 457 | 458 | s3_setup.fput_object(bucket_name=argument_dict['cos-bucket'], 459 | object_name="test-file.txt", 460 | file_path="README.md") 461 | s3_setup.fput_object(bucket_name=argument_dict['cos-bucket'], 462 | object_name="test-bad-archiveB.tgz", 463 | file_path="etc/tests/resources/test-bad-archiveB.tgz") 464 | 465 | with tmpdir.as_cwd(): 466 | with pytest.raises(papermill.exceptions.PapermillExecutionError): 467 | bootstrapper.main() 468 | 469 | 470 | def test_package_installation(monkeypatch, virtualenv): 471 | elyra_dict = {'ipykernel': '5.3.0', 472 | 'ansiwrap': '0.8.4', 473 | 'packaging': '20.0', 474 | 'text-extensions-for-pandas': '0.0.1-prealpha' 475 | } 476 | to_install_dict = {'bleach': '3.1.5', 477 | 'ansiwrap': '0.7.0', 478 | 'packaging': '20.4', 479 | 'text-extensions-for-pandas': "0.0.1-prealpha" 480 | } 481 | correct_dict = {'ipykernel': '5.3.0', 482 | 'ansiwrap': '0.8.4', 483 | 'packaging': '20.4', 484 | 'text-extensions-for-pandas': "0.0.1-prealpha" 485 | } 486 | 487 | mocked_func = mock.Mock(return_value="default", side_effect=[elyra_dict, to_install_dict]) 488 | 489 | monkeypatch.setattr(bootstrapper.OpUtil, "package_list_to_dict", mocked_func) 490 | monkeypatch.setattr(sys, "executable", virtualenv.python) 491 | 492 | virtualenv.run("python3 -m pip install bleach==3.1.5") 493 | virtualenv.run("python3 -m pip install ansiwrap==0.7.0") 494 | virtualenv.run("python3 -m pip install packaging==20.4") 495 | virtualenv.run("python3 -m pip install git+https://github.com/akchinSTC/" 496 | "text-extensions-for-pandas@50d5a1688fb723b5dd8139761830d3419042fee5") 497 | 498 | bootstrapper.OpUtil.package_install(user_volume_path=None) 499 | virtual_env_dict = {} 500 | output = virtualenv.run("python3 -m pip freeze", capture=True) 501 | print("This is the [pip freeze] output :\n" + output) 502 | for line in output.strip().split('\n'): 503 | if " @ " in line: 504 | package_name, package_version = line.strip('\n').split(sep=" @ ") 505 | elif "===" in line: 506 | package_name, package_version = line.strip('\n').split(sep="===") 507 | else: 508 | package_name, package_version = line.strip('\n').split(sep="==") 509 | virtual_env_dict[package_name] = package_version 510 | 511 | for package, version in correct_dict.items(): 512 | assert virtual_env_dict[package] == version 513 | 514 | 515 | def test_package_installation_with_target_path(monkeypatch, virtualenv): 516 | # TODO : Need to add test for direct-source e.g. 
' @ ' 517 | elyra_dict = {'ipykernel': '5.3.0', 518 | 'ansiwrap': '0.8.4', 519 | 'packaging': '20.0', 520 | 'text-extensions-for-pandas': '0.0.1-prealpha' 521 | } 522 | to_install_dict = {'bleach': '3.1.5', 523 | 'ansiwrap': '0.7.0', 524 | 'packaging': '21.0', 525 | 'text-extensions-for-pandas': "0.0.1-prealpha" 526 | } 527 | correct_dict = {'ipykernel': '5.3.0', 528 | 'ansiwrap': '0.8.4', 529 | 'packaging': '21.0', 530 | 'text-extensions-for-pandas': "0.0.1-prealpha" 531 | } 532 | 533 | mocked_func = mock.Mock(return_value="default", side_effect=[elyra_dict, to_install_dict]) 534 | 535 | monkeypatch.setattr(bootstrapper.OpUtil, "package_list_to_dict", mocked_func) 536 | monkeypatch.setattr(sys, "executable", virtualenv.python) 537 | 538 | virtualenv.run("python3 -m pip install --upgrade pip") 539 | virtualenv.run("python3 -m pip install --target='/tmp/lib/' bleach==3.1.5") 540 | virtualenv.run("python3 -m pip install --target='/tmp/lib/' ansiwrap==0.7.0") 541 | virtualenv.run("python3 -m pip install --target='/tmp/lib/' packaging==20.9") 542 | virtualenv.run("python3 -m pip install --target='/tmp/lib/' git+https://github.com/akchinSTC/" 543 | "text-extensions-for-pandas@50d5a1688fb723b5dd8139761830d3419042fee5") 544 | 545 | bootstrapper.OpUtil.package_install(user_volume_path='/tmp/lib/') 546 | virtual_env_dict = {} 547 | output = virtualenv.run("python3 -m pip freeze --path=/tmp/lib/", capture=True) 548 | print("This is the [pip freeze] output :\n" + output) 549 | for line in output.strip().split('\n'): 550 | if " @ " in line: 551 | package_name, package_version = line.strip('\n').split(sep=" @ ") 552 | elif "===" in line: 553 | package_name, package_version = line.strip('\n').split(sep="===") 554 | else: 555 | package_name, package_version = line.strip('\n').split(sep="==") 556 | virtual_env_dict[package_name] = package_version 557 | 558 | for package, version in correct_dict.items(): 559 | assert virtual_env_dict[package] == version 560 | 561 | 562 | def test_convert_notebook_to_html(tmpdir): 563 | notebook_file = os.getcwd() + "/etc/tests/resources/test-notebookA.ipynb" 564 | notebook_output_html_file = "test-notebookA.html" 565 | 566 | with tmpdir.as_cwd(): 567 | bootstrapper.NotebookFileOp.convert_notebook_to_html(notebook_file, notebook_output_html_file) 568 | 569 | assert os.path.isfile(notebook_output_html_file) 570 | # Validate that an html file got generated from the notebook 571 | with open(notebook_output_html_file, 'r') as html_file: 572 | html_data = html_file.read() 573 | assert html_data.startswith("") 574 | assert ""TEST_ENV_VAR1"" in html_data # from os.getenv("TEST_ENV_VAR1") 575 | assert html_data.endswith("\n") 576 | 577 | 578 | def test_fail_convert_notebook_to_html(tmpdir): 579 | notebook_file = os.getcwd() + "/etc/tests/resources/test-bad-notebookA.ipynb" 580 | notebook_output_html_file = "bad-notebookA.html" 581 | with tmpdir.as_cwd(): 582 | # Recent versions raising typeError due to #1130 583 | # https://github.com/jupyter/nbconvert/pull/1130 584 | with pytest.raises((TypeError, nbformat.validator.NotebookValidationError)): 585 | bootstrapper.NotebookFileOp.convert_notebook_to_html(notebook_file, notebook_output_html_file) 586 | 587 | 588 | def test_get_file_object_store(monkeypatch, s3_setup, tmpdir): 589 | file_to_get = "README.md" 590 | current_directory = os.getcwd() + '/' 591 | bucket_name = "test-bucket" 592 | 593 | s3_setup.fput_object(bucket_name=bucket_name, 594 | object_name=file_to_get, 595 | file_path=file_to_get) 596 | 597 | with tmpdir.as_cwd(): 
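# run from the temporary directory so the downloaded copy lands there rather than in the repo tree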
598 | op = _get_operation_instance(monkeypatch, s3_setup) 599 | 600 | op.get_file_from_object_storage(file_to_get) 601 | assert os.path.isfile(file_to_get) 602 | assert _fileChecksum(file_to_get) == _fileChecksum(current_directory + file_to_get) 603 | 604 | 605 | def test_fail_get_file_object_store(monkeypatch, s3_setup, tmpdir): 606 | file_to_get = "test-file.txt" 607 | 608 | with tmpdir.as_cwd(): 609 | with pytest.raises(minio.error.NoSuchKey): 610 | op = _get_operation_instance(monkeypatch, s3_setup) 611 | op.get_file_from_object_storage(file_to_get=file_to_get) 612 | 613 | 614 | def test_put_file_object_store(monkeypatch, s3_setup, tmpdir): 615 | bucket_name = "test-bucket" 616 | file_to_put = "LICENSE" 617 | current_directory = os.getcwd() + '/' 618 | 619 | op = _get_operation_instance(monkeypatch, s3_setup) 620 | op.put_file_to_object_storage(file_to_upload=file_to_put) 621 | 622 | with tmpdir.as_cwd(): 623 | s3_setup.fget_object(bucket_name, file_to_put, file_to_put) 624 | assert os.path.isfile(file_to_put) 625 | assert _fileChecksum(file_to_put) == _fileChecksum(current_directory + file_to_put) 626 | 627 | 628 | def test_fail_invalid_filename_put_file_object_store(monkeypatch, s3_setup): 629 | file_to_put = "LICENSE_NOT_HERE" 630 | 631 | with pytest.raises(FileNotFoundError): 632 | op = _get_operation_instance(monkeypatch, s3_setup) 633 | op.put_file_to_object_storage(file_to_upload=file_to_put) 634 | 635 | 636 | def test_fail_bucket_put_file_object_store(monkeypatch, s3_setup): 637 | bucket_name = "test-bucket-not-exist" 638 | file_to_put = "LICENSE" 639 | 640 | with pytest.raises(minio.error.NoSuchBucket): 641 | op = _get_operation_instance(monkeypatch, s3_setup) 642 | monkeypatch.setattr(op, "cos_bucket", bucket_name) 643 | op.put_file_to_object_storage(file_to_upload=file_to_put) 644 | 645 | 646 | def test_find_best_kernel_nb(tmpdir): 647 | source_nb_file = os.path.join(os.getcwd(), "etc/tests/resources/test-notebookA.ipynb") 648 | nb_file = os.path.join(tmpdir, "test-notebookA.ipynb") 649 | 650 | # "Copy" nb file to destination - this test does not update the kernel or language. 
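# (nbformat.read(..., 4) loads the notebook as nbformat version 4; nbformat.write round-trips it unchanged)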
651 | nb = nbformat.read(source_nb_file, 4) 652 | nbformat.write(nb, nb_file) 653 | 654 | with tmpdir.as_cwd(): 655 | kernel_name = bootstrapper.NotebookFileOp.find_best_kernel(nb_file) 656 | assert kernel_name == nb.metadata.kernelspec['name'] 657 | 658 | 659 | def test_find_best_kernel_lang(tmpdir, caplog): 660 | caplog.set_level(logging.INFO) 661 | source_nb_file = os.path.join(os.getcwd(), "etc/tests/resources/test-notebookA.ipynb") 662 | nb_file = os.path.join(tmpdir, "test-notebookA.ipynb") 663 | 664 | # "Copy" nb file to destination after updating the kernel name - forcing a language match 665 | nb = nbformat.read(source_nb_file, 4) 666 | nb.metadata.kernelspec['name'] = 'test-kernel' 667 | nb.metadata.kernelspec['language'] = 'PYTHON' # test case-insensitivity 668 | nbformat.write(nb, nb_file) 669 | 670 | with tmpdir.as_cwd(): 671 | kernel_name = bootstrapper.NotebookFileOp.find_best_kernel(nb_file) 672 | assert kernel_name == 'python3' 673 | assert len(caplog.records) == 1 674 | assert caplog.records[0].message.startswith("Matched kernel by language (PYTHON)") 675 | 676 | 677 | def test_find_best_kernel_nomatch(tmpdir, caplog): 678 | source_nb_file = os.path.join(os.getcwd(), "etc/tests/resources/test-notebookA.ipynb") 679 | nb_file = os.path.join(tmpdir, "test-notebookA.ipynb") 680 | 681 | # "Copy" nb file to destination after updating the kernel name and language - forcing use of updated name 682 | nb = nbformat.read(source_nb_file, 4) 683 | nb.metadata.kernelspec['name'] = 'test-kernel' 684 | nb.metadata.kernelspec['language'] = 'test-language' 685 | nbformat.write(nb, nb_file) 686 | 687 | with tmpdir.as_cwd(): 688 | kernel_name = bootstrapper.NotebookFileOp.find_best_kernel(nb_file) 689 | assert kernel_name == 'test-kernel' 690 | assert len(caplog.records) == 1 691 | assert caplog.records[0].message.startswith("Reverting back to missing notebook kernel 'test-kernel'") 692 | 693 | 694 | def test_parse_arguments(): 695 | test_args = ['-e', 'http://test.me.now', 696 | '-d', 'test-directory', 697 | '-t', 'test-archive.tgz', 698 | '-f', 'test-notebook.ipynb', 699 | '-b', 'test-bucket', 700 | '-p', '/tmp/lib'] 701 | args_dict = bootstrapper.OpUtil.parse_arguments(test_args) 702 | 703 | assert args_dict['cos-endpoint'] == 'http://test.me.now' 704 | assert args_dict['cos-directory'] == 'test-directory' 705 | assert args_dict['cos-dependencies-archive'] == 'test-archive.tgz' 706 | assert args_dict['cos-bucket'] == 'test-bucket' 707 | assert args_dict['filepath'] == 'test-notebook.ipynb' 708 | assert args_dict['user-volume-path'] == '/tmp/lib' 709 | assert not args_dict['inputs'] 710 | assert not args_dict['outputs'] 711 | 712 | 713 | def test_fail_missing_notebook_parse_arguments(): 714 | test_args = ['-e', 'http://test.me.now', 715 | '-d', 'test-directory', 716 | '-t', 'test-archive.tgz', 717 | '-b', 'test-bucket'] 718 | with pytest.raises(SystemExit): 719 | bootstrapper.OpUtil.parse_arguments(test_args) 720 | 721 | 722 | def test_fail_missing_endpoint_parse_arguments(): 723 | test_args = ['-d', 'test-directory', 724 | '-t', 'test-archive.tgz', 725 | '-f', 'test-notebook.ipynb', 726 | '-b', 'test-bucket'] 727 | with pytest.raises(SystemExit): 728 | bootstrapper.OpUtil.parse_arguments(test_args) 729 | 730 | 731 | def test_fail_missing_archive_parse_arguments(): 732 | test_args = ['-e', 'http://test.me.now', 733 | '-d', 'test-directory', 734 | '-f', 'test-notebook.ipynb', 735 | '-b', 'test-bucket'] 736 | with pytest.raises(SystemExit): 737 | 
bootstrapper.OpUtil.parse_arguments(test_args) 738 | 739 | 740 | def test_fail_missing_bucket_parse_arguments(): 741 | test_args = ['-e', 'http://test.me.now', 742 | '-d', 'test-directory', 743 | '-t', 'test-archive.tgz', 744 | '-f', 'test-notebook.ipynb'] 745 | with pytest.raises(SystemExit): 746 | bootstrapper.OpUtil.parse_arguments(test_args) 747 | 748 | 749 | def test_fail_missing_directory_parse_arguments(): 750 | test_args = ['-e', 'http://test.me.now', 751 | '-t', 'test-archive.tgz', 752 | '-f', 'test-notebook.ipynb', 753 | '-b', 'test-bucket'] 754 | with pytest.raises(SystemExit): 755 | bootstrapper.OpUtil.parse_arguments(test_args) 756 | 757 | 758 | @pytest.mark.skip(reason='leaving as informational - not sure worth checking if reqs change') 759 | def test_requirements_file(): 760 | requirements_file = "etc/tests/resources/test-requirements-elyra.txt" 761 | correct_number_of_packages = 18 762 | list_dict = bootstrapper.OpUtil.package_list_to_dict(requirements_file) 763 | assert len(list_dict) == correct_number_of_packages 764 | 765 | 766 | def test_fail_requirements_file_bad_delimiter(): 767 | bad_requirements_file = "etc/tests/resources/test-bad-requirements-elyra.txt" 768 | with pytest.raises(ValueError): 769 | bootstrapper.OpUtil.package_list_to_dict(bad_requirements_file) 770 | 771 | 772 | def _fileChecksum(filename): 773 | hasher = hashlib.sha256() 774 | 775 | with open(filename, 'rb') as afile: 776 | buf = afile.read(65536) 777 | while len(buf) > 0: 778 | hasher.update(buf) 779 | buf = afile.read(65536) 780 | checksum = hasher.hexdigest() 781 | return checksum 782 | -------------------------------------------------------------------------------- /kfp_notebook/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2018-2021 Elyra Authors 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | __version__ = '0.27.0.dev0' 19 | -------------------------------------------------------------------------------- /kfp_notebook/pipeline/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright 2018-2021 Elyra Authors 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | #
17 |
18 | from ._notebook_op import NotebookOp
19 |
-------------------------------------------------------------------------------- /kfp_notebook/pipeline/_notebook_op.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # Copyright 2018-2021 Elyra Authors
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 |
18 | import os
19 | import string
20 |
21 | from kfp.dsl import ContainerOp
22 | from kfp_notebook import __version__
23 | from kubernetes.client.models import V1EmptyDirVolumeSource, V1EnvVar, V1Volume, V1VolumeMount
24 | from kubernetes.client.models import V1EnvVarSource
25 | from kubernetes.client.models import V1ObjectFieldSelector
26 | from typing import Dict, List, Optional
27 |
28 |
29 | """
30 | The NotebookOp uses a python script to bootstrap the user-supplied image with the required dependencies.
31 | For the script to run properly, the image used must, at a minimum, have the 'curl' utility available
32 | and have python3 installed.
33 | """
34 |
35 | # Inputs and Outputs separator character. If updated,
36 | # same-named variable in bootstrapper.py must be updated!
37 | INOUT_SEPARATOR = ';'
38 |
39 | ELYRA_GITHUB_ORG = os.getenv("ELYRA_GITHUB_ORG", "elyra-ai")
40 | ELYRA_GITHUB_BRANCH = os.getenv("ELYRA_GITHUB_BRANCH", "master" if 'dev' in __version__ else "v" + __version__)
41 | ELYRA_PIP_CONFIG_URL = os.getenv('ELYRA_PIP_CONFIG_URL', 'https://raw.githubusercontent.com/{org}/kfp-notebook/'
42 | '{branch}/etc/pip.conf'.
43 | format(org=ELYRA_GITHUB_ORG, branch=ELYRA_GITHUB_BRANCH))
44 | ELYRA_BOOTSTRAP_SCRIPT_URL = os.getenv('ELYRA_BOOTSTRAP_SCRIPT_URL', 'https://raw.githubusercontent.com/{org}/'
45 | 'kfp-notebook/{branch}/etc/docker-scripts/'
46 | 'bootstrapper.py'.
47 | format(org=ELYRA_GITHUB_ORG,
48 | branch=ELYRA_GITHUB_BRANCH))
49 | ELYRA_REQUIREMENTS_URL = os.getenv('ELYRA_REQUIREMENTS_URL', 'https://raw.githubusercontent.com/{org}/'
50 | 'kfp-notebook/{branch}/etc/requirements-elyra.txt'.
51 | format(org=ELYRA_GITHUB_ORG,
52 | branch=ELYRA_GITHUB_BRANCH))
53 |
54 |
55 | class NotebookOp(ContainerOp):
56 |
57 | def __init__(self,
58 | pipeline_name: str,
59 | experiment_name: str,
60 | notebook: str,
61 | cos_endpoint: str,
62 | cos_bucket: str,
63 | cos_directory: str,
64 | cos_dependencies_archive: str,
65 | pipeline_version: Optional[str] = '',
66 | pipeline_source: Optional[str] = None,
67 | pipeline_outputs: Optional[List[str]] = None,
68 | pipeline_inputs: Optional[List[str]] = None,
69 | pipeline_envs: Optional[Dict[str, str]] = None,
70 | requirements_url: Optional[str] = None,
71 | bootstrap_script_url: Optional[str] = None,
72 | emptydir_volume_size: Optional[str] = None,
73 | cpu_request: Optional[str] = None,
74 | mem_request: Optional[str] = None,
75 | gpu_limit: Optional[str] = None,
76 | workflow_engine: Optional[str] = 'argo',
77 | **kwargs):
78 | """Create a new instance of ContainerOp.
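Example (illustrative values; shows the minimal set of required arguments):
    op = NotebookOp(name="analyze",
                    pipeline_name="my-pipeline",
                    experiment_name="my-experiment",
                    notebook="analyze.ipynb",
                    cos_endpoint="http://minio-service:9000",
                    cos_bucket="my-bucket",
                    cos_directory="my-pipeline",
                    cos_dependencies_archive="analyze.tar.gz",
                    image="tensorflow/tensorflow:2.3.0")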
79 | Args:
80 | pipeline_name: pipeline that this op belongs to
81 | experiment_name: the experiment where pipeline_name is executed
82 | notebook: name of the notebook that will be executed per this operation
83 | cos_endpoint: object storage endpoint, e.g. weaikish1.fyre.ibm.com:30442
84 | cos_bucket: bucket to retrieve archive from
85 | cos_directory: name of the directory in the object storage bucket to pull
86 | cos_dependencies_archive: archive file name to get from object storage bucket, e.g. archive1.tar.gz
87 | pipeline_version: optional version identifier
88 | pipeline_source: pipeline source
89 | pipeline_outputs: list of files produced by the notebook (joined with ';' when passed to the bootstrapper)
90 | pipeline_inputs: list of files to be consumed/required by the notebook
91 | pipeline_envs: dictionary of environment variables to set in the container prior to execution
92 | requirements_url: URL to a python requirements.txt file to be installed prior to running the notebook
93 | bootstrap_script_url: URL to a custom python bootstrap script to run
94 | emptydir_volume_size: size (GB) of the volume to create for the workspace when using the CRI-o container runtime
95 | cpu_request: number of CPUs requested for the operation
96 | mem_request: memory requested for the operation (in Gi)
97 | gpu_limit: maximum number of GPUs allowed for the operation
98 | workflow_engine: Kubeflow workflow engine, defaults to 'argo'
99 | kwargs: additional key-value pairs to pass, e.g. name, image, sidecars & is_exit_handler.
100 | See the Kubeflow pipelines ContainerOp definition for more parameters and usage:
101 | https://kubeflow-pipelines.readthedocs.io/en/latest/source/kfp.dsl.html#kfp.dsl.ContainerOp
102 | """
103 |
104 | self.pipeline_name = pipeline_name
105 | self.pipeline_version = pipeline_version
106 | self.pipeline_source = pipeline_source
107 | self.experiment_name = experiment_name
108 | self.notebook = notebook
109 | self.notebook_name = os.path.basename(notebook)
110 | self.cos_endpoint = cos_endpoint
111 | self.cos_bucket = cos_bucket
112 | self.cos_directory = cos_directory
113 | self.cos_dependencies_archive = cos_dependencies_archive
114 | self.container_work_dir_root_path = "./"
115 | self.container_work_dir_name = "jupyter-work-dir/"
116 | self.container_work_dir = self.container_work_dir_root_path + self.container_work_dir_name
117 | self.bootstrap_script_url = bootstrap_script_url
118 | self.requirements_url = requirements_url
119 | self.pipeline_outputs = pipeline_outputs
120 | self.pipeline_inputs = pipeline_inputs
121 | self.pipeline_envs = pipeline_envs
122 | self.cpu_request = cpu_request
123 | self.mem_request = mem_request
124 | self.gpu_limit = gpu_limit
125 |
126 | argument_list = []
127 |
128 | """ CRI-o support for kfp pipelines
129 | We need to attach an emptydir volume for each notebook that runs since the CRI-o runtime does not allow
130 | us to write to the base image layer file system, only to volumes.
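The python packages that bootstrapper.py installs are therefore placed under this volume
(python_user_lib_path) and exposed via the PYTHONPATH environment variable.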
131 | """ 132 | self.emptydir_volume_name = "workspace" 133 | self.emptydir_volume_size = emptydir_volume_size 134 | self.python_user_lib_path = '' 135 | self.python_user_lib_path_target = '' 136 | self.python_pip_config_url = '' 137 | 138 | if self.emptydir_volume_size: 139 | self.container_work_dir_root_path = "/opt/app-root/src/" 140 | self.container_python_dir_name = "python3/" 141 | self.container_work_dir = self.container_work_dir_root_path + self.container_work_dir_name 142 | self.python_user_lib_path = self.container_work_dir + self.container_python_dir_name 143 | self.python_user_lib_path_target = '--target=' + self.python_user_lib_path 144 | self.python_pip_config_url = ELYRA_PIP_CONFIG_URL 145 | 146 | if not self.bootstrap_script_url: 147 | self.bootstrap_script_url = ELYRA_BOOTSTRAP_SCRIPT_URL 148 | 149 | if not self.requirements_url: 150 | self.requirements_url = ELYRA_REQUIREMENTS_URL 151 | 152 | if 'name' not in kwargs: 153 | raise TypeError("You need to provide a name for the operation.") 154 | elif not kwargs.get('name'): 155 | raise ValueError("You need to provide a name for the operation.") 156 | 157 | if 'image' not in kwargs: 158 | raise ValueError("You need to provide an image.") 159 | 160 | if not notebook: 161 | raise ValueError("You need to provide a notebook.") 162 | 163 | if 'arguments' not in kwargs: 164 | """ If no arguments are passed, we use our own. 165 | If ['arguments'] are set, we assume container's ENTRYPOINT is set and dependencies are installed 166 | NOTE: Images being pulled must have python3 available on PATH and cURL utility 167 | """ 168 | 169 | argument_list.append('mkdir -p {container_work_dir} && cd {container_work_dir} && ' 170 | 'curl -H "Cache-Control: no-cache" -L {bootscript_url} --output bootstrapper.py && ' 171 | 'curl -H "Cache-Control: no-cache" -L {reqs_url} --output requirements-elyra.txt && ' 172 | .format(container_work_dir=self.container_work_dir, 173 | bootscript_url=self.bootstrap_script_url, 174 | reqs_url=self.requirements_url) 175 | ) 176 | 177 | if self.emptydir_volume_size: 178 | argument_list.append('mkdir {container_python_dir} && cd {container_python_dir} && ' 179 | 'curl -H "Cache-Control: no-cache" -L {python_pip_config_url} ' 180 | '--output pip.conf && cd .. 
&&'
181 | .format(python_pip_config_url=self.python_pip_config_url,
182 | container_python_dir=self.container_python_dir_name)
183 | )
184 |
185 | argument_list.append('python3 -m pip install {python_user_lib_path_target} packaging && '
186 | 'python3 -m pip freeze > requirements-current.txt && '
187 | 'python3 bootstrapper.py '
188 | '--cos-endpoint {cos_endpoint} '
189 | '--cos-bucket {cos_bucket} '
190 | '--cos-directory "{cos_directory}" '
191 | '--cos-dependencies-archive "{cos_dependencies_archive}" '
192 | '--file "{notebook}" '
193 | .format(cos_endpoint=self.cos_endpoint,
194 | cos_bucket=self.cos_bucket,
195 | cos_directory=self.cos_directory,
196 | cos_dependencies_archive=self.cos_dependencies_archive,
197 | notebook=self.notebook,
198 | python_user_lib_path_target=self.python_user_lib_path_target)
199 | )
200 |
201 | if self.pipeline_inputs:
202 | inputs_str = self._artifact_list_to_str(self.pipeline_inputs)
203 | argument_list.append('--inputs "{}" '.format(inputs_str))
204 |
205 | if self.pipeline_outputs:
206 | outputs_str = self._artifact_list_to_str(self.pipeline_outputs)
207 | argument_list.append('--outputs "{}" '.format(outputs_str))
208 |
209 | if self.emptydir_volume_size:
210 | argument_list.append('--user-volume-path "{}" '.format(self.python_user_lib_path))
211 |
212 | kwargs['command'] = ['sh', '-c']
213 | kwargs['arguments'] = "".join(argument_list)
214 |
215 | super().__init__(**kwargs)
216 |
217 | # We must deal with the envs after the superclass initialization since these amend the
218 | # container attribute that isn't available until now.
219 | if self.pipeline_envs:
220 | for key, value in self.pipeline_envs.items(): # Convert dict entries to the format kfp needs
221 | self.container.add_env_variable(V1EnvVar(name=key, value=value))
222 |
223 | # If a crio volume size is found then assume the kubeflow pipelines environment is using CRI-o as
224 | # its container runtime
225 | if self.emptydir_volume_size:
226 | self.add_volume(V1Volume(empty_dir=V1EmptyDirVolumeSource(
227 | medium="",
228 | size_limit=self.emptydir_volume_size),
229 | name=self.emptydir_volume_name))
230 |
231 | self.container.add_volume_mount(V1VolumeMount(mount_path=self.container_work_dir_root_path,
232 | name=self.emptydir_volume_name))
233 |
234 | # Set PYTHONPATH to the location of the elyra dependencies installed in the Volume
235 | self.container.add_env_variable(V1EnvVar(name='PYTHONPATH',
236 | value=self.python_user_lib_path))
237 |
238 | if self.cpu_request:
239 | self.container.set_cpu_request(cpu=str(cpu_request))
240 |
241 | if self.mem_request:
242 | self.container.set_memory_request(memory=str(mem_request) + "G")
243 |
244 | if self.gpu_limit:
245 | gpu_vendor = (self.pipeline_envs or {}).get('GPU_VENDOR', 'nvidia')  # pipeline_envs may be None
246 | self.container.set_gpu_limit(gpu=str(gpu_limit), vendor=gpu_vendor)
247 |
248 | # Generate a unique ELYRA_RUN_NAME value and expose it as an environment
249 | # variable in the container
250 | if workflow_engine and workflow_engine.lower() == 'argo':
251 | run_name_placeholder = '{{workflow.annotations.pipelines.kubeflow.org/run_name}}'
252 | self.container.add_env_variable(V1EnvVar(name='ELYRA_RUN_NAME',
253 | value=run_name_placeholder))
254 | else:
255 | # For Tekton, derive the value from the specified pod annotation
256 | annotation = 'pipelines.kubeflow.org/run_name'
257 | field_path = f"metadata.annotations['{annotation}']"
258 | self.container.add_env_variable(V1EnvVar(name='ELYRA_RUN_NAME',
259 | value_from=V1EnvVarSource(
260 |
field_ref=V1ObjectFieldSelector(field_path=field_path)))) 261 | 262 | # Attach metadata to the pod 263 | # Node type (a static type for this op) 264 | self.add_pod_label('elyra/node-type', 265 | NotebookOp._normalize_label_value( 266 | 'notebook-script')) 267 | # Pipeline name 268 | self.add_pod_label('elyra/pipeline-name', 269 | NotebookOp._normalize_label_value(self.pipeline_name)) 270 | # Pipeline version 271 | self.add_pod_label('elyra/pipeline-version', 272 | NotebookOp._normalize_label_value(self.pipeline_version)) 273 | # Experiment name 274 | self.add_pod_label('elyra/experiment-name', 275 | NotebookOp._normalize_label_value(self.experiment_name)) 276 | # Pipeline node name 277 | self.add_pod_label('elyra/node-name', 278 | NotebookOp._normalize_label_value(kwargs.get('name'))) 279 | # Pipeline node file 280 | self.add_pod_annotation('elyra/node-file-name', 281 | self.notebook) 282 | 283 | # Identify the pipeline source, which can be a 284 | # pipeline file (mypipeline.pipeline), a Python 285 | # script or notebook that was submitted 286 | if self.pipeline_source is not None: 287 | self.add_pod_annotation('elyra/pipeline-source', 288 | self.pipeline_source) 289 | 290 | def _artifact_list_to_str(self, pipeline_array): 291 | trimmed_artifact_list = [] 292 | for artifact_name in pipeline_array: 293 | if INOUT_SEPARATOR in artifact_name: # if INOUT_SEPARATOR is in name, throw since this is our separator 294 | raise \ 295 | ValueError("Illegal character ({}) found in filename '{}'.".format(INOUT_SEPARATOR, artifact_name)) 296 | trimmed_artifact_list.append(artifact_name.strip()) 297 | return INOUT_SEPARATOR.join(trimmed_artifact_list) 298 | 299 | @staticmethod 300 | def _normalize_label_value(value): 301 | 302 | """Produce a Kubernetes-compliant label from value 303 | 304 | Valid label values must be 63 characters or less and 305 | must be empty or begin and end with an alphanumeric 306 | character ([a-z0-9A-Z]) with dashes (-), underscores 307 | (_), dots (.), and alphanumerics between. 308 | """ 309 | 310 | if value is None or len(value) == 0: 311 | return '' # nothing to do 312 | 313 | max_length = 63 314 | # This char is added at the front and/or back 315 | # of value, if the first and/or last character 316 | # is invalid. For example a value of "-abc" 317 | # is converted to "a-abc". The specified character 318 | # must meet the label value constraints. 319 | valid_char = 'a' 320 | # This char is used to replace invalid characters 321 | # that are in the "middle" of value. For example 322 | # a value of "abc%def" is converted to "abc_def". 323 | # The specified character must meet the label value 324 | # constraints. 325 | valid_middle_char = '_' 326 | 327 | # must begin with [0-9a-zA-Z] 328 | valid_chars = string.ascii_letters + string.digits 329 | if value[0] not in valid_chars: 330 | value = valid_char + value 331 | 332 | value = value[:max_length] # enforce max length 333 | 334 | # must end with [0-9a-zA-Z] 335 | if value[-1] not in valid_chars: 336 | if len(value) <= max_length - 1: 337 | # append valid character if max length 338 | # would not be exceeded 339 | value = value + valid_char 340 | else: 341 | # replace with valid character 342 | value = value[:-1] + valid_char 343 | 344 | # middle chars must be [0-9a-zA-Z\-_.] 345 | valid_chars = valid_chars + '-_.' 
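# e.g. a value of 'my pipeline' becomes 'my_pipeline' in the loop below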
346 | 347 | newstr = '' 348 | for c in range(len(value)): 349 | if value[c] not in valid_chars: 350 | newstr = newstr + valid_middle_char 351 | else: 352 | newstr = newstr + value[c] 353 | value = newstr 354 | 355 | return value 356 | -------------------------------------------------------------------------------- /kfp_notebook/tests/test_notebook_op.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2018-2021 Elyra Authors 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | from kfp_notebook.pipeline import NotebookOp 17 | import pytest 18 | import string 19 | 20 | 21 | @pytest.fixture 22 | def notebook_op(): 23 | return NotebookOp(name="test", 24 | pipeline_name="test-pipeline", 25 | experiment_name="experiment-name", 26 | notebook="test_notebook.ipynb", 27 | cos_endpoint="http://testserver:32525", 28 | cos_bucket="test_bucket", 29 | cos_directory="test_directory", 30 | cos_dependencies_archive="test_archive.tgz", 31 | image="test/image:dev") 32 | 33 | 34 | def test_fail_without_cos_endpoint(): 35 | with pytest.raises(TypeError): 36 | NotebookOp(name="test", 37 | pipeline_name="test-pipeline", 38 | experiment_name="experiment-name", 39 | notebook="test_notebook.ipynb", 40 | cos_bucket="test_bucket", 41 | cos_directory="test_directory", 42 | cos_dependencies_archive="test_archive.tgz", 43 | image="test/image:dev") 44 | 45 | 46 | def test_fail_without_cos_bucket(): 47 | with pytest.raises(TypeError): 48 | NotebookOp(name="test", 49 | pipeline_name="test-pipeline", 50 | experiment_name="experiment-name", 51 | notebook="test_notebook.ipynb", 52 | cos_endpoint="http://testserver:32525", 53 | cos_directory="test_directory", 54 | cos_dependencies_archive="test_archive.tgz", 55 | image="test/image:dev") 56 | 57 | 58 | def test_fail_without_cos_directory(): 59 | with pytest.raises(TypeError): 60 | NotebookOp(name="test", 61 | pipeline_name="test-pipeline", 62 | experiment_name="experiment-name", 63 | notebook="test_notebook.ipynb", 64 | cos_endpoint="http://testserver:32525", 65 | cos_bucket="test_bucket", 66 | cos_dependencies_archive="test_archive.tgz", 67 | image="test/image:dev") 68 | 69 | 70 | def test_fail_without_cos_dependencies_archive(): 71 | with pytest.raises(TypeError): 72 | NotebookOp(name="test", 73 | pipeline_name="test-pipeline", 74 | experiment_name="experiment-name", 75 | notebook="test_notebook.ipynb", 76 | cos_endpoint="http://testserver:32525", 77 | cos_bucket="test_bucket", 78 | cos_directory="test_directory", 79 | image="test/image:dev") 80 | 81 | 82 | def test_fail_without_runtime_image(): 83 | with pytest.raises(ValueError) as error_info: 84 | NotebookOp(name="test", 85 | pipeline_name="test-pipeline", 86 | experiment_name="experiment-name", 87 | notebook="test_notebook.ipynb", 88 | cos_endpoint="http://testserver:32525", 89 | cos_bucket="test_bucket", 90 | cos_directory="test_directory", 91 | cos_dependencies_archive="test_archive.tgz") 92 | assert "You need to provide 
an image." == str(error_info.value) 93 | 94 | 95 | def test_fail_without_notebook(): 96 | with pytest.raises(TypeError): 97 | NotebookOp(name="test", 98 | pipeline_name="test-pipeline", 99 | experiment_name="experiment-name", 100 | cos_endpoint="http://testserver:32525", 101 | cos_bucket="test_bucket", 102 | cos_directory="test_directory", 103 | cos_dependencies_archive="test_archive.tgz", 104 | image="test/image:dev") 105 | 106 | 107 | def test_fail_without_name(): 108 | with pytest.raises(TypeError): 109 | NotebookOp(pipeline_name="test-pipeline", 110 | experiment_name="experiment-name", 111 | notebook="test_notebook.ipynb", 112 | cos_endpoint="http://testserver:32525", 113 | cos_bucket="test_bucket", 114 | cos_directory="test_directory", 115 | cos_dependencies_archive="test_archive.tgz", 116 | image="test/image:dev") 117 | 118 | 119 | def test_fail_with_empty_string_as_name(): 120 | with pytest.raises(ValueError): 121 | NotebookOp(name="", 122 | pipeline_name="test-pipeline", 123 | experiment_name="experiment-name", 124 | notebook="test_notebook.ipynb", 125 | cos_endpoint="http://testserver:32525", 126 | cos_bucket="test_bucket", 127 | cos_directory="test_directory", 128 | cos_dependencies_archive="test_archive.tgz", 129 | image="test/image:dev") 130 | 131 | 132 | def test_fail_with_empty_string_as_notebook(): 133 | with pytest.raises(ValueError) as error_info: 134 | NotebookOp(name="test", 135 | pipeline_name="test-pipeline", 136 | experiment_name="experiment-name", 137 | notebook="", 138 | cos_endpoint="http://testserver:32525", 139 | cos_bucket="test_bucket", 140 | cos_directory="test_directory", 141 | cos_dependencies_archive="test_archive.tgz", 142 | image="test/image:dev") 143 | assert "You need to provide a notebook." == str(error_info.value) 144 | 145 | 146 | def test_fail_without_pipeline_name(): 147 | with pytest.raises(TypeError): 148 | NotebookOp(name="test", 149 | experiment_name="experiment-name", 150 | notebook="test_notebook.ipynb", 151 | cos_endpoint="http://testserver:32525", 152 | cos_bucket="test_bucket", 153 | cos_directory="test_directory", 154 | cos_dependencies_archive="test_archive.tgz", 155 | image="test/image:dev") 156 | 157 | 158 | def test_fail_without_experiment_name(): 159 | with pytest.raises(TypeError): 160 | NotebookOp(name="test", 161 | pipeline_name="test-pipeline", 162 | notebook="test_notebook.ipynb", 163 | cos_endpoint="http://testserver:32525", 164 | cos_bucket="test_bucket", 165 | cos_directory="test_directory", 166 | cos_dependencies_archive="test_archive.tgz", 167 | image="test/image:dev") 168 | 169 | 170 | def test_properly_set_notebook_name_when_in_subdirectory(): 171 | notebook_op = NotebookOp(name="test", 172 | pipeline_name="test-pipeline", 173 | experiment_name="experiment-name", 174 | notebook="foo/test_notebook.ipynb", 175 | cos_endpoint="http://testserver:32525", 176 | cos_bucket="test_bucket", 177 | cos_directory="test_directory", 178 | cos_dependencies_archive="test_archive.tgz", 179 | image="test/image:dev") 180 | assert "test_notebook.ipynb" == notebook_op.notebook_name 181 | 182 | 183 | def test_properly_set_python_script_name_when_in_subdirectory(): 184 | notebook_op = NotebookOp(name="test", 185 | pipeline_name="test-pipeline", 186 | experiment_name="experiment-name", 187 | notebook="foo/test.py", 188 | cos_endpoint="http://testserver:32525", 189 | cos_bucket="test_bucket", 190 | cos_directory="test_directory", 191 | cos_dependencies_archive="test_archive.tgz", 192 | image="test/image:dev") 193 | assert "test.py" == 
notebook_op.notebook_name 194 | 195 | 196 | def test_user_crio_volume_creation(): 197 | notebook_op = NotebookOp(name="test", 198 | pipeline_name="test-pipeline", 199 | experiment_name="experiment-name", 200 | notebook="test_notebook.ipynb", 201 | cos_endpoint="http://testserver:32525", 202 | cos_bucket="test_bucket", 203 | cos_directory="test_directory", 204 | cos_dependencies_archive="test_archive.tgz", 205 | image="test/image:dev", 206 | emptydir_volume_size='20Gi') 207 | assert notebook_op.emptydir_volume_size == '20Gi' 208 | assert notebook_op.container_work_dir_root_path == '/opt/app-root/src/' 209 | assert notebook_op.container.volume_mounts.__len__() == 1 210 | # Environment variables: PYTHONPATH, ELYRA_RUN_NAME 211 | assert notebook_op.container.env.__len__() == 2, notebook_op.container.env 212 | 213 | 214 | @pytest.mark.skip(reason="not sure if we should even test this") 215 | def test_default_bootstrap_url(notebook_op): 216 | assert notebook_op.bootstrap_script_url == \ 217 | 'https://raw.githubusercontent.com/elyra-ai/kfp-notebook/v0.9.1/etc/docker-scripts/bootstrapper.py' 218 | 219 | 220 | def test_override_bootstrap_url(): 221 | notebook_op = NotebookOp(name="test", 222 | pipeline_name="test-pipeline", 223 | experiment_name="experiment-name", 224 | bootstrap_script_url="https://test.server.com/bootscript.py", 225 | notebook="test_notebook.ipynb", 226 | cos_endpoint="http://testserver:32525", 227 | cos_bucket="test_bucket", 228 | cos_directory="test_directory", 229 | cos_dependencies_archive="test_archive.tgz", 230 | image="test/image:dev") 231 | assert notebook_op.bootstrap_script_url == "https://test.server.com/bootscript.py" 232 | 233 | 234 | @pytest.mark.skip(reason="not sure if we should even test this") 235 | def test_default_requirements_url(notebook_op): 236 | assert notebook_op.requirements_url == \ 237 | 'https://raw.githubusercontent.com/elyra-ai/kfp-notebook/v0.9.1/etc/requirements-elyra.txt' 238 | 239 | 240 | def test_override_requirements_url(): 241 | notebook_op = NotebookOp(name="test", 242 | pipeline_name="test-pipeline", 243 | experiment_name="experiment-name", 244 | requirements_url="https://test.server.com/requirements.py", 245 | notebook="test_notebook.ipynb", 246 | cos_endpoint="http://testserver:32525", 247 | cos_bucket="test_bucket", 248 | cos_directory="test_directory", 249 | cos_dependencies_archive="test_archive.tgz", 250 | image="test/image:dev") 251 | assert notebook_op.requirements_url == "https://test.server.com/requirements.py" 252 | 253 | 254 | def test_construct_with_both_pipeline_inputs_and_outputs(): 255 | notebook_op = NotebookOp(name="test", 256 | pipeline_name="test-pipeline", 257 | experiment_name="experiment-name", 258 | notebook="test_notebook.ipynb", 259 | cos_endpoint="http://testserver:32525", 260 | cos_bucket="test_bucket", 261 | cos_directory="test_directory", 262 | cos_dependencies_archive="test_archive.tgz", 263 | pipeline_inputs=['test_input1.txt', 'test_input2.txt'], 264 | pipeline_outputs=['test_output1.txt', 'test_output2.txt'], 265 | image="test/image:dev") 266 | assert notebook_op.pipeline_inputs == ['test_input1.txt', 'test_input2.txt'] 267 | assert notebook_op.pipeline_outputs == ['test_output1.txt', 'test_output2.txt'] 268 | 269 | assert '--inputs "test_input1.txt;test_input2.txt"' in notebook_op.container.args[0] 270 | assert '--outputs "test_output1.txt;test_output2.txt"' in notebook_op.container.args[0] 271 | 272 | 273 | def test_construct_wildcard_outputs(): 274 | notebook_op = NotebookOp(name="test", 275 | 
pipeline_name="test-pipeline", 276 | experiment_name="experiment-name", 277 | notebook="test_notebook.ipynb", 278 | cos_endpoint="http://testserver:32525", 279 | cos_bucket="test_bucket", 280 | cos_directory="test_directory", 281 | cos_dependencies_archive="test_archive.tgz", 282 | pipeline_inputs=['test_input1.txt', 'test_input2.txt'], 283 | pipeline_outputs=['test_out*', 'foo.tar'], 284 | image="test/image:dev") 285 | assert notebook_op.pipeline_inputs == ['test_input1.txt', 'test_input2.txt'] 286 | assert notebook_op.pipeline_outputs == ['test_out*', 'foo.tar'] 287 | 288 | assert '--inputs "test_input1.txt;test_input2.txt"' in notebook_op.container.args[0] 289 | assert '--outputs "test_out*;foo.tar"' in notebook_op.container.args[0] 290 | 291 | 292 | def test_construct_with_only_pipeline_inputs(): 293 | notebook_op = NotebookOp(name="test", 294 | pipeline_name="test-pipeline", 295 | experiment_name="experiment-name", 296 | notebook="test_notebook.ipynb", 297 | cos_endpoint="http://testserver:32525", 298 | cos_bucket="test_bucket", 299 | cos_directory="test_directory", 300 | cos_dependencies_archive="test_archive.tgz", 301 | pipeline_inputs=['test_input1.txt', 'test,input2.txt'], 302 | pipeline_outputs=[], 303 | image="test/image:dev") 304 | assert notebook_op.pipeline_inputs == ['test_input1.txt', 'test,input2.txt'] 305 | assert '--inputs "test_input1.txt;test,input2.txt"' in notebook_op.container.args[0] 306 | 307 | 308 | def test_construct_with_bad_pipeline_inputs(): 309 | with pytest.raises(ValueError) as error_info: 310 | NotebookOp(name="test", 311 | pipeline_name="test-pipeline", 312 | experiment_name="experiment-name", 313 | notebook="test_notebook.ipynb", 314 | cos_endpoint="http://testserver:32525", 315 | cos_bucket="test_bucket", 316 | cos_directory="test_directory", 317 | cos_dependencies_archive="test_archive.tgz", 318 | pipeline_inputs=['test_input1.txt', 'test;input2.txt'], 319 | pipeline_outputs=[], 320 | image="test/image:dev") 321 | assert "Illegal character (;) found in filename 'test;input2.txt'." == str(error_info.value) 322 | 323 | 324 | def test_construct_with_only_pipeline_outputs(): 325 | notebook_op = NotebookOp(name="test", 326 | pipeline_name="test-pipeline", 327 | experiment_name="experiment-name", 328 | notebook="test_notebook.ipynb", 329 | cos_endpoint="http://testserver:32525", 330 | cos_bucket="test_bucket", 331 | cos_directory="test_directory", 332 | cos_dependencies_archive="test_archive.tgz", 333 | pipeline_outputs=['test_output1.txt', 'test,output2.txt'], 334 | pipeline_envs={}, 335 | image="test/image:dev") 336 | assert notebook_op.pipeline_outputs == ['test_output1.txt', 'test,output2.txt'] 337 | assert '--outputs "test_output1.txt;test,output2.txt"' in notebook_op.container.args[0] 338 | 339 | 340 | def test_construct_with_bad_pipeline_outputs(): 341 | with pytest.raises(ValueError) as error_info: 342 | NotebookOp(name="test", 343 | pipeline_name="test-pipeline", 344 | experiment_name="experiment-name", 345 | notebook="test_notebook.ipynb", 346 | cos_endpoint="http://testserver:32525", 347 | cos_bucket="test_bucket", 348 | cos_directory="test_directory", 349 | cos_dependencies_archive="test_archive.tgz", 350 | pipeline_outputs=['test_output1.txt', 'test;output2.txt'], 351 | image="test/image:dev") 352 | assert "Illegal character (;) found in filename 'test;output2.txt'." 
== str(error_info.value) 353 | 354 | 355 | def test_construct_with_env_variables_argo(): 356 | notebook_op = NotebookOp(name="test", 357 | pipeline_name="test-pipeline", 358 | experiment_name="experiment-name", 359 | notebook="test_notebook.ipynb", 360 | cos_endpoint="http://testserver:32525", 361 | cos_bucket="test_bucket", 362 | cos_directory="test_directory", 363 | cos_dependencies_archive="test_archive.tgz", 364 | pipeline_envs={"ENV_VAR_ONE": "1", "ENV_VAR_TWO": "2", "ENV_VAR_THREE": "3"}, 365 | image="test/image:dev") 366 | 367 | confirmation_names = ["ENV_VAR_ONE", "ENV_VAR_TWO", "ENV_VAR_THREE", 368 | "ELYRA_RUN_NAME"] 369 | confirmation_values = ["1", "2", "3", 370 | "{{workflow.annotations.pipelines.kubeflow.org/run_name}}"] 371 | for env_val in notebook_op.container.env: 372 | assert env_val.name in confirmation_names 373 | assert env_val.value in confirmation_values 374 | confirmation_names.remove(env_val.name) 375 | confirmation_values.remove(env_val.value) 376 | 377 | # Verify confirmation values have been drained. 378 | assert len(confirmation_names) == 0 379 | assert len(confirmation_values) == 0 380 | 381 | # same as before but explicitly specify the workflow engine type 382 | # as Argo 383 | notebook_op = NotebookOp(name="test", 384 | pipeline_name="test-pipeline", 385 | experiment_name="experiment-name", 386 | notebook="test_notebook.ipynb", 387 | cos_endpoint="http://testserver:32525", 388 | cos_bucket="test_bucket", 389 | cos_directory="test_directory", 390 | cos_dependencies_archive="test_archive.tgz", 391 | pipeline_envs={"ENV_VAR_ONE": "1", "ENV_VAR_TWO": "2", "ENV_VAR_THREE": "3"}, 392 | image="test/image:dev", 393 | workflow_engine="Argo") 394 | 395 | confirmation_names = ["ENV_VAR_ONE", "ENV_VAR_TWO", "ENV_VAR_THREE", 396 | "ELYRA_RUN_NAME"] 397 | confirmation_values = ["1", "2", "3", 398 | "{{workflow.annotations.pipelines.kubeflow.org/run_name}}"] 399 | for env_val in notebook_op.container.env: 400 | assert env_val.name in confirmation_names 401 | assert env_val.value in confirmation_values 402 | confirmation_names.remove(env_val.name) 403 | confirmation_values.remove(env_val.value) 404 | 405 | # Verify confirmation values have been drained. 406 | assert len(confirmation_names) == 0 407 | assert len(confirmation_values) == 0 408 | 409 | 410 | def test_construct_with_env_variables_tekton(): 411 | notebook_op = NotebookOp(name="test", 412 | pipeline_name="test-pipeline", 413 | experiment_name="experiment-name", 414 | notebook="test_notebook.ipynb", 415 | cos_endpoint="http://testserver:32525", 416 | cos_bucket="test_bucket", 417 | cos_directory="test_directory", 418 | cos_dependencies_archive="test_archive.tgz", 419 | pipeline_envs={"ENV_VAR_ONE": "1", "ENV_VAR_TWO": "2", "ENV_VAR_THREE": "3"}, 420 | image="test/image:dev", 421 | workflow_engine="Tekton") 422 | 423 | confirmation_names = ["ENV_VAR_ONE", "ENV_VAR_TWO", "ENV_VAR_THREE", 424 | "ELYRA_RUN_NAME"] 425 | confirmation_values = ["1", "2", "3"] 426 | field_path = "metadata.annotations['pipelines.kubeflow.org/run_name']" 427 | for env_val in notebook_op.container.env: 428 | assert env_val.name in confirmation_names 429 | confirmation_names.remove(env_val.name) 430 | if env_val.name == 'ELYRA_RUN_NAME': 431 | assert env_val.value_from.field_ref.field_path == field_path, env_val.value_from.field_ref 432 | else: 433 | assert env_val.value in confirmation_values 434 | confirmation_values.remove(env_val.value) 435 | 436 | # Verify confirmation values have been drained. 
437 | assert len(confirmation_names) == 0 438 | assert len(confirmation_values) == 0 439 | 440 | 441 | def test_normalize_label_value(): 442 | valid_middle_chars = '-_.' 443 | 444 | # test min length 445 | assert NotebookOp._normalize_label_value(None) == '' 446 | assert NotebookOp._normalize_label_value('') == '' 447 | # test max length (63) 448 | assert NotebookOp._normalize_label_value('a' * 63) ==\ 449 | 'a' * 63 450 | assert NotebookOp._normalize_label_value('a' * 64) ==\ 451 | 'a' * 63 # truncated 452 | # test first and last char 453 | assert NotebookOp._normalize_label_value('1') == '1' 454 | assert NotebookOp._normalize_label_value('22') == '22' 455 | assert NotebookOp._normalize_label_value('3_3') == '3_3' 456 | assert NotebookOp._normalize_label_value('4u4') == '4u4' 457 | assert NotebookOp._normalize_label_value('5$5') == '5_5' 458 | 459 | # test first char 460 | for c in string.printable: 461 | if c in string.ascii_letters + string.digits: 462 | # first char is valid 463 | # no length violation 464 | assert NotebookOp._normalize_label_value(c) == c 465 | assert NotebookOp._normalize_label_value(c + 'B') == c + 'B' 466 | # max length 467 | assert NotebookOp._normalize_label_value(c + 'B' * 62) ==\ 468 | (c + 'B' * 62) 469 | # max length exceeded 470 | assert NotebookOp._normalize_label_value(c + 'B' * 63) ==\ 471 | (c + 'B' * 62) # truncated 472 | else: 473 | # first char is invalid, e.g. '#a', and becomes the 474 | # second char, which might require replacement 475 | rv = c 476 | if c not in valid_middle_chars: 477 | rv = '_' 478 | # no length violation 479 | assert NotebookOp._normalize_label_value(c) == 'a' + rv + 'a' 480 | assert NotebookOp._normalize_label_value(c + 'B') == 'a' + rv + 'B' 481 | # max length 482 | assert NotebookOp._normalize_label_value(c + 'B' * 62) ==\ 483 | ('a' + rv + 'B' * 61) # truncated 484 | # max length exceeded 485 | assert NotebookOp._normalize_label_value(c + 'B' * 63) ==\ 486 | ('a' + rv + 'B' * 61) # truncated 487 | 488 | # test last char 489 | for c in string.printable: 490 | if c in string.ascii_letters + string.digits: 491 | # no length violation 492 | assert NotebookOp._normalize_label_value('b' + c) == 'b' + c 493 | # max length 494 | assert NotebookOp._normalize_label_value('b' * 62 + c) ==\ 495 | ('b' * 62 + c) 496 | # max length exceeded 497 | assert NotebookOp._normalize_label_value('b' * 63 + c) ==\ 498 | ('b' * 63) 499 | else: 500 | # last char is invalid, e.g. 'a#', and requires 501 | # patching 502 | rv = c 503 | if c not in valid_middle_chars: 504 | rv = '_' 505 | # no length violation (char is appended) 506 | assert NotebookOp._normalize_label_value('b' + c) == 'b' + rv + 'a' 507 | # max length (char is replaced) 508 | assert NotebookOp._normalize_label_value('b' * 62 + c) ==\ 509 | ('b' * 62 + 'a') 510 | # max length exceeded (no action required) 511 | assert NotebookOp._normalize_label_value('b' * 63 + c) ==\ 512 | ('b' * 63) 513 | 514 | # test first and last char 515 | for c in string.printable: 516 | if c in string.ascii_letters + string.digits: 517 | # no length violation 518 | assert NotebookOp._normalize_label_value(c + 'b' + c) ==\ 519 | c + 'b' + c # nothing is modified 520 | # max length 521 | assert NotebookOp._normalize_label_value(c + 'b' * 61 + c) ==\ 522 | (c + 'b' * 61 + c) # nothing is modified 523 | # max length exceeded 524 | assert NotebookOp._normalize_label_value(c + 'b' * 62 + c) ==\ 525 | c + 'b' * 62 # truncate only 526 | else: 527 | # first and last characters are invalid, e.g. 
'#a#'
528 |             rv = c
529 |             if c not in valid_middle_chars:
530 |                 rv = '_'
531 |             # no length violation
532 |             assert NotebookOp._normalize_label_value(c + 'b' + c) ==\
533 |                 'a' + rv + 'b' + rv + 'a'
534 |             # max length
535 |             assert NotebookOp._normalize_label_value(c + 'b' * 59 + c) ==\
536 |                 ('a' + rv + 'b' * 59 + rv + 'a')
537 |             # max length reached after processing, scenario 1:
538 |             # resolved by adding a char before the first and replacing the last
539 |             assert NotebookOp._normalize_label_value(c + 'b' * 60 + c) ==\
540 |                 ('a' + rv + 'b' * 60 + 'a')
541 |             # max length reached after processing, scenario 2:
542 |             # resolved by adding a char before the first and appending after the last
543 |             assert NotebookOp._normalize_label_value(c + 'b' * 59 + c) ==\
544 |                 ('a' + rv + 'b' * 59 + rv + 'a')
545 |             # max length exceeded before processing, scenario 1:
546 |             # resolved by adding a char before the first and truncating the last
547 |             assert NotebookOp._normalize_label_value(c + 'b' * 62 + c) ==\
548 |                 ('a' + rv + 'b' * 61)
549 |             # max length exceeded before processing, scenario 2:
550 |             # resolved by adding a char before the first and replacing the last
551 |             assert NotebookOp._normalize_label_value(c + 'b' * 60 + c * 3) ==\
552 |                 ('a' + rv + 'b' * 60 + 'a')
553 | 
554 |     # test char in a position other than first and last;
555 |     # if invalid, the char is replaced with '_'
556 |     for c in string.printable:
557 |         if c in string.ascii_letters + string.digits + '-_.':
558 |             assert NotebookOp._normalize_label_value('A' + c + 'Z') ==\
559 |                 'A' + c + 'Z'
560 |         else:
561 |             assert NotebookOp._normalize_label_value('A' + c + 'Z') == 'A_Z'
562 | 
563 |     # encore
564 |     assert NotebookOp._normalize_label_value(r'¯\_(ツ)_/¯') == 'a_________a'
565 | 
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright 2018-2021 Elyra Authors
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | # 16 | 17 | [bdist_wheel] 18 | universal = 0 19 | 20 | [flake8] 21 | # References: 22 | # https://flake8.readthedocs.io/en/latest/user/configuration.html 23 | # https://flake8.readthedocs.io/en/latest/user/error-codes.html 24 | # https://docs.openstack.org/hacking/latest/user/hacking.html 25 | exclude = __init__.py 26 | ignore = 27 | # Import formatting 28 | E4, 29 | # Comparing types instead of isinstance 30 | E721, 31 | # Assigning lambda expression 32 | E731, 33 | # Ambiguous variable names 34 | E741, 35 | # Include name with TODOs as in # TODO(yourname) 36 | H101, 37 | # Enable mocking 38 | H216, 39 | # Do not import more than one module per line 40 | H301, 41 | # Alphabetically order imports by the full module path 42 | H306, 43 | # Multi line docstrings should start without a leading new line 44 | H404, 45 | # Multi line docstrings should start with a one line summary followed by an empty line 46 | H405, 47 | # Import statements are in the wrong order 48 | I100, 49 | # Imported names are in the wrong order 50 | I101, 51 | # Missing newline between import groups 52 | I201, 53 | # Additional newline in a group of imports 54 | I202, 55 | # Allow breaks after binary operators 56 | W504, 57 | 58 | max-line-length = 120 59 | 60 | [aliases] 61 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Copyright 2018-2021 Elyra Authors 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | #
17 | 
18 | """The setup script."""
19 | 
20 | from setuptools import setup, find_packages
21 | 
22 | with open('README.md') as readme_file:
23 |     readme = readme_file.read()
24 | 
25 | requirements = [
26 |     'click>=6.0',
27 |     'bumpversion>=0.5.3',
28 |     'wheel>=0.30.0',
29 |     'watchdog>=0.8.3',
30 |     'flake8>=3.5.0,<3.9.0',
31 |     'tox>=2.9.1',
32 |     'coverage>=4.5.1',
33 |     'twine>=1.10.0',
34 |     'kfp==1.6.3',
35 | ]
36 | 
37 | setup_requirements = []
38 | 
39 | test_requirements = []
40 | 
41 | setup(
42 |     classifiers=[
43 |         'Development Status :: 4 - Beta',
44 |         'Intended Audience :: Developers',
45 |         'License :: OSI Approved :: Apache Software License',
46 |         'Natural Language :: English',
47 |         'Programming Language :: Python :: 3.6',
48 |         'Programming Language :: Python :: 3.7',
49 |         'Programming Language :: Python :: 3.8',
50 |     ],
51 |     description="Jupyter Notebook operator for Kubeflow Pipelines",
52 |     long_description=readme,
53 |     long_description_content_type='text/markdown',
54 |     install_requires=requirements,
55 |     license='Apache License, Version 2.0',
56 |     include_package_data=True,
57 |     keywords='jupyter, kubeflow, pipeline',
58 |     name='kfp-notebook',
59 |     packages=find_packages(),
60 |     setup_requires=setup_requirements,
61 |     test_suite='tests',
62 |     tests_require=test_requirements,
63 |     url='https://github.com/elyra-ai/kfp-notebook',
64 |     version='0.27.0.dev0',
65 |     zip_safe=False,
66 | )
67 | 
--------------------------------------------------------------------------------
/test-requirements.txt:
--------------------------------------------------------------------------------
1 | ipython
2 | minio
3 | mock
4 | notebook
5 | nbconvert
6 | papermill
7 | pytest
8 | pytest_virtualenv
9 | flake8>=3.5.0,<3.9.0
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright 2018-2021 Elyra Authors
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 | [tox]
17 | envlist = py36, py37, py38, flake8
18 | 
19 | [travis]
20 | python =
21 |     3.8: py38
22 |     3.7: py37
23 |     3.6: py36
24 | 
25 | [testenv:flake8]
26 | basepython = python
27 | deps = flake8
28 | commands = flake8 kfp_notebook
29 | 
30 | [testenv]
31 | setenv =
32 |     PYTHONPATH = {toxinidir}
33 | 
34 | commands = pytest
35 | 
--------------------------------------------------------------------------------