├── .github ├── dependabot.yml ├── main.yml └── workflows │ ├── publish_pypi.yml │ └── test.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CHANGELOG.md ├── CODEOWNERS ├── LICENSE.txt ├── README.md ├── docs ├── conda.md ├── development.md └── installation.md ├── imgs ├── img.png ├── img_1.png └── img_2.png ├── logos └── squared.png ├── mypy.ini ├── pull_request_template.md ├── release.py ├── requirements.txt ├── rocket ├── __init__.py ├── file_watcher.py ├── logger.py ├── rocket.py └── utils.py ├── rocket_local.py ├── setup.py ├── squared.png └── tests ├── __init__.py ├── conftest.py ├── resources ├── poetry-test │ ├── README.md │ ├── poetry.lock │ ├── poetry_test │ │ └── __init__.py │ └── pyproject.toml └── python-test │ ├── README.md │ ├── python_test │ └── __init__.py │ └── setup.py └── test_rocket.py /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "pip" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | -------------------------------------------------------------------------------- /.github/main.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: [push] 4 | 5 | jobs: 6 | test: 7 | runs-on: ubuntu-18.04 8 | strategy: 9 | matrix: 10 | python-version: [3.9, 3.8, 3.7] 11 | steps: 12 | - uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0 13 | - name: Set up Python 14 | uses: actions/setup-python@e9aba2c848f5ebd159c070c61ea2c4e2b122355e # v2.3.4 15 | with: 16 | python-version: ${{ matrix.python-version }} 17 | - name: Install pip dependencies 18 | run: pip install -r requirements-test.txt 19 | - name: Run tests 20 | run: pytest tests/ -v 21 | 22 | - name: Type check 23 | run: mypy . 24 | -------------------------------------------------------------------------------- /.github/workflows/publish_pypi.yml: -------------------------------------------------------------------------------- 1 | name: Publish python poetry package 2 | on: 3 | # Triggers the workflow on push events but only for the "main" branch 4 | push: 5 | branches: ["main"] 6 | 7 | concurrency: 8 | group: ${{ github.workflow }}${{ github.ref_name != github.event.repository.default_branch && github.ref || github.run_id }} 9 | cancel-in-progress: ${{ github.ref_name != github.event.repository.default_branch }} 10 | 11 | jobs: 12 | build: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v3 16 | - name: Setup Python 17 | uses: actions/setup-python@v5.3.0 18 | with: 19 | python-version: 3.10.* 20 | - name: Build and publish to pypi 21 | shell: bash 22 | env: 23 | PYPI_DB_ROCKET: ${{ secrets.PYPI_DB_ROCKET }} 24 | run: | 25 | pip install -r requirements.txt 26 | python3 -m build --no-isolation 27 | echo "Build successful, uploading now..." 28 | python3 -m twine upload dist/* -u "__token__" -p "$PYPI_DB_ROCKET" --skip-existing 29 | echo "Upload successful!"
30 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | branches: ["main", "master"] 6 | pull_request: 7 | branches: ["main", "master"] 8 | workflow_dispatch: 9 | 10 | concurrency: 11 | group: ${{ github.workflow }}${{ github.ref_name != github.event.repository.default_branch && github.ref || github.run_id }} 12 | cancel-in-progress: ${{ github.ref_name != github.event.repository.default_branch }} 13 | 14 | jobs: 15 | test: 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: actions/checkout@v3 19 | - name: Setup Python 20 | uses: actions/setup-python@v5.3.0 21 | with: 22 | python-version: 3.10.* 23 | - name: Install dependencies 24 | run: | 25 | pip install --upgrade pip 26 | pip install -r requirements.txt 27 | - name: Run a multi-line script 28 | run: | 29 | pytest 30 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # vscode 129 | .vscode 130 | metastore_db/* 131 | 132 | .idea -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | fail_fast: True 2 | stages: [commit] 3 | repos: 4 | - repo: https://github.com/pre-commit/pre-commit-hooks 5 | rev: v3.2.0 6 | hooks: 7 | - id: trailing-whitespace 8 | stages: [commit, manual] 9 | - id: end-of-file-fixer 10 | stages: [commit, manual] 11 | - id: check-yaml 12 | stages: [commit, manual] 13 | - id: check-added-large-files 14 | stages: [commit, manual] 15 | - id: check-docstring-first 16 | stages: [commit, manual] 17 | # you can enable the following checks if you disable them from drone 18 | # as drone has credentials in place 19 | # - id: detect-aws-credentials 20 | # - id: detect-private-key 21 | - repo: https://github.com/pre-commit/mirrors-isort 22 | rev: v5.6.4 23 | hooks: 24 | - id: isort 25 | stages: [commit] 26 | - repo: https://github.com/ambv/black 27 | rev: 20.8b1 28 | hooks: 29 | - id: black 30 | language_version: python3.7 31 | stages: [commit] 32 | - repo: https://github.com/pre-commit/mirrors-mypy 33 | rev: v0.790 34 | hooks: 35 | - id: mypy 36 | args: [--config-file, mypy.ini] 37 | stages: [commit, manual] 38 | # - repo: https://gitlab.com/pycqa/flake8 39 | # rev: 3.7.9 40 | # hooks: 41 | # - id: flake8 42 | # stages: [commit, manual] 43 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog db-rocket 2 | 3 | ## Version 3.1.0 4 | - Use uv when installing packages 5 | 6 | ## Version 3.0.6 7 | - Warn when failing to create requirements.txt file with poetry instead of raising an error 8 | 9 | ## Version 3.0.6 10 | - Create folder before copying file 11 | 12 | ## Version 3.0.5 13 | - Revert enforcing the creation of .databrickscfg file 14 | 15 | ## Version 3.0.3 16 | - Add warning when DATABRICKS_TOKEN is set rather than failing when it's not set. The bulk of our use-cases rely on the token being set via the `databricks configure` command. The token via environment variable is only used for CI and we should treat it as an edge case. 17 | 18 | ## Version 3.0.2 19 | - Add databricks cli configuration check 20 | 21 | ## Version 3.0.1 22 | - Add workaround for making --watch command work with --use-volumes 23 | 24 | ## Version 3.0.0 25 | - Add `use_volumes` and `dst_path` arguments to support uploading to Unity Catalog Volumes. 26 | 27 | ## Version 2.1.0 28 | - New parameter for ``rocket launch --glob_path=<...>``, which allows specifying a list of globs for files to deploy during launch. 29 | 30 | ## Version 2.0.4 31 | - Update version number.
32 | 33 | ## Version 2.0.3 34 | - Add instruction to restart Python with dbutils (needed for newer Databricks runtimes) 35 | 36 | ## Version 2.0.2 37 | - Fix wheel uploading to root dbfs path 38 | 39 | ## Version 2.0.1 40 | - Fix function not found error 41 | 42 | ## Version 2.0.0 43 | - Simplify code structure 44 | - Make project sync smoother by using a mix of `-e` & installation of `requirements.txt` 45 | 46 | ## Version 1.3.6 47 | 48 | - Fix bug of updates not getting detected 49 | - Put files into a project folder 50 | 51 | ## Version 1.3.5 52 | 53 | - Replace self-calling CLI with while loop 54 | 55 | ## Version 1.3.4 56 | 57 | - Simplify setup by using `pip install -e` 58 | 59 | ## Version 1.3.3 60 | 61 | - Fix watch stopping due to maximum recursion 62 | 63 | ## Version 1.3.2 64 | 65 | - Refine prints to be clearer 66 | 67 | ## Version 1.3.1 68 | 69 | - Adding Markdown documentation to package description 70 | 71 | ## Version 1.3.0 72 | 73 | - Remove `rocket trigger` CLI 74 | - Add synchronization of project files to databricks file system 75 | - Replace `print` statements with `logger.info` 76 | - Replace running watch in shell with python code 77 | 78 | ## Version 1.2.0 79 | 80 | - Fix security issue with command injection; this changes the behaviour of the watch command. 81 | 82 | ## Version 1.1.5 83 | 84 | - Adding extra index urls to install command 85 | 86 | ## Version 1.1.4 87 | 88 | - Fix error in rocket trigger cmd 89 | 90 | ## Version 1.1.3 91 | 92 | - Typo 93 | 94 | ## Version 1.1.2 95 | 96 | - Pin watchdog dependency with minimum requirement 97 | - dbrocket launch rather than trigger 98 | 99 | ## Version 1.1.1 100 | 101 | - Error with missing token is raised only on the trigger command, not on __init__ anymore. 102 | 103 | ## Version 1.1.0 104 | 105 | - Create new binary dbrocket 106 | - Create dbrocket setup to initialize a setup.py 107 | - Use default values for trigger binary 108 | - Improve docs 109 | 110 | ## Version 1.0.4 111 | 112 | - Upgrade dependencies 113 | - Add github actions ci. 114 | 115 | ## Version 1.0.3 116 | 117 | - Remove message about spark 6 support. 118 | - Add instruction about upgrading pip. 119 | - Remove local cli. 120 | - Add better error message when copying to databricks fails 121 | 122 | ## Version 1.0.2 123 | 124 | - feature: Add support for poetry projects; test: Add test for dbrocket build process 125 | -------------------------------------------------------------------------------- /CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @getyourguide/mlp 2 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity.
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # NOTE: We are in the process of deprecating db-rocket in favor of Databricks native tooling. 2 | 3 | ## Databricks-Rocket 4 | 5 | 6 | 7 | [![PyPI version](https://badge.fury.io/py/databricks-rocket.svg)](https://badge.fury.io/py/databricks-rocket) 8 | ![PyPI downloads](https://img.shields.io/pypi/dm/databricks-rocket) 9 | 10 | Databricks-Rocket (db-rocket for short) keeps your local Python scripts installed and synchronized with a Databricks notebook. Every change on your local machine 11 | is automatically reflected in the notebook. This shortens the feedback loop for developing git-based projects and 12 | eliminates the need to set up a local development environment. 13 | 14 | ## Installation 15 | 16 | Install `databricks-rocket` using pip: 17 | 18 | ```sh 19 | pip install databricks-rocket 20 | ``` 21 | 22 | ## Setup 23 | 24 | Ensure you've created a personal access token in 25 | Databricks ([official documentation](https://docs.databricks.com/dev-tools/cli/index.html)). Afterward, set up the 26 | Databricks CLI by executing: 27 | 28 | ```sh 29 | databricks configure --token 30 | ``` 31 | 32 | Alternatively, you can set the Databricks token and host in your environment variables: 33 | 34 | ```sh 35 | export DATABRICKS_HOST="mydatabrickshost" 36 | export DATABRICKS_TOKEN="mydatabrickstoken" 37 | ``` 38 | 39 | If your project isn't already a pip package, you'll need to convert it into one. Use dbrocket for this: 40 | 41 | ```sh 42 | rocket setup 43 | ``` 44 | 45 | This will create a setup.py for you. 46 | 47 | ## Usage 48 | 49 | ### To Sync Your Project 50 | 51 | By default, `databricks-rocket` syncs your project to DBFS automatically. This allows you to update your code and have 52 | those changes reflected in your Databricks notebook without restarting the Python kernel. Simply execute: 53 | 54 | ```sh 55 | rocket launch 56 | ``` 57 | 58 | You'll then receive the exact command to run in your notebook. Example: 59 | 60 | ```sh 61 | stevenmi@MacBook db-rocket % rocket launch 62 | >> Watch activated. Uploaded your project to databricks. Install your project in your databricks notebook by running: 63 | >> %pip install --upgrade pip 64 | >> %pip install -r /dbfs/temp/stevenmi/db-rocket/requirements.txt 65 | >> %pip install --no-deps -e /dbfs/temp/stevenmi/db-rocket 66 | 67 | and the following in a new Python cell: 68 | >> %load_ext autoreload 69 | >> %autoreload 2 70 | ``` 71 | 72 | Finally, add the content to your databricks notebook: 73 | ![imgs/img_2.png](imgs/img_2.png) 74 | 75 | #### Include non-python files 76 | Upload all root-level json files: 77 | ```shell 78 | rocket launch --glob_path="*.json" 79 | ``` 80 | Additionally, upload all env files: 81 | ```shell 82 | rocket launch --glob_path="[\"*.json\", \".env*\"]" 83 | ``` 84 | When specifying lists, be mindful of the formatting of the parameter string.
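#### Upload to Unity Catalog Volumes db-rocket can also upload your project to Unity Catalog Volumes through the `use_volumes` and `dst_path` launch parameters (see the Support section below for caveats). The following is a minimal sketch; the volume path is only an illustrative placeholder, so replace it with a volume you can write to: ```shell rocket launch --use_volumes=True --dst_path="/Volumes/my_catalog/my_schema/my_volume/db_rocket" ``` Note that `dst_path` must start with `/Volumes` when `use_volumes` is enabled.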
85 | 86 | ### To Upload Your Python Package 87 | 88 | If you've disabled the watch feature, `databricks-rocket` will only upload your project as a wheel to DBFS: 89 | 90 | ```sh 91 | rocket launch --watch=False 92 | ``` 93 | 94 | Example: 95 | 96 | ```sh 97 | stevenmi@MacBook db-rocket % rocket launch --watch=False 98 | >> Watch is disabled. Building a python wheel from your project 99 | >> Found setup.py. Building python library 100 | >> Uploaded ./dist/databricks_rocket-2.0.0-py3-none-any.whl to dbfs:/temp/stevenmi/db-rocket/dist/databricks_rocket-2.0.0-py3-none-any.whl 101 | >> Uploaded wheel to databricks. Install your library in your databricks notebook by running: 102 | >> %pip install --upgrade pip 103 | >> %pip install /dbfs/temp/stevenmi/db-rocket/databricks_rocket-2.0.0-py3-none-any.whl --force-reinstall 104 | ``` 105 | 106 | ## Blogposts 107 | 108 | - [DBrocket 2.0](https://www.getyourguide.careers/posts/improving-data-science-productivity-with-db-rocket-2-0): A summary of the big improvements we made to the tool in the new release. 109 | - The [DB Rocket 1.0](https://www.getyourguide.careers/posts/open-sourcing-db-rocket-for-data-scientists) post also gives more details about the rationale behind dbrocket. 110 | 111 | ## Support 112 | 113 | - Databricks: >=7 114 | - Python: >=3.7 115 | - Tested on platforms: Linux, macOS. Windows will probably not work, but contributions are welcome! 116 | - Supports uploading to Unity Catalog Volumes starting from version 3.0.0. Note that the underlying dependency, `databricks-sdk`, is still in beta. We do not recommend using UC Volumes in production. 117 | 118 | ## Acknowledgments 119 | 120 | - Thanks Leon Poli for the Logo :) 121 | - Thanks Stephane Leonard for source-code and documentation improvements :) 122 | - Thanks Malachi Soord for the CICD setup and README improvements 123 | 124 | Contributions are welcome! 125 | 126 | 127 | # Security 128 | 129 | For security issues, please contact [security@getyourguide.com](mailto:security@getyourguide.com). 130 | 131 | # Legal 132 | 133 | db-rocket is licensed under the Apache License, Version 2.0. See [LICENSE](LICENSE.txt) for the full text. 134 | -------------------------------------------------------------------------------- /docs/conda.md: -------------------------------------------------------------------------------- 1 | # 1. Install Conda 2 | 3 | Install it on your machine following the [official instructions](https://docs.conda.io/projects/continuumio-conda/en/latest/user-guide/install/macos.html) 4 | 5 | # 2. Create a conda environment 6 | 7 | ```sh 8 | conda create -n dbrocket python=3.7 9 | ``` 10 | 11 | # 3. Activate the environment 12 | 13 | ```sh 14 | conda activate dbrocket 15 | ``` 16 | 17 | From now on, continue with the installation instructions from the readme. 18 | -------------------------------------------------------------------------------- /docs/development.md: -------------------------------------------------------------------------------- 1 | # Developing on a local machine 2 | 3 | ## 1. Clone the repo 4 | 5 | Find the right branch. 6 | 7 | ## 2. Install local db-rocket in development mode 8 | 9 | 10 | ```sh 11 | cd dbrocket_folder 12 | pip install -e . 13 | 14 | ``` 15 | 16 | From here on, `rocket` will point to the development version. 17 | 18 | ## Build the package and upload it to PyPI 19 | 20 | You need PyPI credentials to upload. Reach out in case of need.
21 | 22 | ```sh 23 | pip install twine 24 | ./release.py release 25 | ``` 26 | -------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | 2 | ## Installing 3 | 4 | For a clean Python installation (especially on macOS) we recommend [using conda](conda.md) 5 | 6 | ### Troubleshooting 7 | 8 | On macOS, also upgrade the build library: 9 | 10 | ```sh 11 | python3 -m pip install --upgrade build 12 | ``` 13 | -------------------------------------------------------------------------------- /imgs/img.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/getyourguide/db-rocket/643d38f762feef8c1356af7c3e9511117327a3f9/imgs/img.png -------------------------------------------------------------------------------- /imgs/img_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/getyourguide/db-rocket/643d38f762feef8c1356af7c3e9511117327a3f9/imgs/img_1.png -------------------------------------------------------------------------------- /imgs/img_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/getyourguide/db-rocket/643d38f762feef8c1356af7c3e9511117327a3f9/imgs/img_2.png -------------------------------------------------------------------------------- /logos/squared.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/getyourguide/db-rocket/643d38f762feef8c1356af7c3e9511117327a3f9/logos/squared.png -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | python_version = 3.7 3 | warn_unused_configs = False 4 | disallow_untyped_defs = False 5 | warn_return_any = False 6 | ignore_missing_imports = True 7 | check_untyped_defs = False 8 | -------------------------------------------------------------------------------- /pull_request_template.md: -------------------------------------------------------------------------------- 1 | 17 | 18 | ## Description 19 | 20 | 23 | 24 | ## Added tests? 25 | 26 | - [ ] 👍 yes 27 | - [ ] 🙅 no, because they aren't needed 28 | 29 | ## Added to documentation? 30 | 31 | - [ ] 👍 README.md 32 | - [ ] 👍 CHANGELOG.md 33 | - [ ] 👍 Additional documentation in /docs 34 | - [ ] 👍 Relevant code documentation 35 | - [ ] 🙅 no, because they aren’t needed 36 | -------------------------------------------------------------------------------- /release.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | 5 | 6 | class Release: 7 | """ 8 | Class responsible for publishing the library to PyPI, not part of the rocket executable 9 | """ 10 | 11 | """Module responsible for building db-rocket itself and publishing it to PyPI""" 12 | 13 | def release(self): 14 | """ 15 | Build and upload rocket to PyPI, doing all steps. Run it in the root of the rocket project. 16 | """ 17 | os.system("rm -rf dist/* || true") 18 | os.system("python3 -m build --no-isolation") 19 | self.build() 20 | self.upload() 21 | 22 | def build(self): 23 | """ 24 | Build rocket for PyPI. Run it in the root of the rocket project.
25 | """ 26 | os.system("rm -rf dist/* || true") 27 | os.system("python3 -m build --no-isolation") 28 | print("Build successful, uploading now") 29 | 30 | def upload(self): 31 | """ 32 | Upload the new package to PyPI 33 | :return: 34 | """ 35 | os.system("python3 -m twine upload dist/*") 36 | 37 | 38 | if __name__ == "__main__": 39 | import fire 40 | 41 | fire.Fire(Release) 42 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | fire~=0.4.0 2 | watchdog~=2.1.9 3 | databricks-cli~=0.17.0 4 | argh 5 | build~=0.8.0 6 | pyyaml 7 | pytest 8 | poetry 9 | mypy 10 | SecretStorage 11 | readme-renderer 12 | twine 13 | databricks-sdk==0.33.0 14 | wheel 15 | -------------------------------------------------------------------------------- /rocket/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rocket/file_watcher.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import time 4 | 5 | from typing import List 6 | from watchdog.events import FileSystemEventHandler 7 | from watchdog.observers import Observer 8 | 9 | from rocket.utils import gather_glob_paths 10 | 11 | 12 | class FileWatcher: 13 | class _Handler(FileSystemEventHandler): 14 | def __init__(self, watcher_instance): 15 | self.watcher_instance = watcher_instance 16 | 17 | def on_modified(self, event): 18 | _current_glob_files = gather_glob_paths(self.watcher_instance.glob_paths) 19 | if event.src_path in _current_glob_files: 20 | self.watcher_instance.modified_files.add(event.src_path) 21 | elif event.is_directory: 22 | return 23 | elif os.path.splitext(event.src_path)[1] == ".py": 24 | self.watcher_instance.modified_files.add(event.src_path) 25 | 26 | def __init__(self, path_to_watch, callback, recursive=True, glob_paths: List[str] = None): 27 | self.path_to_watch = path_to_watch 28 | self.callback = callback 29 | self.recursive = recursive 30 | self.observer = Observer() 31 | self.modified_files = set() 32 | self.glob_paths = glob_paths 33 | if self.glob_paths is None: 34 | self.glob_paths = [] 35 | self.handler = self._Handler(self) 36 | 37 | def start(self): 38 | self.observer.schedule( 39 | self.handler, self.path_to_watch, recursive=self.recursive 40 | ) 41 | self.observer.start() 42 | try: 43 | while True: 44 | time.sleep(1) 45 | if self.modified_files: 46 | self.callback(list(self.modified_files)) 47 | self.modified_files.clear() 48 | except KeyboardInterrupt: 49 | self.observer.stop() 50 | self.observer.join() 51 | 52 | def stop(self): 53 | self.observer.stop() 54 | -------------------------------------------------------------------------------- /rocket/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | 4 | 5 | def configure_logger() -> logging.Logger: 6 | logger = logging.getLogger("dbrocket") 7 | logger.addHandler(logging.StreamHandler(sys.stdout)) 8 | logger.setLevel(logging.INFO) 9 | return logger 10 | 11 | 12 | logger = configure_logger() 13 | -------------------------------------------------------------------------------- /rocket/rocket.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Optional, List, Union 3 | 4 | import fire 5 | 6 | from databricks.sdk
import WorkspaceClient 7 | from rocket.file_watcher import FileWatcher 8 | from rocket.logger import logger 9 | from rocket.utils import ( 10 | execute_shell_command, 11 | extract_python_package_dirs, 12 | extract_python_files_from_folder, 13 | execute_for_each_multithreaded, 14 | gather_glob_paths, 15 | ) 16 | 17 | 18 | class Rocket: 19 | """Entry point of the installed program, all public methods are options of the program""" 20 | 21 | # in seconds 22 | _interval_repeat_watch: int = 2 23 | _python_executable: str = "python3" 24 | _rocket_executable: str = "rocket" 25 | 26 | def setup(self): 27 | """ 28 | Initialize the application. 29 | """ 30 | if os.path.exists("setup.py") or os.path.exists("pyproject.toml"): 31 | logger.info("Packaging file already exists, so no need to create a new one") 32 | return 33 | 34 | content = """ 35 | import setuptools 36 | 37 | setuptools.setup( 38 | name="myproject", 39 | version="0.0.1", 40 | author="", 41 | author_email="", 42 | description="", 43 | url="https://github.com/getyourguide/databricks-rocket", 44 | packages=setuptools.find_packages(), 45 | ) 46 | """ 47 | 48 | with open("setup.py", "a") as myfile: 49 | myfile.write(content) 50 | logger.info("Setup.py file created, feel free to modify it to your needs.") 51 | 52 | def launch( 53 | self, 54 | project_location: str = ".", 55 | dbfs_path: Optional[str] = None, 56 | watch: bool = True, 57 | glob_path: Optional[Union[str, List[str]]] = None, 58 | use_volumes: Optional[bool] = False, 59 | dst_path: Optional[str] = None, 60 | ) -> None: 61 | """ 62 | Entrypoint of the application, triggers a build and deploy 63 | :param project_location: path to project code, default: `"."` 64 | :param dbfs_path: path where the wheel will be stored, ex: dbfs:/tmp/myteam/myproject. Only supports dbfs paths. 65 | :param watch: Set to false if you don't want to automatically sync your files 66 | :param glob_path: glob string or list of strings for additional files to deploy, e.g. "*.json" 67 | :param use_volumes: upload files to unity catalog volumes. 68 | :param dst_path: Destination path to store the files. Supports both dbfs:/ and /Volumes. Ideally, we should use dst_path and deprecate dbfs_path. 69 | :return: 70 | """ 71 | 72 | home = os.environ['HOME'] 73 | if os.getenv("DATABRICKS_TOKEN"): 74 | print("Note: DATABRICKS_TOKEN is set, it could override the token in ~/.databrickscfg and cause errors.") 75 | 76 | base_dbfs_access_error_message = ("Is your databricks token set and valid? " 77 | "Try to generate a new token and update the existing one with " 78 | "`databricks configure --token`.") 79 | if use_volumes: 80 | try: 81 | workspace_client = WorkspaceClient() 82 | workspace_client.dbutils.fs.ls("dbfs:/") 83 | except Exception as e: 84 | raise Exception( 85 | f"Could not access dbfs using databricks SDK. {base_dbfs_access_error_message} Error details: {e}" 86 | ) 87 | db_path = self.get_volumes_path(dst_path) 88 | else: 89 | try: 90 | execute_shell_command("databricks fs ls dbfs:/") 91 | except Exception as e: 92 | raise Exception( 93 | f"Error accessing DBFS via databricks-cli. 
{base_dbfs_access_error_message} Error details: {e}" 94 | ) 95 | path_to_use = dst_path if dst_path else dbfs_path 96 | db_path = self.get_dbfs_path(path_to_use) 97 | 98 | if watch: 99 | project_name = os.path.abspath(project_location).split("/")[-1] 100 | db_path = f"{db_path}/{project_name}" 101 | 102 | glob_paths = [] 103 | if isinstance(glob_path, str): 104 | glob_paths = [os.path.join(project_location, glob_path)] 105 | elif isinstance(glob_path, list): 106 | glob_paths = [os.path.join(project_location, path) for path in glob_path] 107 | 108 | self._build_and_deploy(watch=watch, project_location=project_location, db_path=db_path, glob_paths=glob_paths) 109 | if watch: 110 | watcher = FileWatcher( 111 | project_location, 112 | lambda x: self._build_and_deploy( 113 | watch=watch, 114 | modified_files=watcher.modified_files, 115 | db_path=db_path, 116 | project_location=project_location, 117 | glob_paths=glob_paths 118 | ), 119 | glob_paths=glob_paths, 120 | ) 121 | watcher.start() 122 | 123 | def _build_and_deploy( 124 | self, 125 | watch: bool, 126 | project_location: str, 127 | db_path: str, 128 | modified_files: Optional[List[str]] = None, 129 | glob_paths: Optional[List[str]] = None 130 | ) -> None: 131 | if modified_files: 132 | logger.info(f"Found changes in {modified_files}. Overwriting them.") 133 | self._deploy( 134 | file_paths=modified_files, 135 | db_path=db_path, 136 | project_location=project_location, 137 | ) 138 | return 139 | 140 | if not watch: 141 | logger.info( 142 | "Watch is disabled. Building a python wheel from your project" 143 | ) 144 | wheel_path, wheel_file = self._create_python_project_wheel(project_location) 145 | self._deploy( 146 | file_paths=[wheel_path], 147 | db_path=db_path, 148 | project_location=os.path.dirname(wheel_path), 149 | ) 150 | install_path = f"{self.get_install_path(db_path)}/{wheel_file}" 151 | 152 | dependency_files = ["requirements.in", "requirements.txt"] 153 | index_urls = [] 154 | for dependency_file in dependency_files: 155 | dependency_file_path = f"{project_location}/{dependency_file}" 156 | if os.path.exists(dependency_file_path): 157 | with open(dependency_file_path) as f: 158 | index_urls = [ 159 | line.strip() 160 | for line in f.readlines() 161 | if "index-url" in line 162 | ] 163 | index_urls_options = " ".join(index_urls) 164 | logger.info(f"""Uploaded wheel to databricks. 
Install your library in your databricks notebook by running: 165 | %pip install --upgrade pip 166 | %pip install {index_urls_options} {install_path} --force-reinstall""") 167 | return 168 | 169 | package_dirs = extract_python_package_dirs(project_location) 170 | files = set() 171 | for package_dir in package_dirs: 172 | files.update(extract_python_files_from_folder(package_dir)) 173 | 174 | if glob_paths is not None: 175 | files.update(gather_glob_paths(glob_paths)) 176 | 177 | project_files = ["setup.py", "pyproject.toml", "README.md"] 178 | for project_file in project_files: 179 | if os.path.exists(f"{project_location}/{project_file}"): 180 | files.add(f"{project_location}/{project_file}") 181 | 182 | if os.path.exists(f"{project_location}/pyproject.toml"): 183 | try: 184 | execute_shell_command( 185 | "poetry export -f requirements.txt --with-credentials --without-hashes --output requirements.txt" 186 | ) 187 | except Exception as e: 188 | logger.warning(f"Failed to create requirements.txt with poetry: {e}") 189 | 190 | dependency_file_exist = False 191 | dependency_files = ["requirements.in", "requirements.txt"] 192 | uploaded_dependency_file = "" 193 | index_urls = [] 194 | for dependency_file in dependency_files: 195 | dependency_file_path = f"{project_location}/{dependency_file}" 196 | if os.path.exists(dependency_file_path): 197 | files.add(dependency_file_path) 198 | uploaded_dependency_file = dependency_file 199 | dependency_file_exist = True 200 | with open(dependency_file_path) as f: 201 | index_urls = [ 202 | line.strip() for line in f.readlines() if "index-url" in line 203 | ] 204 | self._deploy( 205 | file_paths=list(files), db_path=db_path, project_location=project_location 206 | ) 207 | 208 | install_path = self.get_install_path(db_path) 209 | index_urls_options = " ".join(index_urls) 210 | extra_watch_command = "" 211 | if not self.is_dbfs(db_path): 212 | # The install path is supposed to get added to sys.path, but this doesn't work when using volumes with 213 | # tropic 3.5 (running databricks 15.4)...so, add it to sys.path manually 214 | extra_watch_command = f"import sys; sys.path.append('{install_path}')" 215 | 216 | if dependency_file_exist: 217 | logger.info( 218 | f"""Watch activated. Uploaded your project to databricks. Install your project in your databricks notebook by running: 219 | %sh 220 | pip install uv 221 | uv pip install {index_urls_options} -r {install_path}/{uploaded_dependency_file} 222 | uv pip install --no-deps -e {install_path} 223 | 224 | and in a new Python cell: 225 | dbutils.library.restartPython() 226 | 227 | and in one more Python cell: 228 | %load_ext autoreload 229 | %autoreload 2 230 | {extra_watch_command}""" 231 | ) 232 | else: 233 | logger.info( 234 | f"""Watch activated. Uploaded your project to databricks. 
Install your project in your databricks notebook by running: 235 | %sh 236 | pip install uv 237 | uv pip install -e {install_path} 238 | 239 | and the following in a new Python cell: 240 | %load_ext autoreload 241 | %autoreload 2""" 242 | ) 243 | 244 | def _deploy( 245 | self, 246 | file_paths: List[str], 247 | db_path: str, 248 | project_location: str 249 | ) -> None: 250 | if self.is_dbfs(db_path): 251 | self._deploy_dbfs(file_paths, db_path, project_location) 252 | else: 253 | w = WorkspaceClient() 254 | self._deploy_volumes(file_paths, db_path, project_location, w) 255 | 256 | def _deploy_dbfs( 257 | self, 258 | file_paths: List[str], 259 | db_path: str, 260 | project_location: str 261 | ): 262 | def helper(file: str) -> None: 263 | target_path = f"{db_path}/{os.path.relpath(file, project_location)}" 264 | target_folder = os.path.dirname(target_path) 265 | execute_shell_command(f"databricks fs mkdirs {target_folder}") 266 | execute_shell_command(f"databricks fs cp --recursive --overwrite {file} {target_path}") 267 | logger.info(f"Uploaded {file} to {target_path}") 268 | 269 | execute_for_each_multithreaded(file_paths, lambda x: helper(x)) 270 | 271 | def _deploy_volumes( 272 | self, 273 | file_paths: List[str], 274 | db_path: str, 275 | project_location: str, 276 | workspace_client 277 | ): 278 | def helper(wc, file: str) -> None: 279 | # the sdk requires an absolute path 280 | if not os.path.isabs(file): 281 | cwd = os.getcwd() 282 | file = f"{cwd}/{file}" 283 | target_path = f"{db_path}/{os.path.relpath(file, project_location)}" 284 | # if the file already exists, the sdk returns the error message: The file being created already exists. 285 | # a feature request is already here: https://github.com/databricks/databricks-sdk-py/issues/548 286 | try: 287 | wc.dbutils.fs.rm(target_path) 288 | except Exception: 289 | pass 290 | # the sdk uses urllib3 to parse paths. 291 | # It needs to be file:// to be recognized as a local file. Otherwise it raises a file-not-found error 292 | wc.dbutils.fs.cp(f"file://{file}", target_path) 293 | logger.info(f"Uploaded {file} to {target_path}") 294 | 295 | execute_for_each_multithreaded(file_paths, lambda x: helper(workspace_client, x)) 296 | 297 | def _create_python_project_wheel(self, project_location: str) -> (str, str): 298 | dist_location = f"{project_location}/dist" 299 | execute_shell_command(f"rm {dist_location}/* 2>/dev/null || true") 300 | 301 | if os.path.exists(f"{project_location}/setup.py"): 302 | logger.info("Found setup.py. Building python library") 303 | execute_shell_command( 304 | f"cd {project_location} ; {self._python_executable} -m build --outdir {dist_location} 2>/dev/null" 305 | ) 306 | elif os.path.exists(f"{project_location}/pyproject.toml"): 307 | logger.info("Found pyproject.toml. Building python library with poetry") 308 | execute_shell_command( 309 | f"cd {project_location} ; poetry build --format wheel" 310 | ) 311 | else: 312 | raise Exception( 313 | "To be turned into a library, your project has to contain a setup.py or pyproject.toml file" 314 | ) 315 | 316 | wheel_file = execute_shell_command( 317 | f"cd {dist_location}; ls *.whl 2>/dev/null | head -n 1" 318 | ).replace("\n", "") 319 | wheel_path = f"{dist_location}/{wheel_file}" 320 | return wheel_path, wheel_file 321 | 322 | def get_dbfs_path(self, path: Optional[str]) -> str: 323 | if path: 324 | logger.warning("The `dbfs_path` parameter is planned for deprecation. 
Please use the `dst_path` parameter instead.") 325 | if not self.is_dbfs(path): 326 | raise Exception("`dbfs_path` must start with dbfs:/") 327 | return path or f"dbfs:/temp/{os.environ['USER']}" 328 | 329 | def get_volumes_path(self, path: Optional[str]) -> str: 330 | if path and not path.startswith("/Volumes"): 331 | raise Exception("`use_volumes` is true. `dst_path` must start with /Volumes") 332 | return path or f"/Volumes/main/data_products/volume/db_rocket/{os.environ['USER']}" 333 | 334 | def get_install_path(self, db_path): 335 | if self.is_dbfs(db_path): 336 | return f'{db_path.replace("dbfs:/", "/dbfs/")}' 337 | return db_path 338 | 339 | def is_dbfs(self, db_path: str): 340 | return db_path.startswith("dbfs:/") 341 | 342 | 343 | def main(): 344 | fire.Fire(Rocket) 345 | -------------------------------------------------------------------------------- /rocket/utils.py: -------------------------------------------------------------------------------- 1 | import concurrent.futures 2 | import glob 3 | import os 4 | import subprocess 5 | 6 | from typing import List, Set 7 | from rocket.logger import logger 8 | 9 | 10 | def execute_for_each_multithreaded(lst, func, max_threads=None): 11 | """ 12 | Execute a given function for each entry in the list using multiple threads. 13 | 14 | Parameters: 15 | - lst: List of items to process 16 | - func: Function to apply to each item 17 | - max_threads: Maximum number of threads to use (default is None, which means as many as items in the list) 18 | 19 | Returns: 20 | - List of results after applying the function 21 | """ 22 | with concurrent.futures.ThreadPoolExecutor(max_workers=max_threads) as executor: 23 | return list(executor.map(func, lst)) 24 | 25 | 26 | def extract_package_name_from_wheel(wheel_filename): 27 | # Split the filename on '-' and take the first part 28 | return wheel_filename.split("-")[0] 29 | 30 | 31 | def extract_project_name_from_wheel(wheel_filename): 32 | return extract_package_name_from_wheel(wheel_filename).replace("_", "-") 33 | 34 | 35 | def extract_python_package_dirs(root_dir): 36 | packages = [] 37 | for item in os.listdir(root_dir): 38 | item_path = os.path.join(root_dir, item) 39 | if os.path.isdir(item_path) and "__init__.py" in os.listdir(item_path): 40 | packages.append(item_path) 41 | return packages 42 | 43 | 44 | def execute_shell_command(cmd) -> str: 45 | logger.debug(f"Running shell command: {cmd} ") 46 | return subprocess.check_output(cmd, shell=True).decode("utf-8") 47 | 48 | 49 | def extract_python_files_from_folder(path): 50 | py_files = [] 51 | 52 | for root, dirs, files in os.walk(path): 53 | for file in files: 54 | if file.endswith(".py"): 55 | py_files.append(os.path.join(root, file)) 56 | 57 | return py_files 58 | 59 | 60 | def gather_glob_paths(glob_paths: List[str]) -> Set[str]: 61 | _unique_paths = set() 62 | for glob_path in glob_paths: 63 | _unique_paths.update(glob.glob(glob_path)) 64 | return _unique_paths 65 | -------------------------------------------------------------------------------- /rocket_local.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from rocket.rocket import main 4 | 5 | if __name__ == "__main__": 6 | main() 7 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | # load the README file and use it as the long_description for PyPI 4 | try: 5 | with 
open("README.md", encoding="utf8") as f: 6 | readme = f.read() 7 | except Exception as e: 8 | readme = "" 9 | 10 | setuptools.setup( 11 | name="databricks-rocket", 12 | version="3.1.0", 13 | author="GetYourGuide", 14 | author_email="engineering.data-products@getyourguide.com", 15 | description="Keep your local python scripts installed and in sync with a databricks notebook. Shortens the feedback loop to develop projects using a hybrid environment", 16 | long_description=readme, 17 | long_description_content_type="text/markdown", 18 | url="https://github.com/getyourguide/db-rocket", 19 | packages=setuptools.find_packages(), 20 | install_requires=["fire", "watchdog~=2.1.9", "build", "databricks_cli", "databricks-sdk"], 21 | entry_points={ 22 | "console_scripts": ["rocket=rocket.rocket:main", "dbrocket=rocket.rocket:main"] 23 | }, 24 | license="Apache 2.0", 25 | ) 26 | -------------------------------------------------------------------------------- /squared.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/getyourguide/db-rocket/643d38f762feef8c1356af7c3e9511117327a3f9/squared.png -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/getyourguide/db-rocket/643d38f762feef8c1356af7c3e9511117327a3f9/tests/__init__.py -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | 5 | from rocket.rocket import Rocket 6 | 7 | 8 | @pytest.fixture() 9 | def python_project_path() -> str: 10 | test_dir = os.path.dirname(os.path.realpath(__file__)) 11 | project_path = os.path.join(test_dir, "resources", "python-test") 12 | return project_path 13 | 14 | 15 | @pytest.fixture() 16 | def poetry_project_path() -> str: 17 | test_dir = os.path.dirname(os.path.realpath(__file__)) 18 | project_path = os.path.join(test_dir, "resources", "poetry-test") 19 | return project_path 20 | 21 | 22 | @pytest.fixture() 23 | def rocket() -> Rocket: 24 | rocket = Rocket() 25 | return rocket 26 | -------------------------------------------------------------------------------- /tests/resources/poetry-test/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/getyourguide/db-rocket/643d38f762feef8c1356af7c3e9511117327a3f9/tests/resources/poetry-test/README.md -------------------------------------------------------------------------------- /tests/resources/poetry-test/poetry.lock: -------------------------------------------------------------------------------- 1 | [[package]] 2 | name = "atomicwrites" 3 | version = "1.4.1" 4 | description = "Atomic file writes."
5 | category = "dev" 6 | optional = false 7 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" 8 | 9 | [[package]] 10 | name = "attrs" 11 | version = "22.1.0" 12 | description = "Classes Without Boilerplate" 13 | category = "dev" 14 | optional = false 15 | python-versions = ">=3.5" 16 | 17 | [package.extras] 18 | dev = ["cloudpickle", "coverage[toml] (>=5.0.2)", "furo", "hypothesis", "mypy (>=0.900,!=0.940)", "pre-commit", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "sphinx", "sphinx-notfound-page", "zope.interface"] 19 | docs = ["furo", "sphinx", "sphinx-notfound-page", "zope.interface"] 20 | tests = ["cloudpickle", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy (>=0.900,!=0.940)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "zope.interface"] 21 | tests_no_zope = ["cloudpickle", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy (>=0.900,!=0.940)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins"] 22 | 23 | [[package]] 24 | name = "colorama" 25 | version = "0.4.5" 26 | description = "Cross-platform colored terminal text." 27 | category = "dev" 28 | optional = false 29 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" 30 | 31 | [[package]] 32 | name = "more-itertools" 33 | version = "8.14.0" 34 | description = "More routines for operating on iterables, beyond itertools" 35 | category = "dev" 36 | optional = false 37 | python-versions = ">=3.5" 38 | 39 | [[package]] 40 | name = "packaging" 41 | version = "21.3" 42 | description = "Core utilities for Python packages" 43 | category = "dev" 44 | optional = false 45 | python-versions = ">=3.6" 46 | 47 | [package.dependencies] 48 | pyparsing = ">=2.0.2,<3.0.5 || >3.0.5" 49 | 50 | [[package]] 51 | name = "pluggy" 52 | version = "0.13.1" 53 | description = "plugin and hook calling mechanisms for python" 54 | category = "dev" 55 | optional = false 56 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" 57 | 58 | [package.extras] 59 | dev = ["pre-commit", "tox"] 60 | 61 | [[package]] 62 | name = "py" 63 | version = "1.11.0" 64 | description = "library with cross-python path, ini-parsing, io, code, log facilities" 65 | category = "dev" 66 | optional = false 67 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" 68 | 69 | [[package]] 70 | name = "pyparsing" 71 | version = "3.0.9" 72 | description = "pyparsing module - Classes and methods to define and execute parsing grammars" 73 | category = "dev" 74 | optional = false 75 | python-versions = ">=3.6.8" 76 | 77 | [package.extras] 78 | diagrams = ["jinja2", "railroad-diagrams"] 79 | 80 | [[package]] 81 | name = "pytest" 82 | version = "5.4.3" 83 | description = "pytest: simple powerful testing with Python" 84 | category = "dev" 85 | optional = false 86 | python-versions = ">=3.5" 87 | 88 | [package.dependencies] 89 | atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""} 90 | attrs = ">=17.4.0" 91 | colorama = {version = "*", markers = "sys_platform == \"win32\""} 92 | more-itertools = ">=4.0.0" 93 | packaging = "*" 94 | pluggy = ">=0.12,<1.0" 95 | py = ">=1.5.0" 96 | wcwidth = "*" 97 | 98 | [package.extras] 99 | checkqa-mypy = ["mypy (==v0.761)"] 100 | testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xmlschema"] 101 | 102 | [[package]] 103 | name = "wcwidth" 104 | version = "0.2.5" 105 | description = "Measures the displayed width of unicode strings in a terminal" 106 | category = "dev" 107 | optional = false 108 | python-versions = "*" 109 | 110 | [metadata] 111 | 
lock-version = "1.1" 112 | python-versions = "^3.8" 113 | content-hash = "c27944f25b55067b06883f1cea204be7d97841a4b8228fab69b91895347494ad" 114 | 115 | [metadata.files] 116 | atomicwrites = [ 117 | {file = "atomicwrites-1.4.1.tar.gz", hash = "sha256:81b2c9071a49367a7f770170e5eec8cb66567cfbbc8c73d20ce5ca4a8d71cf11"}, 118 | ] 119 | attrs = [ 120 | {file = "attrs-22.1.0-py2.py3-none-any.whl", hash = "sha256:86efa402f67bf2df34f51a335487cf46b1ec130d02b8d39fd248abfd30da551c"}, 121 | {file = "attrs-22.1.0.tar.gz", hash = "sha256:29adc2665447e5191d0e7c568fde78b21f9672d344281d0c6e1ab085429b22b6"}, 122 | ] 123 | colorama = [ 124 | {file = "colorama-0.4.5-py2.py3-none-any.whl", hash = "sha256:854bf444933e37f5824ae7bfc1e98d5bce2ebe4160d46b5edf346a89358e99da"}, 125 | {file = "colorama-0.4.5.tar.gz", hash = "sha256:e6c6b4334fc50988a639d9b98aa429a0b57da6e17b9a44f0451f930b6967b7a4"}, 126 | ] 127 | more-itertools = [ 128 | {file = "more-itertools-8.14.0.tar.gz", hash = "sha256:c09443cd3d5438b8dafccd867a6bc1cb0894389e90cb53d227456b0b0bccb750"}, 129 | {file = "more_itertools-8.14.0-py3-none-any.whl", hash = "sha256:1bc4f91ee5b1b31ac7ceacc17c09befe6a40a503907baf9c839c229b5095cfd2"}, 130 | ] 131 | packaging = [ 132 | {file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"}, 133 | {file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"}, 134 | ] 135 | pluggy = [ 136 | {file = "pluggy-0.13.1-py2.py3-none-any.whl", hash = "sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d"}, 137 | {file = "pluggy-0.13.1.tar.gz", hash = "sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0"}, 138 | ] 139 | py = [ 140 | {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"}, 141 | {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"}, 142 | ] 143 | pyparsing = [ 144 | {file = "pyparsing-3.0.9-py3-none-any.whl", hash = "sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc"}, 145 | {file = "pyparsing-3.0.9.tar.gz", hash = "sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb"}, 146 | ] 147 | pytest = [ 148 | {file = "pytest-5.4.3-py3-none-any.whl", hash = "sha256:5c0db86b698e8f170ba4582a492248919255fcd4c79b1ee64ace34301fb589a1"}, 149 | {file = "pytest-5.4.3.tar.gz", hash = "sha256:7979331bfcba207414f5e1263b5a0f8f521d0f457318836a7355531ed1a4c7d8"}, 150 | ] 151 | wcwidth = [ 152 | {file = "wcwidth-0.2.5-py2.py3-none-any.whl", hash = "sha256:beb4802a9cebb9144e99086eff703a642a13d6a0052920003a230f3294bbe784"}, 153 | {file = "wcwidth-0.2.5.tar.gz", hash = "sha256:c4d647b99872929fdb7bdcaa4fbe7f01413ed3d98077df798530e5b04f116c83"}, 154 | ] 155 | -------------------------------------------------------------------------------- /tests/resources/poetry-test/poetry_test/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.1.0" 2 | -------------------------------------------------------------------------------- /tests/resources/poetry-test/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "poetry-test" 3 | version = "0.1.0" 4 | description = "" 5 | authors = ["Steven Mi "] 6 | 7 | [tool.poetry.dependencies] 8 | python = "^3.8" 9 | 10 | [tool.poetry.dev-dependencies] 11 | pytest = "^5.2" 
12 | 13 | [build-system] 14 | requires = ["poetry-core>=1.0.0"] 15 | build-backend = "poetry.core.masonry.api" -------------------------------------------------------------------------------- /tests/resources/python-test/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/getyourguide/db-rocket/643d38f762feef8c1356af7c3e9511117327a3f9/tests/resources/python-test/README.md -------------------------------------------------------------------------------- /tests/resources/python-test/python_test/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.1.0" 2 | -------------------------------------------------------------------------------- /tests/resources/python-test/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | setup( 4 | name="foo", 5 | version="1.0", 6 | packages=find_packages(), 7 | ) 8 | -------------------------------------------------------------------------------- /tests/test_rocket.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from rocket.rocket import Rocket 4 | 5 | 6 | def test_create_python_wheel_from_python_project_successful(rocket: Rocket, python_project_path: str): 7 | """ 8 | Test if DB Rocket can build a Python project 9 | """ 10 | wheel_path, wheel_file = rocket._create_python_project_wheel(python_project_path) 11 | assert wheel_file 12 | assert wheel_path 13 | 14 | 15 | def test_create_python_wheel_from_poetry_project_successful(rocket: Rocket, poetry_project_path: str): 16 | """ 17 | Test if DB Rocket can build a Poetry project 18 | """ 19 | wheel_path, wheel_file = rocket._create_python_project_wheel(poetry_project_path) 20 | assert wheel_file 21 | assert wheel_path 22 | 23 | 24 | def test_create_python_wheel_from_temp_folder_raises_exception(rocket: Rocket): 25 | """ 26 | Test that DB Rocket raises an error if the project is not a supported project 27 | """ 28 | # /tmp is neither a setuptools nor a poetry project, so the wheel build must fail 29 | with pytest.raises(Exception): 30 | rocket._create_python_project_wheel("/tmp") 31 | --------------------------------------------------------------------------------