├── .github
├── dependabot.yml
├── main.yml
└── workflows
│ ├── publish_pypi.yml
│ └── test.yml
├── .gitignore
├── .pre-commit-config.yaml
├── CHANGELOG.md
├── CODEOWNERS
├── LICENSE.txt
├── README.md
├── docs
├── conda.md
├── development.md
└── installation.md
├── imgs
├── img.png
├── img_1.png
└── img_2.png
├── logos
└── squared.png
├── mypy.ini
├── pull_request_template.md
├── release.py
├── requirements.txt
├── rocket
├── __init__.py
├── file_watcher.py
├── logger.py
├── rocket.py
└── utils.py
├── rocket_local.py
├── setup.py
├── squared.png
└── tests
├── __init__.py
├── conftest.py
├── resources
├── poetry-test
│ ├── README.md
│ ├── poetry.lock
│ ├── poetry_test
│ │ └── __init__.py
│ └── pyproject.toml
└── python-test
│ ├── README.md
│ ├── python_test
│ └── __init__.py
│ └── setup.py
└── test_rocket.py
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 | - package-ecosystem: "pip"
4 | directory: "/"
5 | schedule:
6 | interval: "weekly"
7 |
--------------------------------------------------------------------------------
/.github/main.yml:
--------------------------------------------------------------------------------
1 | name: CI
2 |
3 | on: [push]
4 |
5 | jobs:
6 | test:
7 | runs-on: ubuntu-18.04
8 | strategy:
9 | matrix:
10 | python-version: [3.9, 3.8, 3.7]
11 | steps:
12 | - uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0
13 | - name: Set up Python
14 | uses: actions/setup-python@e9aba2c848f5ebd159c070c61ea2c4e2b122355e # v2.3.4
15 | with:
16 | python-version: ${{ matrix.python-version }}
17 | - name: Install pip dependencies
18 | run: pip install -r requirements-test.txt
19 | - name: Run tests
20 | run: pytest tests/ -v
21 |
22 | - name: type check
23 | run: mypy .
24 |
--------------------------------------------------------------------------------
/.github/workflows/publish_pypi.yml:
--------------------------------------------------------------------------------
1 | name: Publish python poetry package
2 | on:
3 | # Triggers the workflow on push or pull request events but only for the "main" branch
4 | push:
5 | branches: ["main"]
6 |
7 | concurrency:
8 | group: ${{ github.workflow }}${{ github.ref_name != github.event.repository.default_branch && github.ref || github.run_id }}
9 | cancel-in-progress: ${{ github.ref_name != github.event.repository.default_branch }}
10 |
11 | jobs:
12 | build:
13 | runs-on: ubuntu-latest
14 | steps:
15 | - uses: actions/checkout@v3
16 | - name: Setup Python
17 | uses: actions/setup-python@v5.3.0
18 | with:
19 | python-version: 3.10.*
20 | - name: Build and publish to pypi
21 | shell: bash
22 | env:
23 | PYPI_DB_ROCKET: ${{ secrets.PYPI_DB_ROCKET }}
24 | run: |
25 | pip install -r requirements.txt
26 | python3 -m build --no-isolation
          echo "Build successful, uploading now..."
28 | python3 -m twine upload dist/* -u "__token__" -p "$PYPI_DB_ROCKET" --skip-existing
          echo "Upload successful!"
30 |
--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
1 | name: Tests
2 |
3 | on:
4 | push:
5 | branches: ["main", "master"]
6 | pull_request:
7 | branches: ["main", "master"]
8 | workflow_dispatch:
9 |
10 | concurrency:
11 | group: ${{ github.workflow }}${{ github.ref_name != github.event.repository.default_branch && github.ref || github.run_id }}
12 | cancel-in-progress: ${{ github.ref_name != github.event.repository.default_branch }}
13 |
14 | jobs:
15 | test:
16 | runs-on: ubuntu-latest
17 | steps:
18 | - uses: actions/checkout@v3
19 | - name: Setup Python
20 | uses: actions/setup-python@v5.3.0
21 | with:
22 | python-version: 3.10.*
23 | - name: Install dependencies
24 | run: |
25 | pip install --upgrade pip
26 | pip install -r requirements.txt
27 | - name: Run a multi-line script
28 | run: |
29 | pytest
30 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # vscode
129 | .vscode
130 | metastore_db/*
131 |
132 | .idea
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | fail_fast: True
2 | stages: [commit]
3 | repos:
4 | - repo: https://github.com/pre-commit/pre-commit-hooks
5 | rev: v3.2.0
6 | hooks:
7 | - id: trailing-whitespace
8 | stages: [commit, manual]
9 | - id: end-of-file-fixer
10 | stages: [commit, manual]
11 | - id: check-yaml
12 | stages: [commit, manual]
13 | - id: check-added-large-files
14 | stages: [commit, manual]
15 | - id: check-docstring-first
16 | stages: [commit, manual]
17 | # you can enable the following checks if you disable them from drone
18 | # as drone has credentials in place
19 | # - id: detect-aws-credentials
20 | # - id: detect-private-key
21 | - repo: https://github.com/pre-commit/mirrors-isort
22 | rev: v5.6.4
23 | hooks:
24 | - id: isort
25 | stages: [commit]
26 | - repo: https://github.com/ambv/black
27 | rev: 20.8b1
28 | hooks:
29 | - id: black
30 | language_version: python3.7
31 | stages: [commit]
32 | - repo: https://github.com/pre-commit/mirrors-mypy
33 | rev: v0.790
34 | hooks:
35 | - id: mypy
36 | args: [--config-file, mypy.ini]
37 | stages: [commit, manual]
38 | # - repo: https://gitlab.com/pycqa/flake8
39 | # rev: 3.7.9
40 | # hooks:
41 | # - id: flake8
42 | # stages: [commit, manual]
43 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog db-rocket
2 |
3 | ## Version 3.1.0
4 | - Use uv when installing packages
5 |
6 | ## Version 3.0.6
7 | - Warn when failing to create requirements.txt file with poetry instead of raising an error
8 |
9 | ## Version 3.0.6
10 | - Create folder before copying file
11 |
12 | ## Version 3.0.5
13 | - Revert enforcing the creation of .databrickscfg file
14 |
15 | ## Version 3.0.3
16 | - Add warning when DATABRICKS_TOKEN is set rather than failing when its not set. The bulk of our use-cases rely on the token being set via databricks configure command. The token via environment variable is only used for CI and we should treat as an edge case.
17 |
18 | ## Version 3.0.2
19 | - Add databricks cli configuration check
20 |
21 | ## Version 3.0.1
22 | - Add workaround for making --watch command work with --use-volumes
23 |
24 | ## Version 3.0.0
25 | - Add `use_volumes` and `dst_path` arguments to support uploading to Unity Catalog Volumes.
26 |
27 | ## Version 2.1.0
- New parameter for ``rocket launch --glob_path=<...>``, which allows specifying a list of globs for files to deploy during launch.
29 |
30 | ## Version 2.0.4
31 | - Update version number.
32 |
33 | ## Version 2.0.3
34 | - Add instruction to restart Python with dbutils (needed for newer Databricks runtimes)
35 |
36 | ## Version 2.0.2
37 | - fix wheel uploading to root dbfs path
38 |
39 | ## Version 2.0.1
40 | - fix function not found error
41 |
42 | ## Version 2.0.0
43 | - Simplify code structure
44 | - Make sync of project more smooth by using a mix of `-e` & installation of `requirements.txt`
45 |
46 | ## Version 1.3.6
47 |
48 | - Fix bug of updates not getting detected
49 | - Put files into a project folder
50 |
51 | ## Version 1.3.5
52 |
53 | - Replace self-calling CLI with while loop
54 |
55 | ## Version 1.3.4
56 |
57 | - Simplify setup by using `pip install -e`
58 |
59 | ## Version 1.3.3
60 |
61 | - Fix watch stopping due to maximum recursion
62 |
63 | ## Version 1.3.2
64 |
65 | - Refine prints to be more clear
66 |
67 | ## Version 1.3.1
68 |
69 | - Adding Markdown documentation to package description
70 |
71 | ## Version 1.3.0
72 |
73 | - Remove `rocket trigger` CLI
74 | - Add synchronization of project files to databricks file system
75 | - Replace `print` statements with `logger.info`
76 | - Replace running watch in shell with python code
77 |
78 | ## Version 1.2.0
79 |
80 | - Fix security issue with command injection, changes the behaviour of the watch command.
81 |
82 | ## Version 1.1.5
83 |
84 | - Adding extra index urls to install command
85 |
86 | ## Version 1.1.4
87 |
88 | - Fix error in rocket trigger cmd
89 |
90 | ## Version 1.1.3
91 |
92 | - Typo
93 |
94 | ## Version 1.1.2
95 |
96 | - Pin watchdog dependency with minimum requirement
- dbrocket launch rather than trigger
98 |
99 | ## Version 1.1.1
100 |
101 | - Error with token missing only on trigger command not on __init__ anymore.
102 |
103 | ## Version 1.1.0
104 |
105 | - Create new binary dbrocket
- Create dbrocket setup to initialize a setup.py
- Use default values for trigger binary
108 | - Improve docs
109 |
110 | ## Version 1.0.4
111 |
112 | - Upgrade dependencies
113 | - Add github actions ci.
114 |
115 | ## Version 1.0.3
116 |
117 | - Remove message about spark 6 support.
118 | - Add instruction about upgrading pip.
119 | - Remove local cli.
120 | - Add better error message when fails to copy to databricks
121 |
122 | ## Version 1.0.2
123 |
124 | feature: Add support for poetry projects test: Add test for dbrocket build process
125 |
--------------------------------------------------------------------------------
/CODEOWNERS:
--------------------------------------------------------------------------------
1 | * @getyourguide/mlp
2 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # NOTE: We are in the progress of deprecating db-rocket for databricks native tooling.
2 |
3 | ## Databricks-Rocket
4 |
5 |
6 |
7 | [](https://badge.fury.io/py/databricks-rocket)
8 | 
9 |
10 | Databricks-Rocket (short db-rockets), keeps your local Python scripts installed and synchronized with a Databricks notebook. Every change on your local machine
11 | is automatically reflected in the notebook. This shortens the feedback loop for developing git-based projects and
12 | eliminates the need to set up a local development environment.
13 |
14 | ## Installation
15 |
16 | Install `databricks-rocket` using pip:
17 |
18 | ```sh
19 | pip install databricks-rocket
20 | ```
21 |
22 | ## Setup
23 |
24 | Ensure you've created a personal access token in
Databricks ([official documentation](https://docs.databricks.com/dev-tools/cli/index.html)). Afterward, set up the
26 | Databricks CLI by executing:
27 |
28 | ```sh
29 | databricks configure --token
30 | ```
31 |
32 | Alternatively, you can set the Databricks token and host in your environment variables:
33 |
34 | ```sh
35 | export DATABRICKS_HOST="mydatabrickshost"
36 | export DATABRICKS_TOKEN="mydatabrickstoken"
37 | ```
38 |
39 | If your project isn't already a pip package, you'll need to convert it into one. Use dbrocket for this:
40 |
41 | ```sh
42 | rocket setup
43 | ```
44 |
This will create a setup.py for you.
46 |
47 | ## Usage
48 |
49 | ### To Sync Your Project
50 |
51 | By default, `databricks-rocket` syncs your project to DBFS automatically. This allows you to update your code and have
52 | those changes reflected in your Databricks notebook without restarting the Python kernel. Simply execute:
53 |
54 | ```sh
55 | rocket launch
56 | ```
57 |
58 | You'll then receive the exact command to run in your notebook. Example:
59 |
60 | ```sh
61 | stevenmi@MacBook db-rocket % rocket launch --watch=False
62 | >> Watch activated. Uploaded your project to databricks. Install your project in your databricks notebook by running:
63 | >> %pip install --upgrade pip
64 | >> %pip install -r /dbfs/temp/stevenmi/db-rocket/requirements.txt
65 | >> %pip install --no-deps -e /dbfs/temp/stevenmi/db-rocket
66 |
67 | and following in a new Python cell:
68 | >> %load_ext autoreload
69 | >> %autoreload 2
70 | ```
71 |
Finally, add the content in your Databricks notebook:
73 | 
74 |
75 | #### Include non-python files
76 | Upload all root level json files:
77 | ```shell
rocket launch --glob_path="*.json"
79 | ```
80 | On top also upload all env files:
81 | ```shell
82 | rocket launch --glob_path="[\"*.json\", \".env*\"]"
83 | ```
84 | When specifying lists, be mindful about the formatting of the parameter string.
85 |
86 | ### To Upload Your Python Package
87 |
88 | If you've disabled the watch feature, `databricks-rocket` will only upload your project as a wheel to DBFS:
89 |
90 | ```sh
91 | rocket launch --watch=False
92 | ```
93 |
94 | Example:
95 |
96 | ```sh
97 | stevenmi@MacBook db-rocket % rocket launch --watch=False
98 | >> Watch is disabled. Building creating a python wheel from your project
99 | >> Found setup.py. Building python library
100 | >> Uploaded ./dist/databricks_rocket-2.0.0-py3-none-any.whl to dbfs:/temp/stevenmi/db-rocket/dist/databricks_rocket-2.0.0-py3-none-any.whl
101 | >> Uploaded wheel to databricks. Install your library in your databricks notebook by running:
102 | >> %pip install --upgrade pip
103 | >> %pip install /dbfs/temp/stevenmi/db-rocket/databricks_rocket-2.0.0-py3-none-any.whl --force-reinstall
104 | ```
105 |
106 | ## Blogposts
107 |
108 | - [DBrocket 2.0](https://www.getyourguide.careers/posts/improving-data-science-productivity-with-db-rocket-2-0): A summary of the big improvements we made to the tool in the new release.
109 | - [DB Rocket 1.0](https://www.getyourguide.careers/posts/open-sourcing-db-rocket-for-data-scientists) post also gives more details about the rationale around dbrocket.
110 |
111 | ## Support
112 |
113 | - Databricks: >=7
114 | - Python: >=3.7
115 | - Tested on Platform: Linux, MacOs. Windows will probably not work but contributions are welcomed!
116 | - Supports uploading to Unity Catalog Volumes starting from version 3.0.0. Note that the underlying dependency, `databricks-sdk`, is still in beta. We do not recommend using UC Volumes in production.
117 |
118 | ## Acknowledgments
119 |
120 | - Thanks Leon Poli for the Logo :)
121 | - Thanks Stephane Leonard for source-code and documentation improvements :)
122 | - Thanks Malachi Soord for the CICD setup and README improvements
123 |
124 | Contributions are welcomed!
125 |
126 |
127 | # Security
128 |
129 | For security issues please contact [security@getyourguide.com](mailto:security@getyourguide.com).
130 |
131 | # Legal
132 |
133 | db-rocket is licensed under the Apache License, Version 2.0. See [LICENSE](LICENSE.txt) for the full text.
134 |
--------------------------------------------------------------------------------
/docs/conda.md:
--------------------------------------------------------------------------------
1 | # 1. Install Conda
2 |
3 | Install on your machine following the [official instructions](https://docs.conda.io/projects/continuumio-conda/en/latest/user-guide/install/macos.html)
4 |
# 2. Create a conda environment
6 |
7 | ```sh
8 | conda create -n dbrocket python=3.7
9 | ```
10 |
# 3. Activate the environment
12 |
13 | ```sh
14 | conda activate dbrocket
15 | ```
16 |
17 | From now on continue with the installation instructions from the readme.
18 |
--------------------------------------------------------------------------------
/docs/development.md:
--------------------------------------------------------------------------------
1 | # Developing on local machine
2 |
3 | ## 1. Clone the repo
4 |
5 | Find the right branch.
6 |
7 | ## 2. Install local db rocket in dev. mode
8 |
9 |
10 | ```sh
11 | cd dbrocket_folder
12 | pip install -e .
13 |
14 | ```
15 |
From here on, `rocket` should be the development version.
17 |
18 | ## Build the package and upload it to PyPi
19 |
One needs to get PyPI credentials to upload. Reach out in case of need.
21 |
22 | ```sh
23 | pip install twine
24 | ./release.py release
25 | ```
26 |
--------------------------------------------------------------------------------
/docs/installation.md:
--------------------------------------------------------------------------------
1 |
2 | ## Installing
3 |
For a clean Python installation (especially on MacOs) we recommend [using conda](conda.md)
5 |
6 | ### Troubleshooting
7 |
8 | On MacOs, also upgrade the build library:
9 |
10 | ```sh
11 | python3 -m pip install --upgrade build
12 | ```
13 |
--------------------------------------------------------------------------------
/imgs/img.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/getyourguide/db-rocket/643d38f762feef8c1356af7c3e9511117327a3f9/imgs/img.png
--------------------------------------------------------------------------------
/imgs/img_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/getyourguide/db-rocket/643d38f762feef8c1356af7c3e9511117327a3f9/imgs/img_1.png
--------------------------------------------------------------------------------
/imgs/img_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/getyourguide/db-rocket/643d38f762feef8c1356af7c3e9511117327a3f9/imgs/img_2.png
--------------------------------------------------------------------------------
/logos/squared.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/getyourguide/db-rocket/643d38f762feef8c1356af7c3e9511117327a3f9/logos/squared.png
--------------------------------------------------------------------------------
/mypy.ini:
--------------------------------------------------------------------------------
1 | [mypy]
2 | python_version = 3.7
3 | warn_unused_configs = False
4 | disallow_untyped_defs = False
5 | warn_return_any = False
6 | ignore_missing_imports = True
7 | check_untyped_defs = False
8 |
--------------------------------------------------------------------------------
/pull_request_template.md:
--------------------------------------------------------------------------------
1 |
17 |
18 | ## Description
19 |
20 |
23 |
24 | ## Added tests?
25 |
26 | - [ ] 👍 yes
27 | - [ ] 🙅 no, because they aren't needed
28 |
29 | ## Added to documentation?
30 |
31 | - [ ] 👍 README.md
32 | - [ ] 👍 CHANGELOG.md
33 | - [ ] 👍 Additional documentation in /docs
34 | - [ ] 👍 Relevant code documentation
35 | - [ ] 🙅 no, because they aren't needed
36 |
--------------------------------------------------------------------------------
/release.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import os
4 |
5 |
6 | class Release:
7 | """
8 | Class responsible to serve the library in pypi, not part of rocket executable
9 | """
10 |
11 | """Module responsible for building db-rocket itself and publishing it to pypi"""
12 |
13 | def release(self):
14 | """
15 | Build rocket for pypi doing all steps. Run it on the root of rocket project.
16 | """
17 | os.system("rm -rf dist/* || true")
18 | os.system("python3 -m build --no-isolation")
19 | self.build()
20 | self.upload()
21 |
22 | def build(self):
23 | """
24 | Build rocket for pypi. Run it on the root of rocket project.
25 | """
26 | os.system("rm -rf dist/* || true")
27 | os.system("python3 -m build --no-isolation")
28 | print("Build successfull, uploading now")
29 |
30 | def upload(self):
31 | """
32 | Upload new package to pipy
33 | :return:
34 | """
35 | os.system("python3 -m twine upload dist/*")
36 |
37 |
38 | if __name__ == "__main__":
39 | import fire
40 |
41 | fire.Fire(Release)
42 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | fire~=0.4.0
2 | watchdog~=2.1.9
3 | databricks-cli~=0.17.0
4 | argh
5 | build~=0.8.0
6 | pyyaml
7 | pytest
8 | poetry
9 | mypy
10 | SecretStorage
11 | readme-renderer
12 | twine
13 | databricks-sdk==0.33.0
14 | wheel
15 |
--------------------------------------------------------------------------------
/rocket/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/rocket/file_watcher.py:
--------------------------------------------------------------------------------
import glob
import os
import time

from typing import List, Optional

from watchdog.events import FileSystemEventHandler
from watchdog.observers import Observer

from rocket.utils import gather_glob_paths
10 |
11 |
class FileWatcher:
    """Watches a directory tree and hands batches of modified files to a callback.

    Python files are always tracked; additional files can be tracked through
    glob patterns supplied at construction time.
    """

    class _Handler(FileSystemEventHandler):
        """Forwards watchdog modification events into the owning watcher's pending set."""

        def __init__(self, watcher_instance):
            self.watcher_instance = watcher_instance

        def on_modified(self, event):
            # Re-expand the globs on every event so files created after startup
            # are still matched.
            _current_glob_files = gather_glob_paths(self.watcher_instance.glob_paths)
            if event.src_path in _current_glob_files:
                self.watcher_instance.modified_files.add(event.src_path)
            elif event.is_directory:
                return
            elif os.path.splitext(event.src_path)[1] == ".py":
                self.watcher_instance.modified_files.add(event.src_path)

    def __init__(self, path_to_watch, callback, recursive=True, glob_paths: Optional[List[str]] = None):
        """
        :param path_to_watch: root directory to observe
        :param callback: called with the list of modified paths once per poll tick
        :param recursive: also watch subdirectories
        :param glob_paths: extra glob patterns whose matches should be tracked
        """
        self.path_to_watch = path_to_watch
        self.callback = callback
        self.recursive = recursive
        self.observer = Observer()
        self.modified_files = set()
        # None sentinel instead of a mutable default; normalize to an empty list.
        self.glob_paths = [] if glob_paths is None else glob_paths
        self.handler = self._Handler(self)

    def start(self):
        """Begin observing; flush accumulated changes to the callback once a second.

        Blocks until KeyboardInterrupt, then stops and joins the observer thread.
        """
        self.observer.schedule(
            self.handler, self.path_to_watch, recursive=self.recursive
        )
        self.observer.start()
        try:
            while True:
                time.sleep(1)
                if self.modified_files:
                    self.callback(list(self.modified_files))
                    self.modified_files.clear()
        except KeyboardInterrupt:
            self.observer.stop()
        self.observer.join()

    def stop(self):
        """Stop the underlying observer thread."""
        self.observer.stop()
54 |
--------------------------------------------------------------------------------
/rocket/logger.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import sys
3 |
4 |
def configure_logger() -> logging.Logger:
    """Return the shared "dbrocket" logger, emitting INFO and above to stdout.

    Idempotent: a handler is only attached on the first call, so repeated
    calls (or re-imports) no longer produce duplicate log lines.
    """
    logger = logging.getLogger("dbrocket")
    if not logger.handlers:
        logger.addHandler(logging.StreamHandler(sys.stdout))
    logger.setLevel(logging.INFO)
    return logger


# Shared module-level logger used across the rocket package.
logger = configure_logger()
13 |
--------------------------------------------------------------------------------
/rocket/rocket.py:
--------------------------------------------------------------------------------
import os
from typing import List, Optional, Tuple, Union

import fire

from databricks.sdk import WorkspaceClient

from rocket.file_watcher import FileWatcher
from rocket.logger import logger
from rocket.utils import (
    execute_shell_command,
    extract_python_package_dirs,
    extract_python_files_from_folder,
    execute_for_each_multithreaded,
    gather_glob_paths,
)
16 |
17 |
class Rocket:
    """Entry point of the installed program, all public methods are options of the program"""

    # in seconds
    _interval_repeat_watch: int = 2
    _python_executable: str = "python3"
    _rocket_executable: str = "rocket"

    def setup(self):
        """
        Initialize the application: create a minimal setup.py unless a packaging file exists.
        """
        if os.path.exists("setup.py") or os.path.exists("pyproject.toml"):
            logger.info("Packaging file already exists so no need to create a new one")
            return

        content = """
import setuptools

setuptools.setup(
    name="myproject",
    version="0.0.1",
    author="",
    author_email="",
    description="",
    url="https://github.com/getyourguide/databricks-rocket",
    packages=setuptools.find_packages(),
)
"""

        # "w" rather than "a": we only get here when no setup.py exists, and a
        # clean write avoids ever appending to stale leftovers.
        with open("setup.py", "w") as myfile:
            myfile.write(content)
        logger.info("Setup.py file created, feel free to modify it with your needs.")

    def launch(
        self,
        project_location: str = ".",
        dbfs_path: Optional[str] = None,
        watch: bool = True,
        glob_path: Optional[Union[str, List[str]]] = None,
        use_volumes: Optional[bool] = False,
        dst_path: Optional[str] = None,
    ) -> None:
        """
        Entrypoint of the application, triggers a build and deploy
        :param project_location: path to project code, default: `"."`
        :param dbfs_path: path where the wheel will be stored, ex: dbfs:/tmp/myteam/myproject. Only support dbfs path.
        :param watch: Set to false if you don't want to automatically sync your files
        :param glob_path: glob string or list of strings for additional files to deploy, e.g. "*.json"
        :param use_volumes: upload files to unity catalog volumes.
        :param dst_path: Destination path to store the files. Support both dbfs:/ and /Volumes. Ideally, we should use dst_path and deprecate dbfs_path.
        :return:
        """
        if os.getenv("DATABRICKS_TOKEN"):
            print("Note: DATABRICKS_TOKEN is set, it could override the token in ~/.databrickscfg and cause errors.")

        base_dbfs_access_error_message = ("Is your databricks token set and valid? "
                                          "Try to generate a new token and update existing one with "
                                          "`databricks configure --token`.")
        if use_volumes:
            # Volume uploads go through the SDK; fail fast if it cannot reach dbfs.
            try:
                workspace_client = WorkspaceClient()
                workspace_client.dbutils.fs.ls("dbfs:/")
            except Exception as e:
                raise Exception(
                    f"Could not access dbfs using databricks SDK. {base_dbfs_access_error_message} Error details: {e}"
                )
            db_path = self.get_volumes_path(dst_path)
        else:
            # DBFS uploads go through the databricks CLI; fail fast if it cannot reach dbfs.
            try:
                execute_shell_command("databricks fs ls dbfs:/")
            except Exception as e:
                raise Exception(
                    f"Error accessing DBFS via databricks-cli. {base_dbfs_access_error_message} Error details: {e}"
                )
            path_to_use = dst_path if dst_path else dbfs_path
            db_path = self.get_dbfs_path(path_to_use)

        if watch:
            # Watch mode syncs loose files, so nest them under a project-named folder.
            project_name = os.path.abspath(project_location).split("/")[-1]
            db_path = f"{db_path}/{project_name}"

        # Normalize glob_path (single pattern or list) into patterns rooted at the project.
        glob_paths = []
        if isinstance(glob_path, str):
            glob_paths = [os.path.join(project_location, glob_path)]
        elif isinstance(glob_path, list):
            glob_paths = [os.path.join(project_location, path) for path in glob_path]

        self._build_and_deploy(watch=watch, project_location=project_location, db_path=db_path, glob_paths=glob_paths)
        if watch:
            watcher = FileWatcher(
                project_location,
                # The watcher hands us the batch of modified files; redeploy just those.
                lambda modified: self._build_and_deploy(
                    watch=watch,
                    modified_files=modified,
                    db_path=db_path,
                    project_location=project_location,
                    # Pass the normalized pattern list (the raw glob_path may be a str).
                    glob_paths=glob_paths,
                ),
                glob_paths=glob_paths,
            )
            watcher.start()

    def _build_and_deploy(
        self,
        watch: bool,
        project_location: str,
        db_path: str,
        modified_files: Optional[List[str]] = None,
        glob_paths: Optional[List[str]] = None
    ) -> None:
        """Deploy the project in one of three modes.

        - modified_files given: upload only those files (incremental watch sync).
        - watch off: build a wheel and upload it with install instructions.
        - watch on: upload all project sources plus dependency/packaging files.
        """
        if modified_files:
            logger.info(f"Found changes in {modified_files}. Overwriting them.")
            self._deploy(
                file_paths=modified_files,
                db_path=db_path,
                project_location=project_location,
            )
            return

        if not watch:
            logger.info(
                "Watch is disabled. Creating a python wheel from your project"
            )
            wheel_path, wheel_file = self._create_python_project_wheel(project_location)
            self._deploy(
                file_paths=[wheel_path],
                db_path=db_path,
                project_location=os.path.dirname(wheel_path),
            )
            install_path = f"{self.get_install_path(db_path)}/{wheel_file}"

            # Private index URLs declared in requirements files must be repeated
            # in the notebook's pip command.
            dependency_files = ["requirements.in", "requirements.txt"]
            index_urls = []
            for dependency_file in dependency_files:
                dependency_file_path = f"{project_location}/{dependency_file}"
                if os.path.exists(dependency_file_path):
                    with open(dependency_file_path) as f:
                        index_urls = [
                            line.strip()
                            for line in f.readlines()
                            if "index-url" in line
                        ]
            index_urls_options = " ".join(index_urls)
            logger.info(f"""Uploaded wheel to databricks. Install your library in your databricks notebook by running:
%pip install --upgrade pip
%pip install {index_urls_options} {install_path} --force-reinstall""")
            return

        # Watch mode: collect the raw python sources of every top-level package.
        package_dirs = extract_python_package_dirs(project_location)
        files = set()
        for package_dir in package_dirs:
            files.update(extract_python_files_from_folder(package_dir))

        if glob_paths is not None:
            files.update(gather_glob_paths(glob_paths))

        project_files = ["setup.py", "pyproject.toml", "README.md"]
        for project_file in project_files:
            if os.path.exists(f"{project_location}/{project_file}"):
                files.add(f"{project_location}/{project_file}")

        # Poetry projects: export a requirements.txt so the notebook can install deps.
        if os.path.exists(f"{project_location}/pyproject.toml"):
            try:
                execute_shell_command(
                    "poetry export -f requirements.txt --with-credentials --without-hashes --output requirements.txt"
                )
            except Exception as e:
                logger.warning(f"Failed to create requirements.txt with poetry: {e}")

        dependency_file_exist = False
        dependency_files = ["requirements.in", "requirements.txt"]
        uploaded_dependency_file = ""
        index_urls = []
        for dependency_file in dependency_files:
            dependency_file_path = f"{project_location}/{dependency_file}"
            if os.path.exists(dependency_file_path):
                files.add(dependency_file_path)
                uploaded_dependency_file = dependency_file
                dependency_file_exist = True
                with open(dependency_file_path) as f:
                    index_urls = [
                        line.strip() for line in f.readlines() if "index-url" in line
                    ]
        self._deploy(
            file_paths=list(files), db_path=db_path, project_location=project_location
        )

        install_path = self.get_install_path(db_path)
        index_urls_options = " ".join(index_urls)
        extra_watch_command = ""
        if not self.is_dbfs(db_path):
            # The install path is supposed to get added to sys.path, but this doesn't work when using volumes with
            # tropic 3.5 (running databricks 15.4)...so, add it to sys.path manually
            extra_watch_command = f"import sys; sys.path.append('{install_path}')"

        if dependency_file_exist:
            logger.info(
                f"""Watch activated. Uploaded your project to databricks. Install your project in your databricks notebook by running:
%sh
pip install uv
uv pip install {index_urls_options} -r {install_path}/{uploaded_dependency_file}
uv pip install --no-deps -e {install_path}

and in a new Python cell:
dbutils.library.restartPython()

and in one more Python cell:
%load_ext autoreload
%autoreload 2
{extra_watch_command}"""
            )
        else:
            logger.info(
                f"""Watch activated. Uploaded your project to databricks. Install your project in your databricks notebook by running:
%sh
pip install uv
uv pip install -e {install_path}

and following in a new Python cell:
%load_ext autoreload
%autoreload 2"""
            )

    def _deploy(
        self,
        file_paths: List[str],
        db_path: str,
        project_location: str
    ) -> None:
        """Dispatch the upload to DBFS (CLI) or to a Unity Catalog volume (SDK)."""
        if self.is_dbfs(db_path):
            self._deploy_dbfs(file_paths, db_path, project_location)
        else:
            w = WorkspaceClient()
            self._deploy_volumes(file_paths, db_path, project_location, w)

    def _deploy_dbfs(
        self,
        file_paths: List[str],
        db_path: str,
        project_location: str
    ):
        """Upload each file to DBFS via the databricks CLI, preserving relative paths."""
        def helper(file: str) -> None:
            target_path = f"{db_path}/{os.path.relpath(file, project_location)}"
            target_folder = os.path.dirname(target_path)
            execute_shell_command(f"databricks fs mkdirs {target_folder}")
            execute_shell_command(f"databricks fs cp --recursive --overwrite {file} {target_path}")
            logger.info(f"Uploaded {file} to {target_path}")

        execute_for_each_multithreaded(file_paths, helper)

    def _deploy_volumes(
        self,
        file_paths: List[str],
        db_path: str,
        project_location: str,
        workspace_client
    ):
        """Upload each file to a Unity Catalog volume via the databricks SDK."""
        def helper(wc, file: str) -> None:
            # sdk asks an absolute path
            if not os.path.isabs(file):
                cwd = os.getcwd()
                file = f"{cwd}/{file}"
            target_path = f"{db_path}/{os.path.relpath(file, project_location)}"
            # if the file already exists, sdk returns error message: The file being created already exists.
            # a feature request is already here: https://github.com/databricks/databricks-sdk-py/issues/548
            try:
                wc.dbutils.fs.rm(target_path)
            except Exception:
                pass
            # sdk uses urllibs3 to parse paths.
            # It need to be file:// to be recognized as a local file. Otherwise it raises file not exist error
            wc.dbutils.fs.cp(f"file://{file}", target_path)
            logger.info(f"Uploaded {file} to {target_path}")

        execute_for_each_multithreaded(file_paths, lambda x: helper(workspace_client, x))

    def _create_python_project_wheel(self, project_location: str) -> Tuple[str, str]:
        """Build a wheel for the project and return (wheel_path, wheel_file_name).

        Raises when the project contains neither a setup.py nor a pyproject.toml.
        """
        dist_location = f"{project_location}/dist"
        execute_shell_command(f"rm {dist_location}/* 2>/dev/null || true")

        if os.path.exists(f"{project_location}/setup.py"):
            logger.info("Found setup.py. Building python library")
            execute_shell_command(
                f"cd {project_location} ; {self._python_executable} -m build --outdir {dist_location} 2>/dev/null"
            )
        elif os.path.exists(f"{project_location}/pyproject.toml"):
            logger.info("Found pyproject.toml. Building python library with poetry")
            execute_shell_command(
                f"cd {project_location} ; poetry build --format wheel"
            )
        else:
            raise Exception(
                "To be turned into a library your project has to contain a setup.py or pyproject.toml file"
            )

        wheel_file = execute_shell_command(
            f"cd {dist_location}; ls *.whl 2>/dev/null | head -n 1"
        ).replace("\n", "")
        wheel_path = f"{dist_location}/{wheel_file}"
        return wheel_path, wheel_file

    def get_dbfs_path(self, path: Optional[str]) -> str:
        """Validate a dbfs:/ destination; fall back to a per-user temp folder."""
        if path:
            logger.warning("The `dbfs_path` parameter is planned for deprecation. Please use the `dst_path` parameter instead.")
            if not self.is_dbfs(path):
                raise Exception("`dbfs_path` must start with dbfs:/")
        return path or f"dbfs:/temp/{os.environ['USER']}"

    def get_volumes_path(self, path: Optional[str]) -> str:
        """Validate a /Volumes destination; fall back to a per-user default volume."""
        if path and not path.startswith("/Volumes"):
            raise Exception("`use_volumes` is true. `dst_path` must start with /Volumes")
        return path or f"/Volumes/main/data_products/volume/db_rocket/{os.environ['USER']}"

    def get_install_path(self, db_path: str) -> str:
        """Translate a dbfs:/ URI into its /dbfs/ mount path; volume paths pass through."""
        if self.is_dbfs(db_path):
            return f'{db_path.replace("dbfs:/", "/dbfs/")}'
        return db_path

    def is_dbfs(self, db_path: str) -> bool:
        """True when the destination is a DBFS URI (as opposed to a Volumes path)."""
        return db_path.startswith("dbfs:/")
341 |
342 |
def main():
    """Console entry point: exposes Rocket's public methods as CLI commands via fire."""
    fire.Fire(Rocket)
345 |
--------------------------------------------------------------------------------
/rocket/utils.py:
--------------------------------------------------------------------------------
1 | import concurrent.futures
2 | import glob
3 | import os
4 | import subprocess
5 |
6 | from typing import List, Set
7 | from rocket.logger import logger
8 |
9 |
def execute_for_each_multithreaded(lst, func, max_threads=None):
    """
    Apply *func* to every item of *lst* concurrently, preserving input order.

    Parameters:
    - lst: List of items to process
    - func: Function to apply to each item
    - max_threads: Maximum number of threads to use (default is None, which means as many as items in the list)

    Returns:
    - List of results after applying the function
    """
    pool = concurrent.futures.ThreadPoolExecutor(max_workers=max_threads)
    with pool:
        return [outcome for outcome in pool.map(func, lst)]
24 |
25 |
def extract_package_name_from_wheel(wheel_filename):
    """Return the distribution name: everything before the first '-' in the wheel filename."""
    name, _, _ = wheel_filename.partition("-")
    return name
29 |
30 |
def extract_project_name_from_wheel(wheel_filename):
    """Return the PyPI-style project name: package name with underscores turned into hyphens."""
    package_name = wheel_filename.split("-")[0]
    return package_name.replace("_", "-")
33 |
34 |
def extract_python_package_dirs(root_dir):
    """Return the immediate subdirectories of *root_dir* that contain an __init__.py."""
    return [
        os.path.join(root_dir, entry)
        for entry in os.listdir(root_dir)
        if os.path.isdir(os.path.join(root_dir, entry))
        and "__init__.py" in os.listdir(os.path.join(root_dir, entry))
    ]
42 |
43 |
def execute_shell_command(cmd) -> str:
    """Run *cmd* through the shell and return its stdout decoded as UTF-8.

    Raises subprocess.CalledProcessError when the command exits non-zero.
    """
    logger.debug(f"Running shell command: {cmd} ")
    raw_output = subprocess.check_output(cmd, shell=True)
    return raw_output.decode("utf-8")
47 |
48 |
def extract_python_files_from_folder(path):
    """Recursively collect the paths of all .py files under *path*."""
    return [
        os.path.join(parent, name)
        for parent, _dirs, names in os.walk(path)
        for name in names
        if name.endswith(".py")
    ]
58 |
59 |
def gather_glob_paths(glob_paths: List[str]) -> Set[str]:
    """Expand every glob pattern and return the union of all matches as a set."""
    matches: Set[str] = set()
    for pattern in glob_paths:
        matches |= set(glob.glob(pattern))
    return matches
65 |
--------------------------------------------------------------------------------
/rocket_local.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
"""Convenience launcher: run db-rocket from a source checkout without installing it."""

from rocket.rocket import main

if __name__ == "__main__":
    main()
7 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
import setuptools

# load the README file and use it as the long_description for PyPI
try:
    with open("README.md", encoding="utf8") as f:
        readme = f.read()
except Exception:
    # Best-effort: building without a README falls back to an empty description.
    readme = ""

setuptools.setup(
    name="databricks-rocket",
    version="3.1.0",
    author="GetYourGuide",
    author_email="engineering.data-products@getyourguide.com",
    description="Keep your local python scripts installed and in sync with a databricks notebook. Shortens the feedback loop to develop projects using a hybrid environment",
    long_description=readme,
    long_description_content_type="text/markdown",
    url="https://github.com/getyourguide/db-rocket",
    packages=setuptools.find_packages(),
    install_requires=["fire", "watchdog~=2.1.9", "build", "databricks_cli", "databricks-sdk"],
    entry_points={
        "console_scripts": ["rocket=rocket.rocket:main", "dbrocket=rocket.rocket:main"]
    },
    license="Apache 2.0",
)
26 |
--------------------------------------------------------------------------------
/squared.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/getyourguide/db-rocket/643d38f762feef8c1356af7c3e9511117327a3f9/squared.png
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/getyourguide/db-rocket/643d38f762feef8c1356af7c3e9511117327a3f9/tests/__init__.py
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import pytest as pytest
4 |
5 | from rocket.rocket import Rocket
6 |
7 |
@pytest.fixture()
def python_project_path() -> str:
    """Absolute path to the bundled setuptools-based sample project."""
    resources_root = os.path.join(os.path.dirname(os.path.realpath(__file__)), "resources")
    return os.path.join(resources_root, "python-test")
13 |
14 |
@pytest.fixture()
def poetry_project_path() -> str:
    """Absolute path to the bundled poetry-based sample project."""
    resources_root = os.path.join(os.path.dirname(os.path.realpath(__file__)), "resources")
    return os.path.join(resources_root, "poetry-test")
20 |
21 |
@pytest.fixture()
def rocket() -> Rocket:
    """A fresh Rocket instance for each test."""
    return Rocket()
26 |
--------------------------------------------------------------------------------
/tests/resources/poetry-test/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/getyourguide/db-rocket/643d38f762feef8c1356af7c3e9511117327a3f9/tests/resources/poetry-test/README.md
--------------------------------------------------------------------------------
/tests/resources/poetry-test/poetry.lock:
--------------------------------------------------------------------------------
1 | [[package]]
2 | name = "atomicwrites"
3 | version = "1.4.1"
4 | description = "Atomic file writes."
5 | category = "dev"
6 | optional = false
7 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
8 |
9 | [[package]]
10 | name = "attrs"
11 | version = "22.1.0"
12 | description = "Classes Without Boilerplate"
13 | category = "dev"
14 | optional = false
15 | python-versions = ">=3.5"
16 |
17 | [package.extras]
18 | dev = ["cloudpickle", "coverage[toml] (>=5.0.2)", "furo", "hypothesis", "mypy (>=0.900,!=0.940)", "pre-commit", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "sphinx", "sphinx-notfound-page", "zope.interface"]
19 | docs = ["furo", "sphinx", "sphinx-notfound-page", "zope.interface"]
20 | tests = ["cloudpickle", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy (>=0.900,!=0.940)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "zope.interface"]
21 | tests_no_zope = ["cloudpickle", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy (>=0.900,!=0.940)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins"]
22 |
23 | [[package]]
24 | name = "colorama"
25 | version = "0.4.5"
26 | description = "Cross-platform colored terminal text."
27 | category = "dev"
28 | optional = false
29 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
30 |
31 | [[package]]
32 | name = "more-itertools"
33 | version = "8.14.0"
34 | description = "More routines for operating on iterables, beyond itertools"
35 | category = "dev"
36 | optional = false
37 | python-versions = ">=3.5"
38 |
39 | [[package]]
40 | name = "packaging"
41 | version = "21.3"
42 | description = "Core utilities for Python packages"
43 | category = "dev"
44 | optional = false
45 | python-versions = ">=3.6"
46 |
47 | [package.dependencies]
48 | pyparsing = ">=2.0.2,<3.0.5 || >3.0.5"
49 |
50 | [[package]]
51 | name = "pluggy"
52 | version = "0.13.1"
53 | description = "plugin and hook calling mechanisms for python"
54 | category = "dev"
55 | optional = false
56 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
57 |
58 | [package.extras]
59 | dev = ["pre-commit", "tox"]
60 |
61 | [[package]]
62 | name = "py"
63 | version = "1.11.0"
64 | description = "library with cross-python path, ini-parsing, io, code, log facilities"
65 | category = "dev"
66 | optional = false
67 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
68 |
69 | [[package]]
70 | name = "pyparsing"
71 | version = "3.0.9"
72 | description = "pyparsing module - Classes and methods to define and execute parsing grammars"
73 | category = "dev"
74 | optional = false
75 | python-versions = ">=3.6.8"
76 |
77 | [package.extras]
78 | diagrams = ["jinja2", "railroad-diagrams"]
79 |
80 | [[package]]
81 | name = "pytest"
82 | version = "5.4.3"
83 | description = "pytest: simple powerful testing with Python"
84 | category = "dev"
85 | optional = false
86 | python-versions = ">=3.5"
87 |
88 | [package.dependencies]
89 | atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""}
90 | attrs = ">=17.4.0"
91 | colorama = {version = "*", markers = "sys_platform == \"win32\""}
92 | more-itertools = ">=4.0.0"
93 | packaging = "*"
94 | pluggy = ">=0.12,<1.0"
95 | py = ">=1.5.0"
96 | wcwidth = "*"
97 |
98 | [package.extras]
99 | checkqa-mypy = ["mypy (==v0.761)"]
100 | testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xmlschema"]
101 |
102 | [[package]]
103 | name = "wcwidth"
104 | version = "0.2.5"
105 | description = "Measures the displayed width of unicode strings in a terminal"
106 | category = "dev"
107 | optional = false
108 | python-versions = "*"
109 |
110 | [metadata]
111 | lock-version = "1.1"
112 | python-versions = "^3.8"
113 | content-hash = "c27944f25b55067b06883f1cea204be7d97841a4b8228fab69b91895347494ad"
114 |
115 | [metadata.files]
116 | atomicwrites = [
117 | {file = "atomicwrites-1.4.1.tar.gz", hash = "sha256:81b2c9071a49367a7f770170e5eec8cb66567cfbbc8c73d20ce5ca4a8d71cf11"},
118 | ]
119 | attrs = [
120 | {file = "attrs-22.1.0-py2.py3-none-any.whl", hash = "sha256:86efa402f67bf2df34f51a335487cf46b1ec130d02b8d39fd248abfd30da551c"},
121 | {file = "attrs-22.1.0.tar.gz", hash = "sha256:29adc2665447e5191d0e7c568fde78b21f9672d344281d0c6e1ab085429b22b6"},
122 | ]
123 | colorama = [
124 | {file = "colorama-0.4.5-py2.py3-none-any.whl", hash = "sha256:854bf444933e37f5824ae7bfc1e98d5bce2ebe4160d46b5edf346a89358e99da"},
125 | {file = "colorama-0.4.5.tar.gz", hash = "sha256:e6c6b4334fc50988a639d9b98aa429a0b57da6e17b9a44f0451f930b6967b7a4"},
126 | ]
127 | more-itertools = [
128 | {file = "more-itertools-8.14.0.tar.gz", hash = "sha256:c09443cd3d5438b8dafccd867a6bc1cb0894389e90cb53d227456b0b0bccb750"},
129 | {file = "more_itertools-8.14.0-py3-none-any.whl", hash = "sha256:1bc4f91ee5b1b31ac7ceacc17c09befe6a40a503907baf9c839c229b5095cfd2"},
130 | ]
131 | packaging = [
132 | {file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"},
133 | {file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"},
134 | ]
135 | pluggy = [
136 | {file = "pluggy-0.13.1-py2.py3-none-any.whl", hash = "sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d"},
137 | {file = "pluggy-0.13.1.tar.gz", hash = "sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0"},
138 | ]
139 | py = [
140 | {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"},
141 | {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"},
142 | ]
143 | pyparsing = [
144 | {file = "pyparsing-3.0.9-py3-none-any.whl", hash = "sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc"},
145 | {file = "pyparsing-3.0.9.tar.gz", hash = "sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb"},
146 | ]
147 | pytest = [
148 | {file = "pytest-5.4.3-py3-none-any.whl", hash = "sha256:5c0db86b698e8f170ba4582a492248919255fcd4c79b1ee64ace34301fb589a1"},
149 | {file = "pytest-5.4.3.tar.gz", hash = "sha256:7979331bfcba207414f5e1263b5a0f8f521d0f457318836a7355531ed1a4c7d8"},
150 | ]
151 | wcwidth = [
152 | {file = "wcwidth-0.2.5-py2.py3-none-any.whl", hash = "sha256:beb4802a9cebb9144e99086eff703a642a13d6a0052920003a230f3294bbe784"},
153 | {file = "wcwidth-0.2.5.tar.gz", hash = "sha256:c4d647b99872929fdb7bdcaa4fbe7f01413ed3d98077df798530e5b04f116c83"},
154 | ]
155 |
--------------------------------------------------------------------------------
/tests/resources/poetry-test/poetry_test/__init__.py:
--------------------------------------------------------------------------------
__version__ = "0.1.0"  # fixture package version consumed by the poetry wheel-build test
2 |
--------------------------------------------------------------------------------
/tests/resources/poetry-test/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "poetry-test"
3 | version = "0.1.0"
4 | description = ""
5 | authors = ["Steven Mi "]
6 |
7 | [tool.poetry.dependencies]
8 | python = "^3.8"
9 |
10 | [tool.poetry.dev-dependencies]
11 | pytest = "^5.2"
12 |
13 | [build-system]
14 | requires = ["poetry-core>=1.0.0"]
15 | build-backend = "poetry.core.masonry.api"
--------------------------------------------------------------------------------
/tests/resources/python-test/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/getyourguide/db-rocket/643d38f762feef8c1356af7c3e9511117327a3f9/tests/resources/python-test/README.md
--------------------------------------------------------------------------------
/tests/resources/python-test/python_test/__init__.py:
--------------------------------------------------------------------------------
__version__ = "0.1.0"  # fixture package version consumed by the setuptools wheel-build test
2 |
--------------------------------------------------------------------------------
/tests/resources/python-test/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import find_packages, setup
2 |
3 | setup(
4 | name="foo",
5 | version="1.0",
6 | packages=find_packages(),
7 | )
8 |
--------------------------------------------------------------------------------
/tests/test_rocket.py:
--------------------------------------------------------------------------------
1 | from rocket.rocket import Rocket
2 |
3 |
def test_create_python_wheel_from_python_project_successful(rocket: Rocket, python_project_path: str):
    """
    Test if DB Rocket can build a python project
    """
    built_path, built_file = rocket._create_python_project_wheel(python_project_path)
    assert built_path
    assert built_file
11 |
12 |
def test_create_python_wheel_from_poetry_project_successful(rocket: Rocket, poetry_project_path: str):
    """
    Test if DB Rocket can build a poetry project
    """
    built_path, built_file = rocket._create_python_project_wheel(poetry_project_path)
    assert built_path
    assert built_file
20 |
21 |
def test_create_python_wheel_from_temp_folder_raises_exception(rocket: Rocket):
    """
    Test if DB Rocket raises an error when the project is not a supported project.

    The previous version passed silently even when no exception was raised.
    """
    try:
        rocket._create_python_project_wheel("/tmp")
    except Exception:
        # Expected: /tmp contains neither setup.py nor pyproject.toml.
        pass
    else:
        raise AssertionError("expected _create_python_project_wheel to raise for an unsupported project")
30 |
31 |
--------------------------------------------------------------------------------