├── .coveragerc ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── PULL_REQUEST_TEMPLATE.md └── workflows │ └── onpush.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CODEOWNERS ├── CONTRIBUTING.md ├── CONTRIBUTORS.md ├── Dockerfile ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── brickflow ├── __init__.py ├── bundles │ ├── __init__.py │ └── model.py ├── cli │ ├── __init__.py │ ├── bundles.py │ ├── commands.py │ ├── configure.py │ ├── constants.py │ ├── entrypoint.template │ ├── gitignore_template.txt │ └── projects.py ├── codegen │ ├── __init__.py │ └── databricks_bundle.py ├── context │ ├── __init__.py │ └── context.py ├── engine │ ├── __init__.py │ ├── compute.py │ ├── hooks.py │ ├── project.py │ ├── task.py │ ├── utils.py │ └── workflow.py ├── hints │ ├── __init__.py │ ├── hint.py │ └── py.typed └── resolver │ └── __init__.py ├── brickflow_plugins ├── __init__.py ├── airflow │ ├── __init__.py │ ├── brickflow_task_plugin.py │ ├── context │ │ └── __init__.py │ ├── cronhelper.py │ ├── operators │ │ ├── __init__.py │ │ ├── external_tasks.py │ │ ├── external_tasks_tableau.py │ │ └── native_operators.py │ └── vendor │ │ ├── __init__.py │ │ ├── context.py │ │ ├── timetable.py │ │ └── timezone.py ├── databricks │ ├── __init__.py │ ├── box_operator.py │ ├── run_job.py │ ├── sla_sensor.py │ ├── uc_to_snowflake_operator.py │ └── workflow_dependency_sensor.py └── secrets │ └── __init__.py ├── docs ├── api │ ├── airflow_external_task_dependency.md │ ├── airflow_native_operators.md │ ├── airflow_tableau_operators.md │ ├── box_operator.md │ ├── cli.md │ ├── compute.md │ ├── context.md │ ├── project.md │ ├── secrets.md │ ├── sla_sensor.md │ ├── task.md │ ├── uc_to_snowflake_operator.md │ ├── workflow.md │ └── workflow_dependency_sensor.md ├── bundles-quickstart.md ├── cli │ └── reference.md ├── css │ └── custom.css ├── environment-variables.md ├── faq │ └── faq.md ├── highlevel.md ├── how-imports-work.md ├── img │ ├── bf_logo.png │ ├── bf_logo_1.png │ ├── maintainance.png │ └── workflow.png ├── index.md ├── projects.md ├── tasks.md ├── upgrades │ └── upgrade-pre-0-10-0-to-0-10-0.md └── workflows.md ├── examples ├── brickflow_examples │ ├── .brickflow-project-root.yml │ ├── .gitignore │ ├── README.md │ ├── __init__.py │ ├── brickflow-multi-project.yml │ ├── notebooks │ │ ├── __init__.py │ │ └── example_notebook.py │ ├── src │ │ ├── __init__.py │ │ ├── python │ │ │ ├── __init__.py │ │ │ ├── lending_data_show.py │ │ │ └── setup_data.py │ │ └── sql │ │ │ └── sample.sql │ └── workflows │ │ ├── __init__.py │ │ ├── demo_wf.py │ │ └── entrypoint.py ├── brickflow_for_each_task_examples │ ├── .brickflow-project-root.yml │ ├── README.md │ ├── __init__.py │ ├── brickflow-multi-project.yml │ ├── notebooks │ │ ├── __init__.py │ │ └── example_notebook.py │ ├── src │ │ ├── __init__.py │ │ └── python │ │ │ ├── __init__.py │ │ │ └── print_args.py │ └── workflows │ │ ├── __init__.py │ │ ├── entrypoint.py │ │ └── for_each_task_wf.py └── brickflow_serverless_examples │ ├── .brickflow-project-root.yml │ ├── .gitignore │ ├── README.md │ ├── __init__.py │ ├── brickflow-multi-project.yml │ ├── notebooks │ ├── __init__.py │ └── example_notebook.py │ ├── src │ ├── __init__.py │ └── python │ │ ├── __init__.py │ │ └── example.py │ └── workflows │ ├── __init__.py │ ├── demo_serverless_wf.py │ └── entrypoint.py ├── mkdocs.yml ├── poetry.lock ├── prospector.yaml ├── pyproject.toml ├── tests ├── __init__.py ├── airflow_plugins │ ├── __init__.py │ ├── test_autosys.py │ ├── test_tableau.py │ 
└── test_task_dependency.py ├── cli │ ├── __init__.py │ ├── sample_yaml_project │ │ ├── .brickflow-project-root.yaml │ │ └── brickflow-multi-project.yaml │ ├── sample_yml_project │ │ ├── .brickflow-project-root.yml │ │ └── brickflow-multi-project.yml │ ├── test_bundles.py │ ├── test_cli.py │ └── test_projects.py ├── codegen │ ├── __init__.py │ ├── expected_bundles │ │ ├── dev_bundle_monorepo.yml │ │ ├── dev_bundle_polyrepo.yml │ │ ├── dev_bundle_polyrepo_with_auto_libs.yml │ │ ├── local_bundle.yml │ │ ├── local_bundle_continuous_schedule.yml │ │ ├── local_bundle_foreach_task.yml │ │ ├── local_bundle_prefix_suffix.yml │ │ └── local_serverless_bundle.yml │ ├── sample_serverless_workflow.py │ ├── sample_workflows.py │ └── test_databricks_bundle.py ├── context │ ├── __init__.py │ └── test_context.py ├── databricks_plugins │ ├── __init__.py │ ├── test_box_operator.py │ ├── test_run_job.py │ ├── test_sla_sensor.py │ ├── test_workflow_dependency_sensor.py │ └── test_workflow_task_dependency_sensor.py ├── engine │ ├── __init__.py │ ├── sample_workflow.py │ ├── sample_workflow_2.py │ ├── test_compute.py │ ├── test_engine.py │ ├── test_project.py │ ├── test_task.py │ ├── test_utils.py │ └── test_workflow.py ├── resolver │ └── test_resolver.py ├── sample_workflows │ ├── __init__.py │ ├── sample_workflow_1.py │ └── sample_workflow_2.py ├── test_brickflow.py └── test_plugins.py └── tools ├── README.md ├── gen-bundle.sh ├── install_databricks_cli.py ├── modify_model.py └── modify_schema.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit = 3 | *tests* 4 | brickflow/tf/* 5 | '*/.local/*', 6 | '**', 7 | 'tests/*', 8 | '*/tests/*', 9 | # omit anything in a .venv directory anywhere 10 | '.venv/*', 11 | "*/site-packages/*" 12 | 13 | [html] 14 | skip_empty = true 15 | 16 | [report] 17 | skip_empty = true 18 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: "[BUG] Please add your bug title here" 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Cloud Information** 27 | 28 | 29 | - [ ] AWS 30 | - [ ] Azure 31 | - [ ] GCP 32 | - [ ] Other 33 | 34 | **Desktop (please complete the following information):** 35 | - OS: [e.g. iOS] 36 | - Browser [e.g. chrome, safari] 37 | - Version [e.g. 22] 38 | 39 | **Additional context** 40 | Add any other context about the problem here. 41 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: "[FEATURE] Please add your feature request title" 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? 
Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Cloud Information** 14 | 15 | 16 | - [ ] AWS 17 | - [ ] Azure 18 | - [ ] GCP 19 | - [ ] Other 20 | 21 | **Describe the solution you'd like** 22 | A clear and concise description of what you want to happen. 23 | 24 | **Describe alternatives you've considered** 25 | A clear and concise description of any alternative solutions or features you've considered. 26 | 27 | **Additional context** 28 | Add any other context or screenshots about the feature request here. 29 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Description 4 | 5 | 6 | ## Related Issue 7 | 8 | 9 | 10 | 11 | 12 | ## Motivation and Context 13 | 14 | 15 | ## How Has This Been Tested? 16 | 17 | 18 | 19 | 20 | ## Screenshots (if appropriate): 21 | 22 | ## Types of changes 23 | 24 | - [ ] Bug fix (non-breaking change which fixes an issue) 25 | - [ ] New feature (non-breaking change which adds functionality) 26 | - [ ] Breaking change (fix or feature that would cause existing functionality to change) 27 | 28 | ## Checklist: 29 | 30 | 31 | - [ ] My code follows the code style of this project. 32 | - [ ] My change requires a change to the documentation. 33 | - [ ] I have updated the documentation accordingly. 34 | - [ ] I have read the **CONTRIBUTING** document. 35 | - [ ] I have added tests to cover my changes. 36 | - [ ] All new and existing tests passed. 37 | -------------------------------------------------------------------------------- /.github/workflows/onpush.yml: -------------------------------------------------------------------------------- 1 | name: build 2 | 3 | on: 4 | pull_request: 5 | types: [ opened, synchronize ] 6 | push: 7 | branches: [ main ] 8 | release: 9 | types: [ created ] 10 | 11 | jobs: 12 | test-pipeline: 13 | runs-on: ${{ matrix.os }} 14 | container: 15 | image: python:${{ matrix.python-version }} 16 | options: --user 1001 # run as the runner user instead of root 17 | strategy: 18 | max-parallel: 2 19 | matrix: 20 | python-version: [ '3.9' ] 21 | os: [ ubuntu-latest ] 22 | 23 | steps: 24 | - uses: actions/checkout@v3 25 | 26 | - name: Set up JDK # used for py4j for cronutils 27 | uses: actions/setup-java@v3 28 | with: 29 | java-version: '8' 30 | distribution: 'adopt' 31 | 32 | - name: Install pip 33 | run: python -m pip install --upgrade pip 34 | 35 | - name: Install and configure Poetry 36 | uses: snok/install-poetry@v1 37 | 38 | - name: Install poetry and build tools 39 | run: | 40 | export PATH=$PATH:$HOME/.local/bin 41 | poetry self add "poetry-dynamic-versioning[plugin]" 42 | 43 | - name: Install dependencies 44 | run: | 45 | export PATH=$PATH:$HOME/.local/bin 46 | make poetry 47 | 48 | - name: Install, lint and test 49 | run: | 50 | export PATH=$PATH:$HOME/.local/bin 51 | export GITHUB_ACTIONS=true 52 | make cov 53 | 54 | - name: Publish test coverage 55 | uses: codecov/codecov-action@v3 56 | with: 57 | token: ${{ secrets.CODECOV_TOKEN }} 58 | files: coverage.xml 59 | 60 | deploy: 61 | name: Deploy to PyPi 62 | runs-on: ${{ matrix.os }} 63 | container: 64 | image: python:${{ matrix.python-version }} 65 | options: --user 1001 # run as the runner user instead of root 66 | strategy: 67 | max-parallel: 2 68 | matrix: 69 | python-version: [ '3.9' ] 70 | os: [ ubuntu-latest ] 71 | needs: 72 | - test-pipeline 73 | 
if: github.event_name == 'release' 74 | steps: 75 | - uses: actions/checkout@v3 # use latest version of the checkout action 76 | 77 | - name: Set up JDK # used for py4j for cronutils 78 | uses: actions/setup-java@v3 79 | with: 80 | java-version: '8' 81 | distribution: 'adopt' 82 | 83 | - name: Install pip 84 | run: python -m pip install --upgrade pip 85 | 86 | - name: Install and configure Poetry 87 | uses: snok/install-poetry@v1 88 | 89 | - name: Install build tools 90 | run: | 91 | export PATH=$PATH:$HOME/.local/bin 92 | poetry self add "poetry-dynamic-versioning[plugin]" 93 | 94 | - name: Install dependencies 95 | run: | 96 | export PATH=$PATH:$HOME/.local/bin 97 | make poetry 98 | 99 | - name: Install wheel and twine 100 | run: python -m pip install wheel twine 101 | 102 | - name: Build and publish 103 | env: 104 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} 105 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 106 | run: | 107 | export PATH=$PATH:$HOME/.local/bin 108 | make build 109 | twine upload dist/* 110 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by https://www.toptal.com/developers/gitignore/api/terraform,pycharm+all,macos,windows 2 | # Edit at https://www.toptal.com/developers/gitignore?templates=terraform,pycharm+all,macos,windows 3 | 4 | ### macOS ### 5 | # General 6 | .DS_Store 7 | .AppleDouble 8 | .LSOverride 9 | 10 | # Icon must end with two 11 | Icon 12 | 13 | 14 | # Thumbnails 15 | ._* 16 | 17 | # Files that might appear in the root of a volume 18 | .DocumentRevisions-V100 19 | .fseventsd 20 | .Spotlight-V100 21 | .TemporaryItems 22 | .Trashes 23 | .VolumeIcon.icns 24 | .com.apple.timemachine.donotpresent 25 | 26 | # Directories potentially created on remote AFP share 27 | .AppleDB 28 | .AppleDesktop 29 | Network Trash Folder 30 | Temporary Items 31 | .apdisk 32 | 33 | ### macOS Patch ### 34 | # iCloud generated files 35 | *.icloud 36 | 37 | ### PyCharm+all ### 38 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider 39 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 40 | 41 | # User-specific stuff 42 | .idea/**/workspace.xml 43 | .idea/**/tasks.xml 44 | .idea/**/usage.statistics.xml 45 | .idea/**/dictionaries 46 | .idea/**/shelf 47 | 48 | # AWS User-specific 49 | .idea/**/aws.xml 50 | 51 | # Generated files 52 | .idea/**/contentModel.xml 53 | 54 | # Sensitive or high-churn files 55 | .idea/**/dataSources/ 56 | .idea/**/dataSources.ids 57 | .idea/**/dataSources.local.xml 58 | .idea/**/sqlDataSources.xml 59 | .idea/**/dynamic.xml 60 | .idea/**/uiDesigner.xml 61 | .idea/**/dbnavigator.xml 62 | 63 | # Gradle 64 | .idea/**/gradle.xml 65 | .idea/**/libraries 66 | 67 | # Gradle and Maven with auto-import 68 | # When using Gradle or Maven with auto-import, you should exclude module files, 69 | # since they will be recreated, and may cause churn. Uncomment if using 70 | # auto-import. 
71 | # .idea/artifacts 72 | # .idea/compiler.xml 73 | # .idea/jarRepositories.xml 74 | # .idea/modules.xml 75 | # .idea/*.iml 76 | # .idea/modules 77 | # *.iml 78 | # *.ipr 79 | 80 | # CMake 81 | cmake-build-*/ 82 | 83 | # Mongo Explorer plugin 84 | .idea/**/mongoSettings.xml 85 | 86 | # File-based project format 87 | *.iws 88 | 89 | # IntelliJ 90 | out/ 91 | 92 | # mpeltonen/sbt-idea plugin 93 | .idea_modules/ 94 | 95 | # JIRA plugin 96 | atlassian-ide-plugin.xml 97 | 98 | # Cursive Clojure plugin 99 | .idea/replstate.xml 100 | 101 | # SonarLint plugin 102 | .idea/sonarlint/ 103 | 104 | # Crashlytics plugin (for Android Studio and IntelliJ) 105 | com_crashlytics_export_strings.xml 106 | crashlytics.properties 107 | crashlytics-build.properties 108 | fabric.properties 109 | 110 | # Editor-based Rest Client 111 | .idea/httpRequests 112 | 113 | # Android studio 3.1+ serialized cache file 114 | .idea/caches/build_file_checksums.ser 115 | 116 | ### PyCharm+all Patch ### 117 | # Ignore everything but code style settings and run configurations 118 | # that are supposed to be shared within teams. 119 | 120 | .idea/* 121 | 122 | ### Terraform ### 123 | # Local .terraform directories 124 | **/.terraform/* 125 | 126 | # .tfstate files 127 | *.tfstate 128 | *.tfstate.* 129 | 130 | # Crash log files 131 | crash.log 132 | crash.*.log 133 | 134 | # Exclude all .tfvars files, which are likely to contain sensitive data, such as 135 | # password, private keys, and other secrets. These should not be part of version 136 | # control as they are data points which are potentially sensitive and subject 137 | # to change depending on the environment. 138 | *.tfvars 139 | *.tfvars.json 140 | 141 | # Ignore override files as they are usually used to override resources locally and so 142 | # are not checked in 143 | override.tf 144 | override.tf.json 145 | *_override.tf 146 | *_override.tf.json 147 | 148 | # Include override files you do wish to add to version control using negated pattern 149 | # !example_override.tf 150 | 151 | # Include tfplan files to ignore the plan output of command: terraform plan -out=tfplan 152 | # example: *tfplan* 153 | 154 | # Ignore CLI configuration files 155 | .terraformrc 156 | terraform.rc 157 | 158 | ### Windows ### 159 | # Windows thumbnail cache files 160 | Thumbs.db 161 | Thumbs.db:encryptable 162 | ehthumbs.db 163 | ehthumbs_vista.db 164 | 165 | # Dump file 166 | *.stackdump 167 | 168 | # Folder config file 169 | [Dd]esktop.ini 170 | 171 | # Recycle Bin used on file shares 172 | $RECYCLE.BIN/ 173 | 174 | # Windows Installer files 175 | *.cab 176 | *.msi 177 | *.msix 178 | *.msm 179 | *.msp 180 | 181 | # Windows shortcuts 182 | *.lnk 183 | 184 | # End of https://www.toptal.com/developers/gitignore/api/terraform,pycharm+all,macos,windows 185 | 186 | # BUILD 187 | 188 | brickflow.egg-info 189 | .eggs 190 | dist 191 | build 192 | 193 | # SAMPLES / TESTING 194 | brickflow/sample_dags 195 | main*.py 196 | 197 | # Coverage related 198 | .coverage 199 | coverage.xml 200 | site 201 | scripts 202 | __pycache__ 203 | integration_workflows 204 | 205 | *venv 206 | 207 | # VScode 208 | .vscode 209 | 210 | # GENERATED BY BRICKFLOW CLI --START-- 211 | 212 | ### Terraform ### 213 | # Local .terraform directories 214 | **/.terraform/* 215 | 216 | # .tfstate files 217 | *.tfstate 218 | *.tfstate.* 219 | 220 | # Crash log files 221 | crash.log 222 | crash.*.log 223 | 224 | # Exclude all .tfvars files, which are likely to contain sensitive data, such as 225 | # password, private keys, and other 
secrets. These should not be part of version 226 | # control as they are data points which are potentially sensitive and subject 227 | # to change depending on the environment. 228 | *.tfvars 229 | *.tfvars.json 230 | 231 | # Ignore override files as they are usually used to override resources locally and so 232 | # are not checked in 233 | override.tf 234 | override.tf.json 235 | *_override.tf 236 | *_override.tf.json 237 | 238 | # Include override files you do wish to add to version control using negated pattern 239 | # !example_override.tf 240 | 241 | # Include tfplan files to ignore the plan output of command: terraform plan -out=tfplan 242 | # example: *tfplan* 243 | 244 | # Ignore CLI configuration files 245 | .terraformrc 246 | terraform.rc 247 | 248 | # GENERATED BY BRICKFLOW CLI --END-- 249 | 250 | bundle.yml 251 | 252 | brickflow/bundles/schema.json 253 | brickflow/bundles/transformed_schema.json 254 | .databricks 255 | cdktf.out -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: local 3 | hooks: 4 | - id: make-check 5 | name: Running Lint Checks 6 | entry: make check 7 | language: system 8 | files: '\.py$' 9 | pass_filenames: false 10 | always_run: true 11 | stages: [commit] 12 | - id: make-cov 13 | name: Running Lint Checks & Test Suite 14 | entry: make cov 15 | language: system 16 | files: '\.py$' 17 | pass_filenames: false 18 | always_run: true 19 | stages: [push] -------------------------------------------------------------------------------- /CODEOWNERS: -------------------------------------------------------------------------------- 1 | # This is a comment. 2 | # Each line is a file pattern followed by one or more owners. 3 | 4 | # These owners will be the default owners for everything in 5 | # the repo. Unless a later match takes precedence, 6 | # @Nike-Inc/brickflow-dev will be requested for 7 | # review when someone opens a pull request. 8 | * @Nike-Inc/brickflow-dev @asingamaneni @stikkireddy @newfront 9 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to contribute 2 | 3 | There are a few guidelines that we need contributors to follow so that we are able to process requests as efficiently as possible. If you have any questions or concerns please feel free to contact us at [opensource@nike.com](mailto:opensource@nike.com). 4 | 5 | ## Getting Started 6 | 7 | * Review our [Code of Conduct](https://github.com/Nike-Inc/nike-inc.github.io/blob/master/CONDUCT.md) 8 | * Submit the [Individual Contributor License Agreement](https://www.clahub.com/agreements/Nike-Inc/fastbreak) 9 | * Make sure you have a [GitHub account](https://github.com/signup/free) 10 | * Submit a ticket for your issue, assuming one does not already exist. 11 | * Clearly describe the issue including steps to reproduce when it is a bug. 12 | * Make sure you fill in the earliest version that you know has the issue. 13 | * Fork the repository on GitHub 14 | 15 | ## Making Changes 16 | 17 | * Create a feature branch off of `main` before you start your work. 18 | * Please avoid working directly on the `main` branch. 19 | * Setup the required package manager [poetry](#-package-manager) 20 | * Setup the dev environment [see below](#-dev-environment-setup) 21 | * Make commits of logical units. 
22 | * You may be asked to squash unnecessary commits down to logical units. 23 | * Check for unnecessary whitespace with `git diff --check` before committing. 24 | * Write meaningful, descriptive commit messages. 25 | * Please follow existing code conventions when working on a file 26 | * Make sure to check the standards on the code [see below](#-linting-and-standards) 27 | * Install java 11 since it's required for unit tests while running 'make tests' 28 | * Make sure to test the code before you push changes [see below](#-testing) 29 | 30 | ## 🤝 Submitting Changes 31 | 32 | * Push your changes to a topic branch in your fork of the repository. 33 | * Submit a pull request to the repository in the Nike-Inc organization. 34 | * After feedback has been given we expect responses within two weeks. After two weeks we may close the pull request 35 | if it isn't showing any activity. 36 | * Bug fixes or features that lack appropriate tests may not be considered for merge. 37 | * Changes that lower test coverage may not be considered for merge. 38 | 39 | ### 📦 Package manager 40 | 41 | We use `make` for managing different steps of setup and maintenance in the project. You can install make by following 42 | the instructions [here](https://formulae.brew.sh/formula/make) 43 | 44 | We use `poetry` as our package manager. 45 | 46 | Please DO NOT use pip or conda to install the dependencies. Instead, use poetry: 47 | 48 | ```bash 49 | make poetry-install 50 | ``` 51 | 52 | ### 📌 Dev Environment Setup 53 | 54 | To ensure our standards, make sure to install the required packages. 55 | 56 | ```bash 57 | make dev 58 | ``` 59 | 60 | ### 🧹 Linting and Standards 61 | 62 | We use `pylint`, `black` and `mypy` to maintain standards in the codebase 63 | 64 | ```bash 65 | make check 66 | ``` 67 | 68 | Make sure that the linter does not report any errors or warnings before submitting a pull request. 69 | 70 | ### 🧪 Testing 71 | 72 | We use `pytest` to test our code. You can run the tests by running the following command: 73 | 74 | ```bash 75 | make test 76 | ``` 77 | 78 | #### 🧪 Integration Testing 79 | * Once you add a feature or a bug fix in brickflow, create a whl file from your feature branch 80 | * run 'poetry build' to generate the whl under the dist folder 81 | * Install brickflow from the whl file 82 | * pip install -whl file path- 83 | * Upload the whl file to Databricks workspace 84 | * Databricks Workspace --> Add --> Library 85 | * Copy the path of the uploaded whl file and paste it in the entrypoint.py as a Wheel Library 86 | * libraries=[ 87 | WheelTaskLibrary("dbfs:/FileStore/jars/dummy.whl") 88 | ], 89 | * Create a workflow and deploy it to make sure the feature or bug fix works as expected 90 | 91 | Make sure that all tests pass before submitting a pull request. 92 | 93 | ## 🚀 Release Process 94 | 95 | At the moment, the release process is manual. We try to make frequent releases. Usually, we release a new version when we have a new feature or bugfix. A developer with admin rights to the repository will create a new release on GitHub, and then publish the new version to PyPI. 
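As a companion to the Integration Testing steps above, here is a minimal entrypoint sketch showing where the uploaded wheel gets referenced. It is only an illustration: the project name and the `workflows` package are placeholders, the dbfs path should match wherever you uploaded your wheel, and it assumes `WheelTaskLibrary` is importable from the top-level `brickflow` package alongside the other task library helpers.

```python
from brickflow import Project, WheelTaskLibrary

import workflows  # placeholder: the package that holds your workflow modules


def main() -> None:
    with Project(
        "integration-test-project",  # placeholder project name
        libraries=[
            # Path of the wheel you uploaded to the Databricks workspace above
            WheelTaskLibrary("dbfs:/FileStore/jars/dummy.whl"),
        ],
    ) as f:
        f.add_pkg(workflows)


if __name__ == "__main__":
    main()
```

Deploying a workflow from an entrypoint like this installs your feature-branch wheel on the job cluster, so the new behaviour can be verified end to end.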
96 | 97 | # Additional Resources 98 | 99 | * [General GitHub documentation](https://help.github.com/) 100 | * [GitHub pull request documentation](https://help.github.com/send-pull-requests/) 101 | * [Nike's Code of Conduct](https://github.com/Nike-Inc/nike-inc.github.io/blob/master/CONDUCT.md) 102 | * [Nike's Individual Contributor License Agreement](https://www.clahub.com/agreements/Nike-Inc/fastbreak) 103 | * [Nike OSS](https://nike-inc.github.io/) -------------------------------------------------------------------------------- /CONTRIBUTORS.md: -------------------------------------------------------------------------------- 1 | # Authors 2 | * [Ashok Singamaneni](https://www.linkedin.com/in/ashok-singamaneni-193b1a32/) 3 | * [Sriharsha Tikkireddy](https://www.linkedin.com/in/sriharsha-tikkireddy/) 4 | 5 | # Contributors 6 | Thanks to the contributors who helped on this project apart from the authors 7 | * [Danny Meijer](https://www.linkedin.com/in/dannydatascientist/) 8 | * [Pariksheet Marotrao Barapatre](https://www.linkedin.com/in/pari-data-products/) 9 | * [Bhargav Sangars](https://www.linkedin.com/in/bhargav-sangars-a4b61037/) 10 | * [Brend Braeckmans](https://www.linkedin.com/in/brendbraeckmans/) 11 | * [Rebecca Raj Shree](https://www.linkedin.com/in/rebecca-raj-shree/) 12 | * [Brent (Johnson) Spetner](https://www.linkedin.com/in/brentjohnsoneng/) 13 | * [Dmitrii Grigorev](https://www.linkedin.com/in/dmitrii-grigorev-074739135/) 14 | * [Chanukya Konuganti](https://www.linkedin.com/in/chanukyakonuganti/) 15 | * [Maxim Mityutko](https://www.linkedin.com/in/mityutko/) 16 | * [Raju Gujjalapati](https://in.linkedin.com/in/raju-gujjalapati-470a88171) 17 | * [Madhusudan Koukutla](https://www.linkedin.com/in/madhusudan-reddy/) 18 | * [Surya Teja Jagatha](https://www.linkedin.com/in/surya-teja-jagatha/) 19 | * [Iris Meerman](https://www.linkedin.com/in/iris-meerman-92694675/) 20 | * [Michael Espiritu](https://www.linkedin.com/in/michaelespiritu92/) 21 | * [Riccardo Iacomini](https://www.linkedin.com/in/riccardo-iacomini-b757b6118/) 22 | 23 | # Honorary Mentions 24 | Thanks to the team below for invaluable insights and support throughout the initial release of this project 25 | 26 | * [Joe Hollow](https://www.linkedin.com/in/joe-hollow-23088b1/) 27 | * [Aditya Chaturvedi](https://www.linkedin.com/in/chaturvediaditya/) 28 | * [Scott Haines](https://www.linkedin.com/in/scotthaines/) 29 | * [Arijit Banerjee](https://www.linkedin.com/in/massborn/) 30 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:20.04 2 | USER root 3 | 4 | # DO NOT ADD AS ENV: 5 | # debconf noninteractive 6 | # This is the anti-frontend. It never interacts with you at all, 7 | # and makes the default answers be used for all questions. It 8 | # might mail error messages to root, but that's it; otherwise it 9 | # is completely silent and unobtrusive, a perfect frontend for 10 | # automatic installs. If you are using this front-end, and require 11 | # non-default answers to questions, you will need to preseed the 12 | # debconf database; see the section below on Unattended Package 13 | # Installation for more details. 
14 | 15 | RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections && \ 16 | apt-get update -y && apt-get install -y git curl wget unzip software-properties-common 17 | SHELL ["/bin/bash", "-c"] 18 | 19 | ENV NODE_VERSION 18.14.0 20 | 21 | RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections \ 22 | && curl https://raw.githubusercontent.com/creationix/nvm/master/install.sh | bash \ 23 | && . $HOME/.nvm/nvm.sh \ 24 | && nvm install $NODE_VERSION \ 25 | && nvm use $NODE_VERSION \ 26 | && npm install --global cdktf-cli@latest 27 | 28 | ENV NODE_PATH /root/.nvm/versions/node/v$NODE_VERSION/lib/node_modules 29 | ENV PATH /root/.nvm/versions/node/v$NODE_VERSION/bin:$PATH 30 | ENV NVM_DIR /root/.nvm 31 | 32 | RUN add-apt-repository ppa:deadsnakes/ppa 33 | RUN apt-get install -y python3.9 python3-pip python3.9-distutils && ln -s /usr/bin/python3.9 /usr/bin/python 34 | 35 | ARG CACHEBUST=1 36 | 37 | RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \ 38 | python3.9 get-pip.py && \ 39 | ln -s /usr/local/bin/pip3.9 /usr/bin/pip3 && \ 40 | ln -s /usr/local/bin/pip3.9 /usr/bin/pip 41 | 42 | RUN python -m pip install -U pip && pip install -U setuptools poetry 43 | 44 | WORKDIR /brickflow 45 | 46 | COPY . . 47 | 48 | VOLUME ["/brickflow", "$(pwd)"] 49 | 50 | RUN poetry install 51 | 52 | CMD ["/bin/bash"] 53 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | # Include the license file 2 | include LICENSE.txt -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | black-check: 2 | @poetry run black --check . 3 | 4 | fmt: 5 | @poetry run black . 6 | 7 | check: black-check mypy 8 | @poetry run prospector --profile prospector.yaml --no-autodetect 9 | 10 | mypy: 11 | @poetry run mypy 12 | 13 | cov: check 14 | @poetry run coverage run --source=brickflow --omit "brickflow/sample_dags/*,sample_workflows/*,brickflow/tf/*" -m pytest && \ 15 | poetry run coverage report -m && \ 16 | poetry run coverage xml 17 | 18 | gen-bundle-sdk: 19 | @pip install . --force-reinstall 20 | @./tools/gen-bundle.sh 21 | 22 | dev: 23 | @poetry install --all-extras --with dev 24 | @poetry run pre-commit install 25 | @poetry run pre-commit install --hook-type pre-push 26 | 27 | deploy_env_setup: 28 | @poetry install --all-extras --with dev 29 | 30 | test: 31 | @poetry run coverage run --source=brickflow --omit "brickflow/bundles/*,brickflow/sample_dags/*,sample_workflows/*,brickflow/tf/*" -m pytest && \ 32 | poetry run coverage report -m && \ 33 | poetry run coverage html 34 | 35 | clean: 36 | @rm -rf dist 37 | 38 | build: clean 39 | @poetry build 40 | 41 | poetry: 42 | @poetry install --all-extras --with dev 43 | 44 | coverage: check test 45 | 46 | docs: 47 | @poetry run mike deploy -u dev latest 48 | @poetry run mike set-default latest 49 | @poetry run mike serve 50 | 51 | deploy-docs: 52 | @poetry run mike deploy --push --update-aliases $(version) latest 53 | 54 | docker-local: 55 | docker build -t brickflow:latest --build-arg CACHEBUST="$(shell date +%s)" . 
56 | 57 | poetry-install: 58 | @pip install --upgrade setuptools && pip install poetry && poetry self add "poetry-dynamic-versioning[plugin]" 59 | 60 | get-version: 61 | @poetry version 62 | 63 | requirements: 64 | @poetry export -f requirements.txt --output requirements.txt --with dev --without-hashes 65 | 66 | docker-build: 67 | @docker build -t brickflow-local . 68 | 69 | docker: docker-build 70 | @docker run -it -v "$(shell pwd)":/brickflow brickflow-local /bin/bash 71 | 72 | .PHONY: docs -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Brickflow 2 | 3 | [//]: # ([![CodeQL](https://github.com/Nike-Inc/brickflow/actions/workflows/codeql-analysis.yml/badge.svg)](https://github.com/Nike-Inc/brickflow/actions/workflows/codeql-analysis.yml)) 4 | [![build](https://github.com/Nike-Inc/brickflow/actions/workflows/onpush.yml/badge.svg)](https://github.com/Nike-Inc/brickflow/actions/workflows/onpush.yml) 5 | [![codecov](https://codecov.io/gh/Nike-Inc/brickflow/branch/main/graph/badge.svg)](https://codecov.io/gh/Nike-Inc/brickflow) 6 | [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) 7 | [![Checked with mypy](http://www.mypy-lang.org/static/mypy_badge.svg)](http://mypy-lang.org/) 8 | [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) 9 | ![PYPI version](https://img.shields.io/pypi/v/brickflows.svg) 10 | ![PYPI - Downloads](https://static.pepy.tech/badge/brickflows) 11 | ![PYPI - Python Version](https://img.shields.io/pypi/pyversions/brickflows.svg) 12 | 13 |

14 | BrickFlow is specifically designed to enable the development of Databricks workflows using Python, streamlining the 15 | process through a command-line interface (CLI) tool.


19 | 20 | --- 21 | 22 | ### Contributors 23 | 24 | Thanks to all the [contributors](https://github.com/Nike-Inc/brickflow/blob/main/CONTRIBUTORS.md) who have helped ideate, develop and bring Brickflow to its current state. 25 | 26 | ### Contributing 27 | 28 | We're delighted that you're interested in contributing to our project! To get started, 29 | please carefully read and follow the guidelines provided in our [contributing](https://github.com/Nike-Inc/brickflow/blob/main/CONTRIBUTING.md) document. 30 | 31 | ### Documentation 32 | 33 | Brickflow documentation can be found [here](https://engineering.nike.com/brickflow/). 34 | 35 | ### Getting Started 36 | 37 | #### Prerequisites 38 | 1. Install brickflows 39 | 40 | ```shell 41 | pip install brickflows 42 | ``` 43 | 44 | 2. Install [Databricks CLI](https://docs.databricks.com/en/dev-tools/cli/databricks-cli.html) 45 | 46 | ```shell 47 | curl -fsSL https://raw.githubusercontent.com/databricks/setup-cli/main/install.sh | sudo sh 48 | ``` 49 | 50 | 3. Configure Databricks cli with workspace token. This configures your `~/.databrickscfg` file. 51 | 52 | ```shell 53 | databricks configure --token 54 | ``` 55 | 56 | #### Hello World workflow 57 | 1. Create your first workflow using brickflow 58 | ```shell 59 | mkdir hello-world-brickflow 60 | cd hello-world-brickflow 61 | brickflow projects add 62 | ``` 63 | 64 | 2. Provide the following inputs 65 | ```shell 66 | Project name: hello-world-brickflow 67 | Path from repo root to project root (optional) [.]: . 68 | Path from project root to workflows dir: workflows 69 | Git https url: https://github.com/Nike-Inc/brickflow.git 70 | Brickflow version [auto]: 71 | Spark expectations version [0.5.0]: 0.8.0 72 | Skip entrypoint [y/N]: N 73 | ``` 74 | _Note: You can provide your own github repo url._ 75 | 76 | 3. Create a new file hello_world_wf.py in the workflows directory 77 | ```shell 78 | touch workflows/hello_world_wf.py 79 | ``` 80 | 81 | 4. 
Copy the following code in hello_world_wf.py file 82 | ```python 83 | from brickflow import ( 84 | ctx, 85 | Cluster, 86 | Workflow, 87 | NotebookTask, 88 | ) 89 | from airflow.operators.bash import BashOperator 90 | 91 | 92 | cluster = Cluster( 93 | name="job_cluster", 94 | node_type_id="m6gd.xlarge", 95 | spark_version="13.3.x-scala2.12", 96 | min_workers=1, 97 | max_workers=2, 98 | ) 99 | 100 | wf = Workflow( 101 | "hello_world_workflow", 102 | default_cluster=cluster, 103 | tags={ 104 | "product_id": "brickflow_demo", 105 | }, 106 | common_task_parameters={ 107 | "catalog": "", 108 | "database": "", 109 | }, 110 | ) 111 | 112 | @wf.task 113 | # this task does nothing but explains the use of context object 114 | def start(): 115 | print(f"Environment: {ctx.env}") 116 | 117 | @wf.notebook_task 118 | # this task runs a databricks notebook 119 | def example_notebook(): 120 | return NotebookTask( 121 | notebook_path="notebooks/example_notebook.py", 122 | base_parameters={ 123 | "some_parameter": "some_value", # in the notebook access these via dbutils.widgets.get("some_parameter") 124 | }, 125 | ) 126 | 127 | 128 | @wf.task(depends_on=[start, example_notebook]) 129 | # this task runs a bash command 130 | def list_lending_club_data_files(): 131 | return BashOperator( 132 | task_id=list_lending_club_data_files.__name__, 133 | bash_command="ls -lrt /dbfs/databricks-datasets/samples/lending_club/parquet/", 134 | ) 135 | 136 | @wf.task(depends_on=list_lending_club_data_files) 137 | # this task runs the pyspark code 138 | def lending_data_ingest(): 139 | ctx.spark.sql( 140 | f""" 141 | CREATE TABLE IF NOT EXISTS 142 | {ctx.dbutils_widget_get_or_else(key="catalog", debug="development")}.\ 143 | {ctx.dbutils_widget_get_or_else(key="database", debug="dummy_database")}.\ 144 | {ctx.dbutils_widget_get_or_else(key="brickflow_env", debug="local")}_lending_data_ingest 145 | USING DELTA -- this is default just for explicit purpose 146 | SELECT * FROM parquet.`dbfs:/databricks-datasets/samples/lending_club/parquet/` 147 | """ 148 | ) 149 | ``` 150 | _Note: Modify the values of catalog/database for common_task_parameters._ 151 | 152 | 153 | 5. Create a new file example_notebook.py in the notebooks directory 154 | ```shell 155 | mkdir notebooks 156 | touch notebooks/example_notebook.py 157 | ``` 158 | 6. Copy the following code in the example_notebook.py file 159 | ```python 160 | # Databricks notebook source 161 | 162 | print("hello world") 163 | ``` 164 | 165 | #### Deploy the workflow to databricks 166 | ```shell 167 | brickflow projects deploy --project hello-world-brickflow -e local 168 | ``` 169 | 170 | ### Run the demo workflow 171 | 1. Login to databricks workspace 172 | 2. Go to the workflows and select the workflow 173 |


175 | 4. click on the run button 176 | 177 | ### Examples 178 | Refer to the [examples](https://github.com/Nike-Inc/brickflow/tree/main/examples/brickflow_examples) for more examples. 179 | 180 | 181 | -------------------------------------------------------------------------------- /brickflow/bundles/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nike-Inc/brickflow/44560e2be1d3b27f587d916d9f6ef81b0edade3d/brickflow/bundles/__init__.py -------------------------------------------------------------------------------- /brickflow/cli/commands.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import os 4 | import subprocess 5 | from typing import Optional, Union, Tuple, List 6 | 7 | from click import ClickException 8 | 9 | from brickflow import _ilog 10 | 11 | 12 | def exec_command( 13 | path_to_executable: str, 14 | base_command: Optional[str], 15 | args: Union[Tuple[str] | List[str]], 16 | capture_output: bool = False, 17 | ) -> Optional[str]: 18 | os.environ["PYTHONPATH"] = os.getcwd() 19 | my_env = os.environ.copy() 20 | try: 21 | _args = list(args) 22 | # add a base command if its provided for proxying for brickflow deploy 23 | if base_command is not None: 24 | _args = [base_command] + _args 25 | _ilog.info("Executing command: %s", " ".join([path_to_executable, *_args])) 26 | 27 | if capture_output is True: 28 | res = subprocess.run( 29 | [path_to_executable, *_args], 30 | check=True, 31 | env=my_env, 32 | capture_output=True, 33 | text=True, 34 | ) 35 | return res.stdout.strip() 36 | 37 | subprocess.run([path_to_executable, *_args], check=True, env=my_env) 38 | except subprocess.CalledProcessError as e: 39 | raise ClickException(str(e)) 40 | 41 | return None 42 | -------------------------------------------------------------------------------- /brickflow/cli/configure.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import importlib 4 | import os 5 | import re 6 | import sys 7 | from pathlib import Path 8 | from typing import Callable, Any, Optional 9 | 10 | import click 11 | from jinja2 import Environment, BaseLoader 12 | 13 | from brickflow import _ilog, BrickflowProjectConstants, get_entrypoint_python 14 | from brickflow.cli.commands import exec_command 15 | 16 | PWD = Path(__file__).parent.absolute() 17 | GITIGNORE_TEMPLATE = PWD / "gitignore_template.txt" 18 | GIT_PATH = Path(".git") 19 | 20 | 21 | class GitNotFoundError(Exception): 22 | pass 23 | 24 | 25 | class GitIgnoreNotFoundError(Exception): 26 | pass 27 | 28 | 29 | def _gitignore_exists() -> bool: 30 | return os.path.exists(".gitignore") and os.path.isfile(".gitignore") 31 | 32 | 33 | def _create_gitignore_if_not_exists() -> None: 34 | if _gitignore_exists() is False: 35 | Path(".gitignore").touch(mode=0o755) 36 | 37 | 38 | def _get_gitignore() -> str: 39 | return Path(".gitignore").read_text(encoding="utf-8") 40 | 41 | 42 | def _get_gitignore_template() -> str: 43 | return GITIGNORE_TEMPLATE.read_text() 44 | 45 | 46 | def _write_gitignore(data: str) -> None: 47 | Path(".gitignore").write_text(encoding="utf-8", data=data) 48 | 49 | 50 | def _update_gitignore() -> None: 51 | search_regex = re.compile( 52 | r"(# GENERATED BY BRICKFLOW CLI --START--(.|\n)*# GENERATED BY BRICKFLOW CLI --END--)" 53 | ) 54 | 55 | git_ignore_data = _get_gitignore() 56 | git_ignore_template = 
_get_gitignore_template() 57 | search = search_regex.findall(git_ignore_data) 58 | if len(search) > 0: 59 | search_match = search[0][0] 60 | gitignore_file_data = git_ignore_data.replace(search_match, git_ignore_template) 61 | else: 62 | gitignore_file_data = "\n\n".join([git_ignore_data, git_ignore_template]) 63 | _write_gitignore(gitignore_file_data) 64 | 65 | 66 | def _validate_package(path_str: str) -> str: 67 | folder_path: Path = Path(path_str) 68 | 69 | if not folder_path.exists(): 70 | raise ImportError(f"Invalid pkg error: {folder_path.as_posix()}") 71 | 72 | sys.path.append(os.getcwd()) 73 | folder_pkg_path: str = folder_path.as_posix().replace("/", ".") 74 | 75 | for module in folder_path.glob("**/*.py"): # only find python files 76 | # ignore __init__.py 77 | if module.name == "__init__.py": 78 | continue 79 | module_name = module.as_posix().replace(".py", "").replace("/", ".") 80 | # import all the modules into the mod object and not actually import them using __import__ 81 | mod = importlib.import_module(module_name) 82 | click.echo(f"Scanned module: {mod.__name__}") 83 | 84 | return folder_pkg_path 85 | 86 | 87 | def render_template(**kwargs) -> str: # type: ignore 88 | template = Path(__file__).parent.absolute() / "entrypoint.template" 89 | with template.open("r") as f: 90 | data = f.read() 91 | return Environment(loader=BaseLoader()).from_string(data).render(**kwargs) 92 | 93 | 94 | def create_entry_point(working_dir: str, data: str) -> None: 95 | path = Path(working_dir) / "entrypoint.py" 96 | if path.exists(): 97 | click.echo(f"Path: {str(path.absolute())} already exists...") 98 | # path = Path(working_dir) / "entrypoint.py.new" 99 | else: 100 | click.echo(f"Creating file in path: {str(path.absolute())}...") 101 | path.write_text(data) 102 | 103 | 104 | def create_brickflow_project_root_marker() -> None: 105 | path = Path( 106 | f"{BrickflowProjectConstants.DEFAULT_MULTI_PROJECT_ROOT_FILE_NAME.value}." 
107 | f"{BrickflowProjectConstants.DEFAULT_CONFIG_FILE_TYPE.value}" 108 | ) 109 | if path.exists(): 110 | click.echo(f"Path: {str(path.absolute())} already exists...") 111 | # path = Path(working_dir) / "entrypoint.py.new" 112 | else: 113 | click.echo(f"Creating file in path: {str(path.absolute())}...") 114 | path.write_text( 115 | "# DO NOT MODIFY THIS FILE - IT IS AUTO GENERATED BY BRICKFLOW AND RESERVED FOR FUTURE USAGE", 116 | encoding="utf-8", 117 | ) 118 | 119 | 120 | def bind_env_var(env_var: str) -> Callable: 121 | def callback( 122 | ctx: click.Context, # noqa 123 | param: str, # noqa 124 | value: Any, 125 | ) -> None: 126 | # pylint: disable=unused-argument 127 | if value is not None and len(value) > 0: 128 | _ilog.info("Setting env var: %s to %s...", env_var, value) 129 | if isinstance(value, list): 130 | os.environ[env_var] = ",".join(value) 131 | if isinstance(value, tuple): 132 | os.environ[env_var] = ",".join(value) 133 | elif isinstance(value, bool): 134 | os.environ[env_var] = str(value).lower() 135 | else: 136 | os.environ[env_var] = value 137 | 138 | return callback 139 | 140 | 141 | def get_entrypoint(**kwargs: Any) -> str: 142 | wd: Optional[str] = kwargs.get("workflows_dir") 143 | if wd is None: 144 | raise ValueError( 145 | "workflows_dir not set, please set it using --workflows-dir or -wd" 146 | ) 147 | return str(Path(wd) / "entrypoint.py") 148 | 149 | 150 | def log_important_versions(bundle_cli: str) -> None: 151 | version = exec_command(bundle_cli, "--version", [], capture_output=True) 152 | _ilog.info("Using bundle version: %s", version) 153 | log_python_version() 154 | 155 | 156 | def log_python_version() -> None: 157 | version = exec_command( 158 | get_entrypoint_python(), "--version", [], capture_output=True 159 | ) 160 | _ilog.info("Using python version: %s", version) 161 | -------------------------------------------------------------------------------- /brickflow/cli/constants.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from enum import Enum 4 | 5 | from decouple import config 6 | 7 | from brickflow import BrickflowEnvVars 8 | 9 | 10 | class BrickflowDeployMode(Enum): 11 | BUNDLE = "bundle" 12 | 13 | 14 | INTERACTIVE_MODE = config( 15 | BrickflowEnvVars.BRICKFLOW_INTERACTIVE_MODE.value, default=True, cast=bool 16 | ) 17 | -------------------------------------------------------------------------------- /brickflow/cli/entrypoint.template: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | 3 | from brickflow import Project, PypiTaskLibrary, MavenTaskLibrary # make sure brickflow imports are at the top 4 | 5 | import {{ pkg }} 6 | 7 | def main() -> None: 8 | """Project entrypoint""" 9 | with Project( 10 | "{{ project_name }}", 11 | git_repo="{{ git_https_url }}", 12 | provider="{{ git_provider }}", 13 | libraries=[ 14 | # PypiTaskLibrary(package="spark-expectations=={{spark_expectations_version}}"), # Uncomment if spark-expectations is needed 15 | ], 16 | ) as f: 17 | f.add_pkg({{pkg}}) 18 | 19 | 20 | if __name__ == "__main__": 21 | main() 22 | 23 | -------------------------------------------------------------------------------- /brickflow/cli/gitignore_template.txt: -------------------------------------------------------------------------------- 1 | # GENERATED BY BRICKFLOW CLI --START-- 2 | 3 | ### Terraform ### 4 | # Local .terraform directories 5 | **/.terraform/* 6 | 7 | # .tfstate files 8 | *.tfstate 9 
| *.tfstate.* 10 | 11 | # Crash log files 12 | crash.log 13 | crash.*.log 14 | 15 | # Exclude all .tfvars files, which are likely to contain sensitive data, such as 16 | # password, private keys, and other secrets. These should not be part of version 17 | # control as they are data points which are potentially sensitive and subject 18 | # to change depending on the environment. 19 | *.tfvars 20 | *.tfvars.json 21 | 22 | # Ignore override files as they are usually used to override resources locally and so 23 | # are not checked in 24 | override.tf 25 | override.tf.json 26 | *_override.tf 27 | *_override.tf.json 28 | 29 | # Include override files you do wish to add to version control using negated pattern 30 | # !example_override.tf 31 | 32 | # Include tfplan files to ignore the plan output of command: terraform plan -out=tfplan 33 | # example: *tfplan* 34 | 35 | # Ignore CLI configuration files 36 | .terraformrc 37 | terraform.rc 38 | 39 | # GENERATED BY BRICKFLOW CLI --END-- -------------------------------------------------------------------------------- /brickflow/codegen/__init__.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from enum import Enum 3 | from pathlib import Path 4 | 5 | from typing import TYPE_CHECKING, Optional, Dict, Any 6 | 7 | from decouple import config 8 | 9 | from brickflow import get_brickflow_version, BrickflowEnvVars, BrickflowDefaultEnvs 10 | 11 | if TYPE_CHECKING: 12 | from brickflow.engine.project import _Project 13 | 14 | 15 | class CodegenInterface(abc.ABC): 16 | def __init__( 17 | self, project: "_Project", id_: str, env: str, **_: Any 18 | ) -> None: # noqa 19 | self.env: str = env 20 | self.project: "_Project" = project 21 | self.id_ = id_ 22 | 23 | @abc.abstractmethod 24 | def synth(self) -> None: 25 | pass 26 | 27 | 28 | class DatabricksDefaultClusterTagKeys(Enum): 29 | ENVIRONMENT = "environment" 30 | DEPLOYED_BY = "deployed_by" 31 | DEPLOYED_AT = "deployed_at" 32 | BRICKFLOW_PROJECT_NAME = "brickflow_project_name" 33 | BRICKFLOW_DEPLOYMENT_MODE = "brickflow_deployment_mode" 34 | DATABRICKS_TF_PROVIDER_VERSION = "databricks_tf_provider_version" 35 | BRICKFLOW_VERSION = "brickflow_version" 36 | 37 | 38 | BRICKFLOW_BUILTIN_DEPLOY_TAGS = { 39 | "brickflow_version": get_brickflow_version() 40 | or "undefined", # certain scenarios get_brickflow_version maybe None 41 | } 42 | 43 | 44 | def get_brickflow_tags( 45 | user_defined_tags: Optional[Dict[str, str]], other_tags: Dict[str, str] 46 | ) -> Dict[str, str]: 47 | return {**(user_defined_tags or {}), **other_tags, **BRICKFLOW_BUILTIN_DEPLOY_TAGS} 48 | 49 | 50 | def handle_mono_repo_path(project: "_Project", env: str) -> str: 51 | base_path = config( 52 | BrickflowEnvVars.BRICKFLOW_MONOREPO_PATH_TO_BUNDLE_ROOT.value, None 53 | ) 54 | 55 | if project.entry_point_path is None: 56 | raise ValueError("project.entry_point_path is None") 57 | 58 | if base_path is None or env == BrickflowDefaultEnvs.LOCAL.value: 59 | return project.entry_point_path 60 | else: 61 | return str(Path(base_path) / project.entry_point_path) 62 | -------------------------------------------------------------------------------- /brickflow/context/__init__.py: -------------------------------------------------------------------------------- 1 | from .context import ( 2 | ctx, 3 | Context, 4 | BrickflowTaskComs, 5 | BRANCH_SKIP_EXCEPT, 6 | SKIP_EXCEPT_HACK, 7 | RETURN_VALUE_KEY, 8 | BrickflowInternalVariables, 9 | BrickflowBuiltInTaskVariables, 10 | BrickflowTaskComsObject, 11 | 
TaskComsObjectResult, 12 | ) 13 | 14 | __all__ = [ 15 | "ctx", 16 | "Context", 17 | "BrickflowTaskComs", 18 | "BRANCH_SKIP_EXCEPT", 19 | "SKIP_EXCEPT_HACK", 20 | "RETURN_VALUE_KEY", 21 | "BrickflowInternalVariables", 22 | "BrickflowBuiltInTaskVariables", 23 | "BrickflowTaskComsObject", 24 | "TaskComsObjectResult", 25 | ] 26 | -------------------------------------------------------------------------------- /brickflow/engine/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import functools 4 | import logging 5 | import subprocess 6 | import sys 7 | from typing import Callable 8 | 9 | from brickflow import log, get_default_log_handler 10 | 11 | 12 | def _call(cmd: str, **kwargs: bool) -> bytes: 13 | return subprocess.check_output( # type: ignore 14 | [ 15 | cmd, 16 | ], 17 | **kwargs, 18 | ) 19 | 20 | 21 | def get_current_commit() -> str: 22 | p = _call('git log -n 1 --pretty=format:"%H"', shell=True) 23 | return p.strip().decode("utf-8") 24 | 25 | 26 | def with_brickflow_logger(f: Callable) -> Callable: 27 | @functools.wraps(f) 28 | def func(*args, **kwargs): # type: ignore 29 | _self = args[0] 30 | log.handlers = [] 31 | logger_handler = logging.StreamHandler( 32 | stream=sys.stdout 33 | ) # Handler for the logger 34 | # First, generic formatter: 35 | logger_handler.setFormatter( 36 | logging.Formatter( 37 | f"[%(asctime)s] [%(levelname)s] [brickflow:{_self.name}] " 38 | "{%(module)s.py:%(funcName)s:%(lineno)d} - %(message)s" 39 | ) 40 | ) 41 | log.addHandler(logger_handler) 42 | resp = f(*args, **kwargs) 43 | 44 | log.handlers = [get_default_log_handler()] 45 | 46 | return resp 47 | 48 | return func 49 | 50 | 51 | ROOT_NODE = "root" 52 | -------------------------------------------------------------------------------- /brickflow/engine/hooks.py: -------------------------------------------------------------------------------- 1 | from typing import TYPE_CHECKING 2 | 3 | import pluggy 4 | 5 | if TYPE_CHECKING: 6 | from brickflow.engine.task import Task, TaskResponse # pragma: no cover 7 | from brickflow.engine.workflow import Workflow # pragma: no cover 8 | 9 | BRICKFLOW_TASK_PLUGINS = "brickflow_task_plugins" 10 | 11 | brickflow_plugin_spec = pluggy.HookspecMarker(BRICKFLOW_TASK_PLUGINS) 12 | 13 | 14 | class BrickflowTaskPluginSpec: 15 | @staticmethod 16 | def handle_user_result_errors(resp: "TaskResponse") -> None: 17 | """Custom execute method that is able to be plugged in.""" 18 | if resp.user_code_error is not None: 19 | original_message = str(resp.user_code_error) 20 | additional_info = ( 21 | "BRICKFLOW_USER_OR_DBR_ERROR: This is an error thrown in user code. 
\n" 22 | f"BRICKFLOW_INPUT_ARGS: {resp.input_kwargs}\n" 23 | "Original Exception Message: " 24 | ) 25 | new_message = additional_info + original_message 26 | resp.user_code_error.args = (new_message,) 27 | raise resp.user_code_error 28 | 29 | @staticmethod 30 | @brickflow_plugin_spec(firstresult=True) 31 | def task_execute(task: "Task", workflow: "Workflow") -> "TaskResponse": 32 | """Custom execute method that is able to be plugged in.""" 33 | raise NotImplementedError("task_execute must be implemented by a plugin") 34 | 35 | @staticmethod 36 | @brickflow_plugin_spec(firstresult=True) 37 | def handle_results( 38 | resp: "TaskResponse", task: "Task", workflow: "Workflow" 39 | ) -> "TaskResponse": 40 | """Custom execute method that is able to be plugged in.""" 41 | raise NotImplementedError("handle_results must be implemented by a plugin") 42 | -------------------------------------------------------------------------------- /brickflow/engine/utils.py: -------------------------------------------------------------------------------- 1 | import functools 2 | from typing import Callable, Type, List, Iterator, Union 3 | import pathlib 4 | import os 5 | 6 | from pydantic import SecretStr 7 | from databricks.sdk import WorkspaceClient 8 | 9 | from brickflow.context import ctx 10 | from brickflow.hints import propagate_hint 11 | 12 | 13 | @propagate_hint 14 | def wraps_keyerror(error_class: Type[Exception], msg: str) -> Callable: 15 | def wrapper(f: Callable) -> Callable: 16 | @functools.wraps(f) 17 | def func(*args, **kwargs): # type: ignore 18 | try: 19 | return f(*args, **kwargs) 20 | except KeyError as e: 21 | raise error_class( 22 | f"{msg}; err: {str(e)}; args: {args}; kwargs: {kwargs}" 23 | ) 24 | 25 | return func 26 | 27 | return wrapper 28 | 29 | 30 | def get_properties(some_obj: Type) -> List[str]: 31 | def _property_iter() -> Iterator[str]: 32 | for k, v in some_obj.__dict__.items(): 33 | if isinstance(v, property): 34 | yield k 35 | 36 | return list(_property_iter()) 37 | 38 | 39 | def get_job_id( 40 | job_name: str, host: Union[str, None] = None, token: Union[str, SecretStr] = None 41 | ) -> Union[float, None]: 42 | """ 43 | Get the job id from the specified Databricks workspace for a given job name. 
44 | 45 | Parameters 46 | ---------- 47 | job_name: str 48 | Job name (case-insensitive) 49 | host: str 50 | Databricks workspace URL 51 | token: str 52 | Databricks API token 53 | 54 | Returns 55 | ------- 56 | str 57 | Databricks job id 58 | """ 59 | ctx.log.info("Searching job id for job name: %s", job_name) 60 | 61 | if host: 62 | host = host.rstrip("/") 63 | token = token.get_secret_value() if isinstance(token, SecretStr) else token 64 | 65 | workspace_obj = WorkspaceClient(host=host, token=token) 66 | jobs_list = workspace_obj.jobs.list(name=job_name) 67 | 68 | try: 69 | for job in jobs_list: 70 | ctx.log.info("Job id for job '%s' is %s", job_name, job.job_id) 71 | return job.job_id 72 | else: # pylint: disable=useless-else-on-loop 73 | raise ValueError 74 | except ValueError: 75 | raise ValueError(f"No job found with name {job_name}") 76 | except Exception as e: 77 | ctx.log.info("An error occurred: %s", e) 78 | 79 | return None 80 | 81 | 82 | def get_bf_project_root() -> pathlib.Path: 83 | """Returns the root directory of the current Brickflow project 84 | 85 | Parameters: 86 | _file (str): file path where the function is called 87 | 88 | Returns: 89 | pathlib.Path: Brickflow project root directory 90 | """ 91 | try: 92 | _file_name = os.getcwd() 93 | _project_root = pathlib.Path(_file_name).resolve().parents[0] 94 | ctx.log.info("Setting Brickflow project root as %s", _project_root) 95 | return _project_root 96 | except Exception as e: 97 | ctx.log.info("An error occurred: %s", e) 98 | raise e 99 | -------------------------------------------------------------------------------- /brickflow/hints/__init__.py: -------------------------------------------------------------------------------- 1 | from brickflow.hints.hint import propagate_hint 2 | 3 | __all__ = ["propagate_hint"] 4 | -------------------------------------------------------------------------------- /brickflow/hints/hint.py: -------------------------------------------------------------------------------- 1 | from typing import Callable 2 | 3 | 4 | # propagate type hints for decorated functions 5 | def propagate_hint(decorator: Callable) -> Callable: 6 | return decorator 7 | -------------------------------------------------------------------------------- /brickflow/hints/py.typed: -------------------------------------------------------------------------------- 1 | from typing import Callable 2 | 3 | def propagate_hint(decorator: Callable) -> Callable: ... 
-------------------------------------------------------------------------------- /brickflow/resolver/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import inspect 4 | import os 5 | import sys 6 | from pathlib import Path 7 | from typing import Union, Any, List, Optional 8 | import pathlib 9 | 10 | from brickflow import BrickflowProjectConstants, _ilog, ctx 11 | 12 | 13 | def add_to_sys_path(directory: Union[str, pathlib.Path]) -> None: 14 | dir_str = str(directory) 15 | if dir_str not in sys.path and os.path.isdir(dir_str): 16 | sys.path.append(dir_str) 17 | 18 | 19 | def get_caller_file_paths() -> List[str]: 20 | caller_file_paths = [] 21 | frames = inspect.stack()[1:] # Exclude the current frame 22 | 23 | for frame in frames: 24 | caller_file_paths.append(frame.filename) 25 | 26 | return list(set(caller_file_paths)) 27 | 28 | 29 | class BrickflowRootNotFound(Exception): 30 | pass 31 | 32 | 33 | def go_up_till_brickflow_root(cur_path: str) -> str: 34 | if cur_path.startswith("<"): 35 | raise BrickflowRootNotFound("Invalid brickflow root.") 36 | 37 | path = pathlib.Path(cur_path).resolve() 38 | 39 | valid_roots = [ 40 | f"{BrickflowProjectConstants.DEFAULT_MULTI_PROJECT_ROOT_FILE_NAME.value}." 41 | f"{BrickflowProjectConstants.DEFAULT_CONFIG_FILE_TYPE.value}", 42 | f"{BrickflowProjectConstants.DEFAULT_MULTI_PROJECT_CONFIG_FILE_NAME.value}." 43 | f"{BrickflowProjectConstants.DEFAULT_CONFIG_FILE_TYPE.value}", 44 | ] 45 | 46 | # recurse to see if there is a brickflow root and return the path 47 | while not path.is_dir() or not any( 48 | file.name in valid_roots for file in path.iterdir() 49 | ): 50 | path = path.parent 51 | 52 | if path == path.parent: 53 | raise BrickflowRootNotFound( 54 | "Brickflow root directory not found in path hierarchy." 
55 | ) 56 | 57 | return str(path.resolve()) 58 | 59 | 60 | def get_relative_path_to_brickflow_root() -> None: 61 | paths = get_caller_file_paths() 62 | _ilog.info("Brickflow setting up python path resolution...") 63 | # if inside notebook also get that path 64 | notebook_path = get_notebook_ws_path(ctx.dbutils) 65 | if notebook_path is not None: 66 | paths.append(notebook_path) 67 | 68 | for path in paths: 69 | try: 70 | resolved_path = go_up_till_brickflow_root(path) 71 | _ilog.info("Brickflow root input path - %s", path) 72 | _ilog.info("Brickflow root found - %s", resolved_path) 73 | add_to_sys_path(resolved_path) 74 | _ilog.info("Sys path set to: %s", str(sys.path)) 75 | except BrickflowRootNotFound: 76 | _ilog.info("Unable to find for path: %s", path) 77 | except PermissionError: 78 | _ilog.info("Most likely not accessible due to shared cluster: %s", path) 79 | 80 | 81 | def get_notebook_ws_path(dbutils: Optional[Any]) -> Optional[str]: 82 | if dbutils is not None: 83 | return str( 84 | "/Workspace" 85 | / Path( 86 | dbutils.notebook.entry_point.getDbutils() 87 | .notebook() 88 | .getContext() 89 | .notebookPath() 90 | .get() 91 | .lstrip("/") 92 | ) 93 | ) 94 | return None 95 | -------------------------------------------------------------------------------- /brickflow_plugins/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import List, Optional 3 | 4 | import pluggy 5 | 6 | from brickflow import get_default_log_handler 7 | 8 | 9 | def setup_logger(): 10 | _log = logging.getLogger(__name__) # Logger 11 | _log.setLevel(logging.INFO) 12 | logger_handler = get_default_log_handler("brickflow-plugins") 13 | _log.addHandler(logger_handler) 14 | _log.propagate = False 15 | return _log 16 | 17 | 18 | log = setup_logger() 19 | 20 | from brickflow_plugins.airflow.operators.external_tasks import ( 21 | TaskDependencySensor, 22 | AutosysSensor, 23 | AirflowProxyOktaClusterAuth, 24 | ) 25 | from brickflow_plugins.airflow.operators.external_tasks_tableau import ( 26 | TableauRefreshDataSourceOperator, 27 | TableauRefreshWorkBookOperator, 28 | ) 29 | from brickflow_plugins.airflow.operators.native_operators import ( 30 | BashOperator, 31 | BranchPythonOperator, 32 | ShortCircuitOperator, 33 | ) 34 | from brickflow_plugins.databricks.workflow_dependency_sensor import ( 35 | WorkflowDependencySensor, 36 | WorkflowTaskDependencySensor, 37 | ) 38 | from brickflow_plugins.databricks.uc_to_snowflake_operator import ( 39 | SnowflakeOperator, 40 | UcToSnowflakeOperator, 41 | ) 42 | from brickflow_plugins.databricks.box_operator import ( 43 | BoxToVolumesOperator, 44 | VolumesToBoxOperator, 45 | BoxOperator, 46 | ) 47 | from brickflow_plugins.databricks.sla_sensor import SLASensor 48 | 49 | 50 | def load_plugins(cache_bust: Optional[pluggy.PluginManager] = None) -> None: 51 | from brickflow.engine.task import get_plugin_manager 52 | from brickflow_plugins.airflow.brickflow_task_plugin import ( 53 | AirflowOperatorBrickflowTaskPluginImpl, 54 | ) 55 | 56 | if cache_bust is not None: 57 | cache_bust.register( 58 | AirflowOperatorBrickflowTaskPluginImpl(), name="airflow-plugin" 59 | ) 60 | return 61 | 62 | get_plugin_manager().register(AirflowOperatorBrickflowTaskPluginImpl()) 63 | 64 | 65 | def ensure_installation(): 66 | """Ensures that the brickflow_plugins package is installed in the current environment.""" 67 | from brickflow_plugins.airflow.cronhelper import cron_helper # noqa 68 | import airflow # noqa 69 | 70 | 71 | __all__: 
List[str] = [ 72 | "TaskDependencySensor", 73 | "AutosysSensor", 74 | "AirflowProxyOktaClusterAuth", 75 | "BashOperator", 76 | "BranchPythonOperator", 77 | "ShortCircuitOperator", 78 | "WorkflowDependencySensor", 79 | "WorkflowTaskDependencySensor", 80 | "SnowflakeOperator", 81 | "UcToSnowflakeOperator", 82 | "TableauRefreshDataSourceOperator", 83 | "TableauRefreshWorkBookOperator", 84 | "BoxToVolumesOperator", 85 | "VolumesToBoxOperator", 86 | "BoxOperator", 87 | "SLASensor", 88 | "load_plugins", 89 | "ensure_installation", 90 | ] 91 | -------------------------------------------------------------------------------- /brickflow_plugins/airflow/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nike-Inc/brickflow/44560e2be1d3b27f587d916d9f6ef81b0edade3d/brickflow_plugins/airflow/__init__.py -------------------------------------------------------------------------------- /brickflow_plugins/airflow/brickflow_task_plugin.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | import datetime 3 | import pendulum 4 | 5 | try: 6 | from airflow import macros 7 | from airflow.models import BaseOperator 8 | from airflow.utils.context import Context 9 | except ImportError: 10 | raise ImportError( 11 | "You must install airflow to use airflow plugins, " 12 | "please try pip install brickflow[apache-airflow]" 13 | ) 14 | 15 | from jinja2 import Environment 16 | from brickflow.context import ctx 17 | from brickflow.engine.hooks import BrickflowTaskPluginSpec 18 | from brickflow.engine.task import brickflow_task_plugin_impl, Task, TaskResponse 19 | from brickflow.engine.workflow import Workflow 20 | 21 | from brickflow_plugins import log 22 | from brickflow_plugins.airflow.context import get_task_context 23 | from brickflow_plugins.airflow.operators import get_modifier_chain 24 | from brickflow_plugins.secrets import BrickflowSecretsBackend 25 | 26 | 27 | def epoch_to_pendulum_datetime(epoch_str: Optional[str]): 28 | if epoch_str is None: 29 | return None 30 | return pendulum.instance(datetime.datetime.fromtimestamp(int(epoch_str) / 1000)) 31 | 32 | 33 | class AirflowOperatorBrickflowTaskPluginImpl(BrickflowTaskPluginSpec): 34 | @staticmethod 35 | @brickflow_task_plugin_impl(tryfirst=True) 36 | def handle_results( 37 | resp: "TaskResponse", task: "Task", workflow: "Workflow" 38 | ) -> "TaskResponse": 39 | log.info( 40 | "using AirflowOperatorBrickflowTaskPlugin for handling results for task: %s", 41 | task.task_id, 42 | ) 43 | 44 | BrickflowTaskPluginSpec.handle_user_result_errors(resp) 45 | 46 | _operator = resp.response 47 | 48 | if not isinstance(_operator, BaseOperator): 49 | return resp 50 | 51 | operator_modifier_chain = get_modifier_chain() 52 | # modify any functionality of operators and then 53 | _operator = operator_modifier_chain.modify(_operator, task, workflow) 54 | 55 | if hasattr(_operator, "log"): 56 | # overwrite the operator logger if it has one to the brickflow logger 57 | setattr(_operator, "_log", ctx.log) 58 | 59 | context: Context = get_task_context( 60 | task.task_id, 61 | _operator, 62 | workflow.schedule_quartz_expression, 63 | epoch_to_pendulum_datetime(ctx.start_time(debug=None)), 64 | tz=workflow.timezone, 65 | ) 66 | 67 | env: Optional[Environment] = Environment() 68 | env.globals.update({"macros": macros, "ti": context}) 69 | with BrickflowSecretsBackend(): 70 | _operator.render_template_fields(context, jinja_env=env) 71 | op_resp = 
_operator.execute(context) 72 | return TaskResponse( 73 | response=op_resp, 74 | push_return_value=_operator.do_xcom_push, 75 | ) 76 | -------------------------------------------------------------------------------- /brickflow_plugins/airflow/context/__init__.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | try: 4 | from airflow.models import BaseOperator 5 | from airflow.utils.context import Context 6 | except ImportError: 7 | raise ImportError( 8 | "You must install airflow to use airflow plugins, " 9 | "please try pip install brickflow[apache-airflow]" 10 | ) 11 | 12 | from pendulum import DateTime 13 | from brickflow.context import ctx, RETURN_VALUE_KEY 14 | from brickflow_plugins.airflow.cronhelper import cron_helper 15 | from brickflow_plugins.airflow.vendor.timetable import create_timetable 16 | from brickflow_plugins.airflow.vendor.timezone import TIMEZONE 17 | 18 | 19 | class CrossDagXComsNotSupportedError(Exception): 20 | pass 21 | 22 | 23 | class XComsPullMultipleTaskIdsError(Exception): 24 | pass 25 | 26 | 27 | class FakeTaskInstance(object): 28 | def __init__( 29 | self, 30 | task_id: str, 31 | operator: BaseOperator, 32 | execution_date: str, 33 | ): 34 | self._operator = operator 35 | self._execution_date = execution_date 36 | self._task_id = task_id 37 | 38 | def xcom_push(self, key, value): 39 | ctx.task_coms.put(task_id=self._task_id, key=key, value=value) 40 | 41 | def xcom_pull(self, task_ids, key=RETURN_VALUE_KEY, dag_id=None): 42 | if dag_id is not None: 43 | raise CrossDagXComsNotSupportedError( 44 | "Cross dag xcoms not supported in framework raise feature request." 45 | ) 46 | if isinstance(task_ids, list) and len(task_ids) > 1: 47 | raise XComsPullMultipleTaskIdsError( 48 | "Currently xcoms pull only supports one task_id please raise feature " 49 | "request." 
50 | ) 51 | task_id = task_ids[0] if isinstance(task_ids, list) else task_ids 52 | return ctx.task_coms.get(task_id, key) 53 | 54 | @property 55 | def execution_date(self): 56 | return self._execution_date 57 | 58 | @property 59 | def operator(self): 60 | return self._operator 61 | 62 | 63 | def execution_timestamp( 64 | quartz_cron_statement: Optional[str] = None, 65 | ts: Optional[DateTime] = None, 66 | tz=TIMEZONE, 67 | ) -> DateTime: 68 | if quartz_cron_statement is None: 69 | return DateTime.utcnow() 70 | if ts is None: 71 | ts = DateTime.utcnow() 72 | cron = cron_helper.quartz_to_unix(quartz_cron_statement) 73 | tt = create_timetable(cron, tz) 74 | return tt.align_to_prev(ts) 75 | 76 | 77 | def get_task_context( 78 | task_id, operator: BaseOperator, quartz_cron_statement, ts, tz=TIMEZONE 79 | ) -> Context: 80 | execution_ts = execution_timestamp(quartz_cron_statement, ts, tz) 81 | return Context( 82 | **{ 83 | "execution_date": str(execution_ts), 84 | "ds": execution_ts.strftime("%Y-%m-%d"), 85 | "ds_nodash": execution_ts.strftime("%Y%m%d"), 86 | "ts": str(execution_ts), 87 | "ts_nodash": execution_ts.strftime("%Y%m%d%H%M%S"), 88 | "ti": FakeTaskInstance(task_id, operator, str(execution_ts)), 89 | } 90 | ) 91 | -------------------------------------------------------------------------------- /brickflow_plugins/airflow/cronhelper.py: -------------------------------------------------------------------------------- 1 | import re 2 | import functools 3 | 4 | from brickflow_plugins import log 5 | 6 | 7 | class CronHelper: 8 | EVERY_X_UNITS_REPLACE_PLACEHOLDER = "%s" 9 | QUARTZ_EVERY_X_UNITS_REGEX = re.compile(r"^0/(\d+)$") # For handling 0/5 units 10 | UNIX_EVERY_X_UNITS_REGEX = re.compile(r"^\*/(\d+)$") # For handling */5 units 11 | QUARTZ_EVERY_X_UNITS_REPLACE_PATTERN = f"0/{EVERY_X_UNITS_REPLACE_PLACEHOLDER}" 12 | UNIX_EVERY_X_UNITS_REPLACE_PATTERN = f"*/{EVERY_X_UNITS_REPLACE_PLACEHOLDER}" 13 | 14 | @staticmethod 15 | def __get_expression_parts(expression: str) -> list: 16 | parts = [part.strip() for part in expression.split(" ")] 17 | 18 | # Unix cron expression have 5 parts, Quartz cron expression have 6 or 7 parts 19 | if len(parts) in [5, 7]: 20 | return parts 21 | # Year is an optional part in Quartz cron expression, adding the extra element to mimic 7 part Quartz expression 22 | if len(parts) == 6: 23 | parts.append("*") 24 | return parts 25 | 26 | raise ValueError("Invalid cron expression!") 27 | 28 | @staticmethod 29 | def convert_interval_parts(part: str, is_quartz: bool = False) -> str: 30 | every_x_units_pattern = ( 31 | CronHelper.QUARTZ_EVERY_X_UNITS_REGEX 32 | if is_quartz 33 | else CronHelper.UNIX_EVERY_X_UNITS_REGEX 34 | ) 35 | matches = every_x_units_pattern.match(part) 36 | every_x_units_replace_pattern = ( 37 | CronHelper.QUARTZ_EVERY_X_UNITS_REPLACE_PATTERN 38 | if is_quartz 39 | else CronHelper.UNIX_EVERY_X_UNITS_REPLACE_PATTERN 40 | ) 41 | 42 | if matches: 43 | return every_x_units_replace_pattern.replace( 44 | CronHelper.EVERY_X_UNITS_REPLACE_PLACEHOLDER, matches.group(1) 45 | ) 46 | 47 | return part 48 | 49 | @functools.lru_cache(maxsize=128) # cron expression conversion will not change 50 | def unix_to_quartz(self, unix_cron: str) -> str: 51 | parts = self.__get_expression_parts(expression=unix_cron) 52 | 53 | if len(parts) != 5: 54 | raise ValueError("Invalid Unix cron expression") 55 | 56 | minute, hour, dom, month, dow = map(self.convert_interval_parts, parts) 57 | 58 | # Converting Unix DOW to Quartz DOW 59 | def shift_days(day: str) -> str: 60 | """ 61 | 
Quartz DOW starts from 1 (Sunday) while Unix DOW starts from 0 (Sunday) 62 | """ 63 | if "-" in day: 64 | return "-".join([shift_days(day=d) for d in day.split("-")]) 65 | 66 | # Unix cron Sunday can be represented as 0 or 7, but only as 1 in Quartz cron 67 | if day in ["0", "7"]: 68 | return "1" 69 | if day in ["SUN", "MON", "TUE", "WED", "THU", "FRI", "SAT"]: 70 | return day 71 | return str(int(day) + 1) 72 | 73 | if "," in dow: 74 | quartz_dow = ",".join([shift_days(day=day) for day in dow.split(",")]) 75 | elif dow == "*": 76 | quartz_dow = dow 77 | else: 78 | quartz_dow = shift_days(day=dow) 79 | 80 | quartz_dom = dom 81 | 82 | if dom != "*" and dow == "*": 83 | quartz_dow = "?" 84 | elif dom == "*": 85 | quartz_dom = "?" 86 | 87 | quartz_cron = f"0 {minute} {hour} {quartz_dom} {month} {quartz_dow} *" 88 | log.info("Converted unix cron %s to quartz cron %s", unix_cron, quartz_cron) 89 | return quartz_cron 90 | 91 | @functools.lru_cache(maxsize=128) # cron expression conversion will not change 92 | def quartz_to_unix(self, quartz_cron: str) -> str: 93 | parts = self.__get_expression_parts(expression=quartz_cron) 94 | 95 | if len(parts) != 7: 96 | raise ValueError("Invalid Quartz cron expression") 97 | 98 | if "L" in quartz_cron or "W" in quartz_cron or "#" in quartz_cron: 99 | raise ValueError("Support for 'L, W, #' in Quartz cron is not implemented") 100 | 101 | # Unix cron expression does not support '?' 102 | parts = [part.replace("?", "*") for part in parts] 103 | 104 | _, minute, hour, dom, month, dow, _ = map( 105 | lambda part: self.convert_interval_parts(part, True), parts 106 | ) 107 | 108 | # Converting Quartz DOW to Unix DOW 109 | def shift_days(day: str) -> str: 110 | """ 111 | Quartz DOW starts from 1 (Sunday) while Unix DOW starts from 0 (Sunday) 112 | """ 113 | if "-" in day: 114 | return "-".join([shift_days(day=d) for d in day.split("-")]) 115 | if day in ["SUN", "MON", "TUE", "WED", "THU", "FRI", "SAT"]: 116 | return day 117 | 118 | return str(int(day) - 1) 119 | 120 | if "," in dow: 121 | unix_dow = ",".join([shift_days(day=day) for day in dow.split(",")]) 122 | elif dow == "*": 123 | unix_dow = "*" 124 | else: 125 | unix_dow = shift_days(day=dow) 126 | 127 | unix_dom = dom 128 | 129 | unix_cron = f"{minute} {hour} {unix_dom} {month} {unix_dow}" 130 | log.info("Converted quartz cron %s to unix cron %s", quartz_cron, unix_cron) 131 | return unix_cron 132 | 133 | 134 | cron_helper = CronHelper() 135 | -------------------------------------------------------------------------------- /brickflow_plugins/airflow/operators/__init__.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import os 3 | from abc import abstractmethod, ABCMeta 4 | from typing import Optional 5 | 6 | try: 7 | from airflow.models import BaseOperator, Pool 8 | from airflow.utils.weight_rule import WeightRule 9 | except ImportError: 10 | raise ImportError( 11 | "You must install airflow to use airflow plugins, " 12 | "please try pip install brickflow[apache-airflow]" 13 | ) 14 | 15 | from brickflow.engine.task import Task 16 | from brickflow.engine.workflow import Workflow 17 | 18 | 19 | class AirflowTaskDoesNotExistError(Exception): 20 | pass 21 | 22 | 23 | class UnsupportedAirflowTaskFieldError(Exception): 24 | pass 25 | 26 | 27 | class UnsupportedAirflowOperatorError(Exception): 28 | pass 29 | 30 | 31 | class AbstractOperatorModifier(metaclass=ABCMeta): 32 | @abstractmethod 33 | def set_next( 34 | self, op_handler: "AbstractOperatorModifier" 
35 | ) -> "AbstractOperatorModifier": 36 | pass 37 | 38 | @abstractmethod 39 | def modify( 40 | self, operator: BaseOperator, task: Task, workflow: Workflow 41 | ) -> "BaseOperator": 42 | pass 43 | 44 | 45 | class OperatorModifier(AbstractOperatorModifier): 46 | def __init__(self): 47 | self._next_handler: Optional[AbstractOperatorModifier] = None 48 | 49 | def set_next( 50 | self, op_handler: "AbstractOperatorModifier" 51 | ) -> "AbstractOperatorModifier": 52 | self._next_handler = op_handler 53 | return op_handler 54 | 55 | @abstractmethod 56 | def modify( 57 | self, operator: BaseOperator, task: Task, workflow: Workflow 58 | ) -> Optional["BaseOperator"]: 59 | if self._next_handler is not None: 60 | return self._next_handler.modify(operator, task, workflow) 61 | 62 | return None 63 | 64 | 65 | class InvalidFieldChecker(OperatorModifier): 66 | UNSUPPORTED_TASK_NONE_FIELDS = { 67 | "email_on_retry": True, 68 | "email_on_failure": True, 69 | "sla": None, 70 | "execution_timeout": None, 71 | "on_failure_callback": None, 72 | "on_success_callback": None, 73 | "on_retry_callback": None, 74 | "inlets": [], 75 | "outlets": [], 76 | "task_concurrency": None, 77 | "max_active_tis_per_dag": None, 78 | "run_as_user": None, 79 | "depends_on_past": False, 80 | "wait_for_downstream": False, 81 | "max_retry_delay": None, 82 | "priority_weight": 1, 83 | "weight_rule": WeightRule.DOWNSTREAM, 84 | "pool": Pool.DEFAULT_POOL_NAME, 85 | "pool_slots": 1, 86 | "resources": None, 87 | "executor_config": {}, 88 | "email": None, 89 | } 90 | 91 | def _validate_task_fields(self, operator: BaseOperator, task: Task) -> None: 92 | unsupported_fields = [] 93 | for field, default_value in self.UNSUPPORTED_TASK_NONE_FIELDS.items(): 94 | if hasattr(operator, field) is False: 95 | continue 96 | value = getattr(operator, field) 97 | if value != default_value: 98 | unsupported_fields.append(field) 99 | if unsupported_fields: 100 | raise UnsupportedAirflowTaskFieldError( 101 | f"Unsupported fields: {unsupported_fields} for task: {task.task_id}" 102 | ) 103 | 104 | def modify( 105 | self, operator: BaseOperator, task: Task, workflow: Workflow 106 | ) -> Optional["BaseOperator"]: 107 | if isinstance(operator, BaseOperator): 108 | self._validate_task_fields(operator, task) 109 | return super().modify(operator, task, workflow) 110 | 111 | 112 | class CatchAllOperatorModifier(OperatorModifier): 113 | SUPPORTED_OPERATORS = [ 114 | "BranchPythonOperator", 115 | "PythonOperator", 116 | "BashOperator", 117 | "ShortCircuitOperator", 118 | "TaskDependencySensor", 119 | "AutosysSensor", 120 | "TableauRefreshDataSourceOperator", 121 | "TableauRefreshWorkBookOperator", 122 | ] 123 | 124 | def _validate_operators(self, operator: BaseOperator, task: Task) -> None: 125 | if ( 126 | issubclass(operator.__class__, BaseOperator) 127 | and operator.__class__.__name__ in self.SUPPORTED_OPERATORS 128 | ): 129 | return 130 | raise UnsupportedAirflowOperatorError( 131 | f"Unsupported airflow operator: {type(task)} for task: {task.task_id}" 132 | ) 133 | 134 | def modify( 135 | self, operator: BaseOperator, task: Task, workflow: Workflow 136 | ) -> Optional["BaseOperator"]: 137 | if isinstance(operator, BaseOperator): 138 | self._validate_operators(operator, task) 139 | return operator 140 | 141 | 142 | def get_modifier_chain(): 143 | from brickflow_plugins.airflow import operators 144 | import importlib 145 | import inspect 146 | 147 | start_chain = InvalidFieldChecker() 148 | next_node = start_chain 149 | pkg = operators 150 | file_name = 
pkg.__file__ 151 | for module in os.listdir(os.path.dirname(file_name)): 152 | # only find python files and ignore __init__.py 153 | if module == "__init__.py" or module[-3:] != ".py": 154 | continue 155 | module_name = module.replace(".py", "") 156 | # import all the modules into the mod object and not actually import them using __import__ 157 | mod = importlib.import_module(f"{pkg.__name__}.{module_name}") 158 | for obj in dir(mod): 159 | module_item = getattr(mod, obj) 160 | # if issubclass(module_item, OperatorModifier): 161 | if ( 162 | inspect.isclass(module_item) 163 | and module_item != operators.OperatorModifier 164 | and issubclass(module_item, operators.OperatorModifier) 165 | ): 166 | # print(module_item) 167 | next_node = next_node.set_next(module_item()) 168 | 169 | next_node.set_next(CatchAllOperatorModifier()) 170 | return start_chain 171 | 172 | 173 | def check_if(klass): 174 | def outer(f): 175 | @functools.wraps(f) 176 | def inner(*args, **kwargs) -> Optional["BaseOperator"]: 177 | self, operator = args[0], args[1] 178 | super_func = getattr(super(type(self), self), f.__name__) 179 | if not isinstance(operator, klass): 180 | # super function won't accept self 181 | # this is to go along the chain 182 | return super_func(*args[1:], **kwargs) 183 | return f(*args, **kwargs) 184 | 185 | return inner 186 | 187 | return outer 188 | -------------------------------------------------------------------------------- /brickflow_plugins/airflow/operators/native_operators.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import sys 4 | import tempfile 5 | import time 6 | import types 7 | from typing import Optional 8 | 9 | from airflow.operators.bash import BashOperator 10 | from airflow.operators.python import BranchPythonOperator, ShortCircuitOperator 11 | 12 | from brickflow.context import BRANCH_SKIP_EXCEPT, SKIP_EXCEPT_HACK 13 | from brickflow.engine.task import Task 14 | from brickflow.engine.workflow import Workflow 15 | from brickflow_plugins import log 16 | from brickflow_plugins.airflow.operators import OperatorModifier, check_if 17 | 18 | 19 | def _bash_execute(self, context): # pylint:disable=unused-argument 20 | p = None 21 | returncode = None 22 | start = time.time() 23 | env = self.env 24 | if env is None: 25 | env = os.environ.copy() 26 | 27 | # log.info("Command: %s", self.bash_command) 28 | 29 | with tempfile.TemporaryDirectory(prefix="airflowtmp") as tmp_dir: 30 | try: 31 | p = subprocess.Popen( # pylint:disable=consider-using-with 32 | self.bash_command, 33 | shell=True, 34 | cwd=tmp_dir, 35 | executable="/bin/bash", 36 | stderr=subprocess.STDOUT, 37 | stdout=subprocess.PIPE, 38 | universal_newlines=True, 39 | env=env, 40 | ) 41 | for line in iter(p.stdout.readline, ""): 42 | resp = line 43 | log.info("[STDOUT]: %s", line.rstrip()) 44 | returncode = p.wait() 45 | p = None 46 | sys.stdout.flush() 47 | if returncode != 0: 48 | raise subprocess.CalledProcessError(returncode, self.bash_command) 49 | finally: 50 | end = time.time() 51 | if p is not None: 52 | p.terminate() 53 | p.wait() 54 | log.info("Command: exited with return code %s", returncode) 55 | log.info("Command took %s seconds", end - start) 56 | 57 | if self.do_xcom_push is True: 58 | return resp[:-1] # skip newline char at end 59 | return 60 | 61 | 62 | def _bash_empty_on_kill(self): # pylint:disable=unused-argument 63 | pass 64 | 65 | 66 | def _skip_all_except( 67 | self, ti: "FakeTaskInstance", branch_task_ids 68 | ): # 
pylint:disable=unused-argument 69 | log.info("Skipping all tasks except: %s", branch_task_ids) 70 | ti.xcom_push(BRANCH_SKIP_EXCEPT, branch_task_ids) 71 | 72 | 73 | def _short_circuit_execute(self, context): 74 | condition = super(ShortCircuitOperator, self).execute(context) 75 | log.info("Condition result is %s", condition) 76 | 77 | if condition: 78 | log.info("Proceeding with downstream tasks...") 79 | return 80 | 81 | # log 82 | log.info("Skipping downstream tasks...") 83 | ti = context["ti"] 84 | ti.xcom_push(BRANCH_SKIP_EXCEPT, SKIP_EXCEPT_HACK) 85 | 86 | 87 | class BashOperatorModifier(OperatorModifier): 88 | @check_if(BashOperator) 89 | def modify( 90 | self, operator: BashOperator, task: Task, workflow: Workflow 91 | ) -> Optional["BashOperator"]: 92 | f = types.MethodType(_bash_execute, operator) 93 | operator.execute = f 94 | operator.on_kill = _bash_empty_on_kill 95 | return operator 96 | 97 | 98 | class BranchPythonOperatorModifier(OperatorModifier): 99 | @check_if(BranchPythonOperator) 100 | def modify( 101 | self, operator: BranchPythonOperator, task: Task, workflow: Workflow 102 | ) -> Optional["BranchPythonOperator"]: 103 | f = types.MethodType(_skip_all_except, operator) 104 | operator.skip_all_except = f 105 | return operator 106 | 107 | 108 | class ShortCircuitOperatorModifier(OperatorModifier): 109 | @check_if(ShortCircuitOperator) 110 | def modify( 111 | self, operator: ShortCircuitOperator, task: Task, workflow: Workflow 112 | ) -> Optional["ShortCircuitOperator"]: 113 | f = types.MethodType(_short_circuit_execute, operator) 114 | operator.execute = f 115 | return operator 116 | -------------------------------------------------------------------------------- /brickflow_plugins/airflow/vendor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nike-Inc/brickflow/44560e2be1d3b27f587d916d9f6ef81b0edade3d/brickflow_plugins/airflow/vendor/__init__.py -------------------------------------------------------------------------------- /brickflow_plugins/airflow/vendor/context.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import contextlib 4 | import copy 5 | from typing import MutableMapping, Any, Iterator, KeysView, ItemsView, ValuesView 6 | 7 | 8 | class Context(MutableMapping[str, Any]): 9 | """Jinja2 template context for task rendering. 10 | 11 | This is a mapping (dict-like) class that can lazily emit warnings when 12 | (and only when) deprecated context keys are accessed. 
13 | """ 14 | 15 | _DEPRECATION_REPLACEMENTS: dict[str, list[str]] = { 16 | "execution_date": ["data_interval_start", "logical_date"], 17 | "next_ds": ["{{ data_interval_end | ds }}"], 18 | "next_ds_nodash": ["{{ data_interval_end | ds_nodash }}"], 19 | "next_execution_date": ["data_interval_end"], 20 | "prev_ds": [], 21 | "prev_ds_nodash": [], 22 | "prev_execution_date": [], 23 | "prev_execution_date_success": ["prev_data_interval_start_success"], 24 | "tomorrow_ds": [], 25 | "tomorrow_ds_nodash": [], 26 | "yesterday_ds": [], 27 | "yesterday_ds_nodash": [], 28 | } 29 | 30 | def __init__( 31 | self, context: MutableMapping[str, Any] | None = None, **kwargs: Any 32 | ) -> None: 33 | self._context: MutableMapping[str, Any] = context or {} 34 | if kwargs: 35 | self._context.update(kwargs) 36 | self._deprecation_replacements = self._DEPRECATION_REPLACEMENTS.copy() 37 | 38 | def __repr__(self) -> str: 39 | return repr(self._context) 40 | 41 | def __reduce_ex__(self, protocol: int) -> tuple[Any, ...]: 42 | """Pickle the context as a dict. 43 | 44 | We are intentionally going through ``__getitem__`` in this function, 45 | instead of using ``items()``, to trigger deprecation warnings. 46 | """ 47 | items = [(key, self[key]) for key in self._context] 48 | return dict, (items,) 49 | 50 | def __copy__(self) -> Context: 51 | new = type(self)(copy.copy(self._context)) 52 | new._deprecation_replacements = self._deprecation_replacements.copy() 53 | return new 54 | 55 | def __getitem__(self, key: str) -> Any: 56 | # with contextlib.suppress(KeyError): 57 | # warnings.warn(_create_deprecation_warning(key, self._deprecation_replacements[key])) 58 | with contextlib.suppress(KeyError): 59 | return self._context[key] 60 | raise KeyError(key) 61 | 62 | def __setitem__(self, key: str, value: Any) -> None: 63 | self._deprecation_replacements.pop(key, None) 64 | self._context[key] = value 65 | 66 | def __delitem__(self, key: str) -> None: 67 | self._deprecation_replacements.pop(key, None) 68 | del self._context[key] 69 | 70 | def __contains__(self, key: object) -> bool: 71 | return key in self._context 72 | 73 | def __iter__(self) -> Iterator[str]: 74 | return iter(self._context) 75 | 76 | def __len__(self) -> int: 77 | return len(self._context) 78 | 79 | def __eq__(self, other: Any) -> bool: 80 | if not isinstance(other, Context): 81 | return NotImplemented 82 | return self._context == other._context 83 | 84 | def __ne__(self, other: Any) -> bool: 85 | if not isinstance(other, Context): 86 | return NotImplemented 87 | return self._context != other._context 88 | 89 | def keys(self) -> KeysView[str]: 90 | return self._context.keys() 91 | 92 | def items(self): 93 | return ItemsView(self._context) 94 | 95 | def values(self): 96 | return ValuesView(self._context) 97 | -------------------------------------------------------------------------------- /brickflow_plugins/databricks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nike-Inc/brickflow/44560e2be1d3b27f587d916d9f6ef81b0edade3d/brickflow_plugins/databricks/__init__.py -------------------------------------------------------------------------------- /brickflow_plugins/databricks/run_job.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | from pydantic import SecretStr 3 | 4 | from databricks.sdk import WorkspaceClient 5 | from brickflow.context import ctx 6 | from brickflow.engine.utils import get_job_id 7 | 8 | 9 | class 
RunJobInRemoteWorkspace: 10 | """ 11 | Currently Databricks does not natively support running a job in a remote workspace via the RunJobTask. 12 | This plugin adds this functionality. However, it aims to be a temporary solution until Databricks adds this 13 | functionality natively. 14 | The plugin supports neither passing parameters to the remote job nor waiting for the job to finish. 15 | 16 | Examples 17 | -------- 18 | service_principle_pat = ctx.dbutils.secrets.get("scope", "service_principle_id") 19 | RunJobInRemoteWorkspace( 20 | databricks_host="https://your_workspace_url.cloud.databricks.com", 21 | databricks_token=service_principle_pat, 22 | job_name="foo", 23 | ) 24 | In the above snippet, Databricks secrets are used as a secure service to store the Databricks token. 25 | If you get your token from another secret management service, like AWS Secrets Manager, GCP Secret Manager 26 | or Azure Key Vault, just pass it in the databricks_token argument. 27 | """ 28 | 29 | def __init__( 30 | self, 31 | databricks_host: str, 32 | databricks_token: Union[str, SecretStr], 33 | job_name: str, 34 | ): 35 | self.databricks_host = databricks_host 36 | self.databricks_token = ( 37 | databricks_token 38 | if isinstance(databricks_token, SecretStr) 39 | else SecretStr(databricks_token) 40 | ) 41 | self.job_name = job_name 42 | self._workspace_obj = WorkspaceClient( 43 | host=self.databricks_host, token=self.databricks_token.get_secret_value() 44 | ) 45 | 46 | def execute(self): 47 | job_id = get_job_id( 48 | host=self.databricks_host, 49 | token=self.databricks_token, 50 | job_name=self.job_name, 51 | ) 52 | # TODO: add support for passing parameters to the remote job 53 | # TODO: wait for the job to finish 54 | run = self._workspace_obj.jobs.run_now(job_id) 55 | ctx.log.info("Job run status: %s", run.response) 56 | -------------------------------------------------------------------------------- /brickflow_plugins/secrets/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import abc 4 | import base64 5 | import functools 6 | import os 7 | from typing import Optional, Tuple, Union, List 8 | from urllib.parse import urlparse, ParseResult 9 | 10 | import pluggy 11 | 12 | try: 13 | from airflow.secrets import BaseSecretsBackend 14 | except ImportError: 15 | raise ImportError( 16 | "You must install airflow to use airflow plugins, " 17 | "please try pip install brickflow[apache-airflow]" 18 | ) 19 | 20 | from brickflow_plugins import log 21 | 22 | BRICKFLOW_SECRETS_BACKEND = "brickflow_secrets_backend" 23 | 24 | brickflow_secrets_plugin_spec = pluggy.HookspecMarker(BRICKFLOW_SECRETS_BACKEND) 25 | 26 | 27 | class BrickflowSecretPluginSpec: 28 | @staticmethod 29 | @brickflow_secrets_plugin_spec(firstresult=True) 30 | def get_secret_value(url_parsed_result: ParseResult) -> Optional["str"]: 31 | """Custom execute method that is able to be plugged in.""" 32 | 33 | 34 | @functools.lru_cache 35 | def get_brickflow_tasks_hook() -> BrickflowSecretPluginSpec: 36 | pm = pluggy.PluginManager(BRICKFLOW_SECRETS_BACKEND) 37 | pm.add_hookspecs(BrickflowSecretPluginSpec) 38 | pm.load_setuptools_entrypoints(BRICKFLOW_SECRETS_BACKEND) 39 | pm.register(CerberusBrickflowSecretPluginImpl()) 40 | pm.register(Base64BrickflowSecretPluginImpl()) 41 | for name, plugin_instance in pm.list_name_plugin(): 42 | log.info( 43 | "Loaded plugin with name: %s and class: %s", 44 | name, 45 | plugin_instance.__class__.__name__, 46 | ) 47
| return pm.hook 48 | 49 | 50 | brickflow_secrets_backend_plugin_impl = pluggy.HookimplMarker(BRICKFLOW_SECRETS_BACKEND) 51 | 52 | 53 | class AbstractSecretsHelper(abc.ABC): 54 | PROTOCOL_STARTS_WITH: Optional[Union[str, List[str]]] = None 55 | 56 | def get_secret_value_from_url(self, url_parsed_result: ParseResult): 57 | allowed_protocols = ( 58 | [self.PROTOCOL_STARTS_WITH] 59 | if isinstance(self.PROTOCOL_STARTS_WITH, str) 60 | else self.PROTOCOL_STARTS_WITH 61 | ) 62 | if self.PROTOCOL_STARTS_WITH is not None and not any( 63 | [ 64 | url_parsed_result.scheme.lower().startswith(protocol) 65 | for protocol in allowed_protocols 66 | ] 67 | ): 68 | return None 69 | return self._get_secret_value_from_url(url_parsed_result) 70 | 71 | @staticmethod 72 | @abc.abstractmethod 73 | def _get_secret_value_from_url(url_parsed_result: ParseResult) -> str: 74 | pass 75 | 76 | 77 | class B64SecretsHelper(AbstractSecretsHelper): 78 | PROTOCOL_STARTS_WITH = ["base64", "b64"] 79 | 80 | @staticmethod 81 | def _get_secret_value_from_url(url_parsed_result: ParseResult) -> str: 82 | b64data = url_parsed_result.netloc.encode("utf-8") 83 | return base64.b64decode(b64data).decode("utf-8") 84 | 85 | 86 | class CerberusSecretsHelper(AbstractSecretsHelper): 87 | PROTOCOL_STARTS_WITH = "cerberus" 88 | 89 | @staticmethod 90 | def parse_path_and_key(path: Optional[str]) -> Optional[Tuple[str, str]]: 91 | if path is not None: 92 | _cleaned_path = path.lstrip("/").rstrip("/") 93 | return "/".join(_cleaned_path.split("/")[:-1]), _cleaned_path.split("/")[-1] 94 | return None 95 | 96 | @staticmethod 97 | def _get_secret_value_from_url(url_parsed_result: ParseResult) -> str: 98 | try: 99 | from cerberus.client import CerberusClient 100 | except ImportError: 101 | raise ImportError( 102 | "You must install cerberus-client to use the cerberus secrets backend, " 103 | "please try pip install brickflow[cerberus]" 104 | ) 105 | parts = url_parsed_result.scheme.lower().split("+") 106 | protocol = "https" 107 | if len(parts) == 2: 108 | protocol = parts[1] 109 | _client = CerberusClient(f"{protocol}://{url_parsed_result.netloc}") 110 | _path, _key = CerberusSecretsHelper.parse_path_and_key(url_parsed_result.path) 111 | data = _client.get_secrets_data(_path) 112 | return data[_key] 113 | 114 | 115 | class CerberusBrickflowSecretPluginImpl(BrickflowSecretPluginSpec): 116 | @staticmethod 117 | @brickflow_secrets_backend_plugin_impl 118 | def get_secret_value(url_parsed_result: ParseResult) -> Optional["str"]: 119 | return CerberusSecretsHelper().get_secret_value_from_url(url_parsed_result) 120 | 121 | 122 | class Base64BrickflowSecretPluginImpl(BrickflowSecretPluginSpec): 123 | @staticmethod 124 | @brickflow_secrets_backend_plugin_impl 125 | def get_secret_value(url_parsed_result: ParseResult) -> Optional["str"]: 126 | return B64SecretsHelper().get_secret_value_from_url(url_parsed_result) 127 | 128 | 129 | class DatabricksSecretsBrickflowSecretPluginImpl(BrickflowSecretPluginSpec): 130 | @staticmethod 131 | @brickflow_secrets_backend_plugin_impl 132 | def get_secret_value(url_parsed_result: ParseResult) -> Optional["str"]: 133 | # not implemented yet 134 | return None 135 | 136 | 137 | class BrickflowSecretsBackend(BaseSecretsBackend): # noqa 138 | def __enter__(self): 139 | self.set_backend_env() 140 | return self 141 | 142 | def __exit__(self, exc_type, exc_val, exc_tb): 143 | self.unset_backend_env() 144 | 145 | def get_conn_value(self, conn_id: str) -> str | None: 146 | parsed_url = urlparse(conn_id) 147 | return 
get_brickflow_tasks_hook().get_secret_value(url_parsed_result=parsed_url) 148 | 149 | def _get_secrets_backend_env(self): 150 | return { 151 | "AIRFLOW__SECRETS__BACKEND": f"{self.__class__.__module__}.{self.__class__.__name__}", 152 | "AIRFLOW__SECRETS__BACKEND_KWARGS": "", 153 | } 154 | 155 | def set_backend_env(self): 156 | for k, v in self._get_secrets_backend_env().items(): 157 | os.environ[k] = v 158 | 159 | def unset_backend_env(self): 160 | for k in self._get_secrets_backend_env().keys(): 161 | os.environ.pop(k, None) 162 | -------------------------------------------------------------------------------- /docs/api/airflow_external_task_dependency.md: -------------------------------------------------------------------------------- 1 | --- 2 | search: 3 | exclude: true 4 | --- 5 | 6 | ::: brickflow_plugins.airflow.operators.external_tasks 7 | handler: python 8 | options: 9 | filters: 10 | - "!^_[^_]" 11 | - "!^__[^__]" 12 | -------------------------------------------------------------------------------- /docs/api/airflow_native_operators.md: -------------------------------------------------------------------------------- 1 | --- 2 | search: 3 | exclude: true 4 | --- 5 | 6 | ::: brickflow_plugins.airflow.operators.native_operators 7 | handler: python 8 | options: 9 | filters: 10 | - "!^_[^_]" 11 | - "!^__[^__]" 12 | -------------------------------------------------------------------------------- /docs/api/airflow_tableau_operators.md: -------------------------------------------------------------------------------- 1 | --- 2 | search: 3 | exclude: true 4 | --- 5 | 6 | ::: brickflow_plugins.airflow.operators.external_tasks_tableau 7 | handler: python 8 | options: 9 | filters: 10 | - "!^_[^_]" 11 | - "!^__[^__]" 12 | -------------------------------------------------------------------------------- /docs/api/box_operator.md: -------------------------------------------------------------------------------- 1 | --- 2 | search: 3 | exclude: true 4 | --- 5 | 6 | ::: brickflow_plugins.databricks.box_operator 7 | handler: python 8 | options: 9 | filters: 10 | - "!^_[^_]" 11 | - "!^__[^__]" 12 | -------------------------------------------------------------------------------- /docs/api/cli.md: -------------------------------------------------------------------------------- 1 | --- 2 | search: 3 | exclude: true 4 | --- 5 | 6 | ::: brickflow.cli 7 | handler: python 8 | options: 9 | filters: 10 | - "!^_[^_]" 11 | 12 | -------------------------------------------------------------------------------- /docs/api/compute.md: -------------------------------------------------------------------------------- 1 | --- 2 | search: 3 | exclude: true 4 | --- 5 | 6 | ::: brickflow.engine.compute 7 | handler: python 8 | options: 9 | members: 10 | - Cluster 11 | - Runtimes 12 | filters: 13 | - "!^_[^_]" 14 | 15 | -------------------------------------------------------------------------------- /docs/api/context.md: -------------------------------------------------------------------------------- 1 | --- 2 | search: 3 | exclude: true 4 | --- 5 | 6 | ::: brickflow.context.context 7 | handler: python 8 | options: 9 | filters: 10 | - "!^_[^_]" 11 | - "!^__[^__]" 12 | 13 | -------------------------------------------------------------------------------- /docs/api/project.md: -------------------------------------------------------------------------------- 1 | --- 2 | search: 3 | exclude: true 4 | --- 5 | 6 | ::: brickflow.engine.project 7 | handler: python 8 | options: 9 | members: 10 | - Project 11 | - BrickFlowEnvVars 12 | 
filters: 13 | - "!^_[^_]" 14 | 15 | -------------------------------------------------------------------------------- /docs/api/secrets.md: -------------------------------------------------------------------------------- 1 | --- 2 | search: 3 | exclude: true 4 | --- 5 | 6 | ::: brickflow_plugins.secrets 7 | handler: python 8 | options: 9 | filters: 10 | - "!^_[^_]" 11 | - "!^__[^__]" 12 | -------------------------------------------------------------------------------- /docs/api/sla_sensor.md: -------------------------------------------------------------------------------- 1 | --- 2 | search: 3 | exclude: true 4 | --- 5 | 6 | ::: brickflow_plugins.databricks.sla_sensor 7 | handler: python 8 | options: 9 | members: 10 | - SLASensor 11 | filters: 12 | - "!^_[^_]" 13 | - "!^__[^__]" 14 | -------------------------------------------------------------------------------- /docs/api/task.md: -------------------------------------------------------------------------------- 1 | --- 2 | search: 3 | exclude: true 4 | --- 5 | 6 | ::: brickflow.engine.task 7 | handler: python 8 | options: 9 | members: 10 | - Task 11 | - EmailNotifications 12 | - JarTaskLibrary 13 | - EggTaskLibrary 14 | - WheelTaskLibrary 15 | - PypiTaskLibrary 16 | - MavenTaskLibrary 17 | - CranTaskLibrary 18 | - BrickflowTriggerRule 19 | - BrickflowTaskEnvVars 20 | - TaskSettings 21 | - TaskType 22 | filters: 23 | - "!^_[^_]" 24 | - "!^__[^__]" 25 | 26 | -------------------------------------------------------------------------------- /docs/api/uc_to_snowflake_operator.md: -------------------------------------------------------------------------------- 1 | --- 2 | search: 3 | exclude: true 4 | --- 5 | 6 | ::: brickflow_plugins.databricks.uc_to_snowflake_operator 7 | handler: python 8 | options: 9 | filters: 10 | - "!^_[^_]" 11 | - "!^__[^__]" 12 | -------------------------------------------------------------------------------- /docs/api/workflow.md: -------------------------------------------------------------------------------- 1 | --- 2 | search: 3 | exclude: true 4 | --- 5 | 6 | ::: brickflow.engine.workflow 7 | handler: python 8 | options: 9 | members: 10 | - Workflow 11 | - WorkspacePermissions 12 | - User 13 | - Group 14 | - ServicePrincipal 15 | filters: 16 | - "!^_[^_]" 17 | - "!^__[^__]" 18 | 19 | -------------------------------------------------------------------------------- /docs/api/workflow_dependency_sensor.md: -------------------------------------------------------------------------------- 1 | --- 2 | search: 3 | exclude: true 4 | --- 5 | 6 | ::: brickflow_plugins.databricks.workflow_dependency_sensor 7 | handler: python 8 | options: 9 | members: 10 | - WorkflowDependencySensor 11 | - WorkflowTaskDependencySensor 12 | filters: 13 | - "!^_[^_]" 14 | - "!^__[^__]" 15 | -------------------------------------------------------------------------------- /docs/bundles-quickstart.md: -------------------------------------------------------------------------------- 1 | # BrickFlow v1.3.1 Quickstart Guide 2 | 3 | This guide will help you get started with BrickFlow v1.3.1, walking you through project setup and deployment. 4 | 5 | ## Prerequisites 6 | 7 | 1. Local environment setup: 8 | - Python >= 3.8 9 | - Databricks CLI configured with access token 10 | - BrickFlow CLI 11 | 12 | ### Installation Steps 13 | 14 | 1. Install Databricks CLI and configure it: 15 | ```bash 16 | pip install databricks-cli 17 | databricks configure -t 18 | ``` 19 | 20 | 2. Install BrickFlow CLI: 21 | ```bash 22 | pip install brickflows 23 | ``` 24 | 25 | 3. 
Verify your installation: 26 | ```bash 27 | bf --help 28 | databricks workspace list / # Add --profile if using specific profile 29 | ``` 30 | 31 | ## Creating Your First Project 32 | 33 | 1. Navigate to your repository root (where `.git` folder is located) 34 | 35 | 2. Initialize a new BrickFlow project: 36 | ```bash 37 | bf projects add 38 | ``` 39 | 40 | 3. Follow the prompts: 41 | - Project Name: Enter your desired project name 42 | - Path from repo root to project root: Press Enter for default (`.`) or specify path 43 | - Path from project root to workflows dir: Enter the directory for your workflows 44 | - Git https url: Enter your repository URL 45 | - Brickflow version: Enter `1.3.1` (or press Enter for `auto`) 46 | - Spark expectations version: Press Enter for default (`0.8.0`) 47 | - Skip entrypoint: Choose `N` unless you have a specific reason to skip 48 | 49 | 4. Update your `.gitignore` file: 50 | ``` 51 | **/bundle.yml 52 | .databricks/ 53 | ``` 54 | 55 | ## Project Structure 56 | 57 | Your project will follow either a monorepo or polyrepo style: 58 | 59 | ### Monorepo Structure Example: 60 | ``` 61 | repo-root/ 62 | ├── .git 63 | ├── projects/ 64 | │ ├── project_abc/ 65 | │ │ ├── lib/ 66 | │ │ │ ├── __init__.py 67 | │ │ │ └── shared_functions.py 68 | │ │ ├── workflows/ 69 | │ │ │ ├── __init__.py 70 | │ │ │ ├── entrypoint.py 71 | │ │ │ └── workflow_abc.py 72 | │ │ └── .brickflow-project-root.yml 73 | ``` 74 | 75 | ### Polyrepo Structure Example: 76 | ``` 77 | repo-root/ 78 | ├── .git 79 | ├── src/ 80 | │ ├── lib/ 81 | │ │ ├── __init__.py 82 | │ │ └── shared_functions.py 83 | │ ├── workflows/ 84 | │ │ ├── __init__.py 85 | │ │ ├── entrypoint.py 86 | │ │ └── workflow.py 87 | ├── .brickflow-project-root.yml 88 | ``` 89 | 90 | ## Validating Your Project 91 | 92 | 1. Synthesize your project configuration: 93 | ```bash 94 | bf projects synth --project --profile 95 | ``` 96 | 97 | 2. Verify the output shows: 98 | ``` 99 | SUCCESSFULLY SYNTHESIZED BUNDLE.YML FOR PROJECT: 100 | ``` 101 | 102 | ## Deploying Your Project 103 | 104 | ### Development Deployment 105 | ```bash 106 | bf projects deploy --project -p --force-acquire-lock 107 | ``` 108 | 109 | ### Environment-Specific Deployments 110 | ```bash 111 | # Dev environment 112 | bf projects deploy --project -p -e dev --force-acquire-lock 113 | 114 | # Test environment 115 | bf projects deploy --project -p -e test --force-acquire-lock 116 | 117 | # Production environment 118 | bf projects deploy --project -p -e prod --force-acquire-lock 119 | ``` 120 | 121 | ### Release Candidate Deployments 122 | For testing specific versions or pull requests: 123 | 124 | ```bash 125 | # Deploy RC version 126 | BRICKFLOW_WORKFLOW_SUFFIX="1.3.1-rc1" bf projects deploy --project -p -e test --force-acquire-lock 127 | 128 | # Deploy PR version 129 | BRICKFLOW_WORKFLOW_SUFFIX="1.3.1-pr34" bf projects deploy --project -p -e test --force-acquire-lock 130 | ``` 131 | 132 | ## Cleaning Up 133 | 134 | ### Destroying Deployments 135 | ```bash 136 | # Destroy main deployment 137 | bf projects destroy --project -p --force-acquire-lock 138 | 139 | # Destroy RC deployment 140 | BRICKFLOW_WORKFLOW_SUFFIX="1.3.1-rc1" bf projects destroy --project -p -e test --force-acquire-lock 141 | 142 | # Destroy PR deployment 143 | BRICKFLOW_WORKFLOW_SUFFIX="1.3.1-pr34" bf projects destroy --project -p -e test --force-acquire-lock 144 | ``` 145 | 146 | ## Troubleshooting 147 | 148 | 1. 
If synthesis fails: 149 | - Verify you're in the repository root directory 150 | - Check that all paths in configuration files are correct 151 | - Ensure all required __init__.py files exist 152 | 153 | 2. If deployment fails: 154 | - Verify Databricks CLI configuration 155 | - Check permissions in your Databricks workspace 156 | - Verify environment variables are set correctly 157 | 158 | ## Next Steps 159 | 160 | After successful deployment: 161 | 1. Monitor your workflows in the Databricks workspace 162 | 2. Set up CI/CD pipelines for automated deployments 163 | 3. Configure environment-specific variables 164 | 4. Set up monitoring and alerting -------------------------------------------------------------------------------- /docs/cli/reference.md: -------------------------------------------------------------------------------- 1 | --- 2 | search: 3 | exclude: true 4 | --- 5 | 6 | This page provides documentation for our command line tools. 7 | 8 | 9 | ::: mkdocs-click 10 | :module: brickflow.cli 11 | :command: cli 12 | :prog_name: bf 13 | :depth: 1 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /docs/css/custom.css: -------------------------------------------------------------------------------- 1 | .md-footer-nav { display: none; } 2 | 3 | .md-footer__inner:not([hidden]) { 4 | display: none 5 | } 6 | 7 | /* Indentation. */ 8 | div.doc-contents:not(.first) { 9 | padding-left: 25px; 10 | border-left: .05rem solid var(--md-typeset-table-color); 11 | } 12 | 13 | /* Mark external links as such. */ 14 | a.autorefs-external::after { 15 | /* https://primer.style/octicons/arrow-up-right-24 */ 16 | background-image: url('data:image/svg+xml,'); 17 | content: ' '; 18 | 19 | display: inline-block; 20 | position: relative; 21 | top: 0.1em; 22 | margin-left: 0.2em; 23 | margin-right: 0.1em; 24 | 25 | height: 1em; 26 | width: 1em; 27 | border-radius: 100%; 28 | background-color: var(--md-typeset-a-color); 29 | } 30 | a.autorefs-external:hover::after { 31 | background-color: var(--md-accent-fg-color); 32 | } -------------------------------------------------------------------------------- /docs/highlevel.md: -------------------------------------------------------------------------------- 1 | ## Brickflow Overview 2 | 3 | The objective of Brickflow is to provide a thin layer on top of databricks workflows to help deploy 4 | and manage workflows in Databricks. It also provides plugins/extras to be able to run airflow 5 | operators directly in the workflows. 6 | 7 | ## Brickflow to Airflow Term Mapping 8 | 9 | | Object | Airflow | Brickflow | 10 | |-------------------------------------------|-----------------------------------|---------------------------------------------------| 11 | | Collection of Workflows | Airflow Cluster (Airflow Dag Bag) | Project/Entrypoint | 12 | | Workflow | Airflow Dag | Workflow | 13 | | Task | Airflow Operator | Task | 14 | | Schedule | Unix Cron | Quartz Cron | 15 | | Inter Task Communication | XComs | Task Values | 16 | | Managing Connections to External Services | Airflow Connections | Mocked Airflow connections or Databricks Secrets | 17 | | Variables to Tasks | Variables | Task Parameters [ctx.get_parameter(key, default)] | 18 | | Context values (execution_date, etc.) | Airflow Macros, context["ti"] | ctx. 
| 19 | -------------------------------------------------------------------------------- /docs/how-imports-work.md: -------------------------------------------------------------------------------- 1 | ### How do imports work? 2 | 3 | !!! warning 4 | 5 | **It is very important to understand how imports work for mono repos. Please read this carefully; otherwise you might run into issues during deployments.** 6 | 7 | When using brickflow projects, every project will have a `.brickflow-project-root.yml` file. When you import brickflow, 8 | which you will 9 | in your entrypoint or workflows, brickflow will inspect the paths of all stack frames during the import and recursively go 10 | up each path until it finds the `.brickflow-project-root.yml` file. 11 | The directory containing the first instance of `.brickflow-project-root.yml` will be added to `sys.path` to help with module imports. 12 | 13 | Let us take a quick example of how to get imports to work properly! 14 | 15 | Let us say you have a project structure like this: 16 | 17 | ``` 18 | repo-root/ 19 | ├── .git 20 | ├── projects/ 21 | │ ├── project_abc/ 22 | │ │ ├── lib/ 23 | │ │ │ ├── __init__.py 24 | │ │ │ └── shared_functions.py 25 | │ │ ├── workflows/ 26 | │ │ │ ├── __init__.py 27 | │ │ │ ├── entrypoint.py 28 | │ │ │ └── workflow_abc.py 29 | │ │ ├── setup.py 30 | │ │ └── .brickflow-project-root.yml 31 | │ └── project_xyz/ 32 | │ ├── workflows_geo_b/ 33 | │ │ ├── entrypoint.py 34 | │ │ └── workflow_xyz.py 35 | │ ├── workflows_geo_a/ 36 | │ │ ├── entrypoint.py 37 | │ │ └── workflow_xyz.py 38 | │ └── .brickflow-project-root.yml 39 | ├── .gitignore 40 | ├── brickflow-multi-project.yml 41 | └── README.md 42 | ``` 43 | 44 | Let us say you want to import from `lib` inside `workflow_abc.py`; you need to: 45 | 46 | ```python 47 | from lib import shared_functions 48 | 49 | shared_functions.some_function(....) 50 | ``` 51 | 52 | Since, in this project structure, `.brickflow-project-root.yml` is at `repo-root/projects/project_abc`, everything 53 | in that `project_abc` folder is 54 | added to `sys.path` in Python, so you can import any of the folders under it.
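
To sanity-check this at runtime, you can inspect `sys.path` right after importing brickflow. The snippet below is only an illustrative check (it is not part of the brickflow API); the `project_abc` path and `shared_functions` module refer to the example layout above:

```python
import sys

import brickflow  # importing brickflow triggers the root resolution described above

# The directory containing .brickflow-project-root.yml (projects/project_abc in
# the example layout) should now be present on sys.path:
print([p for p in sys.path if p.endswith("project_abc")])

from lib import shared_functions  # resolves against the project root
```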
-------------------------------------------------------------------------------- /docs/img/bf_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nike-Inc/brickflow/44560e2be1d3b27f587d916d9f6ef81b0edade3d/docs/img/bf_logo.png -------------------------------------------------------------------------------- /docs/img/bf_logo_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nike-Inc/brickflow/44560e2be1d3b27f587d916d9f6ef81b0edade3d/docs/img/bf_logo_1.png -------------------------------------------------------------------------------- /docs/img/maintainance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nike-Inc/brickflow/44560e2be1d3b27f587d916d9f6ef81b0edade3d/docs/img/maintainance.png -------------------------------------------------------------------------------- /docs/img/workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nike-Inc/brickflow/44560e2be1d3b27f587d916d9f6ef81b0edade3d/docs/img/workflow.png -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | hide: 3 | - navigation 4 | --- 5 | 6 | # BrickFlow 7 | 8 | BrickFlow is a CLI tool for the development and deployment of Python-based Databricks workflows in a declarative way. 9 | 10 | ## Concept 11 | 12 | `brickflow` aims to improve the development experience for building pipelines on Databricks by: 13 | 14 | - Providing a declarative way to describe workflows via decorators 15 | - Providing intelligent defaults for compute targets 16 | - Providing a code- and git-first approach to managing and deploying workflows 17 | - Using Databricks Asset Bundles to deploy workflows seamlessly; bundles are powered by Terraform, which helps manage state 18 | across deployments. 19 | - Offering a CLI tool that helps facilitate setting up projects 20 | - Providing additional functionality through the context library for workflows. 21 | 22 | 23 | ## Feedback 24 | 25 | Issues with `brickflow`? Found a :octicons-bug-24: bug? 26 | Have a great idea for an addition? Want to improve the documentation? Please feel 27 | free to file an [issue](https://github.com/Nike-Inc/brickflow/issues/new/choose). 28 | 29 | ## Contributing 30 | 31 | To contribute, please fork and create a pull request. Here is 32 | a [guide](https://github.com/Nike-Inc/brickflow/blob/main/CONTRIBUTING.md) to help you through this process. -------------------------------------------------------------------------------- /docs/projects.md: -------------------------------------------------------------------------------- 1 | A project is similar to an Airflow cluster: it can be composed of various different workflows or DAGs. 2 | 3 | 4 | Here is an example of an entrypoint. 5 | Click the plus buttons to understand all the parts of the entrypoint file. 6 | 7 | ```python title="entrypoint.py" 8 | # Databricks notebook source (1) 9 | 10 | import examples.brickflow_examples.workflows 11 | 12 | from brickflow import Project, PypiTaskLibrary, MavenTaskLibrary 13 | 14 | 15 | def main() -> None: 16 | """Project entrypoint""" 17 | with Project( 18 | "brickflow-demo", # (3)! 19 | git_repo="https://github.com/nike-inc/brickflow", # (4)! 20 | provider="github", # (5)!
21 | libraries=[ # (6)! 22 | PypiTaskLibrary(package="networkx"), 23 | ], 24 | ) as f: 25 | f.add_pkg(examples.brickflow_examples.workflows) # (7)! 26 | 27 | 28 | if __name__ == "__main__": # (2)! 29 | main() 30 | ``` 31 | 32 | 33 | 1. Uploading this Python file into Databricks with this comment on the first line makes Databricks treat the Python file 34 | as a notebook. 35 | 2. This makes sure main() only runs when this file is executed via `python entrypoint.py` 36 | 3. This is the project name you provided when you ran `bf projects add` 37 | 4. This is the git repo that is introspected when running `bf projects add` 38 | 5. This is the git provider that you decide on, e.g. github. 39 | 6. You can provide a list of packages that need to be installed on all of your clusters when running ETL. 40 | 7. You can add multiple packages from your project in which workflows are defined. -------------------------------------------------------------------------------- /docs/upgrades/upgrade-pre-0-10-0-to-0-10-0.md: -------------------------------------------------------------------------------- 1 | --- 2 | search: 3 | boost: 2 4 | --- 5 | 6 | ## Upgrade checklist 7 | 8 | * [x] The package has been renamed from `brickflow` to `brickflows`. Please run: 9 | 10 | ``` 11 | pip uninstall brickflow 12 | ``` 13 | 14 | and then 15 | 16 | ``` 17 | pip install brickflows>=0.10.0 18 | bf --version 19 | ``` 20 | 21 | * [x] If you are upgrading from a CDKTF version of brickflow, do not worry: your existing workflows will be imported as long as you do 22 | not change their names. 23 | 24 | * [x] Start using project configurations following the [quickstart guide](../../bundles-quickstart/#brickflow-projects-setup). 25 | 26 | * [x] Confirm the existence of the following files: 27 | 28 | * brickflow-multi-project.yml 29 | * .brickflow-project-root.yml 30 | * Please reference [concepts](../../bundles-quickstart/#concepts) 31 | and [initialize project](../../bundles-quickstart/#initialize-project) for more details. 32 | 33 | * [x] RelativePathPackageResolver has been removed from the project; imports now resolve seamlessly 34 | as long as you import brickflow at the top. 35 | 36 | * [x] Ensure the import for brickflow is at the top of your entrypoint.py 37 | 38 | 39 | 40 | 41 | * [x] Ensure your entrypoint looks like this. **Make sure to click the plus buttons and read the highlighted sections**: 42 | 43 | ```python linenums="1" hl_lines="5 7 15 18" 44 | # Databricks notebook source 45 | 46 | # COMMAND ---------- 47 | 48 | from brickflow import Project # (1)! 49 | 50 | import workflows # (2)! 51 | 52 | def main() -> None: 53 | """Project entrypoint""" 54 | with Project( 55 | "product_abc_workflows_2", 56 | git_repo="https://github.com/stikkireddy/mono-repo-test", 57 | provider="github", 58 | libraries=[ # (3)! 59 | # PypiTaskLibrary(package="spark-expectations==0.5.0"), # Uncomment if spark-expectations is needed 60 | ], 61 | enable_plugins=True, # (4)! 62 | ) as f: 63 | f.add_pkg(workflows) 64 | 65 | 66 | if __name__ == "__main__": 67 | main() 68 | ``` 69 | 70 | 1. Make sure brickflow is at the top of your imports! This will help resolve paths and allow other libraries to be 71 | imported correctly. 72 | 2. Import your modules after brickflow has been imported! Make sure your optimize-imports tooling doesn't reorder your imports! 73 | 3. Make sure you remove brickflow, brickflow plugins and cron utils from this list. 74 | 4. Make sure you have enable_plugins=True.
This will enable the plugins to be loaded to support airflow operators, etc. 75 | Disable this if you dont want to install airflow. 76 | 77 | 78 | -------------------------------------------------------------------------------- /examples/brickflow_examples/.brickflow-project-root.yml: -------------------------------------------------------------------------------- 1 | # DO NOT MODIFY THIS FILE - IT IS AUTO GENERATED BY BRICKFLOW AND RESERVED FOR FUTURE USAGE 2 | projects: 3 | brickflow-demo: 4 | brickflow_version: auto 5 | deployment_mode: bundle 6 | enable_plugins: true 7 | name: brickflow-demo 8 | path_from_repo_root_to_project_root: . 9 | path_project_root_to_workflows_dir: workflows 10 | version: v1 11 | -------------------------------------------------------------------------------- /examples/brickflow_examples/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | 132 | # GENERATED BY BRICKFLOW CLI --START-- 133 | 134 | ### Terraform ### 135 | # Local .terraform directories 136 | **/.terraform/* 137 | 138 | # .tfstate files 139 | *.tfstate 140 | *.tfstate.* 141 | 142 | # Crash log files 143 | crash.log 144 | crash.*.log 145 | 146 | # Exclude all .tfvars files, which are likely to contain sensitive data, such as 147 | # password, private keys, and other secrets. These should not be part of version 148 | # control as they are data points which are potentially sensitive and subject 149 | # to change depending on the environment. 150 | *.tfvars 151 | *.tfvars.json 152 | 153 | # Ignore override files as they are usually used to override resources locally and so 154 | # are not checked in 155 | override.tf 156 | override.tf.json 157 | *_override.tf 158 | *_override.tf.json 159 | 160 | # Include override files you do wish to add to version control using negated pattern 161 | # !example_override.tf 162 | 163 | # Include tfplan files to ignore the plan output of command: terraform plan -out=tfplan 164 | # example: *tfplan* 165 | 166 | # Ignore CLI configuration files 167 | .terraformrc 168 | terraform.rc 169 | 170 | # GENERATED BY BRICKFLOW CLI --END-- 171 | 172 | .idea 173 | bundle.yml -------------------------------------------------------------------------------- /examples/brickflow_examples/README.md: -------------------------------------------------------------------------------- 1 | # brickflow-examples 2 | This repository consists of examples for brickflow 3 | 4 | ## Getting Started 5 | 6 | ### Prerequisites 7 | 1.Install brickflows 8 | 9 | ```shell 10 | pip install brickflows 11 | ``` 12 | 13 | 2.Install [Databricks CLI](https://docs.databricks.com/en/dev-tools/cli/databricks-cli.html) 14 | 15 | ```shell 16 | curl -fsSL https://raw.githubusercontent.com/databricks/setup-cli/main/install.sh | sudo sh 17 | ``` 18 | 19 | 3.Configure Databricks cli with workspace token. This configures your `~/.databrickscfg` file. 
20 | 21 | ```shell 22 | databricks configure --token 23 | ``` 24 | 25 | ### Clone the repository 26 | 27 | ```shell 28 | git clone https://github.com/Nike-Inc/brickflow.git 29 | cd brickflow/examples/brickflow_examples 30 | ``` 31 | 32 | ### Hello World workflow 33 | - Create your first workflow using brickflow 34 | - Create a new file hello_world_workflow.py in the workflows directory 35 | - Add the following code to the file 36 | ```python 37 | from brickflow import ( 38 | Cluster, 39 | Workflow, 40 | NotebookTask, 41 | ) 42 | from brickflow.context import ctx 43 | from airflow.operators.bash import BashOperator 44 | 45 | 46 | cluster = Cluster( 47 | name="job_cluster", 48 | node_type_id="m6gd.xlarge", 49 | spark_version="13.3.x-scala2.12", 50 | min_workers=1, 51 | max_workers=2, 52 | ) 53 | 54 | wf = Workflow( 55 | "hello_world_workflow", 56 | default_cluster=cluster, 57 | tags={ 58 | "product_id": "brickflow_demo", 59 | }, 60 | common_task_parameters={ 61 | "catalog": "", 62 | "database": "", 63 | }, 64 | ) 65 | 66 | @wf.task 67 | # this task does nothing but explains the use of context object 68 | def start(): 69 | print(f"Environment: {ctx.env}") 70 | 71 | @wf.notebook_task 72 | # this task runs a databricks notebook 73 | def example_notebook(): 74 | return NotebookTask( 75 | notebook_path="notebooks/example_notebook.py", 76 | base_parameters={ 77 | "some_parameter": "some_value", # in the notebook access these via dbutils.widgets.get("some_parameter") 78 | }, 79 | ) 80 | 81 | 82 | @wf.task(depends_on=[start, example_notebook]) 83 | # this task runs a bash command 84 | def list_lending_club_data_files(): 85 | return BashOperator( 86 | task_id=list_lending_club_data_files.__name__, 87 | bash_command="ls -lrt /dbfs/databricks-datasets/samples/lending_club/parquet/", 88 | ) 89 | 90 | @wf.task(depends_on=list_lending_club_data_files) 91 | # this task runs the pyspark code 92 | def lending_data_ingest(): 93 | ctx.spark.sql( 94 | f""" 95 | CREATE TABLE IF NOT EXISTS 96 | {ctx.dbutils_widget_get_or_else(key="catalog", debug="development")}.\ 97 | {ctx.dbutils_widget_get_or_else(key="database", debug="dummy_database")}.\ 98 | {ctx.dbutils_widget_get_or_else(key="brickflow_env", debug="local")}_lending_data_ingest 99 | USING DELTA -- this is default just for explicit purpose 100 | SELECT * FROM parquet.`dbfs:/databricks-datasets/samples/lending_club/parquet/` 101 | """ 102 | ) 103 | ``` 104 | _Note: Modify the values of catalog/database for common_task_parameters._ 105 | 106 | ### Update demo_wf.py 107 | - demo_wf.py explains the various tasks and options available for the tasks 108 | - You can remove the demo_wf.py in case you just to run the hello_world_workflow.py 109 | - In case you want to run the demo_wf.py, update the below params with your values 110 | - default_cluster 111 | - common_task_parameters 112 | - permissions 113 | - default_task_settings 114 | 115 | ### Deploy the workflow to databricks 116 | ```shell 117 | brickflow projects deploy --project brickflow-demo -e local 118 | ``` 119 | 120 | ### Run the demo workflow 121 | - login to databricks workspace 122 | - go to the workflows and select the workflow 123 | ![img.png](../../docs/img/workflow.png) 124 | - click on the run button 125 | -------------------------------------------------------------------------------- /examples/brickflow_examples/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Nike-Inc/brickflow/44560e2be1d3b27f587d916d9f6ef81b0edade3d/examples/brickflow_examples/__init__.py -------------------------------------------------------------------------------- /examples/brickflow_examples/brickflow-multi-project.yml: -------------------------------------------------------------------------------- 1 | project_roots: 2 | brickflow-demo: 3 | root_yaml_rel_path: . 4 | version: v1 5 | -------------------------------------------------------------------------------- /examples/brickflow_examples/notebooks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nike-Inc/brickflow/44560e2be1d3b27f587d916d9f6ef81b0edade3d/examples/brickflow_examples/notebooks/__init__.py -------------------------------------------------------------------------------- /examples/brickflow_examples/notebooks/example_notebook.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | 3 | print("hello world") 4 | -------------------------------------------------------------------------------- /examples/brickflow_examples/src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nike-Inc/brickflow/44560e2be1d3b27f587d916d9f6ef81b0edade3d/examples/brickflow_examples/src/__init__.py -------------------------------------------------------------------------------- /examples/brickflow_examples/src/python/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nike-Inc/brickflow/44560e2be1d3b27f587d916d9f6ef81b0edade3d/examples/brickflow_examples/src/python/__init__.py -------------------------------------------------------------------------------- /examples/brickflow_examples/src/python/lending_data_show.py: -------------------------------------------------------------------------------- 1 | from brickflow.context import ctx 2 | 3 | 4 | def lending_data_print(): 5 | ctx.spark.sql( 6 | """ 7 | SELECT 8 | addr_state, * 9 | FROM 10 | parquet.`dbfs:/databricks-datasets/samples/lending_club/parquet/` limit 10 11 | """ 12 | ).show(truncate=False) 13 | 14 | 15 | if __name__ == "__main__": 16 | lending_data_print() 17 | -------------------------------------------------------------------------------- /examples/brickflow_examples/src/python/setup_data.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # MAGIC %fs ls dbfs:/databricks-datasets/samples/lending_club/parquet/ 3 | 4 | # COMMAND ---------- 5 | 6 | # MAGIC %sql 7 | # MAGIC SELECT 8 | # MAGIC addr_state, * 9 | # MAGIC FROM 10 | # MAGIC parquet.`dbfs:/databricks-datasets/samples/lending_club/parquet/` 11 | 12 | # COMMAND ---------- 13 | 14 | 15 | # -- ingest step 16 | catalog = "development" 17 | database = "team_databricks_sme" 18 | spark.sql( 19 | f""" 20 | CREATE TABLE IF NOT EXISTS {catalog}.{database}.lending_data 21 | USING DELTA -- this is default just for explicit purpose 22 | SELECT * FROM parquet.`dbfs:/databricks-datasets/samples/lending_club/parquet/` 23 | """ 24 | ) 25 | 26 | # COMMAND ---------- 27 | 28 | # Step 2 29 | catalog = "development" 30 | database = "team_databricks_sme" 31 | spark.sql( 32 | f""" 33 | OPTIMIZE {catalog}.{database}.lending_data; 34 | """ 35 | ) 36 | 37 | # COMMAND ---------- 38 | 39 | # MAGIC %sql 40 | # MAGIC SELECT distinct addr_state FROM 
development.team_databricks_sme.lending_data 41 | 42 | # COMMAND ---------- 43 | 44 | 45 | # -- T&S 1 process AZ data 46 | catalog = "development" 47 | database = "team_databricks_sme" 48 | spark.sql( 49 | f""" 50 | CREATE OR REPLACE TABLE {catalog}.{database}.lending_data_az_geo 51 | USING DELTA -- this is default just for explicit purpose 52 | SELECT * FROM {catalog}.{database}.lending_data where addr_state = 'AZ' 53 | """ 54 | ) 55 | 56 | # COMMAND ---------- 57 | 58 | # -- T&S 2 process CA data 59 | catalog = "development" 60 | database = "team_databricks_sme" 61 | spark.sql( 62 | f""" 63 | CREATE OR REPLACE TABLE {catalog}.{database}.lending_data_ca_geo 64 | USING DELTA -- this is default just for explicit purpose 65 | SELECT * FROM {catalog}.{database}.lending_data where addr_state = 'CA' 66 | """ 67 | ) 68 | 69 | # COMMAND ---------- 70 | 71 | # -- T&S 3 process IL data 72 | catalog = "development" 73 | database = "team_databricks_sme" 74 | spark.sql( 75 | f""" 76 | CREATE OR REPLACE TABLE {catalog}.{database}.lending_data_il_geo 77 | USING DELTA -- this is default just for explicit purpose 78 | SELECT * FROM {catalog}.{database}.lending_data where addr_state = 'IL' 79 | """ 80 | ) 81 | 82 | # COMMAND ---------- 83 | 84 | # -- Union Data Together 85 | catalog = "development" 86 | database = "team_databricks_sme" 87 | spark.sql( 88 | f""" 89 | CREATE OR REPLACE TABLE {catalog}.{database}.lending_data_az_ca_il_geo 90 | USING DELTA -- this is default just for explicit purpose 91 | SELECT * FROM {catalog}.{database}.lending_data_az_geo 92 | UNION ALL 93 | SELECT * FROM {catalog}.{database}.lending_data_ca_geo 94 | UNION ALL 95 | SELECT * FROM {catalog}.{database}.lending_data_il_geo 96 | """ 97 | ) 98 | 99 | # COMMAND ---------- 100 | 101 | # -- Read the unioned data and export a sample 102 | catalog = "development" 103 | database = "team_databricks_sme" 104 | spark.sql( 105 | f""" 106 | SELECT * FROM {catalog}.{database}.lending_data_az_ca_il_geo 107 | """ 108 | ).limit(10).toPandas().to_csv("data.csv") 109 | with open("data.csv", "r") as f: 110 | print(f.read()) 111 | 112 | # COMMAND ---------- 113 | -------------------------------------------------------------------------------- /examples/brickflow_examples/src/sql/sample.sql: -------------------------------------------------------------------------------- 1 | create or replace table $database.$schema.sample as 2 | select * from $database.$schema.source -------------------------------------------------------------------------------- /examples/brickflow_examples/workflows/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nike-Inc/brickflow/44560e2be1d3b27f587d916d9f6ef81b0edade3d/examples/brickflow_examples/workflows/__init__.py -------------------------------------------------------------------------------- /examples/brickflow_examples/workflows/entrypoint.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | 3 | import brickflow 4 | from brickflow import Project, PypiTaskLibrary 5 | import workflows 6 | 7 | 8 | def main() -> None: 9 | with Project( 10 | "brickflow-demo", 11 | git_repo="https://github.com/Nike-Inc/brickflow", 12 | provider="github", 13 | libraries=[ 14 | PypiTaskLibrary( 15 | package="spark-expectations==0.8.0" 16 | ), # comment out if spark-expectations is not needed 17 | ], 18 | ) as f: 19 | f.add_pkg(workflows) 20 | 21 | 22 | if __name__ == "__main__": 23 | main() 24 |
-------------------------------------------------------------------------------- /examples/brickflow_for_each_task_examples/.brickflow-project-root.yml: -------------------------------------------------------------------------------- 1 | # DO NOT MODIFY THIS FILE - IT IS AUTO GENERATED BY BRICKFLOW AND RESERVED FOR FUTURE USAGE 2 | projects: 3 | for_each_task_examples: 4 | brickflow_version: auto 5 | deployment_mode: bundle 6 | enable_plugins: true 7 | name: for_each_task_examples 8 | path_from_repo_root_to_project_root: examples/brickflow_for_each_task_examples 9 | path_project_root_to_workflows_dir: workflows 10 | version: v1 11 | -------------------------------------------------------------------------------- /examples/brickflow_for_each_task_examples/README.md: -------------------------------------------------------------------------------- 1 | # Brickflow for each task examples 2 | This repository contains some examples on how to use the fo each task type in brickflow. 3 | 4 | ## Getting Started 5 | 6 | ### Prerequisites 7 | 1.Install brickflows 8 | 9 | ```shell 10 | pip install brickflows 11 | ``` 12 | 13 | 2.Install [Databricks CLI](https://docs.databricks.com/en/dev-tools/cli/databricks-cli.html) 14 | 15 | ```shell 16 | curl -fsSL https://raw.githubusercontent.com/databricks/setup-cli/main/install.sh | sudo sh 17 | ``` 18 | 19 | 3.Configure Databricks cli with workspace token. This configures your `~/.databrickscfg` file. 20 | 21 | ```shell 22 | databricks configure --token 23 | ``` 24 | 25 | ### Clone the repository 26 | 27 | ```shell 28 | git clone https://github.com/Nike-Inc/brickflow.git 29 | cd brickflow/examples/brickflow_serverless_examples 30 | ``` 31 | 32 | ### Customize the workflow 33 | 34 | Replace all the placeholders in workflows/for_each_task_workflow.py with configuration values compatible with your databricks workspace 35 | 36 | 37 | ### Deploy the workflow to databricks 38 | ```shell 39 | brickflow projects deploy --project for_each_task_examples -e local 40 | ``` 41 | 42 | ### Run the demo workflow 43 | - login to databricks workspace 44 | - go to the workflows and select the workflow 45 | - click on the run button 46 | -------------------------------------------------------------------------------- /examples/brickflow_for_each_task_examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nike-Inc/brickflow/44560e2be1d3b27f587d916d9f6ef81b0edade3d/examples/brickflow_for_each_task_examples/__init__.py -------------------------------------------------------------------------------- /examples/brickflow_for_each_task_examples/brickflow-multi-project.yml: -------------------------------------------------------------------------------- 1 | project_roots: 2 | for_each_task_examples: 3 | root_yaml_rel_path: . 
4 | version: v1 5 | -------------------------------------------------------------------------------- /examples/brickflow_for_each_task_examples/notebooks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nike-Inc/brickflow/44560e2be1d3b27f587d916d9f6ef81b0edade3d/examples/brickflow_for_each_task_examples/notebooks/__init__.py -------------------------------------------------------------------------------- /examples/brickflow_for_each_task_examples/notebooks/example_notebook.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | 3 | param = dbutils.widgets.get("looped_parameter") 4 | print(f"Hey this is a nested notebook running with inputs: {param}") 5 | -------------------------------------------------------------------------------- /examples/brickflow_for_each_task_examples/src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nike-Inc/brickflow/44560e2be1d3b27f587d916d9f6ef81b0edade3d/examples/brickflow_for_each_task_examples/src/__init__.py -------------------------------------------------------------------------------- /examples/brickflow_for_each_task_examples/src/python/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nike-Inc/brickflow/44560e2be1d3b27f587d916d9f6ef81b0edade3d/examples/brickflow_for_each_task_examples/src/python/__init__.py -------------------------------------------------------------------------------- /examples/brickflow_for_each_task_examples/src/python/print_args.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | if __name__ == "__main__": 4 | print(f"Hello, running with input {sys.argv}") 5 | -------------------------------------------------------------------------------- /examples/brickflow_for_each_task_examples/workflows/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nike-Inc/brickflow/44560e2be1d3b27f587d916d9f6ef81b0edade3d/examples/brickflow_for_each_task_examples/workflows/__init__.py -------------------------------------------------------------------------------- /examples/brickflow_for_each_task_examples/workflows/entrypoint.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | 3 | import brickflow 4 | from brickflow import Project 5 | import workflows 6 | 7 | 8 | def main() -> None: 9 | with Project( 10 | "for_each_task_examples", 11 | git_repo="https://github.com/Nike-Inc/brickflow", 12 | provider="github", 13 | ) as f: 14 | f.add_pkg(workflows) 15 | 16 | 17 | if __name__ == "__main__": 18 | main() 19 | -------------------------------------------------------------------------------- /examples/brickflow_for_each_task_examples/workflows/for_each_task_wf.py: -------------------------------------------------------------------------------- 1 | from brickflow import ( 2 | Workflow, 3 | WorkflowPermissions, 4 | User, 5 | NotebookTask, 6 | Cluster, 7 | JarTaskLibrary, 8 | SparkJarTask, 9 | SparkPythonTask, 10 | SqlTask, 11 | ) 12 | 13 | from brickflow.context import ctx 14 | from brickflow.engine.task import JobsTasksForEachTaskConfigs 15 | 16 | cluster = Cluster( 17 | name=f"job_cluster_for_each_task_examples", 18 | driver_node_type_id="r7g.large", 19 | 
node_type_id="r7g.large", 20 | spark_version="13.3.x-scala2.12", 21 | min_workers=1, 22 | max_workers=1, 23 | policy_id="", # replace with an existing policy id 24 | ) 25 | 26 | wf = Workflow( 27 | "for_each_task_examples_wf", 28 | default_cluster=cluster, 29 | permissions=WorkflowPermissions( 30 | can_manage=[ 31 | User( 32 | "" # replace email with existing users' email on databricks 33 | ) 34 | ], 35 | ), 36 | ) 37 | 38 | 39 | @wf.task 40 | def example_task(): 41 | print("This is a dependant task!") 42 | 43 | 44 | @wf.for_each_task( 45 | depends_on=example_task, 46 | for_each_task_conf=JobsTasksForEachTaskConfigs( 47 | # Inputs can be provided by either a python iterable or a json-string 48 | inputs=[ 49 | "AZ", 50 | "CA", 51 | "IL", 52 | ], 53 | concurrency=3, 54 | ), 55 | ) 56 | def example_notebook(): 57 | return NotebookTask( 58 | notebook_path="notebooks/example_notebook.py", 59 | base_parameters={"looped_parameter": "{{input}}"}, 60 | ) 61 | 62 | 63 | @wf.for_each_task( 64 | depends_on=example_task, 65 | for_each_task_conf=JobsTasksForEachTaskConfigs( 66 | inputs='["1", "2", "3"]', concurrency=3 67 | ), 68 | ) 69 | def example_brickflow_task(*, test_param="{{input}}"): 70 | print(f"Test param: {test_param}") 71 | param = ctx.get_parameter("looped_parameter") 72 | print(f"Nested brickflow task running with input: {param}") 73 | 74 | 75 | @wf.for_each_task( 76 | depends_on=example_task, 77 | libraries=[ 78 | JarTaskLibrary( 79 | jar="" 80 | ) # Replace with actual jar path 81 | ], 82 | for_each_task_conf=JobsTasksForEachTaskConfigs( 83 | inputs="[1,2,3]", 84 | concurrency=1, 85 | ), 86 | ) 87 | def for_each_spark_jar(): 88 | return SparkJarTask( 89 | main_class_name="com.example.MainClass", # Replace with actual main class name 90 | parameters=["{{input}}"], 91 | ) 92 | 93 | 94 | @wf.for_each_task( 95 | depends_on=example_task, 96 | for_each_task_conf=JobsTasksForEachTaskConfigs( 97 | inputs="[1,2,3]", 98 | concurrency=1, 99 | ), 100 | ) 101 | def for_each_spark_python(): 102 | return SparkPythonTask( 103 | python_file="examples/brickflow_for_each_task_examples/src/python/print_args.py", 104 | source="WORKSPACE", 105 | parameters=["{{input}}"], 106 | ) 107 | 108 | 109 | @wf.for_each_task( 110 | depends_on=example_notebook, 111 | for_each_task_conf=JobsTasksForEachTaskConfigs( 112 | inputs="[1,2,3]", 113 | concurrency=1, 114 | ), 115 | ) 116 | def for_each_sql_task() -> any: 117 | return SqlTask( 118 | query_id="", # Replace with actual query id 119 | warehouse_id="", # Replace with actual warehouse id 120 | parameters={"looped_parameter": "{{input}}"}, 121 | ) 122 | -------------------------------------------------------------------------------- /examples/brickflow_serverless_examples/.brickflow-project-root.yml: -------------------------------------------------------------------------------- 1 | # DO NOT MODIFY THIS FILE - IT IS AUTO GENERATED BY BRICKFLOW AND RESERVED FOR FUTURE USAGE 2 | projects: 3 | brickflow-serverless-demo: 4 | brickflow_version: auto 5 | deployment_mode: bundle 6 | enable_plugins: true 7 | name: brickflow-serverless-demo 8 | path_from_repo_root_to_project_root: . 
9 | path_project_root_to_workflows_dir: workflows 10 | version: v1 11 | -------------------------------------------------------------------------------- /examples/brickflow_serverless_examples/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | 132 | # GENERATED BY BRICKFLOW CLI --START-- 133 | 134 | ### Terraform ### 135 | # Local .terraform directories 136 | **/.terraform/* 137 | 138 | # .tfstate files 139 | *.tfstate 140 | *.tfstate.* 141 | 142 | # Crash log files 143 | crash.log 144 | crash.*.log 145 | 146 | # Exclude all .tfvars files, which are likely to contain sensitive data, such as 147 | # password, private keys, and other secrets. These should not be part of version 148 | # control as they are data points which are potentially sensitive and subject 149 | # to change depending on the environment. 
150 | *.tfvars 151 | *.tfvars.json 152 | 153 | # Ignore override files as they are usually used to override resources locally and so 154 | # are not checked in 155 | override.tf 156 | override.tf.json 157 | *_override.tf 158 | *_override.tf.json 159 | 160 | # Include override files you do wish to add to version control using negated pattern 161 | # !example_override.tf 162 | 163 | # Include tfplan files to ignore the plan output of command: terraform plan -out=tfplan 164 | # example: *tfplan* 165 | 166 | # Ignore CLI configuration files 167 | .terraformrc 168 | terraform.rc 169 | 170 | # GENERATED BY BRICKFLOW CLI --END-- 171 | 172 | .idea 173 | bundle.yml -------------------------------------------------------------------------------- /examples/brickflow_serverless_examples/README.md: -------------------------------------------------------------------------------- 1 | # Brickflows Serverless Example 2 | This project contains the example of the serverless workflow, that contains: 3 | - notebook task 4 | - python task 5 | - native Brickflow entrypoint task 6 | 7 | Note that in notebook task and entrypoint task the dependencies are set through magic `pip install` commands within 8 | the notebook. 9 | 10 | ## Getting Started 11 | 12 | ### Prerequisites 13 | 1.Install brickflows 14 | 15 | ```shell 16 | pip install brickflows 17 | ``` 18 | 19 | 2.Install [Databricks CLI](https://docs.databricks.com/en/dev-tools/cli/databricks-cli.html) 20 | 21 | ```shell 22 | curl -fsSL https://raw.githubusercontent.com/databricks/setup-cli/main/install.sh | sudo sh 23 | ``` 24 | 25 | 3.Configure Databricks cli with workspace token. This configures your `~/.databrickscfg` file. 26 | 27 | ```shell 28 | databricks configure --token 29 | ``` 30 | 31 | ### Clone the repository 32 | 33 | ```shell 34 | git clone https://github.com/Nike-Inc/brickflow.git 35 | cd brickflow/examples/brickflow_serverless_examples 36 | ``` 37 | 38 | ### Deploy the workflow to databricks 39 | ```shell 40 | brickflow projects deploy --project brickflow-serverless-demo -e local 41 | ``` 42 | 43 | ### Run the demo workflow 44 | - login to databricks workspace 45 | - go to the workflows and select the workflow 46 | - click on the run button 47 | -------------------------------------------------------------------------------- /examples/brickflow_serverless_examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nike-Inc/brickflow/44560e2be1d3b27f587d916d9f6ef81b0edade3d/examples/brickflow_serverless_examples/__init__.py -------------------------------------------------------------------------------- /examples/brickflow_serverless_examples/brickflow-multi-project.yml: -------------------------------------------------------------------------------- 1 | project_roots: 2 | brickflow-serverless-demo: 3 | root_yaml_rel_path: . 
4 | version: v1 5 | -------------------------------------------------------------------------------- /examples/brickflow_serverless_examples/notebooks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nike-Inc/brickflow/44560e2be1d3b27f587d916d9f6ef81b0edade3d/examples/brickflow_serverless_examples/notebooks/__init__.py -------------------------------------------------------------------------------- /examples/brickflow_serverless_examples/notebooks/example_notebook.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # MAGIC %pip install pytz==2024.2 3 | 4 | # COMMAND ---------- 5 | import pytz 6 | from datetime import datetime 7 | 8 | 9 | def get_current_time_in_timezone(timezone_str): 10 | # Get the timezone object 11 | timezone = pytz.timezone(timezone_str) 12 | # Get the current time in the specified timezone 13 | current_time = datetime.now(timezone) 14 | return current_time 15 | 16 | 17 | # Example usage 18 | timezones = ["UTC", "Europe/Amsterdam", "Asia/Tokyo", "America/New_York"] 19 | for tz in timezones: 20 | print(f"Current time in {tz}: {get_current_time_in_timezone(tz)}") 21 | -------------------------------------------------------------------------------- /examples/brickflow_serverless_examples/src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nike-Inc/brickflow/44560e2be1d3b27f587d916d9f6ef81b0edade3d/examples/brickflow_serverless_examples/src/__init__.py -------------------------------------------------------------------------------- /examples/brickflow_serverless_examples/src/python/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nike-Inc/brickflow/44560e2be1d3b27f587d916d9f6ef81b0edade3d/examples/brickflow_serverless_examples/src/python/__init__.py -------------------------------------------------------------------------------- /examples/brickflow_serverless_examples/src/python/example.py: -------------------------------------------------------------------------------- 1 | import pytz 2 | from datetime import datetime 3 | import argparse 4 | 5 | 6 | def get_current_time_in_timezone(timezone_str): 7 | # Get the timezone object 8 | timezone = pytz.timezone(timezone_str) 9 | # Get the current time in the specified timezone 10 | current_time = datetime.now(timezone) 11 | return current_time 12 | 13 | 14 | if __name__ == "__main__": 15 | parser = argparse.ArgumentParser( 16 | description="Get the current time in a specified timezone." 
17 | ) 18 | parser.add_argument( 19 | "--timezone", 20 | type=str, 21 | required=True, 22 | help="The timezone to get the current time for.", 23 | ) 24 | args = parser.parse_args() 25 | 26 | try: 27 | current_time = get_current_time_in_timezone(args.timezone) 28 | print(f"Current time in {args.timezone}: {current_time}") 29 | except pytz.UnknownTimeZoneError: 30 | print(f"Unknown timezone: {args.timezone}") 31 | -------------------------------------------------------------------------------- /examples/brickflow_serverless_examples/workflows/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nike-Inc/brickflow/44560e2be1d3b27f587d916d9f6ef81b0edade3d/examples/brickflow_serverless_examples/workflows/__init__.py -------------------------------------------------------------------------------- /examples/brickflow_serverless_examples/workflows/demo_serverless_wf.py: -------------------------------------------------------------------------------- 1 | from brickflow import ( 2 | Workflow, 3 | NotebookTask, 4 | SparkPythonTask, 5 | ) 6 | from brickflow.engine.task import PypiTaskLibrary 7 | 8 | wf = Workflow( 9 | "brickflow-serverless-demo", 10 | schedule_quartz_expression="0 0/20 0 ? * * *", 11 | libraries=[ 12 | PypiTaskLibrary(package="pytz==2024.2"), 13 | # Custom repositories are not supported for serverless workloads, due to Databricks CLI limitations. 14 | # Refer to: https://github.com/databricks/cli/pull/1842This will be fixed in the future releases, use wheel instead. 15 | # PypiTaskLibrary( 16 | # package="my-lib==1.2.3", repo="https://artifactory.my-org.com/api/pypi/python-virtual/simple" 17 | # ), 18 | ], 19 | ) 20 | 21 | 22 | @wf.task 23 | def entrypoint_task(): 24 | pass 25 | 26 | 27 | @wf.notebook_task 28 | def notebook_task(): 29 | return NotebookTask( 30 | notebook_path="notebooks/example_notebook.py", 31 | base_parameters={ 32 | "some_parameter": "some_value", # in the notebook access these via dbutils.widgets.get("some_parameter") 33 | }, 34 | ) # type: ignore 35 | 36 | 37 | @wf.spark_python_task 38 | def spark_python_task(): 39 | return SparkPythonTask( 40 | python_file="/src/python/example.py", 41 | source="GIT", 42 | parameters=["--timezone", "UTC"], 43 | ) # type: ignore 44 | -------------------------------------------------------------------------------- /examples/brickflow_serverless_examples/workflows/entrypoint.py: -------------------------------------------------------------------------------- 1 | # Databricks notebook source 2 | # This should point to the `brickflows` version with serverless support or the wheel file with the same 3 | # MAGIC %pip install brickflows==1.2.1 4 | # MAGIC %pip install koheesio==0.8.1 5 | # MAGIC %restart_python 6 | 7 | # COMMAND ---------- 8 | import brickflow 9 | from brickflow import Project, PypiTaskLibrary 10 | import workflows 11 | 12 | 13 | def main() -> None: 14 | with Project( 15 | "brickflow-serverless-demo", 16 | git_repo="https://github.com/Nike-Inc/brickflow", 17 | provider="github", 18 | ) as f: 19 | f.add_pkg(workflows) 20 | 21 | 22 | if __name__ == "__main__": 23 | main() 24 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: BrickFlow 2 | site_description: Brickflow is a tool for managing and deploying scalable workflows on Databricks. 
3 | site_url: https://brickflow.readthedocs.io/en/latest/ 4 | 5 | theme: 6 | name: material 7 | palette: 8 | - scheme: default 9 | primary: indigo 10 | accent: indigo 11 | toggle: 12 | icon: material/brightness-7 13 | name: Switch to dark mode 14 | - scheme: slate 15 | primary: indigo 16 | accent: indigo 17 | toggle: 18 | icon: material/brightness-4 19 | name: Switch to light mode 20 | features: 21 | # - announce.dismiss 22 | - content.code.annotate 23 | # - content.tabs.link 24 | - content.tooltips 25 | - content.code.copy 26 | # - header.autohide 27 | # - navigation.expand 28 | - navigation.indexes 29 | - navigation.instant 30 | # - navigation.prune 31 | # - navigation.sections 32 | - navigation.tabs 33 | - navigation.tabs.sticky 34 | - navigation.top 35 | - navigation.tracking 36 | - navigation.expand 37 | - search.highlight 38 | - search.share 39 | - search.suggest 40 | - toc.follow 41 | font: 42 | text: Roboto 43 | code: Roboto Mono 44 | logo: img/bf_logo.png 45 | favicon: img/bf_logo.png 46 | language: en 47 | 48 | repo_name: nike/brickflow 49 | repo_url: https://github.com/Nike-Inc/brickflow 50 | 51 | plugins: 52 | - search: 53 | lang: en 54 | - mkdocstrings: 55 | handlers: 56 | python: 57 | paths: [ "brickflow" ] # search packages in the src folder 58 | options: 59 | show_source: true 60 | show_root_heading: false 61 | heading_level: 1 62 | merge_init_into_class: true 63 | show_if_no_docstring: true 64 | show_root_full_path: true 65 | show_root_members_full_path: true 66 | show_root_toc_entry: false 67 | show_category_heading: true 68 | show_signature_annotations: true 69 | separate_signature: false 70 | 71 | markdown_extensions: 72 | - abbr 73 | - admonition 74 | - mkdocs-click 75 | - attr_list 76 | - def_list 77 | - footnotes 78 | - md_in_html 79 | - toc: 80 | permalink: true 81 | - pymdownx.arithmatex: 82 | generic: true 83 | - pymdownx.betterem: 84 | smart_enable: all 85 | - pymdownx.caret 86 | - pymdownx.details 87 | - pymdownx.emoji: 88 | emoji_generator: !!python/name:materialx.emoji.to_svg 89 | emoji_index: !!python/name:materialx.emoji.twemoji 90 | - pymdownx.highlight: 91 | anchor_linenums: true 92 | - pymdownx.inlinehilite 93 | - pymdownx.keys 94 | - pymdownx.magiclink: 95 | repo_url_shorthand: true 96 | user: squidfunk 97 | repo: mkdocs-material 98 | - pymdownx.mark 99 | - pymdownx.smartsymbols 100 | - pymdownx.superfences: 101 | custom_fences: 102 | - name: mermaid 103 | class: mermaid 104 | format: !!python/name:pymdownx.superfences.fence_code_format 105 | - pymdownx.tabbed: 106 | alternate_style: true 107 | - pymdownx.tasklist: 108 | custom_checkbox: true 109 | - pymdownx.tilde 110 | 111 | watch: 112 | - brickflow 113 | - brickflow_plugins 114 | 115 | extra_css: 116 | - css/custom.css 117 | 118 | nav: 119 | - Home: index.md 120 | - Quickstart: 121 | - Brickflow Projects: bundles-quickstart.md 122 | - Upgrading Versions: 123 | - Upgrading to v0.10.x: upgrades/upgrade-pre-0-10-0-to-0-10-0.md 124 | - Concepts: 125 | - HighLevel: highlevel.md 126 | - Workflows: workflows.md 127 | - Tasks: tasks.md 128 | - Projects: projects.md 129 | - ENV Variables: environment-variables.md 130 | - Importing Modules: how-imports-work.md 131 | - FAQ: faq/faq.md 132 | - CLI: 133 | - Commands: cli/reference.md 134 | - Python API: 135 | - Engine: 136 | - Project: api/project.md 137 | - Workflow: api/workflow.md 138 | - Compute: api/compute.md 139 | - Task: api/task.md 140 | - Context: api/context.md 141 | - CLI: api/cli.md 142 | - Brickflow Plugins: 143 | - AirflowTaskDependencySensor: 
api/airflow_external_task_dependency.md 144 | - AirflowNativeOperators: api/airflow_native_operators.md 145 | - WorkflowDependencySensor: api/workflow_dependency_sensor.md 146 | - SnowflakeOperator: api/uc_to_snowflake_operator.md 147 | - UcToSnowflakeOperator: api/uc_to_snowflake_operator.md 148 | - Secrets: api/secrets.md 149 | - TableauRefreshDataSourceOperator: api/airflow_tableau_operators.md 150 | - TableauRefreshWorkbookOperator: api/airflow_tableau_operators.md 151 | - BoxToVolumeOperator: api/box_operator.md 152 | - VolumeToBoxOperator: api/box_operator.md 153 | - BoxOperator: api/box_operator.md 154 | 155 | 156 | extra: 157 | generator: false 158 | version: 159 | provider: mike 160 | default: latest -------------------------------------------------------------------------------- /prospector.yaml: -------------------------------------------------------------------------------- 1 | strictness: high 2 | test-warnings: True 3 | doc-warnings: false 4 | 5 | ignore-paths: 6 | - build 7 | - venv 8 | - venv3 9 | - venv2 10 | - site 11 | - docs 12 | - tests/engine/sample_workflows.py 13 | - tools 14 | - .databricks 15 | - .mypy_cache 16 | - brickflow/bundles 17 | - brickflow/sample_dags 18 | - main.py 19 | - main2.py 20 | - .eggs 21 | - htmlcov 22 | - sample_workflows 23 | - integration_workflows 24 | - scripts 25 | - tests/test_brickflow.py 26 | - examples 27 | - brickflow_plugins # will eventually need to remove once there are tests and linting logic is applied 28 | 29 | max-line-length: 120 30 | 31 | pylint: 32 | disable: 33 | - too-many-branches 34 | - too-many-statements 35 | - too-many-instance-attributes 36 | - cyclic-import 37 | - len-as-condition 38 | - invalid-name 39 | - no-else-return 40 | - no-self-use 41 | - protected-access 42 | - too-many-arguments 43 | - too-many-locals # TBD: this rule is actually a good one, we need to enable it and refactor code 44 | - inconsistent-return-statements 45 | - import-outside-toplevel 46 | - consider-using-set-comprehension 47 | - useless-object-inheritance 48 | - unnecessary-pass 49 | - raise-missing-from # pretty strange requirement with acquaint logic 50 | - broad-except 51 | - arguments-differ 52 | 53 | pycodestyle: 54 | # W293: disabled because we have newlines in docstrings 55 | # E203: disabled because pep8 and black disagree on whitespace before colon in some cases 56 | disable: W293,E203,E203 # conflicts with black formatting 57 | 58 | pyflakes: 59 | disable: 60 | - F821 # ignore undefined name errors 61 | 62 | mccabe: 63 | disable: 64 | - MC0001 65 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "brickflows" 3 | version = "0.11.0a0" 4 | description = "Deploy scalable workflows to databricks using python" 5 | authors = ["Ashok Singamaneni, Sriharsha Tikkireddy"] 6 | readme = "README.md" 7 | license = "Apache License 2.0" 8 | homepage = "https://github.com/Nike-Inc/brickflow" 9 | repository = "https://github.com/Nike-Inc/brickflow" 10 | packages = [{ include = "brickflow" }, { include = "brickflow_plugins" }] 11 | include = ["LICENSE", "entrypoint.template", "gitignore_template.txt"] 12 | exclude = ["sample_workflows", "tests"] 13 | 14 | [tool.black] 15 | line-length = 88 16 | target-version = ['py39', 'py310'] 17 | include = '\.pyi?$' 18 | extend-exclude = ''' 19 | /( 20 | # The following are specific to Black, you probably don't want those. 
21 | | brickflow/tf 22 | | venv 23 | | brickflow.egg-info 24 | | dist 25 | | brickflow/bundles 26 | )/ 27 | ''' 28 | 29 | [tool.poetry.dependencies] 30 | python = ">=3.9,<3.12" # pyspark <3.5 does not play happy with python 3.11. The latest DBRs Runtime (15.4) ships with Python 3.11. 31 | Jinja2 = ">=3.1.5" 32 | click = "^8.1.3" 33 | databricks-sdk = ">=0.1.8 <1.0.0" 34 | networkx = "3.1" 35 | pendulum = "2.1.2" 36 | pluggy = "^1.0.0" 37 | pydantic = ">=2.0.0 <3.0.0" 38 | python-decouple = "3.8" 39 | pyyaml = "^6.0" 40 | requests = ">=2.28.2 <3.0.0" 41 | # cerberus-python-client = {version = "~2.5.4", optional = true } # Users might have to manually install cerberus-python-client if required 42 | # tableauserverclient = {version = "~0.25", optional = true } # Users might have to manually install tableauserverclient if required 43 | 44 | 45 | [tool.poetry.scripts] 46 | bf = "brickflow.cli:cli" 47 | brickflow = "brickflow.cli:cli" 48 | 49 | [tool.poetry.group.dev.dependencies] 50 | black = "^24.3.0" 51 | coverage = "^7.2.5" 52 | datamodel-code-generator = "^0.25.2" 53 | deepdiff = "^6.3.0" 54 | mypy = "^1.3.0" 55 | pre-commit = "^3.3.1" 56 | prospector = "^1.10.3" 57 | py4j = "^0.10.9.7" 58 | pytest = ">=7.3.1 <8.0.0" 59 | pytest-mock = "^3.10.0" 60 | types-PyYAML = "*" # only for development purposes no need to make installation req 61 | types-requests = ">=2.28.11.16 <3.0.0.0" # only for development purposes no need to make installation req 62 | apache-airflow = "^2.7.3" 63 | snowflake = "^0.6.0" 64 | tableauserverclient = "^0.25" 65 | boxsdk = "^3.9.2" 66 | cerberus-python-client = "^2.5.4" 67 | watchdog = "<4.0.0" 68 | requests-mock = "1.12.1" 69 | pyspark = "^3.0.0" 70 | apache-airflow-providers-fab = ">=1.5.2" 71 | 72 | [tool.poetry.group.docs.dependencies] 73 | mdx-include = "^1.4.2" 74 | mike = "^2.1.3" 75 | mkdocs-click = "^0.8.1" 76 | mkdocs-material = "^9.5.49" 77 | mkdocstrings = { extras = ["python"], version = "^0.27.0" } 78 | 79 | [build-system] 80 | requires = ["poetry-core", "poetry-dynamic-versioning"] 81 | build-backend = "poetry_dynamic_versioning.backend" 82 | 83 | [tool.poetry-dynamic-versioning] 84 | enable = true 85 | vcs = "git" 86 | bump = true 87 | style = "semver" 88 | 89 | [tool.coverage] 90 | [tool.coverage.run] 91 | omit = [ 92 | # omit anything in a .local directory anywhere 93 | '*/.local/*', 94 | '**', 95 | 'tests/*', 96 | '*/tests/*', 97 | # omit anything in a .venv directory anywhere 98 | '.venv/*', 99 | "*/site-packages/*", 100 | ] 101 | 102 | [tool.coverage.report] 103 | skip_empty = true 104 | 105 | [tool.mypy] 106 | disallow_untyped_defs = true 107 | ignore_missing_imports = true 108 | files = [ 109 | "brickflow/context/*.py", 110 | "brickflow/cli/*.py", 111 | "brickflow/hints/*.py", 112 | "brickflow/engine/*.py", 113 | "brickflow/resolver/*.py", 114 | "brickflow/codegen/*.py", 115 | ] 116 | follow_imports = "skip" 117 | 118 | [tool.pylint.main] 119 | fail-under = 9.0 120 | 121 | 122 | [tool.pylint."messages control"] 123 | disable = ["too-many-lines", "too-many-positional-arguments"] 124 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nike-Inc/brickflow/44560e2be1d3b27f587d916d9f6ef81b0edade3d/tests/__init__.py -------------------------------------------------------------------------------- /tests/airflow_plugins/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nike-Inc/brickflow/44560e2be1d3b27f587d916d9f6ef81b0edade3d/tests/airflow_plugins/__init__.py -------------------------------------------------------------------------------- /tests/airflow_plugins/test_autosys.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from requests.exceptions import HTTPError 3 | from requests_mock.mocker import Mocker as RequestsMocker 4 | 5 | from brickflow_plugins.airflow.operators.external_tasks import AutosysSensor 6 | 7 | 8 | class TestAutosysSensor: 9 | @pytest.fixture(autouse=True, name="api", scope="class") 10 | def mock_api(self): 11 | rm = RequestsMocker() 12 | rm.register_uri( 13 | method="GET", 14 | url="https://42.autosys.my-org.com/foo", 15 | response_list=[ 16 | # Test 1: Success 17 | { 18 | "json": {"status": "SU", "lastEndUTC": "2024-01-01T00:55:00Z"}, 19 | "status_code": int(200), 20 | }, 21 | # Test 2: Raise Error 22 | { 23 | "json": {}, 24 | "status_code": int(404), 25 | }, 26 | # Test 3: Poke 4 times until success 27 | { 28 | "json": {"status": "FA", "lastEndUTC": "2024-01-01T00:55:00Z"}, 29 | "status_code": int(200), 30 | }, 31 | { 32 | "json": {"status": "UNK", "lastEndUTC": None}, 33 | "status_code": int(200), 34 | }, 35 | { 36 | "json": {"status": "UNK", "lastEndUTC": ""}, 37 | "status_code": int(200), 38 | }, 39 | { 40 | "json": {"status": "SU", "lastEndUTC": "2024-01-01T01:55:00Z"}, 41 | "status_code": int(200), 42 | }, 43 | ], 44 | ) 45 | yield rm 46 | 47 | @pytest.fixture() 48 | def sensor(self): 49 | yield AutosysSensor( 50 | task_id="test", 51 | url="https://42.autosys.my-org.com/", 52 | job_name="foo", 53 | poke_interval=1, 54 | time_delta={"hours": 1}, 55 | ) 56 | 57 | def test_success(self, api, caplog, sensor): 58 | with api: 59 | sensor.poke(context={"execution_date": "2024-01-01T01:00:00Z"}) 60 | assert caplog.text.count("Poking again") == 0 61 | assert "Success criteria met. Exiting" in caplog.text 62 | 63 | def test_non_200(self, api, sensor): 64 | with pytest.raises(HTTPError): 65 | with api: 66 | sensor.poke(context={"execution_date": "2024-01-01T01:00:00Z"}) 67 | 68 | def test_poking(self, api, caplog, sensor): 69 | with api: 70 | sensor.poke(context={"execution_date": "2024-01-01T02:00:00Z"}) 71 | assert caplog.text.count("Poking again") == 3 72 | assert "Success criteria met. 
Exiting" in caplog.text 73 | -------------------------------------------------------------------------------- /tests/cli/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nike-Inc/brickflow/44560e2be1d3b27f587d916d9f6ef81b0edade3d/tests/cli/__init__.py -------------------------------------------------------------------------------- /tests/cli/sample_yaml_project/.brickflow-project-root.yaml: -------------------------------------------------------------------------------- 1 | version: v1 2 | projects: 3 | test_cli_project: 4 | name: test_cli_project 5 | brickflow_version: 1.2.1 6 | deployment_mode: bundle 7 | enable_plugins: false 8 | path_from_repo_root_to_project_root: some/test/path 9 | path_project_root_to_workflows_dir: path/to/workflows -------------------------------------------------------------------------------- /tests/cli/sample_yaml_project/brickflow-multi-project.yaml: -------------------------------------------------------------------------------- 1 | version: v1 2 | project_roots: 3 | test_cli_project: 4 | root_yaml_rel_path: . 5 | -------------------------------------------------------------------------------- /tests/cli/sample_yml_project/.brickflow-project-root.yml: -------------------------------------------------------------------------------- 1 | version: v1 2 | projects: 3 | test_cli_project: 4 | name: test_cli_project 5 | brickflow_version: 1.2.1 6 | deployment_mode: bundle 7 | enable_plugins: false 8 | path_from_repo_root_to_project_root: some/test/path 9 | path_project_root_to_workflows_dir: path/to/workflows -------------------------------------------------------------------------------- /tests/cli/sample_yml_project/brickflow-multi-project.yml: -------------------------------------------------------------------------------- 1 | version: v1 2 | project_roots: 3 | test_cli_project: 4 | root_yaml_rel_path: . 
5 | -------------------------------------------------------------------------------- /tests/cli/test_bundles.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from typing import Optional 4 | from unittest.mock import patch, Mock 5 | from pytest import LogCaptureFixture 6 | import pytest 7 | 8 | from brickflow import BrickflowEnvVars, _ilog 9 | from brickflow.cli.bundles import bundle_deploy, bundle_destroy 10 | 11 | 12 | class TestBundles: 13 | @patch("brickflow.cli.bundles.should_deploy", return_value=True) 14 | @patch("brickflow.cli.bundles.exec_command") 15 | @patch.dict( 16 | os.environ, {BrickflowEnvVars.BRICKFLOW_BUNDLE_CLI_VERSION.value: "0.203.0"} 17 | ) 18 | def test_bundle_deploy_new_cli(self, mock_exec_command: Mock, _: Mock): 19 | mock_exec_command.side_effect = lambda *args, **kwargs: None 20 | mock_exec_command.return_value = None 21 | # workflows_dir needed to make the function work due to bundle sync 22 | bundle_deploy( 23 | force_acquire_lock=True, 24 | workflows_dir="somedir", 25 | debug=True, 26 | fail_on_active_runs=True, 27 | ) 28 | bundle_cli = os.environ[BrickflowEnvVars.BRICKFLOW_BUNDLE_CLI_EXEC.value] 29 | mock_exec_command.assert_called_with( 30 | bundle_cli, 31 | "bundle", 32 | [ 33 | "deploy", 34 | "-t", 35 | "local", 36 | "--fail-on-active-runs", 37 | "--force-lock", 38 | "--debug", 39 | ], 40 | ) 41 | bundle_destroy(force_acquire_lock=True, workflows_dir="somedir", debug=True) 42 | bundle_cli = os.environ[BrickflowEnvVars.BRICKFLOW_BUNDLE_CLI_EXEC.value] 43 | mock_exec_command.assert_called_with( 44 | bundle_cli, 45 | "bundle", 46 | ["destroy", "-t", "local", "--force-lock", "--debug"], 47 | ) 48 | 49 | @patch("brickflow.cli.bundles.should_deploy", return_value=True) 50 | @patch("brickflow.cli.bundles.exec_command") 51 | @patch.dict( 52 | os.environ, 53 | { 54 | BrickflowEnvVars.BRICKFLOW_BUNDLE_CLI_VERSION.value: "0.201.0", 55 | BrickflowEnvVars.BRICKFLOW_BUNDLE_CLI_EXEC.value: "databricks", 56 | }, 57 | ) 58 | def test_bundle_deploy_old_cli(self, mock_exec_command: Mock, _: Mock): 59 | mock_exec_command.side_effect = lambda *args, **kwargs: None 60 | mock_exec_command.return_value = None 61 | # workflows_dir needed to make the function work due to bundle sync 62 | bundle_deploy(force_acquire_lock=True, workflows_dir="somedir") 63 | bundle_cli = os.environ[BrickflowEnvVars.BRICKFLOW_BUNDLE_CLI_EXEC.value] 64 | mock_exec_command.assert_called_with( 65 | bundle_cli, 66 | "bundle", 67 | ["deploy", "-t", "local", "--force"], 68 | ) 69 | bundle_destroy(force_acquire_lock=True, workflows_dir="somedir") 70 | bundle_cli = os.environ[BrickflowEnvVars.BRICKFLOW_BUNDLE_CLI_EXEC.value] 71 | mock_exec_command.assert_called_with( 72 | bundle_cli, 73 | "bundle", 74 | ["destroy", "-t", "local", "--force"], 75 | ) 76 | 77 | @patch("brickflow.cli.bundles.exec_command") 78 | @patch.dict( 79 | os.environ, {BrickflowEnvVars.BRICKFLOW_BUNDLE_CLI_VERSION.value: "0.203.0"} 80 | ) 81 | def test_deploy_no_workflows( 82 | self, mock_exec_command: Mock, caplog: LogCaptureFixture 83 | ): 84 | mock_exec_command.side_effect = lambda *args, **kwargs: None 85 | mock_exec_command.return_value = None 86 | 87 | # Adjusting the log level and propagating it to the root logger to make sure it's captured by caplog 88 | _ilog.propagate = True 89 | _ilog.level = logging.WARN 90 | 91 | with caplog.at_level(logging.WARN): 92 | # running this should not fail but log a warning stating that no bundle has been found 93 | 
bundle_deploy(force_acquire_lock=True, workflows_dir="somedir") 94 | 95 | assert "No bundle.yml found, skipping deployment." in [ 96 | rec.message for rec in caplog.records 97 | ] 98 | 99 | @pytest.mark.parametrize( 100 | "input_arch,expected_arch", 101 | [ 102 | ("x86_64", "amd64"), # Test one x86_64 variant 103 | ("amd64", "amd64"), # Test alternative x86_64 name 104 | ("i386", "386"), # Test one 32-bit variant 105 | ("i686", "386"), # Test alternative 32-bit name 106 | ("arm64", "arm64"), # Test one ARM variant 107 | ("aarch64", "arm64"), # Test alternative ARM name 108 | ("X86_64", "amd64"), # Test case insensitivity 109 | ("unsupported_arch", None), # Test unsupported architecture 110 | ], 111 | ) 112 | def test_get_arch_mappings( 113 | self, input_arch: str, expected_arch: Optional[str] 114 | ) -> None: 115 | from brickflow.cli.bundles import get_arch 116 | 117 | with patch("platform.machine") as mock_machine: 118 | mock_machine.return_value = input_arch 119 | 120 | if expected_arch is None: 121 | with pytest.raises(RuntimeError) as exc_info: 122 | get_arch() 123 | assert f"Unsupported architecture: {input_arch}" in str(exc_info.value) 124 | else: 125 | assert get_arch() == expected_arch 126 | -------------------------------------------------------------------------------- /tests/cli/test_cli.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import traceback 4 | from unittest.mock import patch, Mock 5 | 6 | import click 7 | from click.testing import CliRunner 8 | 9 | from brickflow import BrickflowProjectDeploymentSettings, BrickflowEnvVars 10 | from brickflow.cli import ( 11 | cli, 12 | exec_command, 13 | ) 14 | from brickflow.cli.bundles import ( 15 | bundle_download_path, 16 | download_and_unzip_databricks_cli, 17 | get_force_lock_flag, 18 | ) 19 | from brickflow.cli.projects import handle_libraries 20 | 21 | 22 | def fake_run(*_, **__): 23 | click.echo("hello world") 24 | 25 | 26 | # TODO: Add more tests to the cli 27 | class TestCli: 28 | def test_no_command_error(self): 29 | runner = CliRunner() 30 | non_existent_command = "non_existent_command" 31 | result = runner.invoke(cli, ["non_existent_command"]) # noqa 32 | assert result.exit_code == 2 33 | assert result.output.strip().endswith( 34 | f"Error: No such command '{non_existent_command}'." 
35 | ) 36 | 37 | @patch("webbrowser.open") 38 | def test_docs(self, browser: Mock): 39 | runner = CliRunner() 40 | browser.return_value = None 41 | result = runner.invoke(cli, ["docs"]) # noqa 42 | assert result.exit_code == 0, traceback.print_exception(*result.exc_info) 43 | assert result.output.strip().startswith("Opening browser for docs...") 44 | browser.assert_called_once_with( 45 | "https://engineering.nike.com/brickflow/", new=2 46 | ) 47 | 48 | def test_force_arg(self): 49 | with patch.dict( 50 | os.environ, {BrickflowEnvVars.BRICKFLOW_BUNDLE_CLI_VERSION.value: "0.203.0"} 51 | ): 52 | assert get_force_lock_flag() == "--force-lock" 53 | with patch.dict( 54 | os.environ, {BrickflowEnvVars.BRICKFLOW_BUNDLE_CLI_VERSION.value: "auto"} 55 | ): 56 | assert get_force_lock_flag() == "--force-lock" 57 | with patch.dict( 58 | os.environ, 59 | {BrickflowEnvVars.BRICKFLOW_BUNDLE_CLI_VERSION.value: "something else"}, 60 | ): 61 | assert get_force_lock_flag() == "--force-lock" 62 | with patch.dict( 63 | os.environ, {BrickflowEnvVars.BRICKFLOW_BUNDLE_CLI_VERSION.value: "0.202.0"} 64 | ): 65 | assert get_force_lock_flag() == "--force" 66 | 67 | def test_install_cli(self): 68 | expected_version = "0.200.0" 69 | url = bundle_download_path(expected_version) 70 | file_path = download_and_unzip_databricks_cli(url, expected_version) 71 | assert url is not None 72 | version_value = exec_command(file_path, "--version", [], capture_output=True) 73 | assert ( 74 | version_value.strip() == f"Databricks CLI v{expected_version}" 75 | ), version_value 76 | directory_path = ".databricks" 77 | if os.path.exists(directory_path): 78 | shutil.rmtree(directory_path) 79 | 80 | def test_projects_handle_libraries(self): 81 | bpd = BrickflowProjectDeploymentSettings() 82 | bpd.brickflow_auto_add_libraries = None 83 | handle_libraries(skip_libraries=True) 84 | assert bpd.brickflow_auto_add_libraries is False 85 | handle_libraries(skip_libraries=False) 86 | assert bpd.brickflow_auto_add_libraries is True 87 | bpd.brickflow_auto_add_libraries = None 88 | -------------------------------------------------------------------------------- /tests/cli/test_projects.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import shutil 3 | import os 4 | import pytest 5 | from brickflow import ConfigFileType 6 | from brickflow.cli.projects import MultiProjectManager, get_brickflow_root 7 | 8 | 9 | @pytest.mark.parametrize( 10 | "project_folder,extension", 11 | [("sample_yml_project", "yml"), ("sample_yaml_project", "yaml")], 12 | ) 13 | def test_get_brickflow_root(project_folder, extension): 14 | cwd = os.getcwd() 15 | test_folder = str(Path(__file__).parent) 16 | 17 | # Creating empty test directories 18 | os.makedirs(f"{test_folder}/{project_folder}/some/dummy/dir", exist_ok=True) 19 | os.chdir(f"{test_folder}/{project_folder}/some/dummy/dir") 20 | 21 | actual = get_brickflow_root() 22 | assert actual == Path( 23 | f"{test_folder}/{project_folder}/brickflow-multi-project.{extension}" 24 | ) 25 | 26 | # Cleanup 27 | shutil.rmtree(f"{test_folder}/{project_folder}/some") 28 | os.chdir(cwd) 29 | 30 | 31 | @pytest.mark.parametrize( 32 | "project_folder, config_type", 33 | [ 34 | ("sample_yml_project", ConfigFileType.YML), 35 | ("sample_yaml_project", ConfigFileType.YAML), 36 | ], 37 | ) 38 | def test_multi_project_manager_yaml(project_folder, config_type): 39 | cwd = os.getcwd() 40 | test_folder = str(Path(__file__).parent) 41 | os.chdir(test_folder) 42 | 43 | config_file_name 
= ( 44 | f"{test_folder}/{project_folder}/brickflow-multi-project.{config_type.value}" 45 | ) 46 | manager = MultiProjectManager( 47 | config_file_name=config_file_name, file_type=config_type 48 | ) 49 | assert manager._brickflow_multi_project_config.version == "v1" 50 | expected_project_config = { 51 | "version": "v1", 52 | "projects": { 53 | "test_cli_project": { 54 | "name": "test_cli_project", 55 | "path_from_repo_root_to_project_root": "some/test/path", 56 | "path_project_root_to_workflows_dir": "path/to/workflows", 57 | "deployment_mode": "bundle", 58 | "brickflow_version": "1.2.1", 59 | "enable_plugins": False, 60 | } 61 | }, 62 | } 63 | assert manager._project_config_dict["."].model_dump() == expected_project_config 64 | 65 | os.chdir(cwd) 66 | -------------------------------------------------------------------------------- /tests/codegen/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nike-Inc/brickflow/44560e2be1d3b27f587d916d9f6ef81b0edade3d/tests/codegen/__init__.py -------------------------------------------------------------------------------- /tests/codegen/expected_bundles/local_bundle_continuous_schedule.yml: -------------------------------------------------------------------------------- 1 | "bundle": 2 | "name": "test-project" 3 | "targets": 4 | "test-project-local": 5 | "resources": 6 | "jobs": 7 | "wf-test-2": 8 | "continuous": 9 | "pause_status": "PAUSED" 10 | "email_notifications": null 11 | "git_source": null 12 | "health": 13 | "rules": 14 | - "metric": "RUN_DURATION_SECONDS" 15 | "op": "GREATER_THAN" 16 | "value": 7200.0 17 | "job_clusters": 18 | - "job_cluster_key": "sample_job_cluster" 19 | "new_cluster": 20 | "aws_attributes": null 21 | "custom_tags": 22 | "brickflow_deployment_mode": "Databricks Asset Bundles" 23 | "brickflow_project_name": "test-project" 24 | "brickflow_version": "1.0.0" 25 | "deployed_at": "1704067200000" 26 | "deployed_by": "test_user" 27 | "environment": "local" 28 | "data_security_mode": "SINGLE_USER" 29 | "driver_instance_pool_id": null 30 | "driver_node_type_id": null 31 | "enable_elastic_disk": null 32 | "init_scripts": null 33 | "instance_pool_id": null 34 | "node_type_id": "m6gd.xlarge" 35 | "num_workers": 1.0 36 | "policy_id": null 37 | "runtime_engine": null 38 | "spark_conf": null 39 | "spark_env_vars": null 40 | "spark_version": "13.3.x-scala2.12" 41 | "max_concurrent_runs": 1.0 42 | "name": "test_user_wf-test-2" 43 | "notification_settings": null 44 | "permissions": 45 | - "level": "IS_OWNER" 46 | "user_name": "abc@abc.com" 47 | - "level": "CAN_MANAGE" 48 | "user_name": "abc@abc.com" 49 | - "level": "CAN_MANAGE_RUN" 50 | "user_name": "abc@abc.com" 51 | - "level": "CAN_VIEW" 52 | "user_name": "abc@abc.com" 53 | "run_as": 54 | "user_name": "abc@abc.com" 55 | "schedule": null 56 | "tags": 57 | "brickflow_deployment_mode": "Databricks Asset Bundles" 58 | "brickflow_project_name": "test-project" 59 | "brickflow_version": "1.0.0" 60 | "deployed_at": "1704067200000" 61 | "deployed_by": "test_user" 62 | "environment": "local" 63 | "test": "test2" 64 | "tasks": 65 | - "depends_on": [] 66 | "email_notifications": {} 67 | "webhook_notifications": {} 68 | "job_cluster_key": "sample_job_cluster" 69 | "libraries": [] 70 | "max_retries": null 71 | "min_retry_interval_millis": null 72 | "notebook_task": 73 | "base_parameters": 74 | "all_tasks1": "test" 75 | "all_tasks3": "123" 76 | "brickflow_env": "local" 77 | "brickflow_internal_only_run_tasks": "" 78 | 
"brickflow_internal_task_name": "{{task_key}}" 79 | "brickflow_internal_workflow_name": "wf-test-2" 80 | "brickflow_internal_workflow_prefix": "" 81 | "brickflow_internal_workflow_suffix": "" 82 | "brickflow_job_id": "{{job_id}}" 83 | "brickflow_parent_run_id": "{{parent_run_id}}" 84 | "brickflow_run_id": "{{run_id}}" 85 | "brickflow_start_date": "{{start_date}}" 86 | "brickflow_start_time": "{{start_time}}" 87 | "brickflow_task_key": "{{task_key}}" 88 | "brickflow_task_retry_count": "{{task_retry_count}}" 89 | "test": "var" 90 | "notebook_path": "test_databricks_bundle.py" 91 | "source": "WORKSPACE" 92 | "retry_on_timeout": null 93 | "task_key": "task_function2" 94 | "timeout_seconds": null 95 | "timeout_seconds": null 96 | "trigger": null 97 | "webhook_notifications": null 98 | "pipelines": {} 99 | "workspace": 100 | "file_path": "/Users/${workspace.current_user.userName}/.brickflow_bundles/test-project/local/files" 101 | "root_path": "/Users/${workspace.current_user.userName}/.brickflow_bundles/test-project/local" 102 | "state_path": "/Users/${workspace.current_user.userName}/.brickflow_bundles/test-project/local/state" 103 | "workspace": {} -------------------------------------------------------------------------------- /tests/codegen/expected_bundles/local_serverless_bundle.yml: -------------------------------------------------------------------------------- 1 | "bundle": 2 | "name": "test-project" 3 | "targets": 4 | "test-project-local": 5 | "resources": 6 | "jobs": 7 | "brickflow-serverless-demo": 8 | "continuous": null 9 | "email_notifications": null 10 | "environments": 11 | - "environment_key": "Default" 12 | "spec": 13 | "client": "1" 14 | "dependencies": 15 | - "pytz==2024.2" 16 | "health": {} 17 | "job_clusters": [] 18 | "max_concurrent_runs": 1.0 19 | "name": "test_user_brickflow-serverless-demo" 20 | "notification_settings": null 21 | "parameters": null 22 | "permissions": null 23 | "schedule": 24 | "pause_status": "PAUSED" 25 | "quartz_cron_expression": "0 0/20 0 ? 
* * *" 26 | "timezone_id": "UTC" 27 | "tags": 28 | "brickflow_deployment_mode": "Databricks Asset Bundles" 29 | "brickflow_project_name": "test-project" 30 | "brickflow_version": "1.0.0" 31 | "deployed_at": "1704067200000" 32 | "deployed_by": "test_user" 33 | "environment": "local" 34 | "tasks": 35 | - "depends_on": [] 36 | "email_notifications": {} 37 | "webhook_notifications": {} 38 | "max_retries": null 39 | "min_retry_interval_millis": null 40 | "notebook_task": 41 | "base_parameters": 42 | "brickflow_env": "local" 43 | "brickflow_internal_only_run_tasks": "" 44 | "brickflow_internal_task_name": "{{task_key}}" 45 | "brickflow_internal_workflow_name": "brickflow-serverless-demo" 46 | "brickflow_internal_workflow_prefix": "" 47 | "brickflow_internal_workflow_suffix": "" 48 | "brickflow_job_id": "{{job_id}}" 49 | "brickflow_parent_run_id": "{{parent_run_id}}" 50 | "brickflow_run_id": "{{run_id}}" 51 | "brickflow_start_date": "{{start_date}}" 52 | "brickflow_start_time": "{{start_time}}" 53 | "brickflow_task_key": "{{task_key}}" 54 | "brickflow_task_retry_count": "{{task_retry_count}}" 55 | "notebook_path": "test_databricks_bundle.py" 56 | "source": "WORKSPACE" 57 | "retry_on_timeout": null 58 | "task_key": "entrypoint_task" 59 | "timeout_seconds": null 60 | - "depends_on": [] 61 | "email_notifications": {} 62 | "webhook_notifications": {} 63 | "max_retries": null 64 | "min_retry_interval_millis": null 65 | "notebook_task": 66 | "base_parameters": 67 | "some_parameter": "some_value" 68 | "notebook_path": "notebooks/example_notebook.py" 69 | "retry_on_timeout": null 70 | "task_key": "notebook_task" 71 | "timeout_seconds": null 72 | - "depends_on": [] 73 | "email_notifications": {} 74 | "webhook_notifications": {} 75 | "environment_key": "Default" 76 | "max_retries": null 77 | "min_retry_interval_millis": null 78 | "retry_on_timeout": null 79 | "spark_python_task": 80 | "parameters": 81 | - "--timezone" 82 | - "UTC" 83 | "python_file": "/Workspace/Users/${workspace.current_user.userName}/.brickflow_bundles/test-project/local/files/spark/python/src/run_task.py" 84 | "source": "WORKSPACE" 85 | "task_key": "spark_python_task" 86 | "timeout_seconds": null 87 | "timeout_seconds": null 88 | "trigger": null 89 | "webhook_notifications": null 90 | "pipelines": {} 91 | "workspace": 92 | "file_path": "/Users/${workspace.current_user.userName}/.brickflow_bundles/test-project/local/files" 93 | "root_path": "/Users/${workspace.current_user.userName}/.brickflow_bundles/test-project/local" 94 | "state_path": "/Users/${workspace.current_user.userName}/.brickflow_bundles/test-project/local/state" 95 | "workspace": {} 96 | -------------------------------------------------------------------------------- /tests/codegen/sample_serverless_workflow.py: -------------------------------------------------------------------------------- 1 | from brickflow import ( 2 | Workflow, 3 | NotebookTask, 4 | SparkPythonTask, 5 | ) 6 | from brickflow.engine.task import PypiTaskLibrary 7 | 8 | wf = Workflow( 9 | "brickflow-serverless-demo", 10 | schedule_quartz_expression="0 0/20 0 ? 
* * *", 11 | libraries=[PypiTaskLibrary(package="pytz==2024.2")], 12 | ) 13 | 14 | 15 | @wf.task 16 | def entrypoint_task(): 17 | pass 18 | 19 | 20 | @wf.notebook_task 21 | def notebook_task(): 22 | return NotebookTask( 23 | notebook_path="notebooks/example_notebook.py", 24 | base_parameters={ 25 | "some_parameter": "some_value", # in the notebook access these via dbutils.widgets.get("some_parameter") 26 | }, 27 | ) # type: ignore 28 | 29 | 30 | @wf.spark_python_task 31 | def spark_python_task(): 32 | return SparkPythonTask( 33 | python_file="./products/test-project/spark/python/src/run_task.py", 34 | source="GIT", 35 | parameters=["--timezone", "UTC"], 36 | ) # type: ignore 37 | -------------------------------------------------------------------------------- /tests/context/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nike-Inc/brickflow/44560e2be1d3b27f587d916d9f6ef81b0edade3d/tests/context/__init__.py -------------------------------------------------------------------------------- /tests/databricks_plugins/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nike-Inc/brickflow/44560e2be1d3b27f587d916d9f6ef81b0edade3d/tests/databricks_plugins/__init__.py -------------------------------------------------------------------------------- /tests/databricks_plugins/test_run_job.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | import pytest 4 | from requests_mock.mocker import Mocker as RequestsMocker 5 | 6 | from brickflow.engine.utils import ctx 7 | from brickflow_plugins.databricks.run_job import RunJobInRemoteWorkspace 8 | 9 | 10 | class TestRunJob: 11 | workspace_url = "https://42.cloud.databricks.com" 12 | endpoint_url = f"{workspace_url}/api/.*/jobs/run-now" 13 | response = {"run_id": 37, "number_in_job": 42} 14 | 15 | ctx.log.propagate = True 16 | 17 | @pytest.fixture(autouse=True) 18 | def mock_get_job_id(self, mocker): 19 | mocker.patch( 20 | "brickflow_plugins.databricks.run_job.get_job_id", 21 | return_value=1, 22 | ) 23 | 24 | @pytest.fixture(autouse=True, name="api") 25 | def mock_api(self): 26 | rm = RequestsMocker() 27 | rm.post(re.compile(self.endpoint_url), json=self.response, status_code=int(200)) 28 | yield rm 29 | 30 | def test_run_job(self, api, caplog): 31 | with api: 32 | RunJobInRemoteWorkspace( 33 | databricks_host=self.workspace_url, 34 | databricks_token="token", 35 | job_name="foo", 36 | ).execute() 37 | 38 | assert "RunNowResponse(number_in_job=42, run_id=37)" in caplog.text 39 | -------------------------------------------------------------------------------- /tests/databricks_plugins/test_workflow_dependency_sensor.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | 3 | import pytest 4 | from requests_mock.mocker import Mocker as RequestsMocker 5 | 6 | from brickflow_plugins.databricks.workflow_dependency_sensor import ( 7 | WorkflowDependencySensor, 8 | ) 9 | 10 | 11 | class TestWorkflowDependencySensor: 12 | workspace_url = "https://42.cloud.databricks.com" 13 | endpoint_url = f"{workspace_url}/api/2.1/jobs/get" 14 | response = {} 15 | 16 | def test_sensor_failure_403(self): 17 | api = RequestsMocker() 18 | api.get(self.endpoint_url, json=self.response, status_code=int(403)) 19 | 20 | # Databricks SDK will throw PermissionDenied exception if the job_id is not found or 21 | # user 
doesn't have permission 22 | from databricks.sdk.errors.platform import PermissionDenied 23 | 24 | with api: 25 | sensor = WorkflowDependencySensor( 26 | databricks_host=self.workspace_url, 27 | databricks_token="token", 28 | dependency_job_id="1", 29 | delta=timedelta(seconds=1), 30 | timeout_seconds=1, 31 | poke_interval_seconds=1, 32 | ) 33 | 34 | with pytest.raises(PermissionDenied): 35 | sensor.execute() 36 | -------------------------------------------------------------------------------- /tests/databricks_plugins/test_workflow_task_dependency_sensor.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | 3 | import pytest 4 | from requests_mock.mocker import Mocker as RequestsMocker 5 | 6 | from brickflow_plugins.databricks.workflow_dependency_sensor import ( 7 | WorkflowTaskDependencySensor, 8 | WorkflowDependencySensorTimeOutException, 9 | ) 10 | 11 | 12 | class TestWorkflowTaskDependencySensor: 13 | workspace_url = "https://42.cloud.databricks.com" 14 | endpoint_url = f"{workspace_url}/api/2.1/jobs/runs/list" 15 | response = { 16 | "runs": [ 17 | { 18 | "job_id": 1, 19 | "run_id": 1, 20 | "start_time": 1704063600000, 21 | "state": { 22 | "result_state": "SUCCESS", 23 | }, 24 | "tasks": [ 25 | { 26 | "run_id": 100, 27 | "task_key": "foo", 28 | "state": { 29 | "result_state": "SUCCESS", 30 | }, 31 | }, 32 | { 33 | "run_id": 200, 34 | "task_key": "bar", 35 | "state": { 36 | "result_state": "FAILED", 37 | }, 38 | }, 39 | { 40 | "run_id": 300, 41 | "task_key": "baz", 42 | "state": {}, 43 | }, 44 | ], 45 | } 46 | ] 47 | } 48 | 49 | @pytest.fixture(autouse=True) 50 | def mock_get_execution_start_time_unix_milliseconds(self, mocker): 51 | mocker.patch.object( 52 | WorkflowTaskDependencySensor, 53 | "get_execution_start_time_unix_milliseconds", 54 | return_value=1704063600000, 55 | ) 56 | 57 | @pytest.fixture(autouse=True) 58 | def mock_get_job_id(self, mocker): 59 | mocker.patch( 60 | "brickflow_plugins.databricks.workflow_dependency_sensor.get_job_id", 61 | return_value=1, 62 | ) 63 | 64 | @pytest.fixture(autouse=True, name="api") 65 | def mock_api(self): 66 | rm = RequestsMocker() 67 | rm.get(self.endpoint_url, json=self.response, status_code=int(200)) 68 | yield rm 69 | 70 | def test_sensor_success(self, caplog, api): 71 | with api: 72 | sensor = WorkflowTaskDependencySensor( 73 | databricks_host=self.workspace_url, 74 | databricks_token="token", 75 | dependency_job_name="job", 76 | dependency_task_name="foo", 77 | delta=timedelta(seconds=1), 78 | timeout_seconds=1, 79 | poke_interval_seconds=1, 80 | ) 81 | 82 | sensor.execute() 83 | 84 | assert ( 85 | "Found the run_id '1' and 'foo' task with state: SUCCESS" in caplog.text 86 | ) 87 | assert "Found a successful run: 1" in caplog.text 88 | 89 | def test_sensor_failure(self, caplog, api): 90 | with api: 91 | sensor = WorkflowTaskDependencySensor( 92 | databricks_host=self.workspace_url, 93 | databricks_token="token", 94 | dependency_job_name="job", 95 | dependency_task_name="bar", 96 | delta=timedelta(seconds=1), 97 | timeout_seconds=1, 98 | poke_interval_seconds=1, 99 | ) 100 | 101 | with pytest.raises(WorkflowDependencySensorTimeOutException): 102 | sensor.execute() 103 | 104 | assert ( 105 | "Found the run_id '1' and 'bar' task with state: FAILED" 106 | in caplog.messages 107 | ) 108 | assert "Didn't find a successful task run yet..." 
in caplog.messages 109 | 110 | def test_sensor_no_state(self, caplog, api): 111 | with api: 112 | sensor = WorkflowTaskDependencySensor( 113 | databricks_host=self.workspace_url, 114 | databricks_token="token", 115 | dependency_job_name="job", 116 | dependency_task_name="baz", 117 | delta=timedelta(seconds=1), 118 | timeout_seconds=1, 119 | poke_interval_seconds=1, 120 | ) 121 | 122 | with pytest.raises(WorkflowDependencySensorTimeOutException): 123 | sensor.execute() 124 | 125 | assert ( 126 | "Found the run_id '1' and 'baz' but the task has not started yet..." 127 | in caplog.messages 128 | ) 129 | assert "Didn't find a successful task run yet..." in caplog.messages 130 | -------------------------------------------------------------------------------- /tests/engine/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nike-Inc/brickflow/44560e2be1d3b27f587d916d9f6ef81b0edade3d/tests/engine/__init__.py -------------------------------------------------------------------------------- /tests/engine/sample_workflow.py: -------------------------------------------------------------------------------- 1 | from brickflow.engine.compute import Cluster 2 | from brickflow.engine.task import ( 3 | BrickflowTriggerRule, 4 | TaskType, 5 | TaskResponse, 6 | DLTPipeline, 7 | RunJobTask, 8 | ) 9 | from brickflow.engine.workflow import Workflow, WorkflowPermissions, User 10 | 11 | wf = Workflow( 12 | "test", 13 | default_cluster=Cluster.from_existing_cluster("existing_cluster_id"), 14 | schedule_quartz_expression="* * * * *", 15 | permissions=WorkflowPermissions( 16 | owner=User("abc@abc.com"), 17 | can_manage_run=[User("abc@abc.com")], 18 | can_view=[User("abc@abc.com")], 19 | can_manage=[User("abc@abc.com")], 20 | ), 21 | tags={"test": "test2"}, 22 | common_task_parameters={"all_tasks1": "test", "all_tasks3": "123"}, # type: ignore 23 | health={ 24 | "rules": [ 25 | {"metric": "RUN_DURATION_SECONDS", "op": "GREATER_THAN", "value": 7200} 26 | ] 27 | }, 28 | timeout_seconds=42, 29 | ) 30 | 31 | 32 | @wf.task() 33 | def task_function(*, test="var"): 34 | return test 35 | 36 | 37 | @wf.task() 38 | def task_function_with_error(*, test="var"): 39 | raise ValueError("throwing random error") 40 | 41 | 42 | @wf.task 43 | def task_function_no_deco_args(): 44 | return "hello world" 45 | 46 | 47 | @wf.dlt_task 48 | def dlt_pipeline(): 49 | # pass 50 | return DLTPipeline( 51 | name="hello world", 52 | storage="123", 53 | language="PYTHON", 54 | configuration={}, 55 | cluster=Cluster( 56 | "test", 57 | "someversion", 58 | "vm-node", 59 | custom_tags={"name": "test"}, 60 | min_workers=2, 61 | max_workers=10, 62 | ), 63 | notebook_path="scripts/spark_script_1.py", 64 | ) 65 | 66 | 67 | @wf.dlt_task 68 | def dlt_pipeline_2(): 69 | # pass 70 | return DLTPipeline( 71 | name="hello world", 72 | storage="123", 73 | language="PYTHON", 74 | configuration={}, 75 | notebook_path="scripts/spark_script_2.py", 76 | ) 77 | 78 | 79 | @wf.task() 80 | def task_function_nokwargs(): 81 | return "hello world" 82 | 83 | 84 | @wf.task(depends_on=task_function) 85 | def task_function_2(): 86 | return "hello world" 87 | 88 | 89 | @wf.task(depends_on="task_function_2") 90 | def task_function_3(): 91 | return "hello world" 92 | 93 | 94 | @wf.task(depends_on="task_function_3", trigger_rule=BrickflowTriggerRule.NONE_FAILED) 95 | def task_function_4(): 96 | return "hello world" 97 | 98 | 99 | @wf.task( 100 | task_type=TaskType.CUSTOM_PYTHON_TASK, 101 | 
trigger_rule=BrickflowTriggerRule.NONE_FAILED, 102 | custom_execute_callback=lambda x: TaskResponse(x.name, push_return_value=True), 103 | ) 104 | def custom_python_task_push(): 105 | pass 106 | 107 | 108 | @wf.run_job_task() 109 | def run_job_task(): 110 | return RunJobTask(job_name="foo", host="https://foo.cloud.databricks.com") 111 | -------------------------------------------------------------------------------- /tests/engine/sample_workflow_2.py: -------------------------------------------------------------------------------- 1 | from brickflow import Cluster, Workflow 2 | 3 | wf = Workflow( 4 | "test1", default_cluster=Cluster.from_existing_cluster("existing_cluster_id") 5 | ) 6 | 7 | 8 | @wf.task() 9 | def task_function(*, test="var"): 10 | return test 11 | -------------------------------------------------------------------------------- /tests/engine/test_compute.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from brickflow.engine.compute import Cluster 4 | 5 | 6 | class TestCompute: 7 | def test_autoscale(self): 8 | workers = 1234 9 | cluster = Cluster( 10 | "name", "spark_version", "vm-node", min_workers=workers, max_workers=workers 11 | ) 12 | assert cluster.autoscale() == { 13 | "autoscale": { 14 | "min_workers": workers, 15 | "max_workers": workers, 16 | } 17 | } 18 | 19 | cluster = Cluster("name", "spark_version", "vm-node") 20 | assert not cluster.autoscale() 21 | 22 | def test_job_task_field(self): 23 | cluster = Cluster.from_existing_cluster("existing_cluster_id") 24 | assert cluster.job_task_field_dict == { 25 | "existing_cluster_id": "existing_cluster_id" 26 | } 27 | cluster = Cluster("name", "spark_version", "vm-node") 28 | assert cluster.job_task_field_dict == {"job_cluster_key": "name"} 29 | 30 | def test_dict(self): 31 | cluster = Cluster.from_existing_cluster("existing_cluster_id") 32 | assert "existing_cluster_id" not in cluster.as_dict() 33 | 34 | def test_valid_cluster(self): 35 | with pytest.raises(AssertionError): 36 | Cluster( 37 | "some_name", "some_version", "some_vm", min_workers=8, max_workers=4 38 | ) 39 | 40 | with pytest.raises(AssertionError): 41 | Cluster( 42 | "some_name", 43 | "some_version", 44 | "some_vm", 45 | num_workers=3, 46 | min_workers=2, 47 | max_workers=4, 48 | ) 49 | 50 | with pytest.raises(AssertionError): 51 | Cluster("some_name", "some_version", "some_vm", max_workers=4) 52 | 53 | def test_node_type_or_instance_pool(self): 54 | assert ( 55 | Cluster( 56 | "some_name", 57 | "some_version", 58 | node_type_id="some_vm", 59 | driver_node_type_id="other_vm", 60 | ).node_type_id 61 | == "some_vm" 62 | ) 63 | assert ( 64 | Cluster( 65 | "some_name", "some_version", instance_pool_id="some_instance_pool_id" 66 | ).instance_pool_id 67 | == "some_instance_pool_id" 68 | ) 69 | with pytest.raises( 70 | AssertionError, match="Must specify either instance_pool_id or node_type_id" 71 | ): 72 | Cluster( 73 | "some_name", 74 | "some_version", 75 | ) 76 | 77 | with pytest.raises( 78 | AssertionError, 79 | match="Cannot specify instance_pool_id if node_type_id has been specified", 80 | ): 81 | Cluster( 82 | "some_name", 83 | "some_version", 84 | node_type_id="some_vm", 85 | instance_pool_id="1234", 86 | ) 87 | with pytest.raises( 88 | AssertionError, 89 | match=( 90 | "Cannot specify driver_node_type_id if instance_pool_id" 91 | " or driver_instance_pool_id has been specified" 92 | ), 93 | ): 94 | Cluster( 95 | "some_name", 96 | "some_version", 97 | driver_node_type_id="other_vm", 98 | 
instance_pool_id="1234", 99 | ) 100 | with pytest.raises( 101 | AssertionError, 102 | match=( 103 | "Cannot specify driver_node_type_id if instance_pool_id" 104 | " or driver_instance_pool_id has been specified" 105 | ), 106 | ): 107 | Cluster( 108 | "some_name", 109 | "some_version", 110 | node_type_id="some_vm", 111 | driver_node_type_id="other_vm", 112 | driver_instance_pool_id="1234", 113 | ) 114 | with pytest.raises( 115 | AssertionError, 116 | match=( 117 | "Cannot specify driver_node_type_id if instance_pool_id" 118 | " or driver_instance_pool_id has been specified" 119 | ), 120 | ): 121 | Cluster( 122 | "some_name", 123 | "some_version", 124 | driver_node_type_id="other_vm", 125 | instance_pool_id="1234", 126 | driver_instance_pool_id="12345", 127 | ) 128 | -------------------------------------------------------------------------------- /tests/engine/test_engine.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | 3 | from brickflow.engine import ( 4 | get_current_commit, 5 | ) 6 | 7 | 8 | class TestEngine: 9 | def test_get_current_commit(self, mocker): 10 | branch = "some_random_sha" 11 | mocker.patch("subprocess.check_output") 12 | subprocess.check_output.return_value = branch.encode("utf-8") 13 | assert get_current_commit() == branch 14 | subprocess.check_output.assert_called_once_with( 15 | ['git log -n 1 --pretty=format:"%H"'], shell=True 16 | ) # noqa 17 | -------------------------------------------------------------------------------- /tests/engine/test_utils.py: -------------------------------------------------------------------------------- 1 | import re 2 | import pathlib 3 | import pytest 4 | from requests_mock.mocker import Mocker as RequestsMocker 5 | 6 | from pydantic import SecretStr 7 | 8 | from brickflow.engine.utils import get_job_id, ctx, get_bf_project_root 9 | 10 | 11 | class TestUtils: 12 | workspace_url = "https://42.cloud.databricks.com" 13 | endpoint_url = f"{workspace_url}/api/.*/jobs/list" 14 | 15 | ctx.log.propagate = True 16 | 17 | @pytest.fixture(autouse=True, name="api", scope="class") 18 | def mock_api(self): 19 | rm = RequestsMocker() 20 | rm.register_uri( 21 | method="GET", 22 | url=re.compile(self.endpoint_url), 23 | response_list=[ 24 | { 25 | "json": {"jobs": [{"job_id": 1234, "settings": {"name": "foo"}}]}, 26 | "status_code": int(200), 27 | }, 28 | { 29 | "json": {"has_more": False}, 30 | "status_code": int(200), 31 | }, 32 | { 33 | "json": {}, 34 | "status_code": int(404), 35 | }, 36 | ], 37 | ) 38 | yield rm 39 | 40 | def test_get_job_id_success(self, api): 41 | with api: 42 | job_id = get_job_id( 43 | job_name="foo", 44 | host=self.workspace_url, 45 | token=SecretStr("token"), 46 | ) 47 | assert job_id == 1234 48 | 49 | def test_get_job_id_failure(self, api): 50 | with pytest.raises(ValueError): 51 | with api: 52 | get_job_id(job_name="bar", host=self.workspace_url, token="token") 53 | 54 | def test_get_job_id_non_200(self, caplog, api): 55 | with api: 56 | get_job_id(job_name="buz", host=self.workspace_url, token="token") 57 | assert "An error occurred: request failed" in caplog.text 58 | 59 | def test_get_bf_project_root(self): 60 | # Set up expected path which is the root of the repo 61 | expected_root = pathlib.Path.cwd().parents[0] 62 | # Execute the function 63 | actual_root = get_bf_project_root() 64 | # Assert the result 65 | assert actual_root == expected_root 66 | -------------------------------------------------------------------------------- 
/tests/resolver/test_resolver.py: -------------------------------------------------------------------------------- 1 | # test_resolver.py 2 | from typing import Type 3 | 4 | import pytest 5 | 6 | import brickflow 7 | from brickflow.resolver import ( 8 | BrickflowRootNotFound, 9 | ) 10 | 11 | 12 | @pytest.fixture 13 | def default_mocks(mocker): 14 | # Create mocks for the three methods 15 | mocker.patch( 16 | "brickflow.resolver.get_caller_file_paths", return_value=["path1", "path2"] 17 | ) 18 | mocker.patch( 19 | "brickflow.resolver.get_notebook_ws_path", return_value="/notebook/ws/path" 20 | ) 21 | 22 | 23 | def test_resolver_methods(default_mocks, mocker): # noqa 24 | error_msg = "This is a test message" 25 | 26 | def make_exception_function(exc: Type[Exception]): 27 | def raise_exception(*args, **kwargs): 28 | raise exc(error_msg) 29 | 30 | return raise_exception 31 | 32 | # catch random error 33 | mocker.patch( 34 | "brickflow.resolver.go_up_till_brickflow_root", 35 | side_effect=make_exception_function(ValueError), 36 | ) 37 | with pytest.raises(ValueError, match=error_msg): 38 | brickflow.resolver.get_relative_path_to_brickflow_root() 39 | 40 | mocker.patch( 41 | "brickflow.resolver.go_up_till_brickflow_root", 42 | side_effect=make_exception_function(BrickflowRootNotFound), 43 | ) 44 | 45 | brickflow.resolver.get_relative_path_to_brickflow_root() 46 | 47 | mocker.patch( 48 | "brickflow.resolver.go_up_till_brickflow_root", 49 | side_effect=make_exception_function(PermissionError), 50 | ) 51 | 52 | brickflow.resolver.get_relative_path_to_brickflow_root() 53 | -------------------------------------------------------------------------------- /tests/sample_workflows/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nike-Inc/brickflow/44560e2be1d3b27f587d916d9f6ef81b0edade3d/tests/sample_workflows/__init__.py -------------------------------------------------------------------------------- /tests/sample_workflows/sample_workflow_1.py: -------------------------------------------------------------------------------- 1 | from brickflow.engine.compute import Cluster 2 | from brickflow.engine.task import BrickflowTriggerRule, TaskType, TaskResponse 3 | from brickflow.engine.workflow import Workflow 4 | 5 | wf = Workflow( 6 | "test", 7 | default_cluster=Cluster.from_existing_cluster("XXXX-XXXXXX-XXXXXXXX"), 8 | tags={"test": "test2"}, 9 | common_task_parameters={"all_tasks1": "test", "all_tasks3": "123"}, # type: ignore 10 | ) 11 | 12 | 13 | @wf.task() 14 | def task_function(): 15 | return "hello world" 16 | 17 | 18 | @wf.task 19 | def task_function_no_deco_args(): 20 | return "hello world" 21 | 22 | 23 | @wf.task() 24 | def task_function_nokwargs(): 25 | return "hello world" 26 | 27 | 28 | @wf.task(depends_on=task_function) 29 | def task_function_2(): 30 | return "hello world" 31 | 32 | 33 | @wf.task(depends_on="task_function_2") 34 | def task_function_3(): 35 | return "hello world" 36 | 37 | 38 | @wf.task(depends_on="task_function_3", trigger_rule=BrickflowTriggerRule.NONE_FAILED) 39 | def task_function_4(): 40 | return "hello world" 41 | 42 | 43 | @wf.task( 44 | task_type=TaskType.CUSTOM_PYTHON_TASK, 45 | trigger_rule=BrickflowTriggerRule.NONE_FAILED, 46 | custom_execute_callback=lambda x: TaskResponse(x.name, push_return_value=True), 47 | ) 48 | def custom_python_task_push(): 49 | pass 50 | -------------------------------------------------------------------------------- /tests/sample_workflows/sample_workflow_2.py: 
-------------------------------------------------------------------------------- 1 | from brickflow.engine.compute import Cluster 2 | from brickflow.engine.task import BrickflowTriggerRule, TaskType, TaskResponse 3 | from brickflow.engine.workflow import Workflow 4 | 5 | wf = Workflow( 6 | "test2", 7 | default_cluster=Cluster.from_existing_cluster("XXXX-XXXXXX-XXXXXXXX"), 8 | tags={"test": "test2"}, 9 | ) 10 | 11 | 12 | @wf.task() 13 | def task_function(): 14 | return "hello world" 15 | 16 | 17 | @wf.task 18 | def task_function_no_deco_args(): 19 | return "hello world" 20 | 21 | 22 | @wf.task() 23 | def task_function_nokwargs(): 24 | return "hello world" 25 | 26 | 27 | @wf.task(depends_on=task_function) 28 | def task_function_2(): 29 | return "hello world" 30 | 31 | 32 | @wf.task(depends_on="task_function_2") 33 | def task_function_3(): 34 | return "hello world" 35 | 36 | 37 | @wf.task(depends_on="task_function_3", trigger_rule=BrickflowTriggerRule.NONE_FAILED) 38 | def task_function_4(): 39 | return "hello world" 40 | 41 | 42 | @wf.task( 43 | task_type=TaskType.CUSTOM_PYTHON_TASK, 44 | trigger_rule=BrickflowTriggerRule.NONE_FAILED, 45 | custom_execute_callback=lambda x: TaskResponse(x.name, push_return_value=True), 46 | ) 47 | def custom_python_task_push(): 48 | pass 49 | -------------------------------------------------------------------------------- /tests/test_brickflow.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=unused-import 2 | import pytest 3 | from brickflow import get_config_file_type, ConfigFileType 4 | 5 | 6 | def test_imports(): 7 | try: 8 | from brickflow import ( 9 | log, 10 | _ilog, 11 | BrickflowEnvVars, 12 | BrickflowDefaultEnvs, 13 | ctx, 14 | Workflow, 15 | WorkflowPermissions, 16 | User, 17 | Group, 18 | ServicePrincipal, 19 | Task, 20 | TaskType, 21 | TaskResponse, 22 | BrickflowTriggerRule, 23 | BrickflowTaskEnvVars, 24 | StorageBasedTaskLibrary, 25 | JarTaskLibrary, 26 | EggTaskLibrary, 27 | WheelTaskLibrary, 28 | PypiTaskLibrary, 29 | MavenTaskLibrary, 30 | CranTaskLibrary, 31 | EmailNotifications, 32 | DLTPipeline, 33 | DLTEdition, 34 | DLTChannels, 35 | Cluster, 36 | Runtimes, 37 | Project, 38 | ) 39 | 40 | print("All imports Succeeded") 41 | except ImportError as e: 42 | print(f"Import failed: {e}") 43 | 44 | 45 | @pytest.mark.parametrize( 46 | "config_file_name,expected_extension", 47 | [ 48 | (".brickflow-project-root.yaml", ConfigFileType.YAML), 49 | (".brickflow-project-root.yml", ConfigFileType.YML), 50 | (".brickflow-project-root.json", ConfigFileType.YAML), 51 | ], 52 | ) 53 | def test_get_config_type(config_file_name, expected_extension): 54 | actual = get_config_file_type(f"some/brickflow/root/{config_file_name}") 55 | assert actual == expected_extension 56 | -------------------------------------------------------------------------------- /tests/test_plugins.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from typing import List 3 | from unittest import mock 4 | 5 | import pluggy 6 | import pytest 7 | 8 | from brickflow.engine.task import get_plugin_manager, get_brickflow_tasks_hook 9 | 10 | 11 | def assert_plugin_manager( 12 | pm: pluggy.PluginManager, expected_plugins: List[str] 13 | ) -> None: 14 | num_expected_plugins = len(expected_plugins) 15 | assert ( 16 | len(pm.get_plugins()) == num_expected_plugins 17 | ), f"import error should only {num_expected_plugins} plugins" 18 | for plugin in expected_plugins: 19 | assert 
pm.has_plugin(plugin), f"plugin manager should have {plugin} plugin" 20 | 21 | all_plugins = set([pm.get_name(plugin_impl) for plugin_impl in pm.get_plugins()]) 22 | assert all_plugins == set(expected_plugins), ( 23 | f"plugin manager should have {expected_plugins} " f"plugins and nothing more" 24 | ) 25 | 26 | 27 | class TestBrickflowPlugins: 28 | def test_plugins_installed(self): 29 | pm = copy.deepcopy(get_plugin_manager()) 30 | get_brickflow_tasks_hook(pm) 31 | assert_plugin_manager(pm, ["airflow-plugin", "default"]) 32 | 33 | def test_plugins_load_plugins_import_error(self): 34 | with mock.patch("brickflow_plugins.load_plugins") as load_plugins_mock: 35 | load_plugins_mock.side_effect = ImportError 36 | pm = copy.deepcopy(get_plugin_manager()) 37 | get_brickflow_tasks_hook(pm) 38 | assert_plugin_manager(pm, ["default"]) 39 | 40 | def test_plugins_ensure_installation_import_error(self): 41 | with mock.patch("brickflow_plugins.ensure_installation") as load_plugins_mock: 42 | load_plugins_mock.side_effect = ImportError 43 | pm = copy.deepcopy(get_plugin_manager()) 44 | get_brickflow_tasks_hook(pm) 45 | assert_plugin_manager(pm, ["default"]) 46 | 47 | @pytest.mark.parametrize( 48 | "quartz_cron, expected_unix_cron", 49 | [ 50 | ("0 * * ? * * *", "* * * * *"), 51 | ("0 */5 * ? * * *", "*/5 * * * *"), 52 | ("0 30 * ? * * *", "30 * * * *"), 53 | ("0 0 12 ? * * *", "0 12 * * *"), 54 | ("0 0 12 ? * 2 *", "0 12 * * 1"), 55 | ("0 0 0 10 * ? *", "0 0 10 * *"), 56 | ("0 0 0 1 1 ? *", "0 0 1 1 *"), 57 | ("0 0/5 14,18 * * ?", "0/5 14,18 * * *"), 58 | ("0 0 12 ? * 1,2,5-7 *", "0 12 * * 0,1,4-6"), 59 | ("0 0 12 ? * SUN,MON,THU-SAT *", "0 12 * * SUN,MON,THU-SAT"), 60 | ], 61 | ) 62 | def test_cron_conversion(self, quartz_cron, expected_unix_cron): 63 | import brickflow_plugins.airflow.cronhelper as cronhelper # noqa 64 | 65 | converted_unix_cron = cronhelper.cron_helper.quartz_to_unix(quartz_cron) 66 | converted_quartz_cron = cronhelper.cron_helper.unix_to_quartz( 67 | converted_unix_cron 68 | ) 69 | converted_unix_cron_second = cronhelper.cron_helper.quartz_to_unix( 70 | converted_quartz_cron 71 | ) 72 | 73 | assert ( 74 | converted_unix_cron == converted_unix_cron_second 75 | ), "cron conversion should be idempotent" 76 | assert converted_unix_cron == expected_unix_cron 77 | 78 | @pytest.mark.parametrize( 79 | "quartz_cron", 80 | [ 81 | "0 0 12 ? * L *", 82 | "0 0 12 ? * 1L *", 83 | "0 0 12 ? * 1W *", 84 | "0 0 12 ? * 1#5 *", 85 | ], 86 | ) 87 | def test_unsupported_cron_expressions(self, quartz_cron): 88 | import brickflow_plugins.airflow.cronhelper as cronhelper # noqa 89 | 90 | with pytest.raises(ValueError): 91 | cronhelper.cron_helper.quartz_to_unix(quartz_cron) 92 | -------------------------------------------------------------------------------- /tools/README.md: -------------------------------------------------------------------------------- 1 | # Code generate tools 2 | 3 | Use this to code generate `brickflow/bundles/model.py` 4 | 5 | Make sure you are in the repository root and are using a *nix machine. 6 | 7 | ```shell 8 | ./tools/gen-bundle.sh # example: ./tools/gen-bundle.sh 0.201.0 9 | ``` 10 | 11 | Please note the version defaults to what is defaulted in brickflow. 
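
The version argument is forwarded to the helper scripts as the `BUNDLE_CODE_GEN_CLI_VERSION` environment variable; when it is omitted, the scripts fall back to the CLI version pinned in brickflow. As a rough sketch (assuming the poetry environment is already installed, and using `0.203.0` purely as an illustrative version), the first two steps of the pipeline can also be run by hand:

```shell
# Illustrative only: gen-bundle.sh performs these steps (and more) for you.
export BUNDLE_CODE_GEN_CLI_VERSION="0.203.0"
poetry run python tools/install_databricks_cli.py   # sets up the Databricks CLI and dumps brickflow/bundles/schema.json
poetry run python tools/modify_schema.py             # prepares the transformed schema that datamodel-codegen consumes
```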
-------------------------------------------------------------------------------- /tools/gen-bundle.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Check if the version argument is provided 4 | if [ $# -lt 1 ]; then 5 | echo "Usage: $0 <version>" 6 | # exit 1 7 | fi 8 | 9 | set -e # Exit on any command failure 10 | 11 | # Set the provided version as an environment variable 12 | export BUNDLE_CODE_GEN_CLI_VERSION="$1" 13 | 14 | rm -rf .databricks/bin/cli/ 15 | poetry install 16 | poetry run python tools/install_databricks_cli.py 17 | poetry run python tools/modify_schema.py 18 | poetry run datamodel-codegen --input brickflow/bundles/transformed_schema.json \ 19 | --use-title-as-name \ 20 | --disable-appending-item-suffix \ 21 | --collapse-root-models \ 22 | --capitalise-enum-members \ 23 | --enum-field-as-literal all \ 24 | --input-file-type jsonschema \ 25 | --output brickflow/bundles/model.py 26 | echo "✅ Code generation completed successfully!" 27 | poetry run python tools/modify_model.py 28 | echo "✅ Updated and patched model successfully!" 29 | echo "# generated with Databricks CLI Version: $(.databricks/bin/cli/*/databricks --version)" | \ 30 | cat - brickflow/bundles/model.py > /tmp/codegen && \ 31 | mv /tmp/codegen brickflow/bundles/model.py 32 | echo "✅ Modified the front matter of the script!" 33 | poetry run python brickflow/bundles/model.py # validate python file 34 | echo "✅ Validated the file is proper python code!" 35 | -------------------------------------------------------------------------------- /tools/install_databricks_cli.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | try: 4 | from brickflow import BrickflowEnvVars 5 | from brickflow.cli import bundle_cli_setup 6 | from brickflow.cli.bundles import get_valid_bundle_cli 7 | from brickflow.engine import _call 8 | except ImportError: 9 | raise ImportError("Please install brickflow to use this script") 10 | 11 | if __name__ == "__main__": 12 | cli_version = os.environ.get("BUNDLE_CODE_GEN_CLI_VERSION", None) 13 | if cli_version is not None and cli_version != "": 14 | os.environ[BrickflowEnvVars.BRICKFLOW_BUNDLE_CLI_VERSION.value] = cli_version 15 | 16 | bundle_cli_setup() 17 | bundle_cli = get_valid_bundle_cli( 18 | os.environ[BrickflowEnvVars.BRICKFLOW_BUNDLE_CLI_EXEC.value] 19 | ) 20 | print(f"Using Databricks CLI: {bundle_cli}") 21 | print(_call(f"{bundle_cli} --version", shell=True).decode("utf-8")) 22 | _call(f"{bundle_cli} bundle schema > brickflow/bundles/schema.json", shell=True) 23 | -------------------------------------------------------------------------------- /tools/modify_model.py: -------------------------------------------------------------------------------- 1 | if __name__ == "__main__": 2 | import re 3 | 4 | # string = "class Artifacts1(BaseModel)" 5 | regex_pattern = r"(?<=class\s)[A-Za-z]\w+" 6 | file_path = "brickflow/bundles/model.py" 7 | 8 | bad_class_names = {} 9 | 10 | def remove_number_from_end(string): 11 | match = re.search(r"\d+$", string) 12 | if match: 13 | number = match.group(0) 14 | string_without_number = string[: -len(number)] 15 | return string_without_number 16 | else: 17 | return None 18 | 19 | def remove_timestamp_line(input_code: str) -> str: 20 | return "\n".join( 21 | [ 22 | _line 23 | for _line in input_code.split("\n") 24 | if not _line.startswith("# timestamp: ") 25 | ] 26 | ) 27 | 28 | def replace_class_config_extras(input_code: str) -> str: 29 | pattern =
r"extra\s*=\s*Extra\.forbid" 30 | return re.sub( 31 | pattern, 'extra = "forbid"\n protected_namespaces = ()', input_code 32 | ) 33 | 34 | def replace_regex_with_pattern(input_code: str) -> str: 35 | pattern = r"regex=" 36 | return re.sub(pattern, "pattern=", input_code) 37 | 38 | with open(file_path, "r") as f: 39 | lines = f.readlines() 40 | for line in lines: 41 | match = re.search(regex_pattern, line) 42 | if match: 43 | dynamic_value = match.group(0) 44 | if remove_number_from_end(dynamic_value): 45 | bad_class_names[dynamic_value] = remove_number_from_end( 46 | dynamic_value 47 | ) 48 | 49 | with open(file_path, "r") as r: 50 | data = r.read() 51 | 52 | with open(file_path, "w") as w: 53 | for key, value in bad_class_names.items(): 54 | data = data.replace(key, value) 55 | data = remove_timestamp_line(data) 56 | # remove extra config to remove deprecation warning 57 | data = replace_class_config_extras(data) 58 | # replace regex with pattern 59 | data = replace_regex_with_pattern(data) 60 | w.write(data) 61 | --------------------------------------------------------------------------------