├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── Logo.png ├── Symbol.png ├── prism_logo_dark.png ├── prism_logo_light.png └── workflows │ ├── ci-linux.yml │ ├── ci-macos.yml │ ├── imports-linux.yml │ ├── imports-macosx.yml │ ├── imports-windows.yml │ ├── python-publish.yml │ └── style.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── dev_requirements.txt ├── prism ├── __init__.py ├── admin.py ├── callbacks │ ├── __init__.py │ └── callback.py ├── cli │ ├── __init__.py │ └── init.py ├── client │ ├── __init__.py │ ├── client.py │ ├── parser.py │ ├── runner.py │ └── visualizer.py ├── connectors │ ├── __init__.py │ ├── base.py │ ├── bigquery.py │ ├── postgres.py │ ├── presto.py │ ├── redshift.py │ ├── snowflake.py │ └── trino.py ├── constants.py ├── db │ ├── __init__.py │ ├── factory.py │ ├── mixins.py │ └── setup.py ├── decorators │ ├── __init__.py │ ├── target.py │ └── task.py ├── docs │ ├── __init__.py │ └── build │ │ ├── 311ea03002abadcdcaba.png │ │ ├── __init__.py │ │ ├── ae8a93980ebb6c55123b.ico │ │ ├── ce188596011a8fa32931.png │ │ ├── d4df11de40d39920ff8c.svg │ │ ├── index.html │ │ ├── index2.html │ │ └── main.js.LICENSE.txt ├── engine │ ├── __init__.py │ ├── compiled_task.py │ ├── compiler.py │ ├── executor.py │ ├── manifest.py │ └── module.py ├── exceptions.py ├── logging │ ├── __init__.py │ ├── events.py │ ├── execution.py │ └── loggers.py ├── main.py ├── runtime │ ├── __init__.py │ └── current_run.py ├── target.py ├── task.py ├── templates │ ├── __init__.py │ └── starter_project │ │ ├── .gitignore │ │ ├── __init__.py │ │ ├── main.py │ │ ├── output │ │ └── .exists │ │ └── tasks │ │ ├── class_task.py │ │ └── decorated_task.py ├── tests │ ├── __init__.py │ ├── integration │ │ ├── __init__.py │ │ ├── additional_package │ │ │ ├── __init__.py │ │ │ ├── cli_callbacks.py │ │ │ ├── cli_connectors.py │ │ │ └── utils.py │ │ ├── integration_utils.py │ │ 
├── test_cli.py │ │ ├── test_projects │ │ │ ├── 001_init │ │ │ │ ├── .gitignore │ │ │ │ ├── __init__.py │ │ │ │ ├── main.py │ │ │ │ ├── output │ │ │ │ │ └── .exists │ │ │ │ └── tasks │ │ │ │ │ ├── class_task.py │ │ │ │ │ └── decorated_task.py │ │ │ ├── 004_simple_project │ │ │ │ ├── __init__.py │ │ │ │ ├── dev │ │ │ │ │ └── dev.ipynb │ │ │ │ ├── modules │ │ │ │ │ ├── module01.py │ │ │ │ │ ├── module02.py │ │ │ │ │ └── module03.py │ │ │ │ └── output │ │ │ │ │ └── .exists │ │ │ ├── 005_simple_project_no_null_tasks │ │ │ │ ├── __init__.py │ │ │ │ ├── output │ │ │ │ │ ├── task01.txt │ │ │ │ │ └── task02.txt │ │ │ │ └── tasks │ │ │ │ │ ├── module01.py │ │ │ │ │ ├── module02.py │ │ │ │ │ ├── module03.py │ │ │ │ │ └── module04.py │ │ │ ├── 010_project_nested_module_dirs │ │ │ │ ├── __init__.py │ │ │ │ ├── dev │ │ │ │ │ └── dev.ipynb │ │ │ │ ├── output │ │ │ │ │ ├── task01.txt │ │ │ │ │ └── task02.txt │ │ │ │ ├── prism_project.py │ │ │ │ └── tasks │ │ │ │ │ ├── extract │ │ │ │ │ ├── module01.py │ │ │ │ │ └── module02.py │ │ │ │ │ ├── load │ │ │ │ │ └── module03.py │ │ │ │ │ └── module04.py │ │ │ ├── 011_bad_task_ref │ │ │ │ ├── __init__.py │ │ │ │ ├── dev │ │ │ │ │ └── dev.ipynb │ │ │ │ ├── modules │ │ │ │ │ ├── extract │ │ │ │ │ │ ├── module01.py │ │ │ │ │ │ └── module02.py │ │ │ │ │ ├── load │ │ │ │ │ │ └── module03.py │ │ │ │ │ └── module04.py │ │ │ │ ├── output │ │ │ │ │ ├── module01.txt │ │ │ │ │ └── module02.txt │ │ │ │ └── prism_project.py │ │ │ ├── 012_concurrency │ │ │ │ ├── __init__.py │ │ │ │ ├── dev │ │ │ │ │ └── dev.ipynb │ │ │ │ ├── modules │ │ │ │ │ ├── module01.py │ │ │ │ │ ├── module02.py │ │ │ │ │ ├── module03.py │ │ │ │ │ └── module04.py │ │ │ │ ├── output │ │ │ │ │ └── .exists │ │ │ │ └── prism_project.py │ │ │ ├── 013_connectors │ │ │ │ ├── __init__.py │ │ │ │ ├── output │ │ │ │ │ └── .exists │ │ │ │ └── tasks │ │ │ │ │ ├── bad_adapter.py │ │ │ │ │ ├── postgres_task.py │ │ │ │ │ ├── snowflake_task.py │ │ │ │ │ └── spark_task.py │ │ │ ├── 
014_project_with_package_lookup │ │ │ │ ├── __init__.py │ │ │ │ ├── output │ │ │ │ │ └── .exists │ │ │ │ └── tasks │ │ │ │ │ └── module01.py │ │ │ ├── 020_dec_retries │ │ │ │ ├── __init__.py │ │ │ │ ├── output │ │ │ │ │ └── .exists │ │ │ │ ├── prism_project.py │ │ │ │ ├── tasks │ │ │ │ │ ├── extract.py │ │ │ │ │ └── load.py │ │ │ │ └── triggers.yml │ │ │ ├── 023_skipped_task │ │ │ │ ├── __init__.py │ │ │ │ ├── callback.txt │ │ │ │ ├── output │ │ │ │ │ ├── .exists │ │ │ │ │ ├── task01.txt │ │ │ │ │ └── task02.txt │ │ │ │ ├── prism_project.py │ │ │ │ └── tasks │ │ │ │ │ ├── task01.py │ │ │ │ │ └── task02.py │ │ │ └── common │ │ │ │ ├── __init__.py │ │ │ │ └── functions.py │ │ ├── test_run.py │ │ └── test_visualizer.py │ └── unit │ │ ├── __init__.py │ │ ├── test_callbacks.py │ │ ├── test_compiled_projects │ │ ├── __init__.py │ │ ├── dag_cycle │ │ │ ├── __init__.py │ │ │ ├── moduleA.py │ │ │ ├── moduleB.py │ │ │ ├── moduleC.py │ │ │ ├── moduleD.py │ │ │ └── moduleE.py │ │ ├── task_ref_15nodes │ │ │ ├── __init__.py │ │ │ ├── task01.py │ │ │ ├── task02.py │ │ │ ├── task03.py │ │ │ ├── task04.py │ │ │ ├── task05.py │ │ │ ├── task06.py │ │ │ ├── task07.py │ │ │ ├── task08.py │ │ │ ├── task09.py │ │ │ ├── task10.py │ │ │ ├── task11.py │ │ │ ├── task12.py │ │ │ ├── task13.py │ │ │ ├── task14.py │ │ │ └── task15.py │ │ ├── task_ref_3nodes │ │ │ ├── __init__.py │ │ │ ├── task01.py │ │ │ ├── task02.py │ │ │ └── task03.py │ │ ├── task_ref_5nodes │ │ │ ├── __init__.py │ │ │ ├── taskA.py │ │ │ ├── taskB.py │ │ │ ├── taskC.py │ │ │ ├── taskD.py │ │ │ └── taskE.py │ │ ├── task_ref_norefs │ │ │ ├── __init__.py │ │ │ ├── moduleA.py │ │ │ ├── moduleB.py │ │ │ ├── moduleC.py │ │ │ ├── moduleD.py │ │ │ └── moduleE.py │ │ └── task_ref_selfref │ │ │ ├── __init__.py │ │ │ ├── moduleA.py │ │ │ ├── moduleB.py │ │ │ ├── moduleC.py │ │ │ ├── moduleD.py │ │ │ └── moduleE.py │ │ ├── test_compiler.py │ │ ├── test_import.py │ │ ├── test_module.py │ │ ├── test_project_parser.py │ │ └── test_tasks │ 
│ ├── cls_bad_run_extra_arg.py │ │ ├── cls_diff_import_structure.py │ │ ├── cls_multiple_prism_tasks.py │ │ ├── cls_no_prism_task.py │ │ ├── cls_no_run_func.py │ │ ├── cls_one_prism_task.py │ │ ├── cls_other_classes.py │ │ ├── cls_task_with_id.py │ │ ├── cls_task_with_target.py │ │ ├── cls_tasks_refs.py │ │ ├── dec_bad_dec_no_parentheses.py │ │ ├── dec_bad_run_extra_arg.py │ │ ├── dec_diff_decorator_structure.py │ │ ├── dec_multiple_prism_tasks.py │ │ ├── dec_no_prism_task.py │ │ ├── dec_one_prism_task.py │ │ ├── dec_other_functions.py │ │ ├── dec_task_with_id.py │ │ ├── dec_task_with_target.py │ │ ├── dec_tasks_refs.py │ │ └── example_tasks_dir │ │ ├── func_0.py │ │ ├── func_1.py │ │ ├── hello.py │ │ ├── nested │ │ └── foo.py │ │ └── world.py └── utils.py ├── pyproject.toml ├── setup.cfg ├── setup.py └── tox.ini /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **Project structure** 14 | Create a skeleton below with your project structure. 15 | ``` 16 | project 17 | |-- prism_project.py 18 | |-- tasks 19 | |-- task01.py 20 | ... 21 | ... 22 | ``` 23 | 24 | **CLI Arguments** 25 | Copy and paste the CLI command used to produce the error (e.g., ```$ prism run```) 26 | 27 | **Traceback** 28 | Run your command using the ```--full-tb``` option and paste a screenshot of the resulting error message. 29 | 30 | **Expected behavior** 31 | A clear and concise description of what you expected to happen. 32 | 33 | **Desktop (please complete the following information):** 34 | - OS: [e.g., iOS] 35 | - Python version: [e.g., 3.7] 36 | 37 | **Additional context** 38 | Add any other context about the problem here. 
39 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/Logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/.github/Logo.png -------------------------------------------------------------------------------- /.github/Symbol.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/.github/Symbol.png -------------------------------------------------------------------------------- /.github/prism_logo_dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/.github/prism_logo_dark.png -------------------------------------------------------------------------------- /.github/prism_logo_light.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/.github/prism_logo_light.png -------------------------------------------------------------------------------- /.github/workflows/ci-linux.yml: -------------------------------------------------------------------------------- 1 | name: CI Linux 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | paths: 8 | - 'prism/**' 9 | pull_request: 10 | branches: 11 | - main 12 | paths: 13 | - 'prism/**' 14 | 15 | jobs: 16 | test: 17 | runs-on: ubuntu-latest 18 | strategy: 19 | matrix: 20 | python-version: ['3.8', '3.9', '3.10', '3.11'] 21 | steps: 22 | - uses: actions/checkout@v2 23 | - name: Set up Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v2 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Install dependencies 28 | run: | 29 | python -m pip install --upgrade pip 30 | pip install tox tox-gh-actions 31 | - name: Test with tox 32 | env: 33 | SHELL: ${{ secrets.SHELL }} 34 | SNOWFLAKE_ACCOUNT: ${{ secrets.SNOWFLAKE_ACCOUNT }} 35 | SNOWFLAKE_DATABASE: ${{ secrets.SNOWFLAKE_DATABASE }} 36 | SNOWFLAKE_PASSWORD: ${{ secrets.SNOWFLAKE_PASSWORD }} 37 | SNOWFLAKE_ROLE: ${{ secrets.SNOWFLAKE_ROLE }} 38 | SNOWFLAKE_SCHEMA: ${{ secrets.SNOWFLAKE_SCHEMA }} 39 | SNOWFLAKE_USER: ${{ secrets.SNOWFLAKE_USER }} 40 | SNOWFLAKE_WAREHOUSE: ${{ secrets.SNOWFLAKE_WAREHOUSE }} 41 | POSTGRES_USER: ${{ secrets.POSTGRES_USER }} 42 | POSTGRES_PASSWORD: ${{ secrets.POSTGRES_PASSWORD }} 43 | POSTGRES_DB: ${{ secrets.POSTGRES_DB }} 44 | POSTGRES_HOST: ${{ secrets.POSTGRES_HOST }} 45 | GOOGLE_APPLICATION_CREDENTIALS: ${{ secrets.GOOGLE_APPLICATION_CREDENTIALS }} 46 | run: tox 47 | -------------------------------------------------------------------------------- /.github/workflows/ci-macos.yml: -------------------------------------------------------------------------------- 1 | name: CI MacOS 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | paths: 8 | - 'prism/**' 9 | 
pull_request: 10 | branches: 11 | - main 12 | paths: 13 | - 'prism/**' 14 | 15 | jobs: 16 | test: 17 | runs-on: macos-latest 18 | strategy: 19 | matrix: 20 | python-version: ['3.8', '3.9', '3.10', '3.11'] 21 | steps: 22 | - uses: actions/checkout@v2 23 | - name: Set up Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v2 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Install dependencies 28 | run: | 29 | python -m pip install --upgrade pip 30 | pip install tox tox-gh-actions 31 | - name: Test with tox 32 | env: 33 | SHELL: ${{ secrets.SHELL }} 34 | SNOWFLAKE_ACCOUNT: ${{ secrets.SNOWFLAKE_ACCOUNT }} 35 | SNOWFLAKE_DATABASE: ${{ secrets.SNOWFLAKE_DATABASE }} 36 | SNOWFLAKE_PASSWORD: ${{ secrets.SNOWFLAKE_PASSWORD }} 37 | SNOWFLAKE_ROLE: ${{ secrets.SNOWFLAKE_ROLE }} 38 | SNOWFLAKE_SCHEMA: ${{ secrets.SNOWFLAKE_SCHEMA }} 39 | SNOWFLAKE_USER: ${{ secrets.SNOWFLAKE_USER }} 40 | SNOWFLAKE_WAREHOUSE: ${{ secrets.SNOWFLAKE_WAREHOUSE }} 41 | POSTGRES_USER: ${{ secrets.POSTGRES_USER }} 42 | POSTGRES_PASSWORD: ${{ secrets.POSTGRES_PASSWORD }} 43 | POSTGRES_DB: ${{ secrets.POSTGRES_DB }} 44 | POSTGRES_HOST: ${{ secrets.POSTGRES_HOST }} 45 | GOOGLE_APPLICATION_CREDENTIALS: ${{ secrets.GOOGLE_APPLICATION_CREDENTIALS }} 46 | run: tox 47 | -------------------------------------------------------------------------------- /.github/workflows/imports-linux.yml: -------------------------------------------------------------------------------- 1 | name: Imports (Linux) 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | jobs: 12 | test: 13 | runs-on: ubuntu-latest 14 | strategy: 15 | matrix: 16 | python-version: ['3.9'] 17 | steps: 18 | - uses: actions/checkout@v2 19 | - name: Set up Python ${{ matrix.python-version }} 20 | uses: actions/setup-python@v2 21 | with: 22 | python-version: ${{ matrix.python-version }} 23 | - name: Test imports 24 | run: 25 | python -m pip install --upgrade pip 26 | 
pip install . 27 | pip install .[snowflake] 28 | pip install .[bigquery] 29 | pip install .[redshift] 30 | pip install .[postgres] 31 | pip install .[trino] 32 | pip install .[presto] 33 | pip install .[pyspark] 34 | pip install .[dbt] 35 | pip install .[docker] 36 | 37 | 38 | # EOF -------------------------------------------------------------------------------- /.github/workflows/imports-macosx.yml: -------------------------------------------------------------------------------- 1 | name: Imports (MacOS) 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | jobs: 12 | test: 13 | runs-on: macos-latest 14 | strategy: 15 | matrix: 16 | python-version: ['3.9'] 17 | steps: 18 | - uses: actions/checkout@v2 19 | - name: Set up Python ${{ matrix.python-version }} 20 | uses: actions/setup-python@v2 21 | with: 22 | python-version: ${{ matrix.python-version }} 23 | - name: Test imports 24 | run: 25 | python -m pip install --upgrade pip 26 | pip install . 
27 | pip install .[snowflake] 28 | pip install .[bigquery] 29 | pip install .[redshift] 30 | pip install .[postgres] 31 | pip install .[trino] 32 | pip install .[presto] 33 | pip install .[pyspark] 34 | pip install .[dbt] 35 | pip install .[docker] 36 | 37 | 38 | # EOF -------------------------------------------------------------------------------- /.github/workflows/imports-windows.yml: -------------------------------------------------------------------------------- 1 | name: Imports (Windows) 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | jobs: 12 | test: 13 | runs-on: windows-latest 14 | strategy: 15 | matrix: 16 | python-version: ['3.9'] 17 | steps: 18 | - uses: actions/checkout@v2 19 | - name: Set up Python ${{ matrix.python-version }} 20 | uses: actions/setup-python@v2 21 | with: 22 | python-version: ${{ matrix.python-version }} 23 | - name: Test imports 24 | run: 25 | python -m pip install --upgrade pip 26 | pip install . 27 | pip install .[snowflake] 28 | pip install .[bigquery] 29 | pip install .[redshift] 30 | pip install .[postgres] 31 | pip install .[trino] 32 | pip install .[presto] 33 | pip install .[pyspark] 34 | pip install .[dbt] 35 | pip install .[docker] 36 | 37 | 38 | # EOF -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | name: Upload Python Package 2 | 3 | on: 4 | push: 5 | tags: 6 | v* 7 | 8 | permissions: 9 | contents: read 10 | 11 | jobs: 12 | deploy: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v3 16 | - name: Set up Python 17 | uses: actions/setup-python@v3 18 | with: 19 | python-version: '3.x' 20 | - name: Install dependencies 21 | run: | 22 | python -m pip install --upgrade pip 23 | pip install build 24 | - name: Build package 25 | run: python -m build 26 | - name: Publish package 27 | uses: 
pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 28 | with: 29 | user: __token__ 30 | password: ${{ secrets.PYPI_API_TOKEN }} 31 | -------------------------------------------------------------------------------- /.github/workflows/style.yml: -------------------------------------------------------------------------------- 1 | name: mypy and ruff 2 | 3 | on: 4 | - push 5 | - pull_request 6 | 7 | jobs: 8 | test: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v2 12 | - name: Set up Python 3.10 13 | uses: actions/setup-python@v2 14 | with: 15 | python-version: '3.10' 16 | - name: Install dependencies 17 | run: | 18 | python -m pip install --upgrade pip 19 | pip install -e .[dev] 20 | - name: mypy 21 | run: | 22 | mypy prism/callbacks/ prism/cli/ prism/client/ prism/connectors/ prism/db/ prism/decorators/ prism/engine/ prism/logging/ prism/runtime/ prism/exceptions.py prism/main.py prism/target.py prism/task.py prism/utils.py 23 | ruff check prism/callbacks/ prism/cli/ prism/client/ prism/connectors/ prism/db/ prism/decorators/ prism/engine/ prism/logging/ prism/runtime/ prism/exceptions.py prism/main.py prism/target.py prism/task.py prism/utils.py 24 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | **/.py[cod] 3 | **/__pycache__ 4 | 5 | # OSX files 6 | **/.DS_Store 7 | 8 | # DBT artifacts 9 | **/.msgpack 10 | **/.user.yml 11 | 12 | # VSCode settings 13 | **/.vscode 14 | 15 | # Distribution / packaging 16 | *.egg-info/ 17 | pip-wheel-metadata/ 18 | dist/ 19 | 20 | # Testing 21 | .mypy_cache 22 | .pytest_cache 23 | .ruff_cache 24 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: 
https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.1.0 4 | hooks: 5 | - id: check-added-large-files 6 | - id: check-merge-conflict 7 | - id: detect-private-key 8 | - id: end-of-file-fixer 9 | - id: trailing-whitespace 10 | 11 | - repo: https://github.com/astral-sh/ruff-pre-commit 12 | rev: v0.1.14 13 | hooks: 14 | - id: ruff 15 | args: ["--fix"] 16 | - id: ruff-format 17 | 18 | - repo: https://github.com/PyCQA/isort 19 | rev: 5.11.5 20 | hooks: 21 | - id: isort 22 | args: [--profile=black] 23 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, caste, color, religion, or sexual 10 | identity and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 
14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the overall 26 | community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or advances of 31 | any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email address, 35 | without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 
55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | hello@runprism.com. 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series of 86 | actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or permanent 93 | ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 
99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within the 113 | community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.1, available at 119 | [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. 120 | 121 | Community Impact Guidelines were inspired by 122 | [Mozilla's code of conduct enforcement ladder][Mozilla CoC]. 123 | 124 | For answers to common questions about this code of conduct, see the FAQ at 125 | [https://www.contributor-covenant.org/faq][FAQ]. Translations are available at 126 | [https://www.contributor-covenant.org/translations][translations]. 127 | 128 | [homepage]: https://www.contributor-covenant.org 129 | [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html 130 | [Mozilla CoC]: https://github.com/mozilla/diversity 131 | [FAQ]: https://www.contributor-covenant.org/faq 132 | [translations]: https://www.contributor-covenant.org/translations -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | Thank you for considering contributing to Prism! 
We greatly appreciate your effort in making Prism a best-in-class product. 4 | 5 | ## How do I get started? 6 | 7 | Prism is currently in beta, so we are not accepting contributions at this time. Once we release our stable API, we will update the guidelines and begin accepting contributions! 8 | 9 | ## Raising an issue 10 | 11 | If you notice a bug, please raise an issue using the bug report template. 12 | 13 | ## Suggesting a new feature 14 | 15 | If you would like to suggest a new feature, please raise an issue using the feature request template. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include prism/templates *.png *.svg *.exists *.ico *.txt *.ipynb *.py *.sql *.yml *.html *.md .gitkeep .gitignore 2 | recursive-include prism/docs *.png *.svg *.ico *.txt *.html 3 | recursive-include prism/agents *.sh -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | prism logo 3 |

4 |

5 | 6 | PyPI 7 | 8 | 9 | 10 | 11 | 12 | 13 |

14 |
15 | 16 | [![CI Linux](https://github.com/runprism/prism/actions/workflows/ci-linux.yml/badge.svg)](https://github.com/runprism/prism/actions/workflows/ci-linux.yml) 17 | [![CI MacOS](https://github.com/runprism/prism/actions/workflows/ci-macos.yml/badge.svg)](https://github.com/runprism/prism/actions/workflows/ci-macos.yml) 18 | [![Checked with mypy](http://www.mypy-lang.org/static/mypy_badge.svg)](http://mypy-lang.org/) 19 | [![Checked with flake8](https://img.shields.io/badge/flake8-checked-blueviolet)](https://flake8.pycqa.org/en/latest/) 20 | 21 | 22 |
23 | 24 | # :wave: Welcome to Prism! 25 | [Prism](https://www.runprism.com/) is the easiest way to create data pipelines in Python. 26 | 27 | ## Introduction 28 | Data projects often require multiple steps that need to be executed in sequence (think extract-transform-load, data wrangling, etc.). With Prism, users can break down their project into modular tasks, manage dependencies, and execute complex computations in sequence. 29 | 30 | Here are some of Prism's main features: 31 | - **Real-time dependency declaration**: With Prism, analysts can declare dependencies using a simple function call. No need to explicitly keep track of the pipeline order — at runtime, Prism automatically parses the function calls and builds the dependency graph. 32 | - **Intuitive logging**: Prism automatically logs events for parsing the configuration files, compiling the tasks and creating the DAG, and executing the tasks. No configuration is required. 33 | - **Flexible CLI**: Users can instantiate, compile, and run projects using a simple, but powerful command-line interface. 34 | - **“Batteries included”**: Prism comes with all the essentials needed to get up and running quickly. Users can create and run their first DAG in less than 2 minutes. 35 | - **Integrations**: Prism integrates with several tools that are popular in the data community, including Snowflake, Google BigQuery, Redshift, PySpark, and dbt. We're adding more integrations every day, so let us know what you'd like to see! 36 | 37 | 38 | ## Getting Started 39 | 40 | Prism can be installed via ```pip```. Prism requires Python >= 3.7. 41 | 42 | ``` 43 | pip install --upgrade pip 44 | pip install prism-ds 45 | ``` 46 | 47 | Start your first Prism project with the `prism init` command: 48 | ``` 49 | $ prism init --project-name my_first_project 50 | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 51 | Running with Prism v0.3.0... 
52 | Creating template project at ./my_first_project... 53 | 54 | ______ 55 | ____ __ \_____(_)________ _______ 56 | _____ /_/ / ___/ / ___/ __ `__ \ ____ 57 | ____ / ____/ / / (__ ) / / / / / _____ 58 | ___/_/ /_/ /_/____/_/ /_/ /_/ ___ 59 | 60 | Welcome to Prism, the easiest way to create clean, modular data pipelines 61 | using Python! 62 | 63 | To get started, navigate to your newly created project "my_first_project" and try 64 | running the following commands: 65 | > python main.py 66 | > prism run 67 | > prism graph 68 | 69 | Consult the documentation here for more information on how to get started. 70 | docs.runprism.com 71 | 72 | Happy building! 73 | 74 | Done! 75 | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 76 | ``` 77 | 78 | Run your project by navigating to your project directory and running `prism run`: 79 | ``` 80 | $ cd my_first_project 81 | $ prism run 82 | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 83 | [HH:MM:SS] INFO | Running with Prism v0.3.0... 84 | [HH:MM:SS] INFO | Creating run magnetic-pony-BBDYfwdDzH for client my_first_project-1.0... 85 | [HH:MM:SS] INFO | Found 2 task(s) in 2 module(s) in job magnetic-pony-BBDYfwdDzH... 86 | 87 | [HH:MM:SS] INFO | Parsing task dependencies............................................... [RUN] 88 | [HH:MM:SS] INFO | FINISHED parsing task dependencies...................................... [DONE in 0.01s] 89 | 90 | ────────────────────────────────────────────── Tasks ────────────────────────────────────────────── 91 | [HH:MM:SS] INFO | 1 of 2 RUNNING TASK example-decorated-task.............................. [RUN] 92 | [HH:MM:SS] INFO | 1 of 2 FINISHED TASK example-decorated-task............................. [DONE in 0.02s] 93 | [HH:MM:SS] INFO | 2 of 2 RUNNING TASK example-class-task.................................. 
[RUN] 94 | [HH:MM:SS] INFO | 2 of 2 FINISHED TASK example-class-task................................. [DONE in 0.02s] 95 | 96 | Done! 97 | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 98 | ``` 99 | 100 | ## Documentation 101 | To get started with Prism projects, check out our [documentation](https://docs.runprism.com). Some sections of interest include: 102 | 103 | - :key: [Fundamentals](https://docs.runprism.com/fundamentals) 104 | - :seedling: [CLI](https://docs.runprism.com/cli) 105 | - :electric_plug: [Integrations](https://docs.runprism.com/integrations) 106 | - :bulb: [Use Cases](https://docs.runprism.com/use-cases) 107 | 108 | In addition, check out some [example projects](https://github.com/runprism/prism_examples). 109 | 110 | 111 | ## Integrations 112 | Prism integrates with a wide variety of third-party developer tools. There are two kinds of integrations that Prism supports: adapters and agents. 113 | 114 | ### Adapters 115 | Adapters allow users to connect to data warehouses or analytics engines. Prism currently supports the following adapters: 116 | | Adapter | Command | 117 | | ------------ | ----------- | 118 | | **Google BigQuery** | ```pip install "prism-ds[bigquery]"``` | 119 | | **Postgres** | ```pip install "prism-ds[postgres]"``` | 120 | | **Presto** | ```pip install "prism-ds[presto]"``` | 121 | | **Redshift** | ```pip install "prism-ds[redshift]"``` | 122 | | **Snowflake** | ```pip install "prism-ds[snowflake]"``` | 123 | | **Trino** | ```pip install "prism-ds[trino]"``` | 124 | 125 | 126 | ## Product Roadmap 127 | 128 | We're always looking to improve our product. Here's what we're working on at the moment: 129 | 130 | - **Compatibility with Alto agents**: Docker containers, EC2 clusters, EMR clusters, Databricks clusters, and more! 131 | - **Additional adapters**: Celery, Dask, MySQL, Presto, and more! 
132 | - **Cloud deployment**: Managed orchestration platform to deploy Prism projects in the cloud 133 | 134 | Let us know if you'd like to see another feature! 135 | -------------------------------------------------------------------------------- /dev_requirements.txt: -------------------------------------------------------------------------------- 1 | -e .[snowflake] 2 | -e .[bigquery] 3 | -e .[redshift] 4 | -e .[postgres] 5 | -e .[presto] 6 | -e .[trino] 7 | -e .[pyspark] 8 | -e .[dev] 9 | -------------------------------------------------------------------------------- /prism/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/__init__.py -------------------------------------------------------------------------------- /prism/admin.py: -------------------------------------------------------------------------------- 1 | """ 2 | Functions for DAG run admin variables 3 | """ 4 | 5 | # Imports 6 | import coolname 7 | import uuid 8 | import shortuuid 9 | 10 | # Words to ignore and exclude from slug 11 | IGNORE_LIST = { 12 | "sexy", 13 | "demonic", 14 | "kickass", 15 | "heretic", 16 | "godlike", 17 | "booby", 18 | "chubby", 19 | "gay", 20 | "sloppy", 21 | "funky", 22 | "juicy", 23 | "beaver", 24 | "curvy", 25 | "fat", 26 | "flashy", 27 | "flat", 28 | "thick", 29 | "nippy", 30 | } 31 | 32 | 33 | # Functions 34 | def generate_run_id() -> str: 35 | """ 36 | Cryptographically secure run ID (using UUID) 37 | """ 38 | return str(uuid.uuid4()).replace("-", "") 39 | 40 | 41 | def generate_run_slug() -> str: 42 | """ 43 | Run slug concatenated with a short UUID. Not necessarily cryptographically secure. 44 | We offer this in addition to the run ID because it's more human-readable. 
45 | """ 46 | uuid_short = str(shortuuid.ShortUUID().random(length=10)) 47 | slug = coolname.generate_slug(2) 48 | 49 | # Regenerate words if they include ignored words 50 | while IGNORE_LIST.intersection(slug.split("-")): 51 | slug = coolname.generate_slug(2) 52 | 53 | return f'{slug}-{uuid_short.replace("-", "")}' 54 | -------------------------------------------------------------------------------- /prism/callbacks/__init__.py: -------------------------------------------------------------------------------- 1 | from .callback import _PrismCallback # noqa: F401, F403 2 | -------------------------------------------------------------------------------- /prism/callbacks/callback.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import inspect 3 | from typing import Any, Callable 4 | 5 | 6 | class _PrismCallback: 7 | """ 8 | Callbacks allow users to call specific functions when a run successfully executes or 9 | when a project fails. This is a super thin wrapper around the actual callback 10 | function. 11 | """ 12 | 13 | callback_func: Callable[[], Any] 14 | name: str 15 | 16 | def __init__(self, callback_func: Callable[[], Any]): 17 | self.callback_func = callback_func 18 | self.name = callback_func.__name__ 19 | 20 | # Check if the callback has any args. If it does, raise an error. For now, users 21 | # cannot specify arguments in a callback function. They can access information 22 | # about the run that triggered the callback via the `CurrentRun` object.
23 | signature = inspect.signature(self.callback_func) 24 | args = list(signature.parameters.keys()) 25 | if len(args) > 0: 26 | raise ValueError( 27 | f"Callback function `{self.name}` cannot have any arguments.", 28 | ) 29 | 30 | @classmethod 31 | def from_str(cls, import_path: str): 32 | module_name = ".".join(import_path.split(".")[:-1]) 33 | fn_name = import_path.split(".")[-1] 34 | imported_mod = importlib.import_module(module_name) 35 | fn = getattr(imported_mod, fn_name) 36 | return cls(fn) 37 | 38 | def run(self): 39 | self.callback_func() 40 | -------------------------------------------------------------------------------- /prism/cli/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/cli/__init__.py -------------------------------------------------------------------------------- /prism/cli/init.py: -------------------------------------------------------------------------------- 1 | # Standard library imports 2 | import shutil 3 | from pathlib import Path 4 | from typing import Literal, Optional 5 | 6 | import click 7 | 8 | import prism.constants 9 | 10 | # Prism-specific imports 11 | import prism.db.setup 12 | import prism.exceptions 13 | from prism.logging.events import ( 14 | CreatingPrismProjectTemplate, 15 | InitSuccessfulEvent, 16 | fire_empty_line_event, 17 | fire_init_events, 18 | fire_tail_events, 19 | ) 20 | from prism.logging.loggers import console_print, set_up_logger 21 | from prism.templates.starter_project import STARTER_PROJECT_TEMPLATE_DIR 22 | 23 | TASK_COMPLETE_MSG = """ ______ 24 | ____ __ \_____(_)________ _______ 25 | _____ /_/ / ___/ / ___/ __ `__ \ ____ 26 | ____ / ____/ / / (__ ) / / / / / _____ 27 | ___/_/ /_/ /_/____/_/ /_/ /_/ ___ 28 | 29 | Welcome to Prism, the easiest way to create clean, modular data pipelines 30 | using Python! 
31 | 32 | To get started, navigate to your newly created project "{project_name}" and try 33 | running the following commands: 34 | > python main.py 35 | > prism run 36 | > prism graph 37 | 38 | Consult the documentation here for more information on how to get started. 39 | {docs_url} 40 | 41 | Happy building!""" 42 | 43 | 44 | def initialize_project( 45 | project_name: Optional[str], 46 | log_level: Literal["info", "warning", "error", "debug", "critical"], 47 | ) -> None: 48 | """ 49 | Initialize a Prism project. The project itself is nothing special — it's just a 50 | template project to help the user get started. 51 | 52 | args: 53 | project_name: name for new project 54 | log_level: log level 55 | returns: 56 | None 57 | """ 58 | set_up_logger(log_level, None) 59 | fire_init_events() 60 | 61 | # If the project name wasn't provided by the user, prompt them 62 | if project_name is None: 63 | project_name = click.prompt("What is the desired project name?") 64 | fire_empty_line_event() 65 | 66 | # Set up the database 67 | prism.db.setup.setup() 68 | 69 | # If the project_name already exists within the working directory, throw an error 70 | wkdir = Path.cwd() 71 | project_dir = wkdir / project_name 72 | if project_dir.is_dir(): 73 | raise prism.exceptions.ProjectAlreadyExistsException(project_dir) 74 | 75 | # Template directory 76 | template_dir = STARTER_PROJECT_TEMPLATE_DIR 77 | console_print(CreatingPrismProjectTemplate(project_dir).message()) 78 | shutil.copytree( 79 | template_dir, 80 | project_dir, 81 | ignore=shutil.ignore_patterns(*prism.constants.IGNORE_FILES), 82 | ) 83 | fire_empty_line_event() 84 | console_print( 85 | InitSuccessfulEvent( 86 | msg=TASK_COMPLETE_MSG.format( 87 | project_name=project_name, docs_url="docs.runprism.com" 88 | ) 89 | ).message() 90 | ) 91 | fire_tail_events() 92 | return None 93 | -------------------------------------------------------------------------------- /prism/client/__init__.py:
-------------------------------------------------------------------------------- 1 | from .client import * # noqa: F401, F403 2 | from .visualizer import * # noqa: F401, F403 3 | -------------------------------------------------------------------------------- /prism/client/parser.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import os 3 | from typing import List, Optional, Union 4 | import re 5 | 6 | # Prism-specific imports 7 | from prism.engine.module import _PrismModule 8 | from prism.engine.compiler import _DagCompiler, _CompiledDag 9 | import prism.logging.execution 10 | import prism.logging.loggers 11 | 12 | 13 | class ProjectParser(object): 14 | project_dir: Path 15 | tasks_dir: Union[str, Path] 16 | all_tasks_downstream: bool 17 | 18 | def __init__( 19 | self, 20 | project_dir: Path, 21 | tasks_dir: Union[str, Path], 22 | all_tasks_downstream: bool, 23 | ): 24 | self.project_dir = project_dir 25 | self.tasks_dir = tasks_dir 26 | self.all_tasks_downstream = all_tasks_downstream 27 | 28 | self._confirm_tasks_dir_exists(self.tasks_dir) 29 | 30 | def _confirm_tasks_dir_exists(self, tasks_dir: Union[str, Path]) -> None: 31 | if not Path(tasks_dir).is_dir(): 32 | raise prism.exceptions.CompileException( 33 | message=f"`{tasks_dir}` is not a directory!" 34 | ) 35 | return None 36 | 37 | def _get_all_modules_in_dir(self, rootdir: Union[str, Path]) -> List[str]: 38 | """ 39 | Get all modules in the `rootdir`. 
Returns a list of relative paths for modules 40 | in `dir` 41 | 42 | args: 43 | rootdir: root directory to search 44 | returns: 45 | list of relative paths of modules in `dir` 46 | """ 47 | module_relpaths: List[str] = [] 48 | if not Path(rootdir).is_dir(): 49 | raise ValueError(f"`{rootdir}` is not a directory!") 50 | for root, _, files in os.walk(rootdir): 51 | for fname in files: 52 | if len(re.findall(r"\.py$", fname)) > 0: 53 | full_path = Path(root) / fname 54 | rel_path = os.path.relpath(full_path, rootdir) 55 | module_relpaths.append(str(rel_path)) 56 | return module_relpaths 57 | 58 | def parse_all_modules(self) -> List[_PrismModule]: 59 | """ 60 | Create a `_PrismModule` object for each module in the project. This object 61 | contains all the task nodes in the module (along with the task's refs and 62 | targets). 63 | 64 | returns: 65 | list of _PrismModule objects 66 | """ 67 | module_relpaths = self._get_all_modules_in_dir(self.tasks_dir) 68 | prism_modules: List[_PrismModule] = [] 69 | for relpath in module_relpaths: 70 | mod = _PrismModule(self.project_dir, self.tasks_dir, relpath) 71 | if len(mod.prism_task_nodes.keys()) > 0: 72 | prism_modules.append(mod) 73 | return prism_modules 74 | 75 | def compile_dag( 76 | self, 77 | project_id: str, 78 | run_slug: Optional[str], 79 | tasks_dir: Union[str, Path], 80 | parsed_module_objs: List[_PrismModule], 81 | user_arg_task_ids: List[str], 82 | user_arg_all_downstream: bool, 83 | ) -> _CompiledDag: 84 | """ 85 | Wrapper for the `compile` method in the DagCompiler class 86 | 87 | args: 88 | project_id: project ID 89 | tasks_dir: directory containing all tasks 90 | parsed_module_objs: list of _PrismModule objects associated with modules 91 | that contain tasks 92 | user_arg_task_ids: task IDs passed in by the user at runtime 93 | user_arg_all_downstream: boolean indicating whether the user wants to run 94 | all tasks downstream of inputted `user_arg_task_ids` 95 | project: PrismProject 96 | returns: 97 | 
CompiledDag object 98 | """ 99 | dag_compiler = _DagCompiler( 100 | project_id, 101 | run_slug, 102 | tasks_dir, 103 | parsed_module_objs, 104 | user_arg_task_ids, 105 | user_arg_all_downstream, 106 | ) 107 | compiled_dag = dag_compiler.compile() 108 | return compiled_dag 109 | -------------------------------------------------------------------------------- /prism/client/visualizer.py: -------------------------------------------------------------------------------- 1 | from http.server import SimpleHTTPRequestHandler 2 | import os 3 | from pathlib import Path 4 | from typing import List, Union 5 | import shutil 6 | from socketserver import TCPServer 7 | import sys 8 | import signal 9 | import webbrowser 10 | 11 | from watchdog.observers.api import BaseObserver 12 | from watchdog.observers import Observer 13 | from watchdog.events import PatternMatchingEventHandler 14 | 15 | # Prism-specific imports 16 | from prism.client.parser import ProjectParser 17 | from prism.docs import DOCS_INDEX_FILE_DIR 18 | from prism.engine.compiler import _CompiledDag 19 | from prism.engine.manifest import TaskManifest, Manifest 20 | import prism.logging.execution 21 | from prism.logging.events import ( 22 | fire_header_events, 23 | fire_serving_docs_events, 24 | fire_empty_line_event, 25 | fire_tail_events, 26 | fire_reload_docs_event, 27 | ) 28 | import prism.logging.loggers 29 | 30 | 31 | class PrismVisualizer(object): 32 | project_id: str 33 | project_dir: Path 34 | tasks_dir: Union[str, Path] 35 | port: int 36 | open_window: bool 37 | hot_reload: bool 38 | 39 | project_parser: ProjectParser 40 | observer: BaseObserver 41 | 42 | def __init__( 43 | self, 44 | project_id: str, 45 | project_dir: Path, 46 | tasks_dir: Union[str, Path], 47 | port: int, 48 | open_window: bool, 49 | hot_reload: bool, 50 | ): 51 | self.project_id = project_id 52 | self.project_dir = project_dir 53 | self.tasks_dir = tasks_dir 54 | self.port = port 55 | self.open_window = open_window 56 | self.hot_reload = 
hot_reload 57 | 58 | # Project parser 59 | self.project_parser = ProjectParser(self.project_dir, self.tasks_dir, True) 60 | 61 | # Event handler for hot reloading. Note that we only start this observer if 62 | # `hot_reload` is True. Otherwise, we don't use it. 63 | reload_handler = PatternMatchingEventHandler( 64 | patterns=["*"], 65 | ignore_patterns=None, 66 | ignore_directories=False, 67 | case_sensitive=False, 68 | ) 69 | 70 | def on_created(event): 71 | self._event_handler() 72 | 73 | def on_deleted(event): 74 | self._event_handler() 75 | 76 | def on_modified(event): 77 | self._event_handler() 78 | 79 | def on_moved(event): 80 | self._event_handler() 81 | 82 | # mypy doesn't like that we're assigning a function to a method. But it works, 83 | # so ignore. 84 | reload_handler.on_created = on_created # type: ignore 85 | reload_handler.on_deleted = on_deleted # type: ignore 86 | reload_handler.on_modified = on_modified # type: ignore 87 | reload_handler.on_moved = on_moved # type: ignore 88 | 89 | # Observer 90 | path = str(self.tasks_dir) 91 | self.observer = Observer() 92 | self.observer.schedule(reload_handler, path, recursive=True) 93 | 94 | def create_docs_dir(self, project_dir: Path) -> Path: 95 | """ 96 | Create a docs/ directory in the project directory 97 | 98 | args: 99 | project_dir: Prism project directory 100 | returns: 101 | document directory 102 | """ 103 | # Create compiled directory 104 | docs_dir = project_dir / "docs" 105 | if not docs_dir.is_dir(): 106 | docs_dir.mkdir(parents=True, exist_ok=True) 107 | return docs_dir 108 | 109 | def create_manifest( 110 | self, 111 | compiled_dag: _CompiledDag, 112 | docs_dir: Path, 113 | ) -> None: 114 | task_manifests: List[TaskManifest] = [] 115 | 116 | task_mods = compiled_dag.task_mods 117 | task_refs = compiled_dag.task_refs 118 | task_targets = compiled_dag.task_targets 119 | 120 | for tid, mod in task_mods.items(): 121 | ref_srcs = task_refs[tid] 122 | target_locs = task_targets[tid] 123 | 124 | # 
Construct task manifest 125 | manifest = TaskManifest() 126 | manifest.add_task(Path(mod.module_task_relpath), tid) 127 | manifest.add_refs( 128 | target_module=Path(mod.module_task_relpath), 129 | target_task=tid, 130 | sources=ref_srcs, 131 | ) 132 | manifest.add_targets( 133 | module_relative_path=Path(mod.module_task_relpath), 134 | task_name=tid, 135 | locs=target_locs, 136 | ) 137 | 138 | # Add to list 139 | task_manifests.append(manifest) 140 | 141 | # Manifest 142 | full_manifest = Manifest(task_manifests) 143 | if Path(docs_dir / "build" / "manifest.json").is_file(): 144 | os.unlink(Path(docs_dir / "build" / "manifest.json")) 145 | full_manifest.json_dump(Path(docs_dir / "build")) 146 | return None 147 | 148 | def _event_handler(self): 149 | """ 150 | Every time the user updates their tasks, we should recompile the DAG and 151 | regenerate the manifest. 152 | """ 153 | parsed_module_objs = self.project_parser.parse_all_modules() 154 | compiled_dag = self.project_parser.compile_dag( 155 | project_id=self.project_id, 156 | run_slug=None, 157 | tasks_dir=self.tasks_dir, 158 | parsed_module_objs=parsed_module_objs, 159 | user_arg_task_ids=[], 160 | user_arg_all_downstream=True, 161 | ) 162 | 163 | # Create docs dir and copy the build directory into the docs folder. 
164 | docs_dir = self.create_docs_dir(self.project_dir) 165 | build_dir = docs_dir / "build" 166 | shutil.copytree( # type: ignore 167 | DOCS_INDEX_FILE_DIR, build_dir, dirs_exist_ok=True 168 | ) 169 | self.create_manifest(compiled_dag, docs_dir) 170 | fire_reload_docs_event() 171 | 172 | def graph(self): 173 | try: 174 | parsed_module_objs = self.project_parser.parse_all_modules() 175 | num_modules = len(parsed_module_objs) 176 | num_tasks = 0 177 | for mod in parsed_module_objs: 178 | num_tasks += len(mod.prism_task_nodes.keys()) 179 | fire_header_events( 180 | project_id=self.project_id, 181 | run_slug=None, 182 | num_tasks=num_tasks, 183 | num_modules=num_modules, 184 | ) 185 | 186 | # Compile the DAG 187 | compiled_dag_em = prism.logging.execution._ExecutionEventManager( 188 | idx=None, 189 | total=None, 190 | name="Parsing task dependencies", 191 | func=self.project_parser.compile_dag, 192 | ) 193 | compiled_dag = compiled_dag_em.run( 194 | fire_exec_events=True, 195 | project_id=self.project_id, 196 | run_slug=None, 197 | tasks_dir=self.tasks_dir, 198 | parsed_module_objs=parsed_module_objs, 199 | user_arg_task_ids=[], 200 | user_arg_all_downstream=True, 201 | ) 202 | 203 | # Create docs dir and copy the build directory into the docs folder. 204 | docs_dir = self.create_docs_dir(self.project_dir) 205 | build_dir = docs_dir / "build" 206 | shutil.copytree( # type: ignore 207 | DOCS_INDEX_FILE_DIR, build_dir, dirs_exist_ok=True 208 | ) 209 | self.create_manifest(compiled_dag, docs_dir) 210 | 211 | # Send clean messages when Ctrl+C is pressed 212 | def handler(signum, frame): 213 | fire_empty_line_event() 214 | res = input("Shutdown the Prism docs server (y/n)? 
") 215 | if res == "y": 216 | fire_tail_events() 217 | if self.hot_reload: 218 | self.observer.stop() 219 | sys.exit(0) 220 | else: 221 | pass 222 | 223 | signal.signal(signal.SIGINT, handler) 224 | 225 | # Serve the docs 226 | os.chdir(build_dir) 227 | port = self.port 228 | address = "127.0.0.1" 229 | fire_serving_docs_events(address, port) 230 | 231 | # mypy doesn't think SimpleHTTPRequestHandler is ok here, but it is 232 | httpd = TCPServer((address, port), SimpleHTTPRequestHandler) 233 | 234 | if self.open_window: 235 | webbrowser.open_new_tab(f"http://{address}:{port}") 236 | try: 237 | if self.hot_reload: 238 | self.observer.start() 239 | httpd.serve_forever() 240 | finally: 241 | httpd.shutdown() 242 | httpd.server_close() 243 | return None 244 | except Exception: 245 | prism.logging.loggers.CONSOLE.print_exception( 246 | show_locals=False, suppress=[prism], width=120 247 | ) 248 | sys.exit(1) 249 | -------------------------------------------------------------------------------- /prism/connectors/__init__.py: -------------------------------------------------------------------------------- 1 | from .bigquery import * # noqa: F401, F403 2 | from .postgres import * # noqa: F401, F403 3 | from .presto import * # noqa: F401, F403 4 | from .redshift import * # noqa: F401, F403 5 | from .snowflake import * # noqa: F401, F403 6 | from .trino import * # noqa: F401, F403 7 | -------------------------------------------------------------------------------- /prism/connectors/base.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from typing import Any, Union, Optional 3 | 4 | 5 | class Connector: 6 | id: str 7 | engine: Any 8 | 9 | def __init__(self, id: str, **kwargs): 10 | self.id = id 11 | for k, v in kwargs.items(): 12 | setattr(self, k, v) 13 | 14 | def create_engine(self): 15 | raise NotImplementedError 16 | 17 | def execute_sql( 18 | self, sql: str, return_type: Optional[str] 19 | ) -> 
Union[pd.DataFrame, Any]: 20 | raise NotImplementedError 21 | -------------------------------------------------------------------------------- /prism/connectors/bigquery.py: -------------------------------------------------------------------------------- 1 | # Standard library imports 2 | from pathlib import Path 3 | from typing import Any, List, Literal, Optional, Tuple, Union 4 | 5 | import pandas as pd 6 | 7 | from prism.utils import requires_dependencies 8 | 9 | # Prism-specific imports 10 | from .base import Connector 11 | 12 | 13 | class BigQueryConnector(Connector): 14 | creds: Union[str, Path] 15 | 16 | # This should be an instance of the `bigquery.Client` class, but we don't want to 17 | # import bigquery class unless the user calls the `create_engine` method. 18 | engine: Any 19 | 20 | def __init__(self, id: str, creds: Union[str, Path]): 21 | super().__init__( 22 | id, 23 | creds=creds, 24 | ) 25 | 26 | @requires_dependencies(["google.cloud", "google.oauth2"], "bigquery") # noqa 27 | def create_engine(self): 28 | """ 29 | Parse Google BigQuery adapter, represented as a dict and return the Google 30 | BigQuery connector object 31 | 32 | args: 33 | adapter_dict: Google BigQuery adapter represented as a dictionary 34 | adapter_name: name assigned to adapter 35 | profile_name: profile name containing adapter 36 | returns: 37 | Google BigQuery client object 38 | """ 39 | # Import Python client for Google BigQuery 40 | from google.cloud import bigquery 41 | from google.oauth2 import service_account 42 | 43 | # Get configuration and check if config is valid 44 | credentials = service_account.Credentials.from_service_account_file(self.creds) 45 | 46 | # Connection 47 | ctx = bigquery.Client(credentials=credentials) 48 | return ctx 49 | 50 | @requires_dependencies(["google.cloud", "google.oauth2"], "bigquery") # noqa 51 | def execute_sql( 52 | self, 53 | sql: str, 54 | return_type: Optional[Literal["pandas"]], 55 | ) -> Union[pd.DataFrame, Any]: 56 | """ 57 |
Execute the SQL query 58 | """ 59 | # Type hinting is kind of a pain here, so ignore for now. 60 | job = self.engine.query(sql) 61 | if return_type == "pandas": 62 | df: pd.DataFrame = job.to_dataframe() 63 | return df 64 | data = job.result() 65 | res: List[Tuple[Any, ...]] = [] 66 | for row in data: 67 | res.append(row) 68 | return res 69 | -------------------------------------------------------------------------------- /prism/connectors/postgres.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from typing import Any, List, Literal, Optional, Tuple, Union 3 | 4 | # Prism-specific imports 5 | from prism.utils import requires_dependencies 6 | from prism.connectors.base import Connector 7 | 8 | 9 | class PostgresConnector(Connector): 10 | user: str 11 | password: str 12 | port: int 13 | host: str 14 | database: str 15 | autocommit: bool 16 | 17 | # This should be an instance of the `psycopg2.extensions.connection`, but we don't 18 | # want to import psycopg2 unless the user creates calls the `create_engine` method. 
19 | engine: Any 20 | 21 | def __init__( 22 | self, 23 | id: str, 24 | user: str, 25 | password: str, 26 | port: int, 27 | host: str, 28 | database: str, 29 | autocommit: bool = True, 30 | ): 31 | super().__init__( 32 | id, 33 | user=user, 34 | password=password, 35 | port=port, 36 | host=host, 37 | database=database, 38 | autocommit=autocommit, 39 | ) 40 | 41 | # Create engine 42 | self.engine = self.create_engine() 43 | 44 | @requires_dependencies("psycopg2", "postgres") 45 | def create_engine(self) -> Any: 46 | """ 47 | Create the Postgres connection using `psycopg2` 48 | """ 49 | import psycopg2 50 | 51 | conn = psycopg2.connect( 52 | dbname=self.database, 53 | host=self.host, 54 | port=self.port, 55 | user=self.user, 56 | password=self.password, 57 | ) 58 | conn.set_session(autocommit=self.autocommit) 59 | return conn 60 | 61 | @requires_dependencies("psycopg2", "postgres") 62 | def execute_sql( 63 | self, 64 | sql: str, 65 | return_type: Optional[Literal["pandas"]], 66 | ) -> Union[pd.DataFrame, List[Tuple[Any, ...]]]: 67 | # For type hinting 68 | import psycopg2 69 | 70 | # Create cursor for every SQL query -- this ensures thread safety 71 | cursor: psycopg2.extensions.cursor = self.engine.cursor() 72 | cursor.execute(sql) 73 | data = cursor.fetchall() 74 | 75 | # If the return type is `pandas`, then return a DataFrame 76 | if return_type == "pandas": 77 | cols = [] 78 | for elts in cursor.description: 79 | cols.append(elts[0]) 80 | df: pd.DataFrame = pd.DataFrame(data=data, columns=cols) 81 | cursor.close() 82 | return df 83 | 84 | # Otherwise, return the data as it exists 85 | else: 86 | cursor.close() 87 | return data # type: ignore 88 | -------------------------------------------------------------------------------- /prism/connectors/presto.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from typing import Any, List, Literal, Optional, Union 3 | 4 | # Prism-specific imports 5 | from 
prism.connectors.base import Connector 6 | from prism.utils import requires_dependencies 7 | 8 | 9 | #################### 10 | # Class definition # 11 | #################### 12 | 13 | 14 | class PrestoConnector(Connector): 15 | user: str 16 | password: str 17 | port: int 18 | host: str 19 | http_scheme: Optional[str] 20 | catalog: Optional[str] 21 | schema: Optional[str] 22 | 23 | # This should be an instance of the `prestodb.dbapi.Connection`, but we don't want 24 | # to import prestodb unless the user creates calls the `create_engine` method. 25 | engine: Any 26 | 27 | def __init__( 28 | self, 29 | id: str, 30 | user: str, 31 | password: str, 32 | port: int, 33 | host: str, 34 | http_scheme: Optional[str] = None, 35 | catalog: Optional[str] = None, 36 | schema: Optional[str] = None, 37 | ): 38 | super().__init__( 39 | id, 40 | user=user, 41 | password=password, 42 | port=port, 43 | host=host, 44 | http_scheme=http_scheme, 45 | catalog=catalog, 46 | schema=schema, 47 | ) 48 | 49 | # Minor validation 50 | if self.schema is not None: 51 | if self.catalog is None: 52 | raise ValueError( 53 | "`catalog` cannot be `None` when `schema` is specified" 54 | ) # noqa: E501 55 | 56 | # Create engine 57 | self.engine = self.create_engine() 58 | 59 | @requires_dependencies( 60 | "prestodb", 61 | "presto", 62 | ) 63 | def create_engine(self) -> Any: 64 | """ 65 | Create the PrestoDB connection 66 | """ 67 | import prestodb 68 | 69 | if self.schema is not None: 70 | conn = prestodb.dbapi.connect( 71 | host=self.host, 72 | port=self.port, 73 | http_scheme=self.http_scheme if self.http_scheme else "https", 74 | auth=prestodb.auth.BasicAuthentication( 75 | self.user, 76 | self.password, 77 | ), 78 | catalog=self.catalog, 79 | schema=self.schema, 80 | ) 81 | 82 | # Just catalog is present 83 | elif self.catalog is not None: 84 | conn = prestodb.dbapi.connect( 85 | host=self.host, 86 | port=self.port, 87 | http_scheme=self.http_scheme if self.http_scheme else "https", 88 | 
auth=prestodb.auth.BasicAuthentication( 89 | self.user, 90 | self.password, 91 | ), 92 | catalog=self.catalog, 93 | ) 94 | 95 | # Neither catalog nor schema is present 96 | else: 97 | conn = prestodb.dbapi.connect( 98 | host=self.host, 99 | port=self.port, 100 | http_scheme=self.http_scheme if self.http_scheme else "https", 101 | auth=prestodb.auth.BasicAuthentication( 102 | self.user, 103 | self.password, 104 | ), 105 | ) 106 | return conn 107 | 108 | @requires_dependencies( 109 | "prestodb", 110 | "presto", 111 | ) 112 | def execute_sql( 113 | self, 114 | sql: str, 115 | return_type: Optional[Literal["pandas"]], 116 | ) -> Union[pd.DataFrame, List[List[Any]]]: 117 | # For type hinting 118 | import prestodb 119 | 120 | # Create cursor for every SQL query -- this ensures thread safety 121 | cursor: prestodb.dbapi.Cursor = self.engine.cursor() 122 | cursor.execute(sql) 123 | data = cursor.fetchall() 124 | 125 | # If the return type is `pandas`, then return a DataFrame 126 | if return_type == "pandas": 127 | cols = [] 128 | for elts in cursor.description: 129 | cols.append(elts[0]) 130 | df: pd.DataFrame = pd.DataFrame(data=data, columns=cols) 131 | cursor.close() 132 | return df 133 | else: 134 | cursor.close() 135 | return data # type: ignore 136 | -------------------------------------------------------------------------------- /prism/connectors/redshift.py: -------------------------------------------------------------------------------- 1 | from prism.connectors.postgres import PostgresConnector 2 | 3 | 4 | class RedshiftConnector(PostgresConnector): 5 | pass 6 | -------------------------------------------------------------------------------- /prism/connectors/snowflake.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from typing import Any, Dict, List, Literal, Optional, Tuple, Union 3 | 4 | # Prism-specific imports 5 | from prism.connectors.base import Connector 6 | from prism.utils import 
requires_dependencies 7 | 8 | 9 | class SnowflakeConnector(Connector): 10 | user: str 11 | password: str 12 | account: str 13 | role: str 14 | warehouse: str 15 | database: str 16 | schema: str 17 | 18 | # This should be an instance of the `snowflake.connector.Connection` class, but we 19 | # don't want to import snowflake.connector class unless the user 20 | # calls the `create_engine` method. 21 | engine: Any 22 | 23 | def __init__( 24 | self, 25 | id: str, 26 | user: str, 27 | password: str, 28 | account: str, 29 | role: str, 30 | warehouse: str, 31 | database: str, 32 | schema: str, 33 | ): 34 | super().__init__( 35 | id, 36 | user=user, 37 | password=password, 38 | account=account, 39 | role=role, 40 | warehouse=warehouse, 41 | database=database, 42 | schema=schema, 43 | ) 44 | 45 | self.engine = self.create_engine() 46 | 47 | @requires_dependencies(["snowflake.connector", "pyarrow"], "snowflake") 48 | def create_engine(self) -> Any: 49 | """ 50 | Create the Snowflake connection 51 | """ 52 | import snowflake.connector 53 | 54 | conn = snowflake.connector.connect( 55 | account=self.account, 56 | user=self.user, 57 | password=self.password, 58 | database=self.database, 59 | schema=self.schema, 60 | warehouse=self.warehouse, 61 | role=self.role, 62 | ) 63 | return conn 64 | 65 | @requires_dependencies(["snowflake.connector", "pyarrow"], "snowflake") 66 | def execute_sql( 67 | self, 68 | sql: str, 69 | return_type: Optional[Literal["pandas"]], 70 | ) -> Union[pd.DataFrame, List[Tuple[Any]], List[Dict[Any, Any]]]: 71 | # For type hinting 72 | import snowflake.connector 73 | 74 | # Create cursor for every SQL query -- this ensures thread safety 75 | cursor: snowflake.connector.cursor.SnowflakeCursor = self.engine.cursor() 76 | cursor.execute(sql) 77 | 78 | # If the return type is `pandas`, then return a DataFrame 79 | if return_type == "pandas": 80 | df: pd.DataFrame = cursor.fetch_pandas_all() 81 | cursor.close() 82 | return df 83 | 84 | # Otherwise, just return 
class TrinoConnector(Connector):
    """
    Connector that opens a Trino DB-API connection and runs SQL through it.

    The connection is created eagerly at the end of `__init__` and stored on
    `engine`. A fresh cursor is opened (and always closed) for every query.
    """

    user: str
    password: str
    port: int
    host: str
    http_scheme: Optional[str]
    catalog: Optional[str]
    schema: Optional[str]

    # This should be an instance of `trino.dbapi.Connection`, but we don't want to
    # import trino unless the user calls the `create_engine` method.
    engine: Any

    def __init__(
        self,
        id: str,
        user: str,
        password: str,
        port: int,
        host: str,
        http_scheme: Optional[str] = None,
        catalog: Optional[str] = None,
        schema: Optional[str] = None,
    ):
        """
        Store the connection settings and open the connection.

        raises:
            ValueError: if `schema` is given without a `catalog`
        """
        # NOTE(review): the attributes read below (`self.schema`, `self.catalog`, ...)
        # are presumably set by the base class from these keyword arguments --
        # confirm against `Connector.__init__`.
        super().__init__(
            id,
            user=user,
            password=password,
            port=port,
            host=host,
            http_scheme=http_scheme,
            catalog=catalog,
            schema=schema,
        )

        # Minor validation: a schema is only meaningful inside a catalog
        if self.schema is not None and self.catalog is None:
            raise ValueError("`catalog` cannot be `None` when `schema` is specified")

        # Create engine
        self.engine = self.create_engine()

    @requires_dependencies(
        "trino",
        "trino",
    )
    def create_engine(self) -> Any:
        """
        Create the Trino connection.

        `catalog` and `schema` are only forwarded to `trino.dbapi.connect` when they
        are set; `http_scheme` falls back to "https" when empty.
        """
        import trino

        # Arguments shared by every connection flavour
        connect_kwargs = {
            "host": self.host,
            "port": self.port,
            "http_scheme": self.http_scheme or "https",
            "auth": trino.auth.BasicAuthentication(
                self.user,
                self.password,
            ),
        }

        # Both catalog and schema are present
        if self.schema is not None:
            connect_kwargs["catalog"] = self.catalog
            connect_kwargs["schema"] = self.schema

        # Just catalog is present
        elif self.catalog is not None:
            connect_kwargs["catalog"] = self.catalog

        return trino.dbapi.connect(**connect_kwargs)

    @requires_dependencies(
        "trino",
        "trino",
    )
    def execute_sql(
        self,
        sql: str,
        return_type: Optional[Literal["pandas"]],
    ) -> Union[pd.DataFrame, List[List[Any]]]:
        """
        Execute `sql` and return every row.

        args:
            sql: SQL statement to execute
            return_type: "pandas" to get a DataFrame whose columns are taken from
                the cursor description; anything else returns the raw rows
        returns:
            a DataFrame or the list of rows returned by `fetchall`
        """
        # For type hinting
        import trino

        # Create cursor for every SQL query -- this ensures thread safety
        cursor: trino.dbapi.Cursor = self.engine.cursor()
        try:
            cursor.execute(sql)
            data = cursor.fetchall()

            # If the return type is `pandas`, then return a DataFrame
            if return_type == "pandas":
                # `description` may be None when the statement produced no columns
                cols = [elts[0] for elts in (cursor.description or [])]
                df: pd.DataFrame = pd.DataFrame(data=data, columns=cols)
                return df
            return data  # type: ignore
        finally:
            # Release the cursor even when `execute` / `fetchall` raises
            cursor.close()
# Version number
VERSION = "0.3.0"


# Root directory of project (the parent of the directory containing this file)
ROOT_DIR = str(Path(os.path.dirname(__file__)).parent)


# Files to ignore when instantiating Prism project
IGNORE_FILES = ["__pycache__", "*checkpoint.ipynb", ".ipynb_checkpoints"]


# Python version
PYTHON_VERSION = sys.version_info


# Internal folder for stuff created by Prism. `exist_ok=True` makes the creation
# safe when several processes import this module at the same time: a separate
# "check, then create" sequence can raise `FileExistsError` if another process
# creates the folder between the two steps.
INTERNAL_FOLDER = Path(os.path.expanduser("~/.prism"))
INTERNAL_FOLDER.mkdir(parents=True, exist_ok=True)
yield session 34 | finally: 35 | session.close() 36 | 37 | def execute_thread_local_stmt( 38 | self, 39 | stmt: Executable, 40 | session: Session, 41 | select_statement: bool = True, 42 | model_objects: bool = True, 43 | ): 44 | if select_statement: 45 | if model_objects: 46 | result = session.scalars(stmt).all() 47 | else: 48 | result = session.execute(stmt).all() 49 | return result 50 | else: 51 | session.execute(stmt) 52 | -------------------------------------------------------------------------------- /prism/db/mixins.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from pathlib import Path 3 | from typing import Any, Dict, List, Literal, Union 4 | 5 | from sqlalchemy import delete, select 6 | 7 | from prism.db.factory import ThreadLocalSessionFactory 8 | from prism.db.setup import Project, Ref, Run, Target, Task, TaskRun 9 | 10 | 11 | class DbMixin: 12 | """ 13 | Mixin class used to add elements to our database 14 | """ 15 | 16 | def create_new_project( 17 | self, project_id: str, local_path: Union[str, Path], ctx: Dict[str, Any] 18 | ) -> None: 19 | factory = ThreadLocalSessionFactory() 20 | with factory.create_thread_local_session() as session: 21 | # Check if project already exists 22 | stmt = ( 23 | select(Project) 24 | .where(Project.id == project_id) 25 | .where(Project.local_path == str(local_path)) 26 | ) 27 | project_res = factory.execute_thread_local_stmt(stmt, session) 28 | 29 | # All values in context should be serializable. If it's a custom object, 30 | # then we'll just turn it into a string. 
def update_tasks(self, project_id: str, task_ids: List[str]) -> None:
    """
    Synchronise the project's `Task` rows with `task_ids`.

    Every task already stored for the project gets its `current` flag set to
    whether it still appears in `task_ids`; ids that are not stored yet are
    inserted with `current=True`.

    args:
        project_id: id of the project whose tasks are updated
        task_ids: ids of the tasks that currently exist in the project
    raises:
        ValueError: if no project with `project_id` exists in the database
    """
    factory = ThreadLocalSessionFactory()
    with factory.create_thread_local_session() as session:
        stmt = select(Project).where(Project.id == project_id)
        res = factory.execute_thread_local_stmt(stmt, session)
        if not res:
            # Fail with a descriptive error instead of an IndexError on `res[0]`
            raise ValueError(f"project `{project_id}` does not exist in the database")
        project = res[0]

        # Current tasks in the database. Compare them against the `task_ids` input
        # and update the `current` field. Sets keep the membership tests O(1).
        incoming_ids = set(task_ids)
        seen_task_ids = set()
        current_tasks_in_db: List[Task] = project.tasks
        for t in current_tasks_in_db:
            t.current = t.task_id in incoming_ids
            seen_task_ids.add(t.task_id)

        # Add remaining tasks
        for tid in incoming_ids - seen_task_ids:
            session.add(Task(task_id=tid, project_id=project_id, current=True))
        session.commit()

    return None
def create_new_run(
    self,
    run_slug: str,
    run_date: datetime,
    logs_path: Union[str, Path],
    status: Literal["PENDING", "RUNNING", "SUCCEEDED", "FAILED"],
    ctx: Dict[str, Any],
    project_id: str,
) -> None:
    """
    Insert a new `Run` row.

    args:
        run_slug: identifier of the run
        run_date: timestamp of the run
        logs_path: location of the run's logs; stored as a string
        status: initial status of the run
        ctx: run context; every value is stored as its `str()` representation
        project_id: id of the project the run belongs to
    raises:
        ValueError: if a run with this slug already exists for the project
    """
    factory = ThreadLocalSessionFactory()
    with factory.create_thread_local_session() as session:
        # The run should not exist already. If it does, then raise an error
        stmt = (
            select(Run)
            .where(Run.run_slug == run_slug)
            .where(Run.project_id == project_id)
        )
        runs = factory.execute_thread_local_stmt(stmt, session)
        if len(runs) > 0:
            raise ValueError(f"run `{run_slug}` already exists in the database")

        # All values in context should be serializable. If it's a custom object,
        # then we'll just turn it into a string.
        # TODO: maybe we should warn the user
        serializable_ctx = {k: str(v) for k, v in ctx.items()}

        # Create a new Run. `logs_path` may be a `Path`, which cannot be bound to
        # a string column, so convert it the same way `create_new_project`
        # converts `local_path`.
        run = Run(
            run_slug=run_slug,
            run_date=run_date,
            logs_path=str(logs_path),
            status=status,
            ctx=serializable_ctx,
            project_id=project_id,
        )
        session.add(run)
        session.commit()
    return None
We dynamically update this status at runtime 180 | tr = TaskRun(run_slug=run_slug, task_id=task_id, status="PENDING") 181 | session.add(tr) 182 | session.commit() 183 | return None 184 | 185 | def update_task_run_status( 186 | self, 187 | run_slug: str, 188 | task_id: str, 189 | status: Literal["PENDING", "RUNNING", "SUCCEEDED", "FAILED", "SKIPPED"], 190 | ) -> None: 191 | factory = ThreadLocalSessionFactory() 192 | with factory.create_thread_local_session() as session: 193 | stmt = ( 194 | select(TaskRun) 195 | .where(TaskRun.run_slug == run_slug) 196 | .where(TaskRun.task_id == task_id) 197 | ) 198 | res = factory.execute_thread_local_stmt(stmt, session) 199 | taskrun = res[0] 200 | taskrun.status = status 201 | session.commit() 202 | return None 203 | -------------------------------------------------------------------------------- /prism/db/setup.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from typing import Any, Dict, List, Literal 3 | 4 | from sqlalchemy import ForeignKey 5 | from sqlalchemy.orm import DeclarativeBase, Mapped, backref, mapped_column, relationship 6 | from sqlalchemy.types import JSON, String 7 | 8 | # Prism-specific imports 9 | from prism.db.factory import ThreadLocalSessionFactory 10 | 11 | 12 | class Base(DeclarativeBase): 13 | type_annotation_map = { 14 | Dict[str, Any]: JSON, 15 | Literal["PENDING", "RUNNING", "SUCCEEDED", "FAILED"]: String, 16 | Literal["PENDING", "RUNNING", "SUCCEEDED", "FAILED", "SKIPPED"]: String, 17 | } 18 | 19 | 20 | # Models 21 | class Project(Base): 22 | __tablename__ = "project" 23 | id: Mapped[str] = mapped_column(primary_key=True, nullable=False) 24 | local_path: Mapped[str] = mapped_column(nullable=False) 25 | ctx: Mapped[Dict[str, Any]] = mapped_column(nullable=False) 26 | runs: Mapped[List["Run"]] = relationship(backref=backref("project")) 27 | tasks: Mapped[List["Task"]] = relationship(backref=backref("project")) 28 | refs: Mapped[List["Ref"]] 
class Run(Base):
    """A single execution of a project, identified by its slug."""

    __tablename__ = "runs"
    run_slug: Mapped[str] = mapped_column(primary_key=True, nullable=False)
    run_date: Mapped[datetime.datetime] = mapped_column(nullable=False)
    logs_path: Mapped[str] = mapped_column(nullable=False)
    status: Mapped[
        Literal["PENDING", "RUNNING", "SUCCEEDED", "FAILED"]
    ] = mapped_column(nullable=True)
    taskruns: Mapped[List["TaskRun"]] = relationship(backref="run")
    ctx: Mapped[Dict[str, Any]] = mapped_column(nullable=False)
    project_id: Mapped[str] = mapped_column(ForeignKey("project.id"))


class Task(Base):
    """A task belonging to a project; `current` is a required boolean flag."""

    __tablename__ = "tasks"
    id: Mapped[int] = mapped_column(
        primary_key=True, nullable=False, autoincrement=True
    )
    task_id: Mapped[str] = mapped_column(nullable=False)
    current: Mapped[bool] = mapped_column(nullable=False)
    taskruns: Mapped[List["TaskRun"]] = relationship(backref="task")
    # `project.id` is a string primary key, so the referencing column is a string
    # as well (this matches `Run.project_id`).
    project_id: Mapped[str] = mapped_column(ForeignKey("project.id"))


class TaskRun(Base):
    """Status of one task within one run."""

    __tablename__ = "taskruns"
    run_slug: Mapped[str] = mapped_column(ForeignKey("runs.run_slug"), primary_key=True)
    # NOTE(review): this column references the integer `tasks.id`, but the visible
    # callers in `prism/db/mixins.py` pass the string task id -- confirm which one
    # is intended before changing the schema.
    task_id: Mapped[int] = mapped_column(ForeignKey("tasks.id"), primary_key=True)
    status: Mapped[
        Literal["PENDING", "RUNNING", "SUCCEEDED", "FAILED", "SKIPPED"]
    ] = mapped_column(nullable=False)


class Ref(Base):
    """A dependency edge (`source_id` -> `target_id`) within a project."""

    __tablename__ = "refs"
    id: Mapped[int] = mapped_column(
        nullable=False, primary_key=True, autoincrement=True
    )
    target_id: Mapped[str] = mapped_column(nullable=False)
    source_id: Mapped[str] = mapped_column(nullable=False)
    # String foreign key, matching the type of `project.id`
    project_id: Mapped[str] = mapped_column(ForeignKey("project.id"))
def target(*, type, loc, **target_kwargs):
    """
    Decorator to use if user wishes to save the output of a task to an external location
    (e.g., a data warehouse, an S3 bucket, or a local filepath).

    Several `target` decorators can be stacked on one `run` function. Stacked
    wrappers are not chained in the usual sense: each outer wrapper only records its
    target (`type`, `loc`, keyword arguments) on the task and delegates inward, and
    the innermost wrapper -- the one wrapping `run` itself -- saves every returned
    object to its recorded target.

    args:
        type: target class; instantiated as `type(obj, loc)` and saved via `.save()`
        loc: location handed to the target class
        **target_kwargs: keyword arguments forwarded to the target's `save` method
    """

    def decorator_target(func):
        def _call_wrapped(task):
            # When using `target` as a decorator, the wrapped callable is a plain
            # function. When using `target` as an argument to the `task()`
            # decorator, `run` is a bound method and must be called without `self`.
            if inspect.ismethod(func):
                return func()
            return func(task)

        # NOTE: stacking is detected through this function's `__name__`, so the
        # wrapper must keep the name `wrapper_target_dec` (do not apply
        # `functools.wraps` here).
        def wrapper_target_dec(self):
            # This will only ever be called inside a PrismTask
            if not isinstance(self, PrismTask):
                raise prism.exceptions.RuntimeException(
                    message="`target` decorator can only be called within a Prism task"
                )

            # In cases with multiple decorators, we don't want to "chain" the
            # decorators. Rather, we want each target declaration to apply to each
            # object returned. In this case, keep track of the target types, locs, and
            # kwargs, then hand over to the next wrapper. If a function has `n`
            # targets, then this will happen n-1 times until the `run` function is
            # reached.
            if func.__name__ == "wrapper_target_dec":
                self.types.append(type)
                self.locs.append(loc)
                self.kwargs.append(target_kwargs)
                return _call_wrapped(self)

            # Now, we've hit the `run` function. Confirm function name
            if func.__name__ != "run":
                raise prism.exceptions.RuntimeException(
                    message="`target` decorator can only be called on `run` function"  # noqa: E501
                )

            # If the task should be run in full, then call the run function
            if self.bool_run and not self.is_done:
                obj = _call_wrapped(self)
                self.types.append(type)
                self.locs.append(loc)
                self.kwargs.append(target_kwargs)

                # If multiple things returned, we expected multiple targets
                if isinstance(obj, tuple):
                    for temp_o, temp_t, temp_l, temp_k in zip(
                        obj, self.types, self.locs, self.kwargs
                    ):
                        temp_t.from_args(temp_o, temp_l).save(**temp_k)
                    return obj

                # If return type is not a Tuple, we expect a single target:
                # initialize an instance of the target class and save the object
                # using the target's `save` method
                type(obj, loc).save(**target_kwargs)
                return obj

            # If the task should not be run in full, then just open the recorded
            # targets. We still need to append the last location to self.locs
            self.locs.append(loc)
            self.types.append(type)

            # If multiple targets, then return the objects of all of them
            if len(self.locs) > 1:
                return tuple(
                    _type.open(_loc).obj for _loc, _type in zip(self.locs, self.types)
                )

            # For single-target case, return the single object
            return self.types[0].open(self.locs[0]).obj

        return wrapper_target_dec

    return decorator_target
def bind(instance, func, as_name=None):
    """
    Bind the function *func* to *instance*, with either provided name *as_name*
    or the existing name of *func*. The provided *func* should accept the
    instance as the first argument, i.e. "self".

    returns:
        the bound method, which is also set as attribute *as_name* on *instance*
    """
    if as_name is None:
        as_name = func.__name__
    bound_method = func.__get__(instance, instance.__class__)
    setattr(instance, as_name, bound_method)
    return bound_method


def task(
    *,
    task_id: Optional[str] = None,
    retries: int = 0,
    retry_delay_seconds: Optional[int] = None,
    targets=None,
):
    """
    Decorator used to turn any Python function into a Prism task.

    The decorated name becomes a factory: calling it with `task_id` / `bool_run`
    builds a `PrismTask` around the original function.

    args:
        task_id: default id of the task; can be overridden when the factory is called
        retries: number of retries, copied onto the task when truthy
        retry_delay_seconds: delay between retries, copied onto the task when truthy
        targets: optional sequence of target decorators to apply to the task's `run`
    """

    def decorator_task(func):
        def wrapper_task(task_id: Optional[str] = task_id, bool_run: bool = True):
            assert task_id
            new_task = PrismTask(task_id=task_id, func=func, bool_run=bool_run)

            # Set class attributes
            if retries:
                new_task.retries = retries
            if retry_delay_seconds:
                new_task.retry_delay_seconds = retry_delay_seconds

            # Chain the decorators together and bind the decorated function to the task
            # instance. The targets are applied in reverse so that the first target in
            # the list ends up as the outermost decorator.
            if targets:
                decorated_func = reduce(
                    lambda x, y: y(x),
                    reversed(targets),
                    new_task.run,  # type: ignore
                )
                new_task.run = bind(new_task, decorated_func)  # type: ignore

            return new_task

        return wrapper_task

    return decorator_task
-------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | DOCS_INDEX_FILE_DIR = Path(__file__).parent / 'build' 3 | -------------------------------------------------------------------------------- /prism/docs/build/ae8a93980ebb6c55123b.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/docs/build/ae8a93980ebb6c55123b.ico -------------------------------------------------------------------------------- /prism/docs/build/ce188596011a8fa32931.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/docs/build/ce188596011a8fa32931.png -------------------------------------------------------------------------------- /prism/docs/build/d4df11de40d39920ff8c.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/docs/build/d4df11de40d39920ff8c.svg -------------------------------------------------------------------------------- /prism/docs/build/main.js.LICENSE.txt: -------------------------------------------------------------------------------- 1 | /*! 2 | * Sizzle CSS Selector Engine v2.3.6 3 | * https://sizzlejs.com/ 4 | * 5 | * Copyright JS Foundation and other contributors 6 | * Released under the MIT license 7 | * https://js.foundation/ 8 | * 9 | * Date: 2021-02-16 10 | */ 11 | 12 | /*! 13 | * jQuery JavaScript Library v3.6.1 14 | * https://jquery.com/ 15 | * 16 | * Includes Sizzle.js 17 | * https://sizzlejs.com/ 18 | * 19 | * Copyright OpenJS Foundation and other contributors 20 | * Released under the MIT license 21 | * https://jquery.org/license 22 | * 23 | * Date: 2022-08-26T17:52Z 24 | */ 25 | 26 | /*! 
27 | Embeddable Minimum Strictly-Compliant Promises/A+ 1.1.1 Thenable 28 | Copyright (c) 2013-2014 Ralf S. Engelschall (http://engelschall.com) 29 | Licensed under The MIT License (http://opensource.org/licenses/MIT) 30 | */ 31 | 32 | /*! Bezier curve function generator. Copyright Gaetan Renaudeau. MIT License: http://en.wikipedia.org/wiki/MIT_License */ 33 | 34 | /*! Runge-Kutta spring physics function generator. Adapted from Framer.js, copyright Koen Bok. MIT License: http://en.wikipedia.org/wiki/MIT_License */ 35 | -------------------------------------------------------------------------------- /prism/engine/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/engine/__init__.py -------------------------------------------------------------------------------- /prism/engine/compiled_task.py: -------------------------------------------------------------------------------- 1 | import ast 2 | import importlib 3 | import re 4 | from typing import List, Literal, Optional, Tuple, Union 5 | 6 | # Prism-specific imports 7 | from prism.db.mixins import DbMixin 8 | import prism.exceptions 9 | from prism.engine.module import _PrismModule 10 | from prism.task import PrismTask 11 | 12 | 13 | class _CompiledTask(DbMixin): 14 | """ 15 | Class for defining and executing a single compiled task 16 | """ 17 | 18 | run_slug: Optional[str] 19 | task_id: str 20 | task_module: _PrismModule 21 | refs: List[str] 22 | 23 | prism_task_node: Union[ast.ClassDef, ast.FunctionDef] 24 | 25 | def __init__( 26 | self, 27 | run_slug: Optional[str], 28 | task_id: str, 29 | task_module: _PrismModule, 30 | refs: List[str], 31 | ): 32 | self.run_slug = run_slug 33 | self.task_id = task_id 34 | self.task_module = task_module 35 | self.refs = refs 36 | 37 | # Define the task node 38 | try: 39 | self.prism_task_node = task_module.prism_task_nodes[self.task_id] 40 | 
except KeyError: 41 | raise prism.exceptions.ParserException( 42 | message=f"could not find task `{self.task_id}` in `{self.task_module.module_task_relpath}`" # noqa: E501 43 | ) 44 | 45 | def grab_retries_metadata(self) -> Tuple[int, int]: 46 | """ 47 | Grab retry metadata, including: 48 | 1. How many retries to undertake 49 | 2. The delay between retries 50 | """ 51 | # Instantiate retries / retry_delay_seconds 52 | retries = None 53 | retry_delay_seconds = None 54 | 55 | # If the task is a class, the variables will be stored in class attributes 56 | if isinstance(self.prism_task_node, ast.ClassDef): 57 | retries = self.task_module.get_class_attribute_value( 58 | self.prism_task_node, "retries" 59 | ) 60 | retry_delay_seconds = self.task_module.get_class_attribute_value( 61 | self.prism_task_node, "retry_delay_seconds" 62 | ) 63 | if retries: 64 | if not isinstance(retries, int): 65 | raise ValueError("TypeError: `retries` should be an integer!") 66 | if retry_delay_seconds: 67 | if not isinstance(retry_delay_seconds, int): 68 | raise ValueError( 69 | "TypeError: `retry_delay_seconds` should be an integer!" 70 | ) 71 | 72 | # If the task is a decorated function, the variables will be stored as keyword 73 | # arguments. 
def instantiate_task_class(self, explicit_run: bool = True) -> PrismTask:
    """
    Instantiate the PrismTask class. All tasks, even decorated functions, are
    converted to a PrismTask class before being executed.

    The task's module is imported from its project-relative path, and the object
    named after the task node (a task class, or the factory produced for a
    decorated function) is called with the task id and the run flag.

    args:
        explicit run: boolean indicating whether to run the task. Default is True
    returns:
        PrismTask instance
    """
    project_relpath = self.task_module.module_task_relpath

    # Before replacing forward slashes with periods, drop the slash that follows
    # any leading dots, or a single leading slash.
    project_relpath_for_import = re.sub(
        r"(^\.+)/(.+)|^/(.+)",
        r"\1\2\3",
        project_relpath,
    )

    # Only strip the trailing `.py` extension: a plain `str.replace` would also
    # remove a `.py` that appears in the middle of the path and silently point the
    # import at a different module.
    import_statement = re.sub(r"\.py$", "", project_relpath_for_import).replace(
        "/", "."
    )
    imported_mod = importlib.import_module(import_statement)

    # Classes and decorated functions are instantiated through the same call
    task_factory = getattr(imported_mod, self.prism_task_node.name)
    task_instance: PrismTask = task_factory(
        task_id=self.task_id,
        bool_run=explicit_run,
    )
    return task_instance
def update_tasks_dir_key(
    self,
    key: str,
    level: "Dict[str, Any] | None" = None,
):
    """
    Return the nested dictionary stored under `key` in `level`, creating an empty
    one first if `key` is not present.

    args:
        key: dictionary key to look up / create (e.g., a directory name)
        level: dictionary to look in. When omitted, a fresh dictionary is used for
            this call only.
    returns:
        the dictionary stored at `level[key]`
    """
    # A `{}` default would be created once and shared by every call, leaking keys
    # from one call into the next. Build a new dictionary per call instead.
    if level is None:
        level = {}
    return level.setdefault(key, {})
class Manifest:
    """
    Class used to store metadata on compiled prism project. The manifest merges the
    `targets`, `tasks`, and `refs` sections of every task manifest it is given and
    can be written to / read from `manifest.json`.
    """

    def __init__(self, task_manifests: "List[TaskManifest] | None" = None):
        """
        args:
            task_manifests: task manifests to merge. Each must expose a
                `manifest_dict` with `targets`, `tasks`, and `refs` keys. Defaults
                to no task manifests.
        """
        self.manifest_dict: Dict[str, Any] = {
            "targets": {},
            "prism_project": "",
            "tasks": {},
            "refs": {},
        }
        # Use a new list per instance; a `[]` default would be shared by every
        # Manifest created without arguments.
        self.task_manifests = [] if task_manifests is None else task_manifests

        # Iterate through task manifests and add to manifest
        for mm in self.task_manifests:
            self.manifest_dict["targets"].update(mm.manifest_dict["targets"])
            self.update(self.manifest_dict["tasks"], mm.manifest_dict["tasks"])
            self.manifest_dict["refs"].update(mm.manifest_dict["refs"])

    def update(
        self,
        manifest_dict: Dict[str, Any],
        task_manifest_dict: Dict[str, Any],
    ) -> Dict[str, Any]:
        """
        Recursive function to update `manifest_dict` with the contents of
        `task_manifest_dict`. We need a recursive function, because the `tasks` key
        within the manifest.json can have a bunch of nested dictionaries.

        args:
            manifest_dict: manifest dictionary (modified in place)
            task_manifest_dict: task manifest dictionary (left untouched)
        returns:
            updated manifest_dict
        """
        import copy

        for k, v in task_manifest_dict.items():
            if k not in manifest_dict:
                # Copy rather than alias: later merges append to this value, and
                # that must not modify the task manifest it came from.
                manifest_dict[k] = copy.deepcopy(v)

            # Lists hold task names; add only the names we have not seen yet.
            elif isinstance(manifest_dict[k], list):
                for _item in v:
                    if _item not in manifest_dict[k]:
                        manifest_dict[k].append(_item)

            # If the value is a dictionary and the manifest already has this
            # dictionary, then recursively update the manifest's dictionary.
            elif isinstance(manifest_dict[k], dict):
                self.update(manifest_dict[k], v)
        return manifest_dict

    def add_prism_project(self, prism_project_data: str):
        """
        Store `prism_project_data` under the manifest's `prism_project` key.
        """
        self.manifest_dict["prism_project"] = prism_project_data

    def json_dump(self, path: Path):
        """
        Write the manifest to `path / "manifest.json"`.

        args:
            path: directory in which to create `manifest.json`
        """
        # The `with` block closes the file; no explicit close is needed.
        with open(path / "manifest.json", "w") as f:
            json.dump(self.manifest_dict, f, sort_keys=False)

    def json_load(self, path: Path):
        """
        Read and return the contents of `path / "manifest.json"`.

        args:
            path: directory containing `manifest.json`
        returns:
            the parsed JSON content
        """
        with open(path / "manifest.json", "r") as f:
            manifest = json.load(f)
        return manifest
self.message 50 | 51 | 52 | class ConsoleEventException(PrismException): 53 | def __init__(self, message): 54 | self.message = message 55 | super().__init__(self.message) 56 | 57 | def __str__(self): 58 | return self.message 59 | 60 | 61 | class ParserException(PrismException): 62 | def __init__(self, message): 63 | self.message = message 64 | super().__init__(self.message) 65 | 66 | def __str__(self): 67 | return self.message 68 | 69 | 70 | class ReferenceException(PrismException): 71 | def __init__(self, message): 72 | self.message = message 73 | super().__init__(self.message) 74 | 75 | def __str__(self): 76 | return self.message 77 | -------------------------------------------------------------------------------- /prism/logging/__init__.py: -------------------------------------------------------------------------------- 1 | from .loggers import * # noqa: F401, F403 2 | -------------------------------------------------------------------------------- /prism/logging/execution.py: -------------------------------------------------------------------------------- 1 | import time 2 | from typing import Any, Callable, Optional 3 | 4 | # Prism-specific imports 5 | from prism.logging.events import ExecutionEvent 6 | import prism.logging.events 7 | from prism.task import PrismTask 8 | 9 | 10 | class _ExecutionEventManager: 11 | """ 12 | For certain actions, we fire events to indicate before the action starts to indicate 13 | that the action is taking place and after the action ends to indicate that the 14 | action succeeded/failed. This class helps us manage that. 
15 | """ 16 | 17 | def __init__( 18 | self, 19 | idx: Optional[int], 20 | total: Optional[int], 21 | name: str, 22 | func: Callable[..., Any], 23 | ): 24 | self.idx = idx 25 | self.total = total 26 | self.name = name 27 | self.func = func 28 | 29 | def fire_skipped_exec_event(self, is_task: bool = False) -> None: 30 | """ 31 | Create ExecutionEvent informing user that a task was skipped 32 | """ 33 | event_with_formatting = ( 34 | f"TASK [blue]{self.name}[/blue]" if is_task else self.name.lower() 35 | ) # noqa: E501 36 | e = ExecutionEvent( 37 | msg=f"[orange1]SKIPPING[/orange1] {event_with_formatting}", 38 | num=self.idx, 39 | total=self.total, 40 | status="SKIP", 41 | execution_time=None, 42 | ) 43 | prism.logging.events.fire_console_event(e, log_level="info") 44 | return None 45 | 46 | def fire_running_exec_event(self, is_task: bool = False) -> None: 47 | """ 48 | Create ExecutionEvent informing user of task execution 49 | """ 50 | event_with_formatting = ( 51 | f"RUNNING TASK [blue]{self.name}[/blue]" if is_task else self.name 52 | ) # noqa: E501 53 | e = ExecutionEvent( 54 | msg=event_with_formatting, 55 | num=self.idx, 56 | total=self.total, 57 | status="RUN", 58 | execution_time=None, 59 | ) 60 | prism.logging.events.fire_console_event(e, log_level="info") 61 | return None 62 | 63 | def fire_success_exec_event(self, start_time: float, is_task: bool = False) -> None: 64 | """ 65 | Create ExecutionEvent informing user of successful task execution 66 | """ 67 | event_with_formatting = ( 68 | f"TASK [blue]{self.name}[/blue]" if is_task else self.name.lower() 69 | ) # noqa: E501 70 | execution_time = time.time() - start_time 71 | e = ExecutionEvent( 72 | msg=f"[green]FINISHED[/green] {event_with_formatting}", 73 | num=self.idx, 74 | total=self.total, 75 | status="DONE", 76 | execution_time=execution_time, 77 | ) 78 | prism.logging.events.fire_console_event(e, log_level="info") 79 | return None 80 | 81 | def fire_error_exec_event(self, start_time: float, is_task: 
bool = False) -> None: 82 | """ 83 | Create ExecutionEvent informing user of error in task execution 84 | """ 85 | event_with_formatting = ( 86 | f"IN TASK [blue]{self.name}[/blue]" if is_task else self.name.lower() 87 | ) # noqa: E501 88 | execution_time = time.time() - start_time 89 | e = ExecutionEvent( 90 | msg=f"[red]ERROR[/red] {event_with_formatting}", 91 | num=self.idx, 92 | total=self.total, 93 | status="ERROR", 94 | execution_time=execution_time, 95 | ) 96 | prism.logging.events.fire_console_event(e, log_level="error") 97 | return None 98 | 99 | def run( 100 | self, fire_exec_events: bool = True, is_task: bool = False, **kwargs 101 | ) -> Any: 102 | start_time = time.time() 103 | if fire_exec_events: 104 | self.fire_running_exec_event(is_task) 105 | 106 | # The only events we ever really skip are actual tasks. For these, the skip 107 | # logic is handled within the task's `exec` function. So, we just run it 108 | # normally here. 109 | try: 110 | output = self.func(**kwargs) 111 | 112 | # Check if the output is a task manager. If it is, then we've run a task. 113 | # Check if the task was skipped, and fire the corresponding event. 
def escape_rich_formatting(string: str) -> str:
    """
    Remove Rich markup tags, e.g., [blue]...[/blue] or [orange1]...[/orange1]. This
    cleans up the logs we save to a file.

    A tag is an optional `/` followed by a lowercase letter and then any lowercase
    letters, digits, or underscores, all inside square brackets. Bracketed text
    that does not look like that (e.g., `[1]` or `[A]`) is left alone.

    args:
        string: text that may contain Rich markup
    returns:
        `string` with the markup tags removed
    """
    # Style names such as `orange1` contain digits, so `[a-z]+` alone misses them.
    return re.sub(r"\[/?[a-z][a-z0-9_]*\]", "", string)
class FileHandlerFormatter(logging.Formatter):
    """
    Formatter for plain-text log output. Empty or whitespace-only messages are
    written without a timestamp; everything else uses `LOGFORMAT`. Rich markup is
    stripped from the final text.
    """

    def format(self, record):
        """
        args:
            record: the log record to format
        returns:
            the formatted record with Rich markup removed
        """
        # `record.msg` can be any object and may still need its `args` applied;
        # `getMessage()` always yields the final string, so the check below cannot
        # fail on non-string messages.
        message = record.getMessage()
        if re.fullmatch(r"\s*", message):
            formatter = logging.Formatter("%(message)s", "%H:%M:%S")
        else:
            formatter = logging.Formatter(LOGFORMAT, "%H:%M:%S")
        return escape_rich_formatting(formatter.format(record))
class _CurrentRun:
    """
    THIS CLASS SHOULD NEVER BE IMPORTED DIRECTLY. INSTEAD, IMPORT `CurrentRun`, i.e.,
    ```python
    from prism.runtime import CurrentRun
    ```

    CurrentRun stores contextual variables that tasks can access during runtime.
    This includes variables that the user specifies in their client definition,
    connectors, and the outputs of tasks that have already run.
    """

    run_id: str
    _data: Dict[str, Any]
    _refs: Dict[str, Any]
    connectors: Dict[str, "Connector"]

    def __init__(self, data: Dict[str, Any]):
        """
        args:
            data: initial context variables
        """
        self._data = data
        self._refs = {}
        self.connectors = {}

    def _set_connectors(self, connectors: List["Connector"]):
        """
        Register each connector under its `id`.
        """
        for conn in connectors:
            self.connectors[conn.id] = conn

    def _setup(
        self,
        run_id: str,
        runtime_ctx: Dict[str, Any],
        connectors: List["Connector"],
    ):
        """
        Prepare the object for a run: record the run ID, merge `runtime_ctx` into
        the context variables, and register the connectors.
        """
        self.run_id = run_id
        self._data.update(runtime_ctx)
        self._set_connectors(connectors)

    def _run_label(self) -> str:
        """
        Return the run ID for use in error messages, or a placeholder if `_setup`
        has not been called yet.
        """
        return getattr(self, "run_id", "<not set up>")

    def ctx(self, key: str, default_value: Optional[Any] = None) -> Any:
        """
        Get the value associated with context variable `key`. Context variables can be
        set in two places: when instantiating the PrismProject (with the `ctx` keyword
        argument) and when creating the run (with the `runtime_ctx` keyword argument
        in the PrismProject's `run` method).

        args:
            key: variable to retrieve
            default_value: default value to return if `key` is not found.
                Default is `None`
        returns:
            value associated with context variable `key`
        """
        return self._data.get(key, default_value)

    def _set_task_output_value(self, task_id: str, value: Any):
        """
        Record `value` as the output of task `task_id`.
        """
        self._refs[task_id] = value

    def ref(self, task_id: str) -> Any:
        """
        Get the output of task with ID `task_id`

        args:
            task_id: ID of task from which to retrieve output
        returns:
            the output of the inputted `task_id`
        raises:
            ValueError if no output has been recorded for the task ID
        """
        if task_id not in self._refs:
            raise ValueError(
                f"task ID `{task_id}` not found in run `{self._run_label()}`!"
            )
        return self._refs[task_id]

    def conn(self, connector_id: str) -> "Connector":
        """
        Get the connector object associated with ID `connector_id`.

        args:
            connector_id: ID of the connector to retrieve
        returns:
            connector object associated with `connector_id`
        raises:
            ValueError if the connector ID is not found
        """
        if connector_id not in self.connectors:
            raise ValueError(
                f"connector ID `{connector_id}` not found in run `{self._run_label()}`!"
            )
        return self.connectors[connector_id]
class Txt(PrismTarget):
    """
    Target that stores a string in a text file at `loc`.
    """

    def save(self, **kwargs):
        """
        Write `self.obj` to `self.loc`, replacing any existing content.

        args:
            **kwargs: extra keyword arguments for the built-in `open` (e.g.,
                `encoding`, `newline`). The file is always opened in "w" mode.
        """
        # `write` accepts no keyword arguments, so options belong on `open`.
        with open(self.loc, "w", **kwargs) as f:
            f.write(self.obj)

    @classmethod
    def open(cls, loc):
        """
        Read the file at `loc` and return a target wrapping its content.

        args:
            loc: path of the file to read
        returns:
            instance whose `obj` is the file's text and whose `loc` is `loc`
        """
        with open(loc, "r") as f:
            obj = f.read()
        return cls(obj, loc)
class PrismTask:
    """
    Base class for all tasks. Subclasses override `run` (and optionally `done`);
    alternatively, a callable can be supplied via `func` and is used by the default
    `run`.
    """

    retries: int
    retry_delay_seconds: int

    def __init__(
        self,
        task_id: str,
        func: Optional[Callable[..., Any]] = None,
        bool_run: bool = True,
    ):
        """
        Create an instance of the PrismTask. Nothing is executed here; call `exec`
        to run the task and populate the `output` attribute.

        args:
            task_id: ID of the task
            func: optional callable used by the default `run` implementation
            bool_run: whether `exec` should actually call `run`. Default is True
        """
        self.task_id = task_id
        self.func = func
        self.bool_run = bool_run

        # Types, locs, and kwargs for target
        self.types: List[prism.target.PrismTarget] = []
        self.locs: List[Union[str, Path]] = []
        self.kwargs: List[Any] = []

        # Retries
        self.retries = 0
        self.retry_delay_seconds = 0

        # Initialize the is_done attribute
        self.is_done: bool = False

        # Start with no output so that `get_output` can report a clear error when
        # it is called before `exec`.
        self.output: Any = None

    def exec(self):
        """
        Run the task and store its result in `output`.

        raises:
            prism.exceptions.RuntimeException if `run` returns None
        """
        # If the `target` decorator isn't applied, then only execute the `run` function
        # if bool_run is true
        if self.run.__name__ == "run" and not self.is_done:
            # If bool_run, then execute the `run` function and set the `output`
            # attribute to its result
            if self.bool_run:
                self.output = self.run()
                if self.output is None:
                    raise prism.exceptions.RuntimeException(
                        "`run` method must produce a non-null output"
                    )

            # If the code reaches this stage, then the user is attempting to use this
            # task's output without explicitly running the task or setting a target. We
            # will throw an error in the get_output() method.
            else:
                self.output = None

        # Otherwise, the decorator uses bool_run in its internal computation
        else:
            self.output = self.run()
            if self.output is None:
                raise prism.exceptions.RuntimeException(
                    "`run` method must produce a non-null output"
                )

    def done(self) -> bool:
        """
        Report whether the task is already complete. The base implementation always
        returns False.
        """
        return False

    def run(self):
        """
        Produce the task's output. The base implementation calls `func`.

        raises:
            prism.exceptions.RuntimeException if no `func` was provided
        """
        if self.func is not None:
            return self.func()
        else:
            raise prism.exceptions.RuntimeException("`run` method not implemented")

    def get_output(self):
        """
        Return the output attribute

        raises:
            prism.exceptions.RuntimeException if there is no output
        """
        # If self.output is None, then the user has not specified a target nor have they
        # explicitly run the task.
        if self.output is None:
            msg = f"cannot access the output of `{self.__class__.__name__}` without either explicitly running task or setting a target"  # noqa: E501
            raise prism.exceptions.RuntimeException(message=msg)
        return self.output
https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/templates/starter_project/output/.exists -------------------------------------------------------------------------------- /prism/templates/starter_project/tasks/class_task.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import prism.decorators 4 | import prism.target 5 | 6 | # Prism imports 7 | import prism.task 8 | from prism.runtime import CurrentRun 9 | 10 | 11 | class ExampleTask(prism.task.PrismTask): 12 | task_id = "example-class-task" 13 | 14 | # Run 15 | @prism.decorators.target( 16 | type=prism.target.Txt, 17 | loc=Path(CurrentRun.ctx("OUTPUT", "output")).resolve() / "hello_world.txt", 18 | ) 19 | def run(self): 20 | return "Hello, world!" 21 | -------------------------------------------------------------------------------- /prism/templates/starter_project/tasks/decorated_task.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | # Prism imports 4 | import prism.target 5 | from prism.decorators import target, task 6 | from prism.runtime import CurrentRun 7 | 8 | 9 | @task( 10 | task_id="example-decorated-task", 11 | targets=[ 12 | target( 13 | type=prism.target.Txt, 14 | loc=Path(CurrentRun.ctx("OUTPUT", "output")).resolve() / "hello_world.txt", 15 | ) 16 | ], 17 | ) 18 | def example_task(): 19 | return "Hello, world!" 
def print_success():
    """Return the literal success message."""
    message = "Success!"
    return message
def task1_return():
    """Return the fixed greeting string."""
    greeting = "Hello from module01.Task01"
    return greeting
def _remove_files_in_output(wkdir):
    """
    Remove file outputs from `output` folder of project

    Every regular file directly inside `<wkdir>/output` is deleted, except the
    `.exists` placeholder. Sub-directories (and anything inside them) are left alone.

    args:
        wkdir: project directory, as a `str` or `pathlib.Path`; may be relative
    raises:
        FileNotFoundError: if `<wkdir>/output` does not exist
    """
    output_dir = Path(wkdir) / "output"
    for _file in output_dir.iterdir():
        # `iterdir()` already yields paths prefixed with `output_dir`. Joining them onto
        # the directory a second time only resolves correctly for absolute paths; with
        # a relative `wkdir` it built a non-existent path and nothing was removed.
        if _file.is_file() and _file.name != ".exists":
            os.unlink(_file)
def test_run_normal(monkeypatch):
    """
    Run the `005_simple_project_no_null_tasks` project through the CLI `run` command and
    check the contents of the `task01.txt` and `task02.txt` outputs.
    """
    wkdir = TEST_PROJECTS / "005_simple_project_no_null_tasks"
    _console_mocker(monkeypatch)

    # Remove output
    _remove_files_in_output(wkdir)
    assert not Path(wkdir / "output" / "task01.txt").is_file()
    assert not Path(wkdir / "output" / "task02.txt").is_file()

    # Execute the CLI command
    output_dir = wkdir / "output"
    args = [
        "run",
        "--tasks-dir",
        str(wkdir / "tasks"),
        "--disable-rich-logging",
        "--runtime-ctx",
        json.dumps({"OUTPUT": str(output_dir)}),
    ]
    _ = cli(args, standalone_mode=False)

    # Check outputs. Each file is read once; the duplicated reads were redundant.
    task01_txt = _file_as_str(Path(wkdir / "output" / "task01.txt"))
    task02_txt = _file_as_str(Path(wkdir / "output" / "task02.txt"))
    assert "Hello from task 1!" == task01_txt
    assert "Hello from task 1!" + "\n" + "Hello from task 2!" == task02_txt
def test_run_connectors(monkeypatch):
    """
    Run the Snowflake and Postgres tasks of the `013_connectors` project through the CLI,
    passing both connectors on the command line, and check that the CSV outputs exist.
    """
    project_dir = TEST_PROJECTS / "013_connectors"
    _console_mocker(monkeypatch)

    # Start from a clean output folder
    _remove_files_in_output(project_dir)

    # Assemble the CLI arguments piece by piece
    out_dir = project_dir / "output"
    cli_args = ["run", "--tasks-dir", str(project_dir / "tasks")]
    for task_name in ("snowflake_task.SnowflakeTask", "postgres_task.PostgresTask"):
        cli_args.extend(["--task", task_name])
    for connector_path in (
        "additional_package.cli_connectors.snowflake_connector",
        "additional_package.cli_connectors.postgres_connector",
    ):
        cli_args.extend(["--connector", connector_path])
    cli_args.extend(
        [
            "--disable-rich-logging",
            "--runtime-ctx",
            json.dumps({"OUTPUT": str(out_dir)}),
        ]
    )
    _ = cli(cli_args, standalone_mode=False)

    # Every expected CSV must have been written
    for csv_name in (
        "sample_postgres_data.csv",
        "machinery_sample.csv",
        "household_sample.csv",
    ):
        assert (project_dir / "output" / csv_name).is_file()
    _remove_files_in_output(project_dir)
class ExampleTask(prism.task.PrismTask):
    # Example task written as a `PrismTask` subclass with an explicit task ID.
    task_id = "example-class-task"

    # Run
    # The decorator attaches a `prism.target.Txt` target located at `hello_world.txt`
    # under the run context's `OUTPUT` value.
    # NOTE(review): assumes `CurrentRun.ctx("OUTPUT")` supports `/` (e.g. a
    # `pathlib.Path`) — confirm.
    @prism.decorators.target(
        type=prism.target.Txt, loc=CurrentRun.ctx("OUTPUT") / "hello_world.txt"
    )
    def run(self):
        # The returned string is the task's output
        return "Hello, world!"
# Example task written as a plain function wrapped by the `task` decorator. Its single
# target is a `prism.target.Txt` located at `hello_world.txt` under the run context's
# `OUTPUT` value.
# NOTE(review): assumes `CurrentRun.ctx("OUTPUT")` supports `/` (e.g. a `pathlib.Path`)
# — confirm.
@task(
    task_id="example-decorated-task",
    targets=[
        target(type=prism.target.Txt, loc=CurrentRun.ctx("OUTPUT") / "hello_world.txt")
    ],
)
def example_task():
    # The returned string is the task's output
    return "Hello, world!"
class Task01(prism.task.PrismTask):
    # Run
    # The decorator attaches a `prism.target.Txt` target located at `task01.txt` under
    # the run context's `OUTPUT` value.
    # NOTE(review): assumes `CurrentRun.ctx("OUTPUT")` supports `/` (e.g. a
    # `pathlib.Path`) — confirm.
    @prism.decorators.target(
        type=prism.target.Txt, loc=CurrentRun.ctx("OUTPUT") / "task01.txt"
    )
    def run(self):
        # The returned string is the task's output
        return "Hello from task 1!"
class Task02(prism.task.PrismTask):
    # Run
    # NOTE(review): this target is located at "task01.txt", the same file name that
    # `Task01` in module01.py uses for its target. Possibly a copy/paste leftover —
    # confirm whether "task02.txt" was intended.
    @prism.decorators.target(
        type=prism.target.Txt, loc=CurrentRun.ctx("OUTPUT") / "task01.txt"
    )
    def run(self):
        # Fetch the value referenced as "module01.Task01" and return its last five
        # elements (the last five characters, if that value is a string).
        lines = CurrentRun.ref("module01.Task01")
        return lines[-5:]
class Task02(prism.task.PrismTask):
    # Run
    # The decorator attaches a `prism.target.Txt` target located at `task02.txt`. The
    # `OUTPUT` context value is wrapped in `Path(...)` so that `/` also works when the
    # value is a plain string.
    @prism.decorators.target(
        type=prism.target.Txt, loc=Path(CurrentRun.ctx("OUTPUT")) / "task02.txt"
    )
    def run(self):
        # Append this task's line to the value referenced as "module01.Task01".
        # NOTE(review): the concatenation assumes that value is a `str` — confirm.
        lines = CurrentRun.ref("module01.Task01")
        return lines + "\n" + "Hello from task 2!"
class Task03(prism.task.PrismTask):
    # Run
    def run(self):
        # Append this task's line to the value referenced as "module02.Task02". No
        # target is declared on this method.
        # NOTE(review): the concatenation assumes that value is a `str` — confirm.
        lines = CurrentRun.ref("module02.Task02")
        return lines + "\n" + "Hello from task 3!"
12 | -------------------------------------------------------------------------------- /prism/tests/integration/test_projects/010_project_nested_module_dirs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/tests/integration/test_projects/010_project_nested_module_dirs/__init__.py -------------------------------------------------------------------------------- /prism/tests/integration/test_projects/010_project_nested_module_dirs/dev/dev.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "7eafd19a", 6 | "metadata": {}, 7 | "source": [ 8 | "**Use this notebook for developing code before productionizing it within tasks**" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "id": "b771bf53", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "## CODE HERE..." 19 | ] 20 | } 21 | ], 22 | "metadata": { 23 | "kernelspec": { 24 | "display_name": "Python 3", 25 | "language": "python", 26 | "name": "python3" 27 | }, 28 | "language_info": { 29 | "codemirror_mode": { 30 | "name": "ipython", 31 | "version": 3 32 | }, 33 | "file_extension": ".py", 34 | "mimetype": "text/x-python", 35 | "name": "python", 36 | "nbconvert_exporter": "python", 37 | "pygments_lexer": "ipython3", 38 | "version": "3.7.4" 39 | } 40 | }, 41 | "nbformat": 4, 42 | "nbformat_minor": 5 43 | } 44 | -------------------------------------------------------------------------------- /prism/tests/integration/test_projects/010_project_nested_module_dirs/output/task01.txt: -------------------------------------------------------------------------------- 1 | Hello from task 1! 
-------------------------------------------------------------------------------- /prism/tests/integration/test_projects/010_project_nested_module_dirs/output/task02.txt: -------------------------------------------------------------------------------- 1 | Hello from task 1! 2 | Hello from task 2! -------------------------------------------------------------------------------- /prism/tests/integration/test_projects/010_project_nested_module_dirs/prism_project.py: -------------------------------------------------------------------------------- 1 | """ 2 | Prism project 3 | """ 4 | 5 | # Imports 6 | import logging 7 | from pathlib import Path 8 | from prism.admin import generate_run_id, generate_run_slug 9 | 10 | 11 | # Project metadata 12 | NAME = "" 13 | AUTHOR = "" 14 | VERSION = "" 15 | DESCRIPTION = """ 16 | """ 17 | 18 | # Admin 19 | RUN_ID = generate_run_id() # don't delete this! 20 | SLUG = generate_run_slug() # don't delete this! 21 | 22 | 23 | # sys.path config. This gives your tasks access to local tasks / packages that exist 24 | # outside of your project structure. 25 | SYS_PATH_CONF = [ 26 | Path(__file__).parent, 27 | Path(__file__).parent.parent, 28 | ] 29 | 30 | 31 | # Thread count: number of workers to use to execute tasks concurrently. If set to 1, 32 | # then 1 task is run at a time. 33 | THREADS = 1 34 | 35 | 36 | # Profile directory and name 37 | PROFILE_YML_PATH = Path(__file__).parent / "profile.yml" 38 | PROFILE = None # name of profile within `profiles.yml` 39 | 40 | 41 | # Logger 42 | PRISM_LOGGER = logging.getLogger("PRISM_LOGGER") 43 | 44 | 45 | # Other variables / parameters. Make sure to capitalize all of these! 
# Prism imports
import prism.decorators
import prism.target
import prism.task
from prism.runtime import CurrentRun


class Task01(prism.task.PrismTask):
    # Run
    # The decorator attaches a `prism.target.Txt` target located at `task01.txt` under
    # the run context's `OUTPUT` value.
    # NOTE(review): assumes `CurrentRun.ctx("OUTPUT")` supports `/` (e.g. a
    # `pathlib.Path`) — confirm.
    @prism.decorators.target(
        type=prism.target.Txt, loc=CurrentRun.ctx("OUTPUT") / "task01.txt"
    )
    def run(self):
        # The returned string is the task's output
        return "Hello from task 1!"
class Task03(prism.task.PrismTask):
    # Run
    def run(self):
        # Append this task's line to the value referenced as "extract/module02.Task02"
        # (the reference string includes the `extract/` sub-directory). No target is
        # declared on this method.
        # NOTE(review): the concatenation assumes that value is a `str` — confirm.
        lines = CurrentRun.ref("extract/module02.Task02")
        return lines + "\n" + "Hello from task 3!"
12 | -------------------------------------------------------------------------------- /prism/tests/integration/test_projects/011_bad_task_ref/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/tests/integration/test_projects/011_bad_task_ref/__init__.py -------------------------------------------------------------------------------- /prism/tests/integration/test_projects/011_bad_task_ref/dev/dev.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "7eafd19a", 6 | "metadata": {}, 7 | "source": [ 8 | "**Use this notebook for developing code before productionizing it within tasks**" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "id": "b771bf53", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "## CODE HERE..." 
# Prism imports
import prism.decorators
import prism.target
import prism.task
from prism.runtime import CurrentRun


class Task01(prism.task.PrismTask):
    # Run
    # The decorator attaches a `prism.target.Txt` target located at `task01.txt` under
    # the run context's `OUTPUT` value.
    # NOTE(review): assumes `CurrentRun.ctx("OUTPUT")` supports `/` (e.g. a
    # `pathlib.Path`) — confirm.
    @prism.decorators.target(
        type=prism.target.Txt, loc=CurrentRun.ctx("OUTPUT") / "task01.txt"
    )
    def run(self):
        # The returned string is the task's output
        return "Hello from task 1!"
class Task04(prism.task.PrismTask):
    # Run
    def run(self):
        # Append this task's line to the value referenced as "load/module03.Task03"
        # (the reference string includes the `load/` sub-directory). No target is
        # declared on this method.
        # NOTE(review): the concatenation assumes that value is a `str` — confirm.
        return CurrentRun.ref("load/module03.Task03") + "\n" + "Hello from task 4!"
"""
Prism project
"""

# Imports
import logging
from pathlib import Path
from prism.admin import generate_run_id, generate_run_slug


# Project metadata
NAME = ""
AUTHOR = ""
VERSION = ""
DESCRIPTION = """
"""

# Admin
RUN_ID = generate_run_id()  # don't delete this!
SLUG = generate_run_slug()  # don't delete this!


# sys.path config. This gives your tasks access to local tasks / packages that exist
# outside of your project structure. The two entries are this file's directory and
# that directory's parent.
SYS_PATH_CONF = [
    Path(__file__).parent,
    Path(__file__).parent.parent,
]


# Thread count: number of workers to use to execute tasks concurrently. If set to 1,
# then 1 task is run at a time.
THREADS = 1


# Profile directory and name
# NOTE(review): the path below points at `profile.yml`, while the comment on `PROFILE`
# mentions `profiles.yml` — confirm which file name is intended.
PROFILE_YML_PATH = Path(__file__).parent / "profile.yml"
PROFILE = None  # name of profile within `profiles.yml`


# Logger
PRISM_LOGGER = logging.getLogger("PRISM_LOGGER")


# Other variables / parameters. Make sure to capitalize all of these!
VAR_1 = {"a": "b"}
VAR_2 = 200
VAR_3 = "2015-01-01"

# Paths, all relative to the directory that contains this file
WKDIR = Path(__file__).parent
DATA = WKDIR / "data"
OUTPUT = WKDIR / "output"
class Task01(prism.task.PrismTask):
    # Run
    # The decorator attaches a `prism.target.PandasCsv` target located at `task01.csv`
    # under the run context's `OUTPUT` value, with `index=False` passed along.
    # NOTE(review): assumes `CurrentRun.ctx("OUTPUT")` supports `/` (e.g. a
    # `pathlib.Path`) — confirm.
    @prism.decorators.target(
        type=prism.target.PandasCsv,
        loc=CurrentRun.ctx("OUTPUT") / "task01.csv",
        index=False,
    )
    def run(self):
        # Record wall-clock timestamps on either side of a 15-second sleep and return
        # them as a one-row DataFrame with `start_time` and `end_time` columns.
        start_time = time.time()
        time.sleep(15)
        end_time = time.time()
        time_df = pd.DataFrame({"start_time": [start_time], "end_time": [end_time]})
        return time_df
class Task03(prism.task.PrismTask):
    def get_txt_output(self, path):
        """
        Return the full contents of the text file at `path`.

        args:
            path: location of the file to read
        returns:
            file contents as a single string
        """
        # The `with` block closes the file on exit, so no explicit `close()` is needed.
        with open(path) as f:
            lines = f.read()
        return lines

    # Run
    def run(self):
        # Both referenced values must be pandas DataFrames; the assertions fail the
        # task otherwise.
        d1 = CurrentRun.ref("module01.Task01")
        assert isinstance(d1, pd.DataFrame)
        d2 = CurrentRun.ref("module02.Task02")
        assert isinstance(d2, pd.DataFrame)
        return "Hello from task 3!"
"""
Prism project
"""

# Imports
import logging
from pathlib import Path
from prism.admin import generate_run_id, generate_run_slug


# Project metadata
NAME = ""
AUTHOR = ""
VERSION = ""
DESCRIPTION = """
"""

# Admin
RUN_ID = generate_run_id()  # don't delete this!
SLUG = generate_run_slug()  # don't delete this!


# sys.path config. This gives your tasks access to local tasks / packages that exist
# outside of your project structure.
SYS_PATH_CONF = [
    Path(__file__).parent,  # the directory containing this file
    Path(__file__).parent.parent,  # the directory one level above it
]


# Thread count: number of workers to use to execute tasks concurrently. If set to 1,
# then 1 task is run at a time.
THREADS = 2


# Profile YAML path and profile name
PROFILE_YML_PATH = Path(__file__).parent / "profile.yml"
PROFILE = None  # name of profile within `profile.yml`


# Logger
PRISM_LOGGER = logging.getLogger("PRISM_LOGGER")


# Other variables / parameters. Make sure to capitalize all of these!
class BadAdapterTask(prism.task.PrismTask):
    """Connector fixture that queries through the ``snowflake_connector`` connection."""

    # Run
    @prism.decorators.target(
        type=prism.target.PandasCsv,
        loc=Path(CurrentRun.ctx("OUTPUT")) / "bad_adapter.csv",
        index=False,
    )
    def run(self):
        """Execute the sample customer query, requesting a pandas return type."""
        connection = CurrentRun.conn("snowflake_connector")
        query = """
        SELECT
            *
        FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."CUSTOMER"
        WHERE
            C_MKTSEGMENT = 'MACHINERY'
        LIMIT 50
        """
        return connection.execute_sql(sql=query, return_type="pandas")
class PostgresTask(prism.task.PrismTask):
    """Connector fixture that queries through the ``postgres-connector`` connection."""

    # Run
    @prism.decorators.target(
        type=prism.target.PandasCsv,
        loc=Path(CurrentRun.ctx("OUTPUT")) / "sample_postgres_data.csv",
        index=False,
    )
    def run(self):
        """Execute the ten-row name query, requesting a pandas return type."""
        connection = CurrentRun.conn("postgres-connector")
        query = """
        SELECT
            first_name
            , last_name
        FROM us500
        ORDER BY
            first_name
            , last_name
        LIMIT 10
        """
        return connection.execute_sql(sql=query, return_type="pandas")
class PysparkTask(prism.task.PrismTask):
    """Connector fixture that filters the two Snowflake samples by ``C_ACCTBAL``."""

    # Run
    @prism.decorators.target(
        type=prism.target.PandasCsv,
        loc=Path(CurrentRun.ctx("OUTPUT")) / "machinery_sample_filtered.csv",
        index=False,
    )
    @prism.decorators.target(
        type=prism.target.PandasCsv,
        loc=Path(CurrentRun.ctx("OUTPUT")) / "household_sample_filtered.csv",
        index=False,
    )
    def run(self):
        """Return the filtered machinery and household frames as pandas objects."""
        upstream = CurrentRun.ref("snowflake_task.SnowflakeTask")
        machinery_pd = upstream[0]
        household_pd = upstream[1]

        # Machinery: sort ascending by balance, keep balances of at most 1000
        machinery_sorted = spark.createDataFrame(machinery_pd).sort(
            F.col("C_ACCTBAL").asc()
        )
        machinery_result = machinery_sorted.filter(
            F.col("C_ACCTBAL") <= 1000
        ).toPandas()

        # Household: sort ascending by balance, keep balances in (1000, 2000]
        household_sorted = spark.createDataFrame(household_pd).sort(
            F.col("C_ACCTBAL").asc()
        )
        above_floor = household_sorted.filter(F.col("C_ACCTBAL") > 1000)
        household_result = above_floor.filter(F.col("C_ACCTBAL") <= 2000).toPandas()

        # Return
        return machinery_result, household_result
"""
Prism project
"""

# Imports
import logging
from pathlib import Path
from prism.admin import generate_run_id, generate_run_slug


# Project metadata
NAME = ""
AUTHOR = ""
VERSION = ""
DESCRIPTION = """
"""

# Admin
RUN_ID = generate_run_id()  # don't delete this!
SLUG = generate_run_slug()  # don't delete this!


# sys.path config. This gives your tasks access to local tasks / packages that exist
# outside of your project structure.
SYS_PATH_CONF = [
    Path(__file__).parent,  # the directory containing this file
]


# Thread count: number of workers to use to execute tasks concurrently. If set to 1,
# then 1 task is run at a time.
THREADS = 1


# Profile YAML path and profile name
PROFILE_YML_PATH = Path(__file__).parent / "profile.yml"
PROFILE = None  # name of profile within `profile.yml`


# Logger
PRISM_LOGGER = logging.getLogger("PRISM_LOGGER")


# Other variables / parameters.
# Task
@task(
    task_id="extract_task",
    targets=[target(type=JSON, loc=CurrentRun.ctx("OUTPUT") / "todos.json")],
)
def extract():
    """Fetch the sample todos endpoint and return the decoded JSON payload."""
    url = "https://jsonplaceholder.typicode.com/todos"
    # requests applies no timeout by default; without one, a stalled connection
    # would block this task (and the run) indefinitely.
    resp = requests.get(url, timeout=30)
    return json.loads(resp.text)
| names[f"{name}.txt"] = ppl["name"] 27 | 28 | # Return 29 | return names 30 | -------------------------------------------------------------------------------- /prism/tests/integration/test_projects/020_dec_retries/triggers.yml: -------------------------------------------------------------------------------- 1 | include: 2 | - "{{ Path(__file__).parent.parent }}" 3 | 4 | triggers: 5 | test_trigger_function: 6 | type: function 7 | function: common.functions.test_trigger_function 8 | -------------------------------------------------------------------------------- /prism/tests/integration/test_projects/023_skipped_task/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/tests/integration/test_projects/023_skipped_task/__init__.py -------------------------------------------------------------------------------- /prism/tests/integration/test_projects/023_skipped_task/callback.txt: -------------------------------------------------------------------------------- 1 | This is the output of a callback -------------------------------------------------------------------------------- /prism/tests/integration/test_projects/023_skipped_task/output/.exists: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/tests/integration/test_projects/023_skipped_task/output/.exists -------------------------------------------------------------------------------- /prism/tests/integration/test_projects/023_skipped_task/output/task01.txt: -------------------------------------------------------------------------------- 1 | Hello from task 1! 
"""
Prism project
"""

# Imports
import logging
from pathlib import Path
from prism.admin import generate_run_id, generate_run_slug


# Project metadata
NAME = ""
AUTHOR = ""
VERSION = ""
DESCRIPTION = """
"""

# Admin
RUN_ID = generate_run_id()  # don't delete this!
SLUG = generate_run_slug()  # don't delete this!


# sys.path config. This gives your tasks access to local tasks / packages that exist
# outside of your project structure.
SYS_PATH_CONF = [
    Path(__file__).parent,  # the directory containing this file
    Path(__file__).parent.parent,  # the directory one level above it
]


# Thread count: number of workers to use to execute tasks concurrently. If set to 1,
# then 1 task is run at a time.
THREADS = 1


# Profile YAML path and profile name
PROFILE_YML_PATH = Path(__file__).parent / "profile.yml"
PROFILE = None  # name of profile within `profile.yml`


# Logger
PRISM_LOGGER = logging.getLogger("PRISM_LOGGER")


# Other variables / parameters. Make sure to capitalize all of these!
class Task01(prism.task.PrismTask):
    """Skipped-task fixture that declares a Txt target at ``OUTPUT/task01.txt``."""

    def done(self):
        """Return True when ``task01.txt`` is an existing file under OUTPUT."""
        expected_output = Path(CurrentRun.ctx("OUTPUT")) / "task01.txt"
        return expected_output.is_file()

    # Run
    @prism.decorators.target(
        type=prism.target.Txt, loc=Path(CurrentRun.ctx("OUTPUT")) / "task01.txt"
    )
    def run(self):
        """Return this task's greeting."""
        greeting = "Hello from task 1!"
        return greeting
def test_trigger_function(project_name: str = "014_test_triggers_normal"):
    """Write ``trigger.txt`` into the ``output`` directory of ``project_name``.

    The project directory is looked up under ``Path(__file__).parent.parent``.
    """
    projects_root = Path(__file__).parent.parent
    marker = projects_root / project_name / "output" / "trigger.txt"
    marker.write_text("This is outputted from the trigger function!")
def test_good_callback():
    """A zero-argument callback runs and creates ``callback.txt`` in the CWD."""
    test_dir = Path(__file__).parent
    callback_file = test_dir / "callback.txt"
    previous_cwd = os.getcwd()

    # Change working directory
    os.chdir(test_dir)
    try:
        assert not callback_file.is_file()

        # Run callback
        callback = _PrismCallback(example_callback)
        callback.run()
        assert callback_file.is_file()
    finally:
        # Clean up even when an assertion fails: a leftover callback.txt would
        # make the precondition above fail on every later run, and a changed
        # working directory would leak into whichever test runs next.
        if callback_file.is_file():
            os.unlink(callback_file)
        os.chdir(previous_cwd)
"This is task A." 7 | -------------------------------------------------------------------------------- /prism/tests/unit/test_compiled_projects/dag_cycle/moduleB.py: -------------------------------------------------------------------------------- 1 | from prism.runtime import CurrentRun 2 | from prism.task import PrismTask 3 | 4 | 5 | class Taskb(PrismTask): 6 | def run(self): 7 | return ( 8 | CurrentRun.ref("moduleA.Taska") 9 | + CurrentRun.ref("moduleE.Taske") 10 | + " This is task B." 11 | ) # noqa: E501 12 | -------------------------------------------------------------------------------- /prism/tests/unit/test_compiled_projects/dag_cycle/moduleC.py: -------------------------------------------------------------------------------- 1 | from prism.runtime import CurrentRun 2 | from prism.task import PrismTask 3 | 4 | 5 | class Taskc(PrismTask): 6 | def run(self): 7 | return CurrentRun.ref("moduleA.Taska") + " This is task C." 8 | -------------------------------------------------------------------------------- /prism/tests/unit/test_compiled_projects/dag_cycle/moduleD.py: -------------------------------------------------------------------------------- 1 | from prism.runtime import CurrentRun 2 | from prism.task import PrismTask 3 | 4 | 5 | class Taskd(PrismTask): 6 | def run(self): 7 | return ( 8 | CurrentRun.ref("moduleB.Taskb") 9 | + CurrentRun.ref("moduleA.Taska") 10 | + CurrentRun.ref("moduleC.Taskc") 11 | + " This is task D." 12 | ) # noqa 13 | -------------------------------------------------------------------------------- /prism/tests/unit/test_compiled_projects/dag_cycle/moduleE.py: -------------------------------------------------------------------------------- 1 | from prism.runtime import CurrentRun 2 | from prism.task import PrismTask 3 | 4 | 5 | class Taske(PrismTask): 6 | def run(self): 7 | return ( 8 | CurrentRun.ref("moduleA.Taska") 9 | + CurrentRun.ref("moduleC.Taskc") 10 | + CurrentRun.ref("moduleD.Taskd") 11 | + " This is task E." 
class Task02(prism.task.PrismTask):
    """Fixture task that references ``task01.Task01``."""

    def run(self):
        """Return the referenced output joined with ``+`` to this task's message."""
        return CurrentRun.ref("task01.Task01") + "This is task 02."
" 8 | -------------------------------------------------------------------------------- /prism/tests/unit/test_compiled_projects/task_ref_15nodes/task04.py: -------------------------------------------------------------------------------- 1 | import prism.task 2 | from prism.runtime import CurrentRun 3 | 4 | 5 | class Task04(prism.task.PrismTask): 6 | def run(self): 7 | return ( 8 | CurrentRun.ref("task02.Task02") 9 | + CurrentRun.ref("task03.Task03") 10 | + "This is task 04. " 11 | ) # noqa: E501 12 | -------------------------------------------------------------------------------- /prism/tests/unit/test_compiled_projects/task_ref_15nodes/task05.py: -------------------------------------------------------------------------------- 1 | import prism.task 2 | from prism.runtime import CurrentRun 3 | 4 | 5 | class Task05(prism.task.PrismTask): 6 | def run(self): 7 | return CurrentRun.ref("task01.Task01") + "This is task 05. " 8 | -------------------------------------------------------------------------------- /prism/tests/unit/test_compiled_projects/task_ref_15nodes/task06.py: -------------------------------------------------------------------------------- 1 | import prism.task 2 | from prism.runtime import CurrentRun 3 | 4 | 5 | class Task06(prism.task.PrismTask): 6 | def run(self): 7 | return CurrentRun.ref("task05.Task05") + "This is task 06. " 8 | -------------------------------------------------------------------------------- /prism/tests/unit/test_compiled_projects/task_ref_15nodes/task07.py: -------------------------------------------------------------------------------- 1 | import prism.decorators 2 | import prism.task 3 | from prism.runtime import CurrentRun 4 | 5 | 6 | # Class-based task 7 | class Task07a(prism.task.PrismTask): 8 | def run(self): 9 | return ( 10 | CurrentRun.ref("task04.Task04") 11 | + CurrentRun.ref("task06.Task06") 12 | + "This is task 07. 
" 13 | ) # noqa: E501 14 | 15 | 16 | # Function-based task 17 | @prism.decorators.task() 18 | def task_07b(): 19 | _ = CurrentRun.ref("task07.Task07a") 20 | return "This is a local task" 21 | -------------------------------------------------------------------------------- /prism/tests/unit/test_compiled_projects/task_ref_15nodes/task08.py: -------------------------------------------------------------------------------- 1 | import prism.task 2 | from prism.runtime import CurrentRun 3 | 4 | 5 | class Task08(prism.task.PrismTask): 6 | def run(self): 7 | return CurrentRun.ref("task01.Task01") + "This is task 08. " 8 | -------------------------------------------------------------------------------- /prism/tests/unit/test_compiled_projects/task_ref_15nodes/task09.py: -------------------------------------------------------------------------------- 1 | import prism.task 2 | from prism.runtime import CurrentRun 3 | 4 | 5 | class Task09(prism.task.PrismTask): 6 | def run(self): 7 | return ( 8 | CurrentRun.ref("task05.Task05") 9 | + CurrentRun.ref("task08.Task08") 10 | + "This is task 09. " 11 | ) # noqa: E501 12 | -------------------------------------------------------------------------------- /prism/tests/unit/test_compiled_projects/task_ref_15nodes/task10.py: -------------------------------------------------------------------------------- 1 | import prism.task 2 | from prism.runtime import CurrentRun 3 | 4 | 5 | class Task10(prism.task.PrismTask): 6 | def run(self): 7 | return CurrentRun.ref("task01.Task01") + "This is task 10. 
" 8 | -------------------------------------------------------------------------------- /prism/tests/unit/test_compiled_projects/task_ref_15nodes/task11.py: -------------------------------------------------------------------------------- 1 | import prism.task 2 | from prism.runtime import CurrentRun 3 | 4 | 5 | class Task11(prism.task.PrismTask): 6 | def run(self): 7 | return ( 8 | CurrentRun.ref("task07.Task07a") 9 | + CurrentRun.ref("task10.Task10") 10 | + "This is task 11." 11 | ) # noqa: E501 12 | -------------------------------------------------------------------------------- /prism/tests/unit/test_compiled_projects/task_ref_15nodes/task12.py: -------------------------------------------------------------------------------- 1 | import prism.task 2 | from prism.runtime import CurrentRun 3 | 4 | 5 | class Task12(prism.task.PrismTask): 6 | def run(self): 7 | return CurrentRun.ref("task10.Task10") + "This is task 12. " 8 | -------------------------------------------------------------------------------- /prism/tests/unit/test_compiled_projects/task_ref_15nodes/task13.py: -------------------------------------------------------------------------------- 1 | import prism.task 2 | from prism.runtime import CurrentRun 3 | 4 | 5 | class Task13(prism.task.PrismTask): 6 | def run(self): 7 | return CurrentRun.ref("task10.Task10") + "This is task 13. " 8 | -------------------------------------------------------------------------------- /prism/tests/unit/test_compiled_projects/task_ref_15nodes/task14.py: -------------------------------------------------------------------------------- 1 | import prism.task 2 | from prism.runtime import CurrentRun 3 | 4 | 5 | class Task14(prism.task.PrismTask): 6 | def run(self): 7 | return CurrentRun.ref("task11.Task11") + "This is task 14. 
" 8 | -------------------------------------------------------------------------------- /prism/tests/unit/test_compiled_projects/task_ref_15nodes/task15.py: -------------------------------------------------------------------------------- 1 | import prism.task 2 | from prism.runtime import CurrentRun 3 | 4 | 5 | class Task15(prism.task.PrismTask): 6 | def run(self): 7 | return CurrentRun.ref("task11.Task11") + "This is task 15. " 8 | -------------------------------------------------------------------------------- /prism/tests/unit/test_compiled_projects/task_ref_3nodes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/tests/unit/test_compiled_projects/task_ref_3nodes/__init__.py -------------------------------------------------------------------------------- /prism/tests/unit/test_compiled_projects/task_ref_3nodes/task01.py: -------------------------------------------------------------------------------- 1 | import prism.task 2 | 3 | 4 | class Task01(prism.task.PrismTask): 5 | def run(): 6 | return "This is task 1." 7 | -------------------------------------------------------------------------------- /prism/tests/unit/test_compiled_projects/task_ref_3nodes/task02.py: -------------------------------------------------------------------------------- 1 | import prism.task 2 | from prism.runtime import CurrentRun 3 | 4 | 5 | class Task02(prism.task.PrismTask): 6 | def run(self): 7 | return CurrentRun.ref("task01.Task01") + " This is task 2." 
8 | -------------------------------------------------------------------------------- /prism/tests/unit/test_compiled_projects/task_ref_3nodes/task03.py: -------------------------------------------------------------------------------- 1 | import prism.task 2 | from prism.runtime import CurrentRun 3 | 4 | 5 | class Task03(prism.task.PrismTask): 6 | def run(self): 7 | return CurrentRun.ref("task02.Task02") + "This is task 3." 8 | -------------------------------------------------------------------------------- /prism/tests/unit/test_compiled_projects/task_ref_5nodes/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/tests/unit/test_compiled_projects/task_ref_5nodes/__init__.py -------------------------------------------------------------------------------- /prism/tests/unit/test_compiled_projects/task_ref_5nodes/taskA.py: -------------------------------------------------------------------------------- 1 | from prism.task import PrismTask 2 | 3 | 4 | class Taska(PrismTask): 5 | def run(self): 6 | return "This is task A." 7 | -------------------------------------------------------------------------------- /prism/tests/unit/test_compiled_projects/task_ref_5nodes/taskB.py: -------------------------------------------------------------------------------- 1 | from prism.runtime import CurrentRun 2 | from prism.task import PrismTask 3 | 4 | 5 | class Taskb(PrismTask): 6 | def run(self): 7 | return CurrentRun.ref("taskA.Taska") + " This is task B." 
8 | -------------------------------------------------------------------------------- /prism/tests/unit/test_compiled_projects/task_ref_5nodes/taskC.py: -------------------------------------------------------------------------------- 1 | from prism.runtime import CurrentRun 2 | from prism.task import PrismTask 3 | 4 | 5 | class Taskc(PrismTask): 6 | def run(self): 7 | return CurrentRun.ref("taskA.Taska") + " This is task C." 8 | -------------------------------------------------------------------------------- /prism/tests/unit/test_compiled_projects/task_ref_5nodes/taskD.py: -------------------------------------------------------------------------------- 1 | from prism.runtime import CurrentRun 2 | from prism.task import PrismTask 3 | 4 | 5 | class Taskd(PrismTask): 6 | def run(self): 7 | return ( 8 | CurrentRun.ref("taskB.Taskb") 9 | + CurrentRun.ref("taskA.Taska") 10 | + CurrentRun.ref("taskC.Taskc") 11 | + " This is task D." 12 | ) # noqa 13 | -------------------------------------------------------------------------------- /prism/tests/unit/test_compiled_projects/task_ref_5nodes/taskE.py: -------------------------------------------------------------------------------- 1 | from prism.runtime import CurrentRun 2 | from prism.task import PrismTask 3 | 4 | 5 | class Taske(PrismTask): 6 | def run(self): 7 | return ( 8 | CurrentRun.ref("taskA.Taska") 9 | + CurrentRun.ref("taskC.Taskc") 10 | + CurrentRun.ref("taskD.Taskd") 11 | + " This is task E." 
12 | ) # noqa 13 | -------------------------------------------------------------------------------- /prism/tests/unit/test_compiled_projects/task_ref_norefs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/tests/unit/test_compiled_projects/task_ref_norefs/__init__.py -------------------------------------------------------------------------------- /prism/tests/unit/test_compiled_projects/task_ref_norefs/moduleA.py: -------------------------------------------------------------------------------- 1 | from prism.task import PrismTask 2 | 3 | 4 | class Taska(PrismTask): 5 | def run(self): 6 | return "This is task A." 7 | -------------------------------------------------------------------------------- /prism/tests/unit/test_compiled_projects/task_ref_norefs/moduleB.py: -------------------------------------------------------------------------------- 1 | from prism.task import PrismTask 2 | 3 | 4 | class Taskb(PrismTask): 5 | def run(self): 6 | return "This is task B." 7 | -------------------------------------------------------------------------------- /prism/tests/unit/test_compiled_projects/task_ref_norefs/moduleC.py: -------------------------------------------------------------------------------- 1 | from prism.task import PrismTask 2 | 3 | 4 | class Taskc(PrismTask): 5 | def run(self): 6 | return "This is task C." 7 | -------------------------------------------------------------------------------- /prism/tests/unit/test_compiled_projects/task_ref_norefs/moduleD.py: -------------------------------------------------------------------------------- 1 | from prism.task import PrismTask 2 | 3 | 4 | class Taskd(PrismTask): 5 | def run(self): 6 | return "This is task D." 
7 | -------------------------------------------------------------------------------- /prism/tests/unit/test_compiled_projects/task_ref_norefs/moduleE.py: -------------------------------------------------------------------------------- 1 | from prism.task import PrismTask 2 | 3 | 4 | class Taske(PrismTask): 5 | def run(self): 6 | return "This is task E." 7 | -------------------------------------------------------------------------------- /prism/tests/unit/test_compiled_projects/task_ref_selfref/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/tests/unit/test_compiled_projects/task_ref_selfref/__init__.py -------------------------------------------------------------------------------- /prism/tests/unit/test_compiled_projects/task_ref_selfref/moduleA.py: -------------------------------------------------------------------------------- 1 | from prism.task import PrismTask 2 | 3 | 4 | class Taska(PrismTask): 5 | def run(self): 6 | return "This is task A." 7 | -------------------------------------------------------------------------------- /prism/tests/unit/test_compiled_projects/task_ref_selfref/moduleB.py: -------------------------------------------------------------------------------- 1 | from prism.runtime import CurrentRun 2 | from prism.task import PrismTask 3 | 4 | 5 | class Taskb(PrismTask): 6 | def run(self): 7 | return CurrentRun.ref("moduleB") 8 | -------------------------------------------------------------------------------- /prism/tests/unit/test_compiled_projects/task_ref_selfref/moduleC.py: -------------------------------------------------------------------------------- 1 | from prism.task import PrismTask 2 | 3 | 4 | class Taskc(PrismTask): 5 | def run(self): 6 | return "This is task C." 
# Standard library and third-party imports
import networkx
from pathlib import Path
import pytest


# Prism imports
import prism.exceptions
from prism.client.parser import ProjectParser
from prism.engine.module import _PrismModule


# Paths
UNIT_TEST_WKDIR = Path(__file__).parent
TASK_TEST_CASES = UNIT_TEST_WKDIR / "test_tasks"
EXAMPLE_TASKS_DIR = TASK_TEST_CASES / "example_tasks_dir"


def test_parser_no_task_dir():
    """
    Constructing a `ProjectParser` with a `tasks_dir` that is not a directory raises
    a `CompileException` whose message names the offending path.
    """
    with pytest.raises(prism.exceptions.CompileException) as cm:
        ProjectParser(
            project_dir=TASK_TEST_CASES,
            tasks_dir=TASK_TEST_CASES / "dummy_dir",
            all_tasks_downstream=True,
        )
    # The message check runs after the `with` block so that it is actually executed
    expected_msg = f'`{TASK_TEST_CASES / "dummy_dir"}` is not a directory!'
    assert str(cm.value) == expected_msg


def test_parser():
    """
    Exercise the parser against the example tasks directory: module discovery,
    module parsing, and DAG compilation for all tasks, for a subset of tasks, and
    for a task ID that is not in the graph.
    """
    parser = ProjectParser(
        project_dir=TASK_TEST_CASES,
        tasks_dir=EXAMPLE_TASKS_DIR,
        all_tasks_downstream=True,
    )
    # Discovery is expected to return paths relative to the tasks directory,
    # including modules inside sub-directories (`nested/foo.py`)
    all_modules = parser._get_all_modules_in_dir(EXAMPLE_TASKS_DIR)
    assert len(all_modules) == 5
    assert "func_0.py" in all_modules
    assert "func_1.py" in all_modules
    assert "hello.py" in all_modules
    assert "world.py" in all_modules
    assert "nested/foo.py" in all_modules

    # Module objects
    module_objs = parser.parse_all_modules()
    for x in module_objs:
        assert isinstance(x, _PrismModule)

    # DAG should compile with all tasks (an empty task list selects everything)
    parser.compile_dag(
        project_id="",
        run_slug=None,
        tasks_dir=EXAMPLE_TASKS_DIR,
        parsed_module_objs=module_objs,
        user_arg_task_ids=[],
        user_arg_all_downstream=True,
    )

    # DAG should compile with a subset of tasks
    parser.compile_dag(
        project_id="",
        run_slug=None,
        tasks_dir=EXAMPLE_TASKS_DIR,
        parsed_module_objs=module_objs,
        user_arg_task_ids=["hello", "world"],
        user_arg_all_downstream=True,
    )

    # If we pass in tasks that do not exist, the DAG should not compile. The error
    # surfaces directly from networkx rather than as a Prism exception.
    with pytest.raises(networkx.exception.NetworkXError) as cm:
        parser.compile_dag(
            project_id="",
            run_slug=None,
            tasks_dir=EXAMPLE_TASKS_DIR,
            parsed_module_objs=module_objs,
            user_arg_task_ids=["foo"],
            user_arg_all_downstream=True,
        )
    expected_msg = "The node foo is not in the digraph."
    assert str(cm.value) == expected_msg
-------------------------------------------------------------------------------- 1 | from prism.task import PrismTask 2 | 3 | 4 | class NormalPrismTask(PrismTask): 5 | def run(self): 6 | return "hi" 7 | -------------------------------------------------------------------------------- /prism/tests/unit/test_tasks/cls_other_classes.py: -------------------------------------------------------------------------------- 1 | from prism.task import PrismTask 2 | 3 | 4 | class OnlyPrismTask(PrismTask): 5 | def run(self): 6 | return "hi" 7 | 8 | 9 | class NonPrismTask: 10 | def run(self): 11 | return "hi" 12 | -------------------------------------------------------------------------------- /prism/tests/unit/test_tasks/cls_task_with_id.py: -------------------------------------------------------------------------------- 1 | from prism.task import PrismTask 2 | 3 | 4 | class TasksRefs(PrismTask): 5 | task_id = "cls_custom_task_id" 6 | 7 | def run(self): 8 | return "hi" 9 | -------------------------------------------------------------------------------- /prism/tests/unit/test_tasks/cls_task_with_target.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import prism.decorators 4 | from prism.target import PrismTarget 5 | from prism.task import PrismTask 6 | 7 | 8 | class TaskWithTarget(PrismTask): 9 | @prism.decorators.target(PrismTarget.txt, loc=os.path.join(os.getcwd(), "temp")) 10 | def run(self): 11 | return "hi" 12 | -------------------------------------------------------------------------------- /prism/tests/unit/test_tasks/cls_tasks_refs.py: -------------------------------------------------------------------------------- 1 | from prism.runtime import CurrentRun 2 | from prism.task import PrismTask 3 | 4 | 5 | class TasksRefs(PrismTask): 6 | def func_0(self): 7 | return CurrentRun.ref("func_0") 8 | 9 | def run(self): 10 | _ = CurrentRun.ref("hello") 11 | _ = CurrentRun.ref("world") 12 | return "hi" 13 | 14 | def 
func_1(self): 15 | return CurrentRun.ref("func_1") 16 | -------------------------------------------------------------------------------- /prism/tests/unit/test_tasks/dec_bad_dec_no_parentheses.py: -------------------------------------------------------------------------------- 1 | from prism.decorators import task 2 | 3 | 4 | @task 5 | def task_with_refs(): 6 | return "hi" 7 | -------------------------------------------------------------------------------- /prism/tests/unit/test_tasks/dec_bad_run_extra_arg.py: -------------------------------------------------------------------------------- 1 | from prism.decorators import task 2 | 3 | 4 | @task() 5 | def task_with_refs(extra_arg): 6 | return "hi" 7 | -------------------------------------------------------------------------------- /prism/tests/unit/test_tasks/dec_diff_decorator_structure.py: -------------------------------------------------------------------------------- 1 | import prism.decorators 2 | 3 | 4 | @prism.decorators.task() 5 | def task_fn_different_decorator_structure(): 6 | return "hi" 7 | -------------------------------------------------------------------------------- /prism/tests/unit/test_tasks/dec_multiple_prism_tasks.py: -------------------------------------------------------------------------------- 1 | from prism.decorators import task 2 | 3 | 4 | @task() 5 | def task_function_1(): 6 | return "hi" 7 | 8 | 9 | @task() 10 | def task_function_2(): 11 | return "hi" 12 | -------------------------------------------------------------------------------- /prism/tests/unit/test_tasks/dec_no_prism_task.py: -------------------------------------------------------------------------------- 1 | def task_function(): 2 | return "hi" 3 | -------------------------------------------------------------------------------- /prism/tests/unit/test_tasks/dec_one_prism_task.py: -------------------------------------------------------------------------------- 1 | from prism.decorators import task 2 | 3 | 4 | @task() 5 | def 
task_function(): 6 | return "hi" 7 | -------------------------------------------------------------------------------- /prism/tests/unit/test_tasks/dec_other_functions.py: -------------------------------------------------------------------------------- 1 | from prism.decorators import task 2 | 3 | 4 | def helper_function(): ... 5 | 6 | 7 | @task() 8 | def task_function(): 9 | return "hi" 10 | -------------------------------------------------------------------------------- /prism/tests/unit/test_tasks/dec_task_with_id.py: -------------------------------------------------------------------------------- 1 | from prism.decorators import task 2 | 3 | 4 | @task(task_id="dec_custom_task_id") 5 | def task_function(): 6 | return "hi" 7 | -------------------------------------------------------------------------------- /prism/tests/unit/test_tasks/dec_task_with_target.py: -------------------------------------------------------------------------------- 1 | from prism.decorators import task, target 2 | import prism.target 3 | from pathlib import Path 4 | from prism.runtime import CurrentRun 5 | 6 | 7 | @task(targets=[target(type=prism.target.Txt, loc=Path(__file__) / "test.txt")]) 8 | def task_with_target(): 9 | _ = CurrentRun.ref("hello.py") 10 | _ = CurrentRun.ref("world.py") 11 | return "hi" 12 | -------------------------------------------------------------------------------- /prism/tests/unit/test_tasks/dec_tasks_refs.py: -------------------------------------------------------------------------------- 1 | from prism.decorators import task, target 2 | import prism.target 3 | from pathlib import Path 4 | from prism.runtime import CurrentRun 5 | 6 | 7 | @task(targets=[target(type=prism.target.Txt, loc=Path(__file__) / "test.txt")]) 8 | def task_with_refs(): 9 | _ = CurrentRun.ref("hello") 10 | _ = CurrentRun.ref("world") 11 | _ = CurrentRun.ref("func_0") 12 | _ = CurrentRun.ref("func_1") 13 | return "hi" 14 | 
-------------------------------------------------------------------------------- /prism/tests/unit/test_tasks/example_tasks_dir/func_0.py: -------------------------------------------------------------------------------- 1 | from prism.task import PrismTask 2 | from prism.runtime import CurrentRun 3 | 4 | 5 | class Func0(PrismTask): 6 | task_id = "func0" 7 | 8 | def run(self): 9 | CurrentRun.ref("hello") 10 | CurrentRun.ref("world") 11 | return "world" 12 | -------------------------------------------------------------------------------- /prism/tests/unit/test_tasks/example_tasks_dir/func_1.py: -------------------------------------------------------------------------------- 1 | from prism.task import PrismTask 2 | from prism.runtime import CurrentRun 3 | 4 | 5 | class Func1(PrismTask): 6 | task_id = "func1" 7 | 8 | def run(self): 9 | CurrentRun.ref("func0") 10 | return "world" 11 | -------------------------------------------------------------------------------- /prism/tests/unit/test_tasks/example_tasks_dir/hello.py: -------------------------------------------------------------------------------- 1 | from prism.task import PrismTask 2 | 3 | 4 | class Hello(PrismTask): 5 | task_id = "hello" 6 | 7 | def run(self): 8 | return "world" 9 | -------------------------------------------------------------------------------- /prism/tests/unit/test_tasks/example_tasks_dir/nested/foo.py: -------------------------------------------------------------------------------- 1 | from prism.task import PrismTask 2 | 3 | 4 | class Foo(PrismTask): 5 | def run(self): 6 | return "world" 7 | -------------------------------------------------------------------------------- /prism/tests/unit/test_tasks/example_tasks_dir/world.py: -------------------------------------------------------------------------------- 1 | from prism.task import PrismTask 2 | from prism.runtime import CurrentRun 3 | 4 | 5 | class World(PrismTask): 6 | task_id = "world" 7 | 8 | def run(self): 9 | CurrentRun.ref("hello") 10 
from typing import (
    Any,
    Callable,
    List,
    Optional,
    Union,
)
import importlib
from functools import wraps


# Util functions
def requires_dependencies(
    dependencies: Union[str, List[str]],
    extras: Optional[str] = None,
) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
    """
    Decorator factory that prompts the user to `pip install` a package and/or a Prism
    extra in order to run a function. The check happens every time the decorated
    function is called, not when it is decorated. Borrowed heavily from the
    `unstructured` library:
    https://github.com/Unstructured-IO/unstructured/blob/main/unstructured/utils.py

    args:
        dependencies: module name, or list of module names, that must be importable
        extras: name of the Prism extra that provides the dependencies. If given, the
            error message suggests `pip install "prism-ds[<extras>]"`; otherwise it
            suggests installing the missing modules directly.
    returns:
        decorator that wraps a function with the dependency check
    raises:
        ImportError: raised by the wrapped function, when called, if any dependency is
            missing
    """
    # Normalise to a list under a new name so the parameter keeps a single type
    required: List[str] = (
        [dependencies] if isinstance(dependencies, str) else list(dependencies)
    )

    def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
        @wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            missing_deps = [dep for dep in required if not dependency_exists(dep)]
            if missing_deps:
                quoted = ", ".join("`" + dep + "`" for dep in missing_deps)
                if extras:
                    hint = (
                        "Please install them using "
                        f'`pip install "prism-ds[{extras}]"`.'
                    )
                else:
                    hint = (
                        "Please install them using "
                        f"`pip install {' '.join(missing_deps)}`."
                    )
                raise ImportError(
                    f"Following dependencies are missing: {quoted}. " + hint
                )
            return func(*args, **kwargs)

        return wrapper

    return decorator


def dependency_exists(dependency: str) -> bool:
    """
    Check whether a dependency can be imported.

    args:
        dependency: dotted module name, e.g. `pandas` or `snowflake.connector`
    returns:
        False if importing `dependency` fails because the dependency's own top-level
        package (or one of its submodules) cannot be imported; True otherwise,
        including when the import fails for an unrelated reason.
    """
    try:
        importlib.import_module(dependency)
    except ImportError as e:
        # Check to make sure this isn't some unrelated import error.
        pkg = dependency.split(".")[0]
        failed_module = getattr(e, "name", None)
        if failed_module:
            # Compare top-level package names exactly. A substring test on the
            # message would wrongly match unrelated modules whose name or path
            # merely contains `pkg`.
            return failed_module.split(".")[0] != pkg
        # ImportError raised without a module name: fall back to the message text
        if pkg in repr(e):
            return False
    return True
-------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = prism-ds 3 | description = The easiest way to create data pipelines in Python. 4 | long_description_content_type = text/markdown 5 | long_description = file: README.md 6 | version = 0.3.0 7 | author = prism founders 8 | author_email = hello@runprism.com 9 | license = Apache-2.0 10 | license_files = LICENSE 11 | platforms = unix, linux, osx, win32 12 | classifiers = 13 | Programming Language :: Python :: 3 14 | Programming Language :: Python :: 3.8 15 | Programming Language :: Python :: 3.9 16 | Programming Language :: Python :: 3.10 17 | Programming Language :: Python :: 3.11 18 | project_urls = 19 | homepage = https://www.runprism.com 20 | documentation = https://docs.runprism.com 21 | repository = https://github.com/runprism/prism 22 | 23 | [options] 24 | packages=find_namespace: 25 | include_package_data=True 26 | install_requires = 27 | astor>=0.7 28 | boto3>=1 29 | botocore>=1 30 | click>=8 31 | networkx>=2 32 | numpy>=1 33 | pandas>=1 34 | PyYAML>=6 35 | requests>=2 36 | Jinja2==3.1.2 37 | MarkupSafe>=2.0 38 | coolname>=2.2 39 | shortuuid>=1.0 40 | rich_click>=1.6.1 41 | Pillow>=9.5.0 42 | sqlalchemy>=2.0.27 43 | watchdog>=4.0.0 44 | python_requires = >=3.7 45 | zip_safe = no 46 | 47 | [options.extras_require] 48 | snowflake = 49 | snowflake-connector-python>=2 50 | pyarrow<10.1.0,>=10.0.1 51 | bigquery = 52 | google-api-python-client>=2 53 | google-auth>=2 54 | google-cloud-bigquery>=2 55 | db-dtypes>=1 56 | redshift = 57 | psycopg2-binary>=2.9 58 | postgres = 59 | psycopg2-binary>=2.9 60 | trino = 61 | trino>=0.319 62 | presto = 63 | presto-python-client>=0.8 64 | pyspark = 65 | pyspark>=3 66 | dev = 67 | dbt-snowflake>=1,<=1.7.5 68 | pytest>=7 69 | fastparquet>=0.8,<1 70 | tox>=3.24 71 | mypy>=1.9.0 72 | tomli>=2.0.1 73 | typed-ast>=1.5.5 74 | 
types-PyYAML>=6.0.12.20240311 75 | ruff>=0.3.3 76 | types-networkx>=3.2.1.20240313 77 | black>=24.3.0 78 | pre-commit>=3.5.0 79 | 80 | 81 | [options.entry_points] 82 | console_scripts = 83 | prism = prism.main:cli 84 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | 4 | if __name__ == "__main__": 5 | setup() 6 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | minversion = 3.8.0 3 | envlist = py37, py38, py39, py310 4 | isolated_build = true 5 | 6 | [gh-actions] 7 | python = 8 | 3.8: py38 9 | 3.9: py39 10 | 3.10: py310 11 | 3.11: py311 12 | 13 | [testenv] 14 | setenv = 15 | PYTHONPATH = {toxinidir} 16 | passenv = 17 | SHELL 18 | SNOWFLAKE_ACCOUNT 19 | SNOWFLAKE_DATABASE 20 | SNOWFLAKE_PASSWORD 21 | SNOWFLAKE_ROLE 22 | SNOWFLAKE_SCHEMA 23 | SNOWFLAKE_USER 24 | SNOWFLAKE_WAREHOUSE 25 | POSTGRES_USER 26 | POSTGRES_PASSWORD 27 | POSTGRES_DB 28 | POSTGRES_HOST 29 | GOOGLE_APPLICATION_CREDENTIALS 30 | deps = 31 | -r{toxinidir}/dev_requirements.txt 32 | commands = 33 | pytest 34 | --------------------------------------------------------------------------------