├── .github
├── ISSUE_TEMPLATE
│ ├── bug_report.md
│ └── feature_request.md
├── Logo.png
├── Symbol.png
├── prism_logo_dark.png
├── prism_logo_light.png
└── workflows
│ ├── ci-linux.yml
│ ├── ci-macos.yml
│ ├── imports-linux.yml
│ ├── imports-macosx.yml
│ ├── imports-windows.yml
│ ├── python-publish.yml
│ └── style.yml
├── .gitignore
├── .pre-commit-config.yaml
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── MANIFEST.in
├── README.md
├── dev_requirements.txt
├── prism
├── __init__.py
├── admin.py
├── callbacks
│ ├── __init__.py
│ └── callback.py
├── cli
│ ├── __init__.py
│ └── init.py
├── client
│ ├── __init__.py
│ ├── client.py
│ ├── parser.py
│ ├── runner.py
│ └── visualizer.py
├── connectors
│ ├── __init__.py
│ ├── base.py
│ ├── bigquery.py
│ ├── postgres.py
│ ├── presto.py
│ ├── redshift.py
│ ├── snowflake.py
│ └── trino.py
├── constants.py
├── db
│ ├── __init__.py
│ ├── factory.py
│ ├── mixins.py
│ └── setup.py
├── decorators
│ ├── __init__.py
│ ├── target.py
│ └── task.py
├── docs
│ ├── __init__.py
│ └── build
│ │ ├── 311ea03002abadcdcaba.png
│ │ ├── __init__.py
│ │ ├── ae8a93980ebb6c55123b.ico
│ │ ├── ce188596011a8fa32931.png
│ │ ├── d4df11de40d39920ff8c.svg
│ │ ├── index.html
│ │ ├── index2.html
│ │ └── main.js.LICENSE.txt
├── engine
│ ├── __init__.py
│ ├── compiled_task.py
│ ├── compiler.py
│ ├── executor.py
│ ├── manifest.py
│ └── module.py
├── exceptions.py
├── logging
│ ├── __init__.py
│ ├── events.py
│ ├── execution.py
│ └── loggers.py
├── main.py
├── runtime
│ ├── __init__.py
│ └── current_run.py
├── target.py
├── task.py
├── templates
│ ├── __init__.py
│ └── starter_project
│ │ ├── .gitignore
│ │ ├── __init__.py
│ │ ├── main.py
│ │ ├── output
│ │ └── .exists
│ │ └── tasks
│ │ ├── class_task.py
│ │ └── decorated_task.py
├── tests
│ ├── __init__.py
│ ├── integration
│ │ ├── __init__.py
│ │ ├── additional_package
│ │ │ ├── __init__.py
│ │ │ ├── cli_callbacks.py
│ │ │ ├── cli_connectors.py
│ │ │ └── utils.py
│ │ ├── integration_utils.py
│ │ ├── test_cli.py
│ │ ├── test_projects
│ │ │ ├── 001_init
│ │ │ │ ├── .gitignore
│ │ │ │ ├── __init__.py
│ │ │ │ ├── main.py
│ │ │ │ ├── output
│ │ │ │ │ └── .exists
│ │ │ │ └── tasks
│ │ │ │ │ ├── class_task.py
│ │ │ │ │ └── decorated_task.py
│ │ │ ├── 004_simple_project
│ │ │ │ ├── __init__.py
│ │ │ │ ├── dev
│ │ │ │ │ └── dev.ipynb
│ │ │ │ ├── modules
│ │ │ │ │ ├── module01.py
│ │ │ │ │ ├── module02.py
│ │ │ │ │ └── module03.py
│ │ │ │ └── output
│ │ │ │ │ └── .exists
│ │ │ ├── 005_simple_project_no_null_tasks
│ │ │ │ ├── __init__.py
│ │ │ │ ├── output
│ │ │ │ │ ├── task01.txt
│ │ │ │ │ └── task02.txt
│ │ │ │ └── tasks
│ │ │ │ │ ├── module01.py
│ │ │ │ │ ├── module02.py
│ │ │ │ │ ├── module03.py
│ │ │ │ │ └── module04.py
│ │ │ ├── 010_project_nested_module_dirs
│ │ │ │ ├── __init__.py
│ │ │ │ ├── dev
│ │ │ │ │ └── dev.ipynb
│ │ │ │ ├── output
│ │ │ │ │ ├── task01.txt
│ │ │ │ │ └── task02.txt
│ │ │ │ ├── prism_project.py
│ │ │ │ └── tasks
│ │ │ │ │ ├── extract
│ │ │ │ │ ├── module01.py
│ │ │ │ │ └── module02.py
│ │ │ │ │ ├── load
│ │ │ │ │ └── module03.py
│ │ │ │ │ └── module04.py
│ │ │ ├── 011_bad_task_ref
│ │ │ │ ├── __init__.py
│ │ │ │ ├── dev
│ │ │ │ │ └── dev.ipynb
│ │ │ │ ├── modules
│ │ │ │ │ ├── extract
│ │ │ │ │ │ ├── module01.py
│ │ │ │ │ │ └── module02.py
│ │ │ │ │ ├── load
│ │ │ │ │ │ └── module03.py
│ │ │ │ │ └── module04.py
│ │ │ │ ├── output
│ │ │ │ │ ├── module01.txt
│ │ │ │ │ └── module02.txt
│ │ │ │ └── prism_project.py
│ │ │ ├── 012_concurrency
│ │ │ │ ├── __init__.py
│ │ │ │ ├── dev
│ │ │ │ │ └── dev.ipynb
│ │ │ │ ├── modules
│ │ │ │ │ ├── module01.py
│ │ │ │ │ ├── module02.py
│ │ │ │ │ ├── module03.py
│ │ │ │ │ └── module04.py
│ │ │ │ ├── output
│ │ │ │ │ └── .exists
│ │ │ │ └── prism_project.py
│ │ │ ├── 013_connectors
│ │ │ │ ├── __init__.py
│ │ │ │ ├── output
│ │ │ │ │ └── .exists
│ │ │ │ └── tasks
│ │ │ │ │ ├── bad_adapter.py
│ │ │ │ │ ├── postgres_task.py
│ │ │ │ │ ├── snowflake_task.py
│ │ │ │ │ └── spark_task.py
│ │ │ ├── 014_project_with_package_lookup
│ │ │ │ ├── __init__.py
│ │ │ │ ├── output
│ │ │ │ │ └── .exists
│ │ │ │ └── tasks
│ │ │ │ │ └── module01.py
│ │ │ ├── 020_dec_retries
│ │ │ │ ├── __init__.py
│ │ │ │ ├── output
│ │ │ │ │ └── .exists
│ │ │ │ ├── prism_project.py
│ │ │ │ ├── tasks
│ │ │ │ │ ├── extract.py
│ │ │ │ │ └── load.py
│ │ │ │ └── triggers.yml
│ │ │ ├── 023_skipped_task
│ │ │ │ ├── __init__.py
│ │ │ │ ├── callback.txt
│ │ │ │ ├── output
│ │ │ │ │ ├── .exists
│ │ │ │ │ ├── task01.txt
│ │ │ │ │ └── task02.txt
│ │ │ │ ├── prism_project.py
│ │ │ │ └── tasks
│ │ │ │ │ ├── task01.py
│ │ │ │ │ └── task02.py
│ │ │ └── common
│ │ │ │ ├── __init__.py
│ │ │ │ └── functions.py
│ │ ├── test_run.py
│ │ └── test_visualizer.py
│ └── unit
│ │ ├── __init__.py
│ │ ├── test_callbacks.py
│ │ ├── test_compiled_projects
│ │ ├── __init__.py
│ │ ├── dag_cycle
│ │ │ ├── __init__.py
│ │ │ ├── moduleA.py
│ │ │ ├── moduleB.py
│ │ │ ├── moduleC.py
│ │ │ ├── moduleD.py
│ │ │ └── moduleE.py
│ │ ├── task_ref_15nodes
│ │ │ ├── __init__.py
│ │ │ ├── task01.py
│ │ │ ├── task02.py
│ │ │ ├── task03.py
│ │ │ ├── task04.py
│ │ │ ├── task05.py
│ │ │ ├── task06.py
│ │ │ ├── task07.py
│ │ │ ├── task08.py
│ │ │ ├── task09.py
│ │ │ ├── task10.py
│ │ │ ├── task11.py
│ │ │ ├── task12.py
│ │ │ ├── task13.py
│ │ │ ├── task14.py
│ │ │ └── task15.py
│ │ ├── task_ref_3nodes
│ │ │ ├── __init__.py
│ │ │ ├── task01.py
│ │ │ ├── task02.py
│ │ │ └── task03.py
│ │ ├── task_ref_5nodes
│ │ │ ├── __init__.py
│ │ │ ├── taskA.py
│ │ │ ├── taskB.py
│ │ │ ├── taskC.py
│ │ │ ├── taskD.py
│ │ │ └── taskE.py
│ │ ├── task_ref_norefs
│ │ │ ├── __init__.py
│ │ │ ├── moduleA.py
│ │ │ ├── moduleB.py
│ │ │ ├── moduleC.py
│ │ │ ├── moduleD.py
│ │ │ └── moduleE.py
│ │ └── task_ref_selfref
│ │ │ ├── __init__.py
│ │ │ ├── moduleA.py
│ │ │ ├── moduleB.py
│ │ │ ├── moduleC.py
│ │ │ ├── moduleD.py
│ │ │ └── moduleE.py
│ │ ├── test_compiler.py
│ │ ├── test_import.py
│ │ ├── test_module.py
│ │ ├── test_project_parser.py
│ │ └── test_tasks
│ │ ├── cls_bad_run_extra_arg.py
│ │ ├── cls_diff_import_structure.py
│ │ ├── cls_multiple_prism_tasks.py
│ │ ├── cls_no_prism_task.py
│ │ ├── cls_no_run_func.py
│ │ ├── cls_one_prism_task.py
│ │ ├── cls_other_classes.py
│ │ ├── cls_task_with_id.py
│ │ ├── cls_task_with_target.py
│ │ ├── cls_tasks_refs.py
│ │ ├── dec_bad_dec_no_parentheses.py
│ │ ├── dec_bad_run_extra_arg.py
│ │ ├── dec_diff_decorator_structure.py
│ │ ├── dec_multiple_prism_tasks.py
│ │ ├── dec_no_prism_task.py
│ │ ├── dec_one_prism_task.py
│ │ ├── dec_other_functions.py
│ │ ├── dec_task_with_id.py
│ │ ├── dec_task_with_target.py
│ │ ├── dec_tasks_refs.py
│ │ └── example_tasks_dir
│ │ ├── func_0.py
│ │ ├── func_1.py
│ │ ├── hello.py
│ │ ├── nested
│ │ └── foo.py
│ │ └── world.py
└── utils.py
├── pyproject.toml
├── setup.cfg
├── setup.py
└── tox.ini
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 |
13 | **Project structure**
14 | Create a skeleton below with your project structure.
15 | ```
16 | project
17 | |-- prism_project.py
18 | |-- tasks
19 | |-- task01.py
20 | ...
21 | ...
22 | ```
23 |
24 | **CLI Arguments**
25 | Copy and paste the CLI command used to produce the error (e.g., ```$ prism run```)
26 |
27 | **Traceback**
28 | Run your command using the ```--full-tb``` option and paste a screenshot of the resulting error message.
29 |
30 | **Expected behavior**
31 | A clear and concise description of what you expected to happen.
32 |
33 | **Desktop (please complete the following information):**
34 | - OS: [e.g., macOS]
35 | - Python version: [e.g., 3.7]
36 |
37 | **Additional context**
38 | Add any other context about the problem here.
39 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 |
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 |
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 |
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 |
--------------------------------------------------------------------------------
/.github/Logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/.github/Logo.png
--------------------------------------------------------------------------------
/.github/Symbol.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/.github/Symbol.png
--------------------------------------------------------------------------------
/.github/prism_logo_dark.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/.github/prism_logo_dark.png
--------------------------------------------------------------------------------
/.github/prism_logo_light.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/.github/prism_logo_light.png
--------------------------------------------------------------------------------
/.github/workflows/ci-linux.yml:
--------------------------------------------------------------------------------
1 | name: CI Linux
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 | paths:
8 | - 'prism/**'
9 | pull_request:
10 | branches:
11 | - main
12 | paths:
13 | - 'prism/**'
14 |
15 | jobs:
16 | test:
17 | runs-on: ubuntu-latest
18 | strategy:
19 | matrix:
20 | python-version: ['3.8', '3.9', '3.10', '3.11']
21 | steps:
22 | - uses: actions/checkout@v2
23 | - name: Set up Python ${{ matrix.python-version }}
24 | uses: actions/setup-python@v2
25 | with:
26 | python-version: ${{ matrix.python-version }}
27 | - name: Install dependencies
28 | run: |
29 | python -m pip install --upgrade pip
30 | pip install tox tox-gh-actions
31 | - name: Test with tox
32 | env:
33 | SHELL: ${{ secrets.SHELL }}
34 | SNOWFLAKE_ACCOUNT: ${{ secrets.SNOWFLAKE_ACCOUNT }}
35 | SNOWFLAKE_DATABASE: ${{ secrets.SNOWFLAKE_DATABASE }}
36 | SNOWFLAKE_PASSWORD: ${{ secrets.SNOWFLAKE_PASSWORD }}
37 | SNOWFLAKE_ROLE: ${{ secrets.SNOWFLAKE_ROLE }}
38 | SNOWFLAKE_SCHEMA: ${{ secrets.SNOWFLAKE_SCHEMA }}
39 | SNOWFLAKE_USER: ${{ secrets.SNOWFLAKE_USER }}
40 | SNOWFLAKE_WAREHOUSE: ${{ secrets.SNOWFLAKE_WAREHOUSE }}
41 | POSTGRES_USER: ${{ secrets.POSTGRES_USER }}
42 | POSTGRES_PASSWORD: ${{ secrets.POSTGRES_PASSWORD }}
43 | POSTGRES_DB: ${{ secrets.POSTGRES_DB }}
44 | POSTGRES_HOST: ${{ secrets.POSTGRES_HOST }}
45 | GOOGLE_APPLICATION_CREDENTIALS: ${{ secrets.GOOGLE_APPLICATION_CREDENTIALS }}
46 | run: tox
47 |
--------------------------------------------------------------------------------
/.github/workflows/ci-macos.yml:
--------------------------------------------------------------------------------
1 | name: CI MacOS
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 | paths:
8 | - 'prism/**'
9 | pull_request:
10 | branches:
11 | - main
12 | paths:
13 | - 'prism/**'
14 |
15 | jobs:
16 | test:
17 | runs-on: macos-latest
18 | strategy:
19 | matrix:
20 | python-version: ['3.8', '3.9', '3.10', '3.11']
21 | steps:
22 | - uses: actions/checkout@v2
23 | - name: Set up Python ${{ matrix.python-version }}
24 | uses: actions/setup-python@v2
25 | with:
26 | python-version: ${{ matrix.python-version }}
27 | - name: Install dependencies
28 | run: |
29 | python -m pip install --upgrade pip
30 | pip install tox tox-gh-actions
31 | - name: Test with tox
32 | env:
33 | SHELL: ${{ secrets.SHELL }}
34 | SNOWFLAKE_ACCOUNT: ${{ secrets.SNOWFLAKE_ACCOUNT }}
35 | SNOWFLAKE_DATABASE: ${{ secrets.SNOWFLAKE_DATABASE }}
36 | SNOWFLAKE_PASSWORD: ${{ secrets.SNOWFLAKE_PASSWORD }}
37 | SNOWFLAKE_ROLE: ${{ secrets.SNOWFLAKE_ROLE }}
38 | SNOWFLAKE_SCHEMA: ${{ secrets.SNOWFLAKE_SCHEMA }}
39 | SNOWFLAKE_USER: ${{ secrets.SNOWFLAKE_USER }}
40 | SNOWFLAKE_WAREHOUSE: ${{ secrets.SNOWFLAKE_WAREHOUSE }}
41 | POSTGRES_USER: ${{ secrets.POSTGRES_USER }}
42 | POSTGRES_PASSWORD: ${{ secrets.POSTGRES_PASSWORD }}
43 | POSTGRES_DB: ${{ secrets.POSTGRES_DB }}
44 | POSTGRES_HOST: ${{ secrets.POSTGRES_HOST }}
45 | GOOGLE_APPLICATION_CREDENTIALS: ${{ secrets.GOOGLE_APPLICATION_CREDENTIALS }}
46 | run: tox
47 |
--------------------------------------------------------------------------------
/.github/workflows/imports-linux.yml:
--------------------------------------------------------------------------------
1 | name: Imports (Linux)
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 | pull_request:
8 | branches:
9 | - main
10 |
11 | jobs:
12 | test:
13 | runs-on: ubuntu-latest
14 | strategy:
15 | matrix:
16 | python-version: ['3.9']
17 | steps:
18 | - uses: actions/checkout@v2
19 | - name: Set up Python ${{ matrix.python-version }}
20 | uses: actions/setup-python@v2
21 | with:
22 | python-version: ${{ matrix.python-version }}
23 | - name: Test imports
24 | run:
25 | python -m pip install --upgrade pip
26 | pip install .
27 | pip install .[snowflake]
28 | pip install .[bigquery]
29 | pip install .[redshift]
30 | pip install .[postgres]
31 | pip install .[trino]
32 | pip install .[presto]
33 | pip install .[pyspark]
34 | pip install .[dbt]
35 | pip install .[docker]
36 |
37 |
38 | # EOF
--------------------------------------------------------------------------------
/.github/workflows/imports-macosx.yml:
--------------------------------------------------------------------------------
1 | name: Imports (MacOS)
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 | pull_request:
8 | branches:
9 | - main
10 |
11 | jobs:
12 | test:
13 | runs-on: macos-latest
14 | strategy:
15 | matrix:
16 | python-version: ['3.9']
17 | steps:
18 | - uses: actions/checkout@v2
19 | - name: Set up Python ${{ matrix.python-version }}
20 | uses: actions/setup-python@v2
21 | with:
22 | python-version: ${{ matrix.python-version }}
23 | - name: Test imports
24 | run:
25 | python -m pip install --upgrade pip
26 | pip install .
27 | pip install .[snowflake]
28 | pip install .[bigquery]
29 | pip install .[redshift]
30 | pip install .[postgres]
31 | pip install .[trino]
32 | pip install .[presto]
33 | pip install .[pyspark]
34 | pip install .[dbt]
35 | pip install .[docker]
36 |
37 |
38 | # EOF
--------------------------------------------------------------------------------
/.github/workflows/imports-windows.yml:
--------------------------------------------------------------------------------
1 | name: Imports (Windows)
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 | pull_request:
8 | branches:
9 | - main
10 |
11 | jobs:
12 | test:
13 | runs-on: windows-latest
14 | strategy:
15 | matrix:
16 | python-version: ['3.9']
17 | steps:
18 | - uses: actions/checkout@v2
19 | - name: Set up Python ${{ matrix.python-version }}
20 | uses: actions/setup-python@v2
21 | with:
22 | python-version: ${{ matrix.python-version }}
23 | - name: Test imports
24 | run:
25 | python -m pip install --upgrade pip
26 | pip install .
27 | pip install .[snowflake]
28 | pip install .[bigquery]
29 | pip install .[redshift]
30 | pip install .[postgres]
31 | pip install .[trino]
32 | pip install .[presto]
33 | pip install .[pyspark]
34 | pip install .[dbt]
35 | pip install .[docker]
36 |
37 |
38 | # EOF
--------------------------------------------------------------------------------
/.github/workflows/python-publish.yml:
--------------------------------------------------------------------------------
1 | name: Upload Python Package
2 |
3 | on:
4 | push:
5 | tags:
6 | v*
7 |
8 | permissions:
9 | contents: read
10 |
11 | jobs:
12 | deploy:
13 | runs-on: ubuntu-latest
14 | steps:
15 | - uses: actions/checkout@v3
16 | - name: Set up Python
17 | uses: actions/setup-python@v3
18 | with:
19 | python-version: '3.x'
20 | - name: Install dependencies
21 | run: |
22 | python -m pip install --upgrade pip
23 | pip install build
24 | - name: Build package
25 | run: python -m build
26 | - name: Publish package
27 | uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
28 | with:
29 | user: __token__
30 | password: ${{ secrets.PYPI_API_TOKEN }}
31 |
--------------------------------------------------------------------------------
/.github/workflows/style.yml:
--------------------------------------------------------------------------------
1 | name: mypy and ruff
2 |
3 | on:
4 | - push
5 | - pull_request
6 |
7 | jobs:
8 | test:
9 | runs-on: ubuntu-latest
10 | steps:
11 | - uses: actions/checkout@v2
12 | - name: Set up Python 3.10
13 | uses: actions/setup-python@v2
14 | with:
15 | python-version: '3.10'
16 | - name: Install dependencies
17 | run: |
18 | python -m pip install --upgrade pip
19 | pip install -e .[dev]
20 | - name: mypy
21 | run: |
22 | mypy prism/callbacks/ prism/cli/ prism/client/ prism/connectors/ prism/db/ prism/decorators/ prism/engine/ prism/logging/ prism/runtime/ prism/exceptions.py prism/main.py prism/target.py prism/task.py prism/utils.py
23 | ruff check prism/callbacks/ prism/cli/ prism/client/ prism/connectors/ prism/db/ prism/decorators/ prism/engine/ prism/logging/ prism/runtime/ prism/exceptions.py prism/main.py prism/target.py prism/task.py prism/utils.py
24 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | **/.py[cod]
3 | **/__pycache__
4 |
5 | # OSX files
6 | **/.DS_Store
7 |
8 | # DBT artifacts
9 | **/.msgpack
10 | **/.user.yml
11 |
12 | # VSCode settings
13 | **/.vscode
14 |
15 | # Distribution / packaging
16 | *.egg-info/
17 | pip-wheel-metadata/
18 | dist/
19 |
20 | # Testing
21 | .mypy_cache
22 | .pytest_cache
23 | .ruff_cache
24 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://github.com/pre-commit/pre-commit-hooks
3 | rev: v4.1.0
4 | hooks:
5 | - id: check-added-large-files
6 | - id: check-merge-conflict
7 | - id: detect-private-key
8 | - id: end-of-file-fixer
9 | - id: trailing-whitespace
10 |
11 | - repo: https://github.com/astral-sh/ruff-pre-commit
12 | rev: v0.1.14
13 | hooks:
14 | - id: ruff
15 | args: ["--fix"]
16 | - id: ruff-format
17 |
18 | - repo: https://github.com/PyCQA/isort
19 | rev: 5.11.5
20 | hooks:
21 | - id: isort
22 | args: [--profile=black]
23 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | We as members, contributors, and leaders pledge to make participation in our
6 | community a harassment-free experience for everyone, regardless of age, body
7 | size, visible or invisible disability, ethnicity, sex characteristics, gender
8 | identity and expression, level of experience, education, socio-economic status,
9 | nationality, personal appearance, race, caste, color, religion, or sexual
10 | identity and orientation.
11 |
12 | We pledge to act and interact in ways that contribute to an open, welcoming,
13 | diverse, inclusive, and healthy community.
14 |
15 | ## Our Standards
16 |
17 | Examples of behavior that contributes to a positive environment for our
18 | community include:
19 |
20 | * Demonstrating empathy and kindness toward other people
21 | * Being respectful of differing opinions, viewpoints, and experiences
22 | * Giving and gracefully accepting constructive feedback
23 | * Accepting responsibility and apologizing to those affected by our mistakes,
24 | and learning from the experience
25 | * Focusing on what is best not just for us as individuals, but for the overall
26 | community
27 |
28 | Examples of unacceptable behavior include:
29 |
30 | * The use of sexualized language or imagery, and sexual attention or advances of
31 | any kind
32 | * Trolling, insulting or derogatory comments, and personal or political attacks
33 | * Public or private harassment
34 | * Publishing others' private information, such as a physical or email address,
35 | without their explicit permission
36 | * Other conduct which could reasonably be considered inappropriate in a
37 | professional setting
38 |
39 | ## Enforcement Responsibilities
40 |
41 | Community leaders are responsible for clarifying and enforcing our standards of
42 | acceptable behavior and will take appropriate and fair corrective action in
43 | response to any behavior that they deem inappropriate, threatening, offensive,
44 | or harmful.
45 |
46 | Community leaders have the right and responsibility to remove, edit, or reject
47 | comments, commits, code, wiki edits, issues, and other contributions that are
48 | not aligned to this Code of Conduct, and will communicate reasons for moderation
49 | decisions when appropriate.
50 |
51 | ## Scope
52 |
53 | This Code of Conduct applies within all community spaces, and also applies when
54 | an individual is officially representing the community in public spaces.
55 | Examples of representing our community include using an official e-mail address,
56 | posting via an official social media account, or acting as an appointed
57 | representative at an online or offline event.
58 |
59 | ## Enforcement
60 |
61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
62 | reported to the community leaders responsible for enforcement at
63 | hello@runprism.com.
64 | All complaints will be reviewed and investigated promptly and fairly.
65 |
66 | All community leaders are obligated to respect the privacy and security of the
67 | reporter of any incident.
68 |
69 | ## Enforcement Guidelines
70 |
71 | Community leaders will follow these Community Impact Guidelines in determining
72 | the consequences for any action they deem in violation of this Code of Conduct:
73 |
74 | ### 1. Correction
75 |
76 | **Community Impact**: Use of inappropriate language or other behavior deemed
77 | unprofessional or unwelcome in the community.
78 |
79 | **Consequence**: A private, written warning from community leaders, providing
80 | clarity around the nature of the violation and an explanation of why the
81 | behavior was inappropriate. A public apology may be requested.
82 |
83 | ### 2. Warning
84 |
85 | **Community Impact**: A violation through a single incident or series of
86 | actions.
87 |
88 | **Consequence**: A warning with consequences for continued behavior. No
89 | interaction with the people involved, including unsolicited interaction with
90 | those enforcing the Code of Conduct, for a specified period of time. This
91 | includes avoiding interactions in community spaces as well as external channels
92 | like social media. Violating these terms may lead to a temporary or permanent
93 | ban.
94 |
95 | ### 3. Temporary Ban
96 |
97 | **Community Impact**: A serious violation of community standards, including
98 | sustained inappropriate behavior.
99 |
100 | **Consequence**: A temporary ban from any sort of interaction or public
101 | communication with the community for a specified period of time. No public or
102 | private interaction with the people involved, including unsolicited interaction
103 | with those enforcing the Code of Conduct, is allowed during this period.
104 | Violating these terms may lead to a permanent ban.
105 |
106 | ### 4. Permanent Ban
107 |
108 | **Community Impact**: Demonstrating a pattern of violation of community
109 | standards, including sustained inappropriate behavior, harassment of an
110 | individual, or aggression toward or disparagement of classes of individuals.
111 |
112 | **Consequence**: A permanent ban from any sort of public interaction within the
113 | community.
114 |
115 | ## Attribution
116 |
117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage],
118 | version 2.1, available at
119 | [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
120 |
121 | Community Impact Guidelines were inspired by
122 | [Mozilla's code of conduct enforcement ladder][Mozilla CoC].
123 |
124 | For answers to common questions about this code of conduct, see the FAQ at
125 | [https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
126 | [https://www.contributor-covenant.org/translations][translations].
127 |
128 | [homepage]: https://www.contributor-covenant.org
129 | [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
130 | [Mozilla CoC]: https://github.com/mozilla/diversity
131 | [FAQ]: https://www.contributor-covenant.org/faq
132 | [translations]: https://www.contributor-covenant.org/translations
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing
2 |
3 | Thank you for considering contributing to Prism! We greatly appreciate your effort in making Prism a best-in-class product.
4 |
5 | ## How do I get started?
6 |
7 | Prism is currently in beta, so we are not accepting contributions at this time. Once we release our stable API, we will update the guidelines and begin accepting contributions!
8 |
9 | ## Raising an issue
10 |
11 | If you notice a bug, please raise an issue using the bug report template.
12 |
13 | ## Suggesting a new feature
14 |
15 | If you would like to suggest a new feature, please raise an issue using the feature request template.
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include prism/templates *.png *.svg *.exists *.ico *.txt *.ipynb *.py *.sql *.yml *.html *.md .gitkeep .gitignore
2 | recursive-include prism/docs *.png *.svg *.ico *.txt *.html
3 | recursive-include prism/agents *.sh
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 | [](https://github.com/runprism/prism/actions/workflows/ci-linux.yml)
17 | [](https://github.com/runprism/prism/actions/workflows/ci-macos.yml)
18 | [](http://mypy-lang.org/)
19 | [](https://flake8.pycqa.org/en/latest/)
20 |
21 |
22 |
23 |
24 | # :wave: Welcome to Prism!
25 | [Prism](https://www.runprism.com/) is the easiest way to create data pipelines in Python.
26 |
27 | ## Introduction
28 | Data projects often require multiple steps that need to be executed in sequence (think extract-transform-load, data wrangling, etc.). With Prism, users can break down their project into modular tasks, manage dependencies, and execute complex computations in sequence.
29 |
30 | Here are some of Prism's main features:
31 | - **Real-time dependency declaration**: With Prism, analysts can declare dependencies using a simple function call. No need to explicitly keep track of the pipeline order — at runtime, Prism automatically parses the function calls and builds the dependency graph.
32 | - **Intuitive logging**: Prism automatically logs events for parsing the configuration files, compiling the tasks and creating the DAG, and executing the tasks. No configuration is required.
33 | - **Flexible CLI**: Users can instantiate, compile, and run projects using a simple but powerful command-line interface.
34 | - **“Batteries included”**: Prism comes with all the essentials needed to get up and running quickly. Users can create and run their first DAG in less than 2 minutes.
35 | - **Integrations**: Prism integrates with several tools that are popular in the data community, including Snowflake, Google BigQuery, Redshift, PySpark, and dbt. We're adding more integrations every day, so let us know what you'd like to see!
36 |
37 |
38 | ## Getting Started
39 |
40 | Prism can be installed via ```pip```. Prism requires Python >= 3.7.
41 |
42 | ```
43 | pip install --upgrade pip
44 | pip install prism-ds
45 | ```
46 |
47 | Start your first Prism project with the `prism init` command:
48 | ```
49 | $ prism init --project-name my_first_project
50 | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
51 | Running with Prism v0.3.0...
52 | Creating template project at ./my_first_project...
53 |
54 | ______
55 | ____ __ \_____(_)________ _______
56 | _____ /_/ / ___/ / ___/ __ `__ \ ____
57 | ____ / ____/ / / (__ ) / / / / / _____
58 | ___/_/ /_/ /_/____/_/ /_/ /_/ ___
59 |
60 | Welcome to Prism, the easiest way to create clean, modular data pipelines
61 | using Python!
62 |
63 | To get started, navigate to your newly created project "my_first_project" and try
64 | running the following commands:
65 | > python main.py
66 | > prism run
67 | > prism graph
68 |
69 | Consult the documentation here for more information on how to get started.
70 | docs.runprism.com
71 |
72 | Happy building!
73 |
74 | Done!
75 | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
76 | ```
77 |
78 | Run your project by navigating to your project directory and running `prism run`:
79 | ```
80 | $ cd my_first_project
81 | $ prism run
82 | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
83 | [HH:MM:SS] INFO | Running with Prism v0.3.0...
84 | [HH:MM:SS] INFO | Creating run magnetic-pony-BBDYfwdDzH for client my_first_project-1.0...
85 | [HH:MM:SS] INFO | Found 2 task(s) in 2 module(s) in job magnetic-pony-BBDYfwdDzH...
86 |
87 | [HH:MM:SS] INFO | Parsing task dependencies............................................... [RUN]
88 | [HH:MM:SS] INFO | FINISHED parsing task dependencies...................................... [DONE in 0.01s]
89 |
90 | ────────────────────────────────────────────── Tasks ──────────────────────────────────────────────
91 | [HH:MM:SS] INFO | 1 of 2 RUNNING TASK example-decorated-task.............................. [RUN]
92 | [HH:MM:SS] INFO | 1 of 2 FINISHED TASK example-decorated-task............................. [DONE in 0.02s]
93 | [HH:MM:SS] INFO | 2 of 2 RUNNING TASK example-class-task.................................. [RUN]
94 | [HH:MM:SS] INFO | 2 of 2 FINISHED TASK example-class-task................................. [DONE in 0.02s]
95 |
96 | Done!
97 | ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
98 | ```
99 |
100 | ## Documentation
101 | To get started with Prism projects, check out our [documentation](https://docs.runprism.com). Some sections of interest include:
102 |
103 | - :key: [Fundamentals](https://docs.runprism.com/fundamentals)
104 | - :seedling: [CLI](https://docs.runprism.com/cli)
105 | - :electric_plug: [Integrations](https://docs.runprism.com/integrations)
106 | - :bulb: [Use Cases](https://docs.runprism.com/use-cases)
107 |
108 | In addition, check out some [example projects](https://github.com/runprism/prism_examples).
109 |
110 |
111 | ## Integrations
112 | Prism integrates with a wide variety of third-party developer tools. There are two kinds of integrations that Prism supports: adapters and agents.
113 |
114 | ### Adapters
115 | Adapters allow users to connect to data warehouses or analytics engines. Prism currently supports the following adapters:
116 | | Adapter | Command |
117 | | ------------ | ----------- |
118 | | **Google BigQuery** | ```pip install "prism-ds[bigquery]"``` |
119 | | **Postgres** | ```pip install "prism-ds[postgres]"``` |
120 | | **Presto** | ```pip install "prism-ds[presto]"``` |
121 | | **Redshift** | ```pip install "prism-ds[redshift]"``` |
122 | | **Snowflake** | ```pip install "prism-ds[snowflake]"``` |
123 | | **Trino** | ```pip install "prism-ds[trino]"``` |
124 |
125 |
126 | ## Product Roadmap
127 |
128 | We're always looking to improve our product. Here's what we're working on at the moment:
129 |
130 | - **Compatibility with Alto agents**: Docker containers, EC2 clusters, EMR clusters, Databricks clusters, and more!
131 | - **Additional adapters**: Celery, Dask, MySQL, Presto, and more!
132 | - **Cloud deployment**: Managed orchestration platform to deploy Prism projects in the cloud
133 |
134 | Let us know if you'd like to see another feature!
135 |
--------------------------------------------------------------------------------
/dev_requirements.txt:
--------------------------------------------------------------------------------
1 | -e .[snowflake]
2 | -e .[bigquery]
3 | -e .[redshift]
4 | -e .[postgres]
5 | -e .[presto]
6 | -e .[trino]
7 | -e .[pyspark]
8 | -e .[dev]
9 |
--------------------------------------------------------------------------------
/prism/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/__init__.py
--------------------------------------------------------------------------------
/prism/admin.py:
--------------------------------------------------------------------------------
1 | """
2 | Functions for DAG run admin variables
3 | """
4 |
5 | # Imports
6 | import coolname
7 | import uuid
8 | import shortuuid
9 |
# Words that must never show up in a generated run slug. `generate_run_slug` splits
# each candidate slug on "-" and regenerates it while any of these words is present.
IGNORE_LIST = {
    "beaver",
    "booby",
    "chubby",
    "curvy",
    "demonic",
    "fat",
    "flashy",
    "flat",
    "funky",
    "gay",
    "godlike",
    "heretic",
    "juicy",
    "kickass",
    "nippy",
    "sexy",
    "sloppy",
    "thick",
}
31 |
32 |
33 | # Functions
def generate_run_id() -> str:
    """
    Generate a run ID from a random (version 4) UUID.

    returns:
        the UUID rendered as 32 lowercase hexadecimal characters, without dashes
    """
    return uuid.uuid4().hex
39 |
40 |
def generate_run_slug() -> str:
    """
    Build a human-readable run slug: a two-word `coolname` slug followed by a
    10-character short UUID. This is offered alongside the run ID because it is
    easier to read; it is not necessarily cryptographically secure.

    returns:
        string of the form `<word>-<word>-<short uuid>`
    """
    suffix = str(shortuuid.ShortUUID().random(length=10)).replace("-", "")

    # Keep drawing word pairs until none of the words is on the ignore list
    while True:
        words = coolname.generate_slug(2)
        if not IGNORE_LIST.intersection(words.split("-")):
            break

    return f"{words}-{suffix}"
54 |
--------------------------------------------------------------------------------
/prism/callbacks/__init__.py:
--------------------------------------------------------------------------------
1 | from .callback import _PrismCallback # noqa: F401, F403
2 |
--------------------------------------------------------------------------------
/prism/callbacks/callback.py:
--------------------------------------------------------------------------------
1 | import importlib
2 | import inspect
3 | from typing import Any, Callable
4 |
5 |
6 | class _PrismCallback:
7 | """
8 | Callbacks allow users to call specific functions when a run successfully executes or
9 | when a project fails. This is a super thin wrapper around the actual callback
10 | function.
11 | """
12 |
13 | callback_func: Callable[[], Any]
14 | name: str
15 |
16 | def __init__(self, callback_func: Callable[[], Any]):
17 | self.callback_func = callback_func
18 | self.name = callback_func.__name__
19 |
20 | # Check if the argument as any args. If it does, raise an error. For now, users
21 | # cannot specify arguments in a callback function. They can access information
22 | # about the run that triggered the callback via the `CurrentRun` object.
23 | signature = inspect.signature(self.callback_func)
24 | args = list(signature.parameters.keys())
25 | if len(args) > 0:
26 | raise ValueError(
27 | f"Callback function `{self.name}` cannot have any arguments.",
28 | )
29 |
30 | @classmethod
31 | def from_str(cls, import_path: str):
32 | module_name = ".".join(import_path.split(".")[:-1])
33 | fn_name = import_path.split(".")[-1]
34 | imported_mod = importlib.import_module(module_name)
35 | fn = getattr(imported_mod, fn_name)
36 | return cls(fn)
37 |
38 | def run(self):
39 | self.callback_func()
40 |
--------------------------------------------------------------------------------
/prism/cli/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/cli/__init__.py
--------------------------------------------------------------------------------
/prism/cli/init.py:
--------------------------------------------------------------------------------
1 | # Standard library imports
2 | import shutil
3 | from pathlib import Path
4 | from typing import Literal, Optional
5 |
6 | import click
7 |
8 | import prism.constants
9 |
10 | # Prism-specific imports
11 | import prism.db.setup
12 | import prism.exceptions
13 | from prism.logging.events import (
14 | CreatingPrismProjectTemplate,
15 | InitSuccessfulEvent,
16 | fire_empty_line_event,
17 | fire_init_events,
18 | fire_tail_events,
19 | )
20 | from prism.logging.loggers import console_print, set_up_logger
21 | from prism.templates.starter_project import STARTER_PROJECT_TEMPLATE_DIR
22 |
# Banner printed after `prism init` succeeds. It is a `str.format` template with two
# fields: `project_name` and `docs_url`. It is a raw string because the ASCII art
# contains backslashes that would otherwise be invalid escape sequences.
TASK_COMPLETE_MSG = r""" ______
____ __ \_____(_)________ _______
_____ /_/ / ___/ / ___/ __ `__ \ ____
____ / ____/ / / (__ ) / / / / / _____
___/_/ /_/ /_/____/_/ /_/ /_/ ___

Welcome to Prism, the easiest way to create clean, modular data pipelines
using Python!

To get started, navigate to your newly created project "{project_name}" and try
running the following commands:
> python main.py
> prism run
> prism graph

Consult the documentation here for more information on how to get started.
{docs_url}

Happy building!"""
42 |
43 |
def initialize_project(
    project_name: Optional[str],
    log_level: Literal["info", "warning", "error", "debug", "critical"],
) -> None:
    """
    Create a new Prism project in the current working directory by copying the
    starter project template into a folder named after the project.

    args:
        project_name: name for the new project; when `None`, the user is prompted
        log_level: log level passed to the logger setup
    returns:
        None
    raises:
        prism.exceptions.ProjectAlreadyExistsException: if a directory with the
            project's name already exists in the working directory
    """
    set_up_logger(log_level, None)
    fire_init_events()

    # Ask for a name interactively when the caller did not supply one
    if project_name is None:
        project_name = click.prompt("What is the desired project name?")
        fire_empty_line_event()

    # The database is set up before the target directory is inspected
    prism.db.setup.setup()

    # Refuse to overwrite an existing directory
    target_dir = Path.cwd() / project_name
    if target_dir.is_dir():
        raise prism.exceptions.ProjectAlreadyExistsException(target_dir)

    # Copy the template, leaving out the files listed in `IGNORE_FILES`
    console_print(CreatingPrismProjectTemplate(target_dir).message())
    skip_patterns = shutil.ignore_patterns(*prism.constants.IGNORE_FILES)
    shutil.copytree(STARTER_PROJECT_TEMPLATE_DIR, target_dir, ignore=skip_patterns)
    fire_empty_line_event()

    # Print the welcome banner
    welcome_msg = TASK_COMPLETE_MSG.format(
        project_name=project_name, docs_url="docs.runprism.com"
    )
    console_print(InitSuccessfulEvent(msg=welcome_msg).message())
    fire_tail_events()
93 |
--------------------------------------------------------------------------------
/prism/client/__init__.py:
--------------------------------------------------------------------------------
1 | from .client import * # noqa: F401, F403
2 | from .visualizer import * # noqa: F401, F403
3 |
--------------------------------------------------------------------------------
/prism/client/parser.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | import os
3 | from typing import List, Optional, Union
4 | import re
5 |
6 | # Prism-specific imports
7 | from prism.engine.module import _PrismModule
8 | from prism.engine.compiler import _DagCompiler, _CompiledDag
9 | import prism.logging.execution
10 | import prism.logging.loggers
11 |
12 |
class ProjectParser(object):
    """
    Finds the Python modules inside a project's tasks directory, wraps them in
    `_PrismModule` objects and hands them to the DAG compiler.

    attributes:
        project_dir: project directory
        tasks_dir: directory containing the task modules
        all_tasks_downstream: stored as given to the constructor
    """

    project_dir: Path
    tasks_dir: Union[str, Path]
    all_tasks_downstream: bool

    def __init__(
        self,
        project_dir: Path,
        tasks_dir: Union[str, Path],
        all_tasks_downstream: bool,
    ):
        self.project_dir = project_dir
        self.tasks_dir = tasks_dir
        self.all_tasks_downstream = all_tasks_downstream

        self._confirm_tasks_dir_exists(self.tasks_dir)

    def _confirm_tasks_dir_exists(self, tasks_dir: Union[str, Path]) -> None:
        """
        Raise if `tasks_dir` is not an existing directory.

        args:
            tasks_dir: path to check
        raises:
            prism.exceptions.CompileException: if `tasks_dir` is not a directory
        """
        if not Path(tasks_dir).is_dir():
            # Import explicitly: this module does not import `prism.exceptions` at
            # the top, so the attribute would otherwise only resolve if some other
            # module happened to import it first.
            import prism.exceptions

            raise prism.exceptions.CompileException(
                message=f"`{tasks_dir}` is not a directory!"
            )
        return None

    def _get_all_modules_in_dir(self, rootdir: Union[str, Path]) -> List[str]:
        """
        Get all modules in the `rootdir`, searching sub-directories as well.

        args:
            rootdir: root directory to search
        returns:
            list of paths, relative to `rootdir`, of every file ending in `.py`
        raises:
            ValueError: if `rootdir` is not a directory
        """
        if not Path(rootdir).is_dir():
            raise ValueError(f"`{rootdir}` is not a directory!")

        module_relpaths: List[str] = []
        for root, _, files in os.walk(rootdir):
            for fname in files:
                if fname.endswith(".py"):
                    full_path = Path(root) / fname
                    module_relpaths.append(os.path.relpath(full_path, rootdir))
        return module_relpaths

    def parse_all_modules(self) -> List["_PrismModule"]:
        """
        Create a `_PrismModule` object for each module in the tasks directory. Modules
        whose `prism_task_nodes` mapping is empty are left out.

        returns:
            list of _PrismModule objects that contain at least one task node
        """
        prism_modules: List["_PrismModule"] = []
        for relpath in self._get_all_modules_in_dir(self.tasks_dir):
            mod = _PrismModule(self.project_dir, self.tasks_dir, relpath)
            if mod.prism_task_nodes:
                prism_modules.append(mod)
        return prism_modules

    def compile_dag(
        self,
        project_id: str,
        run_slug: Optional[str],
        tasks_dir: Union[str, Path],
        parsed_module_objs: List["_PrismModule"],
        user_arg_task_ids: List[str],
        user_arg_all_downstream: bool,
    ) -> "_CompiledDag":
        """
        Wrapper for the `compile` method in the DagCompiler class

        args:
            project_id: project ID
            run_slug: slug of the current run, or `None`
            tasks_dir: directory containing all tasks
            parsed_module_objs: list of _PrismModule objects associated with modules
                that contain tasks
            user_arg_task_ids: task IDs passed in by the user at runtime
            user_arg_all_downstream: boolean indicating whether the user wants to run
                all tasks downstream of inputted `user_arg_task_ids`
        returns:
            CompiledDag object
        """
        dag_compiler = _DagCompiler(
            project_id,
            run_slug,
            tasks_dir,
            parsed_module_objs,
            user_arg_task_ids,
            user_arg_all_downstream,
        )
        return dag_compiler.compile()
109 |
--------------------------------------------------------------------------------
/prism/client/visualizer.py:
--------------------------------------------------------------------------------
1 | from http.server import SimpleHTTPRequestHandler
2 | import os
3 | from pathlib import Path
4 | from typing import List, Union
5 | import shutil
6 | from socketserver import TCPServer
7 | import sys
8 | import signal
9 | import webbrowser
10 |
11 | from watchdog.observers.api import BaseObserver
12 | from watchdog.observers import Observer
13 | from watchdog.events import PatternMatchingEventHandler
14 |
15 | # Prism-specific imports
16 | from prism.client.parser import ProjectParser
17 | from prism.docs import DOCS_INDEX_FILE_DIR
18 | from prism.engine.compiler import _CompiledDag
19 | from prism.engine.manifest import TaskManifest, Manifest
20 | import prism.logging.execution
21 | from prism.logging.events import (
22 | fire_header_events,
23 | fire_serving_docs_events,
24 | fire_empty_line_event,
25 | fire_tail_events,
26 | fire_reload_docs_event,
27 | )
28 | import prism.logging.loggers
29 |
30 |
class PrismVisualizer(object):
    """
    Builds the docs site for a project (static build files plus a manifest of the
    project's tasks) and serves it over HTTP on localhost. When `hot_reload` is on, a
    watchdog observer rebuilds the manifest whenever something in the tasks directory
    changes.

    attributes:
        project_id: project ID
        project_dir: project directory; the docs are written to `<project_dir>/docs`
        tasks_dir: directory containing the task modules (also the watched path)
        port: port to serve the docs on
        open_window: whether to open the docs in a browser tab
        hot_reload: whether to start the file-system observer
        project_parser: parser used to (re)compile the DAG
        observer: watchdog observer scheduled on `tasks_dir`
    """

    project_id: str
    project_dir: Path
    tasks_dir: Union[str, Path]
    port: int
    open_window: bool
    hot_reload: bool

    project_parser: "ProjectParser"
    observer: "BaseObserver"

    def __init__(
        self,
        project_id: str,
        project_dir: Path,
        tasks_dir: Union[str, Path],
        port: int,
        open_window: bool,
        hot_reload: bool,
    ):
        self.project_id = project_id
        self.project_dir = project_dir
        self.tasks_dir = tasks_dir
        self.port = port
        self.open_window = open_window
        self.hot_reload = hot_reload

        # Project parser
        self.project_parser = ProjectParser(self.project_dir, self.tasks_dir, True)

        # Event handler for hot reloading. Note that we only start this observer if
        # `hot_reload` is True. Otherwise, we don't use it.
        reload_handler = PatternMatchingEventHandler(
            patterns=["*"],
            ignore_patterns=None,
            ignore_directories=False,
            case_sensitive=False,
        )

        # Every kind of file-system event triggers the same rebuild
        def on_any_change(event):
            self._event_handler()

        # mypy doesn't like that we're assigning a function to a method. But it works,
        # so ignore.
        reload_handler.on_created = on_any_change  # type: ignore
        reload_handler.on_deleted = on_any_change  # type: ignore
        reload_handler.on_modified = on_any_change  # type: ignore
        reload_handler.on_moved = on_any_change  # type: ignore

        # Observer
        path = str(self.tasks_dir)
        self.observer = Observer()
        self.observer.schedule(reload_handler, path, recursive=True)

    def create_docs_dir(self, project_dir: Path) -> Path:
        """
        Create a docs/ directory in the project directory

        args:
            project_dir: Prism project directory
        returns:
            document directory
        """
        docs_dir = project_dir / "docs"
        docs_dir.mkdir(parents=True, exist_ok=True)
        return docs_dir

    def create_manifest(
        self,
        compiled_dag: "_CompiledDag",
        docs_dir: Path,
    ) -> None:
        """
        Write `<docs_dir>/build/manifest.json` describing every task in the compiled
        DAG (its module, refs and targets), replacing any existing manifest file.

        args:
            compiled_dag: compiled DAG to describe
            docs_dir: docs directory, as returned by `create_docs_dir`
        returns:
            None
        """
        task_manifests: List[TaskManifest] = []

        task_refs = compiled_dag.task_refs
        task_targets = compiled_dag.task_targets

        for tid, mod in compiled_dag.task_mods.items():
            module_relpath = Path(mod.module_task_relpath)

            # Construct task manifest
            manifest = TaskManifest()
            manifest.add_task(module_relpath, tid)
            manifest.add_refs(
                target_module=module_relpath,
                target_task=tid,
                sources=task_refs[tid],
            )
            manifest.add_targets(
                module_relative_path=module_relpath,
                task_name=tid,
                locs=task_targets[tid],
            )
            task_manifests.append(manifest)

        # Remove any existing manifest before dumping the new one
        full_manifest = Manifest(task_manifests)
        build_dir = docs_dir / "build"
        manifest_path = build_dir / "manifest.json"
        if manifest_path.is_file():
            os.unlink(manifest_path)
        full_manifest.json_dump(build_dir)
        return None

    def _build_docs(self, compiled_dag: "_CompiledDag") -> Path:
        """
        Create the docs directory, copy the packaged build files into
        `<docs>/build` and write the manifest there.

        args:
            compiled_dag: compiled DAG to describe in the manifest
        returns:
            path of the build directory
        """
        docs_dir = self.create_docs_dir(self.project_dir)
        build_dir = docs_dir / "build"
        shutil.copytree(  # type: ignore
            DOCS_INDEX_FILE_DIR, build_dir, dirs_exist_ok=True
        )
        self.create_manifest(compiled_dag, docs_dir)
        return build_dir

    def _event_handler(self):
        """
        Every time the user updates their tasks, we should recompile the DAG and
        regenerate the manifest.
        """
        parsed_module_objs = self.project_parser.parse_all_modules()
        compiled_dag = self.project_parser.compile_dag(
            project_id=self.project_id,
            run_slug=None,
            tasks_dir=self.tasks_dir,
            parsed_module_objs=parsed_module_objs,
            user_arg_task_ids=[],
            user_arg_all_downstream=True,
        )
        self._build_docs(compiled_dag)
        fire_reload_docs_event()

    def graph(self):
        """
        Compile the project's DAG, build the docs and serve them on
        `127.0.0.1:<port>` until the user interrupts with Ctrl+C and confirms the
        shutdown. Note that this changes the process's working directory to the docs
        build directory.

        Any `Exception` raised along the way is printed to the console and the
        process exits with status 1.
        """
        try:
            parsed_module_objs = self.project_parser.parse_all_modules()
            num_modules = len(parsed_module_objs)
            num_tasks = sum(len(mod.prism_task_nodes) for mod in parsed_module_objs)
            fire_header_events(
                project_id=self.project_id,
                run_slug=None,
                num_tasks=num_tasks,
                num_modules=num_modules,
            )

            # Compile the DAG
            compiled_dag_em = prism.logging.execution._ExecutionEventManager(
                idx=None,
                total=None,
                name="Parsing task dependencies",
                func=self.project_parser.compile_dag,
            )
            compiled_dag = compiled_dag_em.run(
                fire_exec_events=True,
                project_id=self.project_id,
                run_slug=None,
                tasks_dir=self.tasks_dir,
                parsed_module_objs=parsed_module_objs,
                user_arg_task_ids=[],
                user_arg_all_downstream=True,
            )

            # Create docs dir and copy the build directory into the docs folder.
            build_dir = self._build_docs(compiled_dag)

            # Send clean messages when Ctrl+C is pressed
            def handler(signum, frame):
                fire_empty_line_event()
                res = input("Shutdown the Prism docs server (y/n)? ")
                if res == "y":
                    fire_tail_events()
                    if self.hot_reload:
                        self.observer.stop()
                    sys.exit(0)

            signal.signal(signal.SIGINT, handler)

            # Serve the docs
            os.chdir(build_dir)
            port = self.port
            address = "127.0.0.1"
            fire_serving_docs_events(address, port)

            # mypy doesn't think SimpleHTTPRequestHandler is ok here, but it is
            httpd = TCPServer((address, port), SimpleHTTPRequestHandler)
            try:
                if self.open_window:
                    webbrowser.open_new_tab(f"http://{address}:{port}")
                if self.hot_reload:
                    self.observer.start()
                httpd.serve_forever()
            finally:
                # `serve_forever` runs on this thread, so once we get here the loop
                # is no longer running. Calling `httpd.shutdown()` would be useless
                # at best, and would block forever if `serve_forever` never started
                # (e.g. the observer failed to start). Only the socket needs closing.
                httpd.server_close()
            return None
        except Exception:
            prism.logging.loggers.CONSOLE.print_exception(
                show_locals=False, suppress=[prism], width=120
            )
            sys.exit(1)
249 |
--------------------------------------------------------------------------------
/prism/connectors/__init__.py:
--------------------------------------------------------------------------------
1 | from .bigquery import * # noqa: F401, F403
2 | from .postgres import * # noqa: F401, F403
3 | from .presto import * # noqa: F401, F403
4 | from .redshift import * # noqa: F401, F403
5 | from .snowflake import * # noqa: F401, F403
6 | from .trino import * # noqa: F401, F403
7 |
--------------------------------------------------------------------------------
/prism/connectors/base.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | from typing import Any, Union, Optional
3 |
4 |
class Connector:
    """
    Base class for connectors. It stores an ID plus arbitrary keyword settings as
    instance attributes; subclasses implement `create_engine` and `execute_sql`.
    """

    id: str
    engine: Any

    def __init__(self, id: str, **kwargs):
        """
        args:
            id: identifier for this connector
            **kwargs: each key/value pair is set as an attribute on the instance
        """
        self.id = id
        for attr_name in kwargs:
            setattr(self, attr_name, kwargs[attr_name])

    def create_engine(self):
        """Create the underlying connection. Must be implemented by subclasses."""
        raise NotImplementedError

    def execute_sql(
        self, sql: str, return_type: Optional[str]
    ) -> Union[pd.DataFrame, Any]:
        """
        Execute `sql` and return its result. Must be implemented by subclasses.

        args:
            sql: SQL statement to run
            return_type: requested result format, or `None`
        """
        raise NotImplementedError
21 |
--------------------------------------------------------------------------------
/prism/connectors/bigquery.py:
--------------------------------------------------------------------------------
1 | # Standard library imports
2 | from pathlib import Path
3 | from typing import Any, List, Literal, Optional, Tuple, Union
4 |
5 | import pandas as pd
6 |
7 | from prism.utils import requires_dependencies
8 |
9 | # Prism-specific imports
10 | from .base import Connector
11 |
12 |
class BigQueryConnector(Connector):
    """
    Connector for Google BigQuery, authenticated with a service-account
    credentials file.
    """

    creds: Union[str, Path]

    # This should be an instance of the `bigquery.Client` class, but we don't want to
    # import bigquery class unless the user calls the `create_engine` method.
    engine: Any

    def __init__(self, id: str, creds: Union[str, Path]):
        """
        args:
            id: identifier for this connector
            creds: path to the service-account credentials file
        """
        super().__init__(
            id,
            creds=creds,
        )

    @requires_dependencies(["google.cloud", "google.oauth2"], "bigquery")  # noqa
    def create_engine(self):
        """
        Create a BigQuery client using the service-account file at `self.creds`.

        returns:
            `google.cloud.bigquery.Client` instance
        """
        # Import Python client for Google BigQuery
        from google.cloud import bigquery
        from google.oauth2 import service_account

        credentials = service_account.Credentials.from_service_account_file(self.creds)
        return bigquery.Client(credentials=credentials)

    @requires_dependencies(["google.cloud", "google.oauth2"], "bigquery")  # noqa
    def execute_sql(
        self,
        sql: str,
        return_type: Optional[Literal["pandas"]],
    ) -> Union[pd.DataFrame, Any]:
        """
        Execute the SQL query

        args:
            sql: query to run
            return_type: "pandas" to get a DataFrame; anything else returns a list
                of result rows
        returns:
            DataFrame or list of rows, depending on `return_type`
        """
        # The constructor does not create the client, so build it on first use
        # instead of failing with an AttributeError on `self.engine`.
        if getattr(self, "engine", None) is None:
            self.engine = self.create_engine()

        # Type hinting is kind of a pain here, so ignore for now.
        job = self.engine.query(sql)
        if return_type == "pandas":
            df: pd.DataFrame = job.to_dataframe()
            return df
        res: List[Tuple[Any, ...]] = list(job.result())
        return res
69 |
--------------------------------------------------------------------------------
/prism/connectors/postgres.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | from typing import Any, List, Literal, Optional, Tuple, Union
3 |
4 | # Prism-specific imports
5 | from prism.utils import requires_dependencies
6 | from prism.connectors.base import Connector
7 |
8 |
class PostgresConnector(Connector):
    """
    Connector for Postgres built on `psycopg2`. The connection is opened as soon as
    the connector is constructed.
    """

    user: str
    password: str
    port: int
    host: str
    database: str
    autocommit: bool

    # This should be an instance of the `psycopg2.extensions.connection`, but we don't
    # want to import psycopg2 unless the user calls the `create_engine` method.
    engine: Any

    def __init__(
        self,
        id: str,
        user: str,
        password: str,
        port: int,
        host: str,
        database: str,
        autocommit: bool = True,
    ):
        """
        args:
            id: identifier for this connector
            user: user name
            password: password
            port: server port
            host: server host
            database: database name
            autocommit: passed to the connection's `set_session`
        """
        super().__init__(
            id,
            user=user,
            password=password,
            port=port,
            host=host,
            database=database,
            autocommit=autocommit,
        )

        # Create engine
        self.engine = self.create_engine()

    @requires_dependencies("psycopg2", "postgres")
    def create_engine(self) -> Any:
        """
        Create the Postgres connection using `psycopg2`

        returns:
            the open connection, with its session's autocommit set from
            `self.autocommit`
        """
        import psycopg2

        conn = psycopg2.connect(
            dbname=self.database,
            host=self.host,
            port=self.port,
            user=self.user,
            password=self.password,
        )
        conn.set_session(autocommit=self.autocommit)
        return conn

    @requires_dependencies("psycopg2", "postgres")
    def execute_sql(
        self,
        sql: str,
        return_type: Optional[Literal["pandas"]],
    ) -> Union[pd.DataFrame, List[Tuple[Any, ...]]]:
        """
        Execute `sql` and fetch every row of the result.

        args:
            sql: query to run
            return_type: "pandas" to get a DataFrame whose columns are named after
                the cursor description; anything else returns the fetched rows
        returns:
            DataFrame or list of rows, depending on `return_type`
        """
        # For type hinting
        import psycopg2

        # Create cursor for every SQL query -- this ensures thread safety
        cursor: psycopg2.extensions.cursor = self.engine.cursor()
        try:
            cursor.execute(sql)
            data = cursor.fetchall()

            # If the return type is `pandas`, then return a DataFrame
            if return_type == "pandas":
                cols = [col[0] for col in cursor.description]
                df: pd.DataFrame = pd.DataFrame(data=data, columns=cols)
                return df

            # Otherwise, return the data as it exists
            return data  # type: ignore
        finally:
            # Close the cursor even when the query or the fetch raises
            cursor.close()
88 |
--------------------------------------------------------------------------------
/prism/connectors/presto.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | from typing import Any, List, Literal, Optional, Union
3 |
4 | # Prism-specific imports
5 | from prism.connectors.base import Connector
6 | from prism.utils import requires_dependencies
7 |
8 |
9 | ####################
10 | # Class definition #
11 | ####################
12 |
13 |
class PrestoConnector(Connector):
    """
    Connector for Presto built on `prestodb`. The connection is created as soon as
    the connector is constructed.
    """

    user: str
    password: str
    port: int
    host: str
    http_scheme: Optional[str]
    catalog: Optional[str]
    schema: Optional[str]

    # This should be an instance of the `prestodb.dbapi.Connection`, but we don't want
    # to import prestodb unless the user calls the `create_engine` method.
    engine: Any

    def __init__(
        self,
        id: str,
        user: str,
        password: str,
        port: int,
        host: str,
        http_scheme: Optional[str] = None,
        catalog: Optional[str] = None,
        schema: Optional[str] = None,
    ):
        """
        args:
            id: identifier for this connector
            user: user name for basic authentication
            password: password for basic authentication
            port: server port
            host: server host
            http_scheme: HTTP scheme; "https" is used when this is empty or `None`
            catalog: optional catalog
            schema: optional schema; requires `catalog`
        raises:
            ValueError: if `schema` is given without `catalog`
        """
        super().__init__(
            id,
            user=user,
            password=password,
            port=port,
            host=host,
            http_scheme=http_scheme,
            catalog=catalog,
            schema=schema,
        )

        # Minor validation
        if self.schema is not None and self.catalog is None:
            raise ValueError("`catalog` cannot be `None` when `schema` is specified")

        # Create engine
        self.engine = self.create_engine()

    @requires_dependencies(
        "prestodb",
        "presto",
    )
    def create_engine(self) -> Any:
        """
        Create the PrestoDB connection

        returns:
            connection returned by `prestodb.dbapi.connect`
        """
        import prestodb

        connect_kwargs = {
            "host": self.host,
            "port": self.port,
            "http_scheme": self.http_scheme if self.http_scheme else "https",
            "auth": prestodb.auth.BasicAuthentication(
                self.user,
                self.password,
            ),
        }

        # `catalog` / `schema` are only forwarded when they were specified
        if self.schema is not None:
            connect_kwargs["catalog"] = self.catalog
            connect_kwargs["schema"] = self.schema
        elif self.catalog is not None:
            connect_kwargs["catalog"] = self.catalog

        return prestodb.dbapi.connect(**connect_kwargs)

    @requires_dependencies(
        "prestodb",
        "presto",
    )
    def execute_sql(
        self,
        sql: str,
        return_type: Optional[Literal["pandas"]],
    ) -> Union[pd.DataFrame, List[List[Any]]]:
        """
        Execute `sql` and fetch every row of the result.

        args:
            sql: query to run
            return_type: "pandas" to get a DataFrame whose columns are named after
                the cursor description; anything else returns the fetched rows
        returns:
            DataFrame or list of rows, depending on `return_type`
        """
        # For type hinting
        import prestodb

        # Create cursor for every SQL query -- this ensures thread safety
        cursor: prestodb.dbapi.Cursor = self.engine.cursor()
        try:
            cursor.execute(sql)
            data = cursor.fetchall()

            # If the return type is `pandas`, then return a DataFrame
            if return_type == "pandas":
                cols = [col[0] for col in cursor.description]
                df: pd.DataFrame = pd.DataFrame(data=data, columns=cols)
                return df
            return data  # type: ignore
        finally:
            # Close the cursor even when the query or the fetch raises
            cursor.close()
136 |
--------------------------------------------------------------------------------
/prism/connectors/redshift.py:
--------------------------------------------------------------------------------
1 | from prism.connectors.postgres import PostgresConnector
2 |
3 |
class RedshiftConnector(PostgresConnector):
    """Connector for Redshift; it reuses `PostgresConnector` without any changes."""
6 |
--------------------------------------------------------------------------------
/prism/connectors/snowflake.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | from typing import Any, Dict, List, Literal, Optional, Tuple, Union
3 |
4 | # Prism-specific imports
5 | from prism.connectors.base import Connector
6 | from prism.utils import requires_dependencies
7 |
8 |
class SnowflakeConnector(Connector):
    """
    Connector for Snowflake built on `snowflake.connector`. The connection is
    created as soon as the connector is constructed.
    """

    user: str
    password: str
    account: str
    role: str
    warehouse: str
    database: str
    schema: str

    # This should be an instance of the `snowflake.connector.Connection` class, but we
    # don't want to import snowflake.connector class unless the user
    # calls the `create_engine` method.
    engine: Any

    def __init__(
        self,
        id: str,
        user: str,
        password: str,
        account: str,
        role: str,
        warehouse: str,
        database: str,
        schema: str,
    ):
        """
        args:
            id: identifier for this connector
            user: user name
            password: password
            account: Snowflake account
            role: role to use
            warehouse: warehouse to use
            database: database to use
            schema: schema to use
        """
        super().__init__(
            id,
            user=user,
            password=password,
            account=account,
            role=role,
            warehouse=warehouse,
            database=database,
            schema=schema,
        )

        self.engine = self.create_engine()

    @requires_dependencies(["snowflake.connector", "pyarrow"], "snowflake")
    def create_engine(self) -> Any:
        """
        Create the Snowflake connection

        returns:
            connection returned by `snowflake.connector.connect`
        """
        import snowflake.connector

        conn = snowflake.connector.connect(
            account=self.account,
            user=self.user,
            password=self.password,
            database=self.database,
            schema=self.schema,
            warehouse=self.warehouse,
            role=self.role,
        )
        return conn

    @requires_dependencies(["snowflake.connector", "pyarrow"], "snowflake")
    def execute_sql(
        self,
        sql: str,
        return_type: Optional[Literal["pandas"]],
    ) -> Union[pd.DataFrame, List[Tuple[Any]], List[Dict[Any, Any]]]:
        """
        Execute `sql` and fetch the full result.

        args:
            sql: query to run
            return_type: "pandas" to fetch the result with `fetch_pandas_all`;
                anything else fetches it with `fetchall`
        returns:
            DataFrame or fetched rows, depending on `return_type`
        """
        # For type hinting
        import snowflake.connector

        # Create cursor for every SQL query -- this ensures thread safety
        cursor: snowflake.connector.cursor.SnowflakeCursor = self.engine.cursor()
        try:
            cursor.execute(sql)

            # If the return type is `pandas`, then return a DataFrame
            if return_type == "pandas":
                df: pd.DataFrame = cursor.fetch_pandas_all()
                return df

            # Otherwise, just return the data
            return cursor.fetchall()
        finally:
            # Close the cursor even when the query or the fetch raises
            cursor.close()
89 |
--------------------------------------------------------------------------------
/prism/connectors/trino.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | from typing import Any, List, Literal, Optional, Union
3 |
4 | # Prism-specific imports
5 | from prism.connectors.base import Connector
6 | from prism.utils import requires_dependencies
7 |
8 |
9 | ####################
10 | # Class definition #
11 | ####################
12 |
13 |
class TrinoConnector(Connector):
    """
    Connector that executes SQL through a `trino.dbapi` connection.

    The connection is created when the connector is instantiated and stored in
    `engine`. A new cursor is created for every query.
    """

    user: str
    password: str
    port: int
    host: str
    http_scheme: Optional[str]
    catalog: Optional[str]
    schema: Optional[str]

    # This should be an instance of `trino.dbapi.Connection`, but we don't want to
    # import trino unless the user calls the `create_engine` method.
    engine: Any

    def __init__(
        self,
        id: str,
        user: str,
        password: str,
        port: int,
        host: str,
        http_scheme: Optional[str] = None,
        catalog: Optional[str] = None,
        schema: Optional[str] = None,
    ):
        """
        Register the Trino connection settings and open the connection.

        args:
            id: connector ID, passed positionally to the parent initializer
            user: user for basic authentication
            password: password for basic authentication
            port: Trino port
            host: Trino host
            http_scheme: HTTP scheme; "https" is used when this is empty or `None`
            catalog: optional catalog
            schema: optional schema; requires `catalog`
        raises:
            ValueError if `schema` is specified without a `catalog`
        """
        super().__init__(
            id,
            user=user,
            password=password,
            port=port,
            host=host,
            http_scheme=http_scheme,
            catalog=catalog,
            schema=schema,
        )

        # Minor validation
        if self.schema is not None and self.catalog is None:
            raise ValueError("`catalog` cannot be `None` when `schema` is specified")

        # Create engine
        self.engine = self.create_engine()

    @requires_dependencies(
        "trino",
        "trino",
    )
    def create_engine(self) -> Any:
        """
        Create the Trino connection

        returns:
            the object returned by `trino.dbapi.connect`
        """
        import trino

        # Arguments shared by every connection, regardless of catalog / schema
        connect_kwargs = {
            "host": self.host,
            "port": self.port,
            "http_scheme": self.http_scheme if self.http_scheme else "https",
            "auth": trino.auth.BasicAuthentication(
                self.user,
                self.password,
            ),
        }

        # `catalog` and `schema` are only passed when they are set, so the keyword
        # arguments sent to `connect` are the same as before
        if self.schema is not None:
            connect_kwargs["catalog"] = self.catalog
            connect_kwargs["schema"] = self.schema

        # Just catalog is present
        elif self.catalog is not None:
            connect_kwargs["catalog"] = self.catalog

        return trino.dbapi.connect(**connect_kwargs)

    @requires_dependencies(
        "trino",
        "trino",
    )
    def execute_sql(
        self,
        sql: str,
        return_type: Optional[Literal["pandas"]],
    ) -> Union[pd.DataFrame, List[List[Any]]]:
        """
        Execute a SQL statement on the Trino connection and return its results.

        args:
            sql: SQL statement to execute
            return_type: if "pandas", the rows are returned as a DataFrame whose
                column names are taken from `cursor.description`; for any other
                value the rows from `cursor.fetchall()` are returned as-is
        returns:
            the fetched results
        """
        # For type hinting
        import trino

        # Create cursor for every SQL query -- this ensures thread safety
        cursor: trino.dbapi.Cursor = self.engine.cursor()

        # Close the cursor even when the query or the fetch raises, so that a
        # failing statement does not leave an open cursor behind
        try:
            cursor.execute(sql)
            data = cursor.fetchall()

            # If the return type is `pandas`, then return a DataFrame
            if return_type == "pandas":
                cols = [elts[0] for elts in cursor.description]
                df: pd.DataFrame = pd.DataFrame(data=data, columns=cols)
                return df
            return data  # type: ignore
        finally:
            cursor.close()
137 |
--------------------------------------------------------------------------------
/prism/constants.py:
--------------------------------------------------------------------------------
1 | """
2 | Prism constants.
3 | """
4 |
5 | # Imports
6 | import os
7 | from pathlib import Path
8 | import sys
9 |
10 |
# Version number
VERSION = "0.3.0"


# Root directory of project (the parent of the directory containing this file)
ROOT_DIR = str(Path(os.path.dirname(__file__)).parent)


# Files to ignore when instantiating Prism project
IGNORE_FILES = ["__pycache__", "*checkpoint.ipynb", ".ipynb_checkpoints"]


# Python version
PYTHON_VERSION = sys.version_info


# Internal folder for stuff created by Prism. `exist_ok=True` creates the folder
# only when it is missing and does not fail if another process creates it between
# a check and the `mkdir` call. It still raises if the path exists and is not a
# directory.
INTERNAL_FOLDER = Path(os.path.expanduser("~/.prism"))
INTERNAL_FOLDER.mkdir(parents=True, exist_ok=True)
31 |
--------------------------------------------------------------------------------
/prism/db/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/db/__init__.py
--------------------------------------------------------------------------------
/prism/db/factory.py:
--------------------------------------------------------------------------------
1 | # General package imports
2 | import contextlib
3 | from pathlib import Path
4 |
5 | # SQLAlchemy imports
6 | from sqlalchemy import Engine, create_engine
7 | from sqlalchemy.orm import (
8 | sessionmaker,
9 | scoped_session,
10 | Session,
11 | )
12 | from sqlalchemy.sql.base import Executable
13 |
14 | # Prism imports
15 | from prism.constants import INTERNAL_FOLDER
16 |
17 |
class ThreadLocalSessionFactory:
    """
    Creates SQLAlchemy sessions bound to the SQLite database stored in Prism's
    internal folder, and executes statements with those sessions.
    """

    db_uri: str
    engine: Engine

    def __init__(self):
        internal_dir = Path(INTERNAL_FOLDER).resolve()
        self.db_uri = f"sqlite:///{internal_dir}/prism.db"
        self.engine = create_engine(self.db_uri)

    @contextlib.contextmanager
    def create_thread_local_session(self):
        """
        Context manager that yields a new session bound to `self.engine` and closes
        it on exit, whether or not the body raised.
        """
        session_registry = scoped_session(sessionmaker(bind=self.engine))
        session = session_registry()
        try:
            yield session
        finally:
            session.close()

    def execute_thread_local_stmt(
        self,
        stmt: Executable,
        session: Session,
        select_statement: bool = True,
        model_objects: bool = True,
    ):
        """
        Execute `stmt` with `session`.

        args:
            stmt: statement to execute
            session: session used to execute the statement
            select_statement: whether results should be fetched and returned
            model_objects: for select statements, whether to return the results of
                `session.scalars` (True) or of `session.execute` (False)
        returns:
            all fetched results for select statements; None otherwise
        """
        # Non-select statements are executed for their effect only
        if not select_statement:
            session.execute(stmt)
            return None

        if model_objects:
            return session.scalars(stmt).all()
        return session.execute(stmt).all()
52 |
--------------------------------------------------------------------------------
/prism/db/mixins.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime
2 | from pathlib import Path
3 | from typing import Any, Dict, List, Literal, Union
4 |
5 | from sqlalchemy import delete, select
6 |
7 | from prism.db.factory import ThreadLocalSessionFactory
8 | from prism.db.setup import Project, Ref, Run, Target, Task, TaskRun
9 |
10 |
class DbMixin:
    """
    Mixin class used to add elements to our database

    Each method creates its own `ThreadLocalSessionFactory`, opens a session from
    it, and commits its changes inside that session.
    """

    def create_new_project(
        self, project_id: str, local_path: Union[str, Path], ctx: Dict[str, Any]
    ) -> None:
        """
        Add a project to the database, unless a project with the same ID and local
        path already exists.

        args:
            project_id: project ID
            local_path: local path of the project; stored as a string
            ctx: project context; every value is converted to a string
        """
        factory = ThreadLocalSessionFactory()
        with factory.create_thread_local_session() as session:
            # Check if project already exists
            stmt = (
                select(Project)
                .where(Project.id == project_id)
                .where(Project.local_path == str(local_path))
            )
            project_res = factory.execute_thread_local_stmt(stmt, session)

            # All values in context should be serializable. If it's a custom object,
            # then we'll just turn it into a string.
            # TODO: maybe we should warn the user
            ctx = {k: str(v) for k, v in ctx.items()}

            # If it doesn't exist, then add the project
            if not project_res:
                new_project = Project(
                    id=project_id, local_path=str(local_path), ctx=ctx
                )
                session.add(new_project)
                session.commit()

        return None

    def update_tasks(self, project_id: str, task_ids: List[str]) -> None:
        """
        Synchronize the project's tasks in the database with `task_ids`.

        Tasks already in the database get their `current` flag set to whether they
        appear in `task_ids`. Tasks that are in `task_ids` but not in the database
        are added with `current=True`.

        args:
            project_id: ID of the project; it must already exist in the database
            task_ids: IDs of the tasks currently in the project
        raises:
            IndexError if no project with ID `project_id` exists
        """
        factory = ThreadLocalSessionFactory()
        with factory.create_thread_local_session() as session:
            stmt = select(Project).where(Project.id == project_id)
            project = factory.execute_thread_local_stmt(stmt, session)[0]

            # Current tasks in the database. Compare them against the `task_ids`
            # input and update the `current` field. A set keeps the membership
            # test constant-time.
            requested_task_ids = set(task_ids)
            seen_task_ids = set()
            current_tasks_in_db: List[Task] = project.tasks
            for t in current_tasks_in_db:
                t.current = t.task_id in requested_task_ids
                seen_task_ids.add(t.task_id)

            # Add remaining tasks. `dict.fromkeys` removes duplicates while keeping
            # the input order, so new rows are inserted in a deterministic order.
            for tid in dict.fromkeys(task_ids):
                if tid not in seen_task_ids:
                    session.add(Task(task_id=tid, project_id=project_id, current=True))
            session.commit()

        return None

    def update_project_tasks_refs_targets(
        self,
        project_id: str,
        tasks: List[str],
        refs: Dict[str, List[str]],
        targets: Dict[str, List[str]],
    ) -> None:
        """
        Update the project's tasks, then replace all of its refs and targets.

        args:
            project_id: ID of the project
            tasks: IDs of the tasks currently in the project
            refs: mapping of a ref's target ID --> list of its source IDs
            targets: mapping of task ID --> list of target locations
        """
        self.update_tasks(project_id, tasks)
        factory = ThreadLocalSessionFactory()
        with factory.create_thread_local_session() as session:
            # Delete existing refs. Then add current ones.
            ref_stmt = delete(Ref).where(Ref.project_id == project_id)
            factory.execute_thread_local_stmt(ref_stmt, session, select_statement=False)

            # Delete existing targets
            target_stmt = delete(Target).where(Target.project_id == project_id)
            factory.execute_thread_local_stmt(
                target_stmt, session, select_statement=False
            )

            # Add current refs and targets
            for target, sources in refs.items():
                for s in sources:
                    session.add(
                        Ref(
                            target_id=target,
                            source_id=s,
                            project_id=project_id,
                        )
                    )

            for tid, tgts in targets.items():
                for t in tgts:
                    session.add(
                        Target(
                            task_id=tid,
                            loc=t,
                            project_id=project_id,
                        )
                    )

            session.commit()
        return None

    def create_new_run(
        self,
        run_slug: str,
        run_date: datetime,
        logs_path: Union[str, Path],
        status: Literal["PENDING", "RUNNING", "SUCCEEDED", "FAILED"],
        ctx: Dict[str, Any],
        project_id: str,
    ) -> None:
        """
        Add a new run to the database.

        args:
            run_slug: slug identifying the run
            run_date: date of the run
            logs_path: path to the run's logs; stored as a string
            status: initial status of the run
            ctx: run context; every value is converted to a string
            project_id: ID of the project that the run belongs to
        raises:
            ValueError if a run with this slug already exists for the project
        """
        factory = ThreadLocalSessionFactory()
        with factory.create_thread_local_session() as session:
            # The run should not exist already. If it does, then raise an error
            stmt = (
                select(Run)
                .where(Run.run_slug == run_slug)
                .where(Run.project_id == project_id)
            )
            runs = factory.execute_thread_local_stmt(stmt, session)
            if runs:
                raise ValueError(f"run `{run_slug}` already exists in the database")

            # All values in context should be serializable. If it's a custom object,
            # then we'll just turn it into a string.
            # TODO: maybe we should warn the user
            ctx = {k: str(v) for k, v in ctx.items()}

            # Create a new Run. `logs_path` may be a `Path`, so it is converted to
            # a string, as `create_new_project` does for `local_path`.
            run = Run(
                run_slug=run_slug,
                run_date=run_date,
                logs_path=str(logs_path),
                status=status,
                ctx=ctx,
                project_id=project_id,
            )
            session.add(run)
            session.commit()
        return None

    def update_run_status(
        self,
        run_slug: str,
        project_id: str,
        status: Literal["PENDING", "RUNNING", "SUCCEEDED", "FAILED"],
    ) -> None:
        """
        Set the status of an existing run.

        args:
            run_slug: slug identifying the run
            project_id: ID of the project that the run belongs to
            status: new status of the run
        raises:
            IndexError if no matching run exists
        """
        factory = ThreadLocalSessionFactory()
        with factory.create_thread_local_session() as session:
            stmt = (
                select(Run)
                .where(Run.run_slug == run_slug)
                .where(Run.project_id == project_id)
            )
            run = factory.execute_thread_local_stmt(stmt, session)[0]
            run.status = status
            session.commit()
        return None

    def create_task_run(self, run_slug: str, task_id: str) -> None:
        """
        Add a task run with status `PENDING`, unless one already exists for this
        run slug and task ID.

        args:
            run_slug: slug identifying the run
            task_id: ID of the task
        """
        factory = ThreadLocalSessionFactory()
        with factory.create_thread_local_session() as session:
            # If the task run exists, then do nothing
            stmt = (
                select(TaskRun)
                .where(TaskRun.run_slug == run_slug)
                .where(TaskRun.task_id == task_id)
            )
            res = factory.execute_thread_local_stmt(stmt, session)
            if res:
                return None

            # New TaskRun — we create this TaskRun when all the tasks are compiled
            # and the run is executed. Therefore, the task should start with status
            # `PENDING`. We dynamically update this status at runtime
            tr = TaskRun(run_slug=run_slug, task_id=task_id, status="PENDING")
            session.add(tr)
            session.commit()
        return None

    def update_task_run_status(
        self,
        run_slug: str,
        task_id: str,
        status: Literal["PENDING", "RUNNING", "SUCCEEDED", "FAILED", "SKIPPED"],
    ) -> None:
        """
        Set the status of an existing task run.

        args:
            run_slug: slug identifying the run
            task_id: ID of the task
            status: new status of the task run
        raises:
            IndexError if no matching task run exists
        """
        factory = ThreadLocalSessionFactory()
        with factory.create_thread_local_session() as session:
            stmt = (
                select(TaskRun)
                .where(TaskRun.run_slug == run_slug)
                .where(TaskRun.task_id == task_id)
            )
            taskrun = factory.execute_thread_local_stmt(stmt, session)[0]
            taskrun.status = status
            session.commit()
        return None
203 |
--------------------------------------------------------------------------------
/prism/db/setup.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | from typing import Any, Dict, List, Literal
3 |
4 | from sqlalchemy import ForeignKey
5 | from sqlalchemy.orm import DeclarativeBase, Mapped, backref, mapped_column, relationship
6 | from sqlalchemy.types import JSON, String
7 |
8 | # Prism-specific imports
9 | from prism.db.factory import ThreadLocalSessionFactory
10 |
11 |
class Base(DeclarativeBase):
    """
    Declarative base class that the models in this module inherit from.
    """

    # Column types to use for the non-standard annotations that appear inside
    # `Mapped[...]` in the models below: context dictionaries are mapped to JSON and
    # the status literals are mapped to String.
    type_annotation_map = {
        Dict[str, Any]: JSON,
        Literal["PENDING", "RUNNING", "SUCCEEDED", "FAILED"]: String,
        Literal["PENDING", "RUNNING", "SUCCEEDED", "FAILED", "SKIPPED"]: String,
    }
18 |
19 |
20 | # Models
class Project(Base):
    """
    A Prism project, stored in the `project` table and identified by a string ID.
    """

    __tablename__ = "project"
    id: Mapped[str] = mapped_column(primary_key=True, nullable=False)
    local_path: Mapped[str] = mapped_column(nullable=False)
    # Stored as JSON (see `Base.type_annotation_map`)
    ctx: Mapped[Dict[str, Any]] = mapped_column(nullable=False)
    # Each relationship below adds a `project` back-reference to the related model
    runs: Mapped[List["Run"]] = relationship(backref=backref("project"))
    tasks: Mapped[List["Task"]] = relationship(backref=backref("project"))
    refs: Mapped[List["Ref"]] = relationship(backref=backref("project"))
    targets: Mapped[List["Target"]] = relationship(backref=backref("project"))
30 |
31 |
class Run(Base):
    """
    A single run of a project, stored in the `runs` table and identified by its
    run slug.
    """

    __tablename__ = "runs"
    run_slug: Mapped[str] = mapped_column(primary_key=True, nullable=False)
    run_date: Mapped[datetime.datetime] = mapped_column(nullable=False)
    logs_path: Mapped[str] = mapped_column(nullable=False)
    # Unlike the other columns, the status is allowed to be NULL
    status: Mapped[
        Literal["PENDING", "RUNNING", "SUCCEEDED", "FAILED"]
    ] = mapped_column(nullable=True)
    # Adds a `run` back-reference to `TaskRun`
    taskruns: Mapped[List["TaskRun"]] = relationship(backref="run")
    # Stored as JSON (see `Base.type_annotation_map`)
    ctx: Mapped[Dict[str, Any]] = mapped_column(nullable=False)
    project_id: Mapped[str] = mapped_column(ForeignKey("project.id"))
43 |
44 |
class Task(Base):
    """
    A task that belongs to a project, stored in the `tasks` table.
    """

    __tablename__ = "tasks"
    # Auto-incrementing surrogate key; `task_id` below holds the task's string ID
    id: Mapped[int] = mapped_column(
        primary_key=True, nullable=False, autoincrement=True
    )  # noqa: E501
    task_id: Mapped[str] = mapped_column(nullable=False)
    current: Mapped[bool] = mapped_column(nullable=False)
    # Adds a `task` back-reference to `TaskRun`
    taskruns: Mapped[List["TaskRun"]] = relationship(backref="task")
    # NOTE(review): annotated as `int`, but `project.id` is declared as
    # `Mapped[str]` -- confirm which type is intended
    project_id: Mapped[int] = mapped_column(ForeignKey("project.id"))
54 |
55 |
class TaskRun(Base):
    """
    The status of one task within one run, stored in the `taskruns` table. The
    primary key is the combination of `run_slug` and `task_id`.
    """

    __tablename__ = "taskruns"
    run_slug: Mapped[str] = mapped_column(ForeignKey("runs.run_slug"), primary_key=True)
    # NOTE(review): this references the integer `tasks.id` column; confirm that
    # callers pass that integer key rather than the task's string ID
    task_id: Mapped[int] = mapped_column(ForeignKey("tasks.id"), primary_key=True)
    status: Mapped[
        Literal["PENDING", "RUNNING", "SUCCEEDED", "FAILED", "SKIPPED"]
    ] = mapped_column(nullable=False)  # noqa: E501
63 |
64 |
class Ref(Base):
    """
    A reference between two IDs (`source_id` and `target_id`) within a project,
    stored in the `refs` table.
    """

    __tablename__ = "refs"
    # Auto-incrementing surrogate key
    id: Mapped[int] = mapped_column(
        nullable=False, primary_key=True, autoincrement=True
    )  # noqa: E501
    target_id: Mapped[str] = mapped_column(nullable=False)
    source_id: Mapped[str] = mapped_column(nullable=False)
    # NOTE(review): annotated as `int`, but `project.id` is declared as
    # `Mapped[str]` -- confirm which type is intended
    project_id: Mapped[int] = mapped_column(ForeignKey("project.id"))
73 |
74 |
class Target(Base):
    """
    A target location (`loc`) associated with a task in a project, stored in the
    `targets` table.
    """

    __tablename__ = "targets"
    # Auto-incrementing surrogate key
    id: Mapped[int] = mapped_column(
        nullable=False, primary_key=True, autoincrement=True
    )  # noqa: E501
    loc: Mapped[str] = mapped_column(nullable=False)
    # NOTE(review): annotated as `str`, but `tasks.id` is declared as
    # `Mapped[int]` -- confirm which type is intended
    task_id: Mapped[str] = mapped_column(ForeignKey("tasks.id"))
    # NOTE(review): annotated as `int`, but `project.id` is declared as
    # `Mapped[str]` -- confirm which type is intended
    project_id: Mapped[int] = mapped_column(ForeignKey("project.id"))
83 |
84 |
def setup():
    """
    Create the tables for all models registered on `Base`, using the engine of a
    new `ThreadLocalSessionFactory`.
    """
    engine = ThreadLocalSessionFactory().engine
    Base.metadata.create_all(bind=engine)
88 |
--------------------------------------------------------------------------------
/prism/decorators/__init__.py:
--------------------------------------------------------------------------------
1 | from .target import target, target_iterator # noqa: F401
2 |
3 | from .task import task # noqa: F401
4 |
--------------------------------------------------------------------------------
/prism/decorators/target.py:
--------------------------------------------------------------------------------
1 | """
2 | Target decorators
3 |
4 | Table of Contents
5 | - Imports
6 | - Target decorators
7 | """
8 |
9 | ###########
10 | # Imports #
11 | ###########
12 |
13 | import inspect
14 |
15 | # Standard library imports
16 | from pathlib import Path
17 |
18 | # Prism imports
19 | import prism.exceptions
20 | from prism.task import PrismTask
21 |
22 | #####################
23 | # Target decorators #
24 | #####################
25 |
26 |
def target(*, type, loc, **target_kwargs):
    """
    Decorator to use if user wishes to save the output of a task to an external location
    (e.g., a data warehouse, an S3 bucket, or a local filepath).

    args:
        type: target class; it is instantiated with the object to save and `loc`
        loc: location that the object is saved to
        target_kwargs: keyword arguments passed to the target's `save` method
    """

    def decorator_target(func):
        # NOTE: the wrapper's name is significant. Stacked `target` decorators
        # recognize each other by checking for the name `wrapper_target_dec`.
        def wrapper_target_dec(self):
            # This will only ever be called inside a PrismTask
            if not isinstance(self, PrismTask):
                raise prism.exceptions.RuntimeException(
                    message="`target` decorator can only be called within a Prism task"
                )

            def call_wrapped():
                # When using `target` as a decorator, the wrapped object is a
                # function. When using `target` as an argument to the `task()`
                # decorator, `run` is a bound method.
                return func() if inspect.ismethod(func) else func(self)

            def record_kwargs():
                try:
                    self.kwargs.append(target_kwargs)
                except TypeError:
                    self.kwargs.append({})

            # In cases with multiple decorators, we don't want to "chain" the
            # decorators. Rather, we want each target declaration to apply to each
            # object returned. So this wrapper only records its own type, loc and
            # kwargs and then calls the next wrapper. If a function has `n`
            # targets, then this will happen n-1 times until the `run` function is
            # reached.
            if func.__name__ == "wrapper_target_dec":
                self.types.append(type)
                self.locs.append(loc)
                record_kwargs()
                return call_wrapped()

            # Now, we've hit the `run` function. Confirm function name
            if func.__name__ != "run":
                raise prism.exceptions.RuntimeException(
                    message="`target` decorator can only be called on `run` function"  # noqa: E501
                )

            # If the task should not be run in full, then just open the targets
            # and return the objects that they hold
            if not self.bool_run or self.is_done:
                # We still need to append the last location to self.locs
                self.locs.append(loc)
                self.types.append(type)

                # If multiple targets, then return the objects of all of them
                if len(self.locs) > 1:
                    return tuple(
                        _type.open(_loc).obj
                        for _loc, _type in zip(self.locs, self.types)
                    )

                # For single-target case, return the single object
                return self.types[0].open(self.locs[0]).obj

            # Otherwise, the task should be run in full: call the run function
            obj = call_wrapped()
            self.types.append(type)
            self.locs.append(loc)
            record_kwargs()

            # If multiple things returned, we expect multiple targets
            if isinstance(obj, tuple):
                saved = zip(obj, self.types, self.locs, self.kwargs)
                for item, item_type, item_loc, item_kwargs in saved:
                    item_type.from_args(item, item_loc).save(**item_kwargs)
                return obj

            # If return type is not a tuple, we expect a single target. Initialize
            # an instance of the target class and save the object using the
            # target's `save` method
            type(obj, loc).save(**target_kwargs)
            return obj

        return wrapper_target_dec

    return decorator_target
132 |
133 |
def target_iterator(*, type, loc, **kwargs):
    """
    Decorator to use if task requires user to iterate through several different objects
    and save each object to an external location

    args:
        type: target class; it is instantiated with each object and its location
        loc: base location; each object is saved to `Path(loc) / <name>`
        kwargs: keyword arguments passed to each target's `save` method
    """

    def decorator_target_iterator(func):
        def wrapper(self):
            # This will only ever be called inside a PrismTask
            if not isinstance(self, PrismTask):
                raise prism.exceptions.RuntimeException(
                    message="`target` decorator can only be called within a Prism task"
                )

            # Confirm function name
            if func.__name__ != "run":
                raise prism.exceptions.RuntimeException(
                    message="`target iterator` decorator can only be called on `run` function"  # noqa: E501
                )

            # If the task is not being run, there is nothing to save
            if not self.bool_run:
                return loc

            # `run` is either a plain function or a bound method
            objs = func() if inspect.ismethod(func) else func(self)

            # The output must be a dictionary with string keys
            if not isinstance(objs, dict):
                raise prism.exceptions.RuntimeException(
                    message="output of run function should be dict mapping name --> object to save"  # noqa: E501
                )
            if not all(isinstance(key, str) for key, _ in objs.items()):
                raise prism.exceptions.RuntimeException(
                    message="output of run function should be dict mapping name --> object to save"  # noqa: E501
                )

            # Iterate through objects and save them out
            for name, obj in objs.items():
                type(obj, Path(loc) / name).save(**kwargs)

            return loc

        return wrapper

    return decorator_target_iterator
181 |
--------------------------------------------------------------------------------
/prism/decorators/task.py:
--------------------------------------------------------------------------------
1 | from functools import reduce
2 | from typing import Optional
3 |
4 | # Prism imports
5 | from prism.task import PrismTask
6 |
7 |
def bind(instance, func, as_name=None):
    """
    Attach *func* to *instance* as a bound method and return that bound method.

    The method is stored on the instance under *as_name*, or under the name of
    *func* if *as_name* is None. *func* should accept the instance as its first
    argument, i.e. "self".
    """
    attr_name = func.__name__ if as_name is None else as_name
    method = func.__get__(instance, instance.__class__)
    setattr(instance, attr_name, method)
    return method
19 |
20 |
def task(
    *,
    task_id: Optional[str] = None,
    retries: int = 0,
    retry_delay_seconds: Optional[int] = None,
    targets=None,
):
    """
    Decorator used to turn any Python function into a Prism task.

    args:
        task_id: ID of the task; default used when the task is instantiated
            without an explicit `task_id`
        retries: number of retries; only set on the task if truthy
        retry_delay_seconds: delay between retries; only set on the task if truthy
        targets: target decorators to apply to the task's `run` method
    returns:
        decorator that replaces the function with a factory. Calling the factory
        with `task_id` and `bool_run` creates the `PrismTask` instance.
    """

    def decorator_task(func):
        def wrapper_task(task_id: Optional[str] = task_id, bool_run: bool = True):
            assert task_id
            new_task = PrismTask(task_id=task_id, func=func, bool_run=bool_run)

            # Set class attributes
            if retries:
                new_task.retries = retries
            if retry_delay_seconds:
                new_task.retry_delay_seconds = retry_delay_seconds

            # Chain the decorators together and bind the decorated function to the task
            # instance. The targets are applied in reverse, so the first target in
            # the list ends up outermost, as if it were the topmost decorator.
            if targets:
                decorated_func = reduce(
                    lambda x, y: y(x),
                    reversed(targets),
                    new_task.run,  # type: ignore
                )
                new_task.run = bind(new_task, decorated_func)  # type: ignore

            return new_task

        return wrapper_task

    return decorator_task
60 |
--------------------------------------------------------------------------------
/prism/docs/__init__.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
# The `build` directory that sits next to this file
DOCS_INDEX_FILE_DIR = Path(__file__).parent.joinpath("build")
4 |
--------------------------------------------------------------------------------
/prism/docs/build/311ea03002abadcdcaba.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/docs/build/311ea03002abadcdcaba.png
--------------------------------------------------------------------------------
/prism/docs/build/__init__.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
# A `build` directory next to this file.
# NOTE(review): this file appears to live inside a `build` directory itself, so
# this resolves to `build/build` -- confirm whether that is intended.
DOCS_INDEX_FILE_DIR = Path(__file__).parent.joinpath("build")
3 |
--------------------------------------------------------------------------------
/prism/docs/build/ae8a93980ebb6c55123b.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/docs/build/ae8a93980ebb6c55123b.ico
--------------------------------------------------------------------------------
/prism/docs/build/ce188596011a8fa32931.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/docs/build/ce188596011a8fa32931.png
--------------------------------------------------------------------------------
/prism/docs/build/d4df11de40d39920ff8c.svg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/docs/build/d4df11de40d39920ff8c.svg
--------------------------------------------------------------------------------
/prism/docs/build/main.js.LICENSE.txt:
--------------------------------------------------------------------------------
1 | /*!
2 | * Sizzle CSS Selector Engine v2.3.6
3 | * https://sizzlejs.com/
4 | *
5 | * Copyright JS Foundation and other contributors
6 | * Released under the MIT license
7 | * https://js.foundation/
8 | *
9 | * Date: 2021-02-16
10 | */
11 |
12 | /*!
13 | * jQuery JavaScript Library v3.6.1
14 | * https://jquery.com/
15 | *
16 | * Includes Sizzle.js
17 | * https://sizzlejs.com/
18 | *
19 | * Copyright OpenJS Foundation and other contributors
20 | * Released under the MIT license
21 | * https://jquery.org/license
22 | *
23 | * Date: 2022-08-26T17:52Z
24 | */
25 |
26 | /*!
27 | Embeddable Minimum Strictly-Compliant Promises/A+ 1.1.1 Thenable
28 | Copyright (c) 2013-2014 Ralf S. Engelschall (http://engelschall.com)
29 | Licensed under The MIT License (http://opensource.org/licenses/MIT)
30 | */
31 |
32 | /*! Bezier curve function generator. Copyright Gaetan Renaudeau. MIT License: http://en.wikipedia.org/wiki/MIT_License */
33 |
34 | /*! Runge-Kutta spring physics function generator. Adapted from Framer.js, copyright Koen Bok. MIT License: http://en.wikipedia.org/wiki/MIT_License */
35 |
--------------------------------------------------------------------------------
/prism/engine/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/engine/__init__.py
--------------------------------------------------------------------------------
/prism/engine/compiled_task.py:
--------------------------------------------------------------------------------
1 | import ast
2 | import importlib
3 | import re
4 | from typing import List, Literal, Optional, Tuple, Union
5 |
6 | # Prism-specific imports
7 | from prism.db.mixins import DbMixin
8 | import prism.exceptions
9 | from prism.engine.module import _PrismModule
10 | from prism.task import PrismTask
11 |
12 |
13 | class _CompiledTask(DbMixin):
14 | """
15 | Class for defining and executing a single compiled task
16 | """
17 |
18 | run_slug: Optional[str]
19 | task_id: str
20 | task_module: _PrismModule
21 | refs: List[str]
22 |
23 | prism_task_node: Union[ast.ClassDef, ast.FunctionDef]
24 |
def __init__(
    self,
    run_slug: Optional[str],
    task_id: str,
    task_module: _PrismModule,
    refs: List[str],
):
    """
    Store the task's metadata and look up its AST node in the task's module.

    args:
        run_slug: slug of the run that the task belongs to (may be None)
        task_id: ID of the task
        task_module: module that defines the task
        refs: refs of the task
    raises:
        prism.exceptions.ParserException if `task_module` has no task node for
        `task_id`
    """
    self.run_slug, self.task_id = run_slug, task_id
    self.task_module, self.refs = task_module, refs

    # Define the task node
    try:
        task_node = task_module.prism_task_nodes[self.task_id]
    except KeyError:
        raise prism.exceptions.ParserException(
            message=f"could not find task `{self.task_id}` in `{self.task_module.module_task_relpath}`"  # noqa: E501
        )
    self.prism_task_node = task_node
44 |
def grab_retries_metadata(self) -> Tuple[int, int]:
    """
    Grab retry metadata for the task.

    returns:
        tuple of:
          1. how many retries to undertake
          2. the delay between retries
        Both default to 0 if nothing was found.
    raises:
        ValueError if a class-based task defines a truthy, non-integer value for
        `retries` or `retry_delay_seconds`
    """
    node = self.prism_task_node
    module = self.task_module
    retries = None
    retry_delay_seconds = None

    # If the task is a class, the variables will be stored in class attributes
    if isinstance(node, ast.ClassDef):
        retries = module.get_class_attribute_value(node, "retries")
        retry_delay_seconds = module.get_class_attribute_value(
            node, "retry_delay_seconds"
        )
        if retries and not isinstance(retries, int):
            raise ValueError("TypeError: `retries` should be an integer!")
        if retry_delay_seconds and not isinstance(retry_delay_seconds, int):
            raise ValueError(
                "TypeError: `retry_delay_seconds` should be an integer!"
            )

    # If the task is a decorated function, the variables will be stored as keyword
    # arguments of the task decorator.
    elif isinstance(node, ast.FunctionDef):
        decorator_call = module.get_task_decorator_call(node)
        retries = module._get_keyword_arg_from_task_decorator(
            node, decorator_call, "retries", "int"
        )
        retry_delay_seconds = module._get_keyword_arg_from_task_decorator(
            node, decorator_call, "retry_delay_seconds", "int"
        )

    # If nothing was found, default to 0
    retries = retries or 0
    retry_delay_seconds = retry_delay_seconds or 0

    # Hacky, but good enough for now
    assert isinstance(retries, int)
    assert isinstance(retry_delay_seconds, int)

    return retries, retry_delay_seconds
96 |
def instantiate_task_class(self, explicit_run: bool = True) -> PrismTask:
    """
    Instantiate the PrismTask class. All tasks, even decorated functions, are
    converted to a PrismTask class before being executed.

    args:
        explicit run: boolean indicating whether to run the task. Default is True
    returns:
        PrismTask instance
    """
    project_relpath = self.task_module.module_task_relpath

    # Before replacing forward slashes with periods, make sure any leading slashes
    # are removed.
    project_relpath_for_import = re.sub(
        r"(^\.+)/(.+)|^/(.+)",
        r"\1\2\3",
        project_relpath,
    )

    # Turn the path into a dotted module name. Only the trailing `.py` extension
    # is stripped, so a `.py` that appears elsewhere in the path is left intact.
    import_statement = re.sub(r"\.py$", "", project_relpath_for_import).replace(
        "/", "."
    )
    imported_mod = importlib.import_module(import_statement)

    # Class-based tasks and decorated functions are instantiated the same way:
    # the module attribute named after the task node is called with the task ID
    # and the run flag.
    task_factory = getattr(imported_mod, self.prism_task_node.name)
    prism_task: PrismTask = task_factory(
        task_id=self.task_id,
        bool_run=explicit_run,
    )
    return prism_task
138 |
def exec(self, explicit_run: bool = True, full_refresh: bool = False) -> PrismTask:
    """
    Execute the task, recording its status in the database and its output in the
    current run.

    args:
        explicit_run: forwarded to the task as `bool_run`; if False, the task run is
            recorded as `SKIPPED`. Default is True
        full_refresh: if True, the result of the task's `done()` check is ignored.
            Default is False
    returns:
        the instantiated PrismTask
    """
    # Register the TaskRun in the database
    assert self.run_slug is not None
    super().create_task_run(self.run_slug, self.task_id)

    runtime_module = importlib.import_module("prism.runtime")
    # TODO: clean up this error
    if not hasattr(runtime_module, "CurrentRun"):
        raise ValueError("runtime does not have `CurrentRun` attribute!")

    # Build the task and work out whether it counts as already done
    task = self.instantiate_task_class(explicit_run)
    already_done = task.done()
    task.is_done = already_done and not full_refresh

    # One of "PENDING", "RUNNING", "SUCCEEDED", "FAILED", "SKIPPED"
    status: Literal["PENDING", "RUNNING", "SUCCEEDED", "FAILED", "SKIPPED"]
    if task.is_done or not explicit_run:
        status = "SKIPPED"
    else:
        status = "RUNNING"
    super().update_task_run_status(self.run_slug, task.task_id, status)

    # The task's own `exec` is always called; only a task that was marked as
    # running is promoted to `SUCCEEDED` afterwards.
    task.exec()
    if status == "RUNNING":
        super().update_task_run_status(self.run_slug, task.task_id, "SUCCEEDED")
    runtime_module.CurrentRun._set_task_output_value(
        self.task_id, task.get_output()
    )

    # We don't use the returned task for anything, but it lets the event manager
    # know that a task was run.
    return task
173 |
--------------------------------------------------------------------------------
/prism/engine/manifest.py:
--------------------------------------------------------------------------------
1 | """
2 | Prism Manifest class
3 |
4 | Table of Contents
5 | - Imports
6 | - Class definition
7 | """
8 |
9 | ###########
10 | # Imports #
11 | ###########
12 |
13 | # Standard library imports
import json
import re
from pathlib import Path
from typing import Any, Dict, List, Optional
18 |
19 | ####################
20 | # Class definition #
21 | ####################
22 |
23 |
class TaskManifest:
    """
    Class used to store metadata on a parsed task
    """

    def __init__(self):
        self.manifest_dict: Dict[str, Any] = {"targets": {}, "tasks": {}, "refs": {}}

    def update_tasks_dir_key(
        self,
        key: str,
        level: Optional[Dict[str, Any]] = None,
    ):
        """
        Return the dictionary stored under `key` in `level`, creating an empty one
        first if `key` is missing.

        args:
            key: directory key to look up / create
            level: dictionary to update. If not given, a new dictionary is used for
                this call only (it is never shared between calls)
        returns:
            the dictionary stored at `level[key]`
        """
        if level is None:
            level = {}
        return level.setdefault(key, {})

    def add_task(self, task_module: Path, task_name: str):
        """
        We want the `tasks` key in our manifest to be structured as follows
        "tasks": {
            "<module_name>": [
                "task_name1",
                "task_name2"
            ],
            "<dir>/" {
                "<nested_module_name>": [
                    "nested_task_name3",
                    "nested_task_name4"
                ]
            }
            ...
        }

        args:
            task_module: path of the task's module; split on `/` to find directories
            task_name: name of the task to record under the module
        """
        task_module_no_py = re.sub(r"\.py$", "", str(task_module))
        *directories, module_name = task_module_no_py.split("/")

        # Walk (and create, where needed) one nested `<dir>/` dictionary per
        # directory. For a module outside of any directory this loop does nothing
        # and the module name is keyed directly under `tasks`.
        level = self.manifest_dict["tasks"]
        for directory in directories:
            level = self.update_tasks_dir_key(f"{directory}/", level)

        # Update the module / task name
        level.setdefault(module_name, []).append(task_name)

    def add_refs(self, target_module: Path, target_task: str, sources: List[str]):
        """
        Record `sources` as the refs of `target_task` within `target_module`.
        """
        target_module_no_py = re.sub(r"\.py$", "", str(target_module))
        module_refs = self.manifest_dict["refs"].setdefault(target_module_no_py, {})
        module_refs[target_task] = sources

    def add_targets(self, module_relative_path: Path, task_name: str, locs: List[str]):
        """
        Record `locs` as the target locations of `task_name` within the module.
        """
        module_name_no_py = re.sub(r"\.py$", "", str(module_relative_path))
        module_targets = self.manifest_dict["targets"].setdefault(
            module_name_no_py, {}
        )
        module_targets[task_name] = locs
98 |
99 |
class Manifest:
    """
    Class used to store metadata on compiled prism project
    """

    def __init__(self, task_manifests: Optional[List["TaskManifest"]] = None):
        """
        args:
            task_manifests: task manifests to merge into this manifest. If not
                given, a new empty list is used (never shared between instances)
        """
        self.manifest_dict: Dict[str, Any] = {
            "targets": {},
            "prism_project": "",
            "tasks": {},
            "refs": {},
        }
        self.task_manifests = [] if task_manifests is None else task_manifests

        # Iterate through task manifests and add to manifest
        for mm in self.task_manifests:
            self.manifest_dict["targets"].update(mm.manifest_dict["targets"])
            self.update(self.manifest_dict["tasks"], mm.manifest_dict["tasks"])
            self.manifest_dict["refs"].update(mm.manifest_dict["refs"])

    def update(
        self,
        manifest_dict: Dict[str, Any],
        task_manifest_dict: Dict[str, Any],
    ) -> Dict[str, Any]:
        """
        Recursive function to update `manifest_dict` with the contents of
        `task_manifest_dict`. We need a recursive function, because the `tasks` key
        within the manifest.json can have a bunch of nested dictionaries.

        args:
            manifest_dict: manifest dictionary (updated in place)
            task_manifest_dict: task manifest dictionary
        returns:
            updated manifest_dict
        """
        for k, v in task_manifest_dict.items():
            # New key: store the value as-is (by reference, not copied)
            if k not in manifest_dict:
                manifest_dict[k] = v
                continue

            existing = manifest_dict[k]

            # Existing list of task names: append only the names not yet present
            if isinstance(existing, list):
                for _item in v:
                    if _item not in existing:
                        existing.append(_item)

            # If the value is a dictionary and the manifest already has this
            # dictionary, then we need to recursively update the manifest's
            # dictionary.
            elif isinstance(existing, dict):
                self.update(existing, v)
        return manifest_dict

    def add_prism_project(self, prism_project_data: str):
        """
        Store `prism_project_data` under the manifest's `prism_project` key.
        """
        self.manifest_dict["prism_project"] = prism_project_data

    def json_dump(self, path: Path):
        """
        Write the manifest to `manifest.json` inside the directory `path`.
        """
        # The `with` block closes the file; no explicit close is needed
        with open(path / "manifest.json", "w") as f:
            json.dump(self.manifest_dict, f, sort_keys=False)

    def json_load(self, path: Path):
        """
        Read `manifest.json` from the directory `path` and return its parsed
        contents. The instance's own `manifest_dict` is not modified.
        """
        with open(path / "manifest.json", "r") as f:
            return json.load(f)
165 |
--------------------------------------------------------------------------------
/prism/exceptions.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 |
class PrismException(Exception):
    """Base class that the other Prism exceptions in this module inherit from."""
6 |
7 |
class PrismASTException(PrismException):
    """
    Error for a call argument in the AST that lacks an expected attribute.

    args:
        call_name: name of the call whose argument was inspected
        attribute: name of the attribute that was missing
    """

    def __init__(self, call_name: str, attribute: str):
        message = (
            f"AST error: `{call_name}` argument does not have `{attribute}` attribute"  # noqa: E501
        )
        self.message = message
        super().__init__(message)

    def __str__(self):
        return self.message
17 |
18 |
class ProjectAlreadyExistsException(PrismException):
    """
    Error reporting that a project already exists at `project_dir`.
    """

    def __init__(self, project_dir: Path):
        message = f"Project already exists at `{project_dir}`"
        self.message = message
        super().__init__(message)
23 |
24 |
class RuntimeException(PrismException):
    """
    Prism error carrying a caller-supplied message, available as `message` and as
    the string form of the exception.
    """

    def __init__(self, message):
        super().__init__(message)
        self.message = message

    def __str__(self):
        return self.message
32 |
33 |
class CompileException(PrismException):
    """
    Prism error carrying a caller-supplied message, available as `message` and as
    the string form of the exception.
    """

    def __init__(self, message):
        super().__init__(message)
        self.message = message

    def __str__(self):
        return self.message
41 |
42 |
class DAGException(PrismException):
    """
    Prism error carrying a caller-supplied message, available as `message` and as
    the string form of the exception.
    """

    def __init__(self, message):
        super().__init__(message)
        self.message = message

    def __str__(self):
        return self.message
50 |
51 |
class ConsoleEventException(PrismException):
    """
    Prism error carrying a caller-supplied message, available as `message` and as
    the string form of the exception.
    """

    def __init__(self, message):
        super().__init__(message)
        self.message = message

    def __str__(self):
        return self.message
59 |
60 |
class ParserException(PrismException):
    """
    Prism error carrying a caller-supplied message, available as `message` and as
    the string form of the exception.
    """

    def __init__(self, message):
        super().__init__(message)
        self.message = message

    def __str__(self):
        return self.message
68 |
69 |
class ReferenceException(PrismException):
    """
    Prism error carrying a caller-supplied message, available as `message` and as
    the string form of the exception.
    """

    def __init__(self, message):
        super().__init__(message)
        self.message = message

    def __str__(self):
        return self.message
77 |
--------------------------------------------------------------------------------
/prism/logging/__init__.py:
--------------------------------------------------------------------------------
1 | from .loggers import * # noqa: F401, F403
2 |
--------------------------------------------------------------------------------
/prism/logging/execution.py:
--------------------------------------------------------------------------------
1 | import time
2 | from typing import Any, Callable, Optional
3 |
4 | # Prism-specific imports
5 | from prism.logging.events import ExecutionEvent
6 | import prism.logging.events
7 | from prism.task import PrismTask
8 |
9 |
class _ExecutionEventManager:
    """
    Wraps a callable so that console events are fired around it: one event before
    the call to show that the action is taking place, and one after the call to show
    that it finished, was skipped, or failed.
    """

    def __init__(
        self,
        idx: Optional[int],
        total: Optional[int],
        name: str,
        func: Callable[..., Any],
    ):
        self.idx = idx
        self.total = total
        self.name = name
        self.func = func

    def _fire(
        self,
        msg: str,
        status: str,
        execution_time: Optional[float],
        log_level: str,
    ) -> None:
        """
        Build an ExecutionEvent for this manager's index / total and fire it on the
        console.
        """
        event = ExecutionEvent(
            msg=msg,
            num=self.idx,
            total=self.total,
            status=status,
            execution_time=execution_time,
        )
        prism.logging.events.fire_console_event(event, log_level=log_level)

    def fire_skipped_exec_event(self, is_task: bool = False) -> None:
        """
        Create ExecutionEvent informing user that a task was skipped
        """
        if is_task:
            label = f"TASK [blue]{self.name}[/blue]"
        else:
            label = self.name.lower()
        self._fire(f"[orange1]SKIPPING[/orange1] {label}", "SKIP", None, "info")
        return None

    def fire_running_exec_event(self, is_task: bool = False) -> None:
        """
        Create ExecutionEvent informing user of task execution
        """
        if is_task:
            label = f"RUNNING TASK [blue]{self.name}[/blue]"
        else:
            label = self.name
        self._fire(label, "RUN", None, "info")
        return None

    def fire_success_exec_event(self, start_time: float, is_task: bool = False) -> None:
        """
        Create ExecutionEvent informing user of successful task execution. The
        execution time is measured from `start_time`.
        """
        if is_task:
            label = f"TASK [blue]{self.name}[/blue]"
        else:
            label = self.name.lower()
        elapsed = time.time() - start_time
        self._fire(f"[green]FINISHED[/green] {label}", "DONE", elapsed, "info")
        return None

    def fire_error_exec_event(self, start_time: float, is_task: bool = False) -> None:
        """
        Create ExecutionEvent informing user of error in task execution. The
        execution time is measured from `start_time`.
        """
        if is_task:
            label = f"IN TASK [blue]{self.name}[/blue]"
        else:
            label = self.name.lower()
        elapsed = time.time() - start_time
        self._fire(f"[red]ERROR[/red] {label}", "ERROR", elapsed, "error")
        return None

    def run(
        self, fire_exec_events: bool = True, is_task: bool = False, **kwargs
    ) -> Any:
        """
        Call the wrapped function with `kwargs`, firing events around it.

        args:
            fire_exec_events: whether to fire the running / success / skipped
                events. The error event is fired regardless. Default is True
            is_task: whether the wrapped function is a task (changes the wording of
                the events). Default is False
        returns:
            the output of the wrapped function
        """
        start_time = time.time()
        if fire_exec_events:
            self.fire_running_exec_event(is_task)

        # The only events we ever really skip are actual tasks, and the skip logic
        # lives in the task's own `exec` function. So the function is simply called
        # here, and a returned PrismTask is inspected to see if it was skipped.
        try:
            output = self.func(**kwargs)
            if fire_exec_events:
                if isinstance(output, PrismTask) and output.is_done:
                    self.fire_skipped_exec_event(is_task)
                else:
                    self.fire_success_exec_event(start_time, is_task)
            return output
        except Exception:
            self.fire_error_exec_event(start_time, is_task)
            raise
126 |
--------------------------------------------------------------------------------
/prism/logging/loggers.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import re
3 | import sys
4 | from io import StringIO
5 | from logging import Formatter, Handler, StreamHandler
6 | from logging.handlers import RotatingFileHandler
7 | from pathlib import Path
8 | from typing import List, Literal, Optional, Union
9 |
10 | from rich.console import Console
11 | from rich.logging import RichHandler
12 | from rich.theme import Theme
13 |
# Module-level console and logger. They are only declared here; `set_up_logger`
# assigns them.
CONSOLE: Console
DEFAULT_LOGGER: logging.Logger


# Format used by `FileHandlerFormatter` and `logging.basicConfig`
LOGFORMAT = "%(asctime)s | %(message)s"
# Format used for the Rich handler
LOGFORMAT_RICH = "| %(message)s"
20 |
21 |
def escape_rich_formatting(string: str) -> str:
    """
    Remove Rich formatting tags, e.g., [blue]...[/blue] or [orange1]...[/orange1].
    This cleans up the logs we save to a file.

    A tag is an optional `/` followed by a lowercase letter and then any number of
    lowercase letters, digits, or underscores, all wrapped in square brackets.
    Bracketed text that does not start with a lowercase letter (e.g., `[1/5]`) is
    left untouched.

    args:
        string: text that may contain Rich tags
    returns:
        `string` with the tags removed
    """
    return re.sub(r"\[/?[a-z][a-z0-9_]*\]", "", string)
28 |
29 |
def console_print(msg: Union[List[str], str], **kwargs) -> None:
    """
    Thin wrapper around `console.print(...)` that also adds the printed messages to
    our logs, using the first handler attached to the default logger.

    args:
        msg: a string, or a list of strings (used for tracebacks) that is joined
            into a single message
        kwargs: forwarded to `CONSOLE.print`
    """
    try:
        log_handler = DEFAULT_LOGGER.handlers[0]
    except IndexError:
        # No handler to log to: just print
        if isinstance(msg, str):
            CONSOLE.print(msg, **kwargs)
        else:
            CONSOLE.print("\n" + "".join(msg), **kwargs)
        return None

    if isinstance(msg, str):
        CONSOLE.print(msg, **kwargs)
        log_text = escape_rich_formatting(msg)

        # If the message is a header / tail rule, then ignore
        if re.search(r"^\─+$", log_text):
            return None

        # Silently log the message. We only silently log `info` messages.
        log_level = 20
    else:
        # For tracebacks: these are sent to the handler only, at `error` level
        log_text = "\n" + "".join(msg)
        log_level = 40

    log_handler.emit(
        logging.LogRecord(
            name="",
            level=log_level,
            pathname="",
            lineno=0,
            msg=log_text,
            args=None,
            exc_info=None,
        )
    )
    return None
78 |
79 |
class FileHandlerFormatter(logging.Formatter):
    """
    Formatter that strips Rich formatting from the formatted record. Records whose
    message is empty or only whitespace are written without a timestamp.
    """

    def format(self, record):
        message = record.msg
        if re.search(r"^\s+$", message) or message == "":
            layout = "%(message)s"
        else:
            layout = LOGFORMAT
        formatted = logging.Formatter(layout, "%H:%M:%S").format(record)
        return escape_rich_formatting(formatted)
87 |
88 |
def set_up_logger(
    log_level: Literal["info", "warning", "error", "debug", "critical"],
    fpath: Optional[Union[str, Path, StringIO]],
    rich_logging: bool = True,
):
    """
    Create the module-level `CONSOLE` and `DEFAULT_LOGGER` objects. Nothing is done
    if `DEFAULT_LOGGER` has already been set.

    args:
        log_level: name of the logging level; upper-cased and passed to
            `logging.basicConfig`
        fpath: where to send logs. A `StringIO` becomes the console's output file
            and no log file is created; any other truthy value is used as the
            filename of a rotating log file
        rich_logging: if True, log via a `RichHandler`; otherwise use a plain
            `StreamHandler` on stdout. Default is True
    """
    if globals().get("DEFAULT_LOGGER", None) is None:
        global CONSOLE
        global DEFAULT_LOGGER

        # Instantiate Console
        CONSOLE = Console(
            highlight=False,
            theme=Theme(
                {
                    "logging.level.info": "cyan",
                    "logging.level.warning": "yellow",
                    "logging.level.error": "red",
                    "logging.level.debug": "orange1",
                }
            ),
            file=fpath if isinstance(fpath, StringIO) else sys.stdout,
        )

        # Instantiate Rich handler
        handlers: List[Handler] = []
        if rich_logging:
            rh = RichHandler(
                rich_tracebacks=True,
                tracebacks_width=120,
                show_path=False,
                omit_repeated_times=False,
                console=CONSOLE,
                markup=True,
                log_time_format="[%X]",
                highlighter=None,
            )
            rh.setFormatter(Formatter(LOGFORMAT_RICH))
            handlers.append(rh)
        else:
            sh = StreamHandler(sys.stdout)
            sh.setFormatter(FileHandlerFormatter())
            handlers.append(sh)

        # We also want to save our logs on disk, unless the inputted file is a StringIO
        # class (used in our tests).
        if fpath and not isinstance(fpath, StringIO):
            file_handler = RotatingFileHandler(
                filename=fpath,
                maxBytes=1024 * 1024 * 10,  # 10Mb
                backupCount=10,
            )
            file_handler.setFormatter(FileHandlerFormatter())
            handlers.append(file_handler)

        logging.basicConfig(
            level=log_level.upper(),
            format=LOGFORMAT,
            handlers=handlers,
        )
        DEFAULT_LOGGER = logging.getLogger("prism")
        # The file handler is also attached directly to the `prism` logger;
        # `console_print` uses that logger's first handler to write to the log file.
        if fpath and not isinstance(fpath, StringIO):
            DEFAULT_LOGGER.addHandler(file_handler)
151 |
--------------------------------------------------------------------------------
/prism/runtime/__init__.py:
--------------------------------------------------------------------------------
1 | from .current_run import * # noqa: F401, F403
2 |
--------------------------------------------------------------------------------
/prism/runtime/current_run.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict, List, Optional, Union
2 |
3 | # Prism-specific imports
4 | from prism.connectors.base import Connector
5 |
6 |
class _CurrentRun:
    """
    THIS CLASS SHOULD NEVER BE IMPORTED DIRECTLY. INSTEAD, IMPORT `CurrentRun`, i.e.,
    ```python
    from prism.runtime import CurrentRun
    ```

    CurrentRun stores contextual variables that tasks can access during runtime:
    context variables, connectors, and the outputs of tasks that have been run.
    """

    run_id: str
    _data: Dict[str, Any]
    _refs: Dict[str, Any]
    connectors: Dict[str, Connector]

    def __init__(self, data: Dict[str, Any]):
        self.connectors = {}
        self._refs = {}
        self._data = data

    def _set_connectors(self, connectors: List[Connector]):
        # Index each connector by its `id`
        for connector in connectors:
            self.connectors[connector.id] = connector

    def _setup(
        self,
        run_id: str,
        runtime_ctx: Dict[str, Any],
        connectors: List[Union[Connector]],
    ):
        # Runtime context is merged into (and overrides) the existing context
        self.run_id = run_id
        self._data.update(runtime_ctx)
        self._set_connectors(connectors)

    def ctx(self, key: str, default_value: Optional[Any] = None) -> Any:
        """
        Get the value associated with context variable `key`. Context variables can be
        set in two places: when instantiating the PrismProject (with the `ctx` keyword
        argument) and when creating the run (with the `runtime_ctx` keyword argument
        in the PrismProject's `run` method).

        args:
            key: variable to retrieve
            default_value: value to return if `key` is not found. Default is `None`
        returns:
            value associated with context variable `key`
        """
        context = self._data
        return context.get(key, default_value)

    def _set_task_output_value(self, task_id: str, value: Any):
        self._refs[task_id] = value

    def ref(self, task_id: str) -> Any:
        """
        Get the output of task with ID `task_id`

        args:
            task_id: ID of task from which to retrieve output
        returns:
            the output of the inputted `task_id`
        raises:
            ValueError if no output has been stored for `task_id`
        """
        if task_id in self._refs.keys():
            return self._refs[task_id]
        raise ValueError(f"task ID `{task_id}` not found in run `{self.run_id}`!")

    def conn(self, connector_id: str) -> Connector:
        """
        Get the connector object associated with ID `connector_id`. These are defined in
        the client's instantiation.

        args:
            connector_id: ID of the connector to retrieve
        returns:
            connector object associated with `connector_id`
        raises:
            ValueError if the connector ID is not found
        """
        if connector_id in self.connectors.keys():
            return self.connectors[connector_id]
        raise ValueError(
            f"connector ID `{connector_id}` not found run `{self.run_id}`!"
        )
95 |
96 |
if __name__ != "__main__":
    # Create a `CurrentRun` object. This is the object that users import within their
    # task modules. Here, we are relying on Python's import caching to ensure that the
    # refs persist across tasks. When the user creates a run, we automatically import
    # this module and create the `CurrentRun` object. Then, when users import CurrentRun
    # within their tasks, Python will not re-import this module and create a new
    # CurrentRun object. Rather, it will reuse the CurrentRun object that already
    # exists. When a task is run, its output is stored in the CurrentRun's `_refs`
    # attribute (via `_set_task_output_value`), which is what allows a task's output
    # to persist across tasks.
    CurrentRun: _CurrentRun = _CurrentRun({})
108 |
--------------------------------------------------------------------------------
/prism/target.py:
--------------------------------------------------------------------------------
1 | """
2 | Target class definition.
3 |
4 | Table of Contents
5 | - Imports
6 | - Class definitions
7 | - Target decorators
8 | """
9 |
10 | ###########
11 | # Imports #
12 | ###########
13 |
14 | # Prism imports
15 | import prism.exceptions
16 |
17 | #####################
18 | # Class definitions #
19 | #####################
20 |
21 |
class PrismTarget:
    """
    Base class for targets: an object (`obj`) paired with the location (`loc`) it is
    saved to / opened from. Subclasses implement `save` and `open`.
    """

    def __init__(self, obj, loc):
        self.loc = loc
        self.obj = obj

    @classmethod
    def from_args(cls, obj, loc):
        """
        Build a target from an object and a location.
        """
        target = cls(obj, loc)
        return target

    def save(self):
        """
        Save `obj` to `loc`. Not implemented in the base class.

        raises:
            prism.exceptions.RuntimeException, always
        """
        error_message = "`save` method not implemented"
        raise prism.exceptions.RuntimeException(message=error_message)

    @classmethod
    def open(cls, loc):
        """
        Open the target stored at `loc`. Not implemented in the base class.

        raises:
            prism.exceptions.RuntimeException, always
        """
        error_message = "`open` method not implemented"
        raise prism.exceptions.RuntimeException(message=error_message)
37 |
38 |
class PandasCsv(PrismTarget):
    """
    Target that writes `obj` with its `to_csv` method and reads it back with
    `pandas.read_csv`.
    """

    def save(self, **kwargs):
        # Keyword arguments are forwarded to `to_csv`
        frame = self.obj
        frame.to_csv(self.loc, **kwargs)

    @classmethod
    def open(cls, loc):
        # pandas is imported lazily, only when a CSV is actually opened
        import pandas as pd

        return cls(pd.read_csv(loc), loc)
49 |
50 |
class NumpyTxt(PrismTarget):
    """
    Target that writes `obj` with `numpy.savetxt` and reads it back with
    `numpy.loadtxt`.
    """

    def save(self, **kwargs):
        # numpy is imported lazily; keyword arguments are forwarded to `savetxt`
        import numpy as np

        destination, data = self.loc, self.obj
        np.savetxt(destination, data, **kwargs)

    @classmethod
    def open(cls, loc):
        import numpy as np

        return cls(np.loadtxt(loc), loc)
63 |
64 |
class Txt(PrismTarget):
    """
    Target that writes the string `obj` to a text file and reads it back.
    """

    def save(self, **kwargs):
        """
        Write `obj` to `loc`, replacing any existing file.

        args:
            kwargs: forwarded to the built-in `open` (e.g., `encoding`). File
                objects' `write` method does not accept keyword arguments, so they
                cannot be forwarded there.
        """
        # The `with` block closes the file; no explicit close is needed
        with open(self.loc, "w", **kwargs) as f:
            f.write(self.obj)

    @classmethod
    def open(cls, loc):
        """
        Read the whole file at `loc` and return a `Txt` target holding its contents.
        """
        # Inside this method, `open` is the built-in function, not this classmethod
        with open(loc, "r") as f:
            obj = f.read()
        return cls(obj, loc)
76 |
77 |
class MatplotlibPNG(PrismTarget):
    """
    Target that writes `obj` with its `savefig` method and reads the file back as
    a PIL image.
    """

    def save(self, **kwargs):
        # Keyword arguments are forwarded to `savefig`
        figure = self.obj
        figure.savefig(self.loc, **kwargs)

    @classmethod
    def open(cls, loc):
        # PIL is imported lazily, only when an image is actually opened
        from PIL import Image

        return cls(Image.open(loc), loc)
88 |
89 |
class JSON(PrismTarget):
    """
    Target that serializes `obj` to a JSON file and parses it back.
    """

    def save(self, **kwargs):
        import json

        # Serialize first (keyword arguments go to `json.dumps`), so the file is
        # only opened for writing once serialization has succeeded.
        serialized = json.dumps(self.obj, **kwargs)
        with open(self.loc, "w") as out_file:
            out_file.write(serialized)

    @classmethod
    def open(cls, loc):
        import json

        with open(loc, "r") as in_file:
            contents = in_file.read()
        return cls(json.loads(contents), loc)
105 |
--------------------------------------------------------------------------------
/prism/task.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from typing import Any, Callable, List, Optional, Union
3 |
4 | # Prism imports
5 | import prism.exceptions
6 | import prism.target
7 |
8 |
class PrismTask:
    """
    Base class for tasks. A task either overrides `run` or is given a `func` to
    call; `exec` runs it and stores the result, and `get_output` returns it.
    """

    retries: int
    retry_delay_seconds: int

    def __init__(
        self,
        task_id: str,
        func: Optional[Callable[..., Any]] = None,
        bool_run: bool = True,
    ):
        """
        Create an instance of the PrismTask. Nothing is run here: call `exec` to
        run the task and assign the result to the `output` attribute.

        args:
            task_id: ID of the task
            func: function called by the default `run` method. Default is None
            bool_run: whether the task should actually be run by `exec`. Default is
                True
        """
        self.task_id = task_id
        self.func = func
        self.bool_run = bool_run

        # Types, locs, and kwargs for target
        self.types: List[prism.target.PrismTarget] = []
        self.locs: List[Union[str, Path]] = []
        self.kwargs: List[Any] = []

        # Retries
        self.retries = 0
        self.retry_delay_seconds = 0

        # Initialize the is_done attribute
        self.is_done: bool = False

    def exec(self):
        """
        Run the task and store the result in the `output` attribute.

        raises:
            prism.exceptions.RuntimeException if `run` returns None
        """
        # `run` is only left un-run when all of the following hold: its name is
        # still `run` (presumably meaning the `target` decorator isn't applied —
        # verify against the decorator), the task isn't marked as done, and
        # `bool_run` is False. In that case the user is attempting to use this
        # task's output without explicitly running the task or setting a target,
        # and `get_output` will throw an error.
        if self.run.__name__ == "run" and not self.is_done and not self.bool_run:
            self.output = None
            return

        # Otherwise, execute `run` (a decorator, if any, uses bool_run in its
        # internal computation) and set the `output` attribute to its result.
        self.output = self.run()
        if self.output is None:
            raise prism.exceptions.RuntimeException(
                "`run` method must produce a non-null output"
            )

    def done(self) -> bool:
        """
        Whether the task is already done. The base implementation returns False.
        """
        return False

    def run(self):
        """
        Call `func` and return its result.

        raises:
            prism.exceptions.RuntimeException if no `func` was provided
        """
        if self.func is not None:
            return self.func()
        else:
            raise prism.exceptions.RuntimeException("`run` method not implemented")

    def get_output(self):
        """
        Return the output attribute

        raises:
            prism.exceptions.RuntimeException if there is no output, i.e., `exec`
            has not been called or it left the output as None
        """
        # `output` only exists once `exec` has been called, so a missing attribute
        # is treated the same as None: the user has not specified a target nor have
        # they explicitly run the task.
        output = getattr(self, "output", None)
        if output is None:
            msg = f"cannot access the output of `{self.__class__.__name__}` without either explicitly running task or setting a target"  # noqa: E501
            raise prism.exceptions.RuntimeException(message=msg)
        return output
85 |
--------------------------------------------------------------------------------
/prism/templates/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/templates/__init__.py
--------------------------------------------------------------------------------
/prism/templates/starter_project/.gitignore:
--------------------------------------------------------------------------------
1 | /.ipynb_checkpoints
--------------------------------------------------------------------------------
/prism/templates/starter_project/__init__.py:
--------------------------------------------------------------------------------
1 | import os
2 |
# Directory containing this file, i.e., the starter project template itself
STARTER_PROJECT_TEMPLATE_DIR = os.path.dirname(__file__)
4 |
--------------------------------------------------------------------------------
/prism/templates/starter_project/main.py:
--------------------------------------------------------------------------------
1 | """Entrypoint for your Prism project."""
2 |
3 | from pathlib import Path
4 |
5 | from prism.client import PrismProject
6 |
# Project: tasks are read from the `tasks` directory under the current working
# directory, and the `OUTPUT` context variable points at the `output` directory
# under the current working directory.
project = PrismProject(
    version="1.0",
    tasks_dir=Path.cwd() / "tasks",
    concurrency=2,
    ctx={"OUTPUT": Path.cwd() / "output"},
)


# Run the project, but only when this file is executed as a script
if __name__ == "__main__":
    project.run()
19 |
--------------------------------------------------------------------------------
/prism/templates/starter_project/output/.exists:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/templates/starter_project/output/.exists
--------------------------------------------------------------------------------
/prism/templates/starter_project/tasks/class_task.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import prism.decorators
4 | import prism.target
5 |
6 | # Prism imports
7 | import prism.task
8 | from prism.runtime import CurrentRun
9 |
10 |
class ExampleTask(prism.task.PrismTask):
    # Example of a class-based task: subclass `PrismTask` and implement `run`
    task_id = "example-class-task"

    # Run. The target is a `Txt` target located at `hello_world.txt` inside the
    # directory stored in the `OUTPUT` context variable (or inside "output" if that
    # variable is not set).
    @prism.decorators.target(
        type=prism.target.Txt,
        loc=Path(CurrentRun.ctx("OUTPUT", "output")).resolve() / "hello_world.txt",
    )
    def run(self):
        return "Hello, world!"
21 |
--------------------------------------------------------------------------------
/prism/templates/starter_project/tasks/decorated_task.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | # Prism imports
4 | import prism.target
5 | from prism.decorators import target, task
6 | from prism.runtime import CurrentRun
7 |
8 |
# Example of a function-based task: the `task` decorator is given the task ID and a
# list of targets. The single target here is a `Txt` target located at
# `hello_world.txt` inside the directory stored in the `OUTPUT` context variable (or
# inside "output" if that variable is not set).
@task(
    task_id="example-decorated-task",
    targets=[
        target(
            type=prism.target.Txt,
            loc=Path(CurrentRun.ctx("OUTPUT", "output")).resolve() / "hello_world.txt",
        )
    ],
)
def example_task():
    return "Hello, world!"
20 |
--------------------------------------------------------------------------------
/prism/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/tests/__init__.py
--------------------------------------------------------------------------------
/prism/tests/integration/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/tests/integration/__init__.py
--------------------------------------------------------------------------------
/prism/tests/integration/additional_package/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/tests/integration/additional_package/__init__.py
--------------------------------------------------------------------------------
/prism/tests/integration/additional_package/cli_callbacks.py:
--------------------------------------------------------------------------------
def print_success():
    """Return the fixed success message (nothing is printed here)."""
    message = "Success!"
    return message
3 |
--------------------------------------------------------------------------------
/prism/tests/integration/additional_package/cli_connectors.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | from prism.connectors import (
4 | PostgresConnector,
5 | SnowflakeConnector,
6 | )
7 |
8 |
# Postgres connector: credentials and host are read from POSTGRES_* environment
# variables (a missing variable yields None); the port is fixed at 5432.
postgres_connector = PostgresConnector(
    id="postgres-connector",
    user=os.getenv("POSTGRES_USER"),
    password=os.getenv("POSTGRES_PASSWORD"),
    port=5432,
    host=os.getenv("POSTGRES_HOST"),
    database=os.getenv("POSTGRES_DB"),
    autocommit=True,
)


# Snowflake connector: every setting is read from a SNOWFLAKE_* environment
# variable (a missing variable yields None).
snowflake_connector = SnowflakeConnector(
    id="snowflake-connector",
    user=os.getenv("SNOWFLAKE_USER"),
    password=os.getenv("SNOWFLAKE_PASSWORD"),
    account=os.getenv("SNOWFLAKE_ACCOUNT"),
    role=os.getenv("SNOWFLAKE_ROLE"),
    warehouse=os.getenv("SNOWFLAKE_WAREHOUSE"),
    database=os.getenv("SNOWFLAKE_DATABASE"),
    schema=os.getenv("SNOWFLAKE_SCHEMA"),
)
30 |
--------------------------------------------------------------------------------
/prism/tests/integration/additional_package/utils.py:
--------------------------------------------------------------------------------
def task1_return():
    """Return the fixed greeting string for module01.Task01."""
    greeting = "Hello from module01.Task01"
    return greeting
3 |
--------------------------------------------------------------------------------
/prism/tests/integration/integration_utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | from pathlib import Path
3 | import time
4 | from typing import Optional
5 |
6 | import prism.logging.loggers
7 | import prism.logging.events
8 |
9 |
def _previous_console_output():
    """
    Return the text accumulated so far in the console's file object, or an empty
    string when that lookup raises `AttributeError`
    """
    try:
        return prism.logging.loggers.CONSOLE.file.getvalue()
    except AttributeError:
        return ""
16 |
17 |
def _mock_fire_console_event(
    event: Optional[prism.logging.events.Event], sleep=0.01, log_level: str = "info"
):
    """
    Stand-in for `prism.logging.events.fire_console_event`

    Prints the event's message to the console when an event is given, then
    sleeps for `sleep` seconds. `log_level` is accepted but not used here.
    """
    if event:
        console = prism.logging.loggers.CONSOLE
        console.print(event.message())
    time.sleep(sleep)
24 |
25 |
def _console_mocker(monkeypatch):
    """
    Patch `prism.logging.events.fire_console_event` with the mock defined in this
    module, using the given `monkeypatch` object
    """
    patch_target = "prism.logging.events.fire_console_event"
    monkeypatch.setattr(patch_target, _mock_fire_console_event)
30 |
31 |
32 | def _remove_files_in_output(wkdir):
33 | """
34 | Remove file outputs from `output` folder of project
35 | """
36 | for _file in Path(wkdir / "output").iterdir():
37 | if Path(wkdir / "output" / _file).is_file() and _file.name != ".exists":
38 | os.unlink(_file)
39 |
40 |
41 | def _file_as_str(path):
42 | """
43 | Open file as string
44 | """
45 | with open(path, "r") as f:
46 | compiled_task_str = f.read()
47 | f.close()
48 | return compiled_task_str
49 |
--------------------------------------------------------------------------------
/prism/tests/integration/test_cli.py:
--------------------------------------------------------------------------------
1 | # Standard library imports
2 | from io import StringIO
3 | import json
4 | import os
5 | from pathlib import Path
6 | import shutil
7 |
8 | # Prism imports
9 | from prism.main import cli
10 | import prism.logging.loggers
11 | from prism.tests.integration.integration_utils import (
12 | _previous_console_output,
13 | _remove_files_in_output,
14 | _console_mocker,
15 | _file_as_str,
16 | )
17 |
18 |
# Folder holding this test module, and the `test_projects` folder beneath it
TEST_CASE_WKDIR = os.path.dirname(__file__)
TEST_PROJECTS = Path(TEST_CASE_WKDIR, "test_projects")
22 |
23 |
24 | # Tests
def test_init(monkeypatch):
    """
    `init --project-name 001_init` creates the `001_init` folder inside the test
    projects directory
    """
    # Set up the logger
    prism.logging.loggers.set_up_logger(
        "info",
        StringIO(),
    )
    init_path = Path(TEST_PROJECTS) / "001_init"

    # Set working directory
    os.chdir(TEST_PROJECTS)
    try:
        _console_mocker(monkeypatch)

        # Remove folder '001_init' if it already exists
        if init_path.is_dir():
            shutil.rmtree(init_path)

        # Execute the CLI command
        args = ["init", "--project-name", "001_init"]
        _ = cli(args, standalone_mode=False)
        assert init_path.is_dir()
    finally:
        # Change to the parent directory, even when the command or the
        # assertion fails, so a failure here does not leak the working
        # directory into later tests
        os.chdir(TEST_PROJECTS.parent)
48 |
49 |
def test_run_normal(monkeypatch):
    """
    Run project `005_simple_project_no_null_tasks` through the CLI and check the
    contents of `task01.txt` and `task02.txt` in its output folder
    """
    wkdir = TEST_PROJECTS / "005_simple_project_no_null_tasks"
    output_dir = wkdir / "output"
    _console_mocker(monkeypatch)

    # Remove output
    _remove_files_in_output(wkdir)
    assert not (output_dir / "task01.txt").is_file()
    assert not (output_dir / "task02.txt").is_file()

    # Execute the CLI command
    args = [
        "run",
        "--tasks-dir",
        str(wkdir / "tasks"),
        "--disable-rich-logging",
        "--runtime-ctx",
        json.dumps({"OUTPUT": str(output_dir)}),
    ]
    _ = cli(args, standalone_mode=False)

    # Check outputs (each file is read exactly once)
    task01_txt = _file_as_str(output_dir / "task01.txt")
    task02_txt = _file_as_str(output_dir / "task02.txt")
    assert "Hello from task 1!" == task01_txt
    assert "Hello from task 1!" + "\n" + "Hello from task 2!" == task02_txt
78 |
79 |
def test_run_callbacks(monkeypatch):
    """
    Run project `005_simple_project_no_null_tasks` with an `--on-success`
    callback and check both the written files and the console log lines
    """
    wkdir = TEST_PROJECTS / "005_simple_project_no_null_tasks"
    output_dir = wkdir / "output"
    _console_mocker(monkeypatch)

    # Previous output, captured so that only this run's log lines are checked
    prev_console_output = _previous_console_output()

    # Remove output
    _remove_files_in_output(wkdir)
    assert not (output_dir / "task01.txt").is_file()
    assert not (output_dir / "task02.txt").is_file()

    # Execute the CLI command
    args = [
        "run",
        "--tasks-dir",
        str(wkdir / "tasks"),
        "--on-success",
        "additional_package.cli_callbacks.print_success",
        "--disable-rich-logging",
        "--runtime-ctx",
        json.dumps({"OUTPUT": str(output_dir)}),
    ]
    _ = cli(args, standalone_mode=False)

    # Check outputs (each file is read exactly once)
    task01_txt = _file_as_str(output_dir / "task01.txt")
    task02_txt = _file_as_str(output_dir / "task02.txt")
    assert "Hello from task 1!" == task01_txt
    assert "Hello from task 1!" + "\n" + "Hello from task 2!" == task02_txt

    # Logs
    output_str = prism.logging.loggers.CONSOLE.file.getvalue()  # type: ignore
    output_str = output_str.replace(prev_console_output, "")
    for i in range(1, 5):
        assert f"RUNNING TASK module0{i}.Task0{i}" in output_str
        assert f"FINISHED TASK module0{i}.Task0{i}" in output_str
    assert "Running on_success callbacks" in output_str
    assert "FINISHED running print_success callback" in output_str
122 |
123 |
def test_run_connectors(monkeypatch):
    """
    Run the Snowflake and Postgres tasks of project `013_connectors` through the
    CLI and check that the three expected CSV files appear in its output folder
    """
    wkdir = TEST_PROJECTS / "013_connectors"
    _console_mocker(monkeypatch)

    # Start from a clean output folder
    _remove_files_in_output(wkdir)

    # Build the CLI arguments
    output_dir = wkdir / "output"
    task_flags = [
        "--task",
        "snowflake_task.SnowflakeTask",
        "--task",
        "postgres_task.PostgresTask",
    ]
    connector_flags = [
        "--connector",
        "additional_package.cli_connectors.snowflake_connector",
        "--connector",
        "additional_package.cli_connectors.postgres_connector",
    ]
    args = [
        "run",
        "--tasks-dir",
        str(wkdir / "tasks"),
        *task_flags,
        *connector_flags,
        "--disable-rich-logging",
        "--runtime-ctx",
        json.dumps({"OUTPUT": str(output_dir)}),
    ]
    _ = cli(args, standalone_mode=False)

    # Check output, then clean up
    expected_files = (
        "sample_postgres_data.csv",
        "machinery_sample.csv",
        "household_sample.csv",
    )
    for file_name in expected_files:
        assert (wkdir / "output" / file_name).is_file()
    _remove_files_in_output(wkdir)
156 |
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/001_init/.gitignore:
--------------------------------------------------------------------------------
1 | /.ipynb_checkpoints
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/001_init/__init__.py:
--------------------------------------------------------------------------------
1 | import os
2 |
# Directory that contains this `__init__.py`
STARTER_PROJECT_TEMPLATE_DIR = os.path.dirname(__file__)
4 |
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/001_init/main.py:
--------------------------------------------------------------------------------
1 | """Entrypoint for your Prism project."""
2 |
3 | from pathlib import Path
4 |
5 | from prism.client import PrismProject
6 |
# Project: tasks are read from `<cwd>/tasks`, and the run context exposes
# `<cwd>/output` under the "OUTPUT" key
project = PrismProject(
    version="1.0",
    tasks_dir=Path.cwd().joinpath("tasks"),
    concurrency=2,
    ctx={"OUTPUT": Path.cwd().joinpath("output")},
)


# Run the project only when this file is executed as a script
if __name__ == "__main__":
    project.run()
19 |
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/001_init/output/.exists:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/tests/integration/test_projects/001_init/output/.exists
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/001_init/tasks/class_task.py:
--------------------------------------------------------------------------------
1 | import prism.task
2 | import prism.target
3 | import prism.decorators
4 | from prism.runtime import CurrentRun
5 |
6 |
class ExampleTask(prism.task.PrismTask):
    """Class-based example task that returns a fixed greeting."""

    task_id = "example-class-task"

    # Txt target named `hello_world.txt` under the run context's "OUTPUT" value
    @prism.decorators.target(
        type=prism.target.Txt,
        loc=CurrentRun.ctx("OUTPUT") / "hello_world.txt",
    )
    def run(self):
        """Return the greeting string."""
        greeting = "Hello, world!"
        return greeting
16 |
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/001_init/tasks/decorated_task.py:
--------------------------------------------------------------------------------
1 | import prism.target
2 | from prism.decorators import (
3 | task,
4 | target,
5 | )
6 | from prism.runtime import CurrentRun
7 |
8 |
# Txt target named `hello_world.txt` under the run context's "OUTPUT" value
@task(
    task_id="example-decorated-task",
    targets=[
        target(
            type=prism.target.Txt,
            loc=CurrentRun.ctx("OUTPUT") / "hello_world.txt",
        ),
    ],
)
def example_task():
    """Return the greeting string."""
    greeting = "Hello, world!"
    return greeting
17 |
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/004_simple_project/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/tests/integration/test_projects/004_simple_project/__init__.py
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/004_simple_project/dev/dev.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "7eafd19a",
6 | "metadata": {},
7 | "source": [
8 | "**Use this notebook for developing code before productionizing it within tasks**"
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": null,
14 | "id": "b771bf53",
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "## CODE HERE..."
19 | ]
20 | }
21 | ],
22 | "metadata": {
23 | "kernelspec": {
24 | "display_name": "Python 3",
25 | "language": "python",
26 | "name": "python3"
27 | },
28 | "language_info": {
29 | "codemirror_mode": {
30 | "name": "ipython",
31 | "version": 3
32 | },
33 | "file_extension": ".py",
34 | "mimetype": "text/x-python",
35 | "name": "python",
36 | "nbconvert_exporter": "python",
37 | "pygments_lexer": "ipython3",
38 | "version": "3.7.4"
39 | }
40 | },
41 | "nbformat": 4,
42 | "nbformat_minor": 5
43 | }
44 |
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/004_simple_project/modules/module01.py:
--------------------------------------------------------------------------------
1 | import prism.decorators
2 | import prism.target
3 | import prism.task
4 | from prism.runtime import CurrentRun
5 |
6 |
class Task01(prism.task.PrismTask):
    """First task of the simple project; returns a fixed greeting."""

    # Txt target named `task01.txt` under the run context's "OUTPUT" value
    @prism.decorators.target(
        type=prism.target.Txt,
        loc=CurrentRun.ctx("OUTPUT") / "task01.txt",
    )
    def run(self):
        """Return the greeting string for task 1."""
        greeting = "Hello from task 1!"
        return greeting
14 |
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/004_simple_project/modules/module02.py:
--------------------------------------------------------------------------------
1 | import prism.decorators
2 | import prism.target
3 | import prism.task
4 | from prism.runtime import CurrentRun
5 |
6 |
class Task02(prism.task.PrismTask):
    """Second task of the simple project; returns the tail of Task01's output."""

    # NOTE(review): this target is named "task01.txt", not "task02.txt" --
    # confirm that reusing Task01's file name is intentional.
    @prism.decorators.target(
        type=prism.target.Txt,
        loc=CurrentRun.ctx("OUTPUT") / "task01.txt",
    )
    def run(self):
        """Return the last five items of what `module01.Task01` produced."""
        upstream = CurrentRun.ref("module01.Task01")
        return upstream[-5:]
15 |
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/004_simple_project/modules/module03.py:
--------------------------------------------------------------------------------
1 | import prism.task
2 |
3 |
class Task03(prism.task.PrismTask):
    """Placeholder task whose `run` method returns `None`."""

    def run(self):
        """Return `None`; the task body has not been written yet."""
        # TODO: Implement the `run` method
        return None
9 |
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/004_simple_project/output/.exists:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/tests/integration/test_projects/004_simple_project/output/.exists
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/005_simple_project_no_null_tasks/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/tests/integration/test_projects/005_simple_project_no_null_tasks/__init__.py
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/005_simple_project_no_null_tasks/output/task01.txt:
--------------------------------------------------------------------------------
1 | Hello from task 1!
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/005_simple_project_no_null_tasks/output/task02.txt:
--------------------------------------------------------------------------------
1 | Hello from task 1!
2 | Hello from task 2!
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/005_simple_project_no_null_tasks/tasks/module01.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import prism.decorators
4 | import prism.target
5 |
6 | # Prism imports
7 | import prism.task
8 | from prism.runtime import CurrentRun
9 |
10 |
class Task01(prism.task.PrismTask):
    """First task in the chain; returns a fixed greeting."""

    # Txt target named `task01.txt` under the run context's "OUTPUT" value
    @prism.decorators.target(
        type=prism.target.Txt,
        loc=Path(CurrentRun.ctx("OUTPUT")) / "task01.txt",
    )
    def run(self):
        """Return the greeting string for task 1."""
        greeting = "Hello from task 1!"
        return greeting
18 |
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/005_simple_project_no_null_tasks/tasks/module02.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import prism.decorators
4 | import prism.target
5 |
6 | # Prism imports
7 | import prism.task
8 | from prism.runtime import CurrentRun
9 |
10 |
class Task02(prism.task.PrismTask):
    """Second task in the chain; extends Task01's output with its own line."""

    # Txt target named `task02.txt` under the run context's "OUTPUT" value
    @prism.decorators.target(
        type=prism.target.Txt,
        loc=Path(CurrentRun.ctx("OUTPUT")) / "task02.txt",
    )
    def run(self):
        """Return the output of `module01.Task01` plus a task-2 greeting line."""
        upstream = CurrentRun.ref("module01.Task01")
        return upstream + "\n" + "Hello from task 2!"
19 |
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/005_simple_project_no_null_tasks/tasks/module03.py:
--------------------------------------------------------------------------------
1 | # Prism imports
2 | import prism.decorators
3 | import prism.target
4 | import prism.task
5 | from prism.runtime import CurrentRun
6 |
7 |
class Task03(prism.task.PrismTask):
    """Third task in the chain; extends Task02's output with its own line."""

    def run(self):
        """Return the output of `module02.Task02` plus a task-3 greeting line."""
        upstream = CurrentRun.ref("module02.Task02")
        return upstream + "\n" + "Hello from task 3!"
13 |
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/005_simple_project_no_null_tasks/tasks/module04.py:
--------------------------------------------------------------------------------
1 | # Prism imports
2 | import prism.decorators
3 | import prism.target
4 | import prism.task
5 | from prism.runtime import CurrentRun
6 |
7 |
class Task04(prism.task.PrismTask):
    """Fourth task in the chain; extends Task03's output with its own line."""

    def run(self):
        """Return the output of `module03.Task03` plus a task-4 greeting line."""
        upstream = CurrentRun.ref("module03.Task03")
        return upstream + "\n" + "Hello from task 4!"
12 |
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/010_project_nested_module_dirs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/tests/integration/test_projects/010_project_nested_module_dirs/__init__.py
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/010_project_nested_module_dirs/dev/dev.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "7eafd19a",
6 | "metadata": {},
7 | "source": [
8 | "**Use this notebook for developing code before productionizing it within tasks**"
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": null,
14 | "id": "b771bf53",
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "## CODE HERE..."
19 | ]
20 | }
21 | ],
22 | "metadata": {
23 | "kernelspec": {
24 | "display_name": "Python 3",
25 | "language": "python",
26 | "name": "python3"
27 | },
28 | "language_info": {
29 | "codemirror_mode": {
30 | "name": "ipython",
31 | "version": 3
32 | },
33 | "file_extension": ".py",
34 | "mimetype": "text/x-python",
35 | "name": "python",
36 | "nbconvert_exporter": "python",
37 | "pygments_lexer": "ipython3",
38 | "version": "3.7.4"
39 | }
40 | },
41 | "nbformat": 4,
42 | "nbformat_minor": 5
43 | }
44 |
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/010_project_nested_module_dirs/output/task01.txt:
--------------------------------------------------------------------------------
1 | Hello from task 1!
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/010_project_nested_module_dirs/output/task02.txt:
--------------------------------------------------------------------------------
1 | Hello from task 1!
2 | Hello from task 2!
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/010_project_nested_module_dirs/prism_project.py:
--------------------------------------------------------------------------------
1 | """
2 | Prism project
3 | """
4 |
5 | # Imports
6 | import logging
7 | from pathlib import Path
8 | from prism.admin import generate_run_id, generate_run_slug
9 |
10 |
# Project metadata (left blank in this test project)
NAME = ""
AUTHOR = ""
VERSION = ""
DESCRIPTION = """
"""

# Admin: identifiers generated each time this module is loaded
RUN_ID = generate_run_id()  # don't delete this!
SLUG = generate_run_slug()  # don't delete this!


# sys.path config: this file's folder and the folder above it, so tasks can
# reach local packages that live outside the project structure.
SYS_PATH_CONF = [
    Path(__file__).parent,
    Path(__file__).parent.parent,
]


# Thread count: number of workers used to execute tasks concurrently. A value
# of 1 means one task runs at a time.
THREADS = 1


# Profile directory and name
PROFILE_YML_PATH = Path(__file__).parent.joinpath("profile.yml")
PROFILE = None  # name of profile within `profiles.yml`


# Logger
PRISM_LOGGER = logging.getLogger("PRISM_LOGGER")


# Other variables / parameters. Make sure to capitalize all of these!
VAR_1 = {"a": "b"}
VAR_2 = 200
VAR_3 = "2015-01-01"

# Paths, all relative to the folder containing this file
WKDIR = Path(__file__).parent
DATA = WKDIR.joinpath("data")
OUTPUT = WKDIR.joinpath("output")
54 |
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/010_project_nested_module_dirs/tasks/extract/module01.py:
--------------------------------------------------------------------------------
1 | # Prism imports
2 | import prism.decorators
3 | import prism.target
4 | import prism.task
5 | from prism.runtime import CurrentRun
6 |
7 |
class Task01(prism.task.PrismTask):
    """First task of the nested-directory project; returns a fixed greeting."""

    # Txt target named `task01.txt` under the run context's "OUTPUT" value
    @prism.decorators.target(
        type=prism.target.Txt,
        loc=CurrentRun.ctx("OUTPUT") / "task01.txt",
    )
    def run(self):
        """Return the greeting string for task 1."""
        greeting = "Hello from task 1!"
        return greeting
15 |
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/010_project_nested_module_dirs/tasks/extract/module02.py:
--------------------------------------------------------------------------------
1 | # Prism imports
2 | import prism.decorators
3 | import prism.target
4 | import prism.task
5 | from prism.runtime import CurrentRun
6 |
7 |
class Task02(prism.task.PrismTask):
    """Second task; extends the output of `extract/module01.Task01`."""

    # Txt target named `task02.txt` under the run context's "OUTPUT" value
    @prism.decorators.target(
        type=prism.target.Txt,
        loc=CurrentRun.ctx("OUTPUT") / "task02.txt",
    )
    def run(self):
        """Return Task01's output plus a task-2 greeting line."""
        upstream = CurrentRun.ref("extract/module01.Task01")
        return upstream + "\n" + "Hello from task 2!"
16 |
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/010_project_nested_module_dirs/tasks/load/module03.py:
--------------------------------------------------------------------------------
1 | # Prism imports
2 | import prism.decorators
3 | import prism.target
4 | import prism.task
5 | from prism.runtime import CurrentRun
6 |
7 |
class Task03(prism.task.PrismTask):
    """Third task; extends the output of `extract/module02.Task02`."""

    def run(self):
        """Return Task02's output plus a task-3 greeting line."""
        upstream = CurrentRun.ref("extract/module02.Task02")
        return upstream + "\n" + "Hello from task 3!"
13 |
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/010_project_nested_module_dirs/tasks/module04.py:
--------------------------------------------------------------------------------
1 | # Prism imports
2 | import prism.decorators
3 | import prism.target
4 | import prism.task
5 | from prism.runtime import CurrentRun
6 |
7 |
class Task04(prism.task.PrismTask):
    """Fourth task; extends the output of `load/module03.Task03`."""

    def run(self):
        """Return Task03's output plus a task-4 greeting line."""
        upstream = CurrentRun.ref("load/module03.Task03")
        return upstream + "\n" + "Hello from task 4!"
12 |
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/011_bad_task_ref/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/tests/integration/test_projects/011_bad_task_ref/__init__.py
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/011_bad_task_ref/dev/dev.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "7eafd19a",
6 | "metadata": {},
7 | "source": [
8 | "**Use this notebook for developing code before productionizing it within tasks**"
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": null,
14 | "id": "b771bf53",
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "## CODE HERE..."
19 | ]
20 | }
21 | ],
22 | "metadata": {
23 | "kernelspec": {
24 | "display_name": "Python 3",
25 | "language": "python",
26 | "name": "python3"
27 | },
28 | "language_info": {
29 | "codemirror_mode": {
30 | "name": "ipython",
31 | "version": 3
32 | },
33 | "file_extension": ".py",
34 | "mimetype": "text/x-python",
35 | "name": "python",
36 | "nbconvert_exporter": "python",
37 | "pygments_lexer": "ipython3",
38 | "version": "3.7.4"
39 | }
40 | },
41 | "nbformat": 4,
42 | "nbformat_minor": 5
43 | }
44 |
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/011_bad_task_ref/modules/extract/module01.py:
--------------------------------------------------------------------------------
1 | # Prism imports
2 | import prism.decorators
3 | import prism.target
4 | import prism.task
5 | from prism.runtime import CurrentRun
6 |
7 |
class Task01(prism.task.PrismTask):
    """First task of the bad-task-ref project; returns a fixed greeting."""

    # Txt target named `task01.txt` under the run context's "OUTPUT" value
    @prism.decorators.target(
        type=prism.target.Txt,
        loc=CurrentRun.ctx("OUTPUT") / "task01.txt",
    )
    def run(self):
        """Return the greeting string for task 1."""
        greeting = "Hello from task 1!"
        return greeting
15 |
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/011_bad_task_ref/modules/extract/module02.py:
--------------------------------------------------------------------------------
1 | # Prism imports
2 | import prism.decorators
3 | import prism.target
4 | import prism.task
5 | from prism.runtime import CurrentRun
6 |
7 |
class Task02(prism.task.PrismTask):
    """Second task; refers to `extract/this_is_an_error`."""

    # Txt target named `task02.txt` under the run context's "OUTPUT" value
    @prism.decorators.target(
        type=prism.target.Txt,
        loc=CurrentRun.ctx("OUTPUT") / "task02.txt",
    )
    def run(self):
        """Return the referenced task's output plus a task-2 greeting line."""
        # NOTE(review): given the project name `011_bad_task_ref`, this
        # reference looks deliberately invalid -- do not "fix" it without
        # checking the tests that use this project.
        upstream = CurrentRun.ref("extract/this_is_an_error")
        return upstream + "\n" + "Hello from task 2!"
16 |
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/011_bad_task_ref/modules/load/module03.py:
--------------------------------------------------------------------------------
1 | # Prism imports
2 | import prism.decorators
3 | import prism.target
4 | import prism.task
5 | from prism.runtime import CurrentRun
6 |
7 |
class Task03(prism.task.PrismTask):
    """Third task; extends the output of `extract/module02.Task02`."""

    def run(self):
        """Return Task02's output plus a task-3 greeting line."""
        upstream = CurrentRun.ref("extract/module02.Task02")
        return upstream + "\n" + "Hello from task 3!"
13 |
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/011_bad_task_ref/modules/module04.py:
--------------------------------------------------------------------------------
1 | # Prism imports
2 | import prism.decorators
3 | import prism.target
4 | import prism.task
5 | from prism.runtime import CurrentRun
6 |
7 |
class Task04(prism.task.PrismTask):
    """Fourth task; extends the output of `load/module03.Task03`."""

    def run(self):
        """Return Task03's output plus a task-4 greeting line."""
        upstream = CurrentRun.ref("load/module03.Task03")
        return upstream + "\n" + "Hello from task 4!"
12 |
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/011_bad_task_ref/output/module01.txt:
--------------------------------------------------------------------------------
1 | Hello from task 1!
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/011_bad_task_ref/output/module02.txt:
--------------------------------------------------------------------------------
1 | Hello from task 1!
2 | Hello from task 2!
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/011_bad_task_ref/prism_project.py:
--------------------------------------------------------------------------------
1 | """
2 | Prism project
3 | """
4 |
5 | # Imports
6 | import logging
7 | from pathlib import Path
8 | from prism.admin import generate_run_id, generate_run_slug
9 |
10 |
# Project metadata (left blank in this test project)
NAME = ""
AUTHOR = ""
VERSION = ""
DESCRIPTION = """
"""

# Admin: identifiers generated each time this module is loaded
RUN_ID = generate_run_id()  # don't delete this!
SLUG = generate_run_slug()  # don't delete this!


# sys.path config: this file's folder and the folder above it, so tasks can
# reach local packages that live outside the project structure.
SYS_PATH_CONF = [
    Path(__file__).parent,
    Path(__file__).parent.parent,
]


# Thread count: number of workers used to execute tasks concurrently. A value
# of 1 means one task runs at a time.
THREADS = 1


# Profile directory and name
PROFILE_YML_PATH = Path(__file__).parent.joinpath("profile.yml")
PROFILE = None  # name of profile within `profiles.yml`


# Logger
PRISM_LOGGER = logging.getLogger("PRISM_LOGGER")


# Other variables / parameters. Make sure to capitalize all of these!
VAR_1 = {"a": "b"}
VAR_2 = 200
VAR_3 = "2015-01-01"

# Paths, all relative to the folder containing this file
WKDIR = Path(__file__).parent
DATA = WKDIR.joinpath("data")
OUTPUT = WKDIR.joinpath("output")
54 |
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/012_concurrency/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/tests/integration/test_projects/012_concurrency/__init__.py
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/012_concurrency/dev/dev.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "7eafd19a",
6 | "metadata": {},
7 | "source": [
8 | "**Use this notebook for developing code before productionizing it within tasks**"
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": null,
14 | "id": "b771bf53",
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "## CODE HERE..."
19 | ]
20 | }
21 | ],
22 | "metadata": {
23 | "kernelspec": {
24 | "display_name": "Python 3",
25 | "language": "python",
26 | "name": "python3"
27 | },
28 | "language_info": {
29 | "codemirror_mode": {
30 | "name": "ipython",
31 | "version": 3
32 | },
33 | "file_extension": ".py",
34 | "mimetype": "text/x-python",
35 | "name": "python",
36 | "nbconvert_exporter": "python",
37 | "pygments_lexer": "ipython3",
38 | "version": "3.7.4"
39 | }
40 | },
41 | "nbformat": 4,
42 | "nbformat_minor": 5
43 | }
44 |
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/012_concurrency/modules/module01.py:
--------------------------------------------------------------------------------
1 | import time
2 |
3 | import pandas as pd
4 |
5 | import prism.decorators
6 | import prism.target
7 |
8 | # Prism imports
9 | import prism.task
10 | from prism.runtime import CurrentRun
11 |
12 |
class Task01(prism.task.PrismTask):
    """Task that sleeps for 15 seconds and records when it began and ended."""

    # PandasCsv target named `task01.csv` under the run context's "OUTPUT"
    # value; `index=False` is passed through to the target decorator.
    @prism.decorators.target(
        type=prism.target.PandasCsv,
        loc=CurrentRun.ctx("OUTPUT") / "task01.csv",
        index=False,
    )
    def run(self):
        """Return a one-row DataFrame with `start_time` and `end_time` columns."""
        began = time.time()
        time.sleep(15)
        finished = time.time()
        return pd.DataFrame({"start_time": [began], "end_time": [finished]})
26 |
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/012_concurrency/modules/module02.py:
--------------------------------------------------------------------------------
1 | import time
2 |
3 | import pandas as pd
4 |
5 | import prism.decorators
6 | import prism.target
7 |
8 | # Prism imports
9 | import prism.task
10 | from prism.runtime import CurrentRun
11 |
12 | ####################
13 | # Class definition #
14 | ####################
15 |
16 |
class Task02(prism.task.PrismTask):
    """Sleep for 5 seconds and record when the sleep started and ended."""

    @prism.decorators.target(
        type=prism.target.PandasCsv,
        loc=CurrentRun.ctx("OUTPUT") / "task02.csv",
        index=False,
    )
    def run(self):
        """Return a one-row DataFrame with `start_time` and `end_time` columns."""
        began_at = time.time()
        time.sleep(5)
        finished_at = time.time()
        return pd.DataFrame({"start_time": [began_at], "end_time": [finished_at]})
30 |
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/012_concurrency/modules/module03.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
3 | import prism.decorators
4 | import prism.target
5 |
6 | # Prism imports
7 | import prism.task
8 | from prism.runtime import CurrentRun
9 |
10 |
class Task03(prism.task.PrismTask):
    """Check that the outputs of Task01 and Task02 are DataFrames."""

    def get_txt_output(self, path):
        """Return the full text content of the file at `path`."""
        # The `with` statement closes the file on exit; no explicit close needed.
        with open(path) as f:
            return f.read()

    # Run
    def run(self):
        """Reference both upstream tasks and return a fixed greeting."""
        d1 = CurrentRun.ref("module01.Task01")
        assert isinstance(d1, pd.DataFrame)
        d2 = CurrentRun.ref("module02.Task02")
        assert isinstance(d2, pd.DataFrame)
        return "Hello from task 3!"
25 |
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/012_concurrency/modules/module04.py:
--------------------------------------------------------------------------------
1 | # Prism imports
2 | import prism.decorators
3 | import prism.target
4 | import prism.task
5 | from prism.runtime import CurrentRun
6 |
7 |
class Task04(prism.task.PrismTask):
    """Append this task's greeting to the output of Task03."""

    def run(self):
        """Return Task03's output, a newline, and this task's greeting."""
        upstream = CurrentRun.ref("module03.Task03")
        return upstream + "\n" + "Hello from task 4!"
12 |
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/012_concurrency/output/.exists:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/tests/integration/test_projects/012_concurrency/output/.exists
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/012_concurrency/prism_project.py:
--------------------------------------------------------------------------------
1 | """
2 | Prism project
3 | """
4 |
5 | # Imports
6 | import logging
7 | from pathlib import Path
8 | from prism.admin import generate_run_id, generate_run_slug
9 |
10 |
11 | # Project metadata
12 | NAME = ""
13 | AUTHOR = ""
14 | VERSION = ""
15 | DESCRIPTION = """
16 | """
17 |
18 | # Admin
19 | RUN_ID = generate_run_id() # don't delete this!
20 | SLUG = generate_run_slug() # don't delete this!
21 |
22 |
23 | # sys.path config. This gives your tasks access to local tasks / packages that exist
24 | # outside of your project structure.
25 | SYS_PATH_CONF = [
26 | Path(__file__).parent,
27 | Path(__file__).parent.parent,
28 | ]
29 |
30 |
31 | # Thread count: number of workers to use to execute tasks concurrently. If set to 1,
32 | # then 1 task is run at a time.
33 | THREADS = 2
34 |
35 |
36 | # Profile directory and name
37 | PROFILE_YML_PATH = Path(__file__).parent / "profile.yml"
38 | PROFILE = None # name of profile within `profile.yml`
39 |
40 |
41 | # Logger
42 | PRISM_LOGGER = logging.getLogger("PRISM_LOGGER")
43 |
44 |
45 | # Other variables / parameters. Make sure to capitalize all of these!
46 | VAR_1 = {"a": "b"}
47 | VAR_2 = 200
48 | VAR_3 = "2015-01-01"
49 |
50 | # Paths
51 | WKDIR = Path(__file__).parent
52 | DATA = WKDIR / "data"
53 | OUTPUT = WKDIR / "output"
54 |
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/013_connectors/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/tests/integration/test_projects/013_connectors/__init__.py
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/013_connectors/output/.exists:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/tests/integration/test_projects/013_connectors/output/.exists
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/013_connectors/tasks/bad_adapter.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import prism.decorators
4 | import prism.target
5 |
6 | # Prism imports
7 | import prism.task
8 | from prism.runtime import CurrentRun
9 |
10 |
class BadAdapterTask(prism.task.PrismTask):
    """Query a Snowflake sample table through the `snowflake_connector` connector.

    NOTE(review): the class name suggests this connector name is expected to be
    invalid for the test profile -- confirm against the integration tests.
    """

    @prism.decorators.target(
        type=prism.target.PandasCsv,
        loc=Path(CurrentRun.ctx("OUTPUT")) / "bad_adapter.csv",
        index=False,
    )
    def run(self):
        """Run the MACHINERY-segment query and return the result as pandas."""
        query = """
        SELECT
            *
        FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."CUSTOMER"
        WHERE
            C_MKTSEGMENT = 'MACHINERY'
        LIMIT 50
        """
        connector = CurrentRun.conn("snowflake_connector")
        return connector.execute_sql(sql=query, return_type="pandas")
30 |
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/013_connectors/tasks/postgres_task.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import prism.decorators
4 | import prism.target
5 |
6 | # Prism imports
7 | import prism.task
8 | from prism.runtime import CurrentRun
9 |
10 |
class PostgresTask(prism.task.PrismTask):
    """Fetch ten names from the `us500` table via the `postgres-connector`."""

    @prism.decorators.target(
        type=prism.target.PandasCsv,
        loc=Path(CurrentRun.ctx("OUTPUT")) / "sample_postgres_data.csv",
        index=False,
    )
    def run(self):
        """Run the name query and return the result as pandas."""
        query = """
        SELECT
            first_name
            , last_name
        FROM us500
        ORDER BY
            first_name
            , last_name
        LIMIT 10
        """
        connector = CurrentRun.conn("postgres-connector")
        return connector.execute_sql(sql=query, return_type="pandas")
32 |
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/013_connectors/tasks/snowflake_task.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import prism.decorators
4 | import prism.target
5 |
6 | # Prism imports
7 | import prism.task
8 | from prism.runtime import CurrentRun
9 |
10 |
class SnowflakeTask(prism.task.PrismTask):
    """Pull MACHINERY and HOUSEHOLD customer samples from Snowflake."""

    @prism.decorators.target(
        type=prism.target.PandasCsv,
        loc=Path(CurrentRun.ctx("OUTPUT")) / "machinery_sample.csv",
        index=False,
    )
    @prism.decorators.target(
        type=prism.target.PandasCsv,
        loc=Path(CurrentRun.ctx("OUTPUT")) / "household_sample.csv",
        index=False,
    )
    def run(self):
        """Return `(machinery_df, household_df)`, one result per declared target."""
        connector = CurrentRun.conn("snowflake-connector")

        # First 50 MACHINERY customers ordered by customer key
        machinery_query = """
        SELECT
            *
        FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."CUSTOMER"
        WHERE
            c_mktsegment = 'MACHINERY'
        ORDER BY
            c_custkey
        LIMIT 50
        """
        machinery_result = connector.execute_sql(
            sql=machinery_query, return_type="pandas"
        )

        # First 50 HOUSEHOLD customers ordered by customer key
        household_query = """
        SELECT
            *
        FROM "SNOWFLAKE_SAMPLE_DATA"."TPCH_SF1"."CUSTOMER"
        WHERE
            c_mktsegment = 'HOUSEHOLD'
        ORDER BY
            c_custkey
        LIMIT 50
        """
        household_result = connector.execute_sql(
            sql=household_query, return_type="pandas"
        )

        return machinery_result, household_result
51 |
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/013_connectors/tasks/spark_task.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import pyspark.sql.functions as F
4 |
5 | # Spark
6 | from pyspark.sql import SparkSession
7 |
8 | import prism.decorators
9 | import prism.target
10 |
11 | # Prism imports
12 | import prism.task
13 | from prism.runtime import CurrentRun
14 |
# Module-level Spark session shared by the task below.
_builder = SparkSession.builder.appName("spark-test")
_builder = _builder.config("spark.driver.cores", 4)
_builder = _builder.config("spark.executor.cores", 4)
spark = _builder.getOrCreate()


class PysparkTask(prism.task.PrismTask):
    """Filter the two Snowflake samples by account balance using Spark."""

    @prism.decorators.target(
        type=prism.target.PandasCsv,
        loc=Path(CurrentRun.ctx("OUTPUT")) / "machinery_sample_filtered.csv",
        index=False,
    )
    @prism.decorators.target(
        type=prism.target.PandasCsv,
        loc=Path(CurrentRun.ctx("OUTPUT")) / "household_sample_filtered.csv",
        index=False,
    )
    def run(self):
        """Return the filtered machinery and household samples as pandas frames."""
        upstream = CurrentRun.ref("snowflake_task.SnowflakeTask")
        machinery_pd = upstream[0]
        household_pd = upstream[1]

        # Machinery: sort by balance, keep rows with C_ACCTBAL <= 1000
        machinery_sorted = spark.createDataFrame(machinery_pd).sort(
            F.col("C_ACCTBAL").asc()
        )
        machinery_out = machinery_sorted.filter(F.col("C_ACCTBAL") <= 1000).toPandas()

        # Household: sort by balance, keep rows with 1000 < C_ACCTBAL <= 2000
        household_sorted = spark.createDataFrame(household_pd).sort(
            F.col("C_ACCTBAL").asc()
        )
        household_above = household_sorted.filter(F.col("C_ACCTBAL") > 1000)
        household_out = household_above.filter(F.col("C_ACCTBAL") <= 2000).toPandas()

        return machinery_out, household_out
58 |
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/014_project_with_package_lookup/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/tests/integration/test_projects/014_project_with_package_lookup/__init__.py
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/014_project_with_package_lookup/output/.exists:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/tests/integration/test_projects/014_project_with_package_lookup/output/.exists
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/014_project_with_package_lookup/tasks/module01.py:
--------------------------------------------------------------------------------
1 | # From additional package lookup
2 | from additional_package.utils import task1_return
3 |
4 | import prism.decorators
5 | import prism.target
6 |
7 | # Prism imports
8 | import prism.task
9 | from prism.runtime import CurrentRun
10 |
11 |
class Task01(prism.task.PrismTask):
    """Return the value produced by `additional_package.utils.task1_return`."""

    @prism.decorators.target(
        type=prism.target.Txt,
        loc=CurrentRun.ctx("OUTPUT") / "task01.txt",
    )
    def run(self):
        """Delegate to `task1_return` and hand its result to the Txt target."""
        result = task1_return()
        return result
19 |
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/020_dec_retries/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/tests/integration/test_projects/020_dec_retries/__init__.py
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/020_dec_retries/output/.exists:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/tests/integration/test_projects/020_dec_retries/output/.exists
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/020_dec_retries/prism_project.py:
--------------------------------------------------------------------------------
1 | """
2 | Prism project
3 | """
4 |
5 | # Imports
6 | import logging
7 | from pathlib import Path
8 | from prism.admin import generate_run_id, generate_run_slug
9 |
10 |
11 | # Project metadata
12 | NAME = ""
13 | AUTHOR = ""
14 | VERSION = ""
15 | DESCRIPTION = """
16 | """
17 |
18 | # Admin
19 | RUN_ID = generate_run_id() # don't delete this!
20 | SLUG = generate_run_slug() # don't delete this!
21 |
22 |
23 | # sys.path config. This gives your tasks access to local tasks / packages that exist
24 | # outside of your project structure.
25 | SYS_PATH_CONF = [
26 | Path(__file__).parent,
27 | ]
28 |
29 |
30 | # Thread count: number of workers to use to execute tasks concurrently. If set to 1,
31 | # then 1 task is run at a time.
32 | THREADS = 1
33 |
34 |
35 | # Profile directory and name
36 | PROFILE_YML_PATH = Path(__file__).parent / "profile.yml"
37 | PROFILE = None # name of profile within `profile.yml`
38 |
39 |
40 | # Logger
41 | PRISM_LOGGER = logging.getLogger("PRISM_LOGGER")
42 |
43 |
44 | # Other variables / parameters. Make sure to capitalize all of these!
45 | VAR_1 = {"a": "b"}
46 | VAR_2 = 200
47 | VAR_3 = "2015-01-01"
48 |
49 | # Paths
50 | WKDIR = Path(__file__).parent
51 | DATA = WKDIR / "data"
52 | OUTPUT = WKDIR / "output"
53 |
54 |
55 | # Triggers
56 | TRIGGERS_YML_PATH = Path(__file__).parent / "triggers.yml"
57 | TRIGGERS = {
58 | "on_success": ["test_trigger_function"],
59 | "on_failure": ["test_trigger_function"],
60 | }
61 |
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/020_dec_retries/tasks/extract.py:
--------------------------------------------------------------------------------
1 | # Standard library imports
2 | import requests
3 | import json
4 |
5 | # Prism imports
6 | from prism.decorators import task, target
7 | from prism.target import JSON
8 | from prism.runtime import CurrentRun
9 |
10 |
11 | # Task
@task(
    task_id="extract_task",
    targets=[target(type=JSON, loc=CurrentRun.ctx("OUTPUT") / "todos.json")],
)
def extract():
    """Download the sample todos from the JSONPlaceholder API.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if the server does not answer within 30 seconds.
    """
    url = "https://jsonplaceholder.typicode.com/todos"
    # Without a timeout `requests` can block forever on an unresponsive host.
    resp = requests.get(url, timeout=30)
    # Fail loudly instead of saving an error payload as if it were data.
    resp.raise_for_status()
    return json.loads(resp.text)
20 |
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/020_dec_retries/tasks/load.py:
--------------------------------------------------------------------------------
1 | # Standard library imports
2 |
3 | # Prism imports
4 | from prism.decorators import target_iterator, task
5 | from prism.runtime import CurrentRun
6 | from prism.target import Txt
7 |
8 |
9 | # Task
@task(
    retries=1,
    retry_delay_seconds=0,
    targets=[target_iterator(type=Txt, loc=CurrentRun.ctx("OUTPUT"))],
)
def load():
    """Map `<name>.txt` file names to names from the `extract_task` output.

    The task is configured with one retry and no retry delay, and it raises on
    purpose (see below) before any mapping is built.
    """
    payload = CurrentRun.ref("extract_task")

    # Deliberate error for testing: `hi` is undefined.
    print(hi)  # noqa: F821

    # File name is the lower-cased name with spaces replaced by underscores.
    return {
        f"{person['name'].lower().replace(' ', '_')}.txt": person["name"]
        for person in payload["people"]
    }
30 |
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/020_dec_retries/triggers.yml:
--------------------------------------------------------------------------------
1 | include:
2 | - "{{ Path(__file__).parent.parent }}"
3 |
4 | triggers:
5 | test_trigger_function:
6 | type: function
7 | function: common.functions.test_trigger_function
8 |
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/023_skipped_task/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/tests/integration/test_projects/023_skipped_task/__init__.py
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/023_skipped_task/callback.txt:
--------------------------------------------------------------------------------
1 | This is the output of a callback
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/023_skipped_task/output/.exists:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/tests/integration/test_projects/023_skipped_task/output/.exists
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/023_skipped_task/output/task01.txt:
--------------------------------------------------------------------------------
1 | Hello from task 1!
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/023_skipped_task/output/task02.txt:
--------------------------------------------------------------------------------
1 | Hello from task 1!
2 | Hello from task 2!
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/023_skipped_task/prism_project.py:
--------------------------------------------------------------------------------
1 | """
2 | Prism project
3 | """
4 |
5 | # Imports
6 | import logging
7 | from pathlib import Path
8 | from prism.admin import generate_run_id, generate_run_slug
9 |
10 |
11 | # Project metadata
12 | NAME = ""
13 | AUTHOR = ""
14 | VERSION = ""
15 | DESCRIPTION = """
16 | """
17 |
18 | # Admin
19 | RUN_ID = generate_run_id() # don't delete this!
20 | SLUG = generate_run_slug() # don't delete this!
21 |
22 |
23 | # sys.path config. This gives your tasks access to local tasks / packages that exist
24 | # outside of your project structure.
25 | SYS_PATH_CONF = [
26 | Path(__file__).parent,
27 | Path(__file__).parent.parent,
28 | ]
29 |
30 |
31 | # Thread count: number of workers to use to execute tasks concurrently. If set to 1,
32 | # then 1 task is run at a time.
33 | THREADS = 1
34 |
35 |
36 | # Profile directory and name
37 | PROFILE_YML_PATH = Path(__file__).parent / "profile.yml"
38 | PROFILE = None # name of profile within `profile.yml`
39 |
40 |
41 | # Logger
42 | PRISM_LOGGER = logging.getLogger("PRISM_LOGGER")
43 |
44 |
45 | # Other variables / parameters. Make sure to capitalize all of these!
46 | VAR_1 = {"a": "b"}
47 | VAR_2 = 200
48 | VAR_3 = "2015-01-01"
49 |
50 | # Paths
51 | WKDIR = Path(__file__).parent
52 | DATA = WKDIR / "data"
53 | OUTPUT = WKDIR / "output"
54 |
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/023_skipped_task/tasks/task01.py:
--------------------------------------------------------------------------------
1 | # Other imports
2 | from pathlib import Path
3 |
4 | import prism.decorators
5 | import prism.target
6 |
7 | # Prism infrastructure imports
8 | import prism.task
9 | from prism.runtime import CurrentRun
10 |
11 |
class Task01(prism.task.PrismTask):
    """Write a fixed greeting to `task01.txt` in the OUTPUT directory."""

    def done(self):
        """Report whether `task01.txt` already exists in the OUTPUT directory."""
        # NOTE(review): presumably prism skips `run` when this returns True,
        # as the project name `023_skipped_task` suggests -- confirm.
        output_file = Path(CurrentRun.ctx("OUTPUT")) / "task01.txt"
        return output_file.is_file()

    @prism.decorators.target(
        type=prism.target.Txt,
        loc=Path(CurrentRun.ctx("OUTPUT")) / "task01.txt",
    )
    def run(self):
        """Return the greeting that the Txt target saves."""
        greeting = "Hello from task 1!"
        return greeting
22 |
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/023_skipped_task/tasks/task02.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import prism.decorators
4 | import prism.target
5 |
6 | # Prism infrastructure imports
7 | import prism.task
8 | from prism.runtime import CurrentRun
9 |
10 |
class Task02(prism.task.PrismTask):
    """Append this task's greeting to the output of Task01."""

    @prism.decorators.target(
        type=prism.target.Txt,
        loc=Path(CurrentRun.ctx("OUTPUT")) / "task02.txt",
    )
    def run(self):
        """Return Task01's output, a newline, and this task's greeting."""
        upstream_text = CurrentRun.ref("task01.Task01")
        return upstream_text + "\n" + "Hello from task 2!"
19 |
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/common/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/tests/integration/test_projects/common/__init__.py
--------------------------------------------------------------------------------
/prism/tests/integration/test_projects/common/functions.py:
--------------------------------------------------------------------------------
1 | """
2 | Common functions for Prism project integration tests.
3 | """
4 |
5 | # Imports
6 | from pathlib import Path
7 |
8 |
9 | # Functions
def test_trigger_function(project_name: str = "014_test_triggers_normal"):
    """Write a marker file into the `output` directory of a sibling test project.

    Args:
        project_name: directory name of the project, resolved relative to the
            parent of this module's directory.
    """
    projects_root = Path(__file__).parent.parent
    marker_path = projects_root / project_name / "output" / "trigger.txt"
    with marker_path.open("w") as handle:
        handle.write("This is outputted from the trigger function!")
14 |
--------------------------------------------------------------------------------
/prism/tests/integration/test_visualizer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/tests/integration/test_visualizer.py
--------------------------------------------------------------------------------
/prism/tests/unit/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/tests/unit/__init__.py
--------------------------------------------------------------------------------
/prism/tests/unit/test_callbacks.py:
--------------------------------------------------------------------------------
1 | import os
2 | from pathlib import Path
3 |
4 | import pytest
5 |
6 | # Prism imports
7 | from prism.callbacks import _PrismCallback
8 |
9 |
10 | # Example callback
def example_callback():
    """Write a fixed message to `callback.txt` in the current working directory."""
    Path("callback.txt").write_text("This is the output of a callback")
14 |
15 |
def example_callback_with_args(args):
    """Write the callback message to `callback.txt`; `args` is accepted but unused."""
    message = "This is the output of a callback"
    with open("callback.txt", "w") as handle:
        handle.write(message)
19 |
20 |
def test_good_callback():
    """A zero-argument callback runs and writes `callback.txt` next to this file.

    The working directory is switched to this file's directory for the duration
    of the test and restored afterwards. The output file is removed even when an
    assertion fails, so a failed run does not poison later runs.
    """
    test_dir = Path(__file__).parent
    callback_file = test_dir / "callback.txt"
    original_cwd = Path.cwd()

    # `example_callback` writes relative to the current working directory.
    os.chdir(test_dir)
    try:
        assert not callback_file.is_file()

        # Run callback
        callback = _PrismCallback(example_callback)
        callback.run()
        assert callback_file.is_file()
    finally:
        if callback_file.is_file():
            os.unlink(callback_file)
        os.chdir(original_cwd)
31 |
32 |
def test_callback_with_args():
    """Wrapping a callback that takes arguments raises `ValueError`."""
    expected_msg = (
        "Callback function `example_callback_with_args` cannot have any arguments."  # noqa: E501
    )
    with pytest.raises(ValueError) as exc_info:
        _PrismCallback(example_callback_with_args)
    assert str(exc_info.value) == expected_msg
40 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_compiled_projects/__init__.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | COMPILER_TEST_CASES = Path(__file__).parent
4 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_compiled_projects/dag_cycle/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/tests/unit/test_compiled_projects/dag_cycle/__init__.py
--------------------------------------------------------------------------------
/prism/tests/unit/test_compiled_projects/dag_cycle/moduleA.py:
--------------------------------------------------------------------------------
1 | from prism.task import PrismTask
2 |
3 |
class Taska(PrismTask):
    """Task A: references no other task."""

    def run(self):
        """Return this task's fixed message."""
        message = "This is task A."
        return message
7 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_compiled_projects/dag_cycle/moduleB.py:
--------------------------------------------------------------------------------
1 | from prism.runtime import CurrentRun
2 | from prism.task import PrismTask
3 |
4 |
class Taskb(PrismTask):
    """Task B: references tasks A and E."""

    def run(self):
        """Return the outputs of A and E followed by this task's message."""
        from_a = CurrentRun.ref("moduleA.Taska")
        from_e = CurrentRun.ref("moduleE.Taske")
        return from_a + from_e + " This is task B."
12 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_compiled_projects/dag_cycle/moduleC.py:
--------------------------------------------------------------------------------
1 | from prism.runtime import CurrentRun
2 | from prism.task import PrismTask
3 |
4 |
class Taskc(PrismTask):
    """Task C: references task A."""

    def run(self):
        """Return the output of A followed by this task's message."""
        from_a = CurrentRun.ref("moduleA.Taska")
        return from_a + " This is task C."
8 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_compiled_projects/dag_cycle/moduleD.py:
--------------------------------------------------------------------------------
1 | from prism.runtime import CurrentRun
2 | from prism.task import PrismTask
3 |
4 |
class Taskd(PrismTask):
    """Task D: references tasks B, A and C."""

    def run(self):
        """Return the outputs of B, A and C followed by this task's message."""
        from_b = CurrentRun.ref("moduleB.Taskb")
        from_a = CurrentRun.ref("moduleA.Taska")
        from_c = CurrentRun.ref("moduleC.Taskc")
        return from_b + from_a + from_c + " This is task D."
13 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_compiled_projects/dag_cycle/moduleE.py:
--------------------------------------------------------------------------------
1 | from prism.runtime import CurrentRun
2 | from prism.task import PrismTask
3 |
4 |
class Taske(PrismTask):
    """Task E: references tasks A, C and D."""

    def run(self):
        """Return the outputs of A, C and D followed by this task's message."""
        from_a = CurrentRun.ref("moduleA.Taska")
        from_c = CurrentRun.ref("moduleC.Taskc")
        from_d = CurrentRun.ref("moduleD.Taskd")
        return from_a + from_c + from_d + " This is task E."
13 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_compiled_projects/task_ref_15nodes/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/tests/unit/test_compiled_projects/task_ref_15nodes/__init__.py
--------------------------------------------------------------------------------
/prism/tests/unit/test_compiled_projects/task_ref_15nodes/task01.py:
--------------------------------------------------------------------------------
1 | import prism.task
2 |
3 |
class Task01(prism.task.PrismTask):
    """Task 01: references no other task."""

    def run(self):
        """Return this task's fixed message."""
        message = "This is task 01. "
        return message
7 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_compiled_projects/task_ref_15nodes/task02.py:
--------------------------------------------------------------------------------
1 | import prism.task
2 | from prism.runtime import CurrentRun
3 |
4 |
class Task02(prism.task.PrismTask):
    """Task 02: references task 01."""

    def run(self):
        """Return the output of task 01 followed by this task's message."""
        upstream = CurrentRun.ref("task01.Task01")
        return upstream + "This is task 02."
8 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_compiled_projects/task_ref_15nodes/task03.py:
--------------------------------------------------------------------------------
1 | import prism.task
2 | from prism.runtime import CurrentRun
3 |
4 |
class Task03(prism.task.PrismTask):
    """Task 03: references task 01."""

    def run(self):
        """Return the output of task 01 followed by this task's message."""
        upstream = CurrentRun.ref("task01.Task01")
        return upstream + "This is task 03. "
8 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_compiled_projects/task_ref_15nodes/task04.py:
--------------------------------------------------------------------------------
1 | import prism.task
2 | from prism.runtime import CurrentRun
3 |
4 |
class Task04(prism.task.PrismTask):
    """Task 04: references tasks 02 and 03."""

    def run(self):
        """Return the outputs of tasks 02 and 03 followed by this task's message."""
        from_02 = CurrentRun.ref("task02.Task02")
        from_03 = CurrentRun.ref("task03.Task03")
        return from_02 + from_03 + "This is task 04. "
12 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_compiled_projects/task_ref_15nodes/task05.py:
--------------------------------------------------------------------------------
1 | import prism.task
2 | from prism.runtime import CurrentRun
3 |
4 |
class Task05(prism.task.PrismTask):
    """Task 05: references task 01."""

    def run(self):
        """Return the output of task 01 followed by this task's message."""
        upstream = CurrentRun.ref("task01.Task01")
        return upstream + "This is task 05. "
8 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_compiled_projects/task_ref_15nodes/task06.py:
--------------------------------------------------------------------------------
1 | import prism.task
2 | from prism.runtime import CurrentRun
3 |
4 |
class Task06(prism.task.PrismTask):
    """Task 06: references task 05."""

    def run(self):
        """Return the output of task 05 followed by this task's message."""
        upstream = CurrentRun.ref("task05.Task05")
        return upstream + "This is task 06. "
8 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_compiled_projects/task_ref_15nodes/task07.py:
--------------------------------------------------------------------------------
1 | import prism.decorators
2 | import prism.task
3 | from prism.runtime import CurrentRun
4 |
5 |
6 | # Class-based task
class Task07a(prism.task.PrismTask):
    """Task 07a: references tasks 04 and 06."""

    def run(self):
        """Return the outputs of tasks 04 and 06 followed by this task's message."""
        from_04 = CurrentRun.ref("task04.Task04")
        from_06 = CurrentRun.ref("task06.Task06")
        return from_04 + from_06 + "This is task 07. "
14 |
15 |
16 | # Function-based task
@prism.decorators.task()
def task_07b():
    """Reference `Task07a` from the same module and return a fixed message.

    The referenced output is discarded; only the reference itself matters here.
    """
    _ = CurrentRun.ref("task07.Task07a")
    message = "This is a local task"
    return message
21 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_compiled_projects/task_ref_15nodes/task08.py:
--------------------------------------------------------------------------------
1 | import prism.task
2 | from prism.runtime import CurrentRun
3 |
4 |
class Task08(prism.task.PrismTask):
    """Task 08: references task 01."""

    def run(self):
        """Return the output of task 01 followed by this task's message."""
        upstream = CurrentRun.ref("task01.Task01")
        return upstream + "This is task 08. "
8 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_compiled_projects/task_ref_15nodes/task09.py:
--------------------------------------------------------------------------------
1 | import prism.task
2 | from prism.runtime import CurrentRun
3 |
4 |
class Task09(prism.task.PrismTask):
    """Task 09: references tasks 05 and 08."""

    def run(self):
        """Return the outputs of tasks 05 and 08 followed by this task's message."""
        from_05 = CurrentRun.ref("task05.Task05")
        from_08 = CurrentRun.ref("task08.Task08")
        return from_05 + from_08 + "This is task 09. "
12 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_compiled_projects/task_ref_15nodes/task10.py:
--------------------------------------------------------------------------------
1 | import prism.task
2 | from prism.runtime import CurrentRun
3 |
4 |
class Task10(prism.task.PrismTask):
    """Fixture task that extends the output of `task01.Task01`."""

    def run(self):
        """Return the `task01.Task01` output followed by this task's marker."""
        upstream = CurrentRun.ref("task01.Task01")
        return upstream + "This is task 10. "
8 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_compiled_projects/task_ref_15nodes/task11.py:
--------------------------------------------------------------------------------
1 | import prism.task
2 | from prism.runtime import CurrentRun
3 |
4 |
class Task11(prism.task.PrismTask):
    """Fixture task combining `task07.Task07a` and `task10.Task10`."""

    def run(self):
        """Concatenate both upstream outputs, then append this task's marker."""
        from_task07a = CurrentRun.ref("task07.Task07a")
        from_task10 = CurrentRun.ref("task10.Task10")
        return from_task07a + from_task10 + "This is task 11."
12 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_compiled_projects/task_ref_15nodes/task12.py:
--------------------------------------------------------------------------------
1 | import prism.task
2 | from prism.runtime import CurrentRun
3 |
4 |
class Task12(prism.task.PrismTask):
    """Fixture task that extends the output of `task10.Task10`."""

    def run(self):
        """Return the `task10.Task10` output followed by this task's marker."""
        upstream = CurrentRun.ref("task10.Task10")
        return upstream + "This is task 12. "
8 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_compiled_projects/task_ref_15nodes/task13.py:
--------------------------------------------------------------------------------
1 | import prism.task
2 | from prism.runtime import CurrentRun
3 |
4 |
class Task13(prism.task.PrismTask):
    """Fixture task that extends the output of `task10.Task10`."""

    def run(self):
        """Return the `task10.Task10` output followed by this task's marker."""
        upstream = CurrentRun.ref("task10.Task10")
        return upstream + "This is task 13. "
8 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_compiled_projects/task_ref_15nodes/task14.py:
--------------------------------------------------------------------------------
1 | import prism.task
2 | from prism.runtime import CurrentRun
3 |
4 |
class Task14(prism.task.PrismTask):
    """Fixture task that extends the output of `task11.Task11`."""

    def run(self):
        """Return the `task11.Task11` output followed by this task's marker."""
        upstream = CurrentRun.ref("task11.Task11")
        return upstream + "This is task 14. "
8 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_compiled_projects/task_ref_15nodes/task15.py:
--------------------------------------------------------------------------------
1 | import prism.task
2 | from prism.runtime import CurrentRun
3 |
4 |
class Task15(prism.task.PrismTask):
    """Fixture task that extends the output of `task11.Task11`."""

    def run(self):
        """Return the `task11.Task11` output followed by this task's marker."""
        upstream = CurrentRun.ref("task11.Task11")
        return upstream + "This is task 15. "
8 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_compiled_projects/task_ref_3nodes/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/tests/unit/test_compiled_projects/task_ref_3nodes/__init__.py
--------------------------------------------------------------------------------
/prism/tests/unit/test_compiled_projects/task_ref_3nodes/task01.py:
--------------------------------------------------------------------------------
1 | import prism.task
2 |
3 |
class Task01(prism.task.PrismTask):
    """Fixture task that references no other task."""

    def run(self):
        """
        Return this task's marker string.

        `run` is an instance method, so it has to accept `self`; declared without
        it, a bound call such as `instance.run()` raises a TypeError because the
        instance is still passed as the first positional argument.
        """
        return "This is task 1."
7 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_compiled_projects/task_ref_3nodes/task02.py:
--------------------------------------------------------------------------------
1 | import prism.task
2 | from prism.runtime import CurrentRun
3 |
4 |
class Task02(prism.task.PrismTask):
    """Fixture task that extends the output of `task01.Task01`."""

    def run(self):
        """Return the `task01.Task01` output followed by this task's marker."""
        upstream = CurrentRun.ref("task01.Task01")
        return upstream + " This is task 2."
8 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_compiled_projects/task_ref_3nodes/task03.py:
--------------------------------------------------------------------------------
1 | import prism.task
2 | from prism.runtime import CurrentRun
3 |
4 |
class Task03(prism.task.PrismTask):
    """Fixture task that extends the output of `task02.Task02`."""

    def run(self):
        """Return the `task02.Task02` output followed by this task's marker."""
        upstream = CurrentRun.ref("task02.Task02")
        return upstream + "This is task 3."
8 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_compiled_projects/task_ref_5nodes/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/tests/unit/test_compiled_projects/task_ref_5nodes/__init__.py
--------------------------------------------------------------------------------
/prism/tests/unit/test_compiled_projects/task_ref_5nodes/taskA.py:
--------------------------------------------------------------------------------
1 | from prism.task import PrismTask
2 |
3 |
class Taska(PrismTask):
    """Fixture task that references no other task."""

    def run(self):
        """Return this task's marker string."""
        marker = "This is task A."
        return marker
7 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_compiled_projects/task_ref_5nodes/taskB.py:
--------------------------------------------------------------------------------
1 | from prism.runtime import CurrentRun
2 | from prism.task import PrismTask
3 |
4 |
class Taskb(PrismTask):
    """Fixture task that extends the output of `taskA.Taska`."""

    def run(self):
        """Return the `taskA.Taska` output followed by this task's marker."""
        from_a = CurrentRun.ref("taskA.Taska")
        return from_a + " This is task B."
8 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_compiled_projects/task_ref_5nodes/taskC.py:
--------------------------------------------------------------------------------
1 | from prism.runtime import CurrentRun
2 | from prism.task import PrismTask
3 |
4 |
class Taskc(PrismTask):
    """Fixture task that extends the output of `taskA.Taska`."""

    def run(self):
        """Return the `taskA.Taska` output followed by this task's marker."""
        from_a = CurrentRun.ref("taskA.Taska")
        return from_a + " This is task C."
8 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_compiled_projects/task_ref_5nodes/taskD.py:
--------------------------------------------------------------------------------
1 | from prism.runtime import CurrentRun
2 | from prism.task import PrismTask
3 |
4 |
class Taskd(PrismTask):
    """Fixture task combining `taskB.Taskb`, `taskA.Taska` and `taskC.Taskc`."""

    def run(self):
        """Concatenate the three upstream outputs (B, A, C) and append the marker."""
        from_b = CurrentRun.ref("taskB.Taskb")
        from_a = CurrentRun.ref("taskA.Taska")
        from_c = CurrentRun.ref("taskC.Taskc")
        return from_b + from_a + from_c + " This is task D."
13 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_compiled_projects/task_ref_5nodes/taskE.py:
--------------------------------------------------------------------------------
1 | from prism.runtime import CurrentRun
2 | from prism.task import PrismTask
3 |
4 |
class Taske(PrismTask):
    """Fixture task combining `taskA.Taska`, `taskC.Taskc` and `taskD.Taskd`."""

    def run(self):
        """Concatenate the three upstream outputs (A, C, D) and append the marker."""
        from_a = CurrentRun.ref("taskA.Taska")
        from_c = CurrentRun.ref("taskC.Taskc")
        from_d = CurrentRun.ref("taskD.Taskd")
        return from_a + from_c + from_d + " This is task E."
13 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_compiled_projects/task_ref_norefs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/tests/unit/test_compiled_projects/task_ref_norefs/__init__.py
--------------------------------------------------------------------------------
/prism/tests/unit/test_compiled_projects/task_ref_norefs/moduleA.py:
--------------------------------------------------------------------------------
1 | from prism.task import PrismTask
2 |
3 |
class Taska(PrismTask):
    """Standalone fixture task; its `run` references no other task."""

    def run(self):
        """Return this task's marker string."""
        marker = "This is task A."
        return marker
7 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_compiled_projects/task_ref_norefs/moduleB.py:
--------------------------------------------------------------------------------
1 | from prism.task import PrismTask
2 |
3 |
class Taskb(PrismTask):
    """Standalone fixture task; its `run` references no other task."""

    def run(self):
        """Return this task's marker string."""
        marker = "This is task B."
        return marker
7 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_compiled_projects/task_ref_norefs/moduleC.py:
--------------------------------------------------------------------------------
1 | from prism.task import PrismTask
2 |
3 |
class Taskc(PrismTask):
    """Standalone fixture task; its `run` references no other task."""

    def run(self):
        """Return this task's marker string."""
        marker = "This is task C."
        return marker
7 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_compiled_projects/task_ref_norefs/moduleD.py:
--------------------------------------------------------------------------------
1 | from prism.task import PrismTask
2 |
3 |
class Taskd(PrismTask):
    """Standalone fixture task; its `run` references no other task."""

    def run(self):
        """Return this task's marker string."""
        marker = "This is task D."
        return marker
7 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_compiled_projects/task_ref_norefs/moduleE.py:
--------------------------------------------------------------------------------
1 | from prism.task import PrismTask
2 |
3 |
class Taske(PrismTask):
    """Standalone fixture task; its `run` references no other task."""

    def run(self):
        """Return this task's marker string."""
        marker = "This is task E."
        return marker
7 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_compiled_projects/task_ref_selfref/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/runprism/prism/a6a762a1ce44a696b1dd6be793726974b9c80c82/prism/tests/unit/test_compiled_projects/task_ref_selfref/__init__.py
--------------------------------------------------------------------------------
/prism/tests/unit/test_compiled_projects/task_ref_selfref/moduleA.py:
--------------------------------------------------------------------------------
1 | from prism.task import PrismTask
2 |
3 |
class Taska(PrismTask):
    """Fixture task whose `run` references no other task."""

    def run(self):
        """Return this task's marker string."""
        marker = "This is task A."
        return marker
7 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_compiled_projects/task_ref_selfref/moduleB.py:
--------------------------------------------------------------------------------
1 | from prism.runtime import CurrentRun
2 | from prism.task import PrismTask
3 |
4 |
class Taskb(PrismTask):
    """
    Fixture task whose `run` references `moduleB`, the module it is defined in.
    """

    def run(self):
        """Return whatever the self-referencing `moduleB` lookup yields."""
        own_output = CurrentRun.ref("moduleB")
        return own_output
8 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_compiled_projects/task_ref_selfref/moduleC.py:
--------------------------------------------------------------------------------
1 | from prism.task import PrismTask
2 |
3 |
class Taskc(PrismTask):
    """Fixture task whose `run` references no other task."""

    def run(self):
        """Return this task's marker string."""
        marker = "This is task C."
        return marker
7 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_compiled_projects/task_ref_selfref/moduleD.py:
--------------------------------------------------------------------------------
1 | from prism.task import PrismTask
2 |
3 |
class Taskd(PrismTask):
    """Fixture task whose `run` references no other task."""

    def run(self):
        """Return this task's marker string."""
        marker = "This is task D."
        return marker
7 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_compiled_projects/task_ref_selfref/moduleE.py:
--------------------------------------------------------------------------------
1 | from prism.task import PrismTask
2 |
3 |
class Taske(PrismTask):
    """Fixture task whose `run` references no other task."""

    def run(self):
        """Return this task's marker string."""
        marker = "This is task E."
        return marker
7 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_import.py:
--------------------------------------------------------------------------------
def test_import_prism_main():
    """
    Smoke test: importing `prism.main` must succeed without raising.
    """
    # The import itself is the assertion; the bound name is intentionally unused.
    import prism.main  # noqa: F401
6 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_project_parser.py:
--------------------------------------------------------------------------------
1 | # Standard library imports
2 | import networkx
3 | from pathlib import Path
4 | import pytest
5 |
6 |
7 | # Prism imports
8 | import prism.exceptions
9 | from prism.client.parser import ProjectParser
10 | from prism.engine.module import _PrismModule
11 |
12 |
13 | # Paths
14 | UNIT_TEST_WKDIR = Path(__file__).parent
15 | TASK_TEST_CASES = UNIT_TEST_WKDIR / "test_tasks"
16 | EXAMPLE_TASKS_DIR = TASK_TEST_CASES / "example_tasks_dir"
17 |
18 |
def test_parser_no_task_dir():
    """
    Constructing a `ProjectParser` with a non-existent tasks directory raises a
    `CompileException` whose message names that directory.
    """
    missing_dir = TASK_TEST_CASES / "dummy_dir"
    with pytest.raises(prism.exceptions.CompileException) as cm:
        ProjectParser(
            project_dir=TASK_TEST_CASES,
            tasks_dir=missing_dir,
            all_tasks_downstream=True,
        )
    assert str(cm.value) == f"`{missing_dir}` is not a directory!"
28 |
29 |
def test_parser():
    """
    Discover and parse the example tasks directory, then compile its DAG for all
    tasks, for a subset of tasks, and for a task ID that does not exist.
    """
    parser = ProjectParser(
        project_dir=TASK_TEST_CASES,
        tasks_dir=EXAMPLE_TASKS_DIR,
        all_tasks_downstream=True,
    )

    # Module discovery: five modules, including the one in the nested directory
    discovered = parser._get_all_modules_in_dir(EXAMPLE_TASKS_DIR)
    assert len(discovered) == 5
    expected_modules = (
        "func_0.py",
        "func_1.py",
        "hello.py",
        "world.py",
        "nested/foo.py",
    )
    for module_name in expected_modules:
        assert module_name in discovered

    # Module objects
    module_objs = parser.parse_all_modules()
    assert all(isinstance(mod, _PrismModule) for mod in module_objs)

    def compile_for(task_ids):
        # Every compilation below differs only in the user-selected task IDs
        return parser.compile_dag(
            project_id="",
            run_slug=None,
            tasks_dir=EXAMPLE_TASKS_DIR,
            parsed_module_objs=module_objs,
            user_arg_task_ids=task_ids,
            user_arg_all_downstream=True,
        )

    # DAG should compile with all tasks
    compile_for([])

    # DAG should compile with a subset of tasks
    compile_for(["hello", "world"])

    # If we pass in tasks that do not exist, the DAG should not compile
    with pytest.raises(networkx.exception.NetworkXError) as cm:
        compile_for(["foo"])
    assert str(cm.value) == "The node foo is not in the digraph."
81 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_tasks/cls_bad_run_extra_arg.py:
--------------------------------------------------------------------------------
1 | from prism.task import PrismTask
2 |
3 |
class BadRunExtraArg(PrismTask):
    """Fixture task whose `run` takes an additional positional argument."""

    def run(self, extra_arg):
        """Return a fixed greeting; `extra_arg` is not used."""
        greeting = "hi"
        return greeting
7 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_tasks/cls_diff_import_structure.py:
--------------------------------------------------------------------------------
1 | import prism.task
2 |
3 |
class DiffImportStructure(prism.task.PrismTask):
    """
    Fixture task whose base class is spelled with the fully qualified
    `prism.task.PrismTask` path instead of a directly imported name.
    """

    def run(self):
        """Return a fixed greeting."""
        greeting = "hi"
        return greeting
7 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_tasks/cls_multiple_prism_tasks.py:
--------------------------------------------------------------------------------
1 | from prism.task import PrismTask
2 |
3 |
class FirstPrismTask(PrismTask):
    """First of two `PrismTask` subclasses defined in the same module."""

    def run(self):
        """Return a fixed greeting."""
        greeting = "hi"
        return greeting
7 |
8 |
class SecondPrismTask(PrismTask):
    """Second of two `PrismTask` subclasses defined in the same module."""

    def run(self):
        """Return a fixed greeting."""
        greeting = "hi"
        return greeting
12 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_tasks/cls_no_prism_task.py:
--------------------------------------------------------------------------------
1 | from prism.task import PrismTask # noqa
2 |
3 |
class NoPrismTask:
    """Plain class with a `run` method; it does not inherit from `PrismTask`."""

    def run(self):
        """Return a fixed greeting."""
        greeting = "hi"
        return greeting
7 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_tasks/cls_no_run_func.py:
--------------------------------------------------------------------------------
1 | from prism.task import PrismTask
2 |
3 |
class NoRunFunc(PrismTask):
    """Fixture task that defines `no_run_func` and no `run` method of its own."""

    def no_run_func(self):
        """Return a fixed greeting."""
        greeting = "hi"
        return greeting
7 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_tasks/cls_one_prism_task.py:
--------------------------------------------------------------------------------
1 | from prism.task import PrismTask
2 |
3 |
class NormalPrismTask(PrismTask):
    """Fixture module's single `PrismTask` subclass."""

    def run(self):
        """Return a fixed greeting."""
        greeting = "hi"
        return greeting
7 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_tasks/cls_other_classes.py:
--------------------------------------------------------------------------------
1 | from prism.task import PrismTask
2 |
3 |
class OnlyPrismTask(PrismTask):
    """The one `PrismTask` subclass in a module that also holds a plain class."""

    def run(self):
        """Return a fixed greeting."""
        greeting = "hi"
        return greeting
7 |
8 |
class NonPrismTask:
    """Plain class with a `run` method; it does not inherit from `PrismTask`."""

    def run(self):
        """Return a fixed greeting."""
        greeting = "hi"
        return greeting
12 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_tasks/cls_task_with_id.py:
--------------------------------------------------------------------------------
1 | from prism.task import PrismTask
2 |
3 |
class TasksRefs(PrismTask):
    """Fixture task that sets an explicit `task_id` class attribute."""

    # Custom identifier declared on the class
    task_id = "cls_custom_task_id"

    def run(self):
        """Return a fixed greeting."""
        greeting = "hi"
        return greeting
9 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_tasks/cls_task_with_target.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import prism.decorators
4 | from prism.target import PrismTarget
5 | from prism.task import PrismTask
6 |
7 |
class TaskWithTarget(PrismTask):
    """Fixture task whose `run` method is wrapped in a target decorator."""

    @prism.decorators.target(PrismTarget.txt, loc=os.path.join(os.getcwd(), "temp"))
    def run(self):
        """Return a fixed greeting."""
        greeting = "hi"
        return greeting
12 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_tasks/cls_tasks_refs.py:
--------------------------------------------------------------------------------
1 | from prism.runtime import CurrentRun
2 | from prism.task import PrismTask
3 |
4 |
class TasksRefs(PrismTask):
    """
    Fixture task with `CurrentRun.ref` calls both inside `run` and inside two
    other methods (`func_0` before `run`, `func_1` after it).
    """

    def func_0(self):
        """Return the result of referencing `func_0`."""
        referenced = CurrentRun.ref("func_0")
        return referenced

    def run(self):
        """Reference `hello` and `world`, discard both, return a fixed greeting."""
        _hello = CurrentRun.ref("hello")
        _world = CurrentRun.ref("world")
        return "hi"

    def func_1(self):
        """Return the result of referencing `func_1`."""
        referenced = CurrentRun.ref("func_1")
        return referenced
16 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_tasks/dec_bad_dec_no_parentheses.py:
--------------------------------------------------------------------------------
1 | from prism.decorators import task
2 |
3 |
@task
def task_with_refs():
    """Fixture function decorated with bare `@task`, without call parentheses."""
    greeting = "hi"
    return greeting
7 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_tasks/dec_bad_run_extra_arg.py:
--------------------------------------------------------------------------------
1 | from prism.decorators import task
2 |
3 |
@task()
def task_with_refs(extra_arg):
    """Fixture task function that declares a parameter; `extra_arg` is unused."""
    greeting = "hi"
    return greeting
7 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_tasks/dec_diff_decorator_structure.py:
--------------------------------------------------------------------------------
1 | import prism.decorators
2 |
3 |
@prism.decorators.task()
def task_fn_different_decorator_structure():
    """
    Fixture task function whose decorator is written with the fully qualified
    `prism.decorators.task` path.
    """
    greeting = "hi"
    return greeting
7 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_tasks/dec_multiple_prism_tasks.py:
--------------------------------------------------------------------------------
1 | from prism.decorators import task
2 |
3 |
@task()
def task_function_1():
    """First of two decorated task functions defined in the same module."""
    greeting = "hi"
    return greeting
7 |
8 |
@task()
def task_function_2():
    """Second of two decorated task functions defined in the same module."""
    greeting = "hi"
    return greeting
12 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_tasks/dec_no_prism_task.py:
--------------------------------------------------------------------------------
def task_function():
    """Plain, undecorated function that returns a fixed greeting."""
    greeting = "hi"
    return greeting
3 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_tasks/dec_one_prism_task.py:
--------------------------------------------------------------------------------
1 | from prism.decorators import task
2 |
3 |
@task()
def task_function():
    """Fixture module's single decorated task function."""
    greeting = "hi"
    return greeting
7 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_tasks/dec_other_functions.py:
--------------------------------------------------------------------------------
1 | from prism.decorators import task
2 |
3 |
def helper_function():
    """No-op helper that carries no task decorator."""
    return None
5 |
6 |
@task()
def task_function():
    """The decorated task function in a module that also defines a plain helper."""
    greeting = "hi"
    return greeting
10 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_tasks/dec_task_with_id.py:
--------------------------------------------------------------------------------
1 | from prism.decorators import task
2 |
3 |
@task(task_id="dec_custom_task_id")
def task_function():
    """Fixture task function given an explicit `task_id` through the decorator."""
    greeting = "hi"
    return greeting
7 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_tasks/dec_task_with_target.py:
--------------------------------------------------------------------------------
1 | from prism.decorators import task, target
2 | import prism.target
3 | from pathlib import Path
4 | from prism.runtime import CurrentRun
5 |
6 |
@task(targets=[target(type=prism.target.Txt, loc=Path(__file__) / "test.txt")])
def task_with_target():
    """
    Fixture task function declaring one text target; it references `hello.py`
    and `world.py`, discards both results and returns a fixed greeting.
    """
    _hello = CurrentRun.ref("hello.py")
    _world = CurrentRun.ref("world.py")
    return "hi"
12 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_tasks/dec_tasks_refs.py:
--------------------------------------------------------------------------------
1 | from prism.decorators import task, target
2 | import prism.target
3 | from pathlib import Path
4 | from prism.runtime import CurrentRun
5 |
6 |
@task(targets=[target(type=prism.target.Txt, loc=Path(__file__) / "test.txt")])
def task_with_refs():
    """
    Fixture task function declaring one text target; it references `hello`,
    `world`, `func_0` and `func_1` in that order, discards every result and
    returns a fixed greeting.
    """
    _hello = CurrentRun.ref("hello")
    _world = CurrentRun.ref("world")
    _func_0 = CurrentRun.ref("func_0")
    _func_1 = CurrentRun.ref("func_1")
    return "hi"
14 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_tasks/example_tasks_dir/func_0.py:
--------------------------------------------------------------------------------
1 | from prism.task import PrismTask
2 | from prism.runtime import CurrentRun
3 |
4 |
class Func0(PrismTask):
    """Example task with ID `func0` that references `hello` and `world`."""

    task_id = "func0"

    def run(self):
        """Reference both upstream tasks, ignore their outputs, return a constant."""
        CurrentRun.ref("hello")
        CurrentRun.ref("world")
        output = "world"
        return output
12 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_tasks/example_tasks_dir/func_1.py:
--------------------------------------------------------------------------------
1 | from prism.task import PrismTask
2 | from prism.runtime import CurrentRun
3 |
4 |
class Func1(PrismTask):
    """Example task with ID `func1` that references `func0`."""

    task_id = "func1"

    def run(self):
        """Reference `func0`, ignore its output, return a constant."""
        CurrentRun.ref("func0")
        output = "world"
        return output
11 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_tasks/example_tasks_dir/hello.py:
--------------------------------------------------------------------------------
1 | from prism.task import PrismTask
2 |
3 |
class Hello(PrismTask):
    """Example task with ID `hello`; its `run` references no other task."""

    task_id = "hello"

    def run(self):
        """Return a constant string."""
        output = "world"
        return output
9 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_tasks/example_tasks_dir/nested/foo.py:
--------------------------------------------------------------------------------
1 | from prism.task import PrismTask
2 |
3 |
class Foo(PrismTask):
    """Example task in the `nested` subdirectory; it sets no explicit `task_id`."""

    def run(self):
        """Return a constant string."""
        output = "world"
        return output
7 |
--------------------------------------------------------------------------------
/prism/tests/unit/test_tasks/example_tasks_dir/world.py:
--------------------------------------------------------------------------------
1 | from prism.task import PrismTask
2 | from prism.runtime import CurrentRun
3 |
4 |
class World(PrismTask):
    """Example task with ID `world` that references `hello`."""

    task_id = "world"

    def run(self):
        """Reference `hello`, ignore its output, return a constant."""
        CurrentRun.ref("hello")
        output = "world"
        return output
11 |
--------------------------------------------------------------------------------
/prism/utils.py:
--------------------------------------------------------------------------------
1 | from typing import (
2 | Any,
3 | Callable,
4 | List,
5 | Optional,
6 | Union,
7 | )
8 | import importlib
9 | from functools import wraps
10 |
11 |
12 | # Util functions
def requires_dependencies(
    dependencies: Union[str, List[str]],
    extras: Optional[str] = None,
):
    """
    Decorator factory that makes a function fail with an actionable `ImportError`
    when optional packages it needs cannot be imported. The check runs each time
    the decorated function is called, not when it is decorated. Borrowed heavily
    from the `unstructured` library:
    https://github.com/Unstructured-IO/unstructured/blob/main/unstructured/utils.py

    args:
        dependencies: importable module name, or list of module names, that the
            decorated function requires
        extras: name of the Prism extra that provides the dependencies. When
            given, the error message suggests `pip install "prism-ds[<extras>]"`;
            otherwise it suggests installing the missing packages directly
    returns:
        decorator that wraps a function with the dependency check
    raises:
        ImportError: raised by the wrapped function when at least one dependency
            is missing
    """
    if isinstance(dependencies, str):
        dependencies = [dependencies]

    def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
        @wraps(func)
        def wrapper(*args, **kwargs):
            missing_deps: List[str] = [
                dep for dep in dependencies if not dependency_exists(dep)
            ]
            if missing_deps:
                quoted_deps = ", ".join("`" + dep + "`" for dep in missing_deps)
                if extras:
                    install_hint = (
                        f"""Please install them using `pip install "prism-ds[{extras}]"`."""  # noqa
                    )
                else:
                    install_hint = (
                        f"Please install them using `pip install {' '.join(missing_deps)}`."  # noqa
                    )
                raise ImportError(
                    f"Following dependencies are missing: {quoted_deps}. "
                    + install_hint
                )
            return func(*args, **kwargs)

        return wrapper

    return decorator
50 |
51 |
def dependency_exists(dependency: str):
    """
    Report whether `dependency` can be imported.

    args:
        dependency: dotted module name to import
    returns:
        False when the import raises an `ImportError` whose representation
        mentions the dependency's top-level package; True otherwise, including
        when an `ImportError` is raised that does not mention that package
    """
    try:
        importlib.import_module(dependency)
    except ImportError as err:
        # Only an error that names the top-level package counts as "missing";
        # any other import error is treated as unrelated.
        top_level_pkg = dependency.split(".")[0]
        return top_level_pkg not in repr(err)
    return True
61 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools>=41.1.0", "wheel"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [tool.mypy]
6 | mypy_path = "src"
7 | check_untyped_defs = true
8 | disallow_any_generics = true
9 | ignore_missing_imports = true
10 | no_implicit_optional = true
11 | show_error_codes = true
12 | strict_equality = true
13 | warn_redundant_casts = true
14 | warn_return_any = true
15 | warn_unreachable = true
16 | warn_unused_configs = true
17 | no_implicit_reexport = true
18 |
19 | [tool.bumpver]
20 | current_version = "0.3.0"
21 | version_pattern = "MAJOR.MINOR.PATCH"
22 | commit_message = "bump version {old_version} -> {new_version}"
23 | commit = true
24 | tag = true
25 | push = true
26 |
27 | [tool.bumpver.file_patterns]
28 | "pyproject.toml" = [
29 | 'current_version = "{version}"',
30 | ]
31 | "prism/constants.py" = [
32 | "VERSION = '{version}'"
33 | ]
34 |
35 | [tool.ruff]
36 | exclude = [
37 | ".bzr",
38 | ".direnv",
39 | ".eggs",
40 | ".git",
41 | ".git-rewrite",
42 | ".hg",
43 | ".ipynb_checkpoints",
44 | ".mypy_cache",
45 | ".nox",
46 | ".pants.d",
47 | ".pyenv",
48 | ".pytest_cache",
49 | ".pytype",
50 | ".ruff_cache",
51 | ".svn",
52 | ".tox",
53 | ".vscode",
54 | "__pypackages__",
55 | "_build",
56 | "buck-out",
57 | "build",
58 | "dist",
59 | "site-packages",
60 | ]
61 | line-length = 88
62 | indent-width = 4
63 | lint.fixable = ["ALL"]
64 | lint.unfixable = []
65 |
66 | [tool.ruff.format]
67 | quote-style = "double"
68 | indent-style = "space"
69 | skip-magic-trailing-comma = false
70 | line-ending = "auto"
71 | docstring-code-format = true
72 | docstring-code-line-length = "dynamic"
73 |
74 |
75 | [tool.black]
76 | line-length = 88
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | name = prism-ds
3 | description = The easiest way to create data pipelines in Python.
4 | long_description_content_type = text/markdown
5 | long_description = file: README.md
6 | version = 0.3.0
7 | author = prism founders
8 | author_email = hello@runprism.com
9 | license = Apache-2.0
10 | license_files = LICENSE
11 | platforms = unix, linux, osx, win32
12 | classifiers =
13 | Programming Language :: Python :: 3
14 | Programming Language :: Python :: 3.8
15 | Programming Language :: Python :: 3.9
16 | Programming Language :: Python :: 3.10
17 | Programming Language :: Python :: 3.11
18 | project_urls =
19 | homepage = https://www.runprism.com
20 | documentation = https://docs.runprism.com
21 | repository = https://github.com/runprism/prism
22 |
23 | [options]
24 | packages=find_namespace:
25 | include_package_data=True
26 | install_requires =
27 | astor>=0.7
28 | boto3>=1
29 | botocore>=1
30 | click>=8
31 | networkx>=2
32 | numpy>=1
33 | pandas>=1
34 | PyYAML>=6
35 | requests>=2
36 | Jinja2==3.1.2
37 | MarkupSafe>=2.0
38 | coolname>=2.2
39 | shortuuid>=1.0
40 | rich_click>=1.6.1
41 | Pillow>=9.5.0
42 | sqlalchemy>=2.0.27
43 | watchdog>=4.0.0
44 | python_requires = >=3.7
45 | zip_safe = no
46 |
47 | [options.extras_require]
48 | snowflake =
49 | snowflake-connector-python>=2
50 | pyarrow<10.1.0,>=10.0.1
51 | bigquery =
52 | google-api-python-client>=2
53 | google-auth>=2
54 | google-cloud-bigquery>=2
55 | db-dtypes>=1
56 | redshift =
57 | psycopg2-binary>=2.9
58 | postgres =
59 | psycopg2-binary>=2.9
60 | trino =
61 | trino>=0.319
62 | presto =
63 | presto-python-client>=0.8
64 | pyspark =
65 | pyspark>=3
66 | dev =
67 | dbt-snowflake>=1,<=1.7.5
68 | pytest>=7
69 | fastparquet>=0.8,<1
70 | tox>=3.24
71 | mypy>=1.9.0
72 | tomli>=2.0.1
73 | typed-ast>=1.5.5
74 | types-PyYAML>=6.0.12.20240311
75 | ruff>=0.3.3
76 | types-networkx>=3.2.1.20240313
77 | black>=24.3.0
78 | pre-commit>=3.5.0
79 |
80 |
81 | [options.entry_points]
82 | console_scripts =
83 | prism = prism.main:cli
84 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 |
3 |
4 | if __name__ == "__main__":
5 | setup()
6 |
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | [tox]
2 | minversion = 3.8.0
3 | envlist = py37, py38, py39, py310
4 | isolated_build = true
5 |
6 | [gh-actions]
7 | python =
8 | 3.8: py38
9 | 3.9: py39
10 | 3.10: py310
11 | 3.11: py311
12 |
13 | [testenv]
14 | setenv =
15 | PYTHONPATH = {toxinidir}
16 | passenv =
17 | SHELL
18 | SNOWFLAKE_ACCOUNT
19 | SNOWFLAKE_DATABASE
20 | SNOWFLAKE_PASSWORD
21 | SNOWFLAKE_ROLE
22 | SNOWFLAKE_SCHEMA
23 | SNOWFLAKE_USER
24 | SNOWFLAKE_WAREHOUSE
25 | POSTGRES_USER
26 | POSTGRES_PASSWORD
27 | POSTGRES_DB
28 | POSTGRES_HOST
29 | GOOGLE_APPLICATION_CREDENTIALS
30 | deps =
31 | -r{toxinidir}/dev_requirements.txt
32 | commands =
33 | pytest
34 |
--------------------------------------------------------------------------------