├── .devcontainer └── devcontainer.json ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── compatibility.md │ ├── deprecation.md │ ├── feature_request.md │ └── questions.md └── workflows │ ├── publish.yml │ ├── test_all.yml │ ├── test_core.yml │ ├── test_dask.yml │ ├── test_no_sql.yml │ ├── test_notebook.yml │ ├── test_ray.yml │ ├── test_spark.yml │ └── test_win.yml ├── .gitignore ├── .gitpod.yml ├── .pre-commit-config.yaml ├── .pylintrc ├── .readthedocs.yaml ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── README.md ├── RELEASE.md ├── docs ├── Makefile ├── _static │ ├── fugue_logo_trimmed.svg │ ├── logo.svg │ └── logo_doc.svg ├── _templates │ ├── package.rst_t │ └── toc.rst_t ├── api.rst ├── api │ ├── fugue.bag.rst │ ├── fugue.collections.rst │ ├── fugue.column.rst │ ├── fugue.dataframe.rst │ ├── fugue.dataset.rst │ ├── fugue.execution.rst │ ├── fugue.extensions.creator.rst │ ├── fugue.extensions.outputter.rst │ ├── fugue.extensions.processor.rst │ ├── fugue.extensions.rst │ ├── fugue.extensions.transformer.rst │ ├── fugue.rpc.rst │ ├── fugue.rst │ ├── fugue.sql.rst │ └── fugue.workflow.rst ├── api_dask │ └── fugue_dask.rst ├── api_duckdb │ └── fugue_duckdb.rst ├── api_ibis │ ├── fugue_ibis.execution.rst │ └── fugue_ibis.rst ├── api_ray │ └── fugue_ray.rst ├── api_spark │ └── fugue_spark.rst ├── api_sql │ └── fugue_sql.rst ├── conf.py ├── index.rst ├── make.bat ├── top_api.rst └── tutorials.rst ├── fugue ├── __init__.py ├── _utils │ ├── __init__.py │ ├── display.py │ ├── exception.py │ ├── interfaceless.py │ ├── io.py │ ├── misc.py │ └── registry.py ├── api.py ├── bag │ ├── __init__.py │ ├── array_bag.py │ └── bag.py ├── collections │ ├── __init__.py │ ├── partition.py │ ├── sql.py │ └── yielded.py ├── column │ ├── __init__.py │ ├── expressions.py │ ├── functions.py │ └── sql.py ├── constants.py ├── dataframe │ ├── __init__.py │ ├── api.py │ ├── array_dataframe.py │ ├── arrow_dataframe.py │ ├── dataframe.py │ ├── dataframe_iterable_dataframe.py │ ├── dataframes.py │ ├── function_wrapper.py │ ├── iterable_dataframe.py │ ├── pandas_dataframe.py │ └── utils.py ├── dataset │ ├── __init__.py │ ├── api.py │ └── dataset.py ├── dev.py ├── exceptions.py ├── execution │ ├── __init__.py │ ├── api.py │ ├── execution_engine.py │ ├── factory.py │ └── native_execution_engine.py ├── extensions │ ├── __init__.py │ ├── _builtins │ │ ├── __init__.py │ │ ├── creators.py │ │ ├── outputters.py │ │ └── processors.py │ ├── _utils.py │ ├── context.py │ ├── creator │ │ ├── __init__.py │ │ ├── convert.py │ │ └── creator.py │ ├── outputter │ │ ├── __init__.py │ │ ├── convert.py │ │ └── outputter.py │ ├── processor │ │ ├── __init__.py │ │ ├── convert.py │ │ └── processor.py │ └── transformer │ │ ├── __init__.py │ │ ├── constants.py │ │ ├── convert.py │ │ └── transformer.py ├── plugins.py ├── py.typed ├── registry.py ├── rpc │ ├── __init__.py │ ├── base.py │ └── flask.py ├── sql │ ├── __init__.py │ ├── _utils.py │ ├── _visitors.py │ ├── api.py │ └── workflow.py ├── test │ ├── __init__.py │ ├── pandas_tester.py │ └── plugins.py └── workflow │ ├── __init__.py │ ├── _checkpoint.py │ ├── _tasks.py │ ├── _workflow_context.py │ ├── api.py │ ├── input.py │ ├── module.py │ └── workflow.py ├── fugue_contrib ├── __init__.py ├── contrib.py ├── seaborn │ └── __init__.py └── viz │ ├── __init__.py │ └── _ext.py ├── fugue_dask ├── __init__.py ├── _constants.py ├── _dask_sql_wrapper.py ├── _io.py ├── _utils.py ├── dataframe.py ├── execution_engine.py ├── registry.py └── tester.py ├── fugue_duckdb ├── __init__.py ├── _io.py ├── 
_utils.py ├── dask.py ├── dataframe.py ├── execution_engine.py ├── registry.py └── tester.py ├── fugue_ibis ├── __init__.py ├── _compat.py ├── _utils.py ├── dataframe.py └── execution_engine.py ├── fugue_notebook ├── __init__.py ├── env.py └── nbextension │ ├── README.md │ ├── __init__.py │ ├── description.yaml │ └── main.js ├── fugue_polars ├── __init__.py ├── _utils.py ├── polars_dataframe.py └── registry.py ├── fugue_ray ├── __init__.py ├── _constants.py ├── _utils │ ├── __init__.py │ ├── cluster.py │ ├── dataframe.py │ └── io.py ├── dataframe.py ├── execution_engine.py ├── registry.py └── tester.py ├── fugue_spark ├── __init__.py ├── _constants.py ├── _utils │ ├── __init__.py │ ├── convert.py │ ├── io.py │ ├── misc.py │ └── partition.py ├── dataframe.py ├── execution_engine.py ├── registry.py └── tester.py ├── fugue_sql ├── __init__.py └── exceptions.py ├── fugue_test ├── __init__.py ├── bag_suite.py ├── builtin_suite.py ├── dataframe_suite.py ├── execution_suite.py └── fixtures.py ├── fugue_version └── __init__.py ├── images ├── architecture.png ├── extensions.png └── logo.svg ├── requirements.txt ├── scripts └── setupsparkconnect.sh ├── setup.cfg ├── setup.py └── tests ├── __init__.py ├── fugue ├── __init__.py ├── bag │ ├── __init__.py │ └── test_array_bag.py ├── collections │ ├── __init__.py │ ├── test_partition.py │ └── test_sql.py ├── column │ ├── __init__.py │ ├── test_expressions.py │ ├── test_functions.py │ └── test_sql.py ├── dataframe │ ├── __init__.py │ ├── test_array_dataframe.py │ ├── test_arrow_dataframe.py │ ├── test_dataframe.py │ ├── test_dataframe_iterable_dataframe.py │ ├── test_dataframes.py │ ├── test_function_wrapper.py │ ├── test_iterable_dataframe.py │ ├── test_pandas_dataframe.py │ └── test_utils.py ├── execution │ ├── __init__.py │ ├── test_api.py │ ├── test_execution_engine.py │ ├── test_factory.py │ └── test_naive_execution_engine.py ├── extensions │ ├── __init__.py │ ├── creator │ │ ├── __init__.py │ │ └── test_convert.py │ ├── outputter │ │ ├── __init__.py │ │ └── test_convert.py │ ├── processor │ │ ├── __init__.py │ │ └── test_convert.py │ ├── test_utils.py │ └── transformer │ │ ├── __init__.py │ │ ├── test_convert_cotransformer.py │ │ ├── test_convert_output_cotransformer.py │ │ ├── test_convert_output_transformer.py │ │ └── test_convert_transformer.py ├── rpc │ ├── __init__.py │ ├── test_base.py │ ├── test_flask.py │ └── test_func.py ├── sql │ ├── __init__.py │ ├── test_utils.py │ ├── test_visitors.py │ ├── test_workflow.py │ └── test_workflow_parse.py ├── test │ ├── __init__.py │ └── test_plugins.py ├── test_interfaceless.py ├── utils │ ├── __init__.py │ ├── test_interfaceless.py │ ├── test_io.py │ └── test_misc.py └── workflow │ ├── __init__.py │ ├── test_module.py │ ├── test_runtime_exception.py │ ├── test_workflow.py │ ├── test_workflow_determinism.py │ └── test_workflow_parallel.py ├── fugue_dask ├── __init__.py ├── test_dataframe.py ├── test_execution_engine.py ├── test_importless.py ├── test_io.py ├── test_sql.py └── test_utils.py ├── fugue_duckdb ├── __init__.py ├── test_dask.py ├── test_dataframe.py ├── test_execution_engine.py ├── test_importless.py └── test_utils.py ├── fugue_ibis ├── __init__.py ├── mock │ ├── __init__.py │ ├── dataframe.py │ ├── execution_engine.py │ ├── registry.py │ └── tester.py ├── test_dataframe.py ├── test_execution_engine.py └── test_utils.py ├── fugue_notebook ├── __init__.py └── test_notebook.ipynb ├── fugue_polars ├── __init__.py ├── test_api.py ├── test_dataframe.py └── test_transform.py ├── fugue_ray ├── 
__init__.py ├── test_dataframe.py ├── test_execution_engine.py ├── test_registry.py └── test_utils.py └── fugue_spark ├── __init__.py ├── test_dataframe.py ├── test_execution_engine.py ├── test_importless.py ├── test_spark_connect.py ├── test_sql.py └── utils ├── __init__.py ├── test_convert.py ├── test_io.py └── test_partition.py /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Fugue Development Environment", 3 | "image": "mcr.microsoft.com/vscode/devcontainers/python:3.10", 4 | "customizations": { 5 | "vscode": { 6 | "settings": { 7 | "terminal.integrated.shell.linux": "/bin/bash", 8 | "python.pythonPath": "/usr/local/bin/python", 9 | "python.defaultInterpreterPath": "/usr/local/bin/python", 10 | "editor.defaultFormatter": "ms-python.black-formatter", 11 | "isort.interpreter": [ 12 | "/usr/local/bin/python" 13 | ], 14 | "flake8.interpreter": [ 15 | "/usr/local/bin/python" 16 | ], 17 | "pylint.interpreter": [ 18 | "/usr/local/bin/python" 19 | ], 20 | "black-formatter.interpreter": [ 21 | "/usr/local/bin/python" 22 | ] 23 | }, 24 | "extensions": [ 25 | "ms-python.python", 26 | "ms-python.isort", 27 | "ms-python.flake8", 28 | "ms-python.pylint", 29 | "ms-python.mypy", 30 | "ms-python.black-formatter", 31 | "GitHub.copilot", 32 | "njpwerner.autodocstring" 33 | ] 34 | } 35 | }, 36 | "forwardPorts": [ 37 | 8888 38 | ], 39 | "postCreateCommand": "make devenv", 40 | "features": { 41 | "ghcr.io/devcontainers/features/docker-in-docker:2.11.0": {}, 42 | "ghcr.io/devcontainers/features/java:1": { 43 | "version": "11" 44 | } 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: "[BUG]" 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Minimal Code To Reproduce** 11 | 12 | ```python 13 | ``` 14 | 15 | **Describe the bug** 16 | A clear and concise description of what the bug is. 17 | 18 | **Expected behavior** 19 | A clear and concise description of what you expected to happen. 20 | 21 | **Environment (please complete the following information):** 22 | - Backend: pandas/dask/ray? 23 | - Backend version: 24 | - Python version: 25 | - OS: linux/windows 26 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/compatibility.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Compatibility 3 | about: Compatibility with dependent package updates 4 | title: "[COMPATIBILITY]" 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/deprecation.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Deprecation 3 | about: Deprecate certain features 4 | title: "[DEPRECATION]" 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: "[FEATURE]" 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? 
Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/questions.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Questions 3 | about: General questions 4 | title: "[QUESTION]" 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python package using Twine when a release is created 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries 3 | 4 | name: Publish 5 | 6 | on: 7 | release: 8 | types: [created] 9 | 10 | jobs: 11 | deploy: 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - uses: actions/checkout@v2 16 | - name: Set up Python 17 | uses: actions/setup-python@v1 18 | with: 19 | python-version: '3.10' 20 | - name: Install dependencies 21 | run: make devenv 22 | - name: Test 23 | if: "!github.event.release.prerelease" 24 | run: make test 25 | - name: Build and publish 26 | env: 27 | RELEASE_TAG: ${{ github.event.release.tag_name }} 28 | TWINE_USERNAME: __token__ 29 | TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} 30 | run: | 31 | make package 32 | twine upload dist/* 33 | -------------------------------------------------------------------------------- /.github/workflows/test_all.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Full Tests 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | paths-ignore: 10 | - 'docs/**' 11 | - '**.md' 12 | pull_request: 13 | branches: [ master ] 14 | paths-ignore: 15 | - 'docs/**' 16 | - '**.md' 17 | 18 | concurrency: 19 | group: ${{ github.workflow }}-${{ github.ref }} 20 | cancel-in-progress: true 21 | 22 | jobs: 23 | all: 24 | name: Tests & Lint 25 | runs-on: ubuntu-latest 26 | strategy: 27 | matrix: 28 | python-version: ["3.10", "3.11", "3.12"] 29 | 30 | steps: 31 | - uses: actions/checkout@v2 32 | - name: Set up Python ${{ matrix.python-version }} 33 | uses: actions/setup-python@v1 34 | with: 35 | python-version: ${{ matrix.python-version }} 36 | - name: Install dependencies 37 | run: make devenv 38 | - name: Lint 39 | if: matrix.python-version == '3.10' 40 | run: make lint 41 | - name: Test 42 | run: make test 43 | - name: "Upload coverage to Codecov" 44 | if: matrix.python-version == '3.10' 45 | uses: codecov/codecov-action@v4 46 | with: 47 | fail_ci_if_error: false 48 | token: ${{ secrets.CODECOV_TOKEN }} 49 | 50 | no_spark: 51 | name: Tests 52 | runs-on: ubuntu-latest 53 | strategy: 54 | matrix: 55 | python-version: [3.9] 56 | 57 | steps: 58 | - uses: 
actions/checkout@v2 59 | - name: Set up Python ${{ matrix.python-version }} 60 | uses: actions/setup-python@v1 61 | with: 62 | python-version: ${{ matrix.python-version }} 63 | - name: Install dependencies 64 | run: make devenv 65 | - name: Test 66 | run: make testnospark 67 | -------------------------------------------------------------------------------- /.github/workflows/test_core.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Core Tests 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | paths-ignore: 10 | - 'docs/**' 11 | - '**.md' 12 | pull_request: 13 | branches: [ master ] 14 | paths-ignore: 15 | - 'docs/**' 16 | - '**.md' 17 | 18 | concurrency: 19 | group: ${{ github.workflow }}-${{ github.ref }} 20 | cancel-in-progress: true 21 | 22 | jobs: 23 | core-tests: 24 | name: Tests 25 | runs-on: ubuntu-latest 26 | strategy: 27 | matrix: 28 | python-version: ["3.10", "3.11", "3.12"] 29 | 30 | steps: 31 | - uses: actions/checkout@v2 32 | - name: Set up Python ${{ matrix.python-version }} 33 | uses: actions/setup-python@v1 34 | with: 35 | python-version: ${{ matrix.python-version }} 36 | - name: Fix setuptools_scm 37 | run: pip install "setuptools_scm<7" 38 | - name: Install dependencies 39 | run: make devenv 40 | - name: Install pandas 2 41 | if: matrix.python-version == '3.10' 42 | run: pip install "pandas>=2" 43 | - name: Test 44 | run: make testcore 45 | -------------------------------------------------------------------------------- /.github/workflows/test_dask.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Dask Tests 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | paths-ignore: 10 | - 'docs/**' 11 | - '**.md' 12 | pull_request: 13 | branches: [ master ] 14 | paths-ignore: 15 | - 'docs/**' 16 | - '**.md' 17 | 18 | concurrency: 19 | group: ${{ github.workflow }}-${{ github.ref }} 20 | cancel-in-progress: true 21 | 22 | jobs: 23 | test_dask_lower_bound: 24 | name: Dask 2024.4.0 25 | runs-on: ubuntu-latest 26 | 27 | steps: 28 | - uses: actions/checkout@v2 29 | - name: Set up Python 3.10 30 | uses: actions/setup-python@v1 31 | with: 32 | python-version: "3.10" 33 | - name: Install dependencies 34 | run: make devenv 35 | - name: Setup Dask 36 | run: pip install pyarrow==7.0.0 pandas==2.0.2 dask[dataframe,distributed]==2024.4.0 37 | - name: Test 38 | run: make testdask 39 | 40 | test_dask_sql_latest: 41 | name: Dask with SQL Latest 42 | runs-on: ubuntu-latest 43 | 44 | steps: 45 | - uses: actions/checkout@v2 46 | - name: Set up Python 3.10 47 | uses: actions/setup-python@v1 48 | with: 49 | python-version: "3.10" 50 | - name: Install dependencies 51 | run: make devenv 52 | - name: Test 53 | run: make testdask 54 | 55 | test_dask_latest: 56 | name: Dask without SQL Latest 57 | runs-on: ubuntu-latest 58 | 59 | steps: 60 | - uses: actions/checkout@v2 61 | - name: Set up Python 3.11 62 | uses: actions/setup-python@v1 63 | with: 64 | python-version: "3.11" 65 | - name: Install dependencies 66 | run: make devenv 67 | - name: Setup 
Dask 68 | run: pip install -U dask[dataframe,distributed] pyarrow pandas 69 | - name: Remove Dask SQL 70 | run: pip uninstall -y dask-sql qpd fugue-sql-antlr sqlglot 71 | - name: Test 72 | run: make testdask 73 | -------------------------------------------------------------------------------- /.github/workflows/test_no_sql.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Tests Excluding SQL Dependencies 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | paths-ignore: 10 | - 'docs/**' 11 | - '**.md' 12 | pull_request: 13 | branches: [ master ] 14 | paths-ignore: 15 | - 'docs/**' 16 | - '**.md' 17 | 18 | concurrency: 19 | group: ${{ github.workflow }}-${{ github.ref }} 20 | cancel-in-progress: true 21 | 22 | jobs: 23 | tests-no-sql: 24 | name: Tests 25 | runs-on: ubuntu-latest 26 | strategy: 27 | matrix: 28 | python-version: ["3.10"] 29 | 30 | steps: 31 | - uses: actions/checkout@v2 32 | - name: Set up Python ${{ matrix.python-version }} 33 | uses: actions/setup-python@v1 34 | with: 35 | python-version: ${{ matrix.python-version }} 36 | - name: Fix setuptools_scm 37 | run: pip install "setuptools_scm<7" 38 | - name: Install dependencies 39 | run: make devenv 40 | - name: Install pandas 2 41 | if: matrix.python-version == '3.10' 42 | run: pip install "pandas>=2" 43 | - name: Remove SQL dependencies 44 | run: pip uninstall -y qpd fugue-sql-antlr sqlglot 45 | - name: Test 46 | run: make testnosql 47 | -------------------------------------------------------------------------------- /.github/workflows/test_notebook.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Test Notebook Experience 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | paths-ignore: 10 | - 'docs/**' 11 | - '**.md' 12 | pull_request: 13 | branches: [ master ] 14 | paths-ignore: 15 | - 'docs/**' 16 | - '**.md' 17 | 18 | concurrency: 19 | group: ${{ github.workflow }}-${{ github.ref }} 20 | cancel-in-progress: true 21 | 22 | jobs: 23 | build: 24 | runs-on: ubuntu-latest 25 | strategy: 26 | matrix: 27 | python-version: ["3.10"] 28 | 29 | steps: 30 | - uses: actions/checkout@v2 31 | - name: Set up Python ${{ matrix.python-version }} 32 | uses: actions/setup-python@v1 33 | with: 34 | python-version: ${{ matrix.python-version }} 35 | - name: Install dependencies 36 | run: make devenv 37 | - name: Test 38 | run: make testnotebook 39 | -------------------------------------------------------------------------------- /.github/workflows/test_ray.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Ray Tests 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | paths-ignore: 10 | - 'docs/**' 11 | - '**.md' 12 | pull_request: 13 | branches: [ master ] 14 | paths-ignore: 15 | - 'docs/**' 16 | - '**.md' 17 | 18 
| concurrency: 19 | group: ${{ github.workflow }}-${{ github.ref }} 20 | cancel-in-progress: true 21 | 22 | jobs: 23 | test_ray_lower_bound: 24 | name: Ray 2.5.0 25 | runs-on: ubuntu-latest 26 | 27 | steps: 28 | - uses: actions/checkout@v2 29 | - name: Set up Python 3.9 30 | uses: actions/setup-python@v1 31 | with: 32 | python-version: 3.9 33 | - name: Install dependencies 34 | run: make devenv 35 | - name: Setup Ray 36 | run: pip install ray[data]==2.5.0 pyarrow==7.0.0 "duckdb<0.9" pandas==1.5.3 'pydantic<2' 37 | - name: Test 38 | run: make testray 39 | 40 | test_ray_latest: 41 | name: Ray Latest 42 | runs-on: ubuntu-latest 43 | 44 | steps: 45 | - uses: actions/checkout@v2 46 | - name: Set up Python 3.9 47 | uses: actions/setup-python@v1 48 | with: 49 | python-version: 3.9 50 | - name: Install dependencies 51 | run: make devenv 52 | - name: Setup Ray 53 | run: pip install -U ray[data] 54 | - name: Test 55 | run: make testray 56 | -------------------------------------------------------------------------------- /.github/workflows/test_spark.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Spark Tests 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | paths-ignore: 10 | - 'docs/**' 11 | - '**.md' 12 | pull_request: 13 | branches: [ master ] 14 | paths-ignore: 15 | - 'docs/**' 16 | - '**.md' 17 | 18 | concurrency: 19 | group: ${{ github.workflow }}-${{ github.ref }} 20 | cancel-in-progress: true 21 | 22 | jobs: 23 | test_combinations: 24 | name: Spark ${{ matrix.spark-version }} Pandas ${{ matrix.pandas-version }} 25 | runs-on: ubuntu-latest 26 | strategy: 27 | matrix: 28 | spark-version: ["3.4.0","3.5.5"] 29 | pandas-version: ["1.5.3","2.0.1"] 30 | 31 | steps: 32 | - uses: actions/checkout@v2 33 | - name: Set up Python 3.9 34 | uses: actions/setup-python@v1 35 | with: 36 | python-version: 3.9 37 | - name: Install dependencies 38 | run: make devenv 39 | - name: Install Spark ${{ matrix.spark-version }} 40 | run: pip install "pyspark==${{ matrix.spark-version }}" 41 | - name: Install Pandas ${{ matrix.pandas-version }} 42 | run: pip install "pandas==${{ matrix.pandas-version }}" 43 | - name: Downgrade Ibis 44 | if: matrix.spark-version < '3.4.0' 45 | run: pip install "ibis-framework<5" 46 | - name: Test 47 | run: make testspark 48 | 49 | test_connect: 50 | name: Spark Connect 51 | runs-on: ubuntu-latest 52 | 53 | steps: 54 | - uses: actions/checkout@v2 55 | - name: Set up Python 3.10 56 | uses: actions/setup-python@v1 57 | with: 58 | python-version: "3.10" 59 | - name: Install dependencies 60 | run: make devenv 61 | - name: Setup Spark 62 | run: make sparkconnect 63 | - name: Test 64 | run: make testsparkconnect 65 | -------------------------------------------------------------------------------- /.github/workflows/test_win.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Test Windows 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | paths-ignore: 10 | - 'docs/**' 11 | - '**.md' 12 | pull_request: 13 | branches: [ master ] 14 | 
paths-ignore: 15 | - 'docs/**' 16 | - '**.md' 17 | 18 | concurrency: 19 | group: ${{ github.workflow }}-${{ github.ref }} 20 | cancel-in-progress: true 21 | 22 | jobs: 23 | build: 24 | runs-on: windows-latest 25 | strategy: 26 | matrix: 27 | python-version: [3.9, "3.10"] 28 | steps: 29 | - uses: actions/checkout@v2 30 | - name: Set up Python ${{ matrix.python-version }} 31 | uses: actions/setup-python@v1 32 | with: 33 | python-version: ${{ matrix.python-version }} 34 | - name: Install dependencies 35 | run: pip install -r requirements.txt 36 | # - name: Install pyarrow 37 | # run: pip install pyarrow==8.0.0 38 | - name: Test 39 | run: python -m pytest --reruns 2 --only-rerun 'Overflow in cast' tests/fugue tests/fugue_dask tests/fugue_ibis tests/fugue_duckdb 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | pythonenv* 113 | 114 | # Spyder project settings 115 | .spyderproject 116 | .spyproject 117 | 118 | # Rope project settings 119 | .ropeproject 120 | 121 | # mkdocs documentation 122 | /site 123 | .virtual_documents 124 | 125 | # mypy 126 | .mypy_cache 127 | .dmypy.json 128 | dmypy.json 129 | 130 | # Pyre type checker 131 | .pyre/ 132 | 133 | .vscode 134 | tmp 135 | 136 | # Antlr 137 | .antlr 138 | 139 | # dask 140 | dask-worker-space 141 | 142 | # spark 143 | spark-warehouse 144 | =* 145 | 146 | # DS_Store 147 | *.DS_Store 148 | -------------------------------------------------------------------------------- /.gitpod.yml: -------------------------------------------------------------------------------- 1 | image: fugueproject/gitpod:0.7.2 2 | 3 | tasks: 4 | - init: | 5 | make devenv 6 | 7 | github: 8 | prebuilds: 9 | # enable for the master/default branch (defaults to true) 10 | master: true 11 | # enable for all branches in this repo (defaults to false) 12 | branches: true 13 | # enable for pull requests coming from this repo (defaults to true) 14 | pullRequests: true 15 | # enable for pull requests coming from forks (defaults to false) 16 | pullRequestsFromForks: true 17 | # add a "Review in Gitpod" button as a comment to pull requests (defaults to true) 18 | addComment: true 19 | # add a "Review in Gitpod" button to pull requests (defaults to false) 20 | addBadge: false 21 | # add a label once the prebuild is ready to pull requests (defaults to false) 22 | addLabel: prebuilt-in-gitpod 23 | 24 | vscode: 25 | extensions: 26 | - ms-python.python 27 | - njpwerner.autodocstring 28 | - ms-toolsai.jupyter 29 | - ms-toolsai.jupyter-keymap 30 | - ms-toolsai.jupyter-renderers 31 | - ms-python.isort 32 | - virgilsisoe.python-auto-import 33 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | default_language_version: 2 | python: python3 3 | 4 | exclude: | 5 | (?x)( 6 | ^tests/| 7 | ^docs/| 8 | ^fugue_sql/_antlr/ 9 | ) 10 | repos: 11 | - repo: https://github.com/pre-commit/pre-commit-hooks 12 | rev: v3.2.0 13 | hooks: 14 | - id: check-ast 15 | - id: check-docstring-first 16 | - id: check-executables-have-shebangs 17 | - id: check-json 18 | - id: check-merge-conflict 19 | - id: check-yaml 20 | - id: debug-statements 21 | - id: end-of-file-fixer 22 | - id: trailing-whitespace 23 | - id: check-vcs-permalinks 24 | - repo: https://github.com/pycqa/flake8 25 | rev: '3.8.3' 26 | hooks: 27 | - id: flake8 28 | types: [python] 29 | additional_dependencies: 30 | - flake8-bugbear 31 | - flake8-builtins 32 | # - flake8-docstrings # TODO: add back! 
33 | # - flake8-rst-docstrings 34 | - flake8-comprehensions 35 | - flake8-tidy-imports 36 | - pycodestyle 37 | - repo: https://github.com/pre-commit/mirrors-mypy 38 | rev: v0.971 39 | hooks: 40 | - id: mypy 41 | - repo: https://github.com/pre-commit/mirrors-pylint 42 | rev: v2.6.0 43 | hooks: 44 | - id: pylint 45 | - repo: https://github.com/ambv/black 46 | rev: 22.3.0 47 | hooks: 48 | - id: black 49 | types: [python] 50 | language_version: python3 51 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [MESSAGES CONTROL] 2 | disable = C0103,C0114,C0115,C0116,C0122,C0200,C0201,C0302,C0411,C0415,E0401,E0712,E1130,E1136,E5110,R0201,R0205,R0801,R0902,R0903,R0904,R0911,R0912,R0913,R0914,R0915,R0917,R1705,R1710,R1714,R1718,R1720,R1724,W0102,W0107,W0108,W0201,W0212,W0221,W0223,W0237,W0511,W0603,W0613,W0621,W0622,W0631,W0640,W0703,W0707,W1116 3 | # TODO: R0205: inherits from object, can be safely removed 4 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | # Set the version of Python and other tools you might need 4 | build: 5 | os: ubuntu-20.04 6 | tools: 7 | python: "3.10" 8 | jobs: 9 | pre_install: 10 | - pip install -U pip 11 | 12 | sphinx: 13 | configuration: docs/conf.py 14 | 15 | python: 16 | install: 17 | - requirements: requirements.txt 18 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/_static/logo.svg: -------------------------------------------------------------------------------- (SVG markup omitted from this dump) -------------------------------------------------------------------------------- /docs/_static/logo_doc.svg: -------------------------------------------------------------------------------- (SVG markup omitted from this dump) -------------------------------------------------------------------------------- /docs/_templates/toc.rst_t: -------------------------------------------------------------------------------- 1 | {{ header | heading }} 2 | 3 | .. toctree:: 4 | :maxdepth: {{ maxdepth }} 5 | {% for docname in docnames %} 6 | {{ docname }} 7 | {%- endfor %} 8 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | API Reference 2 | ============== 3 | 4 | .. 
toctree:: 5 | 6 | api/fugue 7 | api_sql/fugue_sql 8 | api_duckdb/fugue_duckdb 9 | api_spark/fugue_spark 10 | api_dask/fugue_dask 11 | api_ray/fugue_ray 12 | api_ibis/fugue_ibis 13 | -------------------------------------------------------------------------------- /docs/api/fugue.bag.rst: -------------------------------------------------------------------------------- 1 | fugue.bag 2 | ========== 3 | 4 | .. |SchemaLikeObject| replace:: :ref:`Schema like object ` 5 | .. |ParamsLikeObject| replace:: :ref:`Parameters like object ` 6 | .. |DataFrameLikeObject| replace:: :ref:`DataFrame like object ` 7 | .. |DataFramesLikeObject| replace:: :ref:`DataFrames like object ` 8 | .. |PartitionLikeObject| replace:: :ref:`Partition like object ` 9 | .. |RPCHandlerLikeObject| replace:: :ref:`RPChandler like object ` 10 | 11 | .. |ExecutionEngine| replace:: :class:`~fugue.execution.execution_engine.ExecutionEngine` 12 | .. |NativeExecutionEngine| replace:: :class:`~fugue.execution.native_execution_engine.NativeExecutionEngine` 13 | .. |FugueWorkflow| replace:: :class:`~fugue.workflow.workflow.FugueWorkflow` 14 | 15 | .. |ReadJoin| replace:: Read Join tutorials on :ref:`workflow ` and :ref:`engine ` for details 16 | .. |FugueConfig| replace:: :doc:`the Fugue Configuration Tutorial ` 17 | .. |PartitionTutorial| replace:: :doc:`the Partition Tutorial ` 18 | .. |FugueSQLTutorial| replace:: :doc:`the Fugue SQL Tutorial ` 19 | .. |DataFrameTutorial| replace:: :ref:`the DataFrame Tutorial ` 20 | .. |ExecutionEngineTutorial| replace:: :doc:`the ExecutionEngine Tutorial ` 21 | .. |ZipComap| replace:: :ref:`Zip & Comap ` 22 | .. |LoadSave| replace:: :ref:`Load & Save ` 23 | .. |AutoPersist| replace:: :ref:`Auto Persist ` 24 | .. |TransformerTutorial| replace:: :doc:`the Transformer Tutorial ` 25 | .. |CoTransformer| replace:: :ref:`CoTransformer ` 26 | .. |CoTransformerTutorial| replace:: :doc:`the CoTransformer Tutorial ` 27 | .. |FugueDataTypes| replace:: :doc:`Fugue Data Types ` 28 | 29 | 30 | fugue.bag.array\_bag 31 | -------------------- 32 | 33 | .. automodule:: fugue.bag.array_bag 34 | :members: 35 | :undoc-members: 36 | :show-inheritance: 37 | 38 | fugue.bag.bag 39 | ------------- 40 | 41 | .. automodule:: fugue.bag.bag 42 | :members: 43 | :undoc-members: 44 | :show-inheritance: 45 | 46 | -------------------------------------------------------------------------------- /docs/api/fugue.collections.rst: -------------------------------------------------------------------------------- 1 | fugue.collections 2 | ================== 3 | 4 | .. |SchemaLikeObject| replace:: :ref:`Schema like object ` 5 | .. |ParamsLikeObject| replace:: :ref:`Parameters like object ` 6 | .. |DataFrameLikeObject| replace:: :ref:`DataFrame like object ` 7 | .. |DataFramesLikeObject| replace:: :ref:`DataFrames like object ` 8 | .. |PartitionLikeObject| replace:: :ref:`Partition like object ` 9 | .. |RPCHandlerLikeObject| replace:: :ref:`RPChandler like object ` 10 | 11 | .. |ExecutionEngine| replace:: :class:`~fugue.execution.execution_engine.ExecutionEngine` 12 | .. |NativeExecutionEngine| replace:: :class:`~fugue.execution.native_execution_engine.NativeExecutionEngine` 13 | .. |FugueWorkflow| replace:: :class:`~fugue.workflow.workflow.FugueWorkflow` 14 | 15 | .. |ReadJoin| replace:: Read Join tutorials on :ref:`workflow ` and :ref:`engine ` for details 16 | .. |FugueConfig| replace:: :doc:`the Fugue Configuration Tutorial ` 17 | .. |PartitionTutorial| replace:: :doc:`the Partition Tutorial ` 18 | .. 
|FugueSQLTutorial| replace:: :doc:`the Fugue SQL Tutorial ` 19 | .. |DataFrameTutorial| replace:: :ref:`the DataFrame Tutorial ` 20 | .. |ExecutionEngineTutorial| replace:: :doc:`the ExecutionEngine Tutorial ` 21 | .. |ZipComap| replace:: :ref:`Zip & Comap ` 22 | .. |LoadSave| replace:: :ref:`Load & Save ` 23 | .. |AutoPersist| replace:: :ref:`Auto Persist ` 24 | .. |TransformerTutorial| replace:: :doc:`the Transformer Tutorial ` 25 | .. |CoTransformer| replace:: :ref:`CoTransformer ` 26 | .. |CoTransformerTutorial| replace:: :doc:`the CoTransformer Tutorial ` 27 | .. |FugueDataTypes| replace:: :doc:`Fugue Data Types ` 28 | 29 | 30 | fugue.collections.partition 31 | --------------------------- 32 | 33 | .. automodule:: fugue.collections.partition 34 | :members: 35 | :undoc-members: 36 | :show-inheritance: 37 | 38 | fugue.collections.sql 39 | --------------------- 40 | 41 | .. automodule:: fugue.collections.sql 42 | :members: 43 | :undoc-members: 44 | :show-inheritance: 45 | 46 | fugue.collections.yielded 47 | ------------------------- 48 | 49 | .. automodule:: fugue.collections.yielded 50 | :members: 51 | :undoc-members: 52 | :show-inheritance: 53 | 54 | -------------------------------------------------------------------------------- /docs/api/fugue.column.rst: -------------------------------------------------------------------------------- 1 | fugue.column 2 | ============= 3 | 4 | .. |SchemaLikeObject| replace:: :ref:`Schema like object ` 5 | .. |ParamsLikeObject| replace:: :ref:`Parameters like object ` 6 | .. |DataFrameLikeObject| replace:: :ref:`DataFrame like object ` 7 | .. |DataFramesLikeObject| replace:: :ref:`DataFrames like object ` 8 | .. |PartitionLikeObject| replace:: :ref:`Partition like object ` 9 | .. |RPCHandlerLikeObject| replace:: :ref:`RPChandler like object ` 10 | 11 | .. |ExecutionEngine| replace:: :class:`~fugue.execution.execution_engine.ExecutionEngine` 12 | .. |NativeExecutionEngine| replace:: :class:`~fugue.execution.native_execution_engine.NativeExecutionEngine` 13 | .. |FugueWorkflow| replace:: :class:`~fugue.workflow.workflow.FugueWorkflow` 14 | 15 | .. |ReadJoin| replace:: Read Join tutorials on :ref:`workflow ` and :ref:`engine ` for details 16 | .. |FugueConfig| replace:: :doc:`the Fugue Configuration Tutorial ` 17 | .. |PartitionTutorial| replace:: :doc:`the Partition Tutorial ` 18 | .. |FugueSQLTutorial| replace:: :doc:`the Fugue SQL Tutorial ` 19 | .. |DataFrameTutorial| replace:: :ref:`the DataFrame Tutorial ` 20 | .. |ExecutionEngineTutorial| replace:: :doc:`the ExecutionEngine Tutorial ` 21 | .. |ZipComap| replace:: :ref:`Zip & Comap ` 22 | .. |LoadSave| replace:: :ref:`Load & Save ` 23 | .. |AutoPersist| replace:: :ref:`Auto Persist ` 24 | .. |TransformerTutorial| replace:: :doc:`the Transformer Tutorial ` 25 | .. |CoTransformer| replace:: :ref:`CoTransformer ` 26 | .. |CoTransformerTutorial| replace:: :doc:`the CoTransformer Tutorial ` 27 | .. |FugueDataTypes| replace:: :doc:`Fugue Data Types ` 28 | 29 | 30 | fugue.column.expressions 31 | ------------------------ 32 | 33 | .. automodule:: fugue.column.expressions 34 | :members: 35 | :undoc-members: 36 | :show-inheritance: 37 | 38 | fugue.column.functions 39 | ---------------------- 40 | 41 | .. automodule:: fugue.column.functions 42 | :members: 43 | :undoc-members: 44 | :show-inheritance: 45 | 46 | fugue.column.sql 47 | ---------------- 48 | 49 | .. 
automodule:: fugue.column.sql 50 | :members: 51 | :undoc-members: 52 | :show-inheritance: 53 | 54 | -------------------------------------------------------------------------------- /docs/api/fugue.dataset.rst: -------------------------------------------------------------------------------- 1 | fugue.dataset 2 | ============== 3 | 4 | .. |SchemaLikeObject| replace:: :ref:`Schema like object ` 5 | .. |ParamsLikeObject| replace:: :ref:`Parameters like object ` 6 | .. |DataFrameLikeObject| replace:: :ref:`DataFrame like object ` 7 | .. |DataFramesLikeObject| replace:: :ref:`DataFrames like object ` 8 | .. |PartitionLikeObject| replace:: :ref:`Partition like object ` 9 | .. |RPCHandlerLikeObject| replace:: :ref:`RPChandler like object ` 10 | 11 | .. |ExecutionEngine| replace:: :class:`~fugue.execution.execution_engine.ExecutionEngine` 12 | .. |NativeExecutionEngine| replace:: :class:`~fugue.execution.native_execution_engine.NativeExecutionEngine` 13 | .. |FugueWorkflow| replace:: :class:`~fugue.workflow.workflow.FugueWorkflow` 14 | 15 | .. |ReadJoin| replace:: Read Join tutorials on :ref:`workflow ` and :ref:`engine ` for details 16 | .. |FugueConfig| replace:: :doc:`the Fugue Configuration Tutorial ` 17 | .. |PartitionTutorial| replace:: :doc:`the Partition Tutorial ` 18 | .. |FugueSQLTutorial| replace:: :doc:`the Fugue SQL Tutorial ` 19 | .. |DataFrameTutorial| replace:: :ref:`the DataFrame Tutorial ` 20 | .. |ExecutionEngineTutorial| replace:: :doc:`the ExecutionEngine Tutorial ` 21 | .. |ZipComap| replace:: :ref:`Zip & Comap ` 22 | .. |LoadSave| replace:: :ref:`Load & Save ` 23 | .. |AutoPersist| replace:: :ref:`Auto Persist ` 24 | .. |TransformerTutorial| replace:: :doc:`the Transformer Tutorial ` 25 | .. |CoTransformer| replace:: :ref:`CoTransformer ` 26 | .. |CoTransformerTutorial| replace:: :doc:`the CoTransformer Tutorial ` 27 | .. |FugueDataTypes| replace:: :doc:`Fugue Data Types ` 28 | 29 | 30 | fugue.dataset.api 31 | ----------------- 32 | 33 | .. automodule:: fugue.dataset.api 34 | :members: 35 | :undoc-members: 36 | :show-inheritance: 37 | 38 | fugue.dataset.dataset 39 | --------------------- 40 | 41 | .. automodule:: fugue.dataset.dataset 42 | :members: 43 | :undoc-members: 44 | :show-inheritance: 45 | 46 | -------------------------------------------------------------------------------- /docs/api/fugue.extensions.creator.rst: -------------------------------------------------------------------------------- 1 | fugue.extensions.creator 2 | ========================= 3 | 4 | .. |SchemaLikeObject| replace:: :ref:`Schema like object ` 5 | .. |ParamsLikeObject| replace:: :ref:`Parameters like object ` 6 | .. |DataFrameLikeObject| replace:: :ref:`DataFrame like object ` 7 | .. |DataFramesLikeObject| replace:: :ref:`DataFrames like object ` 8 | .. |PartitionLikeObject| replace:: :ref:`Partition like object ` 9 | .. |RPCHandlerLikeObject| replace:: :ref:`RPChandler like object ` 10 | 11 | .. |ExecutionEngine| replace:: :class:`~fugue.execution.execution_engine.ExecutionEngine` 12 | .. |NativeExecutionEngine| replace:: :class:`~fugue.execution.native_execution_engine.NativeExecutionEngine` 13 | .. |FugueWorkflow| replace:: :class:`~fugue.workflow.workflow.FugueWorkflow` 14 | 15 | .. |ReadJoin| replace:: Read Join tutorials on :ref:`workflow ` and :ref:`engine ` for details 16 | .. |FugueConfig| replace:: :doc:`the Fugue Configuration Tutorial ` 17 | .. |PartitionTutorial| replace:: :doc:`the Partition Tutorial ` 18 | .. |FugueSQLTutorial| replace:: :doc:`the Fugue SQL Tutorial ` 19 | .. 
|DataFrameTutorial| replace:: :ref:`the DataFrame Tutorial ` 20 | .. |ExecutionEngineTutorial| replace:: :doc:`the ExecutionEngine Tutorial ` 21 | .. |ZipComap| replace:: :ref:`Zip & Comap ` 22 | .. |LoadSave| replace:: :ref:`Load & Save ` 23 | .. |AutoPersist| replace:: :ref:`Auto Persist ` 24 | .. |TransformerTutorial| replace:: :doc:`the Transformer Tutorial ` 25 | .. |CoTransformer| replace:: :ref:`CoTransformer ` 26 | .. |CoTransformerTutorial| replace:: :doc:`the CoTransformer Tutorial ` 27 | .. |FugueDataTypes| replace:: :doc:`Fugue Data Types ` 28 | 29 | 30 | fugue.extensions.creator.convert 31 | -------------------------------- 32 | 33 | .. automodule:: fugue.extensions.creator.convert 34 | :members: 35 | :undoc-members: 36 | :show-inheritance: 37 | 38 | fugue.extensions.creator.creator 39 | -------------------------------- 40 | 41 | .. automodule:: fugue.extensions.creator.creator 42 | :members: 43 | :undoc-members: 44 | :show-inheritance: 45 | 46 | -------------------------------------------------------------------------------- /docs/api/fugue.extensions.outputter.rst: -------------------------------------------------------------------------------- 1 | fugue.extensions.outputter 2 | =========================== 3 | 4 | .. |SchemaLikeObject| replace:: :ref:`Schema like object ` 5 | .. |ParamsLikeObject| replace:: :ref:`Parameters like object ` 6 | .. |DataFrameLikeObject| replace:: :ref:`DataFrame like object ` 7 | .. |DataFramesLikeObject| replace:: :ref:`DataFrames like object ` 8 | .. |PartitionLikeObject| replace:: :ref:`Partition like object ` 9 | .. |RPCHandlerLikeObject| replace:: :ref:`RPChandler like object ` 10 | 11 | .. |ExecutionEngine| replace:: :class:`~fugue.execution.execution_engine.ExecutionEngine` 12 | .. |NativeExecutionEngine| replace:: :class:`~fugue.execution.native_execution_engine.NativeExecutionEngine` 13 | .. |FugueWorkflow| replace:: :class:`~fugue.workflow.workflow.FugueWorkflow` 14 | 15 | .. |ReadJoin| replace:: Read Join tutorials on :ref:`workflow ` and :ref:`engine ` for details 16 | .. |FugueConfig| replace:: :doc:`the Fugue Configuration Tutorial ` 17 | .. |PartitionTutorial| replace:: :doc:`the Partition Tutorial ` 18 | .. |FugueSQLTutorial| replace:: :doc:`the Fugue SQL Tutorial ` 19 | .. |DataFrameTutorial| replace:: :ref:`the DataFrame Tutorial ` 20 | .. |ExecutionEngineTutorial| replace:: :doc:`the ExecutionEngine Tutorial ` 21 | .. |ZipComap| replace:: :ref:`Zip & Comap ` 22 | .. |LoadSave| replace:: :ref:`Load & Save ` 23 | .. |AutoPersist| replace:: :ref:`Auto Persist ` 24 | .. |TransformerTutorial| replace:: :doc:`the Transformer Tutorial ` 25 | .. |CoTransformer| replace:: :ref:`CoTransformer ` 26 | .. |CoTransformerTutorial| replace:: :doc:`the CoTransformer Tutorial ` 27 | .. |FugueDataTypes| replace:: :doc:`Fugue Data Types ` 28 | 29 | 30 | fugue.extensions.outputter.convert 31 | ---------------------------------- 32 | 33 | .. automodule:: fugue.extensions.outputter.convert 34 | :members: 35 | :undoc-members: 36 | :show-inheritance: 37 | 38 | fugue.extensions.outputter.outputter 39 | ------------------------------------ 40 | 41 | .. automodule:: fugue.extensions.outputter.outputter 42 | :members: 43 | :undoc-members: 44 | :show-inheritance: 45 | 46 | -------------------------------------------------------------------------------- /docs/api/fugue.extensions.processor.rst: -------------------------------------------------------------------------------- 1 | fugue.extensions.processor 2 | =========================== 3 | 4 | .. 
|SchemaLikeObject| replace:: :ref:`Schema like object ` 5 | .. |ParamsLikeObject| replace:: :ref:`Parameters like object ` 6 | .. |DataFrameLikeObject| replace:: :ref:`DataFrame like object ` 7 | .. |DataFramesLikeObject| replace:: :ref:`DataFrames like object ` 8 | .. |PartitionLikeObject| replace:: :ref:`Partition like object ` 9 | .. |RPCHandlerLikeObject| replace:: :ref:`RPChandler like object ` 10 | 11 | .. |ExecutionEngine| replace:: :class:`~fugue.execution.execution_engine.ExecutionEngine` 12 | .. |NativeExecutionEngine| replace:: :class:`~fugue.execution.native_execution_engine.NativeExecutionEngine` 13 | .. |FugueWorkflow| replace:: :class:`~fugue.workflow.workflow.FugueWorkflow` 14 | 15 | .. |ReadJoin| replace:: Read Join tutorials on :ref:`workflow ` and :ref:`engine ` for details 16 | .. |FugueConfig| replace:: :doc:`the Fugue Configuration Tutorial ` 17 | .. |PartitionTutorial| replace:: :doc:`the Partition Tutorial ` 18 | .. |FugueSQLTutorial| replace:: :doc:`the Fugue SQL Tutorial ` 19 | .. |DataFrameTutorial| replace:: :ref:`the DataFrame Tutorial ` 20 | .. |ExecutionEngineTutorial| replace:: :doc:`the ExecutionEngine Tutorial ` 21 | .. |ZipComap| replace:: :ref:`Zip & Comap ` 22 | .. |LoadSave| replace:: :ref:`Load & Save ` 23 | .. |AutoPersist| replace:: :ref:`Auto Persist ` 24 | .. |TransformerTutorial| replace:: :doc:`the Transformer Tutorial ` 25 | .. |CoTransformer| replace:: :ref:`CoTransformer ` 26 | .. |CoTransformerTutorial| replace:: :doc:`the CoTransformer Tutorial ` 27 | .. |FugueDataTypes| replace:: :doc:`Fugue Data Types ` 28 | 29 | 30 | fugue.extensions.processor.convert 31 | ---------------------------------- 32 | 33 | .. automodule:: fugue.extensions.processor.convert 34 | :members: 35 | :undoc-members: 36 | :show-inheritance: 37 | 38 | fugue.extensions.processor.processor 39 | ------------------------------------ 40 | 41 | .. automodule:: fugue.extensions.processor.processor 42 | :members: 43 | :undoc-members: 44 | :show-inheritance: 45 | 46 | -------------------------------------------------------------------------------- /docs/api/fugue.extensions.rst: -------------------------------------------------------------------------------- 1 | fugue.extensions 2 | ================= 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | fugue.extensions.creator 8 | fugue.extensions.outputter 9 | fugue.extensions.processor 10 | fugue.extensions.transformer 11 | 12 | .. |SchemaLikeObject| replace:: :ref:`Schema like object ` 13 | .. |ParamsLikeObject| replace:: :ref:`Parameters like object ` 14 | .. |DataFrameLikeObject| replace:: :ref:`DataFrame like object ` 15 | .. |DataFramesLikeObject| replace:: :ref:`DataFrames like object ` 16 | .. |PartitionLikeObject| replace:: :ref:`Partition like object ` 17 | .. |RPCHandlerLikeObject| replace:: :ref:`RPChandler like object ` 18 | 19 | .. |ExecutionEngine| replace:: :class:`~fugue.execution.execution_engine.ExecutionEngine` 20 | .. |NativeExecutionEngine| replace:: :class:`~fugue.execution.native_execution_engine.NativeExecutionEngine` 21 | .. |FugueWorkflow| replace:: :class:`~fugue.workflow.workflow.FugueWorkflow` 22 | 23 | .. |ReadJoin| replace:: Read Join tutorials on :ref:`workflow ` and :ref:`engine ` for details 24 | .. |FugueConfig| replace:: :doc:`the Fugue Configuration Tutorial ` 25 | .. |PartitionTutorial| replace:: :doc:`the Partition Tutorial ` 26 | .. |FugueSQLTutorial| replace:: :doc:`the Fugue SQL Tutorial ` 27 | .. |DataFrameTutorial| replace:: :ref:`the DataFrame Tutorial ` 28 | .. 
|ExecutionEngineTutorial| replace:: :doc:`the ExecutionEngine Tutorial ` 29 | .. |ZipComap| replace:: :ref:`Zip & Comap ` 30 | .. |LoadSave| replace:: :ref:`Load & Save ` 31 | .. |AutoPersist| replace:: :ref:`Auto Persist ` 32 | .. |TransformerTutorial| replace:: :doc:`the Transformer Tutorial ` 33 | .. |CoTransformer| replace:: :ref:`CoTransformer ` 34 | .. |CoTransformerTutorial| replace:: :doc:`the CoTransformer Tutorial ` 35 | .. |FugueDataTypes| replace:: :doc:`Fugue Data Types ` 36 | 37 | 38 | fugue.extensions.context 39 | ------------------------ 40 | 41 | .. automodule:: fugue.extensions.context 42 | :members: 43 | :undoc-members: 44 | :show-inheritance: 45 | 46 | -------------------------------------------------------------------------------- /docs/api/fugue.rpc.rst: -------------------------------------------------------------------------------- 1 | fugue.rpc 2 | ========== 3 | 4 | .. |SchemaLikeObject| replace:: :ref:`Schema like object ` 5 | .. |ParamsLikeObject| replace:: :ref:`Parameters like object ` 6 | .. |DataFrameLikeObject| replace:: :ref:`DataFrame like object ` 7 | .. |DataFramesLikeObject| replace:: :ref:`DataFrames like object ` 8 | .. |PartitionLikeObject| replace:: :ref:`Partition like object ` 9 | .. |RPCHandlerLikeObject| replace:: :ref:`RPChandler like object ` 10 | 11 | .. |ExecutionEngine| replace:: :class:`~fugue.execution.execution_engine.ExecutionEngine` 12 | .. |NativeExecutionEngine| replace:: :class:`~fugue.execution.native_execution_engine.NativeExecutionEngine` 13 | .. |FugueWorkflow| replace:: :class:`~fugue.workflow.workflow.FugueWorkflow` 14 | 15 | .. |ReadJoin| replace:: Read Join tutorials on :ref:`workflow ` and :ref:`engine ` for details 16 | .. |FugueConfig| replace:: :doc:`the Fugue Configuration Tutorial ` 17 | .. |PartitionTutorial| replace:: :doc:`the Partition Tutorial ` 18 | .. |FugueSQLTutorial| replace:: :doc:`the Fugue SQL Tutorial ` 19 | .. |DataFrameTutorial| replace:: :ref:`the DataFrame Tutorial ` 20 | .. |ExecutionEngineTutorial| replace:: :doc:`the ExecutionEngine Tutorial ` 21 | .. |ZipComap| replace:: :ref:`Zip & Comap ` 22 | .. |LoadSave| replace:: :ref:`Load & Save ` 23 | .. |AutoPersist| replace:: :ref:`Auto Persist ` 24 | .. |TransformerTutorial| replace:: :doc:`the Transformer Tutorial ` 25 | .. |CoTransformer| replace:: :ref:`CoTransformer ` 26 | .. |CoTransformerTutorial| replace:: :doc:`the CoTransformer Tutorial ` 27 | .. |FugueDataTypes| replace:: :doc:`Fugue Data Types ` 28 | 29 | 30 | fugue.rpc.base 31 | -------------- 32 | 33 | .. automodule:: fugue.rpc.base 34 | :members: 35 | :undoc-members: 36 | :show-inheritance: 37 | 38 | fugue.rpc.flask 39 | --------------- 40 | 41 | .. automodule:: fugue.rpc.flask 42 | :members: 43 | :undoc-members: 44 | :show-inheritance: 45 | 46 | -------------------------------------------------------------------------------- /docs/api/fugue.sql.rst: -------------------------------------------------------------------------------- 1 | fugue.sql 2 | ========== 3 | 4 | .. |SchemaLikeObject| replace:: :ref:`Schema like object ` 5 | .. |ParamsLikeObject| replace:: :ref:`Parameters like object ` 6 | .. |DataFrameLikeObject| replace:: :ref:`DataFrame like object ` 7 | .. |DataFramesLikeObject| replace:: :ref:`DataFrames like object ` 8 | .. |PartitionLikeObject| replace:: :ref:`Partition like object ` 9 | .. |RPCHandlerLikeObject| replace:: :ref:`RPChandler like object ` 10 | 11 | .. |ExecutionEngine| replace:: :class:`~fugue.execution.execution_engine.ExecutionEngine` 12 | .. 
|NativeExecutionEngine| replace:: :class:`~fugue.execution.native_execution_engine.NativeExecutionEngine` 13 | .. |FugueWorkflow| replace:: :class:`~fugue.workflow.workflow.FugueWorkflow` 14 | 15 | .. |ReadJoin| replace:: Read Join tutorials on :ref:`workflow ` and :ref:`engine ` for details 16 | .. |FugueConfig| replace:: :doc:`the Fugue Configuration Tutorial ` 17 | .. |PartitionTutorial| replace:: :doc:`the Partition Tutorial ` 18 | .. |FugueSQLTutorial| replace:: :doc:`the Fugue SQL Tutorial ` 19 | .. |DataFrameTutorial| replace:: :ref:`the DataFrame Tutorial ` 20 | .. |ExecutionEngineTutorial| replace:: :doc:`the ExecutionEngine Tutorial ` 21 | .. |ZipComap| replace:: :ref:`Zip & Comap ` 22 | .. |LoadSave| replace:: :ref:`Load & Save ` 23 | .. |AutoPersist| replace:: :ref:`Auto Persist ` 24 | .. |TransformerTutorial| replace:: :doc:`the Transformer Tutorial ` 25 | .. |CoTransformer| replace:: :ref:`CoTransformer ` 26 | .. |CoTransformerTutorial| replace:: :doc:`the CoTransformer Tutorial ` 27 | .. |FugueDataTypes| replace:: :doc:`Fugue Data Types ` 28 | 29 | 30 | fugue.sql.api 31 | ------------- 32 | 33 | .. automodule:: fugue.sql.api 34 | :members: 35 | :undoc-members: 36 | :show-inheritance: 37 | 38 | fugue.sql.workflow 39 | ------------------ 40 | 41 | .. automodule:: fugue.sql.workflow 42 | :members: 43 | :undoc-members: 44 | :show-inheritance: 45 | 46 | -------------------------------------------------------------------------------- /docs/api/fugue.workflow.rst: -------------------------------------------------------------------------------- 1 | fugue.workflow 2 | =============== 3 | 4 | .. |SchemaLikeObject| replace:: :ref:`Schema like object ` 5 | .. |ParamsLikeObject| replace:: :ref:`Parameters like object ` 6 | .. |DataFrameLikeObject| replace:: :ref:`DataFrame like object ` 7 | .. |DataFramesLikeObject| replace:: :ref:`DataFrames like object ` 8 | .. |PartitionLikeObject| replace:: :ref:`Partition like object ` 9 | .. |RPCHandlerLikeObject| replace:: :ref:`RPChandler like object ` 10 | 11 | .. |ExecutionEngine| replace:: :class:`~fugue.execution.execution_engine.ExecutionEngine` 12 | .. |NativeExecutionEngine| replace:: :class:`~fugue.execution.native_execution_engine.NativeExecutionEngine` 13 | .. |FugueWorkflow| replace:: :class:`~fugue.workflow.workflow.FugueWorkflow` 14 | 15 | .. |ReadJoin| replace:: Read Join tutorials on :ref:`workflow ` and :ref:`engine ` for details 16 | .. |FugueConfig| replace:: :doc:`the Fugue Configuration Tutorial ` 17 | .. |PartitionTutorial| replace:: :doc:`the Partition Tutorial ` 18 | .. |FugueSQLTutorial| replace:: :doc:`the Fugue SQL Tutorial ` 19 | .. |DataFrameTutorial| replace:: :ref:`the DataFrame Tutorial ` 20 | .. |ExecutionEngineTutorial| replace:: :doc:`the ExecutionEngine Tutorial ` 21 | .. |ZipComap| replace:: :ref:`Zip & Comap ` 22 | .. |LoadSave| replace:: :ref:`Load & Save ` 23 | .. |AutoPersist| replace:: :ref:`Auto Persist ` 24 | .. |TransformerTutorial| replace:: :doc:`the Transformer Tutorial ` 25 | .. |CoTransformer| replace:: :ref:`CoTransformer ` 26 | .. |CoTransformerTutorial| replace:: :doc:`the CoTransformer Tutorial ` 27 | .. |FugueDataTypes| replace:: :doc:`Fugue Data Types ` 28 | 29 | 30 | fugue.workflow.api 31 | ------------------ 32 | 33 | .. automodule:: fugue.workflow.api 34 | :members: 35 | :undoc-members: 36 | :show-inheritance: 37 | 38 | fugue.workflow.input 39 | -------------------- 40 | 41 | .. 
automodule:: fugue.workflow.input 42 | :members: 43 | :undoc-members: 44 | :show-inheritance: 45 | 46 | fugue.workflow.module 47 | --------------------- 48 | 49 | .. automodule:: fugue.workflow.module 50 | :members: 51 | :undoc-members: 52 | :show-inheritance: 53 | 54 | fugue.workflow.workflow 55 | ----------------------- 56 | 57 | .. automodule:: fugue.workflow.workflow 58 | :members: 59 | :undoc-members: 60 | :show-inheritance: 61 | 62 | -------------------------------------------------------------------------------- /docs/api_ibis/fugue_ibis.execution.rst: -------------------------------------------------------------------------------- 1 | fugue\_ibis.execution 2 | ====================== 3 | 4 | .. |SchemaLikeObject| replace:: :ref:`Schema like object ` 5 | .. |ParamsLikeObject| replace:: :ref:`Parameters like object ` 6 | .. |DataFrameLikeObject| replace:: :ref:`DataFrame like object ` 7 | .. |DataFramesLikeObject| replace:: :ref:`DataFrames like object ` 8 | .. |PartitionLikeObject| replace:: :ref:`Partition like object ` 9 | .. |RPCHandlerLikeObject| replace:: :ref:`RPChandler like object ` 10 | 11 | .. |ExecutionEngine| replace:: :class:`~fugue.execution.execution_engine.ExecutionEngine` 12 | .. |NativeExecutionEngine| replace:: :class:`~fugue.execution.native_execution_engine.NativeExecutionEngine` 13 | .. |FugueWorkflow| replace:: :class:`~fugue.workflow.workflow.FugueWorkflow` 14 | 15 | .. |ReadJoin| replace:: Read Join tutorials on :ref:`workflow ` and :ref:`engine ` for details 16 | .. |FugueConfig| replace:: :doc:`the Fugue Configuration Tutorial ` 17 | .. |PartitionTutorial| replace:: :doc:`the Partition Tutorial ` 18 | .. |FugueSQLTutorial| replace:: :doc:`the Fugue SQL Tutorial ` 19 | .. |DataFrameTutorial| replace:: :ref:`the DataFrame Tutorial ` 20 | .. |ExecutionEngineTutorial| replace:: :doc:`the ExecutionEngine Tutorial ` 21 | .. |ZipComap| replace:: :ref:`Zip & Comap ` 22 | .. |LoadSave| replace:: :ref:`Load & Save ` 23 | .. |AutoPersist| replace:: :ref:`Auto Persist ` 24 | .. |TransformerTutorial| replace:: :doc:`the Transformer Tutorial ` 25 | .. |CoTransformer| replace:: :ref:`CoTransformer ` 26 | .. |CoTransformerTutorial| replace:: :doc:`the CoTransformer Tutorial ` 27 | .. |FugueDataTypes| replace:: :doc:`Fugue Data Types ` 28 | 29 | 30 | fugue\_ibis.execution.ibis\_engine 31 | ---------------------------------- 32 | 33 | .. automodule:: fugue_ibis.execution.ibis_engine 34 | :members: 35 | :undoc-members: 36 | :show-inheritance: 37 | 38 | fugue\_ibis.execution.pandas\_backend 39 | ------------------------------------- 40 | 41 | .. automodule:: fugue_ibis.execution.pandas_backend 42 | :members: 43 | :undoc-members: 44 | :show-inheritance: 45 | 46 | -------------------------------------------------------------------------------- /docs/api_ibis/fugue_ibis.rst: -------------------------------------------------------------------------------- 1 | fugue\_ibis 2 | ============ 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | fugue_ibis.execution 8 | 9 | .. |SchemaLikeObject| replace:: :ref:`Schema like object ` 10 | .. |ParamsLikeObject| replace:: :ref:`Parameters like object ` 11 | .. |DataFrameLikeObject| replace:: :ref:`DataFrame like object ` 12 | .. |DataFramesLikeObject| replace:: :ref:`DataFrames like object ` 13 | .. |PartitionLikeObject| replace:: :ref:`Partition like object ` 14 | .. |RPCHandlerLikeObject| replace:: :ref:`RPChandler like object ` 15 | 16 | .. |ExecutionEngine| replace:: :class:`~fugue.execution.execution_engine.ExecutionEngine` 17 | .. 
|NativeExecutionEngine| replace:: :class:`~fugue.execution.native_execution_engine.NativeExecutionEngine` 18 | .. |FugueWorkflow| replace:: :class:`~fugue.workflow.workflow.FugueWorkflow` 19 | 20 | .. |ReadJoin| replace:: Read Join tutorials on :ref:`workflow ` and :ref:`engine ` for details 21 | .. |FugueConfig| replace:: :doc:`the Fugue Configuration Tutorial ` 22 | .. |PartitionTutorial| replace:: :doc:`the Partition Tutorial ` 23 | .. |FugueSQLTutorial| replace:: :doc:`the Fugue SQL Tutorial ` 24 | .. |DataFrameTutorial| replace:: :ref:`the DataFrame Tutorial ` 25 | .. |ExecutionEngineTutorial| replace:: :doc:`the ExecutionEngine Tutorial ` 26 | .. |ZipComap| replace:: :ref:`Zip & Comap ` 27 | .. |LoadSave| replace:: :ref:`Load & Save ` 28 | .. |AutoPersist| replace:: :ref:`Auto Persist ` 29 | .. |TransformerTutorial| replace:: :doc:`the Transformer Tutorial ` 30 | .. |CoTransformer| replace:: :ref:`CoTransformer ` 31 | .. |CoTransformerTutorial| replace:: :doc:`the CoTransformer Tutorial ` 32 | .. |FugueDataTypes| replace:: :doc:`Fugue Data Types ` 33 | 34 | 35 | fugue\_ibis.dataframe 36 | --------------------- 37 | 38 | .. automodule:: fugue_ibis.dataframe 39 | :members: 40 | :undoc-members: 41 | :show-inheritance: 42 | 43 | fugue\_ibis.execution\_engine 44 | ----------------------------- 45 | 46 | .. automodule:: fugue_ibis.execution_engine 47 | :members: 48 | :undoc-members: 49 | :show-inheritance: 50 | 51 | fugue\_ibis.extensions 52 | ---------------------- 53 | 54 | .. automodule:: fugue_ibis.extensions 55 | :members: 56 | :undoc-members: 57 | :show-inheritance: 58 | 59 | -------------------------------------------------------------------------------- /docs/api_ray/fugue_ray.rst: -------------------------------------------------------------------------------- 1 | fugue\_ray 2 | =========== 3 | 4 | .. |SchemaLikeObject| replace:: :ref:`Schema like object ` 5 | .. |ParamsLikeObject| replace:: :ref:`Parameters like object ` 6 | .. |DataFrameLikeObject| replace:: :ref:`DataFrame like object ` 7 | .. |DataFramesLikeObject| replace:: :ref:`DataFrames like object ` 8 | .. |PartitionLikeObject| replace:: :ref:`Partition like object ` 9 | .. |RPCHandlerLikeObject| replace:: :ref:`RPChandler like object ` 10 | 11 | .. |ExecutionEngine| replace:: :class:`~fugue.execution.execution_engine.ExecutionEngine` 12 | .. |NativeExecutionEngine| replace:: :class:`~fugue.execution.native_execution_engine.NativeExecutionEngine` 13 | .. |FugueWorkflow| replace:: :class:`~fugue.workflow.workflow.FugueWorkflow` 14 | 15 | .. |ReadJoin| replace:: Read Join tutorials on :ref:`workflow ` and :ref:`engine ` for details 16 | .. |FugueConfig| replace:: :doc:`the Fugue Configuration Tutorial ` 17 | .. |PartitionTutorial| replace:: :doc:`the Partition Tutorial ` 18 | .. |FugueSQLTutorial| replace:: :doc:`the Fugue SQL Tutorial ` 19 | .. |DataFrameTutorial| replace:: :ref:`the DataFrame Tutorial ` 20 | .. |ExecutionEngineTutorial| replace:: :doc:`the ExecutionEngine Tutorial ` 21 | .. |ZipComap| replace:: :ref:`Zip & Comap ` 22 | .. |LoadSave| replace:: :ref:`Load & Save ` 23 | .. |AutoPersist| replace:: :ref:`Auto Persist ` 24 | .. |TransformerTutorial| replace:: :doc:`the Transformer Tutorial ` 25 | .. |CoTransformer| replace:: :ref:`CoTransformer ` 26 | .. |CoTransformerTutorial| replace:: :doc:`the CoTransformer Tutorial ` 27 | .. |FugueDataTypes| replace:: :doc:`Fugue Data Types ` 28 | 29 | 30 | fugue\_ray.dataframe 31 | -------------------- 32 | 33 | .. 
automodule:: fugue_ray.dataframe 34 | :members: 35 | :undoc-members: 36 | :show-inheritance: 37 | 38 | fugue\_ray.execution\_engine 39 | ---------------------------- 40 | 41 | .. automodule:: fugue_ray.execution_engine 42 | :members: 43 | :undoc-members: 44 | :show-inheritance: 45 | 46 | fugue\_ray.registry 47 | ------------------- 48 | 49 | .. automodule:: fugue_ray.registry 50 | :members: 51 | :undoc-members: 52 | :show-inheritance: 53 | 54 | -------------------------------------------------------------------------------- /docs/api_sql/fugue_sql.rst: -------------------------------------------------------------------------------- 1 | fugue\_sql 2 | =========== 3 | 4 | .. |SchemaLikeObject| replace:: :ref:`Schema like object ` 5 | .. |ParamsLikeObject| replace:: :ref:`Parameters like object ` 6 | .. |DataFrameLikeObject| replace:: :ref:`DataFrame like object ` 7 | .. |DataFramesLikeObject| replace:: :ref:`DataFrames like object ` 8 | .. |PartitionLikeObject| replace:: :ref:`Partition like object ` 9 | .. |RPCHandlerLikeObject| replace:: :ref:`RPChandler like object ` 10 | 11 | .. |ExecutionEngine| replace:: :class:`~fugue.execution.execution_engine.ExecutionEngine` 12 | .. |NativeExecutionEngine| replace:: :class:`~fugue.execution.native_execution_engine.NativeExecutionEngine` 13 | .. |FugueWorkflow| replace:: :class:`~fugue.workflow.workflow.FugueWorkflow` 14 | 15 | .. |ReadJoin| replace:: Read Join tutorials on :ref:`workflow ` and :ref:`engine ` for details 16 | .. |FugueConfig| replace:: :doc:`the Fugue Configuration Tutorial ` 17 | .. |PartitionTutorial| replace:: :doc:`the Partition Tutorial ` 18 | .. |FugueSQLTutorial| replace:: :doc:`the Fugue SQL Tutorial ` 19 | .. |DataFrameTutorial| replace:: :ref:`the DataFrame Tutorial ` 20 | .. |ExecutionEngineTutorial| replace:: :doc:`the ExecutionEngine Tutorial ` 21 | .. |ZipComap| replace:: :ref:`Zip & Comap ` 22 | .. |LoadSave| replace:: :ref:`Load & Save ` 23 | .. |AutoPersist| replace:: :ref:`Auto Persist ` 24 | .. |TransformerTutorial| replace:: :doc:`the Transformer Tutorial ` 25 | .. |CoTransformer| replace:: :ref:`CoTransformer ` 26 | .. |CoTransformerTutorial| replace:: :doc:`the CoTransformer Tutorial ` 27 | .. |FugueDataTypes| replace:: :doc:`Fugue Data Types ` 28 | 29 | 30 | fugue\_sql.exceptions 31 | --------------------- 32 | 33 | .. automodule:: fugue_sql.exceptions 34 | :members: 35 | :undoc-members: 36 | :show-inheritance: 37 | 38 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. Fugue documentation master file, created by 2 | sphinx-quickstart on Sun May 17 21:49:44 2020. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Fugue API Docs 7 | ============== 8 | 9 | Fugue is a unified interface for distributed computing that lets users execute Python, 10 | pandas, and SQL code on Spark, Dask, and Ray with minimal rewrites. 11 | 12 | This documentation page is mainly an API reference. To learn more about Fugue, the 13 | `Github repo README `_ and the 14 | `tutorials `_ will be the best places to start. 15 | The API reference is mainly for users looking for specific functions and methods. 16 | 17 | Installation 18 | ------------ 19 | 20 | Fugue is available on both pip and conda. `Detailed instructions `_ 21 | can be found on the README. 
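As a minimal, illustrative sketch of the unified interface (assuming only the base ``pip install fugue`` environment; distributed engine names such as ``"spark"`` or ``"dask"`` require the corresponding extras to be installed):

.. code-block:: python

    import pandas as pd
    import fugue.api as fa

    def add_one(df: pd.DataFrame) -> pd.DataFrame:
        return df.assign(b=df["a"] + 1)

    # runs on pandas by default; passing engine="spark" or engine="dask"
    # would run the same function distributed, with no rewrite
    res = fa.transform(pd.DataFrame({"a": [1, 2]}), add_one, schema="a:long,b:long")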
22 | 23 | Community 24 | --------- 25 | 26 | Please join the `Fugue Slack `_ 27 | to ask questions. We will try to reply as soon as possible. 28 | 29 | For contributing, start with the `contributing guide `_. 30 | 31 | 32 | .. toctree:: 33 | :maxdepth: 3 34 | :hidden: 35 | 36 | tutorials 37 | top_api 38 | api 39 | 40 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/tutorials.rst: -------------------------------------------------------------------------------- 1 | 2 | Fugue Tutorials 3 | ================ 4 | 5 | To directly read the tutorials without running them: 6 | 7 | .. toctree:: 8 | 9 | Tutorial Homepage 10 | For Beginners 11 | For Advanced Users 12 | For Fugue-SQL 13 | 14 | 15 | 16 | You may launch a 17 | `Fugue tutorial notebook environment on binder `_ 18 | 19 | **But it runs slowly on binder**, because the machine on binder isn't powerful enough for 20 | a distributed framework such as Spark. Parallel executions can become sequential, so some of the 21 | performance comparison examples will not give you the correct numbers. 22 | 23 | Alternatively, you should get decent performance by running its Docker image on your own machine: 24 | 25 | ..
code-block:: bash 26 | 27 | docker run -p 8888:8888 fugueproject/tutorials:latest 28 | 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /fugue/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from triad.collections import Schema 3 | 4 | from fugue.api import out_transform, transform 5 | from fugue.bag.array_bag import ArrayBag 6 | from fugue.bag.bag import Bag, BagDisplay 7 | from fugue.collections.partition import PartitionCursor, PartitionSpec 8 | from fugue.collections.sql import StructuredRawSQL, TempTableName 9 | from fugue.collections.yielded import PhysicalYielded, Yielded 10 | from fugue.constants import register_global_conf 11 | from fugue.dataframe.array_dataframe import ArrayDataFrame 12 | from fugue.dataframe.arrow_dataframe import ArrowDataFrame 13 | from fugue.dataframe.dataframe import ( 14 | AnyDataFrame, 15 | DataFrame, 16 | DataFrameDisplay, 17 | LocalBoundedDataFrame, 18 | LocalDataFrame, 19 | ) 20 | from fugue.dataframe.dataframe_iterable_dataframe import ( 21 | IterableArrowDataFrame, 22 | IterablePandasDataFrame, 23 | LocalDataFrameIterableDataFrame, 24 | ) 25 | from fugue.dataframe.dataframes import DataFrames 26 | from fugue.dataframe.iterable_dataframe import IterableDataFrame 27 | from fugue.dataframe.pandas_dataframe import PandasDataFrame 28 | from fugue.dataset import ( 29 | AnyDataset, 30 | Dataset, 31 | DatasetDisplay, 32 | as_fugue_dataset, 33 | get_dataset_display, 34 | ) 35 | from fugue.execution.execution_engine import ( 36 | AnyExecutionEngine, 37 | EngineFacet, 38 | ExecutionEngine, 39 | MapEngine, 40 | SQLEngine, 41 | ) 42 | from fugue.execution.factory import ( 43 | is_pandas_or, 44 | make_execution_engine, 45 | make_sql_engine, 46 | register_default_execution_engine, 47 | register_default_sql_engine, 48 | register_execution_engine, 49 | register_sql_engine, 50 | ) 51 | from fugue.execution.native_execution_engine import ( 52 | NativeExecutionEngine, 53 | PandasMapEngine, 54 | QPDPandasEngine, 55 | ) 56 | from fugue.extensions.creator import Creator, creator, register_creator 57 | from fugue.extensions.outputter import Outputter, outputter, register_outputter 58 | from fugue.extensions.processor import Processor, processor, register_processor 59 | from fugue.extensions.transformer import ( 60 | CoTransformer, 61 | OutputCoTransformer, 62 | OutputTransformer, 63 | Transformer, 64 | cotransformer, 65 | output_cotransformer, 66 | output_transformer, 67 | register_output_transformer, 68 | register_transformer, 69 | transformer, 70 | ) 71 | from fugue.registry import _register 72 | from fugue.rpc import ( 73 | EmptyRPCHandler, 74 | RPCClient, 75 | RPCFunc, 76 | RPCHandler, 77 | RPCServer, 78 | make_rpc_server, 79 | to_rpc_handler, 80 | ) 81 | from fugue.sql.api import fugue_sql_flow as fsql 82 | from fugue.sql.workflow import FugueSQLWorkflow 83 | from fugue.workflow._workflow_context import FugueWorkflowContext 84 | from fugue.workflow.module import module 85 | from fugue.workflow.workflow import FugueWorkflow, WorkflowDataFrame, WorkflowDataFrames 86 | from fugue_version import __version__ 87 | 88 | from .dev import * 89 | 90 | _register() 91 | -------------------------------------------------------------------------------- /fugue/_utils/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/fugue-project/fugue/596d28eea1834ecc9087b864b83e1b67b9748977/fugue/_utils/__init__.py -------------------------------------------------------------------------------- /fugue/_utils/exception.py: -------------------------------------------------------------------------------- 1 | from types import FrameType, TracebackType 2 | from typing import Callable, List, Optional 3 | 4 | _MODIFIED_EXCEPTION_VAR_NAME = "__modified_exception__" 5 | 6 | 7 | def frames_to_traceback( 8 | frame: Optional[FrameType], 9 | limit: int, 10 | should_prune: Optional[Callable[[str], bool]] = None, 11 | ) -> Optional[TracebackType]: 12 | ctb: Optional[TracebackType] = None 13 | skipped = False 14 | while frame is not None and limit > 0: 15 | if _MODIFIED_EXCEPTION_VAR_NAME in frame.f_locals: 16 | return TracebackType( 17 | tb_next=None, 18 | tb_frame=frame, 19 | tb_lasti=frame.f_lasti, 20 | tb_lineno=frame.f_lineno, 21 | ) 22 | if not skipped: 23 | if should_prune is not None and should_prune(frame.f_globals["__name__"]): 24 | frame = frame.f_back 25 | continue 26 | skipped = True 27 | if should_prune is None or not should_prune(frame.f_globals["__name__"]): 28 | ctb = TracebackType( 29 | tb_next=ctb, 30 | tb_frame=frame, 31 | tb_lasti=frame.f_lasti, 32 | tb_lineno=frame.f_lineno, 33 | ) 34 | limit -= 1 35 | frame = frame.f_back 36 | continue 37 | break  # pragma: no cover 38 | 39 | return ctb 40 | 41 | 42 | def modify_traceback( 43 | traceback: Optional[TracebackType], 44 | should_prune: Optional[Callable[[str], bool]] = None, 45 | add_traceback: Optional[TracebackType] = None, 46 | ) -> Optional[TracebackType]: 47 | ctb: Optional[TracebackType] = None 48 | 49 | # get stack 50 | stack: List[TracebackType] = [] 51 | 52 | if add_traceback is not None: 53 | f: Optional[TracebackType] = add_traceback 54 | while f is not None: 55 | stack.append(f) 56 | f = f.tb_next 57 | f = traceback 58 | while f is not None: 59 | stack.append(f) 60 | f = f.tb_next 61 | stack.reverse() 62 | 63 | # prune and reconstruct 64 | for n, f in enumerate(stack): 65 | if ( 66 | n == 0 67 | or should_prune is None 68 | or not should_prune(f.tb_frame.f_globals["__name__"]) 69 | ): 70 | ctb = TracebackType( 71 | tb_next=ctb, 72 | tb_frame=f.tb_frame, 73 | tb_lasti=f.tb_lasti, 74 | tb_lineno=f.tb_lineno, 75 | ) 76 | 77 | return ctb 78 | -------------------------------------------------------------------------------- /fugue/_utils/interfaceless.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | from typing import Callable, Optional 3 | 4 | from triad.utils.assertion import assert_or_throw 5 | 6 | _COMMENT_SCHEMA_ANNOTATION = "schema" 7 | 8 | 9 | def parse_comment_annotation(func: Callable, annotation: str) -> Optional[str]: 10 | """Parse the comment annotation above the function. It tries to find 11 | comment lines starting with the annotation from bottom up, and will use the first 12 | occurrence as the result. 13 | 14 | :param func: the function 15 | :param annotation: the annotation string 16 | :return: schema hint string 17 | 18 | .. admonition:: Examples 19 | 20 | ..
code-block:: python 21 | 22 | # schema: a:int,b:str 23 | #schema:a:int,b:int # more comment 24 | # some comment 25 | def dummy(): 26 | pass 27 | 28 | assert "a:int,b:int" == parse_comment_annotation(dummy, "schema:") 29 | """ 30 | for orig in reversed((inspect.getcomments(func) or "").splitlines()): 31 | start = orig.find(":") 32 | if start <= 0: 33 | continue 34 | actual = orig[:start].replace("#", "", 1).strip() 35 | if actual != annotation: 36 | continue 37 | end = orig.find("#", start) 38 | s = orig[start + 1 : (end if end > 0 else len(orig))].strip() 39 | return s 40 | return None 41 | 42 | 43 | def parse_output_schema_from_comment(func: Callable) -> Optional[str]: 44 | """Parse the schema hint from the comments above the function. It tries to find 45 | comment lines starting with `schema:` from bottom up, and will use the first 46 | occurrence as the hint. 47 | 48 | :param func: the function 49 | :return: schema hint string 50 | 51 | .. admonition:: Examples 52 | 53 | .. code-block:: python 54 | 55 | # schema: a:int,b:str 56 | #schema:a:int,b:int # more comment 57 | # some comment 58 | def dummy(): 59 | pass 60 | 61 | assert "a:int,b:int" == parse_output_schema_from_comment(dummy) 62 | """ 63 | res = parse_comment_annotation(func, _COMMENT_SCHEMA_ANNOTATION) 64 | if res is None: 65 | return None 66 | assert_or_throw(res != "", SyntaxError("incorrect schema annotation")) 67 | return res.strip() 68 | 69 | 70 | def is_class_method(func: Callable) -> bool: 71 | sig = inspect.signature(func) 72 | # TODO: this is not the best way 73 | return "self" in sig.parameters 74 | -------------------------------------------------------------------------------- /fugue/_utils/misc.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Type, TypeVar 2 | 3 | from triad.utils.assertion import assert_or_throw 4 | 5 | T = TypeVar("T") 6 | 7 | 8 | def get_attribute(obj: object, attr_name: str, data_type: Type[T]) -> T: 9 | if attr_name not in obj.__dict__ or obj.__dict__[attr_name] is None: 10 | obj.__dict__[attr_name] = data_type() 11 | assert_or_throw( 12 | isinstance(obj.__dict__[attr_name], data_type), 13 | lambda: TypeError(f"{obj.__dict__[attr_name]} is not type {data_type}"), 14 | ) 15 | return obj.__dict__[attr_name] 16 | 17 | 18 | def import_or_throw(package_name: str, message: str) -> Any: 19 | try: 20 | return __import__(package_name) 21 | except Exception as e: # pragma: no cover 22 | raise ImportError(str(e) + ". " + message) 23 | 24 | 25 | def import_fsql_dependency(package_name: str) -> Any: 26 | return import_or_throw( 27 | package_name, "Please try to install the package by `pip install fugue[sql]`."
28 | ) 29 | -------------------------------------------------------------------------------- /fugue/_utils/registry.py: -------------------------------------------------------------------------------- 1 | from typing import Callable 2 | 3 | from triad import conditional_dispatcher 4 | from triad.utils.dispatcher import ConditionalDispatcher 5 | 6 | from ..constants import FUGUE_ENTRYPOINT 7 | 8 | 9 | def fugue_plugin(func: Callable) -> ConditionalDispatcher: 10 | return conditional_dispatcher(entry_point=FUGUE_ENTRYPOINT)(func) # type: ignore 11 | -------------------------------------------------------------------------------- /fugue/api.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # pylint: disable-all 3 | from .dataframe.api import ( 4 | alter_columns, 5 | as_array, 6 | as_array_iterable, 7 | as_arrow, 8 | as_dict_iterable, 9 | as_dicts, 10 | as_fugue_df, 11 | as_pandas, 12 | drop_columns, 13 | get_column_names, 14 | get_native_as_df, 15 | get_schema, 16 | head, 17 | is_df, 18 | normalize_column_names, 19 | peek_array, 20 | peek_dict, 21 | rename, 22 | select_columns, 23 | ) 24 | from .dataset.api import ( 25 | as_fugue_dataset, 26 | as_local, 27 | as_local_bounded, 28 | count, 29 | get_num_partitions, 30 | is_bounded, 31 | is_empty, 32 | is_local, 33 | show, 34 | ) 35 | from .execution.api import ( 36 | aggregate, 37 | anti_join, 38 | as_fugue_engine_df, 39 | assign, 40 | broadcast, 41 | clear_global_engine, 42 | cross_join, 43 | distinct, 44 | dropna, 45 | engine_context, 46 | fillna, 47 | filter, 48 | full_outer_join, 49 | get_context_engine, 50 | get_current_conf, 51 | get_current_parallelism, 52 | inner_join, 53 | intersect, 54 | join, 55 | left_outer_join, 56 | load, 57 | persist, 58 | repartition, 59 | right_outer_join, 60 | run_engine_function, 61 | sample, 62 | save, 63 | select, 64 | semi_join, 65 | set_global_engine, 66 | subtract, 67 | take, 68 | union, 69 | ) 70 | from .sql.api import fugue_sql, fugue_sql_flow 71 | from .workflow.api import out_transform, raw_sql, transform 72 | -------------------------------------------------------------------------------- /fugue/bag/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .bag import Bag, LocalBag 3 | -------------------------------------------------------------------------------- /fugue/bag/array_bag.py: -------------------------------------------------------------------------------- 1 | from types import GeneratorType 2 | from typing import Any, Iterable, List 3 | 4 | from ..exceptions import FugueDatasetEmptyError 5 | from .bag import LocalBoundedBag 6 | 7 | 8 | class ArrayBag(LocalBoundedBag): 9 | def __init__(self, data: Any, copy: bool = True): 10 | if isinstance(data, list): 11 | self._native = list(data) if copy else data 12 | elif isinstance(data, (GeneratorType, Iterable)): 13 | self._native = list(data) 14 | else: 15 | raise ValueError(f"{type(data)} can't be converted to ArrayBag") 16 | super().__init__() 17 | 18 | @property 19 | def native(self) -> List[Any]: 20 | """The underlying Python list object""" 21 | return self._native 22 | 23 | @property 24 | def empty(self) -> bool: 25 | return len(self._native) == 0 26 | 27 | def count(self) -> int: 28 | return len(self._native) 29 | 30 | def peek(self) -> Any: 31 | if self.count() == 0: 32 | raise FugueDatasetEmptyError() 33 | return self._native[0] 34 | 35 | def as_array(self) -> List[Any]: 36 | return list(self._native) 37 | 38 | def 
head(self, n: int) -> LocalBoundedBag: 39 | return ArrayBag(self._native[:n]) 40 | -------------------------------------------------------------------------------- /fugue/collections/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fugue-project/fugue/596d28eea1834ecc9087b864b83e1b67b9748977/fugue/collections/__init__.py -------------------------------------------------------------------------------- /fugue/collections/yielded.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | from triad import assert_or_throw 4 | from triad.utils.hash import to_uuid 5 | 6 | 7 | class Yielded(object): 8 | """Yields from :class:`~fugue.workflow.workflow.FugueWorkflow`. 9 | Users shouldn't create this object directly. 10 | 11 | :param yid: unique id for determinism 12 | """ 13 | 14 | def __init__(self, yid: str): 15 | self._yid = to_uuid(yid) 16 | 17 | def __uuid__(self) -> str: 18 | """uuid of the instance""" 19 | return self._yid 20 | 21 | @property 22 | def is_set(self) -> bool: # pragma: no cover 23 | """Whether the value is set. It can be false if the parent workflow 24 | has not been executed. 25 | """ 26 | raise NotImplementedError 27 | 28 | def __copy__(self) -> Any: # pragma: no cover 29 | """``copy`` should have no effect""" 30 | return self 31 | 32 | def __deepcopy__(self, memo: Any) -> Any: # pragma: no cover 33 | """``deepcopy`` should have no effect""" 34 | return self 35 | 36 | 37 | class PhysicalYielded(Yielded): 38 | """Physical yielded object from :class:`~fugue.workflow.workflow.FugueWorkflow`. 39 | Users shouldn't create this object directly. 40 | 41 | :param yid: unique id for determinism 42 | :param storage_type: ``file`` or ``table`` 43 | """ 44 | 45 | def __init__(self, yid: str, storage_type: str): 46 | super().__init__(yid) 47 | self._name = "" 48 | assert_or_throw( 49 | storage_type in ["file", "table"], 50 | ValueError(f"{storage_type} not in (file, table) "), 51 | ) 52 | self._storage_type = storage_type 53 | 54 | @property 55 | def is_set(self) -> bool: 56 | return self._name != "" 57 | 58 | def set_value(self, name: str) -> None: 59 | """Set the storage name after compute 60 | 61 | :param name: name reference of the storage 62 | """ 63 | self._name = name 64 | 65 | @property 66 | def name(self) -> str: 67 | """The name reference of the yield""" 68 | assert_or_throw(self.is_set, "value is not set") 69 | return self._name 70 | 71 | @property 72 | def storage_type(self) -> str: 73 | """The storage type of this yield""" 74 | return self._storage_type 75 | -------------------------------------------------------------------------------- /fugue/column/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from fugue.column.expressions import ColumnExpr, all_cols, col, function, lit, null 3 | from fugue.column.functions import is_agg 4 | from fugue.column.sql import SelectColumns, SQLExpressionGenerator 5 | -------------------------------------------------------------------------------- /fugue/dataframe/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .api import * 3 | from .array_dataframe import ArrayDataFrame 4 | from .arrow_dataframe import ArrowDataFrame 5 | from .dataframe import ( 6 | AnyDataFrame, 7 | DataFrame, 8 | LocalBoundedDataFrame, 9 | LocalDataFrame, 10 | YieldedDataFrame, 11 | ) 12 | from 
.dataframe_iterable_dataframe import ( 13 | IterableArrowDataFrame, 14 | IterablePandasDataFrame, 15 | LocalDataFrameIterableDataFrame, 16 | ) 17 | from .dataframes import DataFrames 18 | from .function_wrapper import DataFrameFunctionWrapper, fugue_annotated_param 19 | from .iterable_dataframe import IterableDataFrame 20 | from .pandas_dataframe import PandasDataFrame 21 | from .utils import get_column_names, normalize_dataframe_column_names, rename 22 | -------------------------------------------------------------------------------- /fugue/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .api import * 3 | from .dataset import AnyDataset, Dataset, DatasetDisplay, get_dataset_display 4 | -------------------------------------------------------------------------------- /fugue/dev.py: -------------------------------------------------------------------------------- 1 | """ 2 | All modules for developing and extending Fugue 3 | """ 4 | # flake8: noqa 5 | # pylint: disable-all 6 | 7 | from triad.collections.function_wrapper import AnnotatedParam 8 | 9 | from fugue.bag.bag import BagDisplay 10 | from fugue.collections.partition import PartitionCursor, PartitionSpec 11 | from fugue.collections.sql import StructuredRawSQL, TempTableName 12 | from fugue.collections.yielded import PhysicalYielded, Yielded 13 | from fugue.dataframe.function_wrapper import ( 14 | DataFrameFunctionWrapper, 15 | DataFrameParam, 16 | LocalDataFrameParam, 17 | fugue_annotated_param, 18 | ) 19 | from fugue.dataset import DatasetDisplay 20 | from fugue.execution.execution_engine import ( 21 | EngineFacet, 22 | ExecutionEngineParam, 23 | MapEngine, 24 | SQLEngine, 25 | ) 26 | from fugue.execution.factory import ( 27 | is_pandas_or, 28 | make_execution_engine, 29 | make_sql_engine, 30 | register_default_execution_engine, 31 | register_default_sql_engine, 32 | register_execution_engine, 33 | register_sql_engine, 34 | ) 35 | from fugue.execution.native_execution_engine import PandasMapEngine, QPDPandasEngine 36 | from fugue.rpc import ( 37 | EmptyRPCHandler, 38 | RPCClient, 39 | RPCFunc, 40 | RPCHandler, 41 | RPCServer, 42 | make_rpc_server, 43 | to_rpc_handler, 44 | ) 45 | from fugue.workflow._workflow_context import FugueWorkflowContext 46 | from fugue.workflow.module import module 47 | from fugue.workflow.workflow import FugueWorkflow, WorkflowDataFrame, WorkflowDataFrames 48 | -------------------------------------------------------------------------------- /fugue/exceptions.py: -------------------------------------------------------------------------------- 1 | class FugueError(Exception): 2 | """Fugue exceptions""" 3 | 4 | 5 | class FugueBug(FugueError): 6 | """Fugue internal bug""" 7 | 8 | 9 | class FugueInvalidOperation(FugueError): 10 | """Invalid operation on the Fugue framework""" 11 | 12 | 13 | class FuguePluginsRegistrationError(FugueError): 14 | """Fugue plugins registration error""" 15 | 16 | 17 | class FugueDataFrameError(FugueError): 18 | """Fugue dataframe related error""" 19 | 20 | 21 | class FugueDataFrameInitError(FugueDataFrameError): 22 | """Fugue dataframe initialization error""" 23 | 24 | 25 | class FugueDatasetEmptyError(FugueDataFrameError): 26 | """Fugue dataframe is empty""" 27 | 28 | 29 | class FugueDataFrameOperationError(FugueDataFrameError): 30 | """Fugue dataframe invalid operation""" 31 | 32 | 33 | class FugueWorkflowError(FugueError): 34 | """Fugue workflow exceptions""" 35 | 36 | 37 | class
FugueWorkflowCompileError(FugueWorkflowError): 38 | """Fugue workflow compile time error""" 39 | 40 | 41 | class FugueWorkflowCompileValidationError(FugueWorkflowCompileError): 42 | """Fugue workflow compile time validation error""" 43 | 44 | 45 | class FugueInterfacelessError(FugueWorkflowCompileError): 46 | """Fugue interfaceless exceptions""" 47 | 48 | 49 | class FugueWorkflowRuntimeError(FugueWorkflowError): 50 | """Fugue workflow runtime error""" 51 | 52 | 53 | class FugueWorkflowRuntimeValidationError(FugueWorkflowRuntimeError): 54 | """Fugue workflow runtime validation error""" 55 | 56 | 57 | class FugueSQLError(FugueWorkflowCompileError): 58 | """Fugue SQL error""" 59 | 60 | 61 | class FugueSQLSyntaxError(FugueSQLError): 62 | """Fugue SQL syntax error""" 63 | 64 | 65 | class FugueSQLRuntimeError(FugueWorkflowRuntimeError): 66 | """Fugue SQL runtime error""" 67 | -------------------------------------------------------------------------------- /fugue/execution/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .api import * 3 | from .execution_engine import AnyExecutionEngine, ExecutionEngine, MapEngine, SQLEngine 4 | from .factory import ( 5 | infer_execution_engine, 6 | make_execution_engine, 7 | make_sql_engine, 8 | register_default_execution_engine, 9 | register_default_sql_engine, 10 | register_execution_engine, 11 | register_sql_engine, 12 | ) 13 | from .native_execution_engine import NativeExecutionEngine, QPDPandasEngine 14 | -------------------------------------------------------------------------------- /fugue/extensions/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from ._utils import namespace_candidate 3 | from .creator import Creator, creator, parse_creator, register_creator 4 | from .outputter import Outputter, outputter, parse_outputter, register_outputter 5 | from .processor import Processor, parse_processor, processor, register_processor 6 | from .transformer import ( 7 | CoTransformer, 8 | OutputCoTransformer, 9 | OutputTransformer, 10 | Transformer, 11 | cotransformer, 12 | output_cotransformer, 13 | output_transformer, 14 | parse_output_transformer, 15 | parse_transformer, 16 | register_output_transformer, 17 | register_transformer, 18 | transformer, 19 | ) 20 | -------------------------------------------------------------------------------- /fugue/extensions/_builtins/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from fugue.extensions._builtins.creators import Load, CreateData 3 | from fugue.extensions._builtins.outputters import ( 4 | AssertEqual, 5 | AssertNotEqual, 6 | RunOutputTransformer, 7 | Save, 8 | Show, 9 | ) 10 | from fugue.extensions._builtins.processors import ( 11 | Aggregate, 12 | AlterColumns, 13 | Assign, 14 | Distinct, 15 | DropColumns, 16 | Dropna, 17 | Fillna, 18 | Filter, 19 | Rename, 20 | RunJoin, 21 | RunSetOperation, 22 | RunSQLSelect, 23 | RunTransformer, 24 | Sample, 25 | SaveAndUse, 26 | Select, 27 | SelectColumns, 28 | Take, 29 | Zip, 30 | ) 31 | -------------------------------------------------------------------------------- /fugue/extensions/_builtins/creators.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Callable, Optional 2 | 3 | from triad import Schema, assert_or_throw, to_uuid 4 | 5 | from fugue.collections.yielded import Yielded 6 | from
fugue.dataframe import DataFrame 7 | from fugue.exceptions import FugueWorkflowCompileError 8 | from fugue.execution.api import as_fugue_engine_df 9 | from fugue.extensions.creator import Creator 10 | 11 | 12 | class Load(Creator): 13 | def create(self) -> DataFrame: 14 | kwargs = self.params.get("params", dict()) 15 | path = self.params.get_or_throw("path", str) 16 | format_hint = self.params.get("fmt", "") 17 | columns = self.params.get_or_none("columns", object) 18 | 19 | return self.execution_engine.load_df( 20 | path=path, format_hint=format_hint, columns=columns, **kwargs 21 | ) 22 | 23 | 24 | class CreateData(Creator): 25 | def __init__( 26 | self, 27 | df: Any, 28 | schema: Any = None, 29 | data_determiner: Optional[Callable[[Any], Any]] = None, 30 | ) -> None: 31 | if isinstance(df, Yielded): 32 | assert_or_throw( 33 | schema is None, 34 | FugueWorkflowCompileError("schema must be None when data is Yielded"), 35 | ) 36 | super().__init__() 37 | self._df = df 38 | self._schema = schema if schema is None else Schema(schema) 39 | self._data_determiner = data_determiner 40 | 41 | def create(self) -> DataFrame: 42 | if isinstance(self._df, Yielded): 43 | return self.execution_engine.load_yielded(self._df) 44 | return as_fugue_engine_df(self.execution_engine, self._df, schema=self._schema) 45 | 46 | def _df_uid(self): 47 | if self._data_determiner is not None: 48 | return self._data_determiner(self._df) 49 | if isinstance(self._df, Yielded): 50 | return self._df 51 | return 1 52 | 53 | def __uuid__(self) -> str: 54 | return to_uuid(super().__uuid__(), self._df_uid(), self._schema) 55 | -------------------------------------------------------------------------------- /fugue/extensions/creator/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from fugue.extensions.creator.convert import ( 3 | _to_creator, 4 | creator, 5 | parse_creator, 6 | register_creator, 7 | ) 8 | from fugue.extensions.creator.creator import Creator 9 | -------------------------------------------------------------------------------- /fugue/extensions/creator/creator.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | from fugue.dataframe import DataFrame 4 | from fugue.extensions.context import ExtensionContext 5 | 6 | 7 | class Creator(ExtensionContext, ABC): 8 | """The interface to generate a single DataFrame from `params`. 9 | For example, reading data from a file should be a type of Creator. 10 | Creator is a task-level extension; it runs on the driver and is execution-engine aware. 11 | 12 | To implement this class, you should not define ``__init__``; instead, directly implement 13 | the interface functions. 14 | 15 | .. note:: 16 | 17 | Before implementing this class, do you really need to implement this 18 | interface? Do you know the interfaceless feature of Fugue? Implementing Creator 19 | is commonly unnecessary. You can choose the interfaceless approach which may 20 | decouple your code from Fugue. 21 | 22 | .. seealso:: 23 | 24 | Please read :doc:`Creator Tutorial ` 25 | """ 26 | 27 | @abstractmethod 28 | def create(self) -> DataFrame:  # pragma: no cover 29 | """Create a DataFrame on the driver side 30 | 31 | ..
note:: 32 | 33 | * It runs on the driver side 34 | * The output dataframe is not necessarily local, for example a SparkDataFrame 35 | * It is engine aware: you can put platform-dependent code in it (for example 36 | native pyspark code), but by doing so your code may not be portable. If you 37 | only use the functions of the general ExecutionEngine interface, it's still 38 | portable. 39 | 40 | :return: result dataframe 41 | """ 42 | raise NotImplementedError 43 | -------------------------------------------------------------------------------- /fugue/extensions/outputter/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from fugue.extensions.outputter.convert import ( 3 | _to_outputter, 4 | outputter, 5 | parse_outputter, 6 | register_outputter, 7 | ) 8 | from fugue.extensions.outputter.outputter import Outputter 9 | -------------------------------------------------------------------------------- /fugue/extensions/outputter/outputter.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | from fugue.dataframe import DataFrames 4 | from fugue.extensions.context import ExtensionContext 5 | 6 | 7 | class Outputter(ExtensionContext, ABC): 8 | """The interface to process one or multiple incoming dataframes without returning 9 | anything. For example, printing or saving dataframes should be a type of Outputter. 10 | Outputter is a task-level extension; it runs on the driver and is execution-engine aware. 11 | 12 | To implement this class, you should not define ``__init__``; instead, directly implement 13 | the interface functions. 14 | 15 | .. note:: 16 | 17 | Before implementing this class, do you really need to implement this 18 | interface? Do you know the interfaceless feature of Fugue? Implementing Outputter 19 | is commonly unnecessary. You can choose the interfaceless approach which may 20 | decouple your code from Fugue. 21 | 22 | .. seealso:: 23 | 24 | Please read 25 | :doc:`Outputter Tutorial ` 26 | """ 27 | 28 | @abstractmethod 29 | def process(self, dfs: DataFrames) -> None:  # pragma: no cover 30 | """Process the collection of dataframes on the driver side 31 | 32 | .. note:: 33 | 34 | * It runs on the driver side 35 | * The dataframes are not necessarily local, for example a SparkDataFrame 36 | * It is engine aware: you can put platform-dependent code in it (for example 37 | native pyspark code), but by doing so your code may not be portable. If you 38 | only use the functions of the general ExecutionEngine, it's still portable.
39 | 40 | :param dfs: dataframe collection to process 41 | """ 42 | raise NotImplementedError 43 | -------------------------------------------------------------------------------- /fugue/extensions/processor/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from fugue.extensions.processor.convert import ( 3 | _to_processor, 4 | parse_processor, 5 | processor, 6 | register_processor, 7 | ) 8 | from fugue.extensions.processor.processor import Processor 9 | -------------------------------------------------------------------------------- /fugue/extensions/processor/processor.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | from fugue.dataframe import DataFrame, DataFrames 4 | from fugue.extensions.context import ExtensionContext 5 | 6 | 7 | class Processor(ExtensionContext, ABC): 8 | """The interface to process one or multiple incoming dataframes and return one 9 | DataFrame. For example, dropping a column of df should be a type of Processor. 10 | Processor is a task-level extension; it runs on the driver and is execution-engine aware. 11 | 12 | To implement this class, you should not define ``__init__``; instead, directly implement 13 | the interface functions. 14 | 15 | .. note:: 16 | 17 | Before implementing this class, do you really need to implement this 18 | interface? Do you know the interfaceless feature of Fugue? Implementing Processor 19 | is commonly unnecessary. You can choose the interfaceless approach which may 20 | decouple your code from Fugue. 21 | 22 | .. seealso:: 23 | 24 | Please read 25 | :doc:`Processor Tutorial ` 26 | """ 27 | 28 | @abstractmethod 29 | def process(self, dfs: DataFrames) -> DataFrame:  # pragma: no cover 30 | """Process the collection of dataframes on the driver side 31 | 32 | .. note:: 33 | 34 | * It runs on the driver side 35 | * The dataframes are not necessarily local, for example a SparkDataFrame 36 | * It is engine aware: you can put platform-dependent code in it (for example 37 | native pyspark code), but by doing so your code may not be portable. If you 38 | only use the functions of the general ExecutionEngine, it's still portable.
39 | 40 | :param dfs: dataframe collection to process 41 | :return: the result dataframe 42 | """ 43 | raise NotImplementedError 44 | -------------------------------------------------------------------------------- /fugue/extensions/transformer/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from fugue.extensions.transformer.convert import ( 3 | _to_output_transformer, 4 | _to_transformer, 5 | cotransformer, 6 | output_cotransformer, 7 | output_transformer, 8 | parse_output_transformer, 9 | parse_transformer, 10 | register_output_transformer, 11 | register_transformer, 12 | transformer, 13 | ) 14 | from fugue.extensions.transformer.transformer import ( 15 | CoTransformer, 16 | OutputCoTransformer, 17 | OutputTransformer, 18 | Transformer, 19 | ) 20 | -------------------------------------------------------------------------------- /fugue/extensions/transformer/constants.py: -------------------------------------------------------------------------------- 1 | OUTPUT_TRANSFORMER_DUMMY_SCHEMA = "__output_no_data__:int" 2 | -------------------------------------------------------------------------------- /fugue/plugins.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # pylint: disable-all 3 | from fugue.collections.sql import transpile_sql 4 | from fugue.dataframe import ( 5 | alter_columns, 6 | as_array, 7 | as_array_iterable, 8 | as_arrow, 9 | as_dict_iterable, 10 | as_dicts, 11 | as_pandas, 12 | drop_columns, 13 | fugue_annotated_param, 14 | get_column_names, 15 | get_schema, 16 | head, 17 | is_df, 18 | peek_array, 19 | peek_dict, 20 | rename, 21 | select_columns, 22 | ) 23 | from fugue.dataset import ( 24 | as_fugue_dataset, 25 | as_local, 26 | as_local_bounded, 27 | count, 28 | get_dataset_display, 29 | get_num_partitions, 30 | is_bounded, 31 | is_empty, 32 | is_local, 33 | ) 34 | from fugue.execution.api import as_fugue_engine_df 35 | from fugue.execution.factory import ( 36 | infer_execution_engine, 37 | parse_execution_engine, 38 | parse_sql_engine, 39 | ) 40 | from fugue.extensions.creator import parse_creator 41 | from fugue.extensions.outputter import parse_outputter 42 | from fugue.extensions.processor import parse_processor 43 | from fugue.extensions.transformer import parse_output_transformer, parse_transformer 44 | -------------------------------------------------------------------------------- /fugue/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fugue-project/fugue/596d28eea1834ecc9087b864b83e1b67b9748977/fugue/py.typed -------------------------------------------------------------------------------- /fugue/registry.py: -------------------------------------------------------------------------------- 1 | from fugue.execution.factory import register_execution_engine, register_sql_engine 2 | from fugue.execution.native_execution_engine import ( 3 | NativeExecutionEngine, 4 | QPDPandasEngine, 5 | ) 6 | 7 | 8 | def _register() -> None: 9 | """Register Fugue core additional types 10 | 11 | .. 
note:: 12 | 13 | This function is automatically called when you do 14 | 15 | >>> import fugue 16 | """ 17 | _register_engines() 18 | 19 | 20 | def _register_engines() -> None: 21 | register_execution_engine( 22 | "native", lambda conf: NativeExecutionEngine(conf), on_dup="ignore" 23 | ) 24 | register_execution_engine( 25 | "pandas", lambda conf: NativeExecutionEngine(conf), on_dup="ignore" 26 | ) 27 | register_sql_engine( 28 | "qpdpandas", lambda engine: QPDPandasEngine(engine), on_dup="ignore" 29 | ) 30 | register_sql_engine( 31 | "qpd_pandas", lambda engine: QPDPandasEngine(engine), on_dup="ignore" 32 | ) 33 | -------------------------------------------------------------------------------- /fugue/rpc/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from fugue.rpc.base import ( 3 | RPCClient, 4 | EmptyRPCHandler, 5 | RPCFunc, 6 | RPCHandler, 7 | RPCServer, 8 | make_rpc_server, 9 | to_rpc_handler, 10 | ) 11 | -------------------------------------------------------------------------------- /fugue/sql/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fugue-project/fugue/596d28eea1834ecc9087b864b83e1b67b9748977/fugue/sql/__init__.py -------------------------------------------------------------------------------- /fugue/sql/_utils.py: -------------------------------------------------------------------------------- 1 | import re 2 | from typing import Any, Dict, Optional 3 | 4 | from triad import assert_or_throw 5 | 6 | from ..collections.yielded import Yielded 7 | from ..exceptions import FugueSQLError 8 | from ..workflow.workflow import FugueWorkflow, WorkflowDataFrame 9 | 10 | MATCH_QUOTED_STRING = r"([\"'])(({|%|})*)\1" 11 | 12 | 13 | def fill_sql_template(sql: str, params: Dict[str, Any]): 14 | """Prepare the string to be executed by inserting params into the sql template 15 | 16 | :param sql: jinja compatible template 17 | :param params: params to be inserted into template 18 | """ 19 | import jinja2 20 | from jinja2 import Template 21 | 22 | try: 23 | if "self" in params: 24 | params = {k: v for k, v in params.items() if k != "self"} 25 | single_quote_pattern = "'{{% raw %}}{}{{% endraw %}}'" 26 | double_quote_pattern = '"{{% raw %}}{}{{% endraw %}}"' 27 | new_sql = re.sub( 28 | MATCH_QUOTED_STRING, 29 | lambda pattern: double_quote_pattern.format(pattern.group(2)) 30 | if pattern.group(1) == '"' 31 | else single_quote_pattern.format(pattern.group(2)), 32 | sql, 33 | ) 34 | 35 | template = Template(new_sql) 36 | 37 | except jinja2.exceptions.TemplateSyntaxError: 38 | 39 | template = Template(sql) 40 | 41 | return template.render(**params) 42 | 43 | 44 | class LazyWorkflowDataFrame: 45 | def __init__(self, key: str, df: Any, workflow: FugueWorkflow): 46 | self._key = key 47 | self._df = df 48 | self._workflow = workflow 49 | self._wdf: Optional[WorkflowDataFrame] = None 50 | 51 | def get_df(self) -> WorkflowDataFrame: 52 | if self._wdf is None: 53 | self._wdf = self._get_df() 54 | return self._wdf 55 | 56 | def _get_df(self) -> WorkflowDataFrame: 57 | if isinstance(self._df, Yielded): 58 | return self._workflow.df(self._df) 59 | if isinstance(self._df, WorkflowDataFrame): 60 | assert_or_throw( 61 | self._df.workflow is self._workflow, 62 | lambda: FugueSQLError( 63 | f"{self._key}, {self._df} is from another workflow" 64 | ), 65 | ) 66 | return self._df 67 | return self._workflow.df(self._df) 68 |
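To make the templating behavior concrete, here is a minimal usage sketch of ``fill_sql_template`` (illustration only; this lives in the internal ``fugue.sql._utils`` module, so it is not a public API):

.. code-block:: python

    from fugue.sql._utils import fill_sql_template

    # jinja-style placeholders are filled from the params dict
    sql = "SELECT * FROM df WHERE col > {{threshold}}"
    assert fill_sql_template(sql, {"threshold": 5}) == "SELECT * FROM df WHERE col > 5"

    # a "self" key is dropped before rendering, so callers may pass locals() directly
    assert fill_sql_template("SELECT {{x}}", {"self": None, "x": 1}) == "SELECT 1"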
-------------------------------------------------------------------------------- /fugue/test/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .pandas_tester import NativeTestBackend, PandasTestBackend 3 | from .plugins import ( 4 | FugueTestBackend, 5 | FugueTestContext, 6 | FugueTestSuite, 7 | extract_conf, 8 | fugue_test_backend, 9 | fugue_test_suite, 10 | with_backend, 11 | ) 12 | -------------------------------------------------------------------------------- /fugue/test/pandas_tester.py: -------------------------------------------------------------------------------- 1 | from contextlib import contextmanager 2 | from typing import Any, Dict, Iterator 3 | 4 | from .plugins import FugueTestBackend, fugue_test_backend 5 | 6 | 7 | @fugue_test_backend 8 | class PandasTestBackend(FugueTestBackend): 9 | name = "pandas" 10 | 11 | @classmethod 12 | @contextmanager 13 | def session_context(cls, session_conf: Dict[str, Any]) -> Iterator[Any]: 14 | yield "pandas" # pragma: no cover 15 | 16 | 17 | @fugue_test_backend 18 | class NativeTestBackend(FugueTestBackend): 19 | name = "native" 20 | 21 | @classmethod 22 | @contextmanager 23 | def session_context(cls, session_conf: Dict[str, Any]) -> Iterator[Any]: 24 | yield "native" # pragma: no cover 25 | -------------------------------------------------------------------------------- /fugue/workflow/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | 3 | from ._workflow_context import FugueWorkflowContext 4 | from .api import * 5 | from .input import register_raw_df_type 6 | from .module import module 7 | from .workflow import FugueWorkflow, WorkflowDataFrame, WorkflowDataFrames 8 | -------------------------------------------------------------------------------- /fugue/workflow/_workflow_context.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict 2 | from uuid import uuid4 3 | 4 | from adagio.instances import ( 5 | NoOpCache, 6 | ParallelExecutionEngine, 7 | WorkflowContext, 8 | WorkflowHooks, 9 | ) 10 | from adagio.specs import WorkflowSpec 11 | from fugue.constants import FUGUE_CONF_WORKFLOW_CONCURRENCY 12 | from fugue.dataframe import DataFrame 13 | from fugue.execution.execution_engine import ExecutionEngine 14 | from fugue.rpc.base import make_rpc_server, RPCServer 15 | from fugue.workflow._checkpoint import CheckpointPath 16 | from triad import SerializableRLock, ParamDict 17 | 18 | 19 | class FugueWorkflowContext(WorkflowContext): 20 | def __init__( 21 | self, 22 | engine: ExecutionEngine, 23 | compile_conf: Any = None, 24 | cache: Any = NoOpCache, 25 | workflow_engine: Any = None, 26 | hooks: Any = WorkflowHooks, 27 | ): 28 | conf = ParamDict(compile_conf) 29 | self._fugue_engine = engine 30 | self._lock = SerializableRLock() 31 | self._results: Dict[Any, DataFrame] = {} 32 | self._execution_id = "" 33 | self._checkpoint_path = CheckpointPath(self.execution_engine) 34 | self._rpc_server = make_rpc_server(engine.conf) 35 | if workflow_engine is None: 36 | workflow_engine = ParallelExecutionEngine( 37 | conf.get_or_throw(FUGUE_CONF_WORKFLOW_CONCURRENCY, int), 38 | self, 39 | ) 40 | super().__init__( 41 | cache=cache, 42 | engine=workflow_engine, 43 | hooks=hooks, 44 | logger=self.execution_engine.log, 45 | config=conf, 46 | ) 47 | 48 | def run(self, spec: WorkflowSpec, conf: Dict[str, Any]) -> None: 49 | try: 50 | self._execution_id = str(uuid4()) 
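            # each run is scoped by a fresh execution id: the temp checkpoint path
            # and the RPC server below are initialized per run and torn down in the
            # finally block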
51 | self._checkpoint_path = CheckpointPath(self.execution_engine) 52 | self._checkpoint_path.init_temp_path(self._execution_id) 53 | self._rpc_server.start() 54 | super().run(spec, conf) 55 | finally: 56 | self._checkpoint_path.remove_temp_path() 57 | self._rpc_server.stop() 58 | self._execution_id = "" 59 | 60 | @property 61 | def checkpoint_path(self) -> CheckpointPath: 62 | return self._checkpoint_path 63 | 64 | @property 65 | def execution_engine(self) -> ExecutionEngine: 66 | return self._fugue_engine 67 | 68 | @property 69 | def rpc_server(self) -> RPCServer: 70 | return self._rpc_server 71 | 72 | def set_result(self, key: Any, df: DataFrame) -> None: 73 | with self._lock: 74 | self._results[key] = df 75 | 76 | def get_result(self, key: Any) -> DataFrame: 77 | with self._lock: 78 | return self._results[key] 79 | -------------------------------------------------------------------------------- /fugue/workflow/input.py: -------------------------------------------------------------------------------- 1 | from typing import Type 2 | 3 | 4 | def register_raw_df_type(df_type: Type) -> None: # pragma: no cover 5 | """TODO: This function is to be removed before 0.9.0 6 | 7 | .. deprecated:: 0.8.0 8 | Register using :func:`fugue.api.is_df` instead. 9 | """ 10 | raise DeprecationWarning("use fugue.api.is_df to register the dataframe") 11 | -------------------------------------------------------------------------------- /fugue_contrib/__init__.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | 3 | from .contrib import FUGUE_CONTRIB 4 | 5 | 6 | def load_namespace(namespace: str) -> None: 7 | if namespace in FUGUE_CONTRIB: 8 | path = FUGUE_CONTRIB[namespace]["module"] 9 | importlib.import_module(path) 10 | -------------------------------------------------------------------------------- /fugue_contrib/contrib.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Any 2 | 3 | FUGUE_CONTRIB: Dict[str, Any] = { 4 | "viz": {"module": "fugue_contrib.viz"}, 5 | "sns": {"module": "fugue_contrib.seaborn"}, 6 | "why": {"module": "whylogs.api.fugue.registry"}, 7 | "vizzu": {"module": "ipyvizzu.integrations.fugue"}, 8 | } 9 | -------------------------------------------------------------------------------- /fugue_contrib/seaborn/__init__.py: -------------------------------------------------------------------------------- 1 | import json 2 | from functools import partial 3 | from typing import Any, Tuple 4 | 5 | import matplotlib.pyplot as plt 6 | import pandas as pd 7 | import seaborn 8 | 9 | from fugue import Outputter 10 | from fugue.extensions import namespace_candidate, parse_outputter 11 | 12 | from ..viz._ext import Visualize 13 | 14 | 15 | @parse_outputter.candidate(namespace_candidate("sns", lambda x: isinstance(x, str))) 16 | def _parse_seaborn(obj: Tuple[str, str]) -> Outputter: 17 | return _SeabornVisualize(obj[1]) 18 | 19 | 20 | class _SeabornVisualize(Visualize): 21 | def __init__(self, func: str) -> None: 22 | super().__init__(func) 23 | getattr(seaborn, func) # ensure the func exists 24 | 25 | def _plot(self, df: pd.DataFrame) -> None: 26 | params = dict(self.params) 27 | title: Any = None 28 | if len(self.partition_spec.partition_by) > 0: 29 | keys = df[self.partition_spec.partition_by].head(1).to_dict("records")[0] 30 | kt = json.dumps(keys)[1:-1] 31 | if "title" in params: 32 | params["title"] = params["title"] + " -- " + kt 33 | else: 34 | params["title"] = kt 35 | df = 
df.drop(self.partition_spec.partition_by, axis=1) 36 | func = self._get_func(df) 37 | title = params.pop("title", None) 38 | plt.figure(0) 39 | func(**params).set(title=title) 40 | plt.show() 41 | 42 | def _get_func(self, df: pd.DataFrame) -> Any: 43 | f = getattr(seaborn, self._func) 44 | return partial(f, df) 45 | -------------------------------------------------------------------------------- /fugue_contrib/viz/__init__.py: -------------------------------------------------------------------------------- 1 | import json 2 | from typing import Any, Tuple 3 | 4 | import pandas as pd 5 | 6 | from fugue import Outputter 7 | from fugue.extensions import namespace_candidate, parse_outputter 8 | 9 | from ._ext import Visualize 10 | 11 | 12 | @parse_outputter.candidate(namespace_candidate("viz", lambda x: isinstance(x, str))) 13 | def _parse_pandas_plot(obj: Tuple[str, str]) -> Outputter: 14 | return _PandasVisualize(obj[1]) 15 | 16 | 17 | class _PandasVisualize(Visualize): 18 | def __init__(self, func: str) -> None: 19 | super().__init__(func) 20 | if func != "plot": 21 | getattr(pd.DataFrame.plot, func) # ensure the func exists 22 | 23 | def _plot(self, df: pd.DataFrame) -> None: 24 | params = dict(self.params) 25 | if len(self.partition_spec.partition_by) > 0: 26 | keys = df[self.partition_spec.partition_by].head(1).to_dict("records")[0] 27 | kt = json.dumps(keys)[1:-1] 28 | if "title" in params: 29 | params["title"] = params["title"] + " -- " + kt 30 | else: 31 | params["title"] = kt 32 | df = df.drop(self.partition_spec.partition_by, axis=1) 33 | func = self._get_func(df) 34 | func(**params) 35 | 36 | def _get_func(self, df: pd.DataFrame) -> Any: 37 | if self._func == "plot": 38 | return df.plot 39 | return getattr(df.plot, self._func) 40 | -------------------------------------------------------------------------------- /fugue_contrib/viz/_ext.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Any 3 | 4 | import pandas as pd 5 | from triad import assert_or_throw 6 | 7 | from fugue import DataFrames, Outputter 8 | from fugue.exceptions import FugueWorkflowError 9 | 10 | 11 | class Visualize(Outputter, ABC): 12 | def __init__(self, func: str) -> None: 13 | super().__init__() 14 | self._func = func 15 | 16 | def process(self, dfs: DataFrames) -> None: 17 | assert_or_throw(len(dfs) == 1, FugueWorkflowError("not single input")) 18 | df = dfs[0].as_pandas() 19 | presort = self.partition_spec.presort 20 | presort_keys = list(presort.keys()) 21 | presort_asc = list(presort.values()) 22 | if len(presort_keys) > 0: 23 | df = df.sort_values(presort_keys, ascending=presort_asc).reset_index( 24 | drop=True 25 | ) 26 | if len(self.partition_spec.partition_by) == 0: 27 | self._plot(df) 28 | else: 29 | keys: Any = ( # avoid pandas warning 30 | self.partition_spec.partition_by 31 | if len(self.partition_spec.partition_by) > 1 32 | else self.partition_spec.partition_by[0] 33 | ) 34 | for _, gp in df.groupby(keys, dropna=False): 35 | self._plot(gp.reset_index(drop=True)) 36 | 37 | @abstractmethod 38 | def _plot(self, df: pd.DataFrame) -> None: # pragma: no cover 39 | raise NotImplementedError 40 | -------------------------------------------------------------------------------- /fugue_dask/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from fugue_version import __version__ 3 | 4 | from fugue_dask.dataframe import DaskDataFrame 5 | from 
fugue_dask.execution_engine import DaskExecutionEngine 6 | -------------------------------------------------------------------------------- /fugue_dask/_constants.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict 2 | 3 | import dask 4 | import pandas as pd 5 | import pyarrow as pa 6 | from packaging import version 7 | 8 | FUGUE_DASK_CONF_DEFAULT_PARTITIONS = "fugue.dask.default.partitions" 9 | FUGUE_DASK_DEFAULT_CONF: Dict[str, Any] = {FUGUE_DASK_CONF_DEFAULT_PARTITIONS: -1} 10 | FUGUE_DASK_USE_ARROW = ( 11 | hasattr(pd, "ArrowDtype") 12 | and version.parse(dask.__version__) >= version.parse("2023.2") 13 | and version.parse(pa.__version__) >= version.parse("7") 14 | and version.parse(pd.__version__) >= version.parse("2") 15 | ) 16 | -------------------------------------------------------------------------------- /fugue_dask/_dask_sql_wrapper.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Optional 2 | 3 | import dask.dataframe as dd 4 | 5 | try: 6 | from dask.dataframe.dask_expr.io.parquet import ReadParquet 7 | 8 | HAS_DASK_EXPR = True # newer dask 9 | except ImportError: # pragma: no cover 10 | HAS_DASK_EXPR = False # older dask 11 | 12 | if not HAS_DASK_EXPR: # pragma: no cover 13 | try: 14 | from dask_sql import Context as ContextWrapper # pylint: disable-all 15 | except ImportError: # pragma: no cover 16 | raise ImportError( 17 | "dask-sql is not installed. Please install it with `pip install dask-sql`" 18 | ) 19 | else: 20 | from triad.utils.assertion import assert_or_throw 21 | 22 | try: 23 | from dask_sql import Context 24 | from dask_sql.datacontainer import Statistics 25 | from dask_sql.input_utils import InputUtil 26 | except ImportError: # pragma: no cover 27 | raise ImportError( 28 | "dask-sql is not installed. 
Please install it with `pip install dask-sql`" 29 | ) 30 | 31 | class ContextWrapper(Context): # type: ignore 32 | def create_table( 33 | self, 34 | table_name: str, 35 | input_table: dd.DataFrame, 36 | format: Optional[str] = None, # noqa 37 | persist: bool = False, 38 | schema_name: Optional[str] = None, 39 | statistics: Optional[Statistics] = None, 40 | gpu: bool = False, 41 | **kwargs: Any, 42 | ) -> None: # pragma: no cover 43 | assert_or_throw( 44 | isinstance(input_table, dd.DataFrame), 45 | lambda: ValueError( 46 | f"input_table must be a dask dataframe, but got {type(input_table)}" 47 | ), 48 | ) 49 | assert_or_throw( 50 | dd._dask_expr_enabled(), lambda: ValueError("Dask expr must be enabled") 51 | ) 52 | schema_name = schema_name or self.schema_name 53 | 54 | dc = InputUtil.to_dc( 55 | input_table, 56 | table_name=table_name, 57 | format=format, 58 | persist=persist, 59 | gpu=gpu, 60 | **kwargs, 61 | ) 62 | 63 | dask_filepath = None 64 | operations = input_table.find_operations(ReadParquet) 65 | for op in operations: 66 | dask_filepath = op._args[0] 67 | 68 | dc.filepath = dask_filepath 69 | self.schema[schema_name].filepaths[table_name.lower()] = dask_filepath 70 | 71 | if not statistics: 72 | statistics = Statistics(float("nan")) 73 | dc.statistics = statistics 74 | 75 | self.schema[schema_name].tables[table_name.lower()] = dc 76 | self.schema[schema_name].statistics[table_name.lower()] = statistics 77 | -------------------------------------------------------------------------------- /fugue_dask/registry.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | import dask.dataframe as dd 4 | from dask.distributed import Client 5 | 6 | from fugue import DataFrame 7 | from fugue.dev import ( 8 | DataFrameParam, 9 | ExecutionEngineParam, 10 | fugue_annotated_param, 11 | is_pandas_or, 12 | ) 13 | from fugue.plugins import ( 14 | as_fugue_dataset, 15 | infer_execution_engine, 16 | parse_execution_engine, 17 | ) 18 | from fugue_dask._utils import DASK_UTILS 19 | from fugue_dask.dataframe import DaskDataFrame 20 | from fugue_dask.execution_engine import DaskExecutionEngine 21 | 22 | from .tester import DaskTestBackend # noqa: F401 # pylint: disable-all 23 | 24 | 25 | @infer_execution_engine.candidate( 26 | lambda objs: is_pandas_or(objs, (dd.DataFrame, DaskDataFrame)) 27 | ) 28 | def _infer_dask_client(objs: Any) -> Any: 29 | return DASK_UTILS.get_or_create_client() 30 | 31 | 32 | @as_fugue_dataset.candidate(lambda df, **kwargs: isinstance(df, dd.DataFrame)) 33 | def _dask_as_fugue_df(df: dd.DataFrame, **kwargs: Any) -> DaskDataFrame: 34 | return DaskDataFrame(df, **kwargs) 35 | 36 | 37 | @parse_execution_engine.candidate( 38 | lambda engine, conf, **kwargs: isinstance(engine, Client), 39 | priority=4, # TODO: this is to overwrite dask-sql fugue integration 40 | ) 41 | def _parse_dask_client(engine: Client, conf: Any, **kwargs: Any) -> DaskExecutionEngine: 42 | return DaskExecutionEngine(dask_client=engine, conf=conf) 43 | 44 | 45 | @parse_execution_engine.candidate( 46 | lambda engine, conf, **kwargs: isinstance(engine, str) and engine == "dask", 47 | priority=4, # TODO: this is to overwrite dask-sql fugue integration 48 | ) 49 | def _parse_dask_str(engine: str, conf: Any, **kwargs: Any) -> DaskExecutionEngine: 50 | return DaskExecutionEngine(conf=conf) 51 | 52 | 53 | @fugue_annotated_param(DaskExecutionEngine) 54 | class _DaskExecutionEngineParam(ExecutionEngineParam): 55 | pass 56 | 57 | 58 | 
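# the annotated params below let interfaceless functions type-hint dd.DataFrame
# (or DaskExecutionEngine) directly; conversion goes through
# DaskExecutionEngine.to_df on the way in and on the way out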
@fugue_annotated_param(dd.DataFrame) 59 | class _DaskDataFrameParam(DataFrameParam): 60 | def to_input_data(self, df: DataFrame, ctx: Any) -> Any: 61 | assert isinstance(ctx, DaskExecutionEngine) 62 | return ctx.to_df(df).native 63 | 64 | def to_output_df(self, output: Any, schema: Any, ctx: Any) -> DataFrame: 65 | assert isinstance(output, dd.DataFrame) 66 | assert isinstance(ctx, DaskExecutionEngine) 67 | return ctx.to_df(output, schema=schema) 68 | 69 | def count(self, df: DataFrame) -> int: # pragma: no cover 70 | raise NotImplementedError("not allowed") 71 | -------------------------------------------------------------------------------- /fugue_dask/tester.py: -------------------------------------------------------------------------------- 1 | from contextlib import contextmanager 2 | from typing import Any, Dict, Iterator 3 | 4 | import dask 5 | from dask.distributed import Client 6 | 7 | import fugue.test as ft 8 | 9 | 10 | @ft.fugue_test_backend 11 | class DaskTestBackend(ft.FugueTestBackend): 12 | name = "dask" 13 | 14 | @classmethod 15 | def transform_session_conf(cls, conf: Dict[str, Any]) -> Dict[str, Any]: 16 | return ft.extract_conf(conf, "dask.", remove_prefix=True) 17 | 18 | @classmethod 19 | @contextmanager 20 | def session_context(cls, session_conf: Dict[str, Any]) -> Iterator[Any]: 21 | with Client(**session_conf) as client: 22 | dask.config.set({"dataframe.shuffle.method": "tasks"}) 23 | dask.config.set({"dataframe.convert-string": False}) 24 | yield client 25 | -------------------------------------------------------------------------------- /fugue_duckdb/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from fugue import register_execution_engine, register_sql_engine 3 | 4 | from fugue_duckdb.execution_engine import DuckDBEngine, DuckExecutionEngine 5 | 6 | try: 7 | from fugue_duckdb.dask import DuckDaskExecutionEngine 8 | except Exception: # pragma: no cover 9 | pass 10 | -------------------------------------------------------------------------------- /fugue_duckdb/tester.py: -------------------------------------------------------------------------------- 1 | from contextlib import contextmanager 2 | from typing import Any, Dict, Iterator 3 | 4 | import duckdb 5 | 6 | import fugue.test as ft 7 | 8 | try: 9 | import dask.distributed as dd 10 | import dask 11 | 12 | _HAS_DASK = True 13 | except ImportError: # pragma: no cover 14 | _HAS_DASK = False 15 | 16 | 17 | @ft.fugue_test_backend 18 | class DuckDBTestBackend(ft.FugueTestBackend): 19 | name = "duckdb" 20 | 21 | @classmethod 22 | @contextmanager 23 | def session_context(cls, session_conf: Dict[str, Any]) -> Iterator[Any]: 24 | with duckdb.connect(config=session_conf) as conn: 25 | yield conn 26 | 27 | 28 | if _HAS_DASK: 29 | 30 | @ft.fugue_test_backend 31 | class DuckDaskTestBackend(ft.FugueTestBackend): 32 | name = "duckdask" 33 | 34 | @classmethod 35 | def transform_session_conf(cls, conf: Dict[str, Any]) -> Dict[str, Any]: 36 | res = ft.extract_conf(conf, "duck.", remove_prefix=False) 37 | res.update(ft.extract_conf(conf, "dask.", remove_prefix=False)) 38 | return res 39 | 40 | @classmethod 41 | @contextmanager 42 | def session_context(cls, session_conf: Dict[str, Any]) -> Iterator[Any]: 43 | duck_conf = ft.extract_conf(session_conf, "duck.", remove_prefix=True) 44 | dask_conf = ft.extract_conf(session_conf, "dask.", remove_prefix=True) 45 | with dd.Client(**dask_conf) as client: 46 | dask.config.set({"dataframe.shuffle.method": "tasks"}) 47 | 
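# the next line also disables dask's automatic pyarrow-string conversion,
# mirroring DaskTestBackend so both dask-backed test sessions behave the same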
dask.config.set({"dataframe.convert-string": False}) 48 | with duckdb.connect(config=duck_conf) as conn: 49 | yield [conn, client] 50 | -------------------------------------------------------------------------------- /fugue_ibis/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from triad import run_at_def 3 | 4 | from ._compat import IbisSchema, IbisTable 5 | from .dataframe import IbisDataFrame 6 | from .execution_engine import IbisExecutionEngine, IbisSQLEngine 7 | -------------------------------------------------------------------------------- /fugue_ibis/_compat.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # pylint: disable-all 3 | 4 | try: # pragma: no cover 5 | from ibis.expr.types import Table as IbisTable 6 | except Exception: # pragma: no cover 7 | from ibis.expr.types import TableExpr as IbisTable 8 | 9 | from ibis import Schema as IbisSchema 10 | -------------------------------------------------------------------------------- /fugue_notebook/nbextension/README.md: -------------------------------------------------------------------------------- 1 | # Fugue Notebook Extension 2 | 3 | - Add `%%fsql` magic to run Fugue SQL 4 | - Add Fugue SQL highlight in code cells for `%%fsql` 5 | -------------------------------------------------------------------------------- /fugue_notebook/nbextension/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fugue-project/fugue/596d28eea1834ecc9087b864b83e1b67b9748977/fugue_notebook/nbextension/__init__.py -------------------------------------------------------------------------------- /fugue_notebook/nbextension/description.yaml: -------------------------------------------------------------------------------- 1 | Type: Jupyter Notebook Extension 2 | Compatibility: 3.x, 4.x, 5.x, 6.x 3 | Name: Fugue 4 | Main: main.js 5 | Link: README.md 6 | Description: | 7 | Fugue Jupyter extension 8 | -------------------------------------------------------------------------------- /fugue_polars/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .polars_dataframe import PolarsDataFrame 3 | -------------------------------------------------------------------------------- /fugue_polars/_utils.py: -------------------------------------------------------------------------------- 1 | import polars as pl 2 | from triad import Schema 3 | 4 | 5 | def build_empty_pl(schema: Schema) -> pl.DataFrame: 6 | return pl.from_arrow(schema.create_empty_arrow_table()) 7 | -------------------------------------------------------------------------------- /fugue_ray/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | 3 | from fugue_ray.dataframe import RayDataFrame 4 | from fugue_ray.execution_engine import RayExecutionEngine 5 | -------------------------------------------------------------------------------- /fugue_ray/_constants.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict 2 | 3 | import ray 4 | from packaging import version 5 | 6 | FUGUE_RAY_CONF_SHUFFLE_PARTITIONS = "fugue.ray.shuffle.partitions" 7 | FUGUE_RAY_DEFAULT_PARTITIONS = "fugue.ray.default.partitions" 8 | FUGUE_RAY_DEFAULT_BATCH_SIZE = "fugue.ray.default.batch_size" 9 | FUGUE_RAY_ZERO_COPY = "fugue.ray.zero_copy" 10 | 
11 | FUGUE_RAY_DEFAULT_CONF: Dict[str, Any] = { 12 | FUGUE_RAY_CONF_SHUFFLE_PARTITIONS: -1, 13 | FUGUE_RAY_DEFAULT_PARTITIONS: 0, 14 | FUGUE_RAY_ZERO_COPY: True, 15 | } 16 | RAY_VERSION = version.parse(ray.__version__) 17 | 18 | _ZERO_COPY: Dict[str, Any] = {"zero_copy_batch": True} 19 | -------------------------------------------------------------------------------- /fugue_ray/_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fugue-project/fugue/596d28eea1834ecc9087b864b83e1b67b9748977/fugue_ray/_utils/__init__.py -------------------------------------------------------------------------------- /fugue_ray/_utils/cluster.py: -------------------------------------------------------------------------------- 1 | from fugue import ExecutionEngine 2 | 3 | from .._constants import FUGUE_RAY_CONF_SHUFFLE_PARTITIONS, FUGUE_RAY_DEFAULT_PARTITIONS 4 | from fugue.constants import FUGUE_CONF_DEFAULT_PARTITIONS 5 | 6 | 7 | def get_default_partitions(engine: ExecutionEngine) -> int: 8 | n = engine.conf.get( 9 | FUGUE_RAY_DEFAULT_PARTITIONS, engine.conf.get(FUGUE_CONF_DEFAULT_PARTITIONS, -1) 10 | ) 11 | return n if n >= 0 else engine.get_current_parallelism() * 2 12 | 13 | 14 | def get_default_shuffle_partitions(engine: ExecutionEngine) -> int: 15 | n = engine.conf.get(FUGUE_RAY_CONF_SHUFFLE_PARTITIONS, -1) 16 | return n if n >= 0 else get_default_partitions(engine) 17 | -------------------------------------------------------------------------------- /fugue_ray/registry.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | import ray.data as rd 4 | from triad import run_at_def 5 | 6 | from fugue import DataFrame, register_execution_engine 7 | from fugue.dev import ( 8 | DataFrameParam, 9 | ExecutionEngineParam, 10 | fugue_annotated_param, 11 | is_pandas_or, 12 | ) 13 | from fugue.plugins import as_fugue_dataset, infer_execution_engine 14 | 15 | from .dataframe import RayDataFrame 16 | from .execution_engine import RayExecutionEngine 17 | from .tester import RayTestBackend # noqa: F401 # pylint: disable-all 18 | 19 | 20 | @infer_execution_engine.candidate( 21 | lambda objs: is_pandas_or(objs, (rd.Dataset, RayDataFrame)) 22 | ) 23 | def _infer_ray_client(objs: Any) -> Any: 24 | return "ray" 25 | 26 | 27 | @as_fugue_dataset.candidate(lambda df, **kwargs: isinstance(df, rd.Dataset)) 28 | def _ray_as_fugue_df(df: rd.Dataset, **kwargs: Any) -> RayDataFrame: 29 | return RayDataFrame(df, **kwargs) 30 | 31 | 32 | def _register_engines() -> None: 33 | register_execution_engine( 34 | "ray", lambda conf, **kwargs: RayExecutionEngine(conf=conf), on_dup="ignore" 35 | ) 36 | 37 | 38 | @fugue_annotated_param(RayExecutionEngine) 39 | class _RayExecutionEngineParam(ExecutionEngineParam): 40 | pass 41 | 42 | 43 | @fugue_annotated_param(rd.Dataset) 44 | class _RayDatasetParam(DataFrameParam): 45 | def to_input_data(self, df: DataFrame, ctx: Any) -> Any: 46 | assert isinstance(ctx, RayExecutionEngine) 47 | return ctx._to_ray_df(df).native 48 | 49 | def to_output_df(self, output: Any, schema: Any, ctx: Any) -> DataFrame: 50 | assert isinstance(output, rd.Dataset) 51 | assert isinstance(ctx, RayExecutionEngine) 52 | return RayDataFrame(output, schema=schema) 53 | 54 | def count(self, df: DataFrame) -> int: # pragma: no cover 55 | raise NotImplementedError("not allowed") 56 | 57 | 58 | @run_at_def 59 | def _register() -> None: 60 | """Register Ray Execution Engine""" 61 | 
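# run_at_def executes this at import time, so importing fugue_ray is enough
# to make the "ray" engine name resolvable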
_register_engines() 62 | -------------------------------------------------------------------------------- /fugue_ray/tester.py: -------------------------------------------------------------------------------- 1 | from contextlib import contextmanager 2 | from typing import Any, Dict, Iterator 3 | 4 | import ray 5 | 6 | import fugue.test as ft 7 | 8 | 9 | @ft.fugue_test_backend 10 | class RayTestBackend(ft.FugueTestBackend): 11 | name = "ray" 12 | default_session_conf = {"num_cpus": 2} 13 | default_fugue_conf = { 14 | "fugue.ray.zero_copy": True, 15 | "fugue.ray.default.batch_size": 10000, 16 | } 17 | 18 | @classmethod 19 | @contextmanager 20 | def session_context(cls, session_conf: Dict[str, Any]) -> Iterator[Any]: 21 | with ray.init(**session_conf): 22 | yield "ray" 23 | -------------------------------------------------------------------------------- /fugue_spark/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from fugue_version import __version__ 3 | 4 | from fugue_spark.dataframe import SparkDataFrame 5 | from fugue_spark.execution_engine import SparkExecutionEngine 6 | -------------------------------------------------------------------------------- /fugue_spark/_constants.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Any 2 | 3 | FUGUE_SPARK_CONF_USE_PANDAS_UDF = "fugue.spark.use_pandas_udf" 4 | 5 | FUGUE_SPARK_DEFAULT_CONF: Dict[str, Any] = {FUGUE_SPARK_CONF_USE_PANDAS_UDF: True} 6 | -------------------------------------------------------------------------------- /fugue_spark/_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fugue-project/fugue/596d28eea1834ecc9087b864b83e1b67b9748977/fugue_spark/_utils/__init__.py -------------------------------------------------------------------------------- /fugue_spark/_utils/misc.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | try: 4 | from pyspark.sql.connect.session import SparkSession as SparkConnectSession 5 | from pyspark.sql.connect.dataframe import DataFrame as SparkConnectDataFrame 6 | except Exception: # pragma: no cover 7 | SparkConnectSession = None 8 | SparkConnectDataFrame = None 9 | import pyspark.sql as ps 10 | 11 | 12 | def is_spark_connect(session: Any) -> bool: 13 | return SparkConnectSession is not None and isinstance( 14 | session, (SparkConnectSession, SparkConnectDataFrame) 15 | ) 16 | 17 | 18 | def is_spark_dataframe(df: Any) -> bool: 19 | return isinstance(df, ps.DataFrame) or ( 20 | SparkConnectDataFrame is not None and isinstance(df, SparkConnectDataFrame) 21 | ) 22 | 23 | 24 | def is_spark_session(session: Any) -> bool: 25 | return isinstance(session, ps.SparkSession) or ( 26 | SparkConnectSession is not None and isinstance(session, SparkConnectSession) 27 | ) 28 | -------------------------------------------------------------------------------- /fugue_spark/tester.py: -------------------------------------------------------------------------------- 1 | from contextlib import contextmanager 2 | from typing import Any, Dict, Iterator 3 | 4 | from pyspark.sql import SparkSession 5 | 6 | import fugue.test as ft 7 | 8 | from ._utils.misc import SparkConnectSession 9 | 10 | 11 | @ft.fugue_test_backend 12 | class SparkTestBackend(ft.FugueTestBackend): 13 | name = "spark" 14 | default_session_conf = { 15 | "spark.app.name": 
"fugue-test-spark", 16 | "spark.master": "local[*]", 17 | "spark.default.parallelism": 4, 18 | "spark.dynamicAllocation.enabled": "false", 19 | "spark.executor.cores": 4, 20 | "spark.executor.instances": 1, 21 | "spark.io.compression.codec": "lz4", 22 | "spark.rdd.compress": "false", 23 | "spark.sql.shuffle.partitions": 4, 24 | "spark.shuffle.compress": "false", 25 | "spark.sql.catalogImplementation": "in-memory", 26 | "spark.sql.execution.arrow.pyspark.enabled": True, 27 | "spark.sql.adaptive.enabled": False, 28 | } 29 | 30 | @classmethod 31 | def transform_session_conf(cls, conf: Dict[str, Any]) -> Dict[str, Any]: 32 | return ft.extract_conf(conf, "spark.", remove_prefix=False) 33 | 34 | @classmethod 35 | @contextmanager 36 | def session_context(cls, session_conf: Dict[str, Any]) -> Iterator[Any]: 37 | with _create_session(session_conf).getOrCreate() as spark: 38 | yield spark 39 | 40 | 41 | if SparkConnectSession is not None: 42 | 43 | @ft.fugue_test_backend 44 | class SparkConnectTestBackend(SparkTestBackend): 45 | name = "sparkconnect" 46 | default_session_conf = { 47 | "spark.default.parallelism": 4, 48 | "spark.sql.shuffle.partitions": 4, 49 | "spark.sql.execution.arrow.pyspark.enabled": True, 50 | "spark.sql.adaptive.enabled": False, 51 | } 52 | 53 | @classmethod 54 | def transform_session_conf( 55 | cls, conf: Dict[str, Any] 56 | ) -> Dict[str, Any]: # pragma: no cover 57 | # replace sparkconnect. with spark. 58 | return { 59 | "spark." + k: v 60 | for k, v in ft.extract_conf( 61 | conf, cls.name + ".", remove_prefix=True 62 | ).items() 63 | } 64 | 65 | @classmethod 66 | @contextmanager 67 | def session_context( 68 | cls, session_conf: Dict[str, Any] 69 | ) -> Iterator[Any]: # pragma: no cover 70 | spark = _create_session(session_conf).remote("sc://localhost").getOrCreate() 71 | yield spark 72 | 73 | 74 | def _create_session(conf: Dict[str, Any]) -> Any: 75 | sb = SparkSession.builder 76 | for k, v in conf.items(): 77 | sb = sb.config(k, v) 78 | return sb 79 | -------------------------------------------------------------------------------- /fugue_sql/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # TODO: This folder is to be deprecated 3 | from fugue_version import __version__ 4 | 5 | import warnings 6 | from fugue import FugueSQLWorkflow, fsql 7 | 8 | warnings.warn( 9 | "fsql and FugueSQLWorkflow now should be imported directly from fugue, " 10 | "fugue_sql will be removed in 0.9.0" 11 | ) 12 | -------------------------------------------------------------------------------- /fugue_sql/exceptions.py: -------------------------------------------------------------------------------- 1 | # pylint: disable-all 2 | # flake8: noqa 3 | # TODO: This folder is to be deprecated 4 | import warnings 5 | from fugue.exceptions import * 6 | 7 | warnings.warn( 8 | "fsql and FugueSQLWorkflow now should be imported directly from fugue, " 9 | "fugue_sql will be removed in 0.9.0" 10 | ) 11 | -------------------------------------------------------------------------------- /fugue_test/__init__.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Tuple 2 | 3 | import pyarrow as pa 4 | import pytest 5 | from triad.utils.pyarrow import to_pa_datatype 6 | 7 | _FUGUE_TEST_CONF_NAME = "fugue_test_conf" 8 | 9 | 10 | def pytest_addoption(parser: Any): # pragma: no cover 11 | parser.addini( 12 | _FUGUE_TEST_CONF_NAME, 13 | help="Configs for fugue testing execution engines", 14 | 
type="linelist", 15 | ) 16 | 17 | 18 | def pytest_configure(config: Any): 19 | from fugue.test.plugins import _set_global_conf 20 | 21 | options = config.getini(_FUGUE_TEST_CONF_NAME) 22 | conf: Dict[str, Any] = {} 23 | if options: 24 | for line in options: 25 | line = line.strip() 26 | if not line.startswith("#"): 27 | k, v = _parse_line(line) 28 | conf[k] = v 29 | _set_global_conf(conf) 30 | 31 | 32 | def pytest_report_header(config, start_path): 33 | from fugue.test.plugins import _get_all_ini_conf 34 | 35 | header_lines = [] 36 | header_lines.append("Fugue tests will be initialized with options:") 37 | for k, v in _get_all_ini_conf().items(): 38 | header_lines.append(f"\t{k} = {v}") 39 | return "\n".join(header_lines) 40 | 41 | 42 | def _parse_line(line: str) -> Tuple[str, Any]: 43 | try: 44 | kv = line.split("=", 1) 45 | if len(kv) == 1: 46 | raise ValueError() 47 | kt = kv[0].split(":", 1) 48 | if len(kt) == 1: 49 | tp = pa.string() 50 | else: 51 | tp = to_pa_datatype(kt[1].strip()) 52 | key = kt[0].strip() 53 | if key == "": 54 | raise ValueError() 55 | value = pa.compute.cast([kv[1].strip()], tp).to_pylist()[0] 56 | return key, value 57 | except Exception: 58 | raise ValueError( 59 | f"Invalid config line: {line}, it must be in format: key[:type]=value" 60 | ) 61 | 62 | 63 | @pytest.fixture(scope="class") 64 | def backend_context(request: Any): 65 | from fugue.test.plugins import _make_backend_context, _parse_backend 66 | 67 | c, _ = _parse_backend(request.param) 68 | session = request.getfixturevalue(c + "_session") 69 | with _make_backend_context(request.param, session) as ctx: 70 | yield ctx 71 | 72 | 73 | @pytest.fixture(scope="class") 74 | def _class_backend_context(request, backend_context): 75 | from fugue.test.plugins import FugueTestContext 76 | 77 | request.cls._test_context = FugueTestContext( 78 | engine=backend_context.engine, 79 | session=backend_context.session, 80 | name=backend_context.name, 81 | ) 82 | yield 83 | -------------------------------------------------------------------------------- /fugue_test/bag_suite.py: -------------------------------------------------------------------------------- 1 | # pylint: disable-all 2 | # flake8: noqa 3 | 4 | from datetime import date, datetime 5 | from typing import Any 6 | from unittest import TestCase 7 | import copy 8 | import numpy as np 9 | import pandas as pd 10 | from fugue.bag import Bag, LocalBag 11 | from fugue.exceptions import FugueDataFrameOperationError, FugueDatasetEmptyError 12 | from pytest import raises 13 | from triad.collections.schema import Schema 14 | 15 | 16 | class BagTests(object): 17 | """DataFrame level general test suite. 18 | All new DataFrame types should pass this test suite. 
19 | """ 20 | 21 | class Tests(TestCase): 22 | @classmethod 23 | def setUpClass(cls): 24 | pass 25 | 26 | @classmethod 27 | def tearDownClass(cls): 28 | pass 29 | 30 | def bg(self, data: Any = None) -> Bag: # pragma: no cover 31 | raise NotImplementedError 32 | 33 | def test_init_basic(self): 34 | raises(Exception, lambda: self.bg()) 35 | bg = self.bg([]) 36 | assert bg.empty 37 | assert copy.copy(bg) is bg 38 | assert copy.deepcopy(bg) is bg 39 | 40 | def test_peek(self): 41 | bg = self.bg([]) 42 | raises(FugueDatasetEmptyError, lambda: bg.peek()) 43 | 44 | bg = self.bg(["x"]) 45 | assert not bg.is_bounded or 1 == bg.count() 46 | assert not bg.empty 47 | assert "x" == bg.peek() 48 | 49 | def test_as_array(self): 50 | bg = self.bg([2, 1, "a"]) 51 | assert set([1, 2, "a"]) == set(bg.as_array()) 52 | 53 | def test_as_array_special_values(self): 54 | bg = self.bg([2, None, "a"]) 55 | assert set([None, 2, "a"]) == set(bg.as_array()) 56 | 57 | bg = self.bg([np.float16(0.1)]) 58 | assert set([np.float16(0.1)]) == set(bg.as_array()) 59 | 60 | def test_head(self): 61 | bg = self.bg([]) 62 | assert [] == bg.head(0).as_array() 63 | assert [] == bg.head(1).as_array() 64 | bg = self.bg([["a", 1]]) 65 | if bg.is_bounded: 66 | assert [["a", 1]] == bg.head(1).as_array() 67 | assert [] == bg.head(0).as_array() 68 | 69 | bg = self.bg([1, 2, 3, 4]) 70 | assert 2 == bg.head(2).count() 71 | bg = self.bg([1, 2, 3, 4]) 72 | assert 4 == bg.head(10).count() 73 | h = bg.head(10) 74 | assert h.is_local and h.is_bounded 75 | 76 | def test_show(self): 77 | bg = self.bg(["a", 1]) 78 | bg.show() 79 | bg.show(n=0) 80 | bg.show(n=1) 81 | bg.show(n=2) 82 | bg.show(title="title") 83 | bg.metadata["m"] = 1 84 | bg.show() 85 | -------------------------------------------------------------------------------- /fugue_test/fixtures.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | _DEFAULT_SCOPE = "module" 4 | 5 | 6 | @pytest.fixture(scope=_DEFAULT_SCOPE) 7 | def pandas_session(): 8 | yield "pandas" 9 | 10 | 11 | @pytest.fixture(scope=_DEFAULT_SCOPE) 12 | def native_session(): 13 | yield "native" 14 | 15 | 16 | @pytest.fixture(scope=_DEFAULT_SCOPE) 17 | def dask_session(): 18 | from fugue_dask.tester import DaskTestBackend 19 | 20 | with DaskTestBackend.generate_session_fixture() as session: 21 | yield session 22 | 23 | 24 | @pytest.fixture(scope=_DEFAULT_SCOPE) 25 | def duckdb_session(): 26 | from fugue_duckdb.tester import DuckDBTestBackend 27 | 28 | with DuckDBTestBackend.generate_session_fixture() as session: 29 | yield session 30 | 31 | 32 | @pytest.fixture(scope=_DEFAULT_SCOPE) 33 | def duckdask_session(): 34 | from fugue_duckdb.tester import DuckDaskTestBackend 35 | 36 | with DuckDaskTestBackend.generate_session_fixture() as session: 37 | yield session 38 | 39 | 40 | @pytest.fixture(scope=_DEFAULT_SCOPE) 41 | def ray_session(): 42 | from fugue_ray.tester import RayTestBackend 43 | 44 | with RayTestBackend.generate_session_fixture() as session: 45 | yield session 46 | 47 | 48 | @pytest.fixture(scope=_DEFAULT_SCOPE) 49 | def spark_session(): 50 | from fugue_spark.tester import SparkTestBackend 51 | 52 | with SparkTestBackend.generate_session_fixture() as session: 53 | yield session 54 | 55 | 56 | @pytest.fixture(scope=_DEFAULT_SCOPE) 57 | def sparkconnect_session(): 58 | from fugue_spark.tester import SparkConnectTestBackend 59 | 60 | with SparkConnectTestBackend.generate_session_fixture() as session: 61 | yield session 62 | 
-------------------------------------------------------------------------------- /fugue_version/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.9.2" 2 | -------------------------------------------------------------------------------- /images/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fugue-project/fugue/596d28eea1834ecc9087b864b83e1b67b9748977/images/architecture.png -------------------------------------------------------------------------------- /images/extensions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fugue-project/fugue/596d28eea1834ecc9087b864b83e1b67b9748977/images/extensions.png -------------------------------------------------------------------------------- /images/logo.svg: -------------------------------------------------------------------------------- [SVG markup not preserved in this dump; the file contains the Fugue logo vector graphic] -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | .[all] 2 | 3 | furo 4 | 5 | # test requirements 6 | pre-commit 7 | black>=22.3.0 8 | mypy 9 | flake8 10 | autopep8 11 | pylint==2.16.2 12 | pytest 13 | pytest-cov 14 | pytest-mock 15 | pytest-rerunfailures==10.2 16 | sphinx>=2.4.0 17 | sphinx-rtd-theme 18 | sphinx-autodoc-typehints 19 | flask 20 | psutil 21 | matplotlib 22 | seaborn 23 | 24 | notebook<7 25 | jupyter_contrib_nbextensions 26 | 27 | s3fs 28 | 29 | pyspark[connect] 30 | duckdb-engine>=0.6.4 31 | sqlalchemy==2.0.10 # 2.0.11 has a bug 32 | ray[data]>=2.5.0 33 | pydantic<2.5 # 2.5.0+ doesn't work with ray 2.8 34 | # pyarrow==7.0.0 35 | dask-sql 36 | 37 | # publish to pypi 38 | wheel 39 | twine 40 | -------------------------------------------------------------------------------- /scripts/setupsparkconnect.sh: -------------------------------------------------------------------------------- 1 | wget https://dlcdn.apache.org/spark/spark-3.5.5/spark-3.5.5-bin-hadoop3.tgz -O - | tar -xz -C /tmp 2 | # export SPARK_NO_DAEMONIZE=1 3 | bash /tmp/spark-3.5.5-bin-hadoop3/sbin/start-connect-server.sh --jars https://repo1.maven.org/maven2/org/apache/spark/spark-connect_2.12/3.5.5/spark-connect_2.12-3.5.5.jar 4 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | license_files = LICENSE 4 | 5 | [testenv] 6 | setenv = 7 | COV_CORE_SOURCE= 8 | COV_CORE_CONFIG=.coveragerc 9 | COV_CORE_DATAFILE=.coverage 10 | 11 | [tool:pytest] 12 | addopts = 13 | -p pytest_cov 14 | --cov=fugue 15 | --cov=fugue_test 16 | --cov=fugue_spark 17 | --cov=fugue_dask 18 | --cov=fugue_ray 19 | --cov=fugue_duckdb 20 | --cov=fugue_ibis 21 | --cov=fugue_polars 22 | --ignore=tests/fugue_spark/test_spark_connect.py 23 | --cov-report=term-missing:skip-covered 24 | -vvv 25 | spark_options = 26 | spark.master: local[*] 27 | spark.sql.catalogImplementation: in-memory 28 | spark.sql.shuffle.partitions: 4 29 | spark.default.parallelism: 4 30 | spark.executor.cores: 4 31 | spark.sql.execution.arrow.pyspark.enabled: true 32 | spark.sql.adaptive.enabled: false 33 | fugue_test_conf = 34 | # don't move for testing purpose 35 | fugue.test.dummy=dummy 36 | 
fugue.test:bool=true 37 | # ray settings 38 | ray.num_cpus:int=2 39 | # dask settings 40 | dask.processes:bool=true 41 | dask.n_workers:int=3 42 | dask.threads_per_worker:int=1 43 | 44 | 45 | 46 | [coverage:run] 47 | omit = 48 | fugue_sql/_antlr/* 49 | fugue_test/plugins/* 50 | fugue_test/fixtures.py 51 | fugue_test/__init__.py 52 | 53 | [flake8] 54 | ignore = E24,E203,W503,C401,C408,C420,A001,A003,A005,W504,C407,C405,B023,B028 55 | max-line-length = 88 56 | format = pylint 57 | exclude = .svc,CVS,.bzr,.hg,.git,__pycache__,venv,tests/*,docs/* 58 | max-complexity = 10 59 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # pylint: disable-all 2 | -------------------------------------------------------------------------------- /tests/fugue/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fugue-project/fugue/596d28eea1834ecc9087b864b83e1b67b9748977/tests/fugue/__init__.py -------------------------------------------------------------------------------- /tests/fugue/bag/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fugue-project/fugue/596d28eea1834ecc9087b864b83e1b67b9748977/tests/fugue/bag/__init__.py -------------------------------------------------------------------------------- /tests/fugue/bag/test_array_bag.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | from fugue import ArrayBag, Bag 4 | from fugue_test.bag_suite import BagTests 5 | 6 | 7 | class ArrayBagTests(BagTests.Tests): 8 | def bg(self, data: Any = None) -> Bag: 9 | return ArrayBag(data) 10 | 11 | def test_array_bag_init(self): 12 | def _it(): 13 | yield from [1, 2, 3] 14 | 15 | bg = self.bg([]) 16 | assert bg.count() == 0 17 | assert bg.is_local 18 | assert bg.is_bounded 19 | assert bg.as_local() is bg 20 | assert bg.empty 21 | assert bg.native == [] 22 | 23 | for x in [[1, 2, 3], _it(), set([1, 2, 3])]: 24 | bg = self.bg(x) 25 | assert bg.count() == 3 26 | assert bg.is_local 27 | assert bg.is_bounded 28 | assert bg.as_local() is bg 29 | assert not bg.empty 30 | assert 1 == bg.num_partitions 31 | assert isinstance(bg.native, list) 32 | 33 | bg = self.bg(x + 1 for x in []) 34 | assert bg.count() == 0 35 | bg = self.bg(x + 1 for x in [1, 2, 3]) 36 | assert bg.count() == 3 37 | -------------------------------------------------------------------------------- /tests/fugue/collections/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fugue-project/fugue/596d28eea1834ecc9087b864b83e1b67b9748977/tests/fugue/collections/__init__.py -------------------------------------------------------------------------------- /tests/fugue/column/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fugue-project/fugue/596d28eea1834ecc9087b864b83e1b67b9748977/tests/fugue/column/__init__.py -------------------------------------------------------------------------------- /tests/fugue/column/test_functions.py: -------------------------------------------------------------------------------- 1 | import pyarrow as pa 2 | from pytest import raises 3 | from triad import Schema 4 | 5 | import fugue.column.functions as f 6 | from fugue.column import 
all_cols, col, lit, null 7 | 8 | 9 | def test_is_agg(): 10 | assert f.is_agg(f.first(col("a"))) 11 | assert f.is_agg(f.count_distinct(col("a")).alias("x")) 12 | assert f.is_agg(f.first(col("a") + 1)) 13 | assert f.is_agg(f.first(col("a")) + 1) 14 | assert f.is_agg((f.first(col("a")) < 1).alias("x")) 15 | assert f.is_agg(col("a") * f.first(col("a")) + 1) 16 | 17 | assert not f.is_agg(col("a")) 18 | assert not f.is_agg(lit("a")) 19 | assert not f.is_agg(col("a") + col("b")) 20 | assert not f.is_agg(null()) 21 | 22 | 23 | def test_functions(): 24 | schema = Schema("a:int,b:str,c:bool,d:double") 25 | 26 | expr = f.coalesce(col("a"), 1, None, col("b") + col("c")) 27 | assert "COALESCE(a,1,NULL,+(b,c))" == str(expr) 28 | assert expr.infer_type(schema) is None 29 | 30 | expr = f.min(col("a")) 31 | assert "MIN(a)" == str(expr) 32 | assert pa.int32() == expr.infer_type(schema) 33 | assert "MIN(a) AS a" == str(expr.infer_alias()) 34 | assert "CAST(MIN(a) AS long) AS a" == str(expr.cast(int).infer_alias()) 35 | assert "MIN(a) AS b" == str(expr.alias("b").infer_alias()) 36 | 37 | assert "MIN(-(a)) AS a" == str(f.min(-col("a")).infer_alias()) 38 | 39 | expr = f.min(lit(1.1)) 40 | assert "MIN(1.1)" == str(expr) 41 | assert pa.float64() == expr.infer_type(schema) 42 | 43 | expr = f.max(col("a")) 44 | assert "MAX(a)" == str(expr) 45 | assert pa.int32() == expr.infer_type(schema) 46 | 47 | expr = f.max(lit(1.1)) 48 | assert "MAX(1.1)" == str(expr) 49 | assert pa.float64() == expr.infer_type(schema) 50 | 51 | expr = f.first(col("a")) 52 | assert "FIRST(a)" == str(expr) 53 | assert pa.int32() == expr.infer_type(schema) 54 | 55 | expr = f.first(lit(1.1)) 56 | assert "FIRST(1.1)" == str(expr) 57 | assert pa.float64() == expr.infer_type(schema) 58 | 59 | expr = f.last(col("a")) 60 | assert "LAST(a)" == str(expr) 61 | assert pa.int32() == expr.infer_type(schema) 62 | 63 | expr = f.last(lit(1.1)) 64 | assert "LAST(1.1)" == str(expr) 65 | assert pa.float64() == expr.infer_type(schema) 66 | 67 | expr = f.avg(col("a")) 68 | assert "AVG(a)" == str(expr) 69 | assert expr.infer_type(schema) is None 70 | 71 | expr = f.sum(col("a")) 72 | assert "SUM(a)" == str(expr) 73 | assert expr.infer_type(schema) is None 74 | 75 | expr = f.count(col("a")) 76 | assert "COUNT(a)" == str(expr) 77 | assert expr.infer_type(schema) is None 78 | 79 | expr = f.count_distinct(col("a")) 80 | assert "COUNT(DISTINCT a)" == str(expr) 81 | assert expr.infer_type(schema) is None 82 | assert "COUNT(DISTINCT a) AS a" == str(expr.infer_alias()) 83 | 84 | expr = f.count_distinct(all_cols()) 85 | assert "COUNT(DISTINCT *)" == str(expr) 86 | assert expr.infer_type(schema) is None 87 | raises(NotImplementedError, lambda: expr.infer_alias()) 88 | -------------------------------------------------------------------------------- /tests/fugue/dataframe/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tests/fugue/dataframe/test_arrow_dataframe.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | import pandas as pd 4 | import pyarrow as pa 5 | from pytest import raises 6 | 7 | import fugue.api as fa 8 | import fugue.test as ft 9 | from fugue.dataframe import ArrowDataFrame 10 | from fugue_test.dataframe_suite import DataFrameTests 11 | 12 | 13 | @ft.fugue_test_suite("native", mark_test=True) 14 | class ArrowDataFrameTests(DataFrameTests.Tests): 15 | def df(self, 
data: Any = None, schema: Any = None) -> ArrowDataFrame: 16 | return ArrowDataFrame(data, schema) 17 | 18 | 19 | @ft.fugue_test_suite("native", mark_test=True) 20 | class NativeArrowDataFrameTests(DataFrameTests.NativeTests): 21 | def df(self, data: Any = None, schema: Any = None) -> pd.DataFrame: 22 | return ArrowDataFrame(data, schema).as_arrow() 23 | 24 | def to_native_df(self, pdf: pd.DataFrame) -> Any: # pragma: no cover 25 | return pa.Table.from_pandas(pdf) 26 | 27 | def test_num_partitions(self): 28 | assert fa.get_num_partitions(self.df([[0, 1]], "a:int,b:int")) == 1 29 | 30 | 31 | def test_init(): 32 | df = ArrowDataFrame(schema="a:str,b:int") 33 | assert df.empty 34 | assert df.schema == "a:str,b:int" 35 | assert df.is_bounded 36 | 37 | df = ArrowDataFrame(pd.DataFrame([], columns=["a", "b"]), schema="a:str,b:int") 38 | assert df.empty 39 | assert df.schema == "a:str,b:int" 40 | assert df.is_bounded 41 | 42 | data = [["a", "1"], ["b", "2"]] 43 | df = ArrowDataFrame(data, "a:str,b:str") 44 | assert [["a", "1"], ["b", "2"]] == df.as_array(type_safe=True) 45 | data = [["a", 1], ["b", 2]] 46 | df = ArrowDataFrame(data, "a:str,b:int") 47 | assert [["a", 1.0], ["b", 2.0]] == df.as_array(type_safe=True) 48 | df = ArrowDataFrame(data, "a:str,b:double") 49 | assert [["a", 1.0], ["b", 2.0]] == df.as_array(type_safe=True) 50 | 51 | ddf = ArrowDataFrame(df.native) 52 | assert [["a", 1.0], ["b", 2.0]] == ddf.as_array(type_safe=True) 53 | 54 | df = ArrowDataFrame(df.as_pandas(), "a:str,b:double") 55 | assert [["a", 1.0], ["b", 2.0]] == df.as_array(type_safe=True) 56 | df = ArrowDataFrame(df.as_pandas()["b"]) 57 | assert [[1.0], [2.0]] == df.as_array(type_safe=True) 58 | 59 | df = ArrowDataFrame([], "x:str,y:double") 60 | assert df.empty 61 | assert df.is_local 62 | assert df.is_bounded 63 | 64 | raises(Exception, lambda: ArrowDataFrame(123)) 65 | -------------------------------------------------------------------------------- /tests/fugue/dataframe/test_dataframe.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 3 | import pandas as pd 4 | from pytest import raises 5 | from triad.collections.schema import Schema 6 | 7 | from fugue.dataframe import ArrayDataFrame, DataFrame 8 | from fugue.api import as_fugue_df, get_native_as_df 9 | from fugue.bag.array_bag import ArrayBag 10 | 11 | 12 | def test_as_fugue_df(): 13 | with raises(NotImplementedError): 14 | as_fugue_df(10) 15 | with raises(TypeError): 16 | as_fugue_df(ArrayBag([1, 2])) 17 | df = pd.DataFrame([[0]], columns=["a"]) 18 | assert isinstance(as_fugue_df(df), DataFrame) 19 | 20 | 21 | def test_get_native_as_df(): 22 | with raises(NotImplementedError): 23 | get_native_as_df(10) 24 | # other tests are in the suites 25 | 26 | 27 | def test_show(): 28 | df = ArrayDataFrame(schema="a:str,b:str") 29 | df.show() 30 | 31 | assert repr(df) == df._repr_html_() 32 | 33 | s = " ".join(["x"] * 2) 34 | df = ArrayDataFrame([[s, 1], ["b", 2]], "a:str,b:str") 35 | df.show() 36 | 37 | s = " ".join(["x"] * 200) 38 | df = ArrayDataFrame([[s, 1], ["b", 2]], "a:str,b:str") 39 | df.show() 40 | 41 | s = " ".join(["x"] * 200) 42 | df = ArrayDataFrame([[s, 1], ["b", s]], "a:str,b:str") 43 | df.show() 44 | 45 | s = "".join(["x"] * 2000) 46 | df = ArrayDataFrame([[s, 1], ["b", None]], "a:str,b:str") 47 | df.show() 48 | 49 | s = " ".join(["x"] * 20) 50 | schema = [f"a{x}:str" for x in range(20)] 51 | data = [[f"aasdfjasdfka;sdf{x}:str" for x in range(20)]] 52 | df = ArrayDataFrame(data, schema) 53 | df.show() 54 
| 55 | s = " ".join(["x"] * 200) 56 | df = ArrayDataFrame([[s, 1], ["b", "s"]], "a:str,b:str") 57 | df.show(n=1, with_count=True, title="abc") 58 | 59 | 60 | def test_lazy_schema(): 61 | df = MockDF([["a", 1], ["b", 2]], "a:str,b:str") 62 | assert callable(df._schema) 63 | assert df.schema == "a:str,b:str" 64 | 65 | 66 | def test_get_info_str(): 67 | df = ArrayDataFrame([["a", 1], ["b", 2]], "a:str,b:str") 68 | assert '{"schema": "a:str,b:str", "type": ' 69 | '"tests.collections.dataframe.test_dataframe.MockDF", "metadata": {}}' == df.get_info_str() 70 | 71 | 72 | def test_copy(): 73 | df = ArrayDataFrame([["a", 1], ["b", 2]], "a:str,b:str") 74 | assert copy.copy(df) is df 75 | assert copy.deepcopy(df) is df 76 | 77 | 78 | class MockDF(ArrayDataFrame): 79 | def __init__(self, df=None, schema=None): 80 | super().__init__(df=df, schema=schema) 81 | DataFrame.__init__(self, lambda: Schema(schema)) 82 | -------------------------------------------------------------------------------- /tests/fugue/dataframe/test_dataframes.py: -------------------------------------------------------------------------------- 1 | from fugue.dataframe import DataFrames 2 | from fugue.dataframe.array_dataframe import ArrayDataFrame 3 | from fugue.dataframe.pandas_dataframe import PandasDataFrame 4 | from pytest import raises 5 | from triad.exceptions import InvalidOperationError 6 | 7 | 8 | def test_dataframes(): 9 | df1 = ArrayDataFrame([[0]], "a:int") 10 | df2 = ArrayDataFrame([[1]], "a:int") 11 | dfs = DataFrames(a=df1, b=df2) 12 | assert dfs[0] is df1 13 | assert dfs[1] is df2 14 | 15 | dfs = DataFrames([df1, df2], df1) 16 | assert not dfs.has_key 17 | assert dfs[0] is df1 18 | assert dfs[1] is df2 19 | assert dfs[2] is df1 20 | 21 | dfs2 = DataFrames(dfs, dfs, df2) 22 | assert not dfs2.has_key 23 | assert dfs2[0] is df1 24 | assert dfs2[1] is df2 25 | assert dfs2[2] is df1 26 | assert dfs2[3] is df1 27 | assert dfs2[4] is df2 28 | assert dfs2[5] is df1 29 | assert dfs2[6] is df2 30 | 31 | dfs = DataFrames([("a", df1), ("b", df2)]) 32 | assert dfs.has_key 33 | assert dfs[0] is df1 34 | assert dfs[1] is df2 35 | assert dfs["a"] is df1 36 | assert dfs["b"] is df2 37 | 38 | with raises(ValueError): 39 | dfs["c"] = 1 40 | 41 | with raises(ValueError): 42 | dfs2 = DataFrames(1) 43 | 44 | with raises(ValueError): 45 | dfs2 = DataFrames(a=df1, b=2) 46 | 47 | with raises(InvalidOperationError): 48 | dfs2 = DataFrames(dict(a=df1), df2) 49 | 50 | with raises(InvalidOperationError): 51 | dfs2 = DataFrames(df2, dict(a=df1)) 52 | 53 | with raises(InvalidOperationError): 54 | dfs2 = DataFrames(df1, a=df2) 55 | 56 | with raises(InvalidOperationError): 57 | dfs2 = DataFrames(DataFrames(df1, df2), x=df2) 58 | 59 | dfs2 = DataFrames(dfs) 60 | assert dfs2.has_key 61 | assert dfs2[0] is df1 62 | assert dfs2[1] is df2 63 | 64 | dfs1 = DataFrames(a=df1, b=df2) 65 | dfs2 = dfs1.convert(lambda x: PandasDataFrame(x.as_array(), x.schema)) 66 | assert len(dfs1) == len(dfs2) 67 | assert dfs2.has_key 68 | assert isinstance(dfs2["a"], PandasDataFrame) 69 | assert isinstance(dfs2["b"], PandasDataFrame) 70 | 71 | dfs1 = DataFrames(df1, df2) 72 | dfs2 = dfs1.convert(lambda x: PandasDataFrame(x.as_array(), x.schema)) 73 | assert len(dfs1) == len(dfs2) 74 | assert not dfs2.has_key 75 | assert isinstance(dfs2[0], PandasDataFrame) 76 | assert isinstance(dfs2[1], PandasDataFrame) -------------------------------------------------------------------------------- /tests/fugue/execution/__init__.py: 
-------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tests/fugue/execution/test_api.py: -------------------------------------------------------------------------------- 1 | from pytest import raises 2 | 3 | import fugue.api as fa 4 | from fugue import NativeExecutionEngine, register_global_conf 5 | from fugue.exceptions import FugueInvalidOperation 6 | 7 | 8 | class MyEngine(NativeExecutionEngine): 9 | def __init__(self, conf=None): 10 | super().__init__(conf) 11 | self.pre_enter_state = [] 12 | self.post_exit_state = [] 13 | self.stop_calls = 0 14 | 15 | def on_enter_context(self) -> None: 16 | self.pre_enter_state += [self.in_context] 17 | 18 | def on_exit_context(self) -> None: 19 | self.post_exit_state += [self.in_context] 20 | 21 | def stop_engine(self) -> None: 22 | self.stop_calls += 1 23 | 24 | 25 | def test_engine_operations(): 26 | o = MyEngine() 27 | assert fa.get_current_conf().get("fugue.x", 0) == 0 28 | register_global_conf({"fugue.x": 1}) 29 | assert fa.get_current_conf().get("fugue.x", 0) == 1 30 | e = fa.set_global_engine(o, {"fugue.x": 2}) 31 | assert e.pre_enter_state == [False] 32 | assert e.post_exit_state == [] 33 | assert fa.get_current_conf().get("fugue.x", 0) == 2 34 | assert isinstance(e, NativeExecutionEngine) 35 | assert e.in_context and e.is_global 36 | assert fa.get_context_engine() is e 37 | with fa.engine_context("duckdb", {"fugue.x": 3}) as e2: 38 | assert fa.get_current_conf().get("fugue.x", 0) == 3 39 | assert fa.get_context_engine() is e2 40 | assert not e2.is_global and e2.in_context 41 | with e.as_context(): 42 | assert e.pre_enter_state == [False, True] 43 | assert e.post_exit_state == [] 44 | assert fa.get_current_conf().get("fugue.x", 0) == 2 45 | assert not e2.is_global and e2.in_context 46 | assert e.in_context and e.is_global 47 | assert fa.get_context_engine() is e 48 | assert e.stop_calls == 0 49 | assert e.pre_enter_state == [False, True] 50 | assert e.post_exit_state == [True] 51 | assert fa.get_current_conf().get("fugue.x", 0) == 3 52 | assert e.in_context and e.is_global 53 | assert fa.get_context_engine() is e2 54 | assert e.stop_calls == 0 55 | assert e.pre_enter_state == [False, True] 56 | assert e.post_exit_state == [True] 57 | assert fa.get_current_conf().get("fugue.x", 0) == 2 58 | assert not e2.is_global and not e2.in_context 59 | assert e.in_context and e.is_global 60 | e3 = fa.set_global_engine("duckdb", {"fugue.x": 4}) 61 | assert e.stop_calls == 1 62 | assert e.pre_enter_state == [False, True] 63 | assert e.post_exit_state == [True, False] 64 | assert fa.get_current_conf().get("fugue.x", 0) == 4 65 | assert not e.in_context and not e.is_global 66 | assert e3.in_context and e3.is_global 67 | fa.clear_global_engine() 68 | assert not e3.in_context and not e3.is_global 69 | assert fa.get_current_conf().get("fugue.x", 0) == 1 70 | raises(FugueInvalidOperation, lambda: fa.get_context_engine()) 71 | -------------------------------------------------------------------------------- /tests/fugue/execution/test_execution_engine.py: -------------------------------------------------------------------------------- 1 | from typing import Type 2 | 3 | from pytest import raises 4 | from triad.collections.dict import ParamDict 5 | from triad.utils.convert import get_full_type_path 6 | 7 | from fugue import ExecutionEngine, NativeExecutionEngine, register_global_conf 8 | from fugue.constants import FUGUE_CONF_SQL_IGNORE_CASE 9 | from 
fugue.rpc.base import NativeRPCServer 10 | from fugue_duckdb import DuckDBEngine 11 | 12 | 13 | class _MockSQLEngine(DuckDBEngine): 14 | @property 15 | def execution_engine_constraint(self) -> Type[ExecutionEngine]: 16 | return _MockExecutionEngine 17 | 18 | 19 | class _MockExecutionEngine(NativeExecutionEngine): 20 | def __init__(self, conf=None): 21 | super().__init__(conf=conf) 22 | self._stop = 0 23 | 24 | def stop_engine(self): 25 | self._stop += 1 26 | 27 | def create_default_sql_engine(self): 28 | return _MockSQLEngine(self) 29 | 30 | 31 | class _MockRPC(NativeRPCServer): 32 | _start = 0 33 | _stop = 0 34 | 35 | def __init__(self, conf): 36 | super().__init__(conf) 37 | _MockRPC._start = 0 38 | _MockRPC._stop = 0 39 | 40 | def start_handler(self): 41 | _MockRPC._start += 1 42 | 43 | def stop_handler(self): 44 | _MockRPC._stop += 1 45 | 46 | 47 | def test_sql_engine_init(): 48 | engine = _MockExecutionEngine() 49 | assert isinstance(engine.sql_engine, _MockSQLEngine) 50 | 51 | with raises(TypeError): 52 | _MockSQLEngine(NativeExecutionEngine()) 53 | 54 | 55 | def test_start_stop(): 56 | conf = {"fugue.rpc.server": get_full_type_path(_MockRPC)} 57 | engine = _MockExecutionEngine(conf=conf) 58 | engine.stop() 59 | assert 1 == engine._stop 60 | engine.stop() # stop will be called only once 61 | assert 1 == engine._stop 62 | 63 | 64 | def test_global_conf(): 65 | register_global_conf({"ftest.a": 1}) 66 | engine = _MockExecutionEngine() 67 | assert 1 == engine.conf.get_or_throw("ftest.a", int) 68 | engine = _MockExecutionEngine({"ftest.a": 2}) 69 | assert 2 == engine.conf.get_or_throw("ftest.a", int) 70 | assert not engine.conf.get_or_throw(FUGUE_CONF_SQL_IGNORE_CASE, bool) 71 | 72 | # with duplicated value but it's the same as existing ones 73 | register_global_conf({"ftest.a": 1, "ftest.b": 2}, on_dup=ParamDict.THROW) 74 | engine = _MockExecutionEngine() 75 | assert 1 == engine.conf.get_or_throw("ftest.a", int) 76 | assert 2 == engine.conf.get_or_throw("ftest.b", int) 77 | 78 | # transactional, of one value has problem, the whole conf will not be added 79 | with raises(ValueError): 80 | register_global_conf({"ftest.a": 2, "ftest.c": 3}, on_dup=ParamDict.THROW) 81 | assert 1 == engine.conf.get_or_throw("ftest.a", int) 82 | assert 2 == engine.conf.get_or_throw("ftest.b", int) 83 | assert "ftest.c" not in engine.conf 84 | -------------------------------------------------------------------------------- /tests/fugue/extensions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fugue-project/fugue/596d28eea1834ecc9087b864b83e1b67b9748977/tests/fugue/extensions/__init__.py -------------------------------------------------------------------------------- /tests/fugue/extensions/creator/__init__.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Iterable, List 2 | 3 | from fugue.dataframe import ArrayDataFrame 4 | from fugue.exceptions import FugueInterfacelessError 5 | from fugue.extensions.transformer import Transformer, _to_transformer, transformer 6 | from pytest import raises 7 | from triad.collections.schema import Schema 8 | 9 | 10 | def test_transformer(): 11 | assert isinstance(t1, Transformer) 12 | df = ArrayDataFrame([[0]], "a:int") 13 | t1._output_schema = t1.get_output_schema(df) 14 | assert t1.output_schema == "a:int,b:int" 15 | t2._output_schema = t2.get_output_schema(df) 16 | assert t2.output_schema == "b:int,a:int" 17 | assert [[0, 1]] 
/tests/fugue/extensions/creator/__init__.py:
--------------------------------------------------------------------------------
from typing import Any, Dict, Iterable, List

from fugue.dataframe import ArrayDataFrame
from fugue.exceptions import FugueInterfacelessError
from fugue.extensions.transformer import Transformer, _to_transformer, transformer
from pytest import raises
from triad.collections.schema import Schema


def test_transformer():
    assert isinstance(t1, Transformer)
    df = ArrayDataFrame([[0]], "a:int")
    t1._output_schema = t1.get_output_schema(df)
    assert t1.output_schema == "a:int,b:int"
    t2._output_schema = t2.get_output_schema(df)
    assert t2.output_schema == "b:int,a:int"
    assert [[0, 1]] == list(t3(df.as_array_iterable()))


def test__to_transformer():
    a = _to_transformer(t1, None)
    assert isinstance(a, Transformer)
    a._x = 1
    # every parse should produce a different transformer, even when the
    # input is already a transformer instance
    b = _to_transformer(t1, None)
    assert isinstance(b, Transformer)
    assert "_x" not in b.__dict__
    c = _to_transformer("t1", None)
    assert isinstance(c, Transformer)
    assert "_x" not in c.__dict__
    c._x = 1
    d = _to_transformer("t1", None)
    assert isinstance(d, Transformer)
    assert "_x" not in d.__dict__
    raises(FugueInterfacelessError, lambda: _to_transformer(t4, None))
    raises(FugueInterfacelessError, lambda: _to_transformer("t4", None))
    e = _to_transformer("t4", "*,b:int")
    assert isinstance(e, Transformer)


@transformer(["*", None, "b:int"])
def t1(df: Iterable[Dict[str, Any]]) -> Iterable[Dict[str, Any]]:
    for r in df:
        r["b"] = 1
        yield r


@transformer([Schema("b:int"), "*"])
def t2(df: Iterable[Dict[str, Any]]) -> Iterable[Dict[str, Any]]:
    for r in df:
        r["b"] = 1
        yield r


@transformer("*, b:int")
def t3(df: Iterable[List[Any]]) -> Iterable[List[Any]]:
    for r in df:
        r += [1]
        yield r


def t4(df: Iterable[List[Any]]) -> Iterable[List[Any]]:
    for r in df:
        r += [1]
        yield r
--------------------------------------------------------------------------------
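The @transformer decorator above attaches an output schema to a plain function; the same can be done with a "# schema:" comment hint and consumed through the top-level transform() function. A minimal sketch, assuming pandas is installed:

import pandas as pd

from fugue import transform


# schema: *,b:int
def with_b(df: pd.DataFrame) -> pd.DataFrame:
    # the hint above tells Fugue the output adds an int column b
    return df.assign(b=1)


res = transform(pd.DataFrame({"a": [0, 1]}), with_b)
print(res)  # a pandas DataFrame with columns a and b
--------------------------------------------------------------------------------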
/tests/fugue/extensions/outputter/__init__.py:
--------------------------------------------------------------------------------
from typing import Any, Dict, Iterable, List

from fugue.dataframe import ArrayDataFrame
from fugue.exceptions import FugueInterfacelessError
from fugue.extensions.transformer import Transformer, _to_transformer, transformer
from pytest import raises
from triad.collections.schema import Schema


def test_transformer():
    assert isinstance(t1, Transformer)
    df = ArrayDataFrame([[0]], "a:int")
    t1._output_schema = t1.get_output_schema(df)
    assert t1.output_schema == "a:int,b:int"
    t2._output_schema = t2.get_output_schema(df)
    assert t2.output_schema == "b:int,a:int"
    assert [[0, 1]] == list(t3(df.as_array_iterable()))


def test__to_transformer():
    a = _to_transformer(t1, None)
    assert isinstance(a, Transformer)
    a._x = 1
    # every parse should produce a different transformer, even when the
    # input is already a transformer instance
    b = _to_transformer(t1, None)
    assert isinstance(b, Transformer)
    assert "_x" not in b.__dict__
    c = _to_transformer("t1", None)
    assert isinstance(c, Transformer)
    assert "_x" not in c.__dict__
    c._x = 1
    d = _to_transformer("t1", None)
    assert isinstance(d, Transformer)
    assert "_x" not in d.__dict__
    raises(FugueInterfacelessError, lambda: _to_transformer(t4, None))
    raises(FugueInterfacelessError, lambda: _to_transformer("t4", None))
    e = _to_transformer("t4", "*,b:int")
    assert isinstance(e, Transformer)


@transformer(["*", None, "b:int"])
def t1(df: Iterable[Dict[str, Any]]) -> Iterable[Dict[str, Any]]:
    for r in df:
        r["b"] = 1
        yield r


@transformer([Schema("b:int"), "*"])
def t2(df: Iterable[Dict[str, Any]]) -> Iterable[Dict[str, Any]]:
    for r in df:
        r["b"] = 1
        yield r


@transformer("*, b:int")
def t3(df: Iterable[List[Any]]) -> Iterable[List[Any]]:
    for r in df:
        r += [1]
        yield r


def t4(df: Iterable[List[Any]]) -> Iterable[List[Any]]:
    for r in df:
        r += [1]
        yield r
--------------------------------------------------------------------------------
/tests/fugue/extensions/processor/__init__.py:
--------------------------------------------------------------------------------
from typing import Any, Dict, Iterable, List

from fugue.dataframe import ArrayDataFrame
from fugue.exceptions import FugueInterfacelessError
from fugue.extensions.transformer import Transformer, _to_transformer, transformer
from pytest import raises
from triad.collections.schema import Schema


def test_transformer():
    assert isinstance(t1, Transformer)
    df = ArrayDataFrame([[0]], "a:int")
    t1._output_schema = t1.get_output_schema(df)
    assert t1.output_schema == "a:int,b:int"
    t2._output_schema = t2.get_output_schema(df)
    assert t2.output_schema == "b:int,a:int"
    assert [[0, 1]] == list(t3(df.as_array_iterable()))


def test__to_transformer():
    a = _to_transformer(t1, None)
    assert isinstance(a, Transformer)
    a._x = 1
    # every parse should produce a different transformer, even when the
    # input is already a transformer instance
    b = _to_transformer(t1, None)
    assert isinstance(b, Transformer)
    assert "_x" not in b.__dict__
    c = _to_transformer("t1", None)
    assert isinstance(c, Transformer)
    assert "_x" not in c.__dict__
    c._x = 1
    d = _to_transformer("t1", None)
    assert isinstance(d, Transformer)
    assert "_x" not in d.__dict__
    raises(FugueInterfacelessError, lambda: _to_transformer(t4, None))
    raises(FugueInterfacelessError, lambda: _to_transformer("t4", None))
    e = _to_transformer("t4", "*,b:int")
    assert isinstance(e, Transformer)


@transformer(["*", None, "b:int"])
def t1(df: Iterable[Dict[str, Any]]) -> Iterable[Dict[str, Any]]:
    for r in df:
        r["b"] = 1
        yield r


@transformer([Schema("b:int"), "*"])
def t2(df: Iterable[Dict[str, Any]]) -> Iterable[Dict[str, Any]]:
    for r in df:
        r["b"] = 1
        yield r


@transformer("*, b:int")
def t3(df: Iterable[List[Any]]) -> Iterable[List[Any]]:
    for r in df:
        r += [1]
        yield r


def t4(df: Iterable[List[Any]]) -> Iterable[List[Any]]:
    for r in df:
        r += [1]
        yield r
--------------------------------------------------------------------------------
/tests/fugue/extensions/transformer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fugue-project/fugue/596d28eea1834ecc9087b864b83e1b67b9748977/tests/fugue/extensions/transformer/__init__.py
--------------------------------------------------------------------------------
/tests/fugue/rpc/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fugue-project/fugue/596d28eea1834ecc9087b864b83e1b67b9748977/tests/fugue/rpc/__init__.py
--------------------------------------------------------------------------------
/tests/fugue/rpc/test_base.py:
--------------------------------------------------------------------------------
import pickle

from fugue.rpc import make_rpc_server, to_rpc_handler, RPCFunc, EmptyRPCHandler
from pytest import raises
from triad import ParamDict


def test_default_server():
    def k(value: str) -> str:
        return value + "x"

    def kk(value: str) -> str:
        return value + "xx"

    conf = {"x": "y"}

    with make_rpc_server(conf).start() as server:
        assert "y" == server.conf["x"]
        with server.start():  # a nested start has no effect
            client = server.make_client(k)
            assert "dddx" == client("ddd")
            client = server.make_client(kk)
            assert "dddxx" == client("ddd")
        server.stop()  # an extra stop at the end has no effect

        with raises(pickle.PicklingError):
            pickle.dumps(client)

        with raises(pickle.PicklingError):
            pickle.dumps(server)


def test_server_handlers():
    func = lambda x: x + "aa"

    class _Dict(RPCFunc):
        def __init__(self, obj):
            super().__init__(obj)
            self.start_called = 0
            self.stop_called = 0

        def start_handler(self):
            self.start_called += 1

        def stop_handler(self):
            self.stop_called += 1

    server = make_rpc_server({})
    server.start()
    d1 = _Dict(func)
    c1 = server.make_client(d1)
    assert "xaa" == c1("x")
    assert 1 == d1.start_called
    assert 0 == d1.stop_called
    server.stop()
    assert 1 == d1.start_called
    assert 1 == d1.stop_called

    with server.start():
        d2 = _Dict(func)
        c1 = server.make_client(d2)
        server.start()
        assert "xaa" == c1("x")
        assert 1 == d2.start_called
        assert 0 == d2.stop_called
        assert 1 == d1.start_called
        assert 1 == d1.stop_called
        server.stop()
    assert 1 == d2.start_called
    assert 1 == d2.stop_called
    assert 1 == d1.start_called
    assert 1 == d1.stop_called


def test_to_rpc_handler():
    assert isinstance(to_rpc_handler(None), EmptyRPCHandler)
    assert isinstance(to_rpc_handler(lambda x: x), RPCFunc)
    handler = to_rpc_handler(lambda x: x)
    assert handler is to_rpc_handler(handler)
    raises(ValueError, lambda: to_rpc_handler(1))
--------------------------------------------------------------------------------
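A condensed sketch of the server/handler round trip tested above (the callback logic is illustrative):

from fugue.rpc import make_rpc_server, to_rpc_handler

handler = to_rpc_handler(lambda x: x + "!")  # plain callables become RPCFunc
with make_rpc_server({}).start() as server:
    client = server.make_client(handler)
    assert client("hello") == "hello!"
--------------------------------------------------------------------------------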
/tests/fugue/rpc/test_flask.py:
--------------------------------------------------------------------------------
import pytest

pytest.importorskip("flask")
pytest.importorskip("jinja2")
from fugue.rpc import make_rpc_server
from triad import ParamDict
import cloudpickle


def test_flask_service():
    # fugue.rpc.flask.FlaskRPCServer
    conf = ParamDict(
        {
            "fugue.rpc.server": "fugue.rpc.flask.FlaskRPCServer",
            "fugue.rpc.flask_server.host": "127.0.0.1",
            "fugue.rpc.flask_server.port": "1234",
            "fugue.rpc.flask_server.timeout": "2 sec",
        }
    )

    def k(value: str) -> str:
        return value + "x"

    def kk(a: int, b: int) -> int:
        return a + b

    def kkk(f: callable, a: int) -> int:
        return f(a)

    with make_rpc_server(conf).start() as server:
        assert "1234" == server.conf["fugue.rpc.flask_server.port"]
        with server.start():  # a nested start has no effect
            client1 = cloudpickle.loads(cloudpickle.dumps(server.make_client(k)))
            assert "dddx" == client1("ddd")
            client2 = cloudpickle.loads(cloudpickle.dumps(server.make_client(kk)))
            assert 3 == client2(1, 2)
            assert "dddx" == client1("ddd")
            client3 = cloudpickle.loads(cloudpickle.dumps(server.make_client(kkk)))
            assert 3 == client3(lambda x: x + 1, 2)
            assert 3 == client2(1, 2)
        server.stop()  # an extra stop at the end has no effect
--------------------------------------------------------------------------------
/tests/fugue/rpc/test_func.py:
--------------------------------------------------------------------------------
from fugue.rpc import RPCFunc, to_rpc_handler
from pytest import raises
from triad import to_uuid
from copy import copy, deepcopy


def test_rpc_func():
    def f1(a: str) -> str:
        return "1"

    d1 = RPCFunc(f1)
    d2 = to_rpc_handler(f1)
    assert to_uuid(d1) == to_uuid(d2)
    assert to_uuid(d1) == to_uuid(to_rpc_handler(d1))
    assert "1" == d1("x")
    with raises(ValueError):
        RPCFunc(1)


def test_determinism():
    def _f1(a: str) -> str:
        return "1"

    assert to_uuid(RPCFunc(_f1)) == to_uuid(to_rpc_handler(_f1))
    assert to_uuid(RPCFunc(lambda x: x)) == to_uuid(RPCFunc(lambda x: x + 1))


def test_no_copy():
    class T(object):
        def __init__(self):
            self.n = 0

        def call(self, n: int) -> int:
            self.n += n
            return self.n

    t = T()
    d1 = RPCFunc(t.call)
    assert 10 == d1(10)
    assert 10 == t.n

    d2 = to_rpc_handler(t.call)
    d2(10)

    d3 = to_rpc_handler(d1)
    d3(10)
    assert 30 == t.n

    d4 = copy(d3)
    d4(10)

    d5 = deepcopy(d4)
    d5(10)
    assert 50 == t.n
--------------------------------------------------------------------------------
/tests/fugue/sql/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fugue-project/fugue/596d28eea1834ecc9087b864b83e1b67b9748977/tests/fugue/sql/__init__.py
--------------------------------------------------------------------------------
/tests/fugue/test/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fugue-project/fugue/596d28eea1834ecc9087b864b83e1b67b9748977/tests/fugue/test/__init__.py
--------------------------------------------------------------------------------
/tests/fugue/utils/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/tests/fugue/utils/test_interfaceless.py:
--------------------------------------------------------------------------------
from pytest import raises

from fugue._utils.interfaceless import (
    is_class_method,
    parse_comment_annotation,
    parse_output_schema_from_comment,
)


def test_parse_comment_annotation():
    def a():
        pass

    # asdfasdf
    def b():
        pass

    # asdfasdf
    # schema : s:int
    # # # schema : a : int,b:str
    # schema : a : str ,b:str
    # asdfasdf
    def c():
        pass

    # schema:
    def d():
        pass

    assert parse_comment_annotation(a, "schema") is None
    assert parse_comment_annotation(b, "schema") is None
    assert "a : str ,b:str" == parse_comment_annotation(c, "schema")
    assert "" == parse_comment_annotation(d, "schema")


def test_parse_output_schema_from_comment():
    def a():
        pass

    # asdfasdf
    def b():
        pass

    # asdfasdf
    # schema : s : int # more comment
    # # # schema : a : int,b:str
    # asdfasdf
    def c():
        pass

    # schema:
    def d():
        pass

    assert parse_output_schema_from_comment(a) is None
    assert parse_output_schema_from_comment(b) is None
    assert "s:int" == parse_output_schema_from_comment(c).replace(" ", "")
    raises(SyntaxError, lambda: parse_output_schema_from_comment(d))


def test_is_class_method():
    def f1():
        pass

    class F(object):
        def f2(self):
            pass

    assert not is_class_method(f1)
    assert is_class_method(F.f2)
    assert not is_class_method(F().f2)
--------------------------------------------------------------------------------
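The comment-annotation rules verified above drive Fugue's interfaceless extensions: the last uncommented "# schema :" line wins, and commented-out hints ("# # schema ...") are ignored. A quick sketch of calling the parser directly in a script (note it is a private utility, imported here only as the test does):

from fugue._utils.interfaceless import parse_output_schema_from_comment


# schema: a:int, b:str
def f():
    pass


# prints the raw hint string; surrounding whitespace may vary
print(parse_output_schema_from_comment(f))
--------------------------------------------------------------------------------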
/tests/fugue/utils/test_misc.py:
--------------------------------------------------------------------------------
from fugue._utils.misc import get_attribute
from pytest import raises


def test_get_attribute():
    class C(object):
        pass

    c = C()
    assert "x" not in c.__dict__
    assert 0 == get_attribute(c, "x", int)
    assert 0 == c.x
    assert 0 == get_attribute(c, "x", int)
    c.x = 10
    assert 10 == get_attribute(c, "x", int)
    raises(TypeError, lambda: get_attribute(c, "x", str))
--------------------------------------------------------------------------------
/tests/fugue/workflow/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fugue-project/fugue/596d28eea1834ecc9087b864b83e1b67b9748977/tests/fugue/workflow/__init__.py
--------------------------------------------------------------------------------
/tests/fugue/workflow/test_runtime_exception.py:
--------------------------------------------------------------------------------
import pandas as pd
from fugue import FugueWorkflow
import sys
import traceback
from fugue.constants import (
    FUGUE_CONF_WORKFLOW_EXCEPTION_HIDE,
    FUGUE_CONF_WORKFLOW_EXCEPTION_OPTIMIZE,
)


def test_runtime_exception():
    if sys.version_info < (3, 7):
        return

    def tr(df: pd.DataFrame) -> pd.DataFrame:
        raise Exception

    def show(df):
        df.show()

    dag = FugueWorkflow()
    df = dag.df([[0]], "a:int")
    df = df.transform(tr, schema="*")
    show(df)

    try:
        dag.run()
    except Exception:
        assert len(traceback.extract_tb(sys.exc_info()[2])) < 10

    dag = FugueWorkflow({FUGUE_CONF_WORKFLOW_EXCEPTION_OPTIMIZE: False})
    df = dag.df([[0]], "a:int")
    df = df.transform(tr, schema="*")
    show(df)

    try:
        dag.run("native")
    except Exception:
        assert len(traceback.extract_tb(sys.exc_info()[2])) > 10

    dag = FugueWorkflow({FUGUE_CONF_WORKFLOW_EXCEPTION_HIDE: ""})
    df = dag.df([[0]], "a:int")
    df = df.transform(tr, schema="*")
    show(df)

    try:
        dag.run("native")
    except Exception:
        assert len(traceback.extract_tb(sys.exc_info()[2])) > 10


def test_modified_exception():
    if sys.version_info < (3, 7):
        return

    def tr(df: pd.DataFrame) -> pd.DataFrame:
        raise Exception

    def show(df):
        df.show()

    def tt(df):
        __modified_exception__ = NotImplementedError()
        return df.transform(tr, schema="*")

    dag = FugueWorkflow()
    df = dag.df([[0]], "a:int")
    df = tt(df)
    show(df)

    try:
        dag.run()
    except Exception as ex:
        assert isinstance(ex.__cause__, NotImplementedError)
--------------------------------------------------------------------------------
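The configs tested above control how much of Fugue's own scheduling code shows up in a failure's traceback; a sketch of turning the pruning off while debugging (constants as imported in the test):

from fugue import FugueWorkflow
from fugue.constants import FUGUE_CONF_WORKFLOW_EXCEPTION_HIDE

# an empty hide-list keeps the full traceback
dag = FugueWorkflow({FUGUE_CONF_WORKFLOW_EXCEPTION_HIDE: ""})
dag.df([[0]], "a:int").show()
dag.run()
--------------------------------------------------------------------------------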
/tests/fugue/workflow/test_workflow_parallel.py:
--------------------------------------------------------------------------------
from fugue import FugueWorkflow, DataFrame, NativeExecutionEngine
from typing import List, Any
from time import sleep
from timeit import timeit
from pytest import raises


def test_parallel():
    dag = FugueWorkflow({"fugue.workflow.concurrency": 10})
    dag.create(create).process(process).output(display)
    dag.create(create).process(process).output(display)

    t = timeit(
        lambda: dag.run(),
        number=1,
    )  # warmup
    t = timeit(
        lambda: dag.run(),
        number=1,
    )
    assert t < 0.4


def test_parallel_exception():
    dag = FugueWorkflow({"fugue.workflow.concurrency": 2})
    dag.create(create).process(process).process(process, params=dict(sec=0.5)).output(
        display
    )
    dag.create(create_e).process(process).output(display)

    def run(dag, *args):
        with raises(NotImplementedError):
            dag.run(*args)

    t = timeit(
        lambda: run(dag),
        number=1,
    )  # warmup
    t = timeit(
        lambda: run(dag),
        number=1,
    )
    assert t < 0.5


# schema: a:int
def create(sec: float = 0.1) -> List[List[Any]]:
    sleep(sec)
    return [[0]]


# schema: a:int
def create_e(sec: float = 0.1) -> List[List[Any]]:
    raise NotImplementedError


def process(df: DataFrame, sec: float = 0.1) -> DataFrame:
    sleep(sec)
    return df


def display(df: DataFrame, sec: float = 0.1) -> None:
    sleep(sec)
    df.show()
--------------------------------------------------------------------------------
/tests/fugue_dask/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fugue-project/fugue/596d28eea1834ecc9087b864b83e1b67b9748977/tests/fugue_dask/__init__.py
--------------------------------------------------------------------------------
/tests/fugue_dask/test_importless.py:
--------------------------------------------------------------------------------
import pytest

from fugue import FugueWorkflow, fsql
import fugue.test as ft


@ft.with_backend("dask")
def test_importless(backend_context):
    pytest.importorskip("fugue_sql_antlr")
    for engine in ["dask", backend_context.session]:
        dag = FugueWorkflow()
        dag.df([[0]], "a:int").show()

        dag.run(engine)

        fsql(
            """
        CREATE [[0],[1]] SCHEMA a:int
        SELECT * WHERE a<1
        PRINT
        """
        ).run(engine)

        dag = FugueWorkflow()

        dag.run(engine)
--------------------------------------------------------------------------------
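test_parallel above relies on the "fugue.workflow.concurrency" conf to overlap independent DAG branches; a minimal sketch (the concurrency value is illustrative):

from fugue import FugueWorkflow

dag = FugueWorkflow({"fugue.workflow.concurrency": 4})
dag.df([[0]], "a:int").show()  # two independent branches...
dag.df([[1]], "a:int").show()  # ...may execute concurrently
dag.run()
--------------------------------------------------------------------------------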
/tests/fugue_dask/test_sql.py:
--------------------------------------------------------------------------------
import pytest

pytest.importorskip("fugue_sql_antlr")
import dask.dataframe as dd
import pandas as pd

from fugue import FugueSQLWorkflow, register_execution_engine
from fugue_dask import DaskExecutionEngine
import fugue.test as ft


@ft.with_backend("dask")
def test_sql(backend_context):
    register_execution_engine(
        "da",
        lambda conf, **kwargs: DaskExecutionEngine(
            conf=conf, dask_client=backend_context.session
        ),
    )
    df = dd.from_pandas(pd.DataFrame([[0], [1]], columns=["a"]), npartitions=2)
    dag = FugueSQLWorkflow()
    dag(
        """
    SELECT * FROM df WHERE a>0
    PRINT
    """,
        df=df,
    )
    dag.run("da")
--------------------------------------------------------------------------------
/tests/fugue_duckdb/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fugue-project/fugue/596d28eea1834ecc9087b864b83e1b67b9748977/tests/fugue_duckdb/__init__.py
--------------------------------------------------------------------------------
/tests/fugue_duckdb/test_importless.py:
--------------------------------------------------------------------------------
from fugue import FugueWorkflow
from fugue import fsql


def test_importless():
    for engine in ["duck", "duckdb"]:
        dag = FugueWorkflow()
        dag.df([[0]], "a:int").show()

        dag.run(engine)

        fsql(
            """
        CREATE [[0],[1]] SCHEMA a:int
        SELECT * WHERE a<1
        PRINT
        """
        ).run(engine)

        dag = FugueWorkflow()
        tdf = dag.df([[0], [1]], "a:int")
        dag.select("SELECT * FROM ", tdf, " WHERE a<1", sql_engine=engine)

        dag.run()
--------------------------------------------------------------------------------
/tests/fugue_ibis/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fugue-project/fugue/596d28eea1834ecc9087b864b83e1b67b9748977/tests/fugue_ibis/__init__.py
--------------------------------------------------------------------------------
/tests/fugue_ibis/mock/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fugue-project/fugue/596d28eea1834ecc9087b864b83e1b67b9748977/tests/fugue_ibis/mock/__init__.py
--------------------------------------------------------------------------------
/tests/fugue_ibis/mock/dataframe.py:
--------------------------------------------------------------------------------
from typing import Any

from fugue import ArrowDataFrame, DataFrame, LocalDataFrame
from fugue.plugins import as_fugue_dataset, as_local_bounded
from fugue_ibis import IbisDataFrame, IbisTable


class MockDuckDataFrame(IbisDataFrame):
    def to_sql(self) -> str:
        return str(self.native.compile())

    def _to_new_df(self, table: IbisTable, schema: Any = None) -> DataFrame:
        return MockDuckDataFrame(table, schema=schema)

    def _to_local_df(self, table: IbisTable, schema: Any = None) -> LocalDataFrame:
        return ArrowDataFrame(table.execute(), schema=schema)

    def _to_iterable_df(self, table: IbisTable, schema: Any = None) -> LocalDataFrame:
        return self._to_local_df(table, schema=schema)


# should also check that the df's backend is duckdb
@as_fugue_dataset.candidate(lambda df, **kwargs: isinstance(df, IbisTable))
def _ibis_as_fugue(df: IbisTable, **kwargs: Any) -> DataFrame:
    return MockDuckDataFrame(df, **kwargs)


# should also check that the df's backend is duckdb
@as_local_bounded.candidate(lambda df, **kwargs: isinstance(df, IbisTable))
def _ibis_as_local(df: IbisTable, **kwargs: Any) -> Any:
    return df.execute()
--------------------------------------------------------------------------------
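register_execution_engine, as used in test_sql.py above, maps a short name to an engine factory; a sketch with the built-in native engine ("mynative" is an arbitrary illustrative name):

from fugue import FugueWorkflow, NativeExecutionEngine, register_execution_engine

register_execution_engine(
    "mynative", lambda conf, **kwargs: NativeExecutionEngine(conf=conf)
)

dag = FugueWorkflow()
dag.df([[0], [1]], "a:int").show()
dag.run("mynative")  # the registered name resolves to the factory above
--------------------------------------------------------------------------------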
/tests/fugue_ibis/mock/registry.py:
--------------------------------------------------------------------------------
from fugue.plugins import parse_execution_engine
from typing import Any
from .execution_engine import MockDuckExecutionEngine


@parse_execution_engine.candidate(
    lambda engine, conf, **kwargs: isinstance(engine, str) and engine == "mockibisduck"
)
def _parse_mockibisduck(
    engine: str, conf: Any, **kwargs: Any
) -> MockDuckExecutionEngine:
    return MockDuckExecutionEngine(conf=conf)
--------------------------------------------------------------------------------
/tests/fugue_ibis/mock/tester.py:
--------------------------------------------------------------------------------
from contextlib import contextmanager
from typing import Any, Dict, Iterator

import pytest

import fugue.test as ft
from .registry import *  # noqa: F401, F403 # pylint: disable-all


@ft.fugue_test_backend
class _MockIbisDuckDBTestBackend(ft.FugueTestBackend):
    name = "mockibisduck"

    @classmethod
    @contextmanager
    def session_context(cls, session_conf: Dict[str, Any]) -> Iterator[Any]:
        yield "mockibisduck"


@pytest.fixture(scope="module")
def mockibisduck_session():
    with _MockIbisDuckDBTestBackend.generate_session_fixture() as session:
        yield session
--------------------------------------------------------------------------------
/tests/fugue_ibis/test_dataframe.py:
--------------------------------------------------------------------------------
import sys
from datetime import datetime
from typing import Any

import pandas as pd
import pytest

import fugue.api as fe
import fugue.test as ft
from fugue import ArrowDataFrame
from fugue.exceptions import FugueDataFrameOperationError
from fugue_test.dataframe_suite import DataFrameTests

from .mock.dataframe import MockDuckDataFrame
from .mock.tester import mockibisduck_session  # noqa: F401 # pylint: disable-all
from uuid import uuid4


@ft.fugue_test_suite("mockibisduck", mark_test=True)
class IbisDataFrameTests(DataFrameTests.Tests):
    def df(self, data: Any = None, schema: Any = None) -> MockDuckDataFrame:
        df = ArrowDataFrame(data, schema)
        name = "_" + str(uuid4())[:5]
        con = self.context.engine.sql_engine.backend
        con.create_table(name, df.native, overwrite=True)
        return MockDuckDataFrame(con.table(name), schema=schema)

    def test_init_df(self):
        df = self.df([["x", 1]], "a:str,b:int")
        df = MockDuckDataFrame(df.native, "a:str,b:long")
        assert df.schema == "a:str,b:long"

    def test_is_local(self):
        df = self.df([["x", 1]], "a:str,b:int")
        assert not fe.is_local(df)
        assert fe.is_bounded(df)

    def test_map_type(self):
        pass

    def test_as_arrow(self):
        # empty
        df = self.df([], "a:int,b:int")
        assert [] == list(ArrowDataFrame(df.as_arrow()).as_dict_iterable())
        # pd.NaT
        df = self.df([[pd.NaT, 1]], "a:datetime,b:int")
        assert [dict(a=None, b=1)] == list(
            ArrowDataFrame(df.as_arrow()).as_dict_iterable()
        )
        # pandas timestamps
        df = self.df([[pd.Timestamp("2020-01-01"), 1]], "a:datetime,b:int")
        assert [dict(a=datetime(2020, 1, 1), b=1)] == list(
            ArrowDataFrame(df.as_arrow()).as_dict_iterable()
        )

    def test_deep_nested_types(self):
        pass

    def test_list_type(self):
        pass

    def test_native_table(self):
        df = self.df([["x", 1]], "a:str,b:int").native
        assert fe.get_schema(fe.rename(df, dict())) == "a:str,b:int"
        assert fe.get_schema(fe.rename(df, dict(a="c"))) == "c:str,b:int"

        with pytest.raises(Exception):
            fe.rename(df, dict(a="b"))

        with pytest.raises(FugueDataFrameOperationError):
            fe.rename(df, dict(x="y"))

        assert fe.get_schema(fe.drop_columns(df, [])) == "a:str,b:int"
        assert fe.get_schema(fe.drop_columns(df, ["a"])) == "b:int"

        with pytest.raises(FugueDataFrameOperationError):
            fe.get_schema(fe.drop_columns(df, ["a", "b"]))

        with pytest.raises(FugueDataFrameOperationError):
            fe.get_schema(fe.drop_columns(df, ["a", "c"]))
--------------------------------------------------------------------------------
/tests/fugue_ibis/test_execution_engine.py:
--------------------------------------------------------------------------------
import sys

import pytest

import fugue.test as ft
from fugue_test.builtin_suite import BuiltInTests
from fugue_test.execution_suite import ExecutionEngineTests

from .mock.tester import mockibisduck_session  # noqa: F401 # pylint: disable-all


@ft.fugue_test_suite("mockibisduck", mark_test=True)
class IbisExecutionEngineTests(ExecutionEngineTests.Tests):
    def test_select(self):
        # it can't work properly with DuckDB (hugeint is not recognized)
        pass


@ft.fugue_test_suite(("mockibisduck", {"fugue.force_is_ibis": True}), mark_test=True)
class IbisExecutionEngineForceIbisTests(ExecutionEngineTests.Tests):
    def test_properties(self):
        assert not self.engine.is_distributed
        assert not self.engine.map_engine.is_distributed
        assert not self.engine.sql_engine.is_distributed

        assert (
            self.engine.sql_engine.get_temp_table_name()
            != self.engine.sql_engine.get_temp_table_name()
        )

    def test_select(self):
        # it can't work properly with DuckDB (hugeint is not recognized)
        pass

    def test_get_parallelism(self):
        assert self.engine.get_current_parallelism() == 1

    def test_union(self):
        if sys.version_info >= (3, 9):
            # ibis 3.8 support no longer works
            return super().test_union()


@ft.fugue_test_suite("mockibisduck", mark_test=True)
class DuckBuiltInTests(BuiltInTests.Tests):
    def test_df_select(self):
        # it can't work properly with DuckDB (hugeint is not recognized)
        pass


@ft.fugue_test_suite(("mockibisduck", {"fugue.force_is_ibis": True}), mark_test=True)
class DuckBuiltInForceIbisTests(BuiltInTests.Tests):
    def test_df_select(self):
        # it can't work properly with DuckDB (hugeint is not recognized)
        pass
--------------------------------------------------------------------------------
/tests/fugue_notebook/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fugue-project/fugue/596d28eea1834ecc9087b864b83e1b67b9748977/tests/fugue_notebook/__init__.py
--------------------------------------------------------------------------------
/tests/fugue_polars/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fugue-project/fugue/596d28eea1834ecc9087b864b83e1b67b9748977/tests/fugue_polars/__init__.py
--------------------------------------------------------------------------------
/tests/fugue_polars/test_api.py:
--------------------------------------------------------------------------------
import fugue.api as fa
import pandas as pd
import polars as pl


def test_to_df():
    df = pl.from_pandas(pd.DataFrame({"a": [0, 1]}))
    res = fa.fugue_sql("SELECT * FROM df", df=df, engine="duckdb")
    assert fa.as_array(res) == [[0], [1]]

    df2 = pl.from_pandas(pd.DataFrame({"a": [0]}))
    res = fa.inner_join(df, df2, engine="duckdb")
    assert fa.as_array(res) == [[0]]
--------------------------------------------------------------------------------
/tests/fugue_ray/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fugue-project/fugue/596d28eea1834ecc9087b864b83e1b67b9748977/tests/fugue_ray/__init__.py
--------------------------------------------------------------------------------
/tests/fugue_ray/test_registry.py:
--------------------------------------------------------------------------------
import pandas as pd
import ray.data as rd

import fugue.test as ft
from fugue import FugueWorkflow
from fugue_ray import RayExecutionEngine


@ft.with_backend("ray")
def test_registry():
    def creator() -> rd.Dataset:
        return rd.from_pandas(pd.DataFrame(dict(a=[1, 2], b=["a", "b"])))

    def processor1(ctx: RayExecutionEngine, df: rd.Dataset) -> pd.DataFrame:
        assert isinstance(ctx, RayExecutionEngine)
        return df.to_pandas()

    def processor2(df: pd.DataFrame) -> rd.Dataset:
        return rd.from_pandas(df)

    def outputter(df: rd.Dataset) -> None:
        assert [[1, "a"], [2, "b"]] == df.to_pandas().values.tolist()

    dag = FugueWorkflow()
    dag.create(creator).process(processor1).process(processor2).output(outputter)

    dag.run("ray")
--------------------------------------------------------------------------------
/tests/fugue_ray/test_utils.py:
--------------------------------------------------------------------------------
from triad import Schema

import fugue.test as ft
from fugue_ray import RayDataFrame
from fugue_ray._utils.dataframe import add_partition_key


@ft.with_backend("ray")
def test_add_partition_key():
    df = RayDataFrame([[0, "a"], [1, "b"]], "a:int,b:str")
    res, s = add_partition_key(df.native, df.schema, ["b", "a"], output_key="x")
    assert s == Schema("a:int,b:str,x:binary")

    res, s = add_partition_key(df.native, df.schema, ["b"], output_key="x")
    assert s == "a:int,b:str,x:str"
    assert RayDataFrame(res, s).as_array() == [[0, "a", "a"], [1, "b", "b"]]
--------------------------------------------------------------------------------
/tests/fugue_spark/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fugue-project/fugue/596d28eea1834ecc9087b864b83e1b67b9748977/tests/fugue_spark/__init__.py
--------------------------------------------------------------------------------
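fugue.api functions such as inner_join accept many DataFrame types and pick an engine from the inputs; a sketch with plain pandas inputs that mirrors test_to_df above:

import pandas as pd

import fugue.api as fa

df1 = pd.DataFrame({"a": [0, 1]})
df2 = pd.DataFrame({"a": [0]})
res = fa.inner_join(df1, df2)  # no engine given: runs on the native engine
print(fa.as_array(res))  # [[0]]
--------------------------------------------------------------------------------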
/tests/fugue_spark/test_importless.py:
--------------------------------------------------------------------------------
import pandas as pd
import pytest
from pyspark.sql import DataFrame, SparkSession

from fugue import FugueWorkflow, fsql, transform
from fugue_spark._utils.convert import to_pandas
from fugue_spark.registry import _is_sparksql


def test_importless(spark_session):
    pytest.importorskip("fugue_sql_antlr")

    for engine in [spark_session, "spark"]:
        dag = FugueWorkflow()
        dag.df([[0]], "a:int").show()

        dag.run(engine)

        fsql(
            """
        CREATE [[0],[1]] SCHEMA a:int
        SELECT * WHERE a<1
        PRINT
        """
        ).run(engine)


def test_is_sparksql():
    assert _is_sparksql(("sparksql", "abc"))
    assert not _is_sparksql(123)
    assert not _is_sparksql("SELECT *")


def test_transform_from_sparksql(spark_session):
    # schema: *
    def t(df: pd.DataFrame) -> pd.DataFrame:
        return df

    res = transform(("sparksql", "SELECT 1 AS a, 'b' AS aa"), t)
    assert isinstance(res, DataFrame)  # engine inference
    assert to_pandas(res).to_dict("records") == [{"a": 1, "aa": "b"}]
--------------------------------------------------------------------------------
/tests/fugue_spark/test_spark_connect.py:
--------------------------------------------------------------------------------
import fugue.test as ft

from .test_dataframe import NativeSparkDataFrameTestsBase as _NativeDataFrameTests
from .test_dataframe import SparkDataFrameTestsBase as _DataFrameTests
from .test_execution_engine import _CONF
from .test_execution_engine import (
    SparkExecutionEngineBuiltInTestsBase as _WorkflowTests,
)
from .test_execution_engine import (
    SparkExecutionEnginePandasUDFTestsBase as _EngineTests,
)


@ft.fugue_test_suite("sparkconnect", mark_test=True)
class SparkConnectDataFrameTests(_DataFrameTests):
    pass


@ft.fugue_test_suite("sparkconnect", mark_test=True)
class SparkConnectNativeDataFrameTests(_NativeDataFrameTests):
    pass


@ft.fugue_test_suite("sparkconnect", mark_test=True)
class SparkConnectExecutionEngineTests(_EngineTests):
    def test_using_pandas_udf(self):
        return

    def test_map_with_dict_col(self):
        return  # spark connect has a bug


@ft.fugue_test_suite(("sparkconnect", _CONF), mark_test=True)
class SparkConnectBuiltInTests(_WorkflowTests):
    def test_annotation_3(self):
        return  # RDD is not implemented in spark connect

    def test_repartition(self):
        return  # spark connect doesn't support even repartitioning

    def test_repartition_large(self):
        return  # spark connect doesn't support even repartitioning
--------------------------------------------------------------------------------
/tests/fugue_spark/test_sql.py:
--------------------------------------------------------------------------------
import pandas as pd
import pytest
from pyspark.sql import SparkSession

from fugue import FugueSQLWorkflow, register_execution_engine
from fugue_spark import SparkExecutionEngine


def test_sql(spark_session):
    pytest.importorskip("fugue_sql_antlr")
    register_execution_engine(
        "_spark",
        lambda conf, **kwargs: SparkExecutionEngine(
            conf=conf, spark_session=spark_session
        ),
    )
    df = spark_session.createDataFrame(pd.DataFrame([[0], [1]], columns=["a"]))
    dag = FugueSQLWorkflow()
    dag(
        """
    SELECT * FROM df WHERE a>0
    PRINT
    """,
        df=df,
    )
    dag.run("_spark")
--------------------------------------------------------------------------------
/tests/fugue_spark/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fugue-project/fugue/596d28eea1834ecc9087b864b83e1b67b9748977/tests/fugue_spark/utils/__init__.py
--------------------------------------------------------------------------------
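The ("sparksql", query) tuple in test_transform_from_sparksql above triggers engine inference; a sketch, assuming pyspark is installed and a SparkSession is active (for example via the spark_session fixture):

import pandas as pd

from fugue import transform


# schema: *
def passthrough(df: pd.DataFrame) -> pd.DataFrame:
    return df


# the tuple input is recognized as Spark SQL, so res is a pyspark.sql.DataFrame
res = transform(("sparksql", "SELECT 1 AS a"), passthrough)
res.show()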