├── fugue
├── py.typed
├── sql
│ ├── __init__.py
│ └── _utils.py
├── _utils
│ ├── __init__.py
│ ├── registry.py
│ ├── misc.py
│ ├── interfaceless.py
│ └── exception.py
├── collections
│ ├── __init__.py
│ └── yielded.py
├── bag
│ ├── __init__.py
│ └── array_bag.py
├── extensions
│ ├── transformer
│ │ ├── constants.py
│ │ └── __init__.py
│ ├── creator
│ │ ├── __init__.py
│ │ └── creator.py
│ ├── outputter
│ │ ├── __init__.py
│ │ └── outputter.py
│ ├── processor
│ │ ├── __init__.py
│ │ └── processor.py
│ ├── __init__.py
│ └── _builtins
│ │ ├── __init__.py
│ │ └── creators.py
├── dataset
│ └── __init__.py
├── rpc
│ └── __init__.py
├── column
│ └── __init__.py
├── workflow
│ ├── __init__.py
│ ├── input.py
│ └── _workflow_context.py
├── test
│ ├── __init__.py
│ └── pandas_tester.py
├── execution
│ └── __init__.py
├── dataframe
│ └── __init__.py
├── registry.py
├── plugins.py
├── dev.py
├── api.py
├── exceptions.py
└── __init__.py
├── tests
├── fugue
│ ├── __init__.py
│ ├── bag
│ │ ├── __init__.py
│ │ └── test_array_bag.py
│ ├── rpc
│ │ ├── __init__.py
│ │ ├── test_func.py
│ │ ├── test_flask.py
│ │ └── test_base.py
│ ├── sql
│ │ └── __init__.py
│ ├── test
│ │ └── __init__.py
│ ├── column
│ │ ├── __init__.py
│ │ └── test_functions.py
│ ├── workflow
│ │ ├── __init__.py
│ │ ├── test_workflow_parallel.py
│ │ └── test_runtime_exception.py
│ ├── collections
│ │ └── __init__.py
│ ├── dataframe
│ │ ├── __init__.py
│ │ ├── test_arrow_dataframe.py
│ │ ├── test_dataframes.py
│ │ └── test_dataframe.py
│ ├── execution
│ │ ├── __init__.py
│ │ ├── test_execution_engine.py
│ │ └── test_api.py
│ ├── extensions
│ │ ├── __init__.py
│ │ ├── transformer
│ │ │ └── __init__.py
│ │ ├── creator
│ │ │ └── __init__.py
│ │ ├── outputter
│ │ │ └── __init__.py
│ │ └── processor
│ │ │ └── __init__.py
│ └── utils
│ │ ├── __init__.py
│ │ ├── test_misc.py
│ │ └── test_interfaceless.py
├── fugue_dask
│ ├── __init__.py
│ ├── test_importless.py
│ └── test_sql.py
├── fugue_ibis
│ ├── __init__.py
│ ├── mock
│ │ ├── __init__.py
│ │ ├── registry.py
│ │ ├── tester.py
│ │ └── dataframe.py
│ ├── test_execution_engine.py
│ └── test_dataframe.py
├── fugue_ray
│ ├── __init__.py
│ ├── test_utils.py
│ └── test_registry.py
├── fugue_duckdb
│ ├── __init__.py
│ └── test_importless.py
├── fugue_notebook
│ └── __init__.py
├── fugue_polars
│ ├── __init__.py
│ └── test_api.py
├── fugue_spark
│ ├── __init__.py
│ ├── utils
│ │ └── __init__.py
│ ├── test_sql.py
│ ├── test_importless.py
│ └── test_spark_connect.py
└── __init__.py
├── fugue_ray
├── _utils
│ ├── __init__.py
│ └── cluster.py
├── __init__.py
├── tester.py
├── _constants.py
└── registry.py
├── fugue_spark
├── _utils
│ ├── __init__.py
│ └── misc.py
├── __init__.py
├── _constants.py
└── tester.py
├── fugue_notebook
└── nbextension
│ ├── __init__.py
│ ├── README.md
│ └── description.yaml
├── fugue_version
└── __init__.py
├── fugue_polars
├── __init__.py
└── _utils.py
├── images
├── extensions.png
├── architecture.png
└── logo.svg
├── .github
├── ISSUE_TEMPLATE
│ ├── questions.md
│ ├── deprecation.md
│ ├── compatibility.md
│ ├── bug_report.md
│ └── feature_request.md
└── workflows
│ ├── publish.yml
│ ├── test_notebook.yml
│ ├── test_win.yml
│ ├── test_core.yml
│ ├── test_no_sql.yml
│ ├── test_all.yml
│ ├── test_ray.yml
│ ├── test_dask.yml
│ └── test_spark.yml
├── docs
├── _templates
│ └── toc.rst_t
├── api.rst
├── Makefile
├── make.bat
├── tutorials.rst
├── index.rst
├── _static
│ ├── logo.svg
│ └── logo_doc.svg
├── api_sql
│ └── fugue_sql.rst
├── api
│ ├── fugue.rpc.rst
│ ├── fugue.sql.rst
│ ├── fugue.bag.rst
│ ├── fugue.dataset.rst
│ ├── fugue.extensions.rst
│ ├── fugue.extensions.creator.rst
│ ├── fugue.extensions.outputter.rst
│ ├── fugue.extensions.processor.rst
│ ├── fugue.column.rst
│ ├── fugue.collections.rst
│ ├── fugue.workflow.rst
│ └── fugue.extensions.transformer.rst
├── api_ibis
│ ├── fugue_ibis.execution.rst
│ └── fugue_ibis.rst
└── api_ray
│ └── fugue_ray.rst
├── fugue_dask
├── __init__.py
├── _constants.py
├── tester.py
├── registry.py
└── _dask_sql_wrapper.py
├── fugue_ibis
├── __init__.py
└── _compat.py
├── fugue_contrib
├── __init__.py
├── contrib.py
├── viz
│ ├── __init__.py
│ └── _ext.py
└── seaborn
│ └── __init__.py
├── fugue_duckdb
├── __init__.py
└── tester.py
├── fugue_sql
├── exceptions.py
└── __init__.py
├── scripts
└── setupsparkconnect.sh
├── .readthedocs.yaml
├── .pylintrc
├── requirements.txt
├── .gitpod.yml
├── .devcontainer
└── devcontainer.json
├── setup.cfg
├── .pre-commit-config.yaml
├── fugue_test
├── fixtures.py
├── __init__.py
└── bag_suite.py
└── .gitignore
/fugue/py.typed:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/fugue/sql/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/fugue/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/fugue/_utils/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/fugue_ray/_utils/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/fugue/bag/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/fugue/rpc/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/fugue/sql/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/fugue/test/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/fugue_dask/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/fugue_ibis/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/fugue_ray/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/fugue/collections/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/fugue_spark/_utils/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/fugue/column/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/fugue/workflow/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/fugue_duckdb/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/fugue_ibis/mock/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/fugue_notebook/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/fugue_polars/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/fugue_spark/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/fugue_notebook/nbextension/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/fugue/collections/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/fugue/dataframe/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/tests/fugue/execution/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/tests/fugue/extensions/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/fugue/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/tests/fugue_spark/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 | # pylint: disable-all
2 |
--------------------------------------------------------------------------------
/tests/fugue/extensions/transformer/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/fugue_version/__init__.py:
--------------------------------------------------------------------------------
1 | __version__ = "0.9.3"
2 |
--------------------------------------------------------------------------------
/fugue/bag/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | from .bag import Bag, LocalBag
3 |
--------------------------------------------------------------------------------
/fugue_polars/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | from .polars_dataframe import PolarsDataFrame
3 |
--------------------------------------------------------------------------------
/images/extensions.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fugue-project/fugue/HEAD/images/extensions.png
--------------------------------------------------------------------------------
/images/architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fugue-project/fugue/HEAD/images/architecture.png
--------------------------------------------------------------------------------
/fugue/extensions/transformer/constants.py:
--------------------------------------------------------------------------------
1 | OUTPUT_TRANSFORMER_DUMMY_SCHEMA = "__output_no_data__:int"
2 |
--------------------------------------------------------------------------------
/fugue/dataset/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | from .api import *
3 | from .dataset import AnyDataset, Dataset, DatasetDisplay, get_dataset_display
4 |
--------------------------------------------------------------------------------
/fugue_ray/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 |
3 | from fugue_ray.dataframe import RayDataFrame
4 | from fugue_ray.execution_engine import RayExecutionEngine
5 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/questions.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Questions
3 | about: General questions
4 | title: "[QUESTION]"
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/deprecation.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Deprecation
3 | about: Deprecate certain features
4 | title: "[DEPRECATION]"
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
--------------------------------------------------------------------------------
/fugue_notebook/nbextension/README.md:
--------------------------------------------------------------------------------
1 | # Fugue Notebook Extension
2 |
3 | - Add `%%fsql` magic to run Fugue SQL
4 | - Add Fugue SQL syntax highlighting in code cells for `%%fsql`
5 |
--------------------------------------------------------------------------------
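A sketch of a notebook cell using the `%%fsql` magic described above; the Fugue SQL body mirrors snippets from this repo's tests:

%%fsql
CREATE [[0],[1]] SCHEMA a:int
SELECT * WHERE a<1
PRINT

--------------------------------------------------------------------------------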
/docs/_templates/toc.rst_t:
--------------------------------------------------------------------------------
1 | {{ header | heading }}
2 |
3 | .. toctree::
4 | :maxdepth: {{ maxdepth }}
5 | {% for docname in docnames %}
6 | {{ docname }}
7 | {%- endfor %}
8 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/compatibility.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Compatibility
3 | about: Compatibility with dependent packages updates
4 | title: "[COMPATIBILITY]"
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
--------------------------------------------------------------------------------
/fugue_dask/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | from fugue_version import __version__
3 |
4 | from fugue_dask.dataframe import DaskDataFrame
5 | from fugue_dask.execution_engine import DaskExecutionEngine
6 |
--------------------------------------------------------------------------------
/fugue_polars/_utils.py:
--------------------------------------------------------------------------------
1 | import polars as pl
2 | from triad import Schema
3 |
4 |
5 | def build_empty_pl(schema: Schema) -> pl.DataFrame:
6 | return pl.from_arrow(schema.create_empty_arrow_table())
7 |
--------------------------------------------------------------------------------
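A usage sketch of build_empty_pl, relying on the triad Schema string syntax used throughout this repo:

from triad import Schema

from fugue_polars._utils import build_empty_pl

# an empty polars DataFrame that still carries the column structure
df = build_empty_pl(Schema("a:int,b:str"))
assert df.shape == (0, 2)  # zero rows, columns a and b

--------------------------------------------------------------------------------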
/fugue_spark/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | from fugue_version import __version__
3 |
4 | from fugue_spark.dataframe import SparkDataFrame
5 | from fugue_spark.execution_engine import SparkExecutionEngine
6 |
--------------------------------------------------------------------------------
/fugue_notebook/nbextension/description.yaml:
--------------------------------------------------------------------------------
1 | Type: Jupyter Notebook Extension
2 | Compatibility: 3.x, 4.x, 5.x, 6.x
3 | Name: Fugue
4 | Main: main.js
5 | Link: README.md
6 | Description: |
7 | Fugue Jupyter extension
8 |
--------------------------------------------------------------------------------
/fugue_spark/_constants.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, Any
2 |
3 | FUGUE_SPARK_CONF_USE_PANDAS_UDF = "fugue.spark.use_pandas_udf"
4 |
5 | FUGUE_SPARK_DEFAULT_CONF: Dict[str, Any] = {FUGUE_SPARK_CONF_USE_PANDAS_UDF: True}
6 |
--------------------------------------------------------------------------------
/fugue_ibis/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | from triad import run_at_def
3 |
4 | from ._compat import IbisSchema, IbisTable
5 | from .dataframe import IbisDataFrame
6 | from .execution_engine import IbisExecutionEngine, IbisSQLEngine
7 |
--------------------------------------------------------------------------------
/fugue/rpc/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | from fugue.rpc.base import (
3 | RPCClient,
4 | EmptyRPCHandler,
5 | RPCFunc,
6 | RPCHandler,
7 | RPCServer,
8 | make_rpc_server,
9 | to_rpc_handler,
10 | )
11 |
--------------------------------------------------------------------------------
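A minimal sketch of what these exports are for: to_rpc_handler wraps a plain callable into an RPCHandler, the type fugue uses to ship driver-side callbacks to workers:

from fugue.rpc import RPCHandler, to_rpc_handler

handler = to_rpc_handler(lambda x: x + "!")  # plain callable -> RPC handler
assert isinstance(handler, RPCHandler)

--------------------------------------------------------------------------------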
/fugue/column/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | from fugue.column.expressions import ColumnExpr, all_cols, col, function, lit, null
3 | from fugue.column.functions import is_agg
4 | from fugue.column.sql import SelectColumns, SQLExpressionGenerator
5 |
--------------------------------------------------------------------------------
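A minimal sketch of the column expression API exported above; the operator overloading on ColumnExpr and the f.max aggregation are assumptions based on fugue's column module, not shown in this dump:

import fugue.column.functions as f
from fugue.column import col, is_agg, lit

# arithmetic and aliasing compose into a ColumnExpr tree
expr = (col("a") + lit(1)).alias("a1")
assert not is_agg(expr)          # no aggregation inside
assert is_agg(f.max(col("a")))   # f.max wraps an aggregation

--------------------------------------------------------------------------------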
/fugue/extensions/creator/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | from fugue.extensions.creator.convert import (
3 | _to_creator,
4 | creator,
5 | parse_creator,
6 | register_creator,
7 | )
8 | from fugue.extensions.creator.creator import Creator
9 |
--------------------------------------------------------------------------------
/docs/api.rst:
--------------------------------------------------------------------------------
1 | API Reference
2 | ==============
3 |
4 | .. toctree::
5 |
6 | api/fugue
7 | api_sql/fugue_sql
8 | api_duckdb/fugue_duckdb
9 | api_spark/fugue_spark
10 | api_dask/fugue_dask
11 | api_ray/fugue_ray
12 | api_ibis/fugue_ibis
13 |
--------------------------------------------------------------------------------
/fugue/extensions/outputter/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | from fugue.extensions.outputter.convert import (
3 | _to_outputter,
4 | outputter,
5 | parse_outputter,
6 | register_outputter,
7 | )
8 | from fugue.extensions.outputter.outputter import Outputter
9 |
--------------------------------------------------------------------------------
/fugue/extensions/processor/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | from fugue.extensions.processor.convert import (
3 | _to_processor,
4 | parse_processor,
5 | processor,
6 | register_processor,
7 | )
8 | from fugue.extensions.processor.processor import Processor
9 |
--------------------------------------------------------------------------------
/fugue/workflow/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 |
3 | from ._workflow_context import FugueWorkflowContext
4 | from .api import *
5 | from .input import register_raw_df_type
6 | from .module import module
7 | from .workflow import FugueWorkflow, WorkflowDataFrame, WorkflowDataFrames
8 |
--------------------------------------------------------------------------------
/fugue_contrib/__init__.py:
--------------------------------------------------------------------------------
1 | import importlib
2 |
3 | from .contrib import FUGUE_CONTRIB
4 |
5 |
6 | def load_namespace(namespace: str) -> None:
7 | if namespace in FUGUE_CONTRIB:
8 | path = FUGUE_CONTRIB[namespace]["module"]
9 | importlib.import_module(path)
10 |
--------------------------------------------------------------------------------
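For illustration, load_namespace resolves a registered name from FUGUE_CONTRIB and imports the mapped module for its registration side effects; unknown names are silently ignored:

from fugue_contrib import load_namespace

load_namespace("viz")      # imports fugue_contrib.viz, registering its extensions
load_namespace("unknown")  # not in FUGUE_CONTRIB, so this is a no-op

--------------------------------------------------------------------------------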
/fugue_ibis/_compat.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | # pylint: disable-all
3 |
4 | try: # pragma: no cover
5 | from ibis.expr.types import Table as IbisTable
6 | except Exception: # pragma: no cover
7 | from ibis.expr.types import TableExpr as IbisTable
8 |
9 | from ibis import Schema as IbisSchema
10 |
--------------------------------------------------------------------------------
/fugue_contrib/contrib.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, Any
2 |
3 | FUGUE_CONTRIB: Dict[str, Any] = {
4 | "viz": {"module": "fugue_contrib.viz"},
5 | "sns": {"module": "fugue_contrib.seaborn"},
6 | "why": {"module": "whylogs.api.fugue.registry"},
7 | "vizzu": {"module": "ipyvizzu.integrations.fugue"},
8 | }
9 |
--------------------------------------------------------------------------------
/fugue/test/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | from .pandas_tester import NativeTestBackend, PandasTestBackend
3 | from .plugins import (
4 | FugueTestBackend,
5 | FugueTestContext,
6 | FugueTestSuite,
7 | extract_conf,
8 | fugue_test_backend,
9 | fugue_test_suite,
10 | with_backend,
11 | )
12 |
--------------------------------------------------------------------------------
/fugue_duckdb/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | from fugue import register_execution_engine, register_sql_engine
3 |
4 | from fugue_duckdb.execution_engine import DuckDBEngine, DuckExecutionEngine
5 |
6 | try:
7 | from fugue_duckdb.dask import DuckDaskExecutionEngine
8 | except Exception: # pragma: no cover
9 | pass
10 |
--------------------------------------------------------------------------------
/fugue_sql/exceptions.py:
--------------------------------------------------------------------------------
1 | # pylint: disable-all
2 | # flake8: noqa
3 | # TODO: This folder is to be deprecated
4 | import warnings
5 | from fugue.exceptions import *
6 |
7 | warnings.warn(
8 | "fsql and FugueSQLWorkflow now should be imported directly from fugue, "
9 | "fugue_sql will be removed in 0.9.0"
10 | )
11 |
--------------------------------------------------------------------------------
/scripts/setupsparkconnect.sh:
--------------------------------------------------------------------------------
1 | wget https://dlcdn.apache.org/spark/spark-3.5.7/spark-3.5.7-bin-hadoop3.tgz -O - | tar -xz -C /tmp
2 | # export SPARK_NO_DAEMONIZE=1
3 | bash /tmp/spark-3.5.7-bin-hadoop3/sbin/start-connect-server.sh --jars https://repo1.maven.org/maven2/org/apache/spark/spark-connect_2.12/3.5.7/spark-connect_2.12-3.5.7.jar
4 |
--------------------------------------------------------------------------------
/fugue_sql/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | # TODO: This folder is to be deprecated
3 | from fugue_version import __version__
4 |
5 | import warnings
6 | from fugue import FugueSQLWorkflow, fsql
7 |
8 | warnings.warn(
9 | "fsql and FugueSQLWorkflow now should be imported directly from fugue, "
10 | "fugue_sql will be removed in 0.9.0"
11 | )
12 |
--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | # Set the version of Python and other tools you might need
4 | build:
5 | os: ubuntu-20.04
6 | tools:
7 | python: "3.10"
8 | jobs:
9 | pre_install:
10 | - pip install -U pip
11 |
12 | sphinx:
13 | configuration: docs/conf.py
14 |
15 | python:
16 | install:
17 | - requirements: requirements.txt
18 |
--------------------------------------------------------------------------------
/fugue/_utils/registry.py:
--------------------------------------------------------------------------------
1 | from typing import Callable
2 |
3 | from triad import conditional_dispatcher
4 | from triad.utils.dispatcher import ConditionalDispatcher
5 |
6 | from ..constants import FUGUE_ENTRYPOINT
7 |
8 |
9 | def fugue_plugin(func: Callable) -> ConditionalDispatcher:
10 | return conditional_dispatcher(entry_point=FUGUE_ENTRYPOINT)(func) # type: ignore
11 |
--------------------------------------------------------------------------------
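fugue_plugin turns a function into a triad ConditionalDispatcher that is also extensible through the fugue entry point. A minimal sketch with a hypothetical function (pretty_name is illustrative, not part of fugue); it assumes the decorated body acts as the fallback when no candidate matches:

from fugue._utils.registry import fugue_plugin


@fugue_plugin
def pretty_name(obj: object) -> str:  # hypothetical example function
    return type(obj).__name__  # default implementation (fallback)


@pretty_name.candidate(lambda obj: isinstance(obj, int))
def _pretty_int(obj: int) -> str:
    return f"int({obj})"


assert pretty_name("x") == "str"    # no candidate matches -> fallback
assert pretty_name(3) == "int(3)"   # dispatched to _pretty_int

--------------------------------------------------------------------------------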
/fugue/workflow/input.py:
--------------------------------------------------------------------------------
1 | from typing import Type
2 |
3 |
4 | def register_raw_df_type(df_type: Type) -> None: # pragma: no cover
5 | """TODO: This function is to be removed before 0.9.0
6 |
7 | .. deprecated:: 0.8.0
8 | Register using :func:`fugue.api.is_df` instead.
9 | """
10 | raise DeprecationWarning("use fugue.api.is_df to register the dataframe")
11 |
--------------------------------------------------------------------------------
/tests/fugue_polars/test_api.py:
--------------------------------------------------------------------------------
1 | import fugue.api as fa
2 | import pandas as pd
3 | import polars as pl
4 |
5 |
6 | def test_to_df():
7 | df = pl.from_pandas(pd.DataFrame({"a": [0, 1]}))
8 | res = fa.fugue_sql("SELECT * FROM df", df=df, engine="duckdb")
9 | assert fa.as_array(res) == [[0], [1]]
10 |
11 | df2 = pl.from_pandas(pd.DataFrame({"a": [0]}))
12 | res = fa.inner_join(df, df2, engine="duckdb")
13 | assert fa.as_array(res) == [[0]]
14 |
--------------------------------------------------------------------------------
/.pylintrc:
--------------------------------------------------------------------------------
1 | [MESSAGES CONTROL]
2 | disable=unknown-option-value,useless-option-value,C0103,C0114,C0115,C0116,C2201,C0200,C0201,C0207,C0209,C0302,C0411,C0415,C2801,E0401,E0712,E1130,E1136,R0201,R0205,R0801,R0902,R0903,R0904,R0911,R0912,R0913,R0914,R0915,R0917,R1705,R1710,R1714,R1718,R1720,R1724,R1731,R1735,R1737,W0102,W0107,W0108,W0201,W0212,W0221,W0223,W0237,W0511,W0603,W0613,W0621,W0622,W0631,W0640,W0703,W0707,W0719,W1116
3 | # TODO: R0205: inherits from object, can be safely removed
4 |
--------------------------------------------------------------------------------
/tests/fugue_ibis/mock/registry.py:
--------------------------------------------------------------------------------
1 | from fugue.plugins import parse_execution_engine
2 | from typing import Any
3 | from .execution_engine import MockDuckExecutionEngine
4 |
5 |
6 | @parse_execution_engine.candidate(
7 | lambda engine, conf, **kwargs: isinstance(engine, str) and engine == "mockibisduck"
8 | )
9 | def _parse_mockibisduck(
10 | engine: str, conf: Any, **kwargs: Any
11 | ) -> MockDuckExecutionEngine:
12 | return MockDuckExecutionEngine(conf=conf)
13 |
--------------------------------------------------------------------------------
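With the candidate above registered (an import side effect), the string name resolves through fugue's engine factory; a brief sketch, assuming the factory consults parse_execution_engine candidates:

from fugue.execution import make_execution_engine

engine = make_execution_engine("mockibisduck")  # dispatched to _parse_mockibisduck

--------------------------------------------------------------------------------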
/tests/fugue/utils/test_misc.py:
--------------------------------------------------------------------------------
1 | from fugue._utils.misc import get_attribute
2 | from pytest import raises
3 |
4 |
5 | def test_get_attribute():
6 | class C(object):
7 | pass
8 |
9 | c = C()
10 | assert "x" not in c.__dict__
11 | assert 0 == get_attribute(c, "x", int)
12 | assert 0 == c.x
13 | assert 0 == get_attribute(c, "x", int)
14 | c.x = 10
15 | assert 10 == get_attribute(c, "x", int)
16 | raises(TypeError, lambda: get_attribute(c, "x", str))
17 |
--------------------------------------------------------------------------------
/fugue/execution/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | from .api import *
3 | from .execution_engine import AnyExecutionEngine, ExecutionEngine, MapEngine, SQLEngine
4 | from .factory import (
5 | infer_execution_engine,
6 | make_execution_engine,
7 | make_sql_engine,
8 | register_default_execution_engine,
9 | register_default_sql_engine,
10 | register_execution_engine,
11 | register_sql_engine,
12 | )
13 | from .native_execution_engine import NativeExecutionEngine, QPDPandasEngine
14 |
--------------------------------------------------------------------------------
/fugue/extensions/transformer/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | from fugue.extensions.transformer.convert import (
3 | _to_output_transformer,
4 | _to_transformer,
5 | cotransformer,
6 | output_cotransformer,
7 | output_transformer,
8 | parse_output_transformer,
9 | parse_transformer,
10 | register_output_transformer,
11 | register_transformer,
12 | transformer,
13 | )
14 | from fugue.extensions.transformer.transformer import (
15 | CoTransformer,
16 | OutputCoTransformer,
17 | OutputTransformer,
18 | Transformer,
19 | )
20 |
--------------------------------------------------------------------------------
/fugue_dask/_constants.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict
2 |
3 | import dask
4 | import pandas as pd
5 | import pyarrow as pa
6 | from packaging import version
7 |
8 | FUGUE_DASK_CONF_DEFAULT_PARTITIONS = "fugue.dask.default.partitions"
9 | FUGUE_DASK_DEFAULT_CONF: Dict[str, Any] = {FUGUE_DASK_CONF_DEFAULT_PARTITIONS: -1}
10 | FUGUE_DASK_USE_ARROW = (
11 | hasattr(pd, "ArrowDtype")
12 | and version.parse(dask.__version__) >= version.parse("2023.2")
13 | and version.parse(pa.__version__) >= version.parse("7")
14 | and version.parse(pd.__version__) >= version.parse("2")
15 | )
16 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 | title: "[BUG]"
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Minimal Code To Reproduce**
11 |
12 | ```python
13 | ```
14 |
15 | **Describe the bug**
16 | A clear and concise description of what the bug is.
17 |
18 | **Expected behavior**
19 | A clear and concise description of what you expected to happen.
20 |
21 | **Environment (please complete the following information):**
22 | - Backend: pandas/dask/ray?
23 | - Backend version:
24 | - Python version:
25 | - OS: linux/windows
26 |
--------------------------------------------------------------------------------
/fugue_ray/tester.py:
--------------------------------------------------------------------------------
1 | from contextlib import contextmanager
2 | from typing import Any, Dict, Iterator
3 |
4 | import ray
5 |
6 | import fugue.test as ft
7 |
8 |
9 | @ft.fugue_test_backend
10 | class RayTestBackend(ft.FugueTestBackend):
11 | name = "ray"
12 | default_session_conf = {"num_cpus": 2}
13 | default_fugue_conf = {
14 | "fugue.ray.zero_copy": True,
15 | "fugue.ray.default.batch_size": 10000,
16 | }
17 |
18 | @classmethod
19 | @contextmanager
20 | def session_context(cls, session_conf: Dict[str, Any]) -> Iterator[Any]:
21 | with ray.init(**session_conf):
22 | yield "ray"
23 |
--------------------------------------------------------------------------------
/fugue_ray/_constants.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict
2 |
3 | import ray
4 | from packaging import version
5 |
6 | FUGUE_RAY_CONF_SHUFFLE_PARTITIONS = "fugue.ray.shuffle.partitions"
7 | FUGUE_RAY_DEFAULT_PARTITIONS = "fugue.ray.default.partitions"
8 | FUGUE_RAY_DEFAULT_BATCH_SIZE = "fugue.ray.default.batch_size"
9 | FUGUE_RAY_ZERO_COPY = "fugue.ray.zero_copy"
10 |
11 | FUGUE_RAY_DEFAULT_CONF: Dict[str, Any] = {
12 | FUGUE_RAY_CONF_SHUFFLE_PARTITIONS: -1,
13 | FUGUE_RAY_DEFAULT_PARTITIONS: 0,
14 | FUGUE_RAY_ZERO_COPY: True,
15 | }
16 | RAY_VERSION = version.parse(ray.__version__)
17 |
18 | _ZERO_COPY: Dict[str, Any] = {"zero_copy_batch": True}
19 |
--------------------------------------------------------------------------------
/tests/fugue_duckdb/test_importless.py:
--------------------------------------------------------------------------------
1 | from fugue import FugueWorkflow
2 | from fugue import fsql
3 |
4 |
5 | def test_importless():
6 | for engine in ["duck", "duckdb"]:
7 | dag = FugueWorkflow()
8 | dag.df([[0]], "a:int").show()
9 |
10 | dag.run(engine)
11 |
12 | fsql(
13 | """
14 | CREATE [[0],[1]] SCHEMA a:int
15 | SELECT * WHERE a<1
16 | PRINT
17 | """
18 | ).run(engine)
19 |
20 | dag = FugueWorkflow()
21 | tdf = dag.df([[0], [1]], "a:int")
22 | dag.select("SELECT * FROM ", tdf, " WHERE a<1", sql_engine=engine)
23 |
24 | dag.run()
25 |
--------------------------------------------------------------------------------
/tests/fugue_ray/test_utils.py:
--------------------------------------------------------------------------------
1 | from triad import Schema
2 |
3 | import fugue.test as ft
4 | from fugue_ray import RayDataFrame
5 | from fugue_ray._utils.dataframe import add_partition_key
6 |
7 |
8 | @ft.with_backend("ray")
9 | def test_add_partition_key():
10 | df = RayDataFrame([[0, "a"], [1, "b"]], "a:int,b:str")
11 | res, s = add_partition_key(df.native, df.schema, ["b", "a"], output_key="x")
12 | assert s == Schema("a:int,b:str,x:binary")
13 |
14 | res, s = add_partition_key(df.native, df.schema, ["b"], output_key="x")
15 | assert s == "a:int,b:str,x:str"
16 | assert RayDataFrame(res, s).as_array() == [[0, "a", "a"], [1, "b", "b"]]
17 |
--------------------------------------------------------------------------------
/tests/fugue_dask/test_importless.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from fugue import FugueWorkflow, fsql
4 | import fugue.test as ft
5 |
6 | @ft.with_backend("dask")
7 | def test_importless(backend_context):
8 | pytest.importorskip("fugue_sql_antlr")
9 | for engine in ["dask", backend_context.session]:
10 | dag = FugueWorkflow()
11 | dag.df([[0]], "a:int").show()
12 |
13 | dag.run(engine)
14 |
15 | fsql(
16 | """
17 | CREATE [[0],[1]] SCHEMA a:int
18 | SELECT * WHERE a<1
19 | PRINT
20 | """
21 | ).run(engine)
22 |
23 | dag = FugueWorkflow()
24 |
25 | dag.run(engine)
26 |
--------------------------------------------------------------------------------
/fugue/extensions/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | from ._utils import namespace_candidate
3 | from .creator import Creator, creator, parse_creator, register_creator
4 | from .outputter import Outputter, outputter, parse_outputter, register_outputter
5 | from .processor import Processor, parse_processor, processor, register_processor
6 | from .transformer import (
7 | CoTransformer,
8 | OutputCoTransformer,
9 | OutputTransformer,
10 | Transformer,
11 | cotransformer,
12 | output_cotransformer,
13 | output_transformer,
14 | parse_output_transformer,
15 | parse_transformer,
16 | register_output_transformer,
17 | register_transformer,
18 | transformer,
19 | )
20 |
--------------------------------------------------------------------------------
/fugue_ray/_utils/cluster.py:
--------------------------------------------------------------------------------
1 | from fugue import ExecutionEngine
2 |
3 | from .._constants import FUGUE_RAY_CONF_SHUFFLE_PARTITIONS, FUGUE_RAY_DEFAULT_PARTITIONS
4 | from fugue.constants import FUGUE_CONF_DEFAULT_PARTITIONS
5 |
6 |
7 | def get_default_partitions(engine: ExecutionEngine) -> int:
8 | n = engine.conf.get(
9 | FUGUE_RAY_DEFAULT_PARTITIONS, engine.conf.get(FUGUE_CONF_DEFAULT_PARTITIONS, -1)
10 | )
11 | return n if n >= 0 else engine.get_current_parallelism() * 2
12 |
13 |
14 | def get_default_shuffle_partitions(engine: ExecutionEngine) -> int:
15 | n = engine.conf.get(FUGUE_RAY_CONF_SHUFFLE_PARTITIONS, -1)
16 | return n if n >= 0 else get_default_partitions(engine)
17 |
--------------------------------------------------------------------------------
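A self-contained sketch of the fallback logic above; the stub engine and the re-stated conf key strings are illustrative, not fugue API:

class _StubEngine:  # hypothetical stand-in for an ExecutionEngine
    conf: dict = {}

    def get_current_parallelism(self) -> int:
        return 4


def _default_partitions(engine) -> int:  # mirrors get_default_partitions
    n = engine.conf.get(
        "fugue.ray.default.partitions",
        engine.conf.get("fugue.default.partitions", -1),
    )
    return n if n >= 0 else engine.get_current_parallelism() * 2


assert _default_partitions(_StubEngine()) == 8  # -1 triggers parallelism * 2

--------------------------------------------------------------------------------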
/fugue/extensions/_builtins/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | from fugue.extensions._builtins.creators import Load, CreateData
3 | from fugue.extensions._builtins.outputters import (
4 | AssertEqual,
5 | AssertNotEqual,
6 | RunOutputTransformer,
7 | Save,
8 | Show,
9 | )
10 | from fugue.extensions._builtins.processors import (
11 | Aggregate,
12 | AlterColumns,
13 | Assign,
14 | Distinct,
15 | DropColumns,
16 | Dropna,
17 | Fillna,
18 | Filter,
19 | Rename,
20 | RunJoin,
21 | RunSetOperation,
22 | RunSQLSelect,
23 | RunTransformer,
24 | Sample,
25 | SaveAndUse,
26 | Select,
27 | SelectColumns,
28 | Take,
29 | Zip,
30 | )
31 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | .[all]
2 |
3 | furo
4 |
5 | # test requirements
6 | pre-commit
7 | black>=22.3.0
8 | mypy
9 | flake8
10 | autopep8
11 | pylint==3.2.6
12 | pytest
13 | pytest-cov
14 | pytest-mock
15 | pytest-rerunfailures==10.2
16 | sphinx>=2.4.0
17 | sphinx-rtd-theme
18 | sphinx-autodoc-typehints
19 | flask
20 | psutil
21 | matplotlib
22 | seaborn
23 |
24 | notebook<7
25 | jupyter_contrib_nbextensions
26 |
27 | s3fs
28 |
29 | pyspark[connect]
30 | duckdb-engine>=0.6.4
31 | sqlalchemy==2.0.10 # 2.0.11 has a bug
32 | ray[data]>=2.30.0
33 | pydantic<2.5 # 2.5.0+ doesn't work with ray 2.8
34 | # pyarrow==7.0.0
35 | dask[distributed,dataframe]==2025.3.0
36 | dask-sql
37 |
38 | # publish to pypi
39 | wheel
40 | twine
41 |
--------------------------------------------------------------------------------
/tests/fugue_ibis/mock/tester.py:
--------------------------------------------------------------------------------
1 | from contextlib import contextmanager
2 | from typing import Any, Dict, Iterator
3 |
4 | import pytest
5 |
6 | import fugue.test as ft
7 | from .registry import * # noqa: F401, F403 # pylint: disable-all
8 |
9 |
10 | @ft.fugue_test_backend
11 | class _MockIbisDuckDBTestBackend(ft.FugueTestBackend):
12 | name = "mockibisduck"
13 |
14 | @classmethod
15 | @contextmanager
16 | def session_context(cls, session_conf: Dict[str, Any]) -> Iterator[Any]:
17 | yield "mockibisduck"
18 |
19 |
20 | @pytest.fixture(scope="module")
21 | def mockibisduck_session():
22 | with _MockIbisDuckDBTestBackend.generate_session_fixture() as session:
23 | yield session
24 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project
4 | title: "[FEATURE]"
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 |
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 |
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 |
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = source
9 | BUILDDIR = build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/fugue/test/pandas_tester.py:
--------------------------------------------------------------------------------
1 | from contextlib import contextmanager
2 | from typing import Any, Dict, Iterator
3 |
4 | from .plugins import FugueTestBackend, fugue_test_backend
5 |
6 |
7 | @fugue_test_backend
8 | class PandasTestBackend(FugueTestBackend):
9 | name = "pandas"
10 |
11 | @classmethod
12 | @contextmanager
13 | def session_context(cls, session_conf: Dict[str, Any]) -> Iterator[Any]:
14 | yield "pandas" # pragma: no cover
15 |
16 |
17 | @fugue_test_backend
18 | class NativeTestBackend(FugueTestBackend):
19 | name = "native"
20 |
21 | @classmethod
22 | @contextmanager
23 | def session_context(cls, session_conf: Dict[str, Any]) -> Iterator[Any]:
24 | yield "native" # pragma: no cover
25 |
--------------------------------------------------------------------------------
/tests/fugue_spark/test_sql.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import pytest
3 | from pyspark.sql import SparkSession
4 |
5 | from fugue import FugueSQLWorkflow, register_execution_engine
6 | from fugue_spark import SparkExecutionEngine
7 |
8 |
9 | def test_sql(spark_session):
10 | pytest.importorskip("fugue_sql_antlr")
11 | register_execution_engine(
12 | "_spark",
13 | lambda conf, **kwargs: SparkExecutionEngine(
14 | conf=conf, spark_session=spark_session
15 | ),
16 | )
17 | df = spark_session.createDataFrame(pd.DataFrame([[0], [1]], columns=["a"]))
18 | dag = FugueSQLWorkflow()
19 | dag(
20 | """
21 | SELECT * FROM df WHERE a>0
22 | PRINT
23 | """,
24 | df=df,
25 | )
26 | dag.run("_spark")
27 |
--------------------------------------------------------------------------------
/fugue/dataframe/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | from .api import *
3 | from .array_dataframe import ArrayDataFrame
4 | from .arrow_dataframe import ArrowDataFrame
5 | from .dataframe import (
6 | AnyDataFrame,
7 | DataFrame,
8 | LocalBoundedDataFrame,
9 | LocalDataFrame,
10 | YieldedDataFrame,
11 | )
12 | from .dataframe_iterable_dataframe import (
13 | IterableArrowDataFrame,
14 | IterablePandasDataFrame,
15 | LocalDataFrameIterableDataFrame,
16 | )
17 | from .dataframes import DataFrames
18 | from .function_wrapper import DataFrameFunctionWrapper, fugue_annotated_param
19 | from .iterable_dataframe import IterableDataFrame
20 | from .pandas_dataframe import PandasDataFrame
21 | from .utils import get_column_names, normalize_dataframe_column_names, rename
22 |
--------------------------------------------------------------------------------
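A brief sketch of the core DataFrame API exported above, using PandasDataFrame; the schema-string comparison mirrors tests elsewhere in this dump:

import pandas as pd

from fugue.dataframe import PandasDataFrame

df = PandasDataFrame(pd.DataFrame({"a": [0, 1]}), "a:long")
assert df.schema == "a:long"        # triad Schema compares equal to strings
assert df.as_array() == [[0], [1]]  # materialize as a list of rows

--------------------------------------------------------------------------------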
/tests/fugue_dask/test_sql.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | pytest.importorskip("fugue_sql_antlr")
4 | import dask.dataframe as dd
5 | import pandas as pd
6 |
7 | from fugue import FugueSQLWorkflow, register_execution_engine
8 | from fugue_dask import DaskExecutionEngine
9 | import fugue.test as ft
10 |
11 |
12 | @ft.with_backend("dask")
13 | def test_sql(backend_context):
14 | register_execution_engine(
15 | "da",
16 | lambda conf, **kwargs: DaskExecutionEngine(
17 | conf=conf, dask_client=backend_context.session
18 | ),
19 | )
20 | df = dd.from_pandas(pd.DataFrame([[0], [1]], columns=["a"]), npartitions=2)
21 | dag = FugueSQLWorkflow()
22 | dag(
23 | """
24 | SELECT * FROM df WHERE a>0
25 | PRINT
26 | """,
27 | df=df,
28 | )
29 | dag.run("da")
30 |
--------------------------------------------------------------------------------
/fugue_dask/tester.py:
--------------------------------------------------------------------------------
1 | from contextlib import contextmanager
2 | from typing import Any, Dict, Iterator
3 |
4 | import dask
5 | from dask.distributed import Client
6 |
7 | import fugue.test as ft
8 |
9 |
10 | @ft.fugue_test_backend
11 | class DaskTestBackend(ft.FugueTestBackend):
12 | name = "dask"
13 |
14 | @classmethod
15 | def transform_session_conf(cls, conf: Dict[str, Any]) -> Dict[str, Any]:
16 | return ft.extract_conf(conf, "dask.", remove_prefix=True)
17 |
18 | @classmethod
19 | @contextmanager
20 | def session_context(cls, session_conf: Dict[str, Any]) -> Iterator[Any]:
21 | with Client(**session_conf) as client:
22 | dask.config.set({"dataframe.shuffle.method": "tasks"})
23 | dask.config.set({"dataframe.convert-string": False})
24 | yield client
25 |
--------------------------------------------------------------------------------
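To illustrate transform_session_conf above: only keys under the "dask." prefix reach Client(...), with the prefix stripped; this assumes extract_conf matches on key prefix and returns a plain dict:

import fugue.test as ft

conf = {"dask.n_workers": 2, "fugue.dask.default.partitions": 4}
session_conf = ft.extract_conf(conf, "dask.", remove_prefix=True)
assert session_conf == {"n_workers": 2}  # fugue.* keys are left for the engine

--------------------------------------------------------------------------------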
/tests/fugue_ray/test_registry.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import ray.data as rd
3 |
4 | import fugue.test as ft
5 | from fugue import FugueWorkflow
6 | from fugue_ray import RayExecutionEngine
7 |
8 |
9 | @ft.with_backend("ray")
10 | def test_registry():
11 | def creator() -> rd.Dataset:
12 | return rd.from_pandas(pd.DataFrame(dict(a=[1, 2], b=["a", "b"])))
13 |
14 | def processor1(ctx: RayExecutionEngine, df: rd.Dataset) -> pd.DataFrame:
15 | assert isinstance(ctx, RayExecutionEngine)
16 | return df.to_pandas()
17 |
18 | def processor2(df: pd.DataFrame) -> rd.Dataset:
19 | return rd.from_pandas(df)
20 |
21 | def outputter(df: rd.Dataset) -> None:
22 | assert [[1, "a"], [2, "b"]] == df.to_pandas().values.tolist()
23 |
24 | dag = FugueWorkflow()
25 | dag.create(creator).process(processor1).process(processor2).output(outputter)
26 |
27 | dag.run("ray")
28 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=build
12 |
13 | if "%1" == "" goto help
14 |
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | echo.
18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | echo.installed, then set the SPHINXBUILD environment variable to point
20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | echo.may add the Sphinx directory to PATH.
22 | echo.
23 | echo.If you don't have Sphinx installed, grab it from
24 | echo.http://sphinx-doc.org/
25 | exit /b 1
26 | )
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 |
34 | :end
35 | popd
36 |
--------------------------------------------------------------------------------
/fugue_spark/_utils/misc.py:
--------------------------------------------------------------------------------
1 | from typing import Any
2 |
3 | try:
4 | from pyspark.sql.connect.session import SparkSession as SparkConnectSession
5 | from pyspark.sql.connect.dataframe import DataFrame as SparkConnectDataFrame
6 | except Exception: # pragma: no cover
7 | SparkConnectSession = None
8 | SparkConnectDataFrame = None
9 | import pyspark.sql as ps
10 |
11 |
12 | def is_spark_connect(session: Any) -> bool:
13 | return SparkConnectSession is not None and isinstance(
14 | session, (SparkConnectSession, SparkConnectDataFrame)
15 | )
16 |
17 |
18 | def is_spark_dataframe(df: Any) -> bool:
19 | return isinstance(df, ps.DataFrame) or (
20 | SparkConnectDataFrame is not None and isinstance(df, SparkConnectDataFrame)
21 | )
22 |
23 |
24 | def is_spark_session(session: Any) -> bool:
25 | return isinstance(session, ps.SparkSession) or (
26 | SparkConnectSession is not None and isinstance(session, SparkConnectSession)
27 | )
28 |
--------------------------------------------------------------------------------
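A hedged usage sketch: these helpers let callers detect Spark Connect sessions and dataframes without importing pyspark.sql.connect themselves:

from pyspark.sql import SparkSession

from fugue_spark._utils.misc import is_spark_connect, is_spark_session

spark = SparkSession.builder.master("local[1]").getOrCreate()
assert is_spark_session(spark)      # true for classic and Connect sessions
assert not is_spark_connect(spark)  # a local classic session is not Connect

--------------------------------------------------------------------------------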
/fugue/_utils/misc.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Type, TypeVar
2 |
3 | from triad.utils.assertion import assert_or_throw
4 |
5 | T = TypeVar("T")
6 |
7 |
8 | def get_attribute(obj: object, attr_name: str, data_type: Type[T]) -> T:
9 | if attr_name not in obj.__dict__ or obj.__dict__[attr_name] is None:
10 | obj.__dict__[attr_name] = data_type()
11 | assert_or_throw(
12 | isinstance(obj.__dict__[attr_name], data_type),
13 | lambda: TypeError(f"{obj.__dict__[attr_name]} is not type {data_type}"),
14 | )
15 | return obj.__dict__[attr_name]
16 |
17 |
18 | def import_or_throw(package_name: str, message: str) -> Any:
19 | try:
20 | return __import__(package_name)
21 | except Exception as e: # pragma: no cover
22 | raise ImportError(str(e) + ". " + message)
23 |
24 |
25 | def import_fsql_dependency(package_name: str) -> Any:
26 | return import_or_throw(
27 | package_name, "Please try to install the package with `pip install fugue[sql]`."
28 | )
29 |
--------------------------------------------------------------------------------
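A short sketch of get_attribute's lazy-init-plus-type-check contract, mirroring tests/fugue/utils/test_misc.py earlier in this dump:

from fugue._utils.misc import get_attribute


class Holder:
    pass


h = Holder()
items = get_attribute(h, "items", list)  # created as list() on first access
assert items == [] and h.items is items
h.items.append(1)
assert get_attribute(h, "items", list) == [1]  # same attribute is returned
# get_attribute(h, "items", dict) would raise TypeError: wrong declared type

--------------------------------------------------------------------------------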
/fugue/registry.py:
--------------------------------------------------------------------------------
1 | from fugue.execution.factory import register_execution_engine, register_sql_engine
2 | from fugue.execution.native_execution_engine import (
3 | NativeExecutionEngine,
4 | QPDPandasEngine,
5 | )
6 |
7 |
8 | def _register() -> None:
9 | """Register Fugue core additional types
10 |
11 | .. note::
12 |
13 | This function is automatically called when you do
14 |
15 | >>> import fugue
16 | """
17 | _register_engines()
18 |
19 |
20 | def _register_engines() -> None:
21 | register_execution_engine(
22 | "native", lambda conf: NativeExecutionEngine(conf), on_dup="ignore"
23 | )
24 | register_execution_engine(
25 | "pandas", lambda conf: NativeExecutionEngine(conf), on_dup="ignore"
26 | )
27 | register_sql_engine(
28 | "qpdpandas", lambda engine: QPDPandasEngine(engine), on_dup="ignore"
29 | )
30 | register_sql_engine(
31 | "qpd_pandas", lambda engine: QPDPandasEngine(engine), on_dup="ignore"
32 | )
33 |
--------------------------------------------------------------------------------
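Because _register() runs on `import fugue`, the engine names above are usable immediately; a quick sketch:

from fugue.execution import NativeExecutionEngine, make_execution_engine

engine = make_execution_engine("pandas")  # registered above with on_dup="ignore"
assert isinstance(engine, NativeExecutionEngine)

--------------------------------------------------------------------------------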
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
1 | # This workflow will upload a Python Package using Twine when a release is created
2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
3 |
4 | name: Publish
5 |
6 | on:
7 | release:
8 | types: [created]
9 |
10 | jobs:
11 | deploy:
12 | runs-on: ubuntu-latest
13 |
14 | steps:
15 | - uses: actions/checkout@v2
16 | - name: Set up Python
17 | uses: actions/setup-python@v1
18 | with:
19 | python-version: '3.10'
20 | - name: Install dependencies
21 | run: make devenv
22 | - name: Test
23 | if: "!github.event.release.prerelease"
24 | run: make test
25 | - name: Build and publish
26 | env:
27 | RELEASE_TAG: ${{ github.event.release.tag_name }}
28 | TWINE_USERNAME: __token__
29 | TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
30 | run: |
31 | make package
32 | twine upload dist/*
33 |
--------------------------------------------------------------------------------
/.github/workflows/test_notebook.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
3 |
4 | name: Test Notebook Experience
5 |
6 | on:
7 | push:
8 | branches: [ master ]
9 | paths-ignore:
10 | - 'docs/**'
11 | - '**.md'
12 | pull_request:
13 | branches: [ master ]
14 | paths-ignore:
15 | - 'docs/**'
16 | - '**.md'
17 |
18 | concurrency:
19 | group: ${{ github.workflow }}-${{ github.ref }}
20 | cancel-in-progress: true
21 |
22 | jobs:
23 | build:
24 | runs-on: ubuntu-latest
25 | strategy:
26 | matrix:
27 | python-version: ["3.10"]
28 |
29 | steps:
30 | - uses: actions/checkout@v2
31 | - name: Set up Python ${{ matrix.python-version }}
32 | uses: actions/setup-python@v1
33 | with:
34 | python-version: ${{ matrix.python-version }}
35 | - name: Install dependencies
36 | run: make devenv
37 | - name: Test
38 | run: make testnotebook
39 |
--------------------------------------------------------------------------------
/.gitpod.yml:
--------------------------------------------------------------------------------
1 | image: fugueproject/gitpod:0.7.2
2 |
3 | tasks:
4 | - init: |
5 | make devenv
6 |
7 | github:
8 | prebuilds:
9 | # enable for the master/default branch (defaults to true)
10 | master: true
11 | # enable for all branches in this repo (defaults to false)
12 | branches: true
13 | # enable for pull requests coming from this repo (defaults to true)
14 | pullRequests: true
15 | # enable for pull requests coming from forks (defaults to false)
16 | pullRequestsFromForks: true
17 | # add a "Review in Gitpod" button as a comment to pull requests (defaults to true)
18 | addComment: true
19 | # add a "Review in Gitpod" button to pull requests (defaults to false)
20 | addBadge: false
21 | # add a label once the prebuild is ready to pull requests (defaults to false)
22 | addLabel: prebuilt-in-gitpod
23 |
24 | vscode:
25 | extensions:
26 | - ms-python.python
27 | - njpwerner.autodocstring
28 | - ms-toolsai.jupyter
29 | - ms-toolsai.jupyter-keymap
30 | - ms-toolsai.jupyter-renderers
31 | - ms-python.isort
32 | - virgilsisoe.python-auto-import
33 |
--------------------------------------------------------------------------------
/docs/tutorials.rst:
--------------------------------------------------------------------------------
1 |
2 | Fugue Tutorials
3 | ================
4 |
5 | To directly read the tutorials without running them:
6 |
7 | .. toctree::
8 |
9 | Tutorial Homepage
10 | For Beginners
11 | For Advanced Users
12 | For Fugue-SQL
13 |
14 |
15 |
16 | You may launch a
17 | `Fugue tutorial notebook environment on binder `_
18 |
19 | **But it runs slowly on binder**: the machine on binder isn't powerful enough for
20 | a distributed framework such as Spark. Parallel executions can become sequential, so some of the
21 | performance comparison examples will not give you the correct numbers.
22 |
23 | Alternatively, you should get decent performance by running its docker image on your own machine:
24 |
25 | .. code-block:: bash
26 |
27 | docker run -p 8888:8888 fugueproject/tutorials:latest
28 |
29 |
30 |
31 |
32 |
33 |
--------------------------------------------------------------------------------
/tests/fugue/bag/test_array_bag.py:
--------------------------------------------------------------------------------
1 | from typing import Any
2 |
3 | from fugue import ArrayBag, Bag
4 | from fugue_test.bag_suite import BagTests
5 |
6 |
7 | class ArrayBagTests(BagTests.Tests):
8 | def bg(self, data: Any = None) -> Bag:
9 | return ArrayBag(data)
10 |
11 | def test_array_bag_init(self):
12 | def _it():
13 | yield from [1, 2, 3]
14 |
15 | bg = self.bg([])
16 | assert bg.count() == 0
17 | assert bg.is_local
18 | assert bg.is_bounded
19 | assert bg.as_local() is bg
20 | assert bg.empty
21 | assert bg.native == []
22 |
23 | for x in [[1, 2, 3], _it(), set([1, 2, 3])]:
24 | bg = self.bg(x)
25 | assert bg.count() == 3
26 | assert bg.is_local
27 | assert bg.is_bounded
28 | assert bg.as_local() is bg
29 | assert not bg.empty
30 | assert 1 == bg.num_partitions
31 | assert isinstance(bg.native, list)
32 |
33 | bg = self.bg(x + 1 for x in [])
34 | assert bg.count() == 0
35 | bg = self.bg(x + 1 for x in [1, 2, 3])
36 | assert bg.count() == 3
37 |
--------------------------------------------------------------------------------
/fugue/plugins.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | # pylint: disable-all
3 | from fugue.collections.sql import transpile_sql
4 | from fugue.dataframe import (
5 | alter_columns,
6 | as_array,
7 | as_array_iterable,
8 | as_arrow,
9 | as_dict_iterable,
10 | as_dicts,
11 | as_pandas,
12 | drop_columns,
13 | fugue_annotated_param,
14 | get_column_names,
15 | get_schema,
16 | head,
17 | is_df,
18 | peek_array,
19 | peek_dict,
20 | rename,
21 | select_columns,
22 | )
23 | from fugue.dataset import (
24 | as_fugue_dataset,
25 | as_local,
26 | as_local_bounded,
27 | count,
28 | get_dataset_display,
29 | get_num_partitions,
30 | is_bounded,
31 | is_empty,
32 | is_local,
33 | )
34 | from fugue.execution.api import as_fugue_engine_df
35 | from fugue.execution.factory import (
36 | infer_execution_engine,
37 | parse_execution_engine,
38 | parse_sql_engine,
39 | )
40 | from fugue.extensions.creator import parse_creator
41 | from fugue.extensions.outputter import parse_outputter
42 | from fugue.extensions.processor import parse_processor
43 | from fugue.extensions.transformer import parse_output_transformer, parse_transformer
44 |
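Everything exported above is an extensible function: a backend adds support by registering a candidate with a matcher, the same pattern used in tests/fugue_ibis/mock/dataframe.py and fugue_ray/registry.py below. A minimal sketch, assuming a hypothetical MyTable container and an illustrative schema:

    from typing import Any

    from fugue.dataframe import ArrayDataFrame
    from fugue.plugins import as_fugue_dataset


    class MyTable:  # hypothetical container, for illustration only
        def __init__(self, rows):
            self.rows = rows


    # the lambda is the matcher: when it returns True, this candidate is chosen
    @as_fugue_dataset.candidate(lambda df, **kwargs: isinstance(df, MyTable))
    def _my_table_as_fugue(df: MyTable, **kwargs: Any) -> ArrayDataFrame:
        return ArrayDataFrame(df.rows, "a:int")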
--------------------------------------------------------------------------------
/tests/fugue_spark/test_importless.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import pytest
3 | from pyspark.sql import DataFrame, SparkSession
4 |
5 | from fugue import FugueWorkflow, fsql, transform
6 | from fugue_spark._utils.convert import to_pandas
7 | from fugue_spark.registry import _is_sparksql
8 |
9 |
10 | def test_importless(spark_session):
11 | pytest.importorskip("fugue_sql_antlr")
12 |
13 | for engine in [spark_session, "spark"]:
14 | dag = FugueWorkflow()
15 | dag.df([[0]], "a:int").show()
16 |
17 | dag.run(engine)
18 |
19 | fsql(
20 | """
21 | CREATE [[0],[1]] SCHEMA a:int
22 | SELECT * WHERE a<1
23 | PRINT
24 | """
25 | ).run(engine)
26 |
27 |
28 | def test_is_sparksql():
29 | assert _is_sparksql(("sparksql", "abc"))
30 | assert not _is_sparksql(123)
31 | assert not _is_sparksql("SELECT *")
32 |
33 |
34 | def test_transform_from_sparksql(spark_session):
35 | # schema: *
36 | def t(df: pd.DataFrame) -> pd.DataFrame:
37 | return df
38 |
39 | res = transform(("sparksql", "SELECT 1 AS a, 'b' AS aa"), t)
40 | assert isinstance(res, DataFrame) # engine inference
41 | assert to_pandas(res).to_dict("records") == [{"a": 1, "aa": "b"}]
42 |
--------------------------------------------------------------------------------
/.github/workflows/test_win.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
3 |
4 | name: Test Windows
5 |
6 | on:
7 | push:
8 | branches: [ master ]
9 | paths-ignore:
10 | - 'docs/**'
11 | - '**.md'
12 | pull_request:
13 | branches: [ master ]
14 | paths-ignore:
15 | - 'docs/**'
16 | - '**.md'
17 |
18 | concurrency:
19 | group: ${{ github.workflow }}-${{ github.ref }}
20 | cancel-in-progress: true
21 |
22 | jobs:
23 | build:
24 | runs-on: windows-latest
25 | strategy:
26 | matrix:
27 | python-version: ["3.10"]
28 | steps:
29 | - uses: actions/checkout@v2
30 | - name: Set up Python ${{ matrix.python-version }}
31 | uses: actions/setup-python@v1
32 | with:
33 | python-version: ${{ matrix.python-version }}
34 | - name: Install dependencies
35 | run: pip install -r requirements.txt
36 | # - name: Install pyarrow
37 | # run: pip install pyarrow==8.0.0
38 | - name: Test
39 | run: python -m pytest --reruns 2 --only-rerun 'Overflow in cast' tests/fugue tests/fugue_dask tests/fugue_duckdb
40 |
--------------------------------------------------------------------------------
/fugue/bag/array_bag.py:
--------------------------------------------------------------------------------
1 | from types import GeneratorType
2 | from typing import Any, Iterable, List
3 |
4 | from ..exceptions import FugueDatasetEmptyError
5 | from .bag import LocalBoundedBag
6 |
7 |
8 | class ArrayBag(LocalBoundedBag):
9 | def __init__(self, data: Any, copy: bool = True):
10 | if isinstance(data, list):
11 | self._native = list(data) if copy else data
12 | elif isinstance(data, (GeneratorType, Iterable)):
13 | self._native = list(data)
14 | else:
15 | raise ValueError(f"{type(data)} can't be converted to ArrayBag")
16 | super().__init__()
17 |
18 | @property
19 | def native(self) -> List[Any]:
20 | """The underlying Python list object"""
21 | return self._native
22 |
23 | @property
24 | def empty(self) -> bool:
25 | return len(self._native) == 0
26 |
27 | def count(self) -> int:
28 | return len(self._native)
29 |
30 | def peek(self) -> Any:
31 | if self.count() == 0:
32 | raise FugueDatasetEmptyError()
33 | return self._native[0]
34 |
35 | def as_array(self) -> List[Any]:
36 | return list(self._native)
37 |
38 | def head(self, n: int) -> LocalBoundedBag:
39 | return ArrayBag(self._native[:n])
40 |
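A quick usage sketch, consistent with tests/fugue/bag/test_array_bag.py above:

    from fugue import ArrayBag

    bag = ArrayBag([1, 2, 3])
    assert bag.count() == 3 and not bag.empty
    assert bag.peek() == 1  # first element; raises FugueDatasetEmptyError when empty
    assert bag.head(2).as_array() == [1, 2]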
--------------------------------------------------------------------------------
/tests/fugue_ibis/mock/dataframe.py:
--------------------------------------------------------------------------------
1 | from typing import Any
2 |
3 | from fugue import ArrowDataFrame, DataFrame, LocalDataFrame
4 | from fugue.plugins import as_fugue_dataset, as_local_bounded
5 | from fugue_ibis import IbisDataFrame, IbisTable
6 |
7 |
8 | class MockDuckDataFrame(IbisDataFrame):
9 | def to_sql(self) -> str:
10 | return str(self.native.compile())
11 |
12 | def _to_new_df(self, table: IbisTable, schema: Any = None) -> DataFrame:
13 | return MockDuckDataFrame(table, schema=schema)
14 |
15 | def _to_local_df(self, table: IbisTable, schema: Any = None) -> LocalDataFrame:
16 | return ArrowDataFrame(table.execute(), schema=schema)
17 |
18 | def _to_iterable_df(self, table: IbisTable, schema: Any = None) -> LocalDataFrame:
19 | return self._to_local_df(table, schema=schema)
20 |
21 |
22 | # should also check that df._findbackend is duckdb
23 | @as_fugue_dataset.candidate(lambda df, **kwargs: isinstance(df, IbisTable))
24 | def _ibis_as_fugue(df: IbisTable, **kwargs: Any) -> DataFrame:
25 | return MockDuckDataFrame(df, **kwargs)
26 |
27 |
28 | # should also check that df._findbackend is duckdb
29 | @as_local_bounded.candidate(lambda df, **kwargs: isinstance(df, IbisTable))
30 | def _ibis_as_local(df: IbisTable, **kwargs: Any) -> Any:
31 | return df.execute()
32 |
--------------------------------------------------------------------------------
/.github/workflows/test_core.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
3 |
4 | name: Core Tests
5 |
6 | on:
7 | push:
8 | branches: [ master ]
9 | paths-ignore:
10 | - 'docs/**'
11 | - '**.md'
12 | pull_request:
13 | branches: [ master ]
14 | paths-ignore:
15 | - 'docs/**'
16 | - '**.md'
17 |
18 | concurrency:
19 | group: ${{ github.workflow }}-${{ github.ref }}
20 | cancel-in-progress: true
21 |
22 | jobs:
23 | core-tests:
24 | name: Tests
25 | runs-on: ubuntu-latest
26 | strategy:
27 | matrix:
28 | python-version: ["3.10", "3.11", "3.12"]
29 |
30 | steps:
31 | - uses: actions/checkout@v2
32 | - name: Set up Python ${{ matrix.python-version }}
33 | uses: actions/setup-python@v1
34 | with:
35 | python-version: ${{ matrix.python-version }}
36 | - name: Fix setuptools_scm
37 | run: pip install "setuptools_scm<7"
38 | - name: Install dependencies
39 | run: make devenv
40 | - name: Install pandas 2
41 | if: matrix.python-version == '3.10'
42 | run: pip install "pandas>=2"
43 | - name: Test
44 | run: make testcore
45 |
--------------------------------------------------------------------------------
/tests/fugue/rpc/test_func.py:
--------------------------------------------------------------------------------
1 | from fugue.rpc import RPCFunc, to_rpc_handler
2 | from pytest import raises
3 | from triad import to_uuid
4 | from copy import copy, deepcopy
5 |
6 |
7 | def test_rpc_func():
8 | def f1(a: str) -> str:
9 | return "1"
10 |
11 | d1 = RPCFunc(f1)
12 | d2 = to_rpc_handler(f1)
13 | assert to_uuid(d1) == to_uuid(d2)
14 | assert to_uuid(d1) == to_uuid(to_rpc_handler(d1))
15 | assert "1" == d1("x")
16 | with raises(ValueError):
17 | RPCFunc(1)
18 |
19 |
20 | def test_determinism():
21 | def _f1(a: str) -> str:
22 | return "1"
23 |
24 | assert to_uuid(RPCFunc(_f1)) == to_uuid(to_rpc_handler(_f1))
25 | assert to_uuid(RPCFunc(lambda x: x)) == to_uuid(RPCFunc(lambda x: x + 1))
26 |
27 |
28 | def test_no_copy():
29 | class T(object):
30 | def __init__(self):
31 | self.n = 0
32 |
33 | def call(self, n: int) -> int:
34 | self.n += n
35 | return self.n
36 |
37 | t = T()
38 | d1 = RPCFunc(t.call)
39 | assert 10 == d1(10)
40 | assert 10 == t.n
41 |
42 | d2 = to_rpc_handler(t.call)
43 | d2(10)
44 |
45 | d3 = to_rpc_handler(d1)
46 | d3(10)
47 | assert 30 == t.n
48 |
49 | d4 = copy(d3)
50 | d4(10)
51 |
52 | d5 = deepcopy(d4)
53 | d5(10)
54 | assert 50 == t.n
55 |
--------------------------------------------------------------------------------
/.devcontainer/devcontainer.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "Fugue Development Environment",
3 | "image": "mcr.microsoft.com/vscode/devcontainers/python:3.11-bookworm",
4 | "customizations": {
5 | "vscode": {
6 | "settings": {
7 | "terminal.integrated.shell.linux": "/bin/bash",
8 | "python.pythonPath": "/usr/local/bin/python",
9 | "python.defaultInterpreterPath": "/usr/local/bin/python",
10 | "editor.defaultFormatter": "ms-python.black-formatter",
11 | "isort.interpreter": [
12 | "/usr/local/bin/python"
13 | ],
14 | "flake8.interpreter": [
15 | "/usr/local/bin/python"
16 | ],
17 | "pylint.interpreter": [
18 | "/usr/local/bin/python"
19 | ],
20 | "black-formatter.interpreter": [
21 | "/usr/local/bin/python"
22 | ]
23 | },
24 | "extensions": [
25 | "ms-python.python",
26 | "ms-python.isort",
27 | "ms-python.flake8",
28 | "ms-python.pylint",
29 | "ms-python.mypy",
30 | "ms-python.black-formatter",
31 | "GitHub.copilot",
32 | "njpwerner.autodocstring"
33 | ]
34 | }
35 | },
36 | "forwardPorts": [
37 | 8888
38 | ],
39 | "postCreateCommand": "make devenv",
40 | "features": {
41 | "ghcr.io/devcontainers/features/docker-in-docker:2.12.4": {},
42 | "ghcr.io/devcontainers/features/java:1.6.3": {},
43 | "ghcr.io/devcontainers/features/node:1.6.3": {}
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | .. Fugue documentation master file, created by
2 | sphinx-quickstart on Sun May 17 21:49:44 2020.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | Fugue API Docs
7 | ==============
8 |
9 | Fugue is a unified interface for distributed computing that lets users execute Python,
10 | pandas, and SQL code on Spark, Dask, and Ray with minimal rewrites.
11 |
12 | This documentation page is mainly an API reference. To learn more about Fugue, the
13 | `Github repo README `_ and the
14 | `tutorials `_ will be the best places to start.
15 | The API reference is mainly for users looking for specific functions and methods.
16 |
17 | Installation
18 | ------------
19 |
20 | Fugue is available on both pip and conda. `Detailed instructions `_
21 | can be found on the README.
22 |
23 | Community
24 | ---------
25 |
26 | Please join the `Fugue Slack `_
27 | to ask questions. We will try to reply as soon as possible.
28 |
29 | For contributing, start with the `contributing guide `_.
30 |
31 |
32 | .. toctree::
33 | :maxdepth: 3
34 | :hidden:
35 |
36 | tutorials
37 | top_api
38 | api
39 |
40 |
--------------------------------------------------------------------------------
/fugue_contrib/viz/__init__.py:
--------------------------------------------------------------------------------
1 | import json
2 | from typing import Any, Tuple
3 |
4 | import pandas as pd
5 |
6 | from fugue import Outputter
7 | from fugue.extensions import namespace_candidate, parse_outputter
8 |
9 | from ._ext import Visualize
10 |
11 |
12 | @parse_outputter.candidate(namespace_candidate("viz", lambda x: isinstance(x, str)))
13 | def _parse_pandas_plot(obj: Tuple[str, str]) -> Outputter:
14 | return _PandasVisualize(obj[1])
15 |
16 |
17 | class _PandasVisualize(Visualize):
18 | def __init__(self, func: str) -> None:
19 | super().__init__(func)
20 | if func != "plot":
21 | getattr(pd.DataFrame.plot, func) # ensure the func exists
22 |
23 | def _plot(self, df: pd.DataFrame) -> None:
24 | params = dict(self.params)
25 | if len(self.partition_spec.partition_by) > 0:
26 | keys = df[self.partition_spec.partition_by].head(1).to_dict("records")[0]
27 | kt = json.dumps(keys)[1:-1]
28 | if "title" in params:
29 | params["title"] = params["title"] + " -- " + kt
30 | else:
31 | params["title"] = kt
32 | df = df.drop(self.partition_spec.partition_by, axis=1)
33 | func = self._get_func(df)
34 | func(**params)
35 |
36 | def _get_func(self, df: pd.DataFrame) -> Any:
37 | if self._func == "plot":
38 | return df.plot
39 | return getattr(df.plot, self._func)
40 |
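A usage sketch of the outputter registered above (assuming fugue_contrib.viz is the import path, and that .output forwards params the way .process does in the workflow tests):

    import pandas as pd

    import fugue_contrib.viz  # noqa: F401  registers the ("viz", ...) namespace
    from fugue import FugueWorkflow

    df = pd.DataFrame({"x": [1, 2, 3], "y": [2, 4, 6]})
    dag = FugueWorkflow()
    # dispatched to df.plot.line(x="x", y="y") on the pandas side
    dag.df(df).output(("viz", "line"), params=dict(x="x", y="y"))
    dag.run()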
--------------------------------------------------------------------------------
/.github/workflows/test_no_sql.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
3 |
4 | name: Tests Excluding SQL Dependencies
5 |
6 | on:
7 | push:
8 | branches: [ master ]
9 | paths-ignore:
10 | - 'docs/**'
11 | - '**.md'
12 | pull_request:
13 | branches: [ master ]
14 | paths-ignore:
15 | - 'docs/**'
16 | - '**.md'
17 |
18 | concurrency:
19 | group: ${{ github.workflow }}-${{ github.ref }}
20 | cancel-in-progress: true
21 |
22 | jobs:
23 | tests-no-sql:
24 | name: Tests
25 | runs-on: ubuntu-latest
26 | strategy:
27 | matrix:
28 | python-version: ["3.10"]
29 |
30 | steps:
31 | - uses: actions/checkout@v2
32 | - name: Set up Python ${{ matrix.python-version }}
33 | uses: actions/setup-python@v1
34 | with:
35 | python-version: ${{ matrix.python-version }}
36 | - name: Fix setuptools_scm
37 | run: pip install "setuptools_scm<7"
38 | - name: Install dependencies
39 | run: make devenv
40 | - name: Install pandas 2
41 | if: matrix.python-version == '3.10'
42 | run: pip install "pandas>=2"
43 | - name: Remove SQL dependencies
44 | run: pip uninstall -y qpd fugue-sql-antlr sqlglot
45 | - name: Test
46 | run: make testnosql
47 |
--------------------------------------------------------------------------------
/.github/workflows/test_all.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
3 |
4 | name: Full Tests
5 |
6 | on:
7 | push:
8 | branches: [ master ]
9 | paths-ignore:
10 | - 'docs/**'
11 | - '**.md'
12 | pull_request:
13 | branches: [ master ]
14 | paths-ignore:
15 | - 'docs/**'
16 | - '**.md'
17 |
18 | concurrency:
19 | group: ${{ github.workflow }}-${{ github.ref }}
20 | cancel-in-progress: true
21 |
22 | jobs:
23 | all:
24 | name: Tests & Lint
25 | runs-on: ubuntu-latest
26 | strategy:
27 | matrix:
28 | python-version: ["3.10", "3.11", "3.12"]
29 |
30 | steps:
31 | - uses: actions/checkout@v2
32 | - name: Set up Python ${{ matrix.python-version }}
33 | uses: actions/setup-python@v1
34 | with:
35 | python-version: ${{ matrix.python-version }}
36 | - name: Install dependencies
37 | run: make devenv
38 | - name: Lint
39 | if: matrix.python-version == '3.10'
40 | run: make lint
41 | - name: Test
42 | run: make test
43 | - name: "Upload coverage to Codecov"
44 | if: matrix.python-version == '3.10'
45 | uses: codecov/codecov-action@v4
46 | with:
47 | fail_ci_if_error: false
48 | token: ${{ secrets.CODECOV_TOKEN }}
49 |
--------------------------------------------------------------------------------
/tests/fugue_spark/test_spark_connect.py:
--------------------------------------------------------------------------------
1 | import fugue.test as ft
2 |
3 | from .test_dataframe import NativeSparkDataFrameTestsBase as _NativeDataFrameTests
4 | from .test_dataframe import SparkDataFrameTestsBase as _DataFrameTests
5 | from .test_execution_engine import _CONF
6 | from .test_execution_engine import (
7 | SparkExecutionEngineBuiltInTestsBase as _WorkflowTests,
8 | )
9 | from .test_execution_engine import (
10 | SparkExecutionEnginePandasUDFTestsBase as _EngineTests,
11 | )
12 |
13 |
14 | @ft.fugue_test_suite("sparkconnect", mark_test=True)
15 | class SparkConnectDataFrameTests(_DataFrameTests):
16 | pass
17 |
18 |
19 | @ft.fugue_test_suite("sparkconnect", mark_test=True)
20 | class SparkConnectNativeDataFrameTests(_NativeDataFrameTests):
21 | pass
22 |
23 |
24 | @ft.fugue_test_suite("sparkconnect", mark_test=True)
25 | class SparkConnectExecutionEngineTests(_EngineTests):
26 | def test_using_pandas_udf(self):
27 | return
28 |
29 | def test_map_with_dict_col(self):
30 | return # spark connect has a bug
31 |
32 |
33 | @ft.fugue_test_suite(("sparkconnect", _CONF), mark_test=True)
34 | class SparkConnectBuiltInTests(_WorkflowTests):
35 | def test_annotation_3(self):
36 | return # RDD is not implemented in spark connect
37 |
38 | def test_repartition(self):
39 | return # spark connect doesn't support even repartitioning
40 |
41 | def test_repartition_large(self):
42 | return # spark connect doesn't support even repartitioning
43 |
--------------------------------------------------------------------------------
/fugue_contrib/viz/_ext.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 | from typing import Any
3 |
4 | import pandas as pd
5 | from triad import assert_or_throw
6 |
7 | from fugue import DataFrames, Outputter
8 | from fugue.exceptions import FugueWorkflowError
9 |
10 |
11 | class Visualize(Outputter, ABC):
12 | def __init__(self, func: str) -> None:
13 | super().__init__()
14 | self._func = func
15 |
16 | def process(self, dfs: DataFrames) -> None:
17 | assert_or_throw(len(dfs) == 1, FugueWorkflowError("not single input"))
18 | df = dfs[0].as_pandas()
19 | presort = self.partition_spec.presort
20 | presort_keys = list(presort.keys())
21 | presort_asc = list(presort.values())
22 | if len(presort_keys) > 0:
23 | df = df.sort_values(presort_keys, ascending=presort_asc).reset_index(
24 | drop=True
25 | )
26 | if len(self.partition_spec.partition_by) == 0:
27 | self._plot(df)
28 | else:
29 | keys: Any = ( # avoid pandas warning
30 | self.partition_spec.partition_by
31 | if len(self.partition_spec.partition_by) > 1
32 | else self.partition_spec.partition_by[0]
33 | )
34 | for _, gp in df.groupby(keys, dropna=False):
35 | self._plot(gp.reset_index(drop=True))
36 |
37 | @abstractmethod
38 | def _plot(self, df: pd.DataFrame) -> None: # pragma: no cover
39 | raise NotImplementedError
40 |
--------------------------------------------------------------------------------
/.github/workflows/test_ray.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
3 |
4 | name: Ray Tests
5 |
6 | on:
7 | push:
8 | branches: [ master ]
9 | paths-ignore:
10 | - 'docs/**'
11 | - '**.md'
12 | pull_request:
13 | branches: [ master ]
14 | paths-ignore:
15 | - 'docs/**'
16 | - '**.md'
17 |
18 | concurrency:
19 | group: ${{ github.workflow }}-${{ github.ref }}
20 | cancel-in-progress: true
21 |
22 | jobs:
23 | test_ray_lower_bound:
24 | name: Ray 2.30.0
25 | runs-on: ubuntu-latest
26 |
27 | steps:
28 | - uses: actions/checkout@v2
29 | - name: Set up Python 3.10
30 | uses: actions/setup-python@v1
31 | with:
32 | python-version: "3.10"
33 | - name: Install dependencies
34 | run: make devenv
35 | - name: Setup Ray
36 | run: pip install ray[data]==2.30.0
37 | - name: Test
38 | run: make testray
39 |
40 | test_ray_latest:
41 | name: Ray Latest
42 | runs-on: ubuntu-latest
43 |
44 | steps:
45 | - uses: actions/checkout@v2
46 | - name: Set up Python 3.10
47 | uses: actions/setup-python@v1
48 | with:
49 | python-version: "3.10"
50 | - name: Install dependencies
51 | run: make devenv
52 | - name: Setup Ray
53 | run: pip install -U ray[data]
54 | - name: Test
55 | run: make testray
56 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description-file = README.md
3 | license_files = LICENSE
4 |
5 | [testenv]
6 | setenv =
7 | COV_CORE_SOURCE=
8 | COV_CORE_CONFIG=.coveragerc
9 | COV_CORE_DATAFILE=.coverage
10 |
11 | [tool:pytest]
12 | addopts =
13 | -p pytest_cov
14 | --cov=fugue
15 | --cov=fugue_test
16 | --cov=fugue_spark
17 | --cov=fugue_dask
18 | --cov=fugue_ray
19 | --cov=fugue_duckdb
20 | --cov=fugue_ibis
21 | --cov=fugue_polars
22 | --ignore=tests/fugue_spark/test_spark_connect.py
23 | --cov-report=term-missing:skip-covered
24 | -vvv
25 | spark_options =
26 | spark.master: local[*]
27 | spark.sql.catalogImplementation: in-memory
28 | spark.sql.shuffle.partitions: 4
29 | spark.default.parallelism: 4
30 | spark.executor.cores: 4
31 | spark.sql.execution.arrow.pyspark.enabled: true
32 | spark.sql.adaptive.enabled: false
33 | fugue_test_conf =
34 | # don't move for testing purpose
35 | fugue.test.dummy=dummy
36 | fugue.test:bool=true
37 | # ray settings
38 | ray.num_cpus:int=2
39 | # dask settings
40 | dask.processes:bool=true
41 | dask.n_workers:int=3
42 | dask.threads_per_worker:int=1
43 |
44 |
45 |
46 | [coverage:run]
47 | omit =
48 | fugue_sql/_antlr/*
49 | fugue_test/plugins/*
50 | fugue_test/fixtures.py
51 | fugue_test/__init__.py
52 |
53 | [flake8]
54 | ignore = E24,E203,W503,C401,C408,C420,A001,A003,A005,W504,C407,C405,B023,B028
55 | max-line-length = 88
56 | format = pylint
57 | exclude = .svc,CVS,.bzr,.hg,.git,__pycache__,venv,tests/*,docs/*
58 | max-complexity = 10
59 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | default_language_version:
2 | python: python3
3 |
4 | exclude: |
5 | (?x)(
6 | ^tests/|
7 | ^docs/|
8 | ^fugue_sql/_antlr/
9 | )
10 | repos:
11 | - repo: https://github.com/pre-commit/pre-commit-hooks
12 | rev: v3.2.0
13 | hooks:
14 | - id: check-ast
15 | - id: check-docstring-first
16 | - id: check-executables-have-shebangs
17 | - id: check-json
18 | - id: check-merge-conflict
19 | - id: check-yaml
20 | - id: debug-statements
21 | - id: end-of-file-fixer
22 | - id: trailing-whitespace
23 | - id: check-vcs-permalinks
24 | - repo: https://github.com/pycqa/flake8
25 | rev: '3.8.3'
26 | hooks:
27 | - id: flake8
28 | types: [python]
29 | additional_dependencies:
30 | - flake8-bugbear
31 | - flake8-builtins
32 | # - flake8-docstrings # TODO: add back!
33 | # - flake8-rst-docstrings
34 | - flake8-comprehensions
35 | - flake8-tidy-imports
36 | - pycodestyle
37 | - repo: https://github.com/pre-commit/mirrors-mypy
38 | rev: v0.971
39 | hooks:
40 | - id: mypy
41 | - repo: https://github.com/PyCQA/pylint
42 | rev: v3.2.6
43 | hooks:
44 | - id: pylint
45 | - repo: https://github.com/ambv/black
46 | rev: 22.3.0
47 | hooks:
48 | - id: black
49 | types: [python]
50 | language_version: python3
51 |
--------------------------------------------------------------------------------
/fugue/dev.py:
--------------------------------------------------------------------------------
1 | """
2 | All modules for developing and extending Fugue
3 | """
4 | # flake8: noqa
5 | # pylint: disable-all
6 |
7 | from triad.collections.function_wrapper import AnnotatedParam
8 |
9 | from fugue.bag.bag import BagDisplay
10 | from fugue.collections.partition import PartitionCursor, PartitionSpec
11 | from fugue.collections.sql import StructuredRawSQL, TempTableName
12 | from fugue.collections.yielded import PhysicalYielded, Yielded
13 | from fugue.dataframe.function_wrapper import (
14 | DataFrameFunctionWrapper,
15 | DataFrameParam,
16 | LocalDataFrameParam,
17 | fugue_annotated_param,
18 | )
19 | from fugue.dataset import DatasetDisplay
20 | from fugue.execution.execution_engine import (
21 | EngineFacet,
22 | ExecutionEngineParam,
23 | MapEngine,
24 | SQLEngine,
25 | )
26 | from fugue.execution.factory import (
27 | is_pandas_or,
28 | make_execution_engine,
29 | make_sql_engine,
30 | register_default_execution_engine,
31 | register_default_sql_engine,
32 | register_execution_engine,
33 | register_sql_engine,
34 | )
35 | from fugue.execution.native_execution_engine import PandasMapEngine, QPDPandasEngine
36 | from fugue.rpc import (
37 | EmptyRPCHandler,
38 | RPCClient,
39 | RPCFunc,
40 | RPCHandler,
41 | RPCServer,
42 | make_rpc_server,
43 | to_rpc_handler,
44 | )
45 | from fugue.workflow._workflow_context import FugueWorkflowContext
46 | from fugue.workflow.module import module
47 | from fugue.workflow.workflow import FugueWorkflow, WorkflowDataFrame, WorkflowDataFrames
48 |
--------------------------------------------------------------------------------
/fugue/api.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | # pylint: disable-all
3 | from .dataframe.api import (
4 | alter_columns,
5 | as_array,
6 | as_array_iterable,
7 | as_arrow,
8 | as_dict_iterable,
9 | as_dicts,
10 | as_fugue_df,
11 | as_pandas,
12 | drop_columns,
13 | get_column_names,
14 | get_native_as_df,
15 | get_schema,
16 | head,
17 | is_df,
18 | normalize_column_names,
19 | peek_array,
20 | peek_dict,
21 | rename,
22 | select_columns,
23 | )
24 | from .dataset.api import (
25 | as_fugue_dataset,
26 | as_local,
27 | as_local_bounded,
28 | count,
29 | get_num_partitions,
30 | is_bounded,
31 | is_empty,
32 | is_local,
33 | show,
34 | )
35 | from .execution.api import (
36 | aggregate,
37 | anti_join,
38 | as_fugue_engine_df,
39 | assign,
40 | broadcast,
41 | clear_global_engine,
42 | cross_join,
43 | distinct,
44 | dropna,
45 | engine_context,
46 | fillna,
47 | filter,
48 | full_outer_join,
49 | get_context_engine,
50 | get_current_conf,
51 | get_current_parallelism,
52 | inner_join,
53 | intersect,
54 | join,
55 | left_outer_join,
56 | load,
57 | persist,
58 | repartition,
59 | right_outer_join,
60 | run_engine_function,
61 | sample,
62 | save,
63 | select,
64 | semi_join,
65 | set_global_engine,
66 | subtract,
67 | take,
68 | union,
69 | )
70 | from .sql.api import fugue_sql, fugue_sql_flow
71 | from .workflow.api import out_transform, raw_sql, transform
72 |
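A minimal sketch of this API surface on a plain pandas DataFrame (the native engine is the assumed default when no engine is given):

    import pandas as pd

    import fugue.api as fa

    df = pd.DataFrame({"a": [1, 2, 3]})
    assert fa.count(df) == 3 and fa.is_local(df)
    fa.show(df)  # engine-agnostic display

    def add_b(d: pd.DataFrame) -> pd.DataFrame:
        return d.assign(b=1)

    res = fa.transform(df, add_b, schema="*,b:int")  # a pandas DataFrame here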
--------------------------------------------------------------------------------
/fugue_contrib/seaborn/__init__.py:
--------------------------------------------------------------------------------
1 | import json
2 | from functools import partial
3 | from typing import Any, Tuple
4 |
5 | import matplotlib.pyplot as plt
6 | import pandas as pd
7 | import seaborn
8 |
9 | from fugue import Outputter
10 | from fugue.extensions import namespace_candidate, parse_outputter
11 |
12 | from ..viz._ext import Visualize
13 |
14 |
15 | @parse_outputter.candidate(namespace_candidate("sns", lambda x: isinstance(x, str)))
16 | def _parse_seaborn(obj: Tuple[str, str]) -> Outputter:
17 | return _SeabornVisualize(obj[1])
18 |
19 |
20 | class _SeabornVisualize(Visualize):
21 | def __init__(self, func: str) -> None:
22 | super().__init__(func)
23 | getattr(seaborn, func) # ensure the func exists
24 |
25 | def _plot(self, df: pd.DataFrame) -> None:
26 | params = dict(self.params)
27 | title: Any = None
28 | if len(self.partition_spec.partition_by) > 0:
29 | keys = df[self.partition_spec.partition_by].head(1).to_dict("records")[0]
30 | kt = json.dumps(keys)[1:-1]
31 | if "title" in params:
32 | params["title"] = params["title"] + " -- " + kt
33 | else:
34 | params["title"] = kt
35 | df = df.drop(self.partition_spec.partition_by, axis=1)
36 | func = self._get_func(df)
37 | title = params.pop("title", None)
38 | plt.figure(0)
39 | func(**params).set(title=title)
40 | plt.show()
41 |
42 | def _get_func(self, df: pd.DataFrame) -> Any:
43 | f = getattr(seaborn, self._func)
44 | return partial(f, df)
45 |
--------------------------------------------------------------------------------
/tests/fugue/rpc/test_flask.py:
--------------------------------------------------------------------------------
1 | import cloudpickle
2 | from triad import ParamDict
3 | from fugue.rpc import make_rpc_server
4 | import pytest
5 |
6 | pytest.importorskip("flask")
7 | pytest.importorskip("jinja2")
8 |
9 |
10 | def test_flask_service():
11 | # fugue.rpc.flask.FlaskRPCServer
12 | conf = ParamDict(
13 | {
14 | "fugue.rpc.server": "fugue.rpc.flask.FlaskRPCServer",
15 | "fugue.rpc.flask_server.host": "127.0.0.1",
16 | "fugue.rpc.flask_server.port": "1234",
17 | "fugue.rpc.flask_server.timeout": "2 sec",
18 | }
19 | )
20 |
21 | def k(value: str) -> str:
22 | return value + "x"
23 |
24 | def kk(a: int, b: int) -> int:
25 | return a + b
26 |
27 | def kkk(f, a: int) -> int:
28 | return f(a)
29 |
30 | with make_rpc_server(conf).start() as server:
31 | assert "1234" == server.conf["fugue.rpc.flask_server.port"]
32 | with server.start():  # a nested start takes no effect
33 | client1 = cloudpickle.loads(cloudpickle.dumps(server.make_client(k)))
34 | assert "dddx" == client1("ddd")
35 | client2 = cloudpickle.loads(cloudpickle.dumps(server.make_client(kk)))
36 | assert 3 == client2(1, 2)
37 | assert "dddx" == client1("ddd")
38 | # function serialization has been disabled
39 | # client3 = cloudpickle.loads(cloudpickle.dumps(server.make_client(kkk)))
40 | # assert 3 == client3(lambda x: x + 1, 2)
41 | # assert 3 == client2(1, 2)
42 | server.stop()  # an extra stop at the end takes no effect
43 |
--------------------------------------------------------------------------------
/fugue_test/fixtures.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | _DEFAULT_SCOPE = "module"
4 |
5 |
6 | @pytest.fixture(scope=_DEFAULT_SCOPE)
7 | def pandas_session():
8 | yield "pandas"
9 |
10 |
11 | @pytest.fixture(scope=_DEFAULT_SCOPE)
12 | def native_session():
13 | yield "native"
14 |
15 |
16 | @pytest.fixture(scope=_DEFAULT_SCOPE)
17 | def dask_session():
18 | from fugue_dask.tester import DaskTestBackend
19 |
20 | with DaskTestBackend.generate_session_fixture() as session:
21 | yield session
22 |
23 |
24 | @pytest.fixture(scope=_DEFAULT_SCOPE)
25 | def duckdb_session():
26 | from fugue_duckdb.tester import DuckDBTestBackend
27 |
28 | with DuckDBTestBackend.generate_session_fixture() as session:
29 | yield session
30 |
31 |
32 | @pytest.fixture(scope=_DEFAULT_SCOPE)
33 | def duckdask_session():
34 | from fugue_duckdb.tester import DuckDaskTestBackend
35 |
36 | with DuckDaskTestBackend.generate_session_fixture() as session:
37 | yield session
38 |
39 |
40 | @pytest.fixture(scope=_DEFAULT_SCOPE)
41 | def ray_session():
42 | from fugue_ray.tester import RayTestBackend
43 |
44 | with RayTestBackend.generate_session_fixture() as session:
45 | yield session
46 |
47 |
48 | @pytest.fixture(scope=_DEFAULT_SCOPE)
49 | def spark_session():
50 | from fugue_spark.tester import SparkTestBackend
51 |
52 | with SparkTestBackend.generate_session_fixture() as session:
53 | yield session
54 |
55 |
56 | @pytest.fixture(scope=_DEFAULT_SCOPE)
57 | def sparkconnect_session():
58 | from fugue_spark.tester import SparkConnectTestBackend
59 |
60 | with SparkConnectTestBackend.generate_session_fixture() as session:
61 | yield session
62 |
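A test consumes one of these fixtures by declaring it as an argument and passing the yielded session to the workflow, as tests/fugue_spark/test_importless.py does; a minimal sketch:

    from fugue import FugueWorkflow

    def test_show_on_spark(spark_session):
        dag = FugueWorkflow()
        dag.df([[0]], "a:int").show()
        dag.run(spark_session)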
--------------------------------------------------------------------------------
/fugue/extensions/creator/creator.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 |
3 | from fugue.dataframe import DataFrame
4 | from fugue.extensions.context import ExtensionContext
5 |
6 |
7 | class Creator(ExtensionContext, ABC):
8 | """The interface is to generate single DataFrame from `params`.
9 | For example reading data from file should be a type of Creator.
10 | Creator is task level extension, running on driver, and execution engine aware.
11 |
12 | To implement this class, you should not define ``__init__``; directly implement
13 | the interface functions.
14 |
15 | .. note::
16 |
17 | Before implementing this class, consider whether you really need this
18 | interface: with Fugue's interfaceless feature, implementing Creator
19 | is commonly unnecessary, and the interfaceless approach may also
20 | decouple your code from Fugue.
21 |
22 | .. seealso::
23 |
24 | Please read :doc:`Creator Tutorial `
25 | """
26 |
27 | @abstractmethod
28 | def create(self) -> DataFrame: # pragma: no cover
29 | """Create DataFrame on driver side
30 |
31 | .. note::
32 |
33 | * It runs on driver side
34 | * The output dataframe is not necessarily local, for example a SparkDataFrame
35 | * It is engine aware: you can put platform-dependent code in it (for example
36 | native pyspark code), but doing so may make your code less portable. If you
37 | only use the functions of the general ExecutionEngine interface, it remains
38 | portable.
39 |
40 | :return: result dataframe
41 | """
42 | raise NotImplementedError
43 |
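A minimal sketch of both styles; the interfaceless variant mirrors the # schema: functions in tests/fugue/workflow/test_workflow_parallel.py:

    from typing import Any, List

    from fugue import FugueWorkflow
    from fugue.dataframe import ArrayDataFrame, DataFrame
    from fugue.extensions.creator import Creator


    class HelloCreator(Creator):
        def create(self) -> DataFrame:
            # runs on the driver; self.params and self.execution_engine are available
            return ArrayDataFrame([[0], [1]], "a:int")


    # schema: a:int
    def hello() -> List[List[Any]]:  # the interfaceless equivalent
        return [[0], [1]]


    dag = FugueWorkflow()
    dag.create(HelloCreator).show()
    dag.create(hello).show()
    dag.run()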
--------------------------------------------------------------------------------
/tests/fugue/workflow/test_workflow_parallel.py:
--------------------------------------------------------------------------------
1 | from fugue import FugueWorkflow, DataFrame, NativeExecutionEngine
2 | from typing import List, Any
3 | from time import sleep
4 | from timeit import timeit
5 | from pytest import raises
6 |
7 |
8 | def test_parallel():
9 | dag = FugueWorkflow({"fugue.workflow.concurrency": 10})
10 | dag.create(create).process(process).output(display)
11 | dag.create(create).process(process).output(display)
12 |
13 | t = timeit(
14 | lambda: dag.run(),
15 | number=1,
16 | ) # warmup
17 | t = timeit(
18 | lambda: dag.run(),
19 | number=1,
20 | )
21 | assert t < 0.4
22 |
23 |
24 | def test_parallel_exception():
25 | dag = FugueWorkflow({"fugue.workflow.concurrency": 2})
26 | dag.create(create).process(process).process(process, params=dict(sec=0.5)).output(
27 | display
28 | )
29 | dag.create(create_e).process(process).output(display)
30 |
31 | def run(dag, *args):
32 | with raises(NotImplementedError):
33 | dag.run(*args)
34 |
35 | t = timeit(
36 | lambda: run(dag),
37 | number=1,
38 | ) # warmup
39 | t = timeit(
40 | lambda: run(dag),
41 | number=1,
42 | )
43 | assert t < 0.5
44 |
45 |
46 | # schema: a:int
47 | def create(sec: float = 0.1) -> List[List[Any]]:
48 | sleep(sec)
49 | return [[0]]
50 |
51 |
52 | # schema: a:int
53 | def create_e(sec: float = 0.1) -> List[List[Any]]:
54 | raise NotImplementedError
55 |
56 |
57 | def process(df: DataFrame, sec: float = 0.1) -> DataFrame:
58 | sleep(sec)
59 | return df
60 |
61 |
62 | def display(df: DataFrame, sec: float = 0.1) -> None:
63 | sleep(sec)
64 | df.show()
65 |
--------------------------------------------------------------------------------
/fugue_duckdb/tester.py:
--------------------------------------------------------------------------------
1 | from contextlib import contextmanager
2 | from typing import Any, Dict, Iterator
3 |
4 | import duckdb
5 |
6 | import fugue.test as ft
7 |
8 | try:
9 | import dask.distributed as dd
10 | import dask
11 |
12 | _HAS_DASK = True
13 | except ImportError: # pragma: no cover
14 | _HAS_DASK = False
15 |
16 |
17 | @ft.fugue_test_backend
18 | class DuckDBTestBackend(ft.FugueTestBackend):
19 | name = "duckdb"
20 |
21 | @classmethod
22 | @contextmanager
23 | def session_context(cls, session_conf: Dict[str, Any]) -> Iterator[Any]:
24 | with duckdb.connect(config=session_conf) as conn:
25 | yield conn
26 |
27 |
28 | if _HAS_DASK:
29 |
30 | @ft.fugue_test_backend
31 | class DuckDaskTestBackend(ft.FugueTestBackend):
32 | name = "duckdask"
33 |
34 | @classmethod
35 | def transform_session_conf(cls, conf: Dict[str, Any]) -> Dict[str, Any]:
36 | res = ft.extract_conf(conf, "duck.", remove_prefix=False)
37 | res.update(ft.extract_conf(conf, "dask.", remove_prefix=False))
38 | return res
39 |
40 | @classmethod
41 | @contextmanager
42 | def session_context(cls, session_conf: Dict[str, Any]) -> Iterator[Any]:
43 | duck_conf = ft.extract_conf(session_conf, "duck.", remove_prefix=True)
44 | dask_conf = ft.extract_conf(session_conf, "dask.", remove_prefix=True)
45 | with dd.Client(**dask_conf) as client:
46 | dask.config.set({"dataframe.shuffle.method": "tasks"})
47 | dask.config.set({"dataframe.convert-string": False})
48 | with duckdb.connect(config=duck_conf) as conn:
49 | yield [conn, client]
50 |
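These backends are resolved by name through fugue.test; a consuming sketch that mirrors tests/fugue_ibis/test_execution_engine.py:

    import fugue.test as ft
    from fugue_test.execution_suite import ExecutionEngineTests


    @ft.fugue_test_suite("duckdb", mark_test=True)
    class DuckDBEngineTests(ExecutionEngineTests.Tests):
        pass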
--------------------------------------------------------------------------------
/tests/fugue/utils/test_interfaceless.py:
--------------------------------------------------------------------------------
1 | from pytest import raises
2 |
3 | from fugue._utils.interfaceless import (
4 | is_class_method,
5 | parse_comment_annotation,
6 | parse_output_schema_from_comment,
7 | )
8 |
9 |
10 | def test_parse_comment_annotation():
11 | def a():
12 | pass
13 |
14 | # asdfasdf
15 | def b():
16 | pass
17 |
18 | # asdfasdf
19 | # schema : s:int
20 | # # # schema : a : int,b:str
21 | # schema : a : str ,b:str
22 | # asdfasdf
23 | def c():
24 | pass
25 |
26 | # schema:
27 | def d():
28 | pass
29 |
30 | assert parse_comment_annotation(a, "schema") is None
31 | assert parse_comment_annotation(b, "schema") is None
32 | assert "a : str ,b:str" == parse_comment_annotation(c, "schema")
33 | assert "" == parse_comment_annotation(d, "schema")
34 |
35 |
36 | def test_parse_output_schema_from_comment():
37 | def a():
38 | pass
39 |
40 | # asdfasdf
41 | def b():
42 | pass
43 |
44 | # asdfasdf
45 | # schema : s : int # more comment
46 | # # # schema : a : int,b:str
47 | # asdfasdf
48 | def c():
49 | pass
50 |
51 | # schema:
52 | def d():
53 | pass
54 |
55 | assert parse_output_schema_from_comment(a) is None
56 | assert parse_output_schema_from_comment(b) is None
57 | assert "s:int" == parse_output_schema_from_comment(c).replace(" ", "")
58 | raises(SyntaxError, lambda: parse_output_schema_from_comment(d))
59 |
60 |
61 | def test_is_class_method():
62 | def f1():
63 | pass
64 |
65 | class F(object):
66 | def f2(self):
67 | pass
68 |
69 | assert not is_class_method(f1)
70 | assert is_class_method(F.f2)
71 | assert not is_class_method(F().f2)
72 |
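The schema comment annotation parsed above is what powers schema hints on plain functions; a sketch on the default pandas-based engine:

    import pandas as pd

    from fugue import transform

    # schema: *,b:int
    def add_b(df: pd.DataFrame) -> pd.DataFrame:
        return df.assign(b=1)

    res = transform(pd.DataFrame({"a": [0]}), add_b)  # res has columns a and b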
--------------------------------------------------------------------------------
/fugue/extensions/outputter/outputter.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 |
3 | from fugue.dataframe import DataFrames
4 | from fugue.extensions.context import ExtensionContext
5 |
6 |
7 | class Outputter(ExtensionContext, ABC):
8 | """The interface to process one or multiple incoming dataframes without returning
9 | anything. For example, printing or saving dataframes should be a type of Outputter.
10 | Outputter is a task-level extension; it runs on the driver and is execution engine aware.
11 |
12 | To implement this class, you should not define ``__init__``; directly implement
13 | the interface functions.
14 |
15 | .. note::
16 |
17 | Before implementing this class, consider whether you really need this
18 | interface: with Fugue's interfaceless feature, implementing Outputter
19 | is commonly unnecessary, and the interfaceless approach may also
20 | decouple your code from Fugue.
21 |
22 | .. seealso::
23 |
24 | Please read
25 | :doc:`Outputter Tutorial `
26 | """
27 |
28 | @abstractmethod
29 | def process(self, dfs: DataFrames) -> None: # pragma: no cover
30 | """Process the collection of dataframes on driver side
31 |
32 | .. note::
33 |
34 | * It runs on driver side
35 | * The dataframes are not necessarily local, for example a SparkDataFrame
36 | * It is engine aware: you can put platform-dependent code in it (for example
37 | native pyspark code), but doing so may make your code less portable. If you
38 | only use the functions of the general ExecutionEngine, it remains portable.
39 |
40 | :param dfs: dataframe collection to process
41 | """
42 | raise NotImplementedError
43 |
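The interfaceless counterpart is a plain function returning None, like display in tests/fugue/workflow/test_workflow_parallel.py; a minimal sketch:

    from fugue import DataFrame, FugueWorkflow

    def display(df: DataFrame) -> None:
        df.show()

    dag = FugueWorkflow()
    dag.df([[0]], "a:int").output(display)
    dag.run()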
--------------------------------------------------------------------------------
/fugue/exceptions.py:
--------------------------------------------------------------------------------
1 | class FugueError(Exception):
2 | """Fugue exceptions"""
3 |
4 |
5 | class FugueBug(FugueError):
6 | """Fugue internal bug"""
7 |
8 |
9 | class FugueInvalidOperation(FugueError):
10 | """Invalid operation on the Fugue framework"""
11 |
12 |
13 | class FuguePluginsRegistrationError(FugueError):
14 | """Fugue plugins registration error"""
15 |
16 |
17 | class FugueDataFrameError(FugueError):
18 | """Fugue dataframe related error"""
19 |
20 |
21 | class FugueDataFrameInitError(FugueDataFrameError):
22 | """Fugue dataframe initialization error"""
23 |
24 |
25 | class FugueDatasetEmptyError(FugueDataFrameError):
26 | """Fugue dataframe is empty"""
27 |
28 |
29 | class FugueDataFrameOperationError(FugueDataFrameError):
30 | """Fugue dataframe invalid operation"""
31 |
32 |
33 | class FugueWorkflowError(FugueError):
34 | """Fugue workflow exceptions"""
35 |
36 |
37 | class FugueWorkflowCompileError(FugueWorkflowError):
38 | """Fugue workflow compile time error"""
39 |
40 |
41 | class FugueWorkflowCompileValidationError(FugueWorkflowCompileError):
42 | """Fugue workflow compile time validation error"""
43 |
44 |
45 | class FugueInterfacelessError(FugueWorkflowCompileError):
46 | """Fugue interfaceless exceptions"""
47 |
48 |
49 | class FugueWorkflowRuntimeError(FugueWorkflowError):
50 | """Fugue workflow compile time error"""
51 |
52 |
53 | class FugueWorkflowRuntimeValidationError(FugueWorkflowRuntimeError):
54 | """Fugue workflow runtime validation error"""
55 |
56 |
57 | class FugueSQLError(FugueWorkflowCompileError):
58 | """Fugue SQL error"""
59 |
60 |
61 | class FugueSQLSyntaxError(FugueSQLError):
62 | """Fugue SQL syntax error"""
63 |
64 |
65 | class FugueSQLRuntimeError(FugueWorkflowRuntimeError):
66 | """Fugue SQL runtime error"""
67 |
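Since the classes form a hierarchy, catching a base class covers the derived ones; a small sketch:

    from fugue.exceptions import FugueDataFrameError, FugueDatasetEmptyError

    try:
        raise FugueDatasetEmptyError("no rows")
    except FugueDataFrameError:
        pass  # FugueDatasetEmptyError derives from FugueDataFrameError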
--------------------------------------------------------------------------------
/fugue/extensions/processor/processor.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 |
3 | from fugue.dataframe import DataFrame, DataFrames
4 | from fugue.extensions.context import ExtensionContext
5 |
6 |
7 | class Processor(ExtensionContext, ABC):
8 | """The interface to process one or multiple incoming dataframes and return one
9 | DataFrame. For example, dropping a column of a dataframe should be a type of Processor.
10 | Processor is a task-level extension; it runs on the driver and is execution engine aware.
11 |
12 | To implement this class, you should not define ``__init__``; directly implement
13 | the interface functions.
14 |
15 | .. note::
16 |
17 | Before implementing this class, consider whether you really need this
18 | interface: with Fugue's interfaceless feature, implementing Processor
19 | is commonly unnecessary, and the interfaceless approach may also
20 | decouple your code from Fugue.
21 |
22 | .. seealso::
23 |
24 | Please read
25 | :doc:`Processor Tutorial `
26 | """
27 |
28 | @abstractmethod
29 | def process(self, dfs: DataFrames) -> DataFrame: # pragma: no cover
30 | """Process the collection of dataframes on driver side
31 |
32 | .. note::
33 |
34 | * It runs on driver side
35 | * The dataframes are not necessarily local, for example a SparkDataFrame
36 | * It is engine aware: you can put platform-dependent code in it (for example
37 | native pyspark code), but doing so may make your code less portable. If you
38 | only use the functions of the general ExecutionEngine, it remains portable.
39 |
40 | :param dfs: dataframe collection to process
41 | :return: the result dataframe
42 | """
43 | raise NotImplementedError
44 |
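The interfaceless counterpart is a plain function from a dataframe to a dataframe, like process in tests/fugue/workflow/test_workflow_parallel.py; a minimal sketch:

    from fugue import DataFrame, FugueWorkflow

    def identity(df: DataFrame) -> DataFrame:
        return df

    dag = FugueWorkflow()
    dag.df([[0]], "a:int").process(identity).show()
    dag.run()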
--------------------------------------------------------------------------------
/tests/fugue_ibis/test_execution_engine.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 | import pytest
4 |
5 | import fugue.test as ft
6 | from fugue_test.builtin_suite import BuiltInTests
7 | from fugue_test.execution_suite import ExecutionEngineTests
8 |
9 | from .mock.tester import mockibisduck_session # noqa: F401 # pylint: disable-all
10 |
11 |
12 | @ft.fugue_test_suite("mockibisduck", mark_test=True)
13 | class IbisExecutionEngineTests(ExecutionEngineTests.Tests):
14 | def test_select(self):
15 | # it can't work properly with DuckDB (hugeint is not recognized)
16 | pass
17 |
18 |
19 | @ft.fugue_test_suite(("mockibisduck", {"fugue.force_is_ibis": True}), mark_test=True)
20 | class IbisExecutionEngineForceIbisTests(ExecutionEngineTests.Tests):
21 | def test_properties(self):
22 | assert not self.engine.is_distributed
23 | assert not self.engine.map_engine.is_distributed
24 | assert not self.engine.sql_engine.is_distributed
25 |
26 | assert (self.engine.sql_engine.get_temp_table_name()
27 | != self.engine.sql_engine.get_temp_table_name())
28 |
29 | def test_select(self):
30 | # it can't work properly with DuckDB (hugeint is not recognized)
31 | pass
32 |
33 | def test_get_parallelism(self):
34 | assert self.engine.get_current_parallelism() == 1
35 |
36 | def test_union(self):
37 | if sys.version_info >= (3, 9):
38 | # ibis 3.8 support no longer works
39 | return super().test_union()
40 |
41 |
42 | @ft.fugue_test_suite("mockibisduck", mark_test=True)
43 | class DuckBuiltInTests(BuiltInTests.Tests):
44 | def test_df_select(self):
45 | # it can't work properly with DuckDB (hugeint is not recognized)
46 | pass
47 |
48 |
49 | @ft.fugue_test_suite(("mockibisduck", {"fugue.force_is_ibis": True}), mark_test=True)
50 | class DuckBuiltInForceIbisTests(BuiltInTests.Tests):
51 | def test_df_select(self):
52 | # it can't work properly with DuckDB (hugeint is not recognized)
53 | pass
54 |
--------------------------------------------------------------------------------
/fugue_ray/registry.py:
--------------------------------------------------------------------------------
1 | from typing import Any
2 |
3 | import ray.data as rd
4 | from triad import run_at_def
5 |
6 | from fugue import DataFrame, register_execution_engine
7 | from fugue.dev import (
8 | DataFrameParam,
9 | ExecutionEngineParam,
10 | fugue_annotated_param,
11 | is_pandas_or,
12 | )
13 | from fugue.plugins import as_fugue_dataset, infer_execution_engine
14 |
15 | from .dataframe import RayDataFrame
16 | from .execution_engine import RayExecutionEngine
17 | from .tester import RayTestBackend # noqa: F401 # pylint: disable-all
18 |
19 |
20 | @infer_execution_engine.candidate(
21 | lambda objs: is_pandas_or(objs, (rd.Dataset, RayDataFrame))
22 | )
23 | def _infer_ray_client(objs: Any) -> Any:
24 | return "ray"
25 |
26 |
27 | @as_fugue_dataset.candidate(lambda df, **kwargs: isinstance(df, rd.Dataset))
28 | def _ray_as_fugue_df(df: rd.Dataset, **kwargs: Any) -> RayDataFrame:
29 | return RayDataFrame(df, **kwargs)
30 |
31 |
32 | def _register_engines() -> None:
33 | register_execution_engine(
34 | "ray", lambda conf, **kwargs: RayExecutionEngine(conf=conf), on_dup="ignore"
35 | )
36 |
37 |
38 | @fugue_annotated_param(RayExecutionEngine)
39 | class _RayExecutionEngineParam(ExecutionEngineParam):
40 | pass
41 |
42 |
43 | @fugue_annotated_param(rd.Dataset)
44 | class _RayDatasetParam(DataFrameParam):
45 | def to_input_data(self, df: DataFrame, ctx: Any) -> Any:
46 | assert isinstance(ctx, RayExecutionEngine)
47 | return ctx._to_ray_df(df).native
48 |
49 | def to_output_df(self, output: Any, schema: Any, ctx: Any) -> DataFrame:
50 | assert isinstance(output, rd.Dataset)
51 | assert isinstance(ctx, RayExecutionEngine)
52 | return RayDataFrame(output, schema=schema)
53 |
54 | def count(self, df: DataFrame) -> int: # pragma: no cover
55 | raise NotImplementedError("not allowed")
56 |
57 |
58 | @run_at_def
59 | def _register() -> None:
60 | """Register Ray Execution Engine"""
61 | _register_engines()
62 |
--------------------------------------------------------------------------------
/tests/fugue/workflow/test_runtime_exception.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | from fugue import FugueWorkflow
3 | import sys
4 | import traceback
5 | from fugue.constants import (
6 | FUGUE_CONF_WORKFLOW_EXCEPTION_HIDE,
7 | FUGUE_CONF_WORKFLOW_EXCEPTION_OPTIMIZE,
8 | )
9 |
10 |
11 | def test_runtime_exception():
12 | if sys.version_info < (3, 7):
13 | return
14 |
15 | def tr(df: pd.DataFrame) -> pd.DataFrame:
16 | raise Exception
17 |
18 | def show(df):
19 | df.show()
20 |
21 | dag = FugueWorkflow()
22 | df = dag.df([[0]], "a:int")
23 | df = df.transform(tr, schema="*")
24 | show(df)
25 |
26 | try:
27 | dag.run()
28 | except Exception:
29 | assert len(traceback.extract_tb(sys.exc_info()[2])) < 10
30 |
31 | dag = FugueWorkflow({FUGUE_CONF_WORKFLOW_EXCEPTION_OPTIMIZE: False})
32 | df = dag.df([[0]], "a:int")
33 | df = df.transform(tr, schema="*")
34 | show(df)
35 |
36 | try:
37 | dag.run("native")
38 | except Exception:
39 | assert len(traceback.extract_tb(sys.exc_info()[2])) > 10
40 |
41 | dag = FugueWorkflow({FUGUE_CONF_WORKFLOW_EXCEPTION_HIDE: ""})
42 | df = dag.df([[0]], "a:int")
43 | df = df.transform(tr, schema="*")
44 | show(df)
45 |
46 | try:
47 | dag.run("native")
48 | except Exception:
49 | assert len(traceback.extract_tb(sys.exc_info()[2])) > 10
50 |
51 |
52 | def test_modified_exception():
53 | if sys.version_info < (3, 7):
54 | return
55 |
56 | def tr(df: pd.DataFrame) -> pd.DataFrame:
57 | raise Exception
58 |
59 | def show(df):
60 | df.show()
61 |
62 | def tt(df):
63 | __modified_exception__ = NotImplementedError()
64 | return df.transform(tr, schema="*")
65 |
66 | dag = FugueWorkflow()
67 | df = dag.df([[0]], "a:int")
68 | df = tt(df)
69 | show(df)
70 |
71 | try:
72 | dag.run()
73 | except Exception as ex:
74 | assert isinstance(ex.__cause__, NotImplementedError)
75 |
--------------------------------------------------------------------------------
/.github/workflows/test_dask.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
3 |
4 | name: Dask Tests
5 |
6 | on:
7 | push:
8 | branches: [ master ]
9 | paths-ignore:
10 | - 'docs/**'
11 | - '**.md'
12 | pull_request:
13 | branches: [ master ]
14 | paths-ignore:
15 | - 'docs/**'
16 | - '**.md'
17 |
18 | concurrency:
19 | group: ${{ github.workflow }}-${{ github.ref }}
20 | cancel-in-progress: true
21 |
22 | jobs:
23 | test_dask_lower_bound:
24 | name: Dask 2024.4.0
25 | runs-on: ubuntu-latest
26 |
27 | steps:
28 | - uses: actions/checkout@v2
29 | - name: Set up Python 3.10
30 | uses: actions/setup-python@v1
31 | with:
32 | python-version: "3.10"
33 | - name: Install dependencies
34 | run: make devenv
35 | - name: Setup Dask
36 | run: pip install pyarrow==7.0.0 pandas==2.0.2 dask[dataframe,distributed]==2024.4.0
37 | - name: Test
38 | run: make testdask
39 |
40 | test_dask_sql_latest:
41 | name: Dask with SQL Latest
42 | runs-on: ubuntu-latest
43 |
44 | steps:
45 | - uses: actions/checkout@v2
46 | - name: Set up Python 3.10
47 | uses: actions/setup-python@v1
48 | with:
49 | python-version: "3.10"
50 | - name: Install dependencies
51 | run: make devenv
52 | - name: Test
53 | run: make testdask
54 |
55 | test_dask_latest:
56 | name: Dask without SQL Latest
57 | runs-on: ubuntu-latest
58 |
59 | steps:
60 | - uses: actions/checkout@v2
61 | - name: Set up Python 3.11
62 | uses: actions/setup-python@v1
63 | with:
64 | python-version: "3.11"
65 | - name: Install dependencies
66 | run: make devenv
67 | - name: Remove Dask SQL
68 | run: pip uninstall -y dask-sql qpd fugue-sql-antlr sqlglot
69 | - name: Test
70 | run: make testdask
71 |
--------------------------------------------------------------------------------
/fugue/extensions/_builtins/creators.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Callable, Optional
2 |
3 | from triad import Schema, assert_or_throw, to_uuid
4 |
5 | from fugue.collections.yielded import Yielded
6 | from fugue.dataframe import DataFrame
7 | from fugue.exceptions import FugueWorkflowCompileError
8 | from fugue.execution.api import as_fugue_engine_df
9 | from fugue.extensions.creator import Creator
10 |
11 |
12 | class Load(Creator):
13 | def create(self) -> DataFrame:
14 | kwargs = self.params.get("params", dict())
15 | path = self.params.get_or_throw("path", str)
16 | format_hint = self.params.get("fmt", "")
17 | columns = self.params.get_or_none("columns", object)
18 |
19 | return self.execution_engine.load_df(
20 | path=path, format_hint=format_hint, columns=columns, **kwargs
21 | )
22 |
23 |
24 | class CreateData(Creator):
25 | def __init__(
26 | self,
27 | df: Any,
28 | schema: Any = None,
29 | data_determiner: Optional[Callable[[Any], Any]] = None,
30 | ) -> None:
31 | if isinstance(df, Yielded):
32 | assert_or_throw(
33 | schema is None,
34 | FugueWorkflowCompileError("schema must be None when data is Yielded"),
35 | )
36 | super().__init__()
37 | self._df = df
38 | self._schema = schema if schema is None else Schema(schema)
39 | self._data_determiner = data_determiner
40 |
41 | def create(self) -> DataFrame:
42 | if isinstance(self._df, Yielded):
43 | return self.execution_engine.load_yielded(self._df)
44 | return as_fugue_engine_df(self.execution_engine, self._df, schema=self._schema)
45 |
46 | def _df_uid(self):
47 | if self._data_determiner is not None:
48 | return self._data_determiner(self._df)
49 | if isinstance(self._df, Yielded):
50 | return self._df
51 | return 1
52 |
53 | def __uuid__(self) -> str:
54 | return to_uuid(super().__uuid__(), self._df_uid(), self._schema)
55 |
--------------------------------------------------------------------------------
/.github/workflows/test_spark.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
3 |
4 | name: Spark Tests
5 |
6 | on:
7 | push:
8 | branches: [ master ]
9 | paths-ignore:
10 | - 'docs/**'
11 | - '**.md'
12 | pull_request:
13 | branches: [ master ]
14 | paths-ignore:
15 | - 'docs/**'
16 | - '**.md'
17 |
18 | concurrency:
19 | group: ${{ github.workflow }}-${{ github.ref }}
20 | cancel-in-progress: true
21 |
22 | jobs:
23 | test_combinations:
24 | name: Spark ${{ matrix.spark-version }} Pandas ${{ matrix.pandas-version }}
25 | runs-on: ubuntu-latest
26 | strategy:
27 | matrix:
28 | spark-version: ["3.4.0","3.5.7"] # TODO: add 4.0.1
29 | pandas-version: ["1.5.3","2.0.1"]
30 |
31 | steps:
32 | - uses: actions/checkout@v2
33 | - name: Set up Python 3.10
34 | uses: actions/setup-python@v1
35 | with:
36 | python-version: "3.10"
37 | - name: Install dependencies
38 | run: make devenv
39 | - name: Install Spark ${{ matrix.spark-version }}
40 | run: pip install "pyspark==${{ matrix.spark-version }}"
41 | - name: Install Pandas ${{ matrix.pandas-version }}
42 | run: pip install "pandas==${{ matrix.pandas-version }}"
43 | - name: Downgrade Ibis
44 | if: matrix.spark-version < '3.4.0'
45 | run: pip install "ibis-framework<5"
46 | - name: Test
47 | run: make testspark
48 |
49 | test_connect:
50 | name: Spark Connect
51 | runs-on: ubuntu-latest
52 |
53 | steps:
54 | - uses: actions/checkout@v2
55 | - name: Set up Python 3.10
56 | uses: actions/setup-python@v1
57 | with:
58 | python-version: "3.10"
59 | - name: Install dependencies
60 | run: make devenv
61 | - name: Setup Spark
62 | run: |
63 | pip install "pyspark==3.5.7"
64 | make sparkconnect
65 | - name: Test
66 | run: make testsparkconnect
67 |
--------------------------------------------------------------------------------
/tests/fugue/extensions/creator/__init__.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict, Iterable, List
2 |
3 | from fugue.dataframe import ArrayDataFrame
4 | from fugue.exceptions import FugueInterfacelessError
5 | from fugue.extensions.transformer import Transformer, _to_transformer, transformer
6 | from pytest import raises
7 | from triad.collections.schema import Schema
8 |
9 |
10 | def test_transformer():
11 | assert isinstance(t1, Transformer)
12 | df = ArrayDataFrame([[0]], "a:int")
13 | t1._output_schema = t1.get_output_schema(df)
14 | assert t1.output_schema == "a:int,b:int"
15 | t2._output_schema = t2.get_output_schema(df)
16 | assert t2.output_schema == "b:int,a:int"
17 | assert [[0, 1]] == list(t3(df.as_array_iterable()))
18 |
19 |
20 | def test__to_transformer():
21 | a = _to_transformer(t1, None)
22 | assert isinstance(a, Transformer)
23 | a._x = 1
24 | # every parse should produce a different transformer even the input is
25 | # a transformer instance
26 | b = _to_transformer(t1, None)
27 | assert isinstance(b, Transformer)
28 | assert "_x" not in b.__dict__
29 | c = _to_transformer("t1", None)
30 | assert isinstance(c, Transformer)
31 | assert "_x" not in c.__dict__
32 | c._x = 1
33 | d = _to_transformer("t1", None)
34 | assert isinstance(d, Transformer)
35 | assert "_x" not in d.__dict__
36 | raises(FugueInterfacelessError, lambda: _to_transformer(t4, None))
37 | raises(FugueInterfacelessError, lambda: _to_transformer("t4", None))
38 | e = _to_transformer("t4", "*,b:int")
39 | assert isinstance(e, Transformer)
40 |
41 |
42 | @transformer(["*", None, "b:int"])
43 | def t1(df: Iterable[Dict[str, Any]]) -> Iterable[Dict[str, Any]]:
44 | for r in df:
45 | r["b"] = 1
46 | yield r
47 |
48 |
49 | @transformer([Schema("b:int"), "*"])
50 | def t2(df: Iterable[Dict[str, Any]]) -> Iterable[Dict[str, Any]]:
51 | for r in df:
52 | r["b"] = 1
53 | yield r
54 |
55 |
56 | @transformer("*, b:int")
57 | def t3(df: Iterable[List[Any]]) -> Iterable[List[Any]]:
58 | for r in df:
59 | r += [1]
60 | yield r
61 |
62 |
63 | def t4(df: Iterable[List[Any]]) -> Iterable[List[Any]]:
64 | for r in df:
65 | r += [1]
66 | yield r
67 |
--------------------------------------------------------------------------------
/fugue/sql/_utils.py:
--------------------------------------------------------------------------------
1 | import re
2 | from typing import Any, Dict, Optional
3 |
4 | from triad import assert_or_throw
5 |
6 | from ..collections.yielded import Yielded
7 | from ..exceptions import FugueSQLError
8 | from ..workflow.workflow import FugueWorkflow, WorkflowDataFrame
9 |
10 | MATCH_QUOTED_STRING = r"([\"'])(({|%|})*)\1"
11 |
12 |
13 | def fill_sql_template(sql: str, params: Dict[str, Any]):
14 | """Prepare string to be executed, inserts params into sql template
15 | ---
16 | :param sql: jinja compatible template
17 | :param params: params to be inserted into template
18 | """
19 | import jinja2
20 | from jinja2 import Template
21 |
22 | try:
23 | if "self" in params:
24 | params = {k: v for k, v in params.items() if k != "self"}
25 | single_quote_pattern = "'{{% raw %}}{}{{% endraw %}}'"
26 | double_quote_pattern = '"{{% raw %}}{}{{% endraw %}}"'
27 | new_sql = re.sub(
28 | MATCH_QUOTED_STRING,
 29 |             lambda match: double_quote_pattern.format(match.group(2))
 30 |             if match.group(1) == '"'
 31 |             else single_quote_pattern.format(match.group(2)),
32 | sql,
33 | )
34 |
35 | template = Template(new_sql)
36 |
 37 |     except jinja2.exceptions.TemplateSyntaxError:
 38 |         # fall back to rendering the original sql as-is
 39 |         template = Template(sql)
40 |
41 | return template.render(**params)
42 |
43 |
44 | class LazyWorkflowDataFrame:
45 | def __init__(self, key: str, df: Any, workflow: FugueWorkflow):
46 | self._key = key
47 | self._df = df
48 | self._workflow = workflow
49 | self._wdf: Optional[WorkflowDataFrame] = None
50 |
51 | def get_df(self) -> WorkflowDataFrame:
52 | if self._wdf is None:
53 | self._wdf = self._get_df()
54 | return self._wdf
55 |
56 | def _get_df(self) -> WorkflowDataFrame:
57 | if isinstance(self._df, Yielded):
58 | return self._workflow.df(self._df)
59 | if isinstance(self._df, WorkflowDataFrame):
60 | assert_or_throw(
61 | self._df.workflow is self._workflow,
62 | lambda: FugueSQLError(
63 | f"{self._key}, {self._df} is from another workflow"
64 | ),
65 | )
66 | return self._df
67 | return self._workflow.df(self._df)
68 |
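 69 | # A minimal, hedged sketch of fill_sql_template's behavior, for illustration
 70 | # only (not part of the module's API):
 71 | def _demo_fill_sql_template() -> None:  # pragma: no cover
 72 |     # plain jinja substitution
 73 |     assert fill_sql_template("SELECT * FROM {{tbl}}", {"tbl": "t"}) == "SELECT * FROM t"
 74 |     # "self" is dropped from params before rendering
 75 |     assert fill_sql_template("{{x}}", {"self": object(), "x": 1}) == "1"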
--------------------------------------------------------------------------------
/fugue/collections/yielded.py:
--------------------------------------------------------------------------------
1 | from typing import Any
2 |
3 | from triad import assert_or_throw
4 | from triad.utils.hash import to_uuid
5 |
6 |
7 | class Yielded(object):
8 | """Yields from :class:`~fugue.workflow.workflow.FugueWorkflow`.
9 | Users shouldn't create this object directly.
10 |
11 | :param yid: unique id for determinism
12 | """
13 |
14 | def __init__(self, yid: str):
15 | self._yid = to_uuid(yid)
16 |
17 | def __uuid__(self) -> str:
18 | """uuid of the instance"""
19 | return self._yid
20 |
21 | @property
22 | def is_set(self) -> bool: # pragma: no cover
23 | """Whether the value is set. It can be false if the parent workflow
24 | has not been executed.
25 | """
26 | raise NotImplementedError
27 |
28 | def __copy__(self) -> Any: # pragma: no cover
29 | """``copy`` should have no effect"""
30 | return self
31 |
32 | def __deepcopy__(self, memo: Any) -> Any: # pragma: no cover
33 | """``deepcopy`` should have no effect"""
34 | return self
35 |
36 |
37 | class PhysicalYielded(Yielded):
38 | """Physical yielded object from :class:`~fugue.workflow.workflow.FugueWorkflow`.
39 | Users shouldn't create this object directly.
40 |
41 | :param yid: unique id for determinism
42 | :param storage_type: ``file`` or ``table``
43 | """
44 |
45 | def __init__(self, yid: str, storage_type: str):
46 | super().__init__(yid)
47 | self._name = ""
48 | assert_or_throw(
49 | storage_type in ["file", "table"],
50 | ValueError(f"{storage_type} not in (file, table) "),
51 | )
52 | self._storage_type = storage_type
53 |
54 | @property
55 | def is_set(self) -> bool:
56 | return self._name != ""
57 |
58 | def set_value(self, name: str) -> None:
59 | """Set the storage name after compute
60 |
61 | :param name: name reference of the storage
62 | """
63 | self._name = name
64 |
65 | @property
66 | def name(self) -> str:
67 | """The name reference of the yield"""
68 | assert_or_throw(self.is_set, "value is not set")
69 | return self._name
70 |
71 | @property
72 | def storage_type(self) -> str:
73 | """The storage type of this yield"""
74 | return self._storage_type
75 |
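 76 | # A minimal sketch of the yield lifecycle, for illustration only
 77 | # (the id and table name below are hypothetical):
 78 | def _demo_physical_yielded() -> None:  # pragma: no cover
 79 |     y = PhysicalYielded("my_yield_id", storage_type="table")
 80 |     assert not y.is_set
 81 |     y.set_value("my_schema.my_table")  # done by the workflow after compute
 82 |     assert y.name == "my_schema.my_table" and y.storage_type == "table"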
--------------------------------------------------------------------------------
/tests/fugue/dataframe/test_arrow_dataframe.py:
--------------------------------------------------------------------------------
1 | from typing import Any
2 |
3 | import pandas as pd
4 | import pyarrow as pa
5 | from pytest import raises
6 |
7 | import fugue.api as fa
8 | import fugue.test as ft
9 | from fugue.dataframe import ArrowDataFrame
10 | from fugue_test.dataframe_suite import DataFrameTests
11 |
12 |
13 | @ft.fugue_test_suite("native", mark_test=True)
14 | class ArrowDataFrameTests(DataFrameTests.Tests):
15 | def df(self, data: Any = None, schema: Any = None) -> ArrowDataFrame:
16 | return ArrowDataFrame(data, schema)
17 |
18 |
19 | @ft.fugue_test_suite("native", mark_test=True)
20 | class NativeArrowDataFrameTests(DataFrameTests.NativeTests):
 21 |     def df(self, data: Any = None, schema: Any = None) -> pa.Table:
22 | return ArrowDataFrame(data, schema).as_arrow()
23 |
24 | def to_native_df(self, pdf: pd.DataFrame) -> Any: # pragma: no cover
25 | return pa.Table.from_pandas(pdf)
26 |
27 | def test_num_partitions(self):
28 | assert fa.get_num_partitions(self.df([[0, 1]], "a:int,b:int")) == 1
29 |
30 |
31 | def test_init():
32 | df = ArrowDataFrame(schema="a:str,b:int")
33 | assert df.empty
34 | assert df.schema == "a:str,b:int"
35 | assert df.is_bounded
36 |
37 | df = ArrowDataFrame(pd.DataFrame([], columns=["a", "b"]), schema="a:str,b:int")
38 | assert df.empty
39 | assert df.schema == "a:str,b:int"
40 | assert df.is_bounded
41 |
42 | data = [["a", "1"], ["b", "2"]]
43 | df = ArrowDataFrame(data, "a:str,b:str")
44 | assert [["a", "1"], ["b", "2"]] == df.as_array(type_safe=True)
45 | data = [["a", 1], ["b", 2]]
46 | df = ArrowDataFrame(data, "a:str,b:int")
47 | assert [["a", 1.0], ["b", 2.0]] == df.as_array(type_safe=True)
48 | df = ArrowDataFrame(data, "a:str,b:double")
49 | assert [["a", 1.0], ["b", 2.0]] == df.as_array(type_safe=True)
50 |
51 | ddf = ArrowDataFrame(df.native)
52 | assert [["a", 1.0], ["b", 2.0]] == ddf.as_array(type_safe=True)
53 |
54 | df = ArrowDataFrame(df.as_pandas(), "a:str,b:double")
55 | assert [["a", 1.0], ["b", 2.0]] == df.as_array(type_safe=True)
56 | df = ArrowDataFrame(df.as_pandas()["b"])
57 | assert [[1.0], [2.0]] == df.as_array(type_safe=True)
58 |
59 | df = ArrowDataFrame([], "x:str,y:double")
60 | assert df.empty
61 | assert df.is_local
62 | assert df.is_bounded
63 |
64 | raises(Exception, lambda: ArrowDataFrame(123))
65 |
--------------------------------------------------------------------------------
/tests/fugue/dataframe/test_dataframes.py:
--------------------------------------------------------------------------------
1 | from fugue.dataframe import DataFrames
2 | from fugue.dataframe.array_dataframe import ArrayDataFrame
3 | from fugue.dataframe.pandas_dataframe import PandasDataFrame
4 | from pytest import raises
5 | from triad.exceptions import InvalidOperationError
6 |
7 |
8 | def test_dataframes():
9 | df1 = ArrayDataFrame([[0]], "a:int")
10 | df2 = ArrayDataFrame([[1]], "a:int")
11 | dfs = DataFrames(a=df1, b=df2)
12 | assert dfs[0] is df1
13 | assert dfs[1] is df2
14 |
15 | dfs = DataFrames([df1, df2], df1)
16 | assert not dfs.has_key
17 | assert dfs[0] is df1
18 | assert dfs[1] is df2
19 | assert dfs[2] is df1
20 |
21 | dfs2 = DataFrames(dfs, dfs, df2)
22 | assert not dfs2.has_key
23 | assert dfs2[0] is df1
24 | assert dfs2[1] is df2
25 | assert dfs2[2] is df1
26 | assert dfs2[3] is df1
27 | assert dfs2[4] is df2
28 | assert dfs2[5] is df1
29 | assert dfs2[6] is df2
30 |
31 | dfs = DataFrames([("a", df1), ("b", df2)])
32 | assert dfs.has_key
33 | assert dfs[0] is df1
34 | assert dfs[1] is df2
35 | assert dfs["a"] is df1
36 | assert dfs["b"] is df2
37 |
38 | with raises(ValueError):
39 | dfs["c"] = 1
40 |
41 | with raises(ValueError):
42 | dfs2 = DataFrames(1)
43 |
44 | with raises(ValueError):
45 | dfs2 = DataFrames(a=df1, b=2)
46 |
47 | with raises(InvalidOperationError):
48 | dfs2 = DataFrames(dict(a=df1), df2)
49 |
50 | with raises(InvalidOperationError):
51 | dfs2 = DataFrames(df2, dict(a=df1))
52 |
53 | with raises(InvalidOperationError):
54 | dfs2 = DataFrames(df1, a=df2)
55 |
56 | with raises(InvalidOperationError):
57 | dfs2 = DataFrames(DataFrames(df1, df2), x=df2)
58 |
59 | dfs2 = DataFrames(dfs)
60 | assert dfs2.has_key
61 | assert dfs2[0] is df1
62 | assert dfs2[1] is df2
63 |
64 | dfs1 = DataFrames(a=df1, b=df2)
65 | dfs2 = dfs1.convert(lambda x: PandasDataFrame(x.as_array(), x.schema))
66 | assert len(dfs1) == len(dfs2)
67 | assert dfs2.has_key
68 | assert isinstance(dfs2["a"], PandasDataFrame)
69 | assert isinstance(dfs2["b"], PandasDataFrame)
70 |
71 | dfs1 = DataFrames(df1, df2)
72 | dfs2 = dfs1.convert(lambda x: PandasDataFrame(x.as_array(), x.schema))
73 | assert len(dfs1) == len(dfs2)
74 | assert not dfs2.has_key
75 | assert isinstance(dfs2[0], PandasDataFrame)
76 | assert isinstance(dfs2[1], PandasDataFrame)
--------------------------------------------------------------------------------
/images/logo.svg:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/docs/_static/logo.svg:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/docs/_static/logo_doc.svg:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/fugue/_utils/interfaceless.py:
--------------------------------------------------------------------------------
1 | import inspect
2 | from typing import Callable, Optional
3 |
4 | from triad.utils.assertion import assert_or_throw
5 |
6 | _COMMENT_SCHEMA_ANNOTATION = "schema"
7 |
8 |
9 | def parse_comment_annotation(func: Callable, annotation: str) -> Optional[str]:
10 | """Parse comment annotation above the function. It try to find
11 | comment lines starts with the annotation from bottom up, and will use the first
12 | occurrance as the result.
13 |
14 | :param func: the function
15 | :param annotation: the annotation string
16 | :return: schema hint string
17 |
18 | .. admonition:: Examples
19 |
20 | .. code-block:: python
21 |
22 | # schema: a:int,b:str
23 | #schema:a:int,b:int # more comment
24 | # some comment
25 | def dummy():
26 | pass
27 |
28 | assert "a:int,b:int" == parse_comment_annotation(dummy, "schema:")
29 | """
30 | for orig in reversed((inspect.getcomments(func) or "").splitlines()):
31 | start = orig.find(":")
32 | if start <= 0:
33 | continue
34 | actual = orig[:start].replace("#", "", 1).strip()
35 | if actual != annotation:
36 | continue
37 | end = orig.find("#", start)
38 | s = orig[start + 1 : (end if end > 0 else len(orig))].strip()
39 | return s
40 | return None
41 |
42 |
43 | def parse_output_schema_from_comment(func: Callable) -> Optional[str]:
44 | """Parse schema hint from the comments above the function. It try to find
45 | comment lines starts with `schema:` from bottom up, and will use the first
46 | occurrance as the hint.
47 |
48 | :param func: the function
49 | :return: schema hint string
50 |
51 | .. admonition:: Examples
52 |
53 | .. code-block:: python
54 |
55 | # schema: a:int,b:str
56 | #schema:a:int,b:int # more comment
57 | # some comment
58 | def dummy():
59 | pass
60 |
61 | assert "a:int,b:int" == parse_output_schema_from_comment(dummy)
62 | """
63 | res = parse_comment_annotation(func, _COMMENT_SCHEMA_ANNOTATION)
64 | if res is None:
65 | return None
66 | assert_or_throw(res != "", SyntaxError("incorrect schema annotation"))
67 | return res.strip()
68 |
69 |
70 | def is_class_method(func: Callable) -> bool:
71 | sig = inspect.signature(func)
72 | # TODO: this is not the best way
73 | return "self" in sig.parameters
74 |
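 75 | # A short sketch of the heuristic above and its known limitation:
 76 | def _demo_is_class_method() -> None:  # pragma: no cover
 77 |     def f(a):
 78 |         pass
 79 |
 80 |     def g(self, a):  # a plain function that merely names a param "self"
 81 |         pass
 82 |
 83 |     assert not is_class_method(f)
 84 |     assert is_class_method(g)  # false positive: g is not actually a method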
--------------------------------------------------------------------------------
/tests/fugue/rpc/test_base.py:
--------------------------------------------------------------------------------
1 | import pickle
2 |
3 | from fugue.rpc import make_rpc_server, to_rpc_handler, RPCFunc, EmptyRPCHandler
4 | from pytest import raises
5 | from triad import ParamDict
6 |
7 |
8 | def test_default_server():
9 | def k(value: str) -> str:
10 | return value + "x"
11 |
12 | def kk(value: str) -> str:
13 | return value + "xx"
14 |
15 | conf = {"x": "y"}
16 |
17 | with make_rpc_server(conf).start() as server:
18 | assert "y" == server.conf["x"]
 19 |         with server.start(): # a nested start has no effect
20 | client = server.make_client(k)
21 | assert "dddx" == client("ddd")
22 | client = server.make_client(kk)
23 | assert "dddxx" == client("ddd")
 24 |         server.stop() # an extra stop at the end has no effect
25 |
26 | with raises(pickle.PicklingError):
27 | pickle.dumps(client)
28 |
29 | with raises(pickle.PicklingError):
30 | pickle.dumps(server)
31 |
32 |
33 | def test_server_handlers():
34 | func = lambda x: x + "aa"
35 |
36 | class _Dict(RPCFunc):
37 | def __init__(self, obj):
38 | super().__init__(obj)
39 | self.start_called = 0
40 | self.stop_called = 0
41 |
42 | def start_handler(self):
43 | self.start_called += 1
44 |
45 | def stop_handler(self):
46 | self.stop_called += 1
47 |
48 | server = make_rpc_server({})
49 | server.start()
50 | d1 = _Dict(func)
51 | c1 = server.make_client(d1)
52 | assert "xaa" == c1("x")
53 | assert 1 == d1.start_called
54 | assert 0 == d1.stop_called
55 | server.stop()
56 | assert 1 == d1.start_called
57 | assert 1 == d1.stop_called
58 |
59 | with server.start():
60 | d2 = _Dict(func)
61 | c1 = server.make_client(d2)
62 | server.start()
63 | assert "xaa" == c1("x")
64 | assert 1 == d2.start_called
65 | assert 0 == d2.stop_called
66 | assert 1 == d1.start_called
67 | assert 1 == d1.stop_called
68 | server.stop()
69 | assert 1 == d2.start_called
70 | assert 1 == d2.stop_called
71 | assert 1 == d1.start_called
72 | assert 1 == d1.stop_called
73 |
74 |
75 | def test_to_rpc_handler():
76 | assert isinstance(to_rpc_handler(None), EmptyRPCHandler)
77 | assert isinstance(to_rpc_handler(lambda x: x), RPCFunc)
78 | handler = to_rpc_handler(lambda x: x)
79 | assert handler is to_rpc_handler(handler)
80 | raises(ValueError, lambda: to_rpc_handler(1))
81 |
--------------------------------------------------------------------------------
/fugue_dask/registry.py:
--------------------------------------------------------------------------------
1 | from typing import Any
2 |
3 | import dask.dataframe as dd
4 | from dask.distributed import Client
5 |
6 | from fugue import DataFrame
7 | from fugue.dev import (
8 | DataFrameParam,
9 | ExecutionEngineParam,
10 | fugue_annotated_param,
11 | is_pandas_or,
12 | )
13 | from fugue.plugins import (
14 | as_fugue_dataset,
15 | infer_execution_engine,
16 | parse_execution_engine,
17 | )
18 | from fugue_dask._utils import DASK_UTILS
19 | from fugue_dask.dataframe import DaskDataFrame
20 | from fugue_dask.execution_engine import DaskExecutionEngine
21 |
22 | from .tester import DaskTestBackend # noqa: F401 # pylint: disable-all
23 |
24 |
25 | @infer_execution_engine.candidate(
26 | lambda objs: is_pandas_or(objs, (dd.DataFrame, DaskDataFrame))
27 | )
28 | def _infer_dask_client(objs: Any) -> Any:
29 | return DASK_UTILS.get_or_create_client()
30 |
31 |
32 | @as_fugue_dataset.candidate(lambda df, **kwargs: isinstance(df, dd.DataFrame))
33 | def _dask_as_fugue_df(df: dd.DataFrame, **kwargs: Any) -> DaskDataFrame:
34 | return DaskDataFrame(df, **kwargs)
35 |
36 |
37 | @parse_execution_engine.candidate(
38 | lambda engine, conf, **kwargs: isinstance(engine, Client),
 39 |     priority=4,  # TODO: this is to override the dask-sql fugue integration
40 | )
41 | def _parse_dask_client(engine: Client, conf: Any, **kwargs: Any) -> DaskExecutionEngine:
42 | return DaskExecutionEngine(dask_client=engine, conf=conf)
43 |
44 |
45 | @parse_execution_engine.candidate(
46 | lambda engine, conf, **kwargs: isinstance(engine, str) and engine == "dask",
 47 |     priority=4,  # TODO: this is to override the dask-sql fugue integration
48 | )
49 | def _parse_dask_str(engine: str, conf: Any, **kwargs: Any) -> DaskExecutionEngine:
50 | return DaskExecutionEngine(conf=conf)
51 |
52 |
53 | @fugue_annotated_param(DaskExecutionEngine)
54 | class _DaskExecutionEngineParam(ExecutionEngineParam):
55 | pass
56 |
57 |
58 | @fugue_annotated_param(dd.DataFrame)
59 | class _DaskDataFrameParam(DataFrameParam):
60 | def to_input_data(self, df: DataFrame, ctx: Any) -> Any:
61 | assert isinstance(ctx, DaskExecutionEngine)
62 | return ctx.to_df(df).native
63 |
64 | def to_output_df(self, output: Any, schema: Any, ctx: Any) -> DataFrame:
65 | assert isinstance(output, dd.DataFrame)
66 | assert isinstance(ctx, DaskExecutionEngine)
67 | return ctx.to_df(output, schema=schema)
68 |
69 | def count(self, df: DataFrame) -> int: # pragma: no cover
70 | raise NotImplementedError("not allowed")
71 |
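 72 | # A hedged sketch of what the registrations above enable (assumes a dask
 73 | # dataframe ``ddf`` and a transformer function ``f``; not executed here):
 74 | #
 75 | #   import fugue.api as fa
 76 | #   fa.as_fugue_df(ddf)                              # routed to _dask_as_fugue_df
 77 | #   fa.transform(ddf, f, schema="*")                 # engine inferred via _infer_dask_client
 78 | #   fa.transform(ddf, f, schema="*", engine="dask")  # engine parsed via _parse_dask_str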
--------------------------------------------------------------------------------
/tests/fugue/dataframe/test_dataframe.py:
--------------------------------------------------------------------------------
1 | import copy
2 |
3 | import pandas as pd
4 | from pytest import raises
5 | from triad.collections.schema import Schema
6 |
7 | from fugue.dataframe import ArrayDataFrame, DataFrame
8 | from fugue.api import as_fugue_df, get_native_as_df
9 | from fugue.bag.array_bag import ArrayBag
10 |
11 |
12 | def test_as_fugue_df():
13 | with raises(NotImplementedError):
14 | as_fugue_df(10)
15 | with raises(TypeError):
16 | as_fugue_df(ArrayBag([1, 2]))
17 | df = pd.DataFrame([[0]], columns=["a"])
18 | assert isinstance(as_fugue_df(df), DataFrame)
19 |
20 |
21 | def test_get_native_as_df():
22 | with raises(NotImplementedError):
23 | get_native_as_df(10)
24 | # other tests are in the suites
25 |
26 |
27 | def test_show():
28 | df = ArrayDataFrame(schema="a:str,b:str")
29 | df.show()
30 |
31 | assert repr(df) == df._repr_html_()
32 |
33 | s = " ".join(["x"] * 2)
34 | df = ArrayDataFrame([[s, 1], ["b", 2]], "a:str,b:str")
35 | df.show()
36 |
37 | s = " ".join(["x"] * 200)
38 | df = ArrayDataFrame([[s, 1], ["b", 2]], "a:str,b:str")
39 | df.show()
40 |
41 | s = " ".join(["x"] * 200)
42 | df = ArrayDataFrame([[s, 1], ["b", s]], "a:str,b:str")
43 | df.show()
44 |
45 | s = "".join(["x"] * 2000)
46 | df = ArrayDataFrame([[s, 1], ["b", None]], "a:str,b:str")
47 | df.show()
48 |
49 | s = " ".join(["x"] * 20)
50 | schema = [f"a{x}:str" for x in range(20)]
51 | data = [[f"aasdfjasdfka;sdf{x}:str" for x in range(20)]]
52 | df = ArrayDataFrame(data, schema)
53 | df.show()
54 |
55 | s = " ".join(["x"] * 200)
56 | df = ArrayDataFrame([[s, 1], ["b", "s"]], "a:str,b:str")
57 | df.show(n=1, with_count=True, title="abc")
58 |
59 |
60 | def test_lazy_schema():
61 | df = MockDF([["a", 1], ["b", 2]], "a:str,b:str")
62 | assert callable(df._schema)
63 | assert df.schema == "a:str,b:str"
64 |
65 |
66 | def test_get_info_str():
67 | df = ArrayDataFrame([["a", 1], ["b", 2]], "a:str,b:str")
68 | assert '{"schema": "a:str,b:str", "type": '
69 | '"tests.collections.dataframe.test_dataframe.MockDF", "metadata": {}}' == df.get_info_str()
70 |
71 |
72 | def test_copy():
73 | df = ArrayDataFrame([["a", 1], ["b", 2]], "a:str,b:str")
74 | assert copy.copy(df) is df
75 | assert copy.deepcopy(df) is df
76 |
77 |
78 | class MockDF(ArrayDataFrame):
79 | def __init__(self, df=None, schema=None):
80 | super().__init__(df=df, schema=schema)
81 | DataFrame.__init__(self, lambda: Schema(schema))
82 |
--------------------------------------------------------------------------------
/fugue/_utils/exception.py:
--------------------------------------------------------------------------------
1 | from types import FrameType, TracebackType
2 | from typing import Callable, List, Optional
3 |
4 | _MODIFIED_EXCEPTION_VAR_NAME = "__modified_exception__"
5 |
6 |
7 | def frames_to_traceback(
8 | frame: Optional[FrameType],
9 | limit: int,
10 | should_prune: Optional[Callable[[str], bool]] = None,
11 | ) -> Optional[TracebackType]:
12 | ctb: Optional[TracebackType] = None
13 | skipped = False
14 | while frame is not None and limit > 0:
15 | if _MODIFIED_EXCEPTION_VAR_NAME in frame.f_locals:
16 | return TracebackType(
17 | tb_next=None,
18 | tb_frame=frame,
19 | tb_lasti=frame.f_lasti,
20 | tb_lineno=frame.f_lineno,
21 | )
22 | if not skipped:
23 | if should_prune is not None and should_prune(frame.f_globals["__name__"]):
24 | frame = frame.f_back
25 | continue
26 | skipped = True
27 | if should_prune is None or not should_prune(frame.f_globals["__name__"]):
28 | ctb = TracebackType(
29 | tb_next=ctb,
30 | tb_frame=frame,
31 | tb_lasti=frame.f_lasti,
32 | tb_lineno=frame.f_lineno,
33 | )
34 | limit -= 1
35 | frame = frame.f_back
36 | continue
37 | break # pragma: no cover
38 |
39 | return ctb
40 |
41 |
42 | def modify_traceback(
43 | traceback: Optional[TracebackType],
44 | should_prune: Optional[Callable[[str], bool]] = None,
45 | add_traceback: Optional[TracebackType] = None,
46 | ) -> Optional[TracebackType]:
47 | ctb: Optional[TracebackType] = None
48 |
49 | # get stack
50 | stack: List[TracebackType] = []
51 |
52 | if add_traceback is not None:
53 | f: Optional[TracebackType] = add_traceback
54 | while f is not None:
55 | stack.append(f)
56 | f = f.tb_next
57 | f = traceback
58 | while f is not None:
59 | stack.append(f)
60 | f = f.tb_next
61 | stack.reverse()
62 |
63 | # prune and reconstruct
64 | for n, f in enumerate(stack):
65 | if (
66 | n == 0
67 | or should_prune is None
68 | or not should_prune(f.tb_frame.f_globals["__name__"])
69 | ):
70 | ctb = TracebackType(
71 | tb_next=ctb,
72 | tb_frame=f.tb_frame,
73 | tb_lasti=f.tb_lasti,
74 | tb_lineno=f.tb_lineno,
75 | )
76 |
77 | return ctb
78 |
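 79 | # A hedged usage sketch: capture the current stack as a traceback chain while
 80 | # hiding fugue-internal frames, then attach it to a new exception.
 81 | def _demo_frames_to_traceback() -> None:  # pragma: no cover
 82 |     import sys
 83 |
 84 |     tb = frames_to_traceback(
 85 |         sys._getframe(),
 86 |         limit=30,
 87 |         should_prune=lambda mod: mod.startswith("fugue."),
 88 |     )
 89 |     raise RuntimeError("example").with_traceback(tb)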
--------------------------------------------------------------------------------
/fugue_test/__init__.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict, Tuple
2 |
3 | import pyarrow as pa
4 | import pytest
5 | from triad.utils.pyarrow import to_pa_datatype
6 |
7 | _FUGUE_TEST_CONF_NAME = "fugue_test_conf"
8 |
9 |
10 | def pytest_addoption(parser: Any): # pragma: no cover
11 | parser.addini(
12 | _FUGUE_TEST_CONF_NAME,
13 | help="Configs for fugue testing execution engines",
14 | type="linelist",
15 | )
16 |
17 |
18 | def pytest_configure(config: Any):
19 | from fugue.test.plugins import _set_global_conf
20 |
21 | options = config.getini(_FUGUE_TEST_CONF_NAME)
22 | conf: Dict[str, Any] = {}
23 | if options:
24 | for line in options:
25 | line = line.strip()
26 | if not line.startswith("#"):
27 | k, v = _parse_line(line)
28 | conf[k] = v
29 | _set_global_conf(conf)
30 |
31 |
32 | def pytest_report_header(config, start_path):
33 | from fugue.test.plugins import _get_all_ini_conf
34 |
35 | header_lines = []
36 | header_lines.append("Fugue tests will be initialized with options:")
37 | for k, v in _get_all_ini_conf().items():
38 | header_lines.append(f"\t{k} = {v}")
39 | return "\n".join(header_lines)
40 |
41 |
42 | def _parse_line(line: str) -> Tuple[str, Any]:
43 | try:
44 | kv = line.split("=", 1)
45 | if len(kv) == 1:
46 | raise ValueError()
47 | kt = kv[0].split(":", 1)
48 | if len(kt) == 1:
49 | tp = pa.string()
50 | else:
51 | tp = to_pa_datatype(kt[1].strip())
52 | key = kt[0].strip()
53 | if key == "":
54 | raise ValueError()
55 | value = pa.compute.cast([kv[1].strip()], tp).to_pylist()[0]
56 | return key, value
57 | except Exception:
58 | raise ValueError(
59 | f"Invalid config line: {line}, it must be in format: key[:type]=value"
60 | )
61 |
62 |
63 | @pytest.fixture(scope="class")
64 | def backend_context(request: Any):
65 | from fugue.test.plugins import _make_backend_context, _parse_backend
66 |
67 | c, _ = _parse_backend(request.param)
68 | session = request.getfixturevalue(c + "_session")
69 | with _make_backend_context(request.param, session) as ctx:
70 | yield ctx
71 |
72 |
73 | @pytest.fixture(scope="class")
74 | def _class_backend_context(request, backend_context):
75 | from fugue.test.plugins import FugueTestContext
76 |
77 | request.cls._test_context = FugueTestContext(
78 | engine=backend_context.engine,
79 | session=backend_context.session,
80 | name=backend_context.name,
81 | )
82 | yield
83 |
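 84 | # A minimal sketch of the "key[:type]=value" lines accepted by _parse_line
 85 | # (the keys below are hypothetical):
 86 | def _demo_parse_line() -> None:  # pragma: no cover
 87 |     assert _parse_line("fugue.dummy:int=1") == ("fugue.dummy", 1)
 88 |     assert _parse_line("name=hello") == ("name", "hello")  # type defaults to str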
--------------------------------------------------------------------------------
/docs/api_sql/fugue_sql.rst:
--------------------------------------------------------------------------------
1 | fugue\_sql
2 | ===========
3 |
4 | .. |SchemaLikeObject| replace:: :ref:`Schema like object `
5 | .. |ParamsLikeObject| replace:: :ref:`Parameters like object `
6 | .. |DataFrameLikeObject| replace:: :ref:`DataFrame like object `
7 | .. |DataFramesLikeObject| replace:: :ref:`DataFrames like object `
8 | .. |PartitionLikeObject| replace:: :ref:`Partition like object `
9 | .. |RPCHandlerLikeObject| replace:: :ref:`RPChandler like object `
10 |
11 | .. |ExecutionEngine| replace:: :class:`~fugue.execution.execution_engine.ExecutionEngine`
12 | .. |NativeExecutionEngine| replace:: :class:`~fugue.execution.native_execution_engine.NativeExecutionEngine`
13 | .. |FugueWorkflow| replace:: :class:`~fugue.workflow.workflow.FugueWorkflow`
14 |
15 | .. |ReadJoin| replace:: Read Join tutorials on :ref:`workflow ` and :ref:`engine ` for details
16 | .. |FugueConfig| replace:: :doc:`the Fugue Configuration Tutorial `
17 | .. |PartitionTutorial| replace:: :doc:`the Partition Tutorial `
18 | .. |FugueSQLTutorial| replace:: :doc:`the Fugue SQL Tutorial `
19 | .. |DataFrameTutorial| replace:: :ref:`the DataFrame Tutorial `
20 | .. |ExecutionEngineTutorial| replace:: :doc:`the ExecutionEngine Tutorial `
21 | .. |ZipComap| replace:: :ref:`Zip & Comap `
22 | .. |LoadSave| replace:: :ref:`Load & Save `
23 | .. |AutoPersist| replace:: :ref:`Auto Persist `
24 | .. |TransformerTutorial| replace:: :doc:`the Transformer Tutorial `
25 | .. |CoTransformer| replace:: :ref:`CoTransformer `
26 | .. |CoTransformerTutorial| replace:: :doc:`the CoTransformer Tutorial `
27 | .. |FugueDataTypes| replace:: :doc:`Fugue Data Types `
28 |
29 |
30 | fugue\_sql.exceptions
31 | ---------------------
32 |
33 | .. automodule:: fugue_sql.exceptions
34 | :members:
35 | :undoc-members:
36 | :show-inheritance:
37 |
38 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | pythonenv*
113 |
114 | # Spyder project settings
115 | .spyderproject
116 | .spyproject
117 |
118 | # Rope project settings
119 | .ropeproject
120 |
121 | # mkdocs documentation
122 | /site
123 | .virtual_documents
124 |
125 | # mypy
126 | .mypy_cache
127 | .dmypy.json
128 | dmypy.json
129 |
130 | # Pyre type checker
131 | .pyre/
132 |
133 | .vscode
134 | tmp
135 |
136 | # Antlr
137 | .antlr
138 |
139 | # dask
140 | dask-worker-space
141 |
142 | # spark
143 | spark-warehouse
144 | =*
145 |
146 | # DS_Store
147 | *.DS_Store
148 |
--------------------------------------------------------------------------------
/fugue_spark/tester.py:
--------------------------------------------------------------------------------
1 | from contextlib import contextmanager
2 | from typing import Any, Dict, Iterator
3 |
4 | from pyspark.sql import SparkSession
5 |
6 | import fugue.test as ft
7 |
8 | from ._utils.misc import SparkConnectSession
9 |
10 |
11 | @ft.fugue_test_backend
12 | class SparkTestBackend(ft.FugueTestBackend):
13 | name = "spark"
14 | default_session_conf = {
15 | "spark.app.name": "fugue-test-spark",
16 | "spark.master": "local[*]",
17 | "spark.default.parallelism": 4,
18 | "spark.dynamicAllocation.enabled": "false",
19 | "spark.executor.cores": 4,
20 | "spark.executor.instances": 1,
21 | "spark.io.compression.codec": "lz4",
22 | "spark.rdd.compress": "false",
23 | "spark.sql.shuffle.partitions": 4,
24 | "spark.shuffle.compress": "false",
25 | "spark.sql.catalogImplementation": "in-memory",
26 | "spark.sql.execution.arrow.pyspark.enabled": True,
27 | "spark.sql.adaptive.enabled": False,
28 | }
29 |
30 | @classmethod
31 | def transform_session_conf(cls, conf: Dict[str, Any]) -> Dict[str, Any]:
32 | return ft.extract_conf(conf, "spark.", remove_prefix=False)
33 |
34 | @classmethod
35 | @contextmanager
36 | def session_context(cls, session_conf: Dict[str, Any]) -> Iterator[Any]:
37 | with _create_session(session_conf).getOrCreate() as spark:
38 | yield spark
39 |
40 |
41 | if SparkConnectSession is not None:
42 |
43 | @ft.fugue_test_backend
44 | class SparkConnectTestBackend(SparkTestBackend):
45 | name = "sparkconnect"
46 | default_session_conf = {
47 | "spark.default.parallelism": 4,
48 | "spark.sql.shuffle.partitions": 4,
49 | "spark.sql.execution.arrow.pyspark.enabled": True,
50 | "spark.sql.adaptive.enabled": False,
51 | }
52 |
53 | @classmethod
54 | def transform_session_conf(
55 | cls, conf: Dict[str, Any]
56 | ) -> Dict[str, Any]: # pragma: no cover
57 | # replace sparkconnect. with spark.
58 | return {
59 | "spark." + k: v
60 | for k, v in ft.extract_conf(
61 | conf, cls.name + ".", remove_prefix=True
62 | ).items()
63 | }
64 |
65 | @classmethod
66 | @contextmanager
67 | def session_context(
68 | cls, session_conf: Dict[str, Any]
69 | ) -> Iterator[Any]: # pragma: no cover
70 | spark = _create_session(session_conf).remote("sc://localhost").getOrCreate()
71 | yield spark
72 |
73 |
74 | def _create_session(conf: Dict[str, Any]) -> Any:
75 | sb = SparkSession.builder
76 | for k, v in conf.items():
77 | sb = sb.config(k, v)
78 | return sb
79 |
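 80 | # A hedged sketch of pytest ini lines the backends above consume
 81 | # (hypothetical values):
 82 | #
 83 | #   fugue_test_conf =
 84 | #       spark.sql.shuffle.partitions:int=2        # SparkTestBackend keeps the prefix
 85 | #       sparkconnect.sql.shuffle.partitions:int=2 # mapped to spark.* by SparkConnectTestBackend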
--------------------------------------------------------------------------------
/docs/api/fugue.rpc.rst:
--------------------------------------------------------------------------------
1 | fugue.rpc
2 | ==========
3 |
4 | .. |SchemaLikeObject| replace:: :ref:`Schema like object `
5 | .. |ParamsLikeObject| replace:: :ref:`Parameters like object `
6 | .. |DataFrameLikeObject| replace:: :ref:`DataFrame like object `
7 | .. |DataFramesLikeObject| replace:: :ref:`DataFrames like object `
8 | .. |PartitionLikeObject| replace:: :ref:`Partition like object `
9 | .. |RPCHandlerLikeObject| replace:: :ref:`RPChandler like object `
10 |
11 | .. |ExecutionEngine| replace:: :class:`~fugue.execution.execution_engine.ExecutionEngine`
12 | .. |NativeExecutionEngine| replace:: :class:`~fugue.execution.native_execution_engine.NativeExecutionEngine`
13 | .. |FugueWorkflow| replace:: :class:`~fugue.workflow.workflow.FugueWorkflow`
14 |
15 | .. |ReadJoin| replace:: Read Join tutorials on :ref:`workflow ` and :ref:`engine ` for details
16 | .. |FugueConfig| replace:: :doc:`the Fugue Configuration Tutorial `
17 | .. |PartitionTutorial| replace:: :doc:`the Partition Tutorial `
18 | .. |FugueSQLTutorial| replace:: :doc:`the Fugue SQL Tutorial `
19 | .. |DataFrameTutorial| replace:: :ref:`the DataFrame Tutorial `
20 | .. |ExecutionEngineTutorial| replace:: :doc:`the ExecutionEngine Tutorial `
21 | .. |ZipComap| replace:: :ref:`Zip & Comap `
22 | .. |LoadSave| replace:: :ref:`Load & Save `
23 | .. |AutoPersist| replace:: :ref:`Auto Persist `
24 | .. |TransformerTutorial| replace:: :doc:`the Transformer Tutorial `
25 | .. |CoTransformer| replace:: :ref:`CoTransformer `
26 | .. |CoTransformerTutorial| replace:: :doc:`the CoTransformer Tutorial `
27 | .. |FugueDataTypes| replace:: :doc:`Fugue Data Types `
28 |
29 |
30 | fugue.rpc.base
31 | --------------
32 |
33 | .. automodule:: fugue.rpc.base
34 | :members:
35 | :undoc-members:
36 | :show-inheritance:
37 |
38 | fugue.rpc.flask
39 | ---------------
40 |
41 | .. automodule:: fugue.rpc.flask
42 | :members:
43 | :undoc-members:
44 | :show-inheritance:
45 |
46 |
--------------------------------------------------------------------------------
/docs/api/fugue.sql.rst:
--------------------------------------------------------------------------------
1 | fugue.sql
2 | ==========
3 |
4 | .. |SchemaLikeObject| replace:: :ref:`Schema like object `
5 | .. |ParamsLikeObject| replace:: :ref:`Parameters like object `
6 | .. |DataFrameLikeObject| replace:: :ref:`DataFrame like object `
7 | .. |DataFramesLikeObject| replace:: :ref:`DataFrames like object `
8 | .. |PartitionLikeObject| replace:: :ref:`Partition like object `
9 | .. |RPCHandlerLikeObject| replace:: :ref:`RPChandler like object `
10 |
11 | .. |ExecutionEngine| replace:: :class:`~fugue.execution.execution_engine.ExecutionEngine`
12 | .. |NativeExecutionEngine| replace:: :class:`~fugue.execution.native_execution_engine.NativeExecutionEngine`
13 | .. |FugueWorkflow| replace:: :class:`~fugue.workflow.workflow.FugueWorkflow`
14 |
15 | .. |ReadJoin| replace:: Read Join tutorials on :ref:`workflow ` and :ref:`engine ` for details
16 | .. |FugueConfig| replace:: :doc:`the Fugue Configuration Tutorial `
17 | .. |PartitionTutorial| replace:: :doc:`the Partition Tutorial `
18 | .. |FugueSQLTutorial| replace:: :doc:`the Fugue SQL Tutorial `
19 | .. |DataFrameTutorial| replace:: :ref:`the DataFrame Tutorial `
20 | .. |ExecutionEngineTutorial| replace:: :doc:`the ExecutionEngine Tutorial `
21 | .. |ZipComap| replace:: :ref:`Zip & Comap `
22 | .. |LoadSave| replace:: :ref:`Load & Save `
23 | .. |AutoPersist| replace:: :ref:`Auto Persist `
24 | .. |TransformerTutorial| replace:: :doc:`the Transformer Tutorial `
25 | .. |CoTransformer| replace:: :ref:`CoTransformer `
26 | .. |CoTransformerTutorial| replace:: :doc:`the CoTransformer Tutorial `
27 | .. |FugueDataTypes| replace:: :doc:`Fugue Data Types `
28 |
29 |
30 | fugue.sql.api
31 | -------------
32 |
33 | .. automodule:: fugue.sql.api
34 | :members:
35 | :undoc-members:
36 | :show-inheritance:
37 |
38 | fugue.sql.workflow
39 | ------------------
40 |
41 | .. automodule:: fugue.sql.workflow
42 | :members:
43 | :undoc-members:
44 | :show-inheritance:
45 |
46 |
--------------------------------------------------------------------------------
/docs/api/fugue.bag.rst:
--------------------------------------------------------------------------------
1 | fugue.bag
2 | ==========
3 |
4 | .. |SchemaLikeObject| replace:: :ref:`Schema like object `
5 | .. |ParamsLikeObject| replace:: :ref:`Parameters like object `
6 | .. |DataFrameLikeObject| replace:: :ref:`DataFrame like object `
7 | .. |DataFramesLikeObject| replace:: :ref:`DataFrames like object `
8 | .. |PartitionLikeObject| replace:: :ref:`Partition like object `
9 | .. |RPCHandlerLikeObject| replace:: :ref:`RPChandler like object `
10 |
11 | .. |ExecutionEngine| replace:: :class:`~fugue.execution.execution_engine.ExecutionEngine`
12 | .. |NativeExecutionEngine| replace:: :class:`~fugue.execution.native_execution_engine.NativeExecutionEngine`
13 | .. |FugueWorkflow| replace:: :class:`~fugue.workflow.workflow.FugueWorkflow`
14 |
15 | .. |ReadJoin| replace:: Read Join tutorials on :ref:`workflow ` and :ref:`engine ` for details
16 | .. |FugueConfig| replace:: :doc:`the Fugue Configuration Tutorial `
17 | .. |PartitionTutorial| replace:: :doc:`the Partition Tutorial `
18 | .. |FugueSQLTutorial| replace:: :doc:`the Fugue SQL Tutorial `
19 | .. |DataFrameTutorial| replace:: :ref:`the DataFrame Tutorial `
20 | .. |ExecutionEngineTutorial| replace:: :doc:`the ExecutionEngine Tutorial `
21 | .. |ZipComap| replace:: :ref:`Zip & Comap `
22 | .. |LoadSave| replace:: :ref:`Load & Save `
23 | .. |AutoPersist| replace:: :ref:`Auto Persist `
24 | .. |TransformerTutorial| replace:: :doc:`the Transformer Tutorial `
25 | .. |CoTransformer| replace:: :ref:`CoTransformer `
26 | .. |CoTransformerTutorial| replace:: :doc:`the CoTransformer Tutorial `
27 | .. |FugueDataTypes| replace:: :doc:`Fugue Data Types `
28 |
29 |
30 | fugue.bag.array\_bag
31 | --------------------
32 |
33 | .. automodule:: fugue.bag.array_bag
34 | :members:
35 | :undoc-members:
36 | :show-inheritance:
37 |
38 | fugue.bag.bag
39 | -------------
40 |
41 | .. automodule:: fugue.bag.bag
42 | :members:
43 | :undoc-members:
44 | :show-inheritance:
45 |
46 |
--------------------------------------------------------------------------------
/fugue_test/bag_suite.py:
--------------------------------------------------------------------------------
1 | # pylint: disable-all
2 | # flake8: noqa
3 |
4 | from datetime import date, datetime
5 | from typing import Any
6 | from unittest import TestCase
7 | import copy
8 | import numpy as np
9 | import pandas as pd
10 | from fugue.bag import Bag, LocalBag
11 | from fugue.exceptions import FugueDataFrameOperationError, FugueDatasetEmptyError
12 | from pytest import raises
13 | from triad.collections.schema import Schema
14 |
15 |
16 | class BagTests(object):
17 | """DataFrame level general test suite.
18 | All new DataFrame types should pass this test suite.
19 | """
20 |
21 | class Tests(TestCase):
22 | @classmethod
23 | def setUpClass(cls):
24 | pass
25 |
26 | @classmethod
27 | def tearDownClass(cls):
28 | pass
29 |
30 | def bg(self, data: Any = None) -> Bag: # pragma: no cover
31 | raise NotImplementedError
32 |
33 | def test_init_basic(self):
34 | raises(Exception, lambda: self.bg())
35 | bg = self.bg([])
36 | assert bg.empty
37 | assert copy.copy(bg) is bg
38 | assert copy.deepcopy(bg) is bg
39 |
40 | def test_peek(self):
41 | bg = self.bg([])
42 | raises(FugueDatasetEmptyError, lambda: bg.peek())
43 |
44 | bg = self.bg(["x"])
45 | assert not bg.is_bounded or 1 == bg.count()
46 | assert not bg.empty
47 | assert "x" == bg.peek()
48 |
49 | def test_as_array(self):
50 | bg = self.bg([2, 1, "a"])
51 | assert set([1, 2, "a"]) == set(bg.as_array())
52 |
53 | def test_as_array_special_values(self):
54 | bg = self.bg([2, None, "a"])
55 | assert set([None, 2, "a"]) == set(bg.as_array())
56 |
57 | bg = self.bg([np.float16(0.1)])
58 | assert set([np.float16(0.1)]) == set(bg.as_array())
59 |
60 | def test_head(self):
61 | bg = self.bg([])
62 | assert [] == bg.head(0).as_array()
63 | assert [] == bg.head(1).as_array()
64 | bg = self.bg([["a", 1]])
65 | if bg.is_bounded:
66 | assert [["a", 1]] == bg.head(1).as_array()
67 | assert [] == bg.head(0).as_array()
68 |
69 | bg = self.bg([1, 2, 3, 4])
70 | assert 2 == bg.head(2).count()
71 | bg = self.bg([1, 2, 3, 4])
72 | assert 4 == bg.head(10).count()
73 | h = bg.head(10)
74 | assert h.is_local and h.is_bounded
75 |
76 | def test_show(self):
77 | bg = self.bg(["a", 1])
78 | bg.show()
79 | bg.show(n=0)
80 | bg.show(n=1)
81 | bg.show(n=2)
82 | bg.show(title="title")
83 | bg.metadata["m"] = 1
84 | bg.show()
85 |
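 86 | # A hedged sketch of how a backend reuses this suite (ArrayBag is used as an
 87 | # example implementation, mirroring tests/fugue/bag/test_array_bag.py):
 88 | #
 89 | #   from fugue.bag.array_bag import ArrayBag
 90 | #
 91 | #   class ArrayBagTests(BagTests.Tests):
 92 | #       def bg(self, data=None):
 93 | #           return ArrayBag(data)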
--------------------------------------------------------------------------------
/fugue/workflow/_workflow_context.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict
2 | from uuid import uuid4
3 |
4 | from adagio.instances import (
5 | NoOpCache,
6 | ParallelExecutionEngine,
7 | WorkflowContext,
8 | WorkflowHooks,
9 | )
10 | from adagio.specs import WorkflowSpec
11 | from fugue.constants import FUGUE_CONF_WORKFLOW_CONCURRENCY
12 | from fugue.dataframe import DataFrame
13 | from fugue.execution.execution_engine import ExecutionEngine
14 | from fugue.rpc.base import make_rpc_server, RPCServer
15 | from fugue.workflow._checkpoint import CheckpointPath
16 | from triad import SerializableRLock, ParamDict
17 |
18 |
19 | class FugueWorkflowContext(WorkflowContext):
20 | def __init__(
21 | self,
22 | engine: ExecutionEngine,
23 | compile_conf: Any = None,
24 | cache: Any = NoOpCache,
25 | workflow_engine: Any = None,
26 | hooks: Any = WorkflowHooks,
27 | ):
28 | conf = ParamDict(compile_conf)
29 | self._fugue_engine = engine
30 | self._lock = SerializableRLock()
31 | self._results: Dict[Any, DataFrame] = {}
32 | self._execution_id = ""
33 | self._checkpoint_path = CheckpointPath(self.execution_engine)
34 | self._rpc_server = make_rpc_server(engine.conf)
35 | if workflow_engine is None:
36 | workflow_engine = ParallelExecutionEngine(
37 | conf.get_or_throw(FUGUE_CONF_WORKFLOW_CONCURRENCY, int),
38 | self,
39 | )
40 | super().__init__(
41 | cache=cache,
42 | engine=workflow_engine,
43 | hooks=hooks,
44 | logger=self.execution_engine.log,
45 | config=conf,
46 | )
47 |
48 | def run(self, spec: WorkflowSpec, conf: Dict[str, Any]) -> None:
49 | try:
50 | self._execution_id = str(uuid4())
51 | self._checkpoint_path = CheckpointPath(self.execution_engine)
52 | self._checkpoint_path.init_temp_path(self._execution_id)
53 | self._rpc_server.start()
54 | super().run(spec, conf)
55 | finally:
56 | self._checkpoint_path.remove_temp_path()
57 | self._rpc_server.stop()
58 | self._execution_id = ""
59 |
60 | @property
61 | def checkpoint_path(self) -> CheckpointPath:
62 | return self._checkpoint_path
63 |
64 | @property
65 | def execution_engine(self) -> ExecutionEngine:
66 | return self._fugue_engine
67 |
68 | @property
69 | def rpc_server(self) -> RPCServer:
70 | return self._rpc_server
71 |
72 | def set_result(self, key: Any, df: DataFrame) -> None:
73 | with self._lock:
74 | self._results[key] = df
75 |
76 | def get_result(self, key: Any) -> DataFrame:
77 | with self._lock:
78 | return self._results[key]
79 |
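 80 | # A minimal construction sketch (``my_engine`` is an assumed ExecutionEngine);
 81 | # FUGUE_CONF_WORKFLOW_CONCURRENCY must be present in the compile conf because
 82 | # it is read with get_or_throw above.
 83 | #
 84 | #   ctx = FugueWorkflowContext(
 85 | #       engine=my_engine,
 86 | #       compile_conf={FUGUE_CONF_WORKFLOW_CONCURRENCY: 2},
 87 | #   )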
--------------------------------------------------------------------------------
/docs/api/fugue.dataset.rst:
--------------------------------------------------------------------------------
1 | fugue.dataset
2 | ==============
3 |
4 | .. |SchemaLikeObject| replace:: :ref:`Schema like object `
5 | .. |ParamsLikeObject| replace:: :ref:`Parameters like object `
6 | .. |DataFrameLikeObject| replace:: :ref:`DataFrame like object `
7 | .. |DataFramesLikeObject| replace:: :ref:`DataFrames like object `
8 | .. |PartitionLikeObject| replace:: :ref:`Partition like object `
9 | .. |RPCHandlerLikeObject| replace:: :ref:`RPChandler like object `
10 |
11 | .. |ExecutionEngine| replace:: :class:`~fugue.execution.execution_engine.ExecutionEngine`
12 | .. |NativeExecutionEngine| replace:: :class:`~fugue.execution.native_execution_engine.NativeExecutionEngine`
13 | .. |FugueWorkflow| replace:: :class:`~fugue.workflow.workflow.FugueWorkflow`
14 |
15 | .. |ReadJoin| replace:: Read Join tutorials on :ref:`workflow ` and :ref:`engine ` for details
16 | .. |FugueConfig| replace:: :doc:`the Fugue Configuration Tutorial `
17 | .. |PartitionTutorial| replace:: :doc:`the Partition Tutorial `
18 | .. |FugueSQLTutorial| replace:: :doc:`the Fugue SQL Tutorial `
19 | .. |DataFrameTutorial| replace:: :ref:`the DataFrame Tutorial `
20 | .. |ExecutionEngineTutorial| replace:: :doc:`the ExecutionEngine Tutorial `
21 | .. |ZipComap| replace:: :ref:`Zip & Comap `
22 | .. |LoadSave| replace:: :ref:`Load & Save `
23 | .. |AutoPersist| replace:: :ref:`Auto Persist `
24 | .. |TransformerTutorial| replace:: :doc:`the Transformer Tutorial `
25 | .. |CoTransformer| replace:: :ref:`CoTransformer `
26 | .. |CoTransformerTutorial| replace:: :doc:`the CoTransformer Tutorial `
27 | .. |FugueDataTypes| replace:: :doc:`Fugue Data Types `
28 |
29 |
30 | fugue.dataset.api
31 | -----------------
32 |
33 | .. automodule:: fugue.dataset.api
34 | :members:
35 | :undoc-members:
36 | :show-inheritance:
37 |
38 | fugue.dataset.dataset
39 | ---------------------
40 |
41 | .. automodule:: fugue.dataset.dataset
42 | :members:
43 | :undoc-members:
44 | :show-inheritance:
45 |
46 |
--------------------------------------------------------------------------------
/docs/api/fugue.extensions.rst:
--------------------------------------------------------------------------------
1 | fugue.extensions
2 | =================
3 |
4 | .. toctree::
5 | :maxdepth: 4
6 |
7 | fugue.extensions.creator
8 | fugue.extensions.outputter
9 | fugue.extensions.processor
10 | fugue.extensions.transformer
11 |
12 | .. |SchemaLikeObject| replace:: :ref:`Schema like object `
13 | .. |ParamsLikeObject| replace:: :ref:`Parameters like object `
14 | .. |DataFrameLikeObject| replace:: :ref:`DataFrame like object `
15 | .. |DataFramesLikeObject| replace:: :ref:`DataFrames like object `
16 | .. |PartitionLikeObject| replace:: :ref:`Partition like object `
17 | .. |RPCHandlerLikeObject| replace:: :ref:`RPChandler like object `
18 |
19 | .. |ExecutionEngine| replace:: :class:`~fugue.execution.execution_engine.ExecutionEngine`
20 | .. |NativeExecutionEngine| replace:: :class:`~fugue.execution.native_execution_engine.NativeExecutionEngine`
21 | .. |FugueWorkflow| replace:: :class:`~fugue.workflow.workflow.FugueWorkflow`
22 |
23 | .. |ReadJoin| replace:: Read Join tutorials on :ref:`workflow ` and :ref:`engine ` for details
24 | .. |FugueConfig| replace:: :doc:`the Fugue Configuration Tutorial `
25 | .. |PartitionTutorial| replace:: :doc:`the Partition Tutorial `
26 | .. |FugueSQLTutorial| replace:: :doc:`the Fugue SQL Tutorial `
27 | .. |DataFrameTutorial| replace:: :ref:`the DataFrame Tutorial `
28 | .. |ExecutionEngineTutorial| replace:: :doc:`the ExecutionEngine Tutorial `
29 | .. |ZipComap| replace:: :ref:`Zip & Comap `
30 | .. |LoadSave| replace:: :ref:`Load & Save `
31 | .. |AutoPersist| replace:: :ref:`Auto Persist `
32 | .. |TransformerTutorial| replace:: :doc:`the Transformer Tutorial `
33 | .. |CoTransformer| replace:: :ref:`CoTransformer `
34 | .. |CoTransformerTutorial| replace:: :doc:`the CoTransformer Tutorial `
35 | .. |FugueDataTypes| replace:: :doc:`Fugue Data Types `
36 |
37 |
38 | fugue.extensions.context
39 | ------------------------
40 |
41 | .. automodule:: fugue.extensions.context
42 | :members:
43 | :undoc-members:
44 | :show-inheritance:
45 |
46 |
--------------------------------------------------------------------------------
/tests/fugue/execution/test_execution_engine.py:
--------------------------------------------------------------------------------
1 | from typing import Type
2 |
3 | from pytest import raises
4 | from triad.collections.dict import ParamDict
5 | from triad.utils.convert import get_full_type_path
6 |
7 | from fugue import ExecutionEngine, NativeExecutionEngine, register_global_conf
8 | from fugue.constants import FUGUE_CONF_SQL_IGNORE_CASE
9 | from fugue.rpc.base import NativeRPCServer
10 | from fugue_duckdb import DuckDBEngine
11 |
12 |
13 | class _MockSQLEngine(DuckDBEngine):
14 | @property
15 | def execution_engine_constraint(self) -> Type[ExecutionEngine]:
16 | return _MockExecutionEngine
17 |
18 |
19 | class _MockExecutionEngine(NativeExecutionEngine):
20 | def __init__(self, conf=None):
21 | super().__init__(conf=conf)
22 | self._stop = 0
23 |
24 | def stop_engine(self):
25 | self._stop += 1
26 |
27 | def create_default_sql_engine(self):
28 | return _MockSQLEngine(self)
29 |
30 |
31 | class _MockRPC(NativeRPCServer):
32 | _start = 0
33 | _stop = 0
34 |
35 | def __init__(self, conf):
36 | super().__init__(conf)
37 | _MockRPC._start = 0
38 | _MockRPC._stop = 0
39 |
40 | def start_handler(self):
41 | _MockRPC._start += 1
42 |
43 | def stop_handler(self):
44 | _MockRPC._stop += 1
45 |
46 |
47 | def test_sql_engine_init():
48 | engine = _MockExecutionEngine()
49 | assert isinstance(engine.sql_engine, _MockSQLEngine)
50 |
51 | with raises(TypeError):
52 | _MockSQLEngine(NativeExecutionEngine())
53 |
54 |
55 | def test_start_stop():
56 | conf = {"fugue.rpc.server": get_full_type_path(_MockRPC)}
57 | engine = _MockExecutionEngine(conf=conf)
58 | engine.stop()
59 | assert 1 == engine._stop
60 | engine.stop() # stop will be called only once
61 | assert 1 == engine._stop
62 |
63 |
64 | def test_global_conf():
65 | register_global_conf({"ftest.a": 1})
66 | engine = _MockExecutionEngine()
67 | assert 1 == engine.conf.get_or_throw("ftest.a", int)
68 | engine = _MockExecutionEngine({"ftest.a": 2})
69 | assert 2 == engine.conf.get_or_throw("ftest.a", int)
70 | assert not engine.conf.get_or_throw(FUGUE_CONF_SQL_IGNORE_CASE, bool)
71 |
 72 |     # a duplicated key is allowed if its value matches the existing one
73 | register_global_conf({"ftest.a": 1, "ftest.b": 2}, on_dup=ParamDict.THROW)
74 | engine = _MockExecutionEngine()
75 | assert 1 == engine.conf.get_or_throw("ftest.a", int)
76 | assert 2 == engine.conf.get_or_throw("ftest.b", int)
77 |
78 | # transactional, of one value has problem, the whole conf will not be added
79 | with raises(ValueError):
80 | register_global_conf({"ftest.a": 2, "ftest.c": 3}, on_dup=ParamDict.THROW)
81 | assert 1 == engine.conf.get_or_throw("ftest.a", int)
82 | assert 2 == engine.conf.get_or_throw("ftest.b", int)
83 | assert "ftest.c" not in engine.conf
84 |
--------------------------------------------------------------------------------
/fugue_dask/_dask_sql_wrapper.py:
--------------------------------------------------------------------------------
from typing import Any, Optional

import dask.dataframe as dd

try:
    from dask.dataframe.dask_expr.io.parquet import ReadParquet

    HAS_DASK_EXPR = True  # newer dask
except ImportError:  # pragma: no cover
    HAS_DASK_EXPR = False  # older dask

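# On older dask (no dask-expr), dask-sql's Context can be used directly;
# on newer dask, the create_table override below adapts dask-expr collections.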
if not HAS_DASK_EXPR:  # pragma: no cover
    try:
        from dask_sql import Context as ContextWrapper  # pylint: disable-all
    except ImportError:  # pragma: no cover
        raise ImportError(
            "dask-sql is not installed. Please install it with `pip install dask-sql`"
        )
else:
    from triad.utils.assertion import assert_or_throw

    try:
        from dask_sql import Context
        from dask_sql.datacontainer import Statistics
        from dask_sql.input_utils import InputUtil
    except ImportError:  # pragma: no cover
        raise ImportError(
            "dask-sql is not installed. Please install it with `pip install dask-sql`"
        )

    class ContextWrapper(Context):  # type: ignore
        def create_table(
            self,
            table_name: str,
            input_table: dd.DataFrame,
            format: Optional[str] = None,  # noqa
            persist: bool = False,
            schema_name: Optional[str] = None,
            statistics: Optional[Statistics] = None,
            gpu: bool = False,
            **kwargs: Any,
        ) -> None:  # pragma: no cover
            assert_or_throw(
                isinstance(input_table, dd.DataFrame),
                lambda: ValueError(
                    f"input_table must be a dask dataframe, but got {type(input_table)}"
                ),
            )
            assert_or_throw(
                dd._dask_expr_enabled(), lambda: ValueError("Dask expr must be enabled")
            )
            schema_name = schema_name or self.schema_name

            dc = InputUtil.to_dc(
                input_table,
                table_name=table_name,
                format=format,
                persist=persist,
                gpu=gpu,
                **kwargs,
            )

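            # dask-expr keeps the read_parquet source in the expression tree;
            # extract it so dask-sql can associate the table with its file path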
            dask_filepath = None
            operations = input_table.find_operations(ReadParquet)
            for op in operations:
                dask_filepath = op._args[0]

            dc.filepath = dask_filepath
            self.schema[schema_name].filepaths[table_name.lower()] = dask_filepath

            if not statistics:
                statistics = Statistics(float("nan"))
            dc.statistics = statistics

            self.schema[schema_name].tables[table_name.lower()] = dc
            self.schema[schema_name].statistics[table_name.lower()] = statistics

--------------------------------------------------------------------------------
/docs/api/fugue.extensions.creator.rst:
--------------------------------------------------------------------------------
fugue.extensions.creator
=========================

.. |SchemaLikeObject| replace:: :ref:`Schema like object `
.. |ParamsLikeObject| replace:: :ref:`Parameters like object `
.. |DataFrameLikeObject| replace:: :ref:`DataFrame like object `
.. |DataFramesLikeObject| replace:: :ref:`DataFrames like object `
.. |PartitionLikeObject| replace:: :ref:`Partition like object `
.. |RPCHandlerLikeObject| replace:: :ref:`RPCHandler like object `

.. |ExecutionEngine| replace:: :class:`~fugue.execution.execution_engine.ExecutionEngine`
.. |NativeExecutionEngine| replace:: :class:`~fugue.execution.native_execution_engine.NativeExecutionEngine`
.. |FugueWorkflow| replace:: :class:`~fugue.workflow.workflow.FugueWorkflow`

.. |ReadJoin| replace:: Read Join tutorials on :ref:`workflow ` and :ref:`engine ` for details
.. |FugueConfig| replace:: :doc:`the Fugue Configuration Tutorial `
.. |PartitionTutorial| replace:: :doc:`the Partition Tutorial `
.. |FugueSQLTutorial| replace:: :doc:`the Fugue SQL Tutorial `
.. |DataFrameTutorial| replace:: :ref:`the DataFrame Tutorial `
.. |ExecutionEngineTutorial| replace:: :doc:`the ExecutionEngine Tutorial `
.. |ZipComap| replace:: :ref:`Zip & Comap `
.. |LoadSave| replace:: :ref:`Load & Save `
.. |AutoPersist| replace:: :ref:`Auto Persist `
.. |TransformerTutorial| replace:: :doc:`the Transformer Tutorial `
.. |CoTransformer| replace:: :ref:`CoTransformer `
.. |CoTransformerTutorial| replace:: :doc:`the CoTransformer Tutorial `
.. |FugueDataTypes| replace:: :doc:`Fugue Data Types `


fugue.extensions.creator.convert
--------------------------------

.. automodule:: fugue.extensions.creator.convert
   :members:
   :undoc-members:
   :show-inheritance:

fugue.extensions.creator.creator
--------------------------------

.. automodule:: fugue.extensions.creator.creator
   :members:
   :undoc-members:
   :show-inheritance:

--------------------------------------------------------------------------------
/docs/api_ibis/fugue_ibis.execution.rst:
--------------------------------------------------------------------------------
fugue\_ibis.execution
======================

.. |SchemaLikeObject| replace:: :ref:`Schema like object `
.. |ParamsLikeObject| replace:: :ref:`Parameters like object `
.. |DataFrameLikeObject| replace:: :ref:`DataFrame like object `
.. |DataFramesLikeObject| replace:: :ref:`DataFrames like object `
.. |PartitionLikeObject| replace:: :ref:`Partition like object `
.. |RPCHandlerLikeObject| replace:: :ref:`RPCHandler like object `

.. |ExecutionEngine| replace:: :class:`~fugue.execution.execution_engine.ExecutionEngine`
.. |NativeExecutionEngine| replace:: :class:`~fugue.execution.native_execution_engine.NativeExecutionEngine`
.. |FugueWorkflow| replace:: :class:`~fugue.workflow.workflow.FugueWorkflow`

.. |ReadJoin| replace:: Read Join tutorials on :ref:`workflow ` and :ref:`engine ` for details
.. |FugueConfig| replace:: :doc:`the Fugue Configuration Tutorial `
.. |PartitionTutorial| replace:: :doc:`the Partition Tutorial `
.. |FugueSQLTutorial| replace:: :doc:`the Fugue SQL Tutorial `
.. |DataFrameTutorial| replace:: :ref:`the DataFrame Tutorial `
.. |ExecutionEngineTutorial| replace:: :doc:`the ExecutionEngine Tutorial `
.. |ZipComap| replace:: :ref:`Zip & Comap `
.. |LoadSave| replace:: :ref:`Load & Save `
.. |AutoPersist| replace:: :ref:`Auto Persist `
.. |TransformerTutorial| replace:: :doc:`the Transformer Tutorial `
.. |CoTransformer| replace:: :ref:`CoTransformer `
.. |CoTransformerTutorial| replace:: :doc:`the CoTransformer Tutorial `
.. |FugueDataTypes| replace:: :doc:`Fugue Data Types `


fugue\_ibis.execution.ibis\_engine
----------------------------------

.. automodule:: fugue_ibis.execution.ibis_engine
   :members:
   :undoc-members:
   :show-inheritance:

fugue\_ibis.execution.pandas\_backend
-------------------------------------

.. automodule:: fugue_ibis.execution.pandas_backend
   :members:
   :undoc-members:
   :show-inheritance:

--------------------------------------------------------------------------------
/docs/api/fugue.extensions.outputter.rst:
--------------------------------------------------------------------------------
fugue.extensions.outputter
===========================

.. |SchemaLikeObject| replace:: :ref:`Schema like object `
.. |ParamsLikeObject| replace:: :ref:`Parameters like object `
.. |DataFrameLikeObject| replace:: :ref:`DataFrame like object `
.. |DataFramesLikeObject| replace:: :ref:`DataFrames like object `
.. |PartitionLikeObject| replace:: :ref:`Partition like object `
.. |RPCHandlerLikeObject| replace:: :ref:`RPCHandler like object `

.. |ExecutionEngine| replace:: :class:`~fugue.execution.execution_engine.ExecutionEngine`
.. |NativeExecutionEngine| replace:: :class:`~fugue.execution.native_execution_engine.NativeExecutionEngine`
.. |FugueWorkflow| replace:: :class:`~fugue.workflow.workflow.FugueWorkflow`

.. |ReadJoin| replace:: Read Join tutorials on :ref:`workflow ` and :ref:`engine ` for details
.. |FugueConfig| replace:: :doc:`the Fugue Configuration Tutorial `
.. |PartitionTutorial| replace:: :doc:`the Partition Tutorial `
.. |FugueSQLTutorial| replace:: :doc:`the Fugue SQL Tutorial `
.. |DataFrameTutorial| replace:: :ref:`the DataFrame Tutorial `
.. |ExecutionEngineTutorial| replace:: :doc:`the ExecutionEngine Tutorial `
.. |ZipComap| replace:: :ref:`Zip & Comap `
.. |LoadSave| replace:: :ref:`Load & Save `
.. |AutoPersist| replace:: :ref:`Auto Persist `
.. |TransformerTutorial| replace:: :doc:`the Transformer Tutorial `
.. |CoTransformer| replace:: :ref:`CoTransformer `
.. |CoTransformerTutorial| replace:: :doc:`the CoTransformer Tutorial `
.. |FugueDataTypes| replace:: :doc:`Fugue Data Types `


fugue.extensions.outputter.convert
----------------------------------

.. automodule:: fugue.extensions.outputter.convert
   :members:
   :undoc-members:
   :show-inheritance:

fugue.extensions.outputter.outputter
------------------------------------

.. automodule:: fugue.extensions.outputter.outputter
   :members:
   :undoc-members:
   :show-inheritance:

--------------------------------------------------------------------------------
/docs/api/fugue.extensions.processor.rst:
--------------------------------------------------------------------------------
fugue.extensions.processor
===========================

.. |SchemaLikeObject| replace:: :ref:`Schema like object `
.. |ParamsLikeObject| replace:: :ref:`Parameters like object `
.. |DataFrameLikeObject| replace:: :ref:`DataFrame like object `
.. |DataFramesLikeObject| replace:: :ref:`DataFrames like object `
.. |PartitionLikeObject| replace:: :ref:`Partition like object `
.. |RPCHandlerLikeObject| replace:: :ref:`RPCHandler like object `

.. |ExecutionEngine| replace:: :class:`~fugue.execution.execution_engine.ExecutionEngine`
.. |NativeExecutionEngine| replace:: :class:`~fugue.execution.native_execution_engine.NativeExecutionEngine`
.. |FugueWorkflow| replace:: :class:`~fugue.workflow.workflow.FugueWorkflow`

.. |ReadJoin| replace:: Read Join tutorials on :ref:`workflow ` and :ref:`engine ` for details
.. |FugueConfig| replace:: :doc:`the Fugue Configuration Tutorial `
.. |PartitionTutorial| replace:: :doc:`the Partition Tutorial `
.. |FugueSQLTutorial| replace:: :doc:`the Fugue SQL Tutorial `
.. |DataFrameTutorial| replace:: :ref:`the DataFrame Tutorial `
.. |ExecutionEngineTutorial| replace:: :doc:`the ExecutionEngine Tutorial `
.. |ZipComap| replace:: :ref:`Zip & Comap `
.. |LoadSave| replace:: :ref:`Load & Save `
.. |AutoPersist| replace:: :ref:`Auto Persist `
.. |TransformerTutorial| replace:: :doc:`the Transformer Tutorial `
.. |CoTransformer| replace:: :ref:`CoTransformer `
.. |CoTransformerTutorial| replace:: :doc:`the CoTransformer Tutorial `
.. |FugueDataTypes| replace:: :doc:`Fugue Data Types `


fugue.extensions.processor.convert
----------------------------------

.. automodule:: fugue.extensions.processor.convert
   :members:
   :undoc-members:
   :show-inheritance:

fugue.extensions.processor.processor
------------------------------------

.. automodule:: fugue.extensions.processor.processor
   :members:
   :undoc-members:
   :show-inheritance:

--------------------------------------------------------------------------------
/tests/fugue/execution/test_api.py:
--------------------------------------------------------------------------------
from pytest import raises

import fugue.api as fa
from fugue import NativeExecutionEngine, register_global_conf
from fugue.exceptions import FugueInvalidOperation


class MyEngine(NativeExecutionEngine):
    def __init__(self, conf=None):
        super().__init__(conf)
        self.pre_enter_state = []
        self.post_exit_state = []
        self.stop_calls = 0

    def on_enter_context(self) -> None:
        self.pre_enter_state += [self.in_context]

    def on_exit_context(self) -> None:
        self.post_exit_state += [self.in_context]

    def stop_engine(self) -> None:
        self.stop_calls += 1


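# Walks the engine context lifecycle: a global engine, nested context engines,
# and how each layer's conf shadows the global conf until it exits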
def test_engine_operations():
    o = MyEngine()
    assert fa.get_current_conf().get("fugue.x", 0) == 0
    register_global_conf({"fugue.x": 1})
    assert fa.get_current_conf().get("fugue.x", 0) == 1
    e = fa.set_global_engine(o, {"fugue.x": 2})
    assert e.pre_enter_state == [False]
    assert e.post_exit_state == []
    assert fa.get_current_conf().get("fugue.x", 0) == 2
    assert isinstance(e, NativeExecutionEngine)
    assert e.in_context and e.is_global
    assert fa.get_context_engine() is e
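    # a context engine temporarily shadows the global engine and its conf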
    with fa.engine_context("duckdb", {"fugue.x": 3}) as e2:
        assert fa.get_current_conf().get("fugue.x", 0) == 3
        assert fa.get_context_engine() is e2
        assert not e2.is_global and e2.in_context
        with e.as_context():
            assert e.pre_enter_state == [False, True]
            assert e.post_exit_state == []
            assert fa.get_current_conf().get("fugue.x", 0) == 2
            assert not e2.is_global and e2.in_context
            assert e.in_context and e.is_global
            assert fa.get_context_engine() is e
            assert e.stop_calls == 0
        assert e.pre_enter_state == [False, True]
        assert e.post_exit_state == [True]
        assert fa.get_current_conf().get("fugue.x", 0) == 3
        assert e.in_context and e.is_global
        assert fa.get_context_engine() is e2
    assert e.stop_calls == 0
    assert e.pre_enter_state == [False, True]
    assert e.post_exit_state == [True]
    assert fa.get_current_conf().get("fugue.x", 0) == 2
    assert not e2.is_global and not e2.in_context
    assert e.in_context and e.is_global
    e3 = fa.set_global_engine("duckdb", {"fugue.x": 4})
    assert e.stop_calls == 1
    assert e.pre_enter_state == [False, True]
    assert e.post_exit_state == [True, False]
    assert fa.get_current_conf().get("fugue.x", 0) == 4
    assert not e.in_context and not e.is_global
    assert e3.in_context and e3.is_global
    fa.clear_global_engine()
    assert not e3.in_context and not e3.is_global
    assert fa.get_current_conf().get("fugue.x", 0) == 1
    raises(FugueInvalidOperation, lambda: fa.get_context_engine())

--------------------------------------------------------------------------------
/docs/api/fugue.column.rst:
--------------------------------------------------------------------------------
fugue.column
=============

.. |SchemaLikeObject| replace:: :ref:`Schema like object `
.. |ParamsLikeObject| replace:: :ref:`Parameters like object `
.. |DataFrameLikeObject| replace:: :ref:`DataFrame like object `
.. |DataFramesLikeObject| replace:: :ref:`DataFrames like object `
.. |PartitionLikeObject| replace:: :ref:`Partition like object `
.. |RPCHandlerLikeObject| replace:: :ref:`RPCHandler like object `

.. |ExecutionEngine| replace:: :class:`~fugue.execution.execution_engine.ExecutionEngine`
.. |NativeExecutionEngine| replace:: :class:`~fugue.execution.native_execution_engine.NativeExecutionEngine`
.. |FugueWorkflow| replace:: :class:`~fugue.workflow.workflow.FugueWorkflow`

.. |ReadJoin| replace:: Read Join tutorials on :ref:`workflow ` and :ref:`engine ` for details
.. |FugueConfig| replace:: :doc:`the Fugue Configuration Tutorial `
.. |PartitionTutorial| replace:: :doc:`the Partition Tutorial `
.. |FugueSQLTutorial| replace:: :doc:`the Fugue SQL Tutorial `
.. |DataFrameTutorial| replace:: :ref:`the DataFrame Tutorial `
.. |ExecutionEngineTutorial| replace:: :doc:`the ExecutionEngine Tutorial `
.. |ZipComap| replace:: :ref:`Zip & Comap `
.. |LoadSave| replace:: :ref:`Load & Save `
.. |AutoPersist| replace:: :ref:`Auto Persist `
.. |TransformerTutorial| replace:: :doc:`the Transformer Tutorial `
.. |CoTransformer| replace:: :ref:`CoTransformer `
.. |CoTransformerTutorial| replace:: :doc:`the CoTransformer Tutorial `
.. |FugueDataTypes| replace:: :doc:`Fugue Data Types `


fugue.column.expressions
------------------------

.. automodule:: fugue.column.expressions
   :members:
   :undoc-members:
   :show-inheritance:

fugue.column.functions
----------------------

.. automodule:: fugue.column.functions
   :members:
   :undoc-members:
   :show-inheritance:

fugue.column.sql
----------------

.. automodule:: fugue.column.sql
   :members:
   :undoc-members:
   :show-inheritance:

--------------------------------------------------------------------------------
/docs/api_ray/fugue_ray.rst:
--------------------------------------------------------------------------------
fugue\_ray
===========

.. |SchemaLikeObject| replace:: :ref:`Schema like object `
.. |ParamsLikeObject| replace:: :ref:`Parameters like object `
.. |DataFrameLikeObject| replace:: :ref:`DataFrame like object `
.. |DataFramesLikeObject| replace:: :ref:`DataFrames like object `
.. |PartitionLikeObject| replace:: :ref:`Partition like object `
.. |RPCHandlerLikeObject| replace:: :ref:`RPCHandler like object `

.. |ExecutionEngine| replace:: :class:`~fugue.execution.execution_engine.ExecutionEngine`
.. |NativeExecutionEngine| replace:: :class:`~fugue.execution.native_execution_engine.NativeExecutionEngine`
.. |FugueWorkflow| replace:: :class:`~fugue.workflow.workflow.FugueWorkflow`

.. |ReadJoin| replace:: Read Join tutorials on :ref:`workflow ` and :ref:`engine ` for details
.. |FugueConfig| replace:: :doc:`the Fugue Configuration Tutorial `
.. |PartitionTutorial| replace:: :doc:`the Partition Tutorial `
.. |FugueSQLTutorial| replace:: :doc:`the Fugue SQL Tutorial `
.. |DataFrameTutorial| replace:: :ref:`the DataFrame Tutorial `
.. |ExecutionEngineTutorial| replace:: :doc:`the ExecutionEngine Tutorial `
.. |ZipComap| replace:: :ref:`Zip & Comap `
.. |LoadSave| replace:: :ref:`Load & Save `
.. |AutoPersist| replace:: :ref:`Auto Persist `
.. |TransformerTutorial| replace:: :doc:`the Transformer Tutorial `
.. |CoTransformer| replace:: :ref:`CoTransformer `
.. |CoTransformerTutorial| replace:: :doc:`the CoTransformer Tutorial `
.. |FugueDataTypes| replace:: :doc:`Fugue Data Types `


fugue\_ray.dataframe
--------------------

.. automodule:: fugue_ray.dataframe
   :members:
   :undoc-members:
   :show-inheritance:

fugue\_ray.execution\_engine
----------------------------

.. automodule:: fugue_ray.execution_engine
   :members:
   :undoc-members:
   :show-inheritance:

fugue\_ray.registry
-------------------

.. automodule:: fugue_ray.registry
   :members:
   :undoc-members:
   :show-inheritance:

--------------------------------------------------------------------------------
/tests/fugue_ibis/test_dataframe.py:
--------------------------------------------------------------------------------
from datetime import datetime
from typing import Any
from uuid import uuid4

import pandas as pd
import pytest

import fugue.api as fe
import fugue.test as ft
from fugue import ArrowDataFrame
from fugue.exceptions import FugueDataFrameOperationError
from fugue_test.dataframe_suite import DataFrameTests

from .mock.dataframe import MockDuckDataFrame
from .mock.tester import mockibisduck_session  # noqa: F401 # pylint: disable-all


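# Runs the shared DataFrame test suite against Ibis-backed DuckDB tables;
# suite tests that don't apply to this backend are overridden as no-ops below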
@ft.fugue_test_suite("mockibisduck", mark_test=True)
class IbisDataFrameTests(DataFrameTests.Tests):
    def df(self, data: Any = None, schema: Any = None) -> MockDuckDataFrame:
        df = ArrowDataFrame(data, schema)
        name = "_" + str(uuid4())[:5]
        con = self.context.engine.sql_engine.backend
        con.create_table(name, df.native, overwrite=True)
        return MockDuckDataFrame(con.table(name), schema=schema)

    def test_init_df(self):
        df = self.df([["x", 1]], "a:str,b:int")
        df = MockDuckDataFrame(df.native, "a:str,b:long")
        assert df.schema == "a:str,b:long"

    def test_is_local(self):
        df = self.df([["x", 1]], "a:str,b:int")
        assert not fe.is_local(df)
        assert fe.is_bounded(df)

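    # no-op override: skips the base suite's map type test for this backend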
    def test_map_type(self):
        pass

    def test_as_arrow(self):
        # empty
        df = self.df([], "a:int,b:int")
        assert [] == list(ArrowDataFrame(df.as_arrow()).as_dict_iterable())
        # pd.NaT
        df = self.df([[pd.NaT, 1]], "a:datetime,b:int")
        assert [dict(a=None, b=1)] == list(
            ArrowDataFrame(df.as_arrow()).as_dict_iterable()
        )
        # pandas timestamps
        df = self.df([[pd.Timestamp("2020-01-01"), 1]], "a:datetime,b:int")
        assert [dict(a=datetime(2020, 1, 1), b=1)] == list(
            ArrowDataFrame(df.as_arrow()).as_dict_iterable()
        )

    def test_deep_nested_types(self):
        pass

    def test_list_type(self):
        pass

    def test_native_table(self):
        df = self.df([["x", 1]], "a:str,b:int").native
        assert fe.get_schema(fe.rename(df, dict())) == "a:str,b:int"
        assert fe.get_schema(fe.rename(df, dict(a="c"))) == "c:str,b:int"

        with pytest.raises(Exception):
            fe.rename(df, dict(a="b"))

        with pytest.raises(FugueDataFrameOperationError):
            fe.rename(df, dict(x="y"))

        assert fe.get_schema(fe.drop_columns(df, [])) == "a:str,b:int"
        assert fe.get_schema(fe.drop_columns(df, ["a"])) == "b:int"

        with pytest.raises(FugueDataFrameOperationError):
            fe.get_schema(fe.drop_columns(df, ["a", "b"]))

        with pytest.raises(FugueDataFrameOperationError):
            fe.get_schema(fe.drop_columns(df, ["a", "c"]))

--------------------------------------------------------------------------------
/fugue/__init__.py:
--------------------------------------------------------------------------------
# flake8: noqa
from triad.collections import Schema

from fugue.api import out_transform, transform
from fugue.bag.array_bag import ArrayBag
from fugue.bag.bag import Bag, BagDisplay
from fugue.collections.partition import PartitionCursor, PartitionSpec
from fugue.collections.sql import StructuredRawSQL, TempTableName
from fugue.collections.yielded import PhysicalYielded, Yielded
from fugue.constants import register_global_conf
from fugue.dataframe.array_dataframe import ArrayDataFrame
from fugue.dataframe.arrow_dataframe import ArrowDataFrame
from fugue.dataframe.dataframe import (
    AnyDataFrame,
    DataFrame,
    DataFrameDisplay,
    LocalBoundedDataFrame,
    LocalDataFrame,
)
from fugue.dataframe.dataframe_iterable_dataframe import (
    IterableArrowDataFrame,
    IterablePandasDataFrame,
    LocalDataFrameIterableDataFrame,
)
from fugue.dataframe.dataframes import DataFrames
from fugue.dataframe.iterable_dataframe import IterableDataFrame
from fugue.dataframe.pandas_dataframe import PandasDataFrame
from fugue.dataset import (
    AnyDataset,
    Dataset,
    DatasetDisplay,
    as_fugue_dataset,
    get_dataset_display,
)
from fugue.execution.execution_engine import (
    AnyExecutionEngine,
    EngineFacet,
    ExecutionEngine,
    MapEngine,
    SQLEngine,
)
from fugue.execution.factory import (
    is_pandas_or,
    make_execution_engine,
    make_sql_engine,
    register_default_execution_engine,
    register_default_sql_engine,
    register_execution_engine,
    register_sql_engine,
)
from fugue.execution.native_execution_engine import (
    NativeExecutionEngine,
    PandasMapEngine,
    QPDPandasEngine,
)
from fugue.extensions.creator import Creator, creator, register_creator
from fugue.extensions.outputter import Outputter, outputter, register_outputter
from fugue.extensions.processor import Processor, processor, register_processor
from fugue.extensions.transformer import (
    CoTransformer,
    OutputCoTransformer,
    OutputTransformer,
    Transformer,
    cotransformer,
    output_cotransformer,
    output_transformer,
    register_output_transformer,
    register_transformer,
    transformer,
)
from fugue.registry import _register
from fugue.rpc import (
    EmptyRPCHandler,
    RPCClient,
    RPCFunc,
    RPCHandler,
    RPCServer,
    make_rpc_server,
    to_rpc_handler,
)
from fugue.sql.api import fugue_sql_flow as fsql
from fugue.sql.workflow import FugueSQLWorkflow
from fugue.workflow._workflow_context import FugueWorkflowContext
from fugue.workflow.module import module
from fugue.workflow.workflow import FugueWorkflow, WorkflowDataFrame, WorkflowDataFrames
from fugue_version import __version__

from .dev import *

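# wire up Fugue's built-in extensions and engines (defined in fugue.registry)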
_register()

--------------------------------------------------------------------------------
/docs/api/fugue.collections.rst:
--------------------------------------------------------------------------------
1 | fugue.collections
2 | ==================
3 |
4 | .. |SchemaLikeObject| replace:: :ref:`Schema like object `
5 | .. |ParamsLikeObject| replace:: :ref:`Parameters like object `
6 | .. |DataFrameLikeObject| replace:: :ref:`DataFrame like object `
7 | .. |DataFramesLikeObject| replace:: :ref:`DataFrames like object `
8 | .. |PartitionLikeObject| replace:: :ref:`Partition like object