├── altair_transform
    ├── tests
    │   ├── __init__.py
    │   ├── test_examples.py
    │   ├── test_driver.py
    │   ├── test_core.py
    │   ├── test_vegaexpr.py
    │   └── test_extract.py
    ├── utils
    │   ├── tests
    │   │   ├── __init__.py
    │   │   ├── test_evaljs.py
    │   │   ├── test_parser.py
    │   │   ├── test_data.py
    │   │   ├── test_timeunit.py
    │   │   └── _testcases.py
    │   ├── __init__.py
    │   ├── ast.py
    │   ├── data.py
    │   ├── timeunit.py
    │   ├── _evaljs.py
    │   ├── _parser.py
    │   └── _parser_Parser_parsetab.py
    ├── transform
    │   ├── tests
    │   │   ├── __init__.py
    │   │   ├── test_impute.py
    │   │   ├── test_fold.py
    │   │   ├── test_filter.py
    │   │   ├── test_quantile.py
    │   │   ├── test_pivot.py
    │   │   ├── test_flatten.py
    │   │   ├── test_window.py
    │   │   ├── test_lookup.py
    │   │   ├── test_timeunit.py
    │   │   ├── test_aggregate.py
    │   │   ├── test_bin.py
    │   │   ├── test_regression.py
    │   │   └── test_transform.py
    │   ├── __init__.py
    │   ├── timeunit.py
    │   ├── sample.py
    │   ├── calculate.py
    │   ├── visitor.py
    │   ├── pivot.py
    │   ├── joinaggregate.py
    │   ├── fold.py
    │   ├── flatten.py
    │   ├── quantile.py
    │   ├── bin.py
    │   ├── lookup.py
    │   ├── impute.py
    │   ├── aggregate.py
    │   ├── window.py
    │   ├── filter.py
    │   ├── vega_utils.py
    │   └── regression.py
    ├── __init__.py
    ├── conftest.py
    ├── core.py
    ├── driver.py
    └── extract.py
├── requirements_dev.txt
├── requirements_driver.txt
├── images
    ├── histogram.png
    └── random_walk.png
├── requirements.txt
├── pytest.ini
├── MANIFEST.in
├── setup.cfg
├── pyproject.toml
├── .travis.yml
├── Makefile
├── mypy.ini
├── CHANGES.md
├── .github
    └── workflows
        ├── lint.yml
        └── build.yml
├── RELEASING.md
├── LICENSE
├── .gitignore
├── setup.py
└── README.md

/altair_transform/tests/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/altair_transform/utils/tests/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/altair_transform/transform/tests/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/requirements_dev.txt:
--------------------------------------------------------------------------------
black
flake8
mypy
pytest
--------------------------------------------------------------------------------
/requirements_driver.txt:
--------------------------------------------------------------------------------
altair_saver
altair_viewer
selenium
--------------------------------------------------------------------------------
/images/histogram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/altair-viz/altair-transform/HEAD/images/histogram.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
ply
altair>=4.0
numpy
pandas
dataclasses; python_version < '3.7'
--------------------------------------------------------------------------------
/images/random_walk.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/altair-viz/altair-transform/HEAD/images/random_walk.png
--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
[pytest]
filterwarnings =
    error
    ignore::DeprecationWarning
    ignore::altair.utils.AltairDeprecationWarning
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
include *.md
include *.ini
include Makefile
include LICENSE
include requirements.txt
include requirements_dev.txt
include requirements_driver.txt
recursive-include altair_transform *.py *.md *.json
--------------------------------------------------------------------------------
/altair_transform/utils/__init__.py:
--------------------------------------------------------------------------------
from ._parser import parser, Parser
from ._evaljs import evaljs, undefined, JSRegex
from .data import to_dataframe

__all__ = ["parser", "Parser", "evaljs", "to_dataframe", "undefined", "JSRegex"]
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
[flake8]
exclude = altair_transform/utils/_parser_Parser_parsetab.py
max-line-length = 88
ignore = E203, E266, E501, W503
max-complexity = 18
select = B,C,E,F,W,T4,B9

[metadata]
description-file = README.md
license_file = LICENSE
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
[tool.black]
line-length = 88
target-version = ['py36', 'py37', 'py38']
include = '\.pyi?$'
exclude = '''
/(
    \.eggs
  | \.git
  | \.hg
  | \.mypy_cache
  | \.tox
  | \.venv
  | _build
  | build
  | dist
)/
| altair_transform/utils/_parser_Parser_parsetab.py
'''
--------------------------------------------------------------------------------
/altair_transform/transform/__init__.py:
--------------------------------------------------------------------------------
from .visitor import visit  # noqa: F401

# These submodules register appropriate visitors.
from . import (  # noqa: F401
    aggregate,
    bin,
    calculate,
    filter,
    flatten,
    fold,
    impute,
    joinaggregate,
    lookup,
    pivot,
    quantile,
    regression,
    sample,
    timeunit,
    window,
)
--------------------------------------------------------------------------------
5 | """ 6 | __version__ = "0.3.0.dev0" 7 | __all__ = ["apply", "extract_data", "transform_chart", "extract_transform"] 8 | 9 | from altair_transform.core import ( 10 | apply, 11 | extract_data, 12 | transform_chart, 13 | extract_transform, 14 | ) 15 | -------------------------------------------------------------------------------- /altair_transform/transform/timeunit.py: -------------------------------------------------------------------------------- 1 | import altair as alt 2 | import pandas as pd 3 | from .visitor import visit 4 | from ..utils.timeunit import compute_timeunit 5 | 6 | 7 | @visit.register(alt.TimeUnitTransform) 8 | def visit_timeunit(transform: alt.TimeUnitTransform, df: pd.DataFrame) -> pd.DataFrame: 9 | transform = transform.to_dict() 10 | df[transform["as"]] = compute_timeunit( 11 | df[transform["field"]], transform["timeUnit"] 12 | ) 13 | return df 14 | -------------------------------------------------------------------------------- /altair_transform/transform/sample.py: -------------------------------------------------------------------------------- 1 | import altair as alt 2 | import numpy as np 3 | import pandas as pd 4 | from .visitor import visit 5 | 6 | 7 | @visit.register(alt.SampleTransform) 8 | def visit_sample(transform: alt.SampleTransform, df: pd.DataFrame) -> pd.DataFrame: 9 | transform = transform.to_dict() 10 | sample = transform["sample"] 11 | 12 | if sample < df.shape[0]: 13 | index = np.sort(np.random.permutation(df.shape[0])[:sample]) 14 | df = df.iloc[index] 15 | return df 16 | -------------------------------------------------------------------------------- /altair_transform/transform/calculate.py: -------------------------------------------------------------------------------- 1 | import altair as alt 2 | import pandas as pd 3 | from .visitor import visit 4 | from ..vegaexpr import eval_vegajs 5 | 6 | 7 | @visit.register(alt.CalculateTransform) 8 | def visit_calculate( 9 | transform: alt.CalculateTransform, df: pd.DataFrame 10 | ) -> pd.DataFrame: 11 | transform = transform.to_dict() 12 | col = transform["as"] 13 | calc = transform["calculate"] 14 | df[col] = df.apply(lambda datum: eval_vegajs(calc, datum), axis=1) 15 | return df 16 | -------------------------------------------------------------------------------- /altair_transform/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import altair_transform.driver 3 | 4 | 5 | @pytest.fixture(scope="session") 6 | def driver(): 7 | try: 8 | from altair_saver import SeleniumSaver 9 | except (ImportError, ModuleNotFoundError): 10 | pytest.skip("altair_saver not importable; cannot run driver tests.") 11 | if not SeleniumSaver.enabled(): 12 | pytest.skip("selenium not properly configured; cannot run driver tests.") 13 | yield altair_transform.driver 14 | SeleniumSaver._stop_serving() 15 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | matrix: 4 | include: 5 | - python: 3.6 6 | - python: 3.7 7 | - python: 3.8 8 | 9 | env: 10 | global: 11 | - TEST_DIR=/tmp/_altair_transform/ 12 | 13 | before_install: 14 | - pip install pip --upgrade; 15 | - pip install -r requirements_dev.txt 16 | - mkdir -p $TEST_DIR 17 | 18 | install: 19 | - pip install .; 20 | 21 | script: 22 | - black --check . 
/.travis.yml:
--------------------------------------------------------------------------------
language: python

matrix:
  include:
    - python: 3.6
    - python: 3.7
    - python: 3.8

env:
  global:
    - TEST_DIR=/tmp/_altair_transform/

before_install:
  - pip install pip --upgrade;
  - pip install -r requirements_dev.txt
  - mkdir -p $TEST_DIR

install:
  - pip install .;

script:
  - black --check .
  - python -m flake8 altair_transform
  - python -m mypy altair_transform
  - cd $TEST_DIR && python -m pytest --pyargs --doctest-modules altair_transform;
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
test :
	black .
	python -m flake8 altair_transform
	python -m mypy altair_transform
	rm -rf build
	python setup.py build &&\
	cd build/lib &&\
	python -m pytest --pyargs --doctest-modules altair_transform

test-coverage:
	python setup.py build &&\
	cd build/lib &&\
	python -m pytest --pyargs --doctest-modules --cov=altair_transform --cov-report term altair_transform

test-coverage-html:
	python setup.py build &&\
	cd build/lib &&\
	python -m pytest --pyargs --doctest-modules --cov=altair_transform --cov-report html altair_transform
--------------------------------------------------------------------------------
/mypy.ini:
--------------------------------------------------------------------------------
[mypy]
python_version = 3.7

[mypy-altair_transform.utils._parser_Parser_parsetab]
ignore_errors = True

[mypy-altair.*]
ignore_missing_imports = True

[mypy-altair_saver.*]
ignore_missing_imports = True

[mypy-altair_viewer.*]
ignore_missing_imports = True

[mypy-numpy.*]
ignore_missing_imports = True

[mypy-pandas.*]
ignore_missing_imports = True

[mypy-ply.*]
ignore_missing_imports = True

[mypy-pytest.*]
ignore_missing_imports = True

[mypy-scipy.*]
ignore_missing_imports = True

[mypy-selenium.*]
ignore_missing_imports = True
--------------------------------------------------------------------------------
/altair_transform/transform/visitor.py:
--------------------------------------------------------------------------------
from functools import singledispatch
from typing import Any

import altair as alt
import pandas as pd


@singledispatch
def visit(transform: Any, df: pd.DataFrame) -> pd.DataFrame:
    raise NotImplementedError(f"transform of type {type(transform)}")


@visit.register(list)
def visit_list(transform: list, df: pd.DataFrame) -> pd.DataFrame:
    for t in transform:
        df = visit(t, df)
    return df


@visit.register(dict)
def visit_dict(transform: dict, df: pd.DataFrame) -> pd.DataFrame:
    transform = alt.Transform.from_dict(transform)
    return visit(transform, df)
--------------------------------------------------------------------------------
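visitor.py above is the dispatch hub: each transform module registers a
handler on the ``visit`` singledispatch function, and lists and dicts are
unwrapped recursively. A short sketch of that flow (assumes the package is
importable; importing the subpackage registers every visitor):

    import pandas as pd
    from altair_transform.transform import visit

    df = pd.DataFrame({"x": [1, 2, 3]})
    # A list dispatches to visit_list; each dict goes to visit_dict, which
    # parses it into an altair transform object and re-dispatches it.
    out = visit([{"calculate": "datum.x + 1", "as": "y"}], df)
    assert list(out["y"]) == [2, 3, 4]
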
/CHANGES.md:
--------------------------------------------------------------------------------
# Change Log

## Version 0.3 (unreleased)

## Version 0.2 (released 2019-12-03)

### Enhancements

- vegaexpr: support date functions (), string functions (#21, #22), stats functions (#24), array functions (#26, #27), regex functions (#28)
- support sequence generators (#25)
- Support full set of bin options (#14)
- Support extraction of implicit transforms from encodings (#10, #12, #13, #15, #29, #33)

### Bug Fixes

- Fix issue with fold transform (#5)

### Maintenance

- Compatibility with Python 3.6 & 3.8 (#16 & #17)
- Format package with [black](https://black.readthedocs.io/) (#11)

## Version 0.1 (released 2019-07-18)

Initial release, supporting most transforms from Altair 3.X
--------------------------------------------------------------------------------
/altair_transform/utils/tests/test_evaljs.py:
--------------------------------------------------------------------------------
import pytest

from altair_transform.utils import evaljs, parser
from ._testcases import extract
from ._testcases import EXPRESSIONS, JSONLY_EXPRESSIONS, NAMES


@pytest.fixture
def names():
    return NAMES


@pytest.mark.parametrize("expression", extract(EXPRESSIONS))
def test_expressions(expression, names):
    assert eval(expression, names) == evaljs(expression, names)


@pytest.mark.parametrize("expression,output", JSONLY_EXPRESSIONS)
def test_jsonly_expressions(expression, output, names):
    assert evaljs(expression, names) == output


def test_string_vs_ast():
    expression = "2 * (3 + 4)"
    parsed = parser.parse(expression)
    assert evaljs(expression) == evaljs(parsed)
--------------------------------------------------------------------------------
/altair_transform/transform/pivot.py:
--------------------------------------------------------------------------------
import altair as alt
import pandas as pd
from .visitor import visit
from .aggregate import AGG_REPLACEMENTS


@visit.register(alt.PivotTransform)
def visit_pivot(transform: alt.PivotTransform, df: pd.DataFrame) -> pd.DataFrame:
    transform = transform.to_dict()
    pivot = transform["pivot"]
    limit = transform.get("limit")
    if limit:
        vals = sorted(df[pivot].unique())[:limit]
        df = df[df[pivot].isin(vals)]
    groupby = transform.get("groupby")
    agg = transform.get("op", "sum")
    agg = AGG_REPLACEMENTS.get(agg, agg)
    out = df.pivot_table(
        columns=pivot, values=transform["value"], index=groupby, aggfunc=agg,
    ).reset_index(drop=not groupby)
    out.columns.names = [None]
    return out
--------------------------------------------------------------------------------
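A minimal sketch of the pivot visitor above (the default op is "sum";
assumes altair_transform is importable):

    import pandas as pd
    import altair_transform

    df = pd.DataFrame({"c": ["A", "A", "B"], "x": [1, 2, 5]})
    # Distinct values of "c" become columns; "x" is aggregated with sum.
    out = altair_transform.apply(df, {"pivot": "c", "value": "x"})
    # One row, with column "A" holding 1 + 2 = 3 and column "B" holding 5.
    assert out["A"].iloc[0] == 3 and out["B"].iloc[0] == 5
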
/.github/workflows/lint.yml:
--------------------------------------------------------------------------------
name: lint

on: [push, pull_request]

jobs:
  build:
    runs-on: ubuntu-latest
    name: flake8-black-mypy
    steps:
    - uses: actions/checkout@v1
    - name: Set up Python 3.8
      uses: actions/setup-python@v1
      with:
        python-version: 3.8
    - name: Lint with flake8
      run: |
        pip install flake8
        # stop the build if there are Python syntax errors or undefined names
        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
        # exit-zero treats all errors as warnings.
        flake8 . --count --exit-zero --max-complexity=10 --statistics
    - name: Check formatting with black
      run: |
        pip install black
        black --check .
    - name: Check types with mypy
      run: |
        pip install mypy
        mypy altair_transform
--------------------------------------------------------------------------------
/altair_transform/utils/tests/test_parser.py:
--------------------------------------------------------------------------------
import pytest

from altair_transform.utils import ast, Parser
from ._testcases import extract
from ._testcases import EXPRESSIONS, BAD_EXPRESSIONS, JSONLY_EXPRESSIONS


@pytest.fixture
def parser():
    return Parser()


@pytest.mark.parametrize("bad_expression", extract(BAD_EXPRESSIONS))
def test_bad_expressions(bad_expression, parser):
    with pytest.raises(ValueError):
        parser.parse(bad_expression)


@pytest.mark.parametrize("expression", extract(EXPRESSIONS))
def test_expressions(expression, parser):
    output = parser.parse(expression)
    assert isinstance(output, ast.Node)


@pytest.mark.parametrize("expression,output", JSONLY_EXPRESSIONS)
def test_jsonly_expressions(expression, output, parser):
    output = parser.parse(expression)
    assert isinstance(output, ast.Node)
--------------------------------------------------------------------------------
/altair_transform/tests/test_examples.py:
--------------------------------------------------------------------------------
import os
import re

import pytest


@pytest.fixture
def readme():
    possible_paths = [
        # Path within built distributions:
        os.path.join(os.path.dirname(__file__), "README.md"),
        # Path within source tree:
        os.path.join(os.path.dirname(__file__), "..", "..", "README.md"),
    ]

    for path in possible_paths:
        if os.path.exists(path):
            with open(path) as f:
                return f.read()

    raise ValueError("README file not found.")


def test_readme_snippets(readme):
    """Tests the code snippets from the package README."""
    regex = re.compile("```python\n(.*?)\n```", re.MULTILINE | re.DOTALL)

    codeblocks = regex.findall(readme)
    assert len(codeblocks) > 0

    namespace = {}
    for codeblock in codeblocks:
        exec(codeblock, namespace)
--------------------------------------------------------------------------------
/altair_transform/transform/joinaggregate.py:
--------------------------------------------------------------------------------
import altair as alt
import pandas as pd
from .visitor import visit
from .aggregate import AGG_REPLACEMENTS


@visit.register(alt.JoinAggregateTransform)
def visit_joinaggregate(
    transform: alt.JoinAggregateTransform, df: pd.DataFrame
) -> pd.DataFrame:
    transform = transform.to_dict()
    groupby = transform.get("groupby")
    for aggregate in transform["joinaggregate"]:
        op = aggregate["op"]
        field = aggregate["field"]
        col = aggregate["as"]

        op = AGG_REPLACEMENTS.get(op, op)
        if field == "*" and field not in df.columns:
            field = df.columns[0]

        if groupby is None:
            df[col] = df[field].aggregate(op)
        else:
            result = df.groupby(groupby)[field].aggregate(op)
            result.name = col
            df = df.join(result, on=groupby)
    return df
--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
name: build

on: [push, pull_request]

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: [ '3.6', '3.7', '3.8' ]
    name: Python ${{ matrix.python-version }}
    steps:
    - uses: actions/checkout@v1
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v1
      with:
        python-version: ${{ matrix.python-version }}
    - name: Set Up Chromedriver
      run: |
        sudo apt-get update
        sudo apt-get --only-upgrade install google-chrome-stable
        sudo apt-get -yqq install chromium-chromedriver
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install -r requirements.txt -r requirements_driver.txt
    - name: Test with pytest
      run: |
        pip install pytest
        pytest --doctest-modules altair_transform
--------------------------------------------------------------------------------
/altair_transform/transform/fold.py:
--------------------------------------------------------------------------------
import altair as alt
import pandas as pd
from .visitor import visit


@visit.register(alt.FoldTransform)
def visit_fold(transform: alt.FoldTransform, df: pd.DataFrame) -> pd.DataFrame:
    transform = transform.to_dict()
    fold = transform["fold"]
    var_name, value_name = transform.get("as", ("key", "value"))
    value_vars = [c for c in df.columns if c in fold]
    id_vars = [c for c in df.columns if c not in fold]

    # Add an index to track input order
    dfi = df.reset_index(drop=True).reset_index()
    index_name = dfi.columns[0]
    melted = dfi.melt(
        id_vars=[index_name] + id_vars,
        value_vars=value_vars,
        var_name=var_name,
        value_name=value_name,
    )
    return (
        pd.merge(melted, dfi, on=[index_name] + id_vars, how="left")
        .sort_values(index_name)
        .drop(index_name, axis=1)
        .reset_index(drop=True)
    )
--------------------------------------------------------------------------------
/altair_transform/transform/flatten.py:
--------------------------------------------------------------------------------
import altair as alt
import pandas as pd
from .visitor import visit


@visit.register(alt.FlattenTransform)
def visit_flatten(transform: alt.FlattenTransform, df: pd.DataFrame) -> pd.DataFrame:
    transform = transform.to_dict()

    fields = transform["flatten"]
    out = transform.get("as", [])

    if len(out) < len(fields):
        out = out + fields[len(out) :]
    if len(out) > len(fields):
        out = out[: len(fields)]

    if not fields:
        return df

    to_flatten = df[fields]
    others = df[[c for c in df.columns if c not in out]]

    def flatten_row(row):
        flattened = to_flatten.iloc[row].apply(pd.Series).T
        flattened.index = flattened.shape[0] * [row]
        return flattened

    flattened = pd.concat([flatten_row(i) for i in range(df.shape[0])], axis=0)
    flattened.columns = out

    return flattened.join(others).reset_index(drop=True)
--------------------------------------------------------------------------------
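A minimal sketch of the flatten visitor above (the same semantics the unit
tests further down verify; assumes altair_transform is importable):

    import pandas as pd
    import altair_transform

    df = pd.DataFrame({"x": [[1, 2], [3]], "cat": ["A", "B"]})
    # Each array element becomes its own row; scalar columns are repeated.
    out = altair_transform.apply(df, {"flatten": ["x"]})
    assert list(out["x"]) == [1, 2, 3]
    assert list(out["cat"]) == ["A", "A", "B"]
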
/altair_transform/transform/quantile.py:
--------------------------------------------------------------------------------
import altair as alt
import numpy as np
import pandas as pd
from .visitor import visit


@visit.register(alt.QuantileTransform)
def visit_quantile(transform: alt.QuantileTransform, df: pd.DataFrame) -> pd.DataFrame:
    transform = transform.to_dict()
    quantile = transform["quantile"]
    groupby = transform.get("groupby")
    pname, vname = transform.get("as", ["prob", "value"])
    probs = transform.get("probs")
    if probs is None:
        step = transform.get("step", 0.01)
        probs = np.arange(0.5 * step, 1.0, step)

    def qq(s: pd.Series) -> pd.DataFrame:
        return pd.DataFrame({pname: probs, vname: np.quantile(s, probs)})

    if groupby:
        return (
            df.groupby(groupby)[quantile]
            .apply(qq)
            .reset_index(groupby)
            .reset_index(drop=True)
        )
    else:
        return qq(df[quantile]).reset_index(drop=True)
--------------------------------------------------------------------------------
/RELEASING.md:
--------------------------------------------------------------------------------
1. Update version to, e.g. 1.0.0

   - in altair_transform/__init__.py

2. Make sure CHANGES.md is up to date for the release

3. Commit change and push to master

       git add . -u
       git commit -m "MAINT: bump version to 1.0.0"
       git push origin master

4. Tag the release:

       git tag -a v1.0.0 -m "version 1.0.0 release"
       git push origin v1.0.0

5. Build source & wheel distributions

       rm -r dist build  # clean old builds & distributions
       python setup.py sdist  # create a source distribution
       python setup.py bdist_wheel  # create a universal wheel

6. Publish to PyPI (requires correct PyPI owner permissions)

       twine upload dist/*

7. Update version to, e.g. 1.1.0dev

   - in altair_transform/__init__.py

8. Add a new changelog entry for the unreleased version

9. Commit change and push to master

       git add . -u
       git commit -m "MAINT: bump version to 1.1.0dev"
       git push origin master
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2019 Jake Vanderplas

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/altair_transform/tests/test_driver.py:
--------------------------------------------------------------------------------
import altair as alt
import pandas as pd
from pandas.testing import assert_frame_equal

# Note: driver fixture here comes from conftest.py
# These tests will be skipped if selenium driver is not available.


def test_extract_data_source(driver):
    df = pd.DataFrame({"x": [1, 2, 3], "y": ["A", "B", "C"]})
    chart = alt.Chart(df).mark_point()
    with alt.data_transformers.enable(consolidate_datasets=False):
        spec = chart.to_dict()
    df_out = driver._extract_data(spec, "source_0")
    assert_frame_equal(df, df_out)


def test_driver_apply(driver):
    df = pd.DataFrame({"x": [1, 2, 3]})
    transform = {"calculate": "2 * datum.x", "as": "y"}
    df_out = driver.apply(df, transform)

    df["y"] = 2 * df["x"]
    assert_frame_equal(df, df_out)


def test_utc_tz_offset(driver):
    # Test that UTC dates have zero offset between Python and JavaScript.
    assert driver.get_tz_offset("UTC") == pd.Timedelta(0)


def test_tz_code(driver):
    code = driver.get_tz_code()
    pd.to_datetime("2012-01-01").tz_localize(code)
--------------------------------------------------------------------------------
/altair_transform/transform/tests/test_impute.py:
--------------------------------------------------------------------------------
import altair as alt
import numpy as np
from numpy.testing import assert_equal
import pandas as pd
import pytest

import altair_transform


@pytest.mark.parametrize("method", ["value", "mean", "median", "max", "min"])
def test_impute_transform_no_groupby(method: str) -> None:
    data = pd.DataFrame({"x": [1, 2], "y": [2, 3]})
    transform = alt.ImputeTransform(
        impute="y", key="x", keyvals={"start": 0, "stop": 5}, value=0, method=method
    )
    if method == "value":
        value = 0
    else:
        value = data.y.agg(method)
    imputed = altair_transform.apply(data, transform)

    assert_equal(imputed.x.values, range(5))
    assert_equal(imputed.y[[1, 2]].values, data.y.values)
    assert_equal(imputed.y[[0, 3, 4]].values, value)


def test_impute_transform_with_groupby() -> None:
    data = pd.DataFrame(
        {"x": [1, 2, 4, 1, 3, 4], "y": [1, 2, 4, 2, 4, 5], "cat": list("AAABBB")}
    )

    transform = alt.ImputeTransform(impute="y", key="x", method="max", groupby=["cat"])

    imputed = altair_transform.apply(data, transform)
    assert_equal(imputed.x.values, np.tile(range(1, 5), 2))
    assert_equal(imputed.y.values, [1, 2, 4, 4, 2, 5, 4, 5])
--------------------------------------------------------------------------------
/altair_transform/utils/ast.py:
--------------------------------------------------------------------------------
"""Abstract syntax tree for parser"""
from dataclasses import dataclass
import typing


class Node:
    pass


@dataclass
class Expr(Node):
    value: Node


@dataclass
class BinOp(Node):
    op: str
    lhs: Expr
    rhs: Expr


@dataclass
class UnOp(Node):
    op: str
    rhs: Expr


@dataclass
class TernOp(Node):
    op: typing.Tuple[str, str]
    lhs: Expr
    mid: Expr
    rhs: Expr


@dataclass
class Number(Node):
    value: float


@dataclass
class String(Node):
    value: str


@dataclass
class Regex(Node):
    value: typing.Dict[str, str]


@dataclass
class Global(Node):
    name: str


@dataclass
class Name(Node):
    name: str


@dataclass
class List(Node):
    entries: typing.List[Expr]


@dataclass
class Object(Node):
    entries: typing.List[typing.Union[Name, typing.Tuple[Expr, Expr]]]


@dataclass
class Attr(Node):
    obj: Expr
    attr: Name


@dataclass
class Item(Node):
    obj: Expr
    item: Expr


@dataclass
class Func(Node):
    func: Expr
    args: typing.List[Expr]
--------------------------------------------------------------------------------
/altair_transform/transform/bin.py:
--------------------------------------------------------------------------------
"""Implementation of the bin transform."""
from typing import Tuple

import altair as alt
import pandas as pd
import numpy as np

from .visitor import visit
from .vega_utils import calculate_bins


def _cut(series: pd.Series, edges: np.ndarray) -> Tuple[pd.Series, pd.Series]:
    """Like pd.cut(), but include outliers in the outer bins."""
    bins = pd.cut(series, edges, labels=False, right=False)
    out_of_range = (series < edges[0]) | (series > edges[-1])
    bins[out_of_range] = -1
    bins = bins.astype(int)
    bins1 = pd.Series(edges[bins.values], index=bins.index, dtype=float)
    bins2 = pd.Series(edges[bins.values + 1], index=bins.index, dtype=float)
    bins1[out_of_range] = np.nan
    bins2[out_of_range] = np.nan
    return bins1, bins2


@visit.register(alt.BinTransform)
def visit_bin(transform: alt.BinTransform, df: pd.DataFrame) -> pd.DataFrame:
    transform_dct: dict = transform.to_dict()
    col = transform_dct["as"]
    bin_ = {} if transform_dct["bin"] is True else transform_dct["bin"]
    field = transform_dct["field"]

    bin_.setdefault("extent", [df[field].min(), df[field].max()])
    bins = calculate_bins(**bin_)

    if isinstance(col, str):
        df[col], df[col + "_end"] = _cut(df[field], bins)
    else:
        df[col[0]], df[col[1]] = _cut(df[field], bins)

    return df
--------------------------------------------------------------------------------
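A minimal sketch of the bin visitor above (bin edges come from
calculate_bins in vega_utils.py, which is not part of this listing, so the
exact edges are not asserted; assumes altair_transform is importable):

    import pandas as pd
    import altair_transform

    df = pd.DataFrame({"x": [1.0, 3.0, 5.0, 7.0]})
    # With a string "as", two columns are added: the bin start and "<as>_end".
    out = altair_transform.apply(df, {"bin": True, "field": "x", "as": "x_binned"})
    assert {"x_binned", "x_binned_end"} <= set(out.columns)
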
/altair_transform/transform/lookup.py:
--------------------------------------------------------------------------------
from typing import Union

import altair as alt
import pandas as pd
from .visitor import visit
from ..utils import to_dataframe


@visit.register(alt.LookupTransform)
def visit_lookup(transform: alt.LookupTransform, df: pd.DataFrame) -> pd.DataFrame:
    with alt.data_transformers.enable(consolidate_datasets=False):
        transform = transform.to_dict()
    lookup_data = transform["from"]
    data = lookup_data["data"]
    key = lookup_data["key"]
    fields = lookup_data.get("fields", alt.Undefined)

    other_df = to_dataframe(data)
    if fields is alt.Undefined:
        fields = list(other_df.columns)

    # Make sure the lookup key is among the columns we keep.
    if key not in fields:
        cols_to_use = fields + [key]
    else:
        cols_to_use = fields
    other_df = other_df[cols_to_use]

    lookup = transform["lookup"]
    default = transform.get("default")

    # TODO: use as_ if fields are not specified
    indicator: Union[str, bool]
    if default is None:
        indicator = False
    else:
        # TODO: make sure this doesn't conflict
        indicator = "__merge_indicator"

    # TODO: how to handle conflicting fields?
    merged = pd.merge(
        df, other_df, left_on=lookup, right_on=key, how="left", indicator=indicator
    )

    if key != lookup and key not in fields:
        merged = merged.drop(key, axis=1)
    if indicator:
        merged.loc[merged[indicator] == "left_only", fields] = default
        merged = merged.drop(indicator, axis=1)
    return merged
--------------------------------------------------------------------------------
/altair_transform/utils/data.py:
--------------------------------------------------------------------------------
from typing import Union, Optional

import altair as alt
import numpy as np
import pandas as pd

DataType = Union[dict, pd.DataFrame, alt.SchemaBase]
ChartType = Union[dict, alt.SchemaBase]


def to_dataframe(data: DataType, context: Optional[ChartType] = None) -> pd.DataFrame:
    if isinstance(data, pd.DataFrame):
        return data

    if not isinstance(data, dict):
        data = data.to_dict()

    if "values" in data:
        return pd.DataFrame(data["values"])

    if "url" in data:
        url = data["url"]
        fmt = data.get("format", url.split(".")[-1])
        if fmt == "csv":
            return pd.read_csv(url)
        elif fmt == "json":
            return pd.read_json(url)
        else:
            raise ValueError(f"Unknown format for UrlData: '{fmt}'")

    if "name" in data:
        name = data["name"]
        if context is None:
            raise ValueError("NamedData not supported.")
        if isinstance(context, dict):
            datasets = context.get("datasets", {})
        else:
            datasets = context._get("datasets", {})
        if name not in datasets:
            raise ValueError(f"dataset '{name}' not specified in chart.")
        return pd.DataFrame(datasets[name])

    if "sequence" in data:
        start = data["sequence"]["start"]
        stop = data["sequence"]["stop"]
        step = data["sequence"].get("step", 1)
        name = data["sequence"].get("as", "data")
        return pd.DataFrame({name: np.arange(start, stop, step)})

    data = alt.Data.from_dict(data)
    raise NotImplementedError(f"Data of type {type(data)}")
--------------------------------------------------------------------------------
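A quick sketch of to_dataframe above on two of the supported data shapes
(assumes altair_transform is importable):

    from altair_transform.utils import to_dataframe

    # Inline values become DataFrame rows.
    inline = to_dataframe({"values": [{"x": 1}, {"x": 2}]})
    assert list(inline["x"]) == [1, 2]

    # Sequence generators expand via np.arange (default step is 1).
    seq = to_dataframe({"sequence": {"start": 0, "stop": 3, "as": "t"}})
    assert list(seq["t"]) == [0, 1, 2]
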
/altair_transform/transform/tests/test_fold.py:
--------------------------------------------------------------------------------
from typing import Any, Dict, List, Optional

import numpy as np
import pandas as pd
from pandas.testing import assert_frame_equal
import pytest

import altair_transform


@pytest.fixture
def data() -> pd.DataFrame:
    return pd.DataFrame({"x": [1, 2, 2], "y1": ["A", "B", "C"], "y2": ["D", "E", "F"]})


@pytest.mark.parametrize("as_", (None, ["name", "val"]))
def test_fold_transform(data, as_: Optional[List[str]]):
    if as_ is None:
        out = altair_transform.apply(data, {"fold": ["y1", "y2"]})
        as_ = ["key", "value"]
    else:
        out = altair_transform.apply(data, {"fold": ["y1", "y2"], "as": as_})

    expected = pd.DataFrame(
        {
            "x": np.repeat(data["x"], 2),
            as_[0]: 3 * ["y1", "y2"],
            as_[1]: np.ravel((data["y1"], data["y2"]), "F"),
            "y1": np.repeat(data["y1"], 2),
            "y2": np.repeat(data["y2"], 2),
        }
    ).reset_index(drop=True)
    assert_frame_equal(out, expected)


@pytest.mark.parametrize("fold", [["y1"], ["y1", "y2"]])
@pytest.mark.parametrize("as_", [None, ["name", "val"]])
def test_fold_against_js(
    driver, data: pd.DataFrame, fold: List[str], as_: Optional[List[str]]
) -> None:
    transform: Dict[str, Any] = {"fold": fold}
    if as_ is not None:
        transform["as"] = as_

    got = altair_transform.apply(data, transform)
    want = driver.apply(data, transform)

    assert_frame_equal(
        got[sorted(got.columns)],
        want[sorted(want.columns)],
        check_dtype=False,
        check_index_type=False,
        check_less_precise=True,
    )
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

# ply files
altair_transform/parser.out

# emacs
*~

# jupyter
Untitled*.ipynb
--------------------------------------------------------------------------------
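The next file implements the impute transform, which fills in missing
key/value combinations. A minimal behavioral sketch (mirroring the impute
unit tests earlier in this listing; assumes altair_transform is importable):

    import pandas as pd
    import altair_transform

    df = pd.DataFrame({"x": [0, 2], "y": [10.0, 30.0]})
    transform = {"impute": "y", "key": "x", "keyvals": [0, 1, 2], "value": 0}
    out = altair_transform.apply(df, transform)
    # A row for x=1 is created, with y imputed to the supplied value 0.
    assert list(out["x"]) == [0, 1, 2]
    assert list(out["y"]) == [10.0, 0.0, 30.0]
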
/altair_transform/transform/impute.py:
--------------------------------------------------------------------------------
import altair as alt
import numpy as np
import pandas as pd
from .visitor import visit


@visit.register(alt.ImputeTransform)
def visit_impute(transform: alt.ImputeTransform, df: pd.DataFrame) -> pd.DataFrame:
    transform = transform.to_dict()

    field = transform["impute"]
    key = transform["key"]

    frame = transform.get("frame", None)
    if frame:
        raise NotImplementedError("Impute Transform frame argument.")

    # Keyvals are the values at which the field is imputed.
    keyvals = transform.get("keyvals", [])
    if isinstance(keyvals, dict):
        start = keyvals.get("start", 0)
        stop = keyvals["stop"]
        step = keyvals.get("step", np.sign(stop - start))
        keyvals = np.arange(start, stop, step)
    keyvals = np.sort(np.unique(np.concatenate([keyvals, df[key].values])))
    keyvals = pd.Series(keyvals, name=key)

    groupby = transform.get("groupby", [])

    method = transform.get("method", "value")
    value = transform.get("value", None)
    if "method" not in transform and "value" not in transform:
        raise ValueError("Must specify either method or value.")
    if method == "value" and "value" not in transform:
        raise ValueError("For method='value', must supply a value argument.")

    def _impute(group):
        imputed = pd.merge(keyvals, group, on=key, how="left")
        if method == "value":
            fill = value
        else:
            fill = group[field].agg(method)
        imputed[field].fillna(fill, inplace=True)
        for col in groupby:
            imputed[col].fillna(group[col].iloc[0], inplace=True)
        return imputed

    if groupby:
        imputed = df.groupby(groupby).apply(_impute).reset_index(drop=True)
    else:
        imputed = _impute(df)

    return imputed
--------------------------------------------------------------------------------
/altair_transform/tests/test_core.py:
--------------------------------------------------------------------------------
import altair as alt
from altair_transform import extract_data, transform_chart
import numpy as np
import pandas as pd
import pytest


@pytest.fixture
def data():
    rand = np.random.RandomState(42)
    return pd.DataFrame(
        {
            "x": rand.randint(0, 100, 12),
            "y": rand.randint(0, 100, 12),
            "t": pd.date_range("2012-01-15", freq="M", periods=12),
            "i": range(12),
            "c": list("AAABBBCCCDDD"),
        }
    )


@pytest.fixture
def chart(data):
    return (
        alt.Chart(data)
        .transform_calculate(xpy="datum.x + datum.y", xmy="datum.x - datum.y")
        .mark_point()
        .encode(x="xpy:Q", y="xmy:Q")
    )


def test_extract_data(data, chart):
    out1 = extract_data(chart)
    out2 = data.copy()
    out2["xpy"] = data.x + data.y
    out2["xmy"] = data.x - data.y
    assert out1.equals(out2)


def test_transform_chart(data, chart):
    original_chart = chart.copy()
    data_out = extract_data(chart)
    chart_out = transform_chart(chart)

    # Original chart not modified
    assert original_chart == chart

    # Transform applied to output chart
    assert chart_out.data.equals(data_out)
    assert chart_out.transform is alt.Undefined
    assert chart.mark == chart_out.mark
    assert chart.encoding == chart_out.encoding


def test_transform_chart_with_aggregate():
    data = pd.DataFrame({"x": list("AABBBCCCC")})
    chart = alt.Chart(data).mark_bar().encode(x="x:N", y="count():Q")
    chart_out = transform_chart(chart)
    assert chart_out.data.equals(pd.DataFrame({"x": list("ABC"), "__count": [2, 3, 4]}))
    assert chart_out.encoding.to_dict() == {
        "x": {"field": "x", "type": "nominal"},
        "y": {"field": "__count", "type": "quantitative", "title": "Count of Records"},
    }
--------------------------------------------------------------------------------
/altair_transform/utils/tests/test_data.py:
--------------------------------------------------------------------------------
import pandas as pd
import tempfile

import pytest

import altair as alt
from altair_transform.utils import to_dataframe


@pytest.fixture
def df():
    return pd.DataFrame({"x": [1, 2, 3], "y": ["A", "B", "C"]})


@pytest.fixture
def csv_data(df):
    with tempfile.NamedTemporaryFile("w+", suffix=".csv") as f:
        df.to_csv(f.name, index=False)
        yield {"url": f.name}


@pytest.fixture
def json_data(df):
    with tempfile.NamedTemporaryFile("w+", suffix=".json") as f:
        df.to_json(f.name, orient="records")
        yield {"url": f.name}


@pytest.fixture
def inline_data(df):
    return {"values": df.to_dict(orient="records")}


@pytest.fixture
def named_data(df):
    return {"name": "my-dataset"}


@pytest.fixture
def sequence_data(df):
    return {"sequence": {"start": 1, "stop": 4, "as": "x"}}


@pytest.fixture
def chart(named_data, inline_data):
    return alt.Chart(
        data=named_data,
        mark="bar",
        datasets={named_data["name"]: inline_data["values"]},
    )


@pytest.mark.parametrize("data_type", [dict, alt.Data])
def test_csv_to_dataframe(df, csv_data, data_type):
    data = data_type(csv_data)
    assert df.equals(to_dataframe(data))


@pytest.mark.parametrize("data_type", [dict, alt.Data])
def test_json_to_dataframe(df, json_data, data_type):
    data = data_type(json_data)
    assert df.equals(to_dataframe(data))


@pytest.mark.parametrize("data_type", [dict, alt.Data])
def test_inline_to_dataframe(df, inline_data, data_type):
    data = data_type(inline_data)
    assert df.equals(to_dataframe(data))


@pytest.mark.parametrize("data_type", [dict, alt.Data])
def test_named_to_dataframe(df, chart, named_data, data_type):
    data = data_type(named_data)
    assert df.equals(to_dataframe(data, context=chart))


@pytest.mark.parametrize("data_type", [dict, alt.Data])
def test_sequence_to_dataframe(df, sequence_data, data_type):
    data = data_type(sequence_data)
    assert df[["x"]].equals(to_dataframe(data))
--------------------------------------------------------------------------------
/altair_transform/transform/tests/test_filter.py:
--------------------------------------------------------------------------------
from typing import Any, Callable, Dict, List, Tuple, Union

import numpy as np
import pandas as pd
from pandas.testing import assert_frame_equal
import pytest

import altair_transform


@pytest.fixture
def data() -> pd.DataFrame:
    rand = np.random.RandomState(42)
    return pd.DataFrame(
        {
            "x": rand.randint(0, 100, 12),
            "y": rand.randint(0, 100, 12),
            "i": range(12),
            "c": list("AAABBBCCCDDD"),
        }
    )


FILTER_PREDICATES: List[
    Tuple[Union[str, Dict[str, Any]], Callable[[pd.DataFrame], pd.DataFrame]]
] = [
    ("datum.x < datum.y", lambda df: df[df.x < df.y]),
    ({"not": "datum.i < 5"}, lambda df: df[~(df.i < 5)]),
    (
        {"and": [{"field": "x", "lt": 50}, {"field": "i", "gte": 2}]},
        lambda df: df[(df.x < 50) & (df.i >= 2)],
    ),
    (
        {"or": [{"field": "y", "gt": 50}, {"field": "i", "lte": 4}]},
        lambda df: df[(df.y > 50) | (df.i <= 4)],
    ),
    ({"field": "c", "oneOf": ["A", "B"]}, lambda df: df[df.c.isin(["A", "B"])]),
    ({"field": "x", "range": [30, 60]}, lambda df: df[(df.x >= 30) & (df.x <= 60)]),
    ({"field": "c", "equal": "B"}, lambda df: df[df.c == "B"]),
]


@pytest.mark.parametrize("filter,calc", FILTER_PREDICATES)
def test_filter_transform(
    data: pd.DataFrame,
    filter: Union[str, Dict[str, Any]],
    calc: Callable[[pd.DataFrame], pd.DataFrame],
):
    out1 = altair_transform.apply(data, {"filter": filter})
    out2 = calc(data).reset_index(drop=True)
    assert_frame_equal(out1, out2)


@pytest.mark.parametrize("filter,_", FILTER_PREDICATES)
def test_filter_against_js(
    driver,
    data: pd.DataFrame,
    filter: Union[str, Dict[str, Any]],
    _: Callable[[pd.DataFrame], pd.DataFrame],
) -> None:
    transform = {"filter": filter}
    got = altair_transform.apply(data, transform)
    want = driver.apply(data, transform)

    assert_frame_equal(
        got[sorted(got.columns)],
        want[sorted(want.columns)],
        check_dtype=False,
        check_index_type=False,
        check_less_precise=True,
    )
--------------------------------------------------------------------------------
/altair_transform/transform/aggregate.py:
--------------------------------------------------------------------------------
import altair as alt
import numpy as np
import pandas as pd
from .visitor import visit


@visit.register(alt.AggregateTransform)
def visit_aggregate(
    transform: alt.AggregateTransform, df: pd.DataFrame
) -> pd.DataFrame:
    transform = transform.to_dict()
    groupby = transform.get("groupby", [])
    agg_cols = {}
    for aggregate in transform["aggregate"]:
        op = aggregate["op"]
        col = aggregate["as"]
        field = aggregate.get("field", df.columns[0])

        if op == "argmin":

            def op(col, df=df):
                return df.loc[col.idxmin()].to_dict()

        elif op == "argmax":

            def op(col, df=df):
                return df.loc[col.idxmax()].to_dict()

        else:
            op = AGG_REPLACEMENTS.get(op, op)

        if field == "*" and field not in df.columns:
            field = df.columns[0]

        if op == "values":
            if groupby:
                agg_cols[col] = df.groupby(groupby).apply(
                    lambda x: x.to_dict(orient="records")
                )
            else:
                agg_cols[col] = [df.to_dict(orient="records")]
        else:
            if groupby:
                agg_cols[col] = df.groupby(groupby)[field].aggregate(op)
            else:
                agg_cols[col] = [df[field].aggregate(op)]

    df = pd.DataFrame(agg_cols)
    if groupby:
        df = df.reset_index()
    return df


def confidence_interval(x: np.ndarray, level: float):
    from scipy import stats

    return stats.t.interval(level, len(x) - 1, loc=x.mean(), scale=x.sem())


AGG_REPLACEMENTS = {
    "argmin": "idxmin",
    "argmax": "idxmax",
    "average": "mean",
    "ci0": lambda x: confidence_interval(x, 0.05),
    "ci1": lambda x: confidence_interval(x, 0.95),
    "distinct": "nunique",
    "stderr": "sem",
    "stdev": "std",
    "stdevp": lambda x: x.std(ddof=0),
    "missing": lambda x: x.isnull().sum(),
    "q1": lambda x: x.quantile(0.25),
    "q3": lambda x: x.quantile(0.75),
    "valid": "count",
    "variance": "var",
    "variancep": lambda x: x.var(ddof=0),
}
--------------------------------------------------------------------------------
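The AGG_REPLACEMENTS table above maps Vega aggregate names onto pandas
equivalents. A short sketch of the effect (assumes altair_transform is
importable):

    import pandas as pd
    import altair_transform

    df = pd.DataFrame({"c": ["A", "A", "B"], "x": [1, 2, 5]})
    transform = {
        "aggregate": [{"op": "average", "field": "x", "as": "mean_x"}],
        "groupby": ["c"],
    }
    # Vega's "average" is translated to pandas' "mean" via AGG_REPLACEMENTS.
    out = altair_transform.apply(df, transform)
    assert list(out["mean_x"]) == [1.5, 5.0]
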
/altair_transform/transform/tests/test_quantile.py:
--------------------------------------------------------------------------------
from typing import Any, Dict, List, Optional

import numpy as np
from numpy.testing import assert_allclose
import pandas as pd
from pandas.testing import assert_frame_equal
import pytest

import altair_transform


@pytest.fixture
def data() -> pd.DataFrame:
    rand = np.random.RandomState(42)
    return pd.DataFrame({"x": rand.randint(0, 100, 12), "c": list("AAABBBCCCDDD")})


def test_quantile_transform(data: pd.DataFrame) -> None:
    transform = {"quantile": "x", "step": 0.1}
    out = altair_transform.apply(data, transform)
    assert list(out.columns) == ["prob", "value"]
    assert_allclose(out.prob, np.arange(0.05, 1, 0.1))
    assert_allclose(out.value, np.quantile(data.x, out.prob))


def test_quantile_transform_groupby(data: pd.DataFrame) -> None:
    group = "c"
    transform = {"quantile": "x", "step": 0.1, "groupby": [group]}
    out = altair_transform.apply(data, transform)
    assert list(out.columns) == ["c", "prob", "value"]

    for key in data[group].unique():
        out_group_1 = altair_transform.apply(data[data[group] == key], transform)
        out_group_2 = out[out[group] == key][out_group_1.columns].reset_index(drop=True)
        assert_frame_equal(out_group_1, out_group_2)


@pytest.mark.parametrize("step", [None, 0.1])
@pytest.mark.parametrize("groupby", [None, ["c"]])
@pytest.mark.parametrize("probs", [None, [0.2 * i for i in range(6)]])
@pytest.mark.parametrize("as_", [None, ["p", "q"]])
def test_quantile_against_js(
    driver,
    data: pd.DataFrame,
    step: Optional[float],
    groupby: Optional[List[str]],
    probs: Optional[List[float]],
    as_: Optional[List[str]],
) -> None:
    transform: Dict[str, Any] = {"quantile": "x"}
    if step is not None:
        transform["step"] = step
    if groupby is not None:
        transform["groupby"] = groupby
    if probs is not None:
        transform["probs"] = probs
    if as_ is not None:
        transform["as"] = as_
    got = altair_transform.apply(data, transform)
    want = driver.apply(data, transform)
    assert_frame_equal(
        got[sorted(got.columns)],
        want[sorted(want.columns)],
        check_dtype=False,
        check_index_type=False,
        check_less_precise=True,
    )
--------------------------------------------------------------------------------
/altair_transform/transform/tests/test_pivot.py:
--------------------------------------------------------------------------------
from typing import Any, Dict, List, Optional

import numpy as np
import pandas as pd
from pandas.testing import assert_frame_equal
import pytest

import altair_transform


@pytest.fixture
def data() -> pd.DataFrame:
    rand = np.random.RandomState(42)
    return pd.DataFrame(
        {
            "x": rand.randint(0, 100, 12),
            "c": list("AAABBBCCCDDD"),
            "d": list("ABCABCABCABC"),
        }
    )


def test_pivot_transform(data: pd.DataFrame) -> None:
    transform = {"pivot": "c", "value": "x"}
    expected = pd.DataFrame(
        {key: [data.x[data.c == key].sum()] for key in data.c.unique()},
    )
    out = altair_transform.apply(data, transform)
    assert_frame_equal(out, expected)


def test_pivot_transform_groupby(data: pd.DataFrame) -> None:
    transform = {"pivot": "c", "value": "x", "groupby": ["d"]}
    expected = data.pivot(values="x", index="d", columns="c").reset_index()
    expected.columns.names = [None]
    out = altair_transform.apply(data, transform)
    assert_frame_equal(out, expected)


def test_pivot_transform_limit(data: pd.DataFrame) -> None:
    transform = {"pivot": "c", "value": "x", "limit": 2}
    expected = pd.DataFrame(
        {key: [data.x[data.c == key].sum()] for key in sorted(data.c.unique())[:2]}
    )
    out = altair_transform.apply(data, transform)
    assert_frame_equal(out, expected)


@pytest.mark.parametrize("groupby", [None, ["d"]])
@pytest.mark.parametrize("limit", [None, 1])
@pytest.mark.parametrize("op", [None, "sum", "max"])
def test_pivot_against_js(
    driver,
    data: pd.DataFrame,
    groupby: Optional[List[str]],
    limit: Optional[int],
    op: Optional[str],
) -> None:
    transform: Dict[str, Any] = {"pivot": "c", "value": "x"}
    if groupby is not None:
        transform["groupby"] = groupby
    if limit is not None:
        transform["limit"] = limit
    if op is not None:
        transform["op"] = op
    got = altair_transform.apply(data, transform)
    want = driver.apply(data, transform)
    assert_frame_equal(
        got[sorted(got.columns)],
        want[sorted(want.columns)],
        check_dtype=False,
        check_index_type=False,
        check_less_precise=True,
    )
--------------------------------------------------------------------------------
/altair_transform/transform/tests/test_flatten.py:
--------------------------------------------------------------------------------
from typing import Dict, List

import numpy as np
from numpy.testing import assert_equal
import pandas as pd
from pandas.testing import assert_frame_equal
import pytest

import altair_transform


@pytest.fixture
def data() -> pd.DataFrame:
    return pd.DataFrame(
        {
            "x": [[1, 2, 3], [4, 5, 6, 7], [8, 9]],
            "y": [[1, 2], [3, 4], [5, 6]],
            "cat": list("ABC"),
        }
    )


def test_flatten_transform(data: pd.DataFrame) -> None:
    out = altair_transform.apply(data, {"flatten": ["x"]})
    assert out.shape == (9, 3)
    assert out.columns.tolist() == ["x", "y", "cat"]
    assert_equal(out.x.values, range(1, 10))
    assert_equal(out.cat.values, list("AAABBBBCC"))

    out = altair_transform.apply(data, {"flatten": ["x", "y"]})
    assert out.shape == (9, 3)
    assert out.columns.tolist() == ["x", "y", "cat"]
    assert_equal(out.x.values, range(1, 10))
    assert_equal(out.y.values, [1, 2, np.nan, 3, 4, np.nan, np.nan, 5, 6])
    assert_equal(out.cat.values, list("AAABBBBCC"))


def test_flatten_transform_with_as(data: pd.DataFrame):
    out = altair_transform.apply(data, {"flatten": ["y"], "as": ["yflat"]})
    assert out.shape == (6, 4)
    assert out.columns.tolist() == ["yflat", "x", "y", "cat"]
    assert_equal(out.yflat.values, range(1, 7))
    assert_equal(out.cat.values, list("AABBCC"))

    out = altair_transform.apply(
        data, {"flatten": ["x", "y"], "as": ["xflat", "yflat"]}
    )
    assert out.shape == (9, 5)
    assert out.columns.tolist() == ["xflat", "yflat", "x", "y", "cat"]
    assert_equal(out.xflat.values, range(1, 10))
    assert_equal(out.yflat.values, [1, 2, np.nan, 3, 4, np.nan, np.nan, 5, 6])
    assert_equal(out.cat.values, list("AAABBBBCC"))


@pytest.mark.parametrize(
    "transform",
    [
        {"flatten": ["x"]},
        {"flatten": ["x"], "as": ["xflat"]},
        {"flatten": ["x", "y"]},
        {"flatten": ["x", "y"], "as": ["xflat"]},
        {"flatten": ["x", "y"], "as": ["xflat", "yflat"]},
    ],
)
def test_flatten_against_js(
    driver, data: pd.DataFrame, transform: Dict[str, List[str]],
) -> None:
    got = altair_transform.apply(data, transform)
    want = driver.apply(data, transform)

    assert_frame_equal(
        got[sorted(got.columns)],
        want[sorted(want.columns)],
        check_dtype=False,
        check_index_type=False,
        check_less_precise=True,
    )
--------------------------------------------------------------------------------
) 65 | def test_flatten_against_js( 66 | driver, data: pd.DataFrame, transform: Dict[str, List[str]], 67 | ) -> None: 68 | got = altair_transform.apply(data, transform) 69 | want = driver.apply(data, transform) 70 | 71 | assert_frame_equal( 72 | got[sorted(got.columns)], 73 | want[sorted(want.columns)], 74 | check_dtype=False, 75 | check_index_type=False, 76 | check_less_precise=True, 77 | ) 78 | -------------------------------------------------------------------------------- /altair_transform/transform/tests/test_window.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Optional 2 | 3 | import numpy as np 4 | import pandas as pd 5 | from pandas.testing import assert_series_equal, assert_frame_equal 6 | import pytest 7 | 8 | import altair_transform 9 | 10 | 11 | @pytest.fixture 12 | def data() -> pd.DataFrame: 13 | rand = np.random.RandomState(1) 14 | return pd.DataFrame({"x": rand.randint(0, 100, 12), "c": list("AAABBBCCCDDD")}) 15 | 16 | 17 | def test_window_transform_basic(data: pd.DataFrame) -> None: 18 | transform = {"window": [{"op": "sum", "field": "x", "as": "xsum"}]} 19 | out = altair_transform.apply(data, transform) 20 | expected = data["x"].cumsum() 21 | expected.name = "xsum" 22 | assert_series_equal(out["xsum"], expected.astype(float)) 23 | 24 | 25 | def test_window_transform_sorted(data: pd.DataFrame) -> None: 26 | transform = { 27 | "window": [{"op": "sum", "field": "x", "as": "xsum"}], 28 | "sort": [{"field": "x"}], 29 | } 30 | out = altair_transform.apply(data, transform) 31 | expected = data["x"].sort_values().cumsum().sort_index() 32 | expected.name = "xsum" 33 | assert_series_equal(out["xsum"], expected.astype(float)) 34 | 35 | 36 | def test_window_transform_grouped(data: pd.DataFrame) -> None: 37 | transform = { 38 | "window": [{"op": "sum", "field": "x", "as": "xsum"}], 39 | "groupby": ["c"], 40 | } 41 | out = altair_transform.apply(data, transform) 42 | expected = data.groupby("c").rolling(len(data), min_periods=1) 43 | expected = expected["x"].sum().reset_index("c", drop=True).sort_index() 44 | expected.name = "xsum" 45 | assert_series_equal(out["xsum"], expected) 46 | 47 | 48 | @pytest.mark.parametrize("groupby", [None, ["c"]]) 49 | @pytest.mark.parametrize("sort", [None, "x"]) 50 | @pytest.mark.parametrize("frame", [None, [1, 1], [-2, 2], [None, None]]) 51 | def test_window_against_js( 52 | driver, 53 | data: pd.DataFrame, 54 | groupby: Optional[List[str]], 55 | sort: Optional[str], 56 | frame: Optional[List[Optional[int]]], 57 | ) -> None: 58 | transform: Dict[str, Any] = { 59 | "window": [{"op": "sum", "field": "x", "as": "xsum"}], 60 | "ignorePeers": False, 61 | } 62 | if groupby is not None: 63 | transform["groupby"] = groupby 64 | if sort is not None: 65 | transform["sort"] = [{"field": sort}] 66 | if frame is not None: 67 | transform["frame"] = frame 68 | got = altair_transform.apply(data, transform) 69 | want = driver.apply(data, transform) 70 | assert_frame_equal( 71 | got[sorted(got.columns)], 72 | want[sorted(want.columns)], 73 | check_dtype=False, 74 | check_index_type=False, 75 | check_less_precise=True, 76 | ) 77 | -------------------------------------------------------------------------------- /altair_transform/transform/window.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | 3 | import altair as alt 4 | import pandas as pd 5 | from .visitor import visit 6 | from .aggregate import AGG_REPLACEMENTS 7 | 8 | 9 | 
@visit.register(alt.WindowTransform)
10 | def visit_window(transform: alt.WindowTransform, df: pd.DataFrame) -> pd.DataFrame:
11 |     transform = transform.to_dict()
12 |     window = transform["window"]
13 |     frame = transform.get("frame", [None, 0])
14 |     groupby = transform.get("groupby", [])
15 |     ignorePeers = transform.get("ignorePeers", False)
16 |     sort = transform.get("sort", [])
17 | 
18 |     if ignorePeers:
19 |         raise NotImplementedError("Window transform with ignorePeers=True")
20 | 
21 |     # First sort the dataframe if required.
22 |     if sort:
23 |         fields = [s["field"] for s in sort]
24 |         ascending = [s.get("order", "ascending") == "ascending" for s in sort]
25 |         df2 = df.sort_values(fields, ascending=ascending)
26 |     else:
27 |         df2 = df
28 | 
29 |     if groupby:
30 |         grouped = df2.groupby(groupby)
31 |     else:
32 |         grouped = df2
33 | 
34 |     # TODO: implement other frame options
35 |     if frame == [None, 0]:
36 |         rolling = grouped.rolling(len(df), min_periods=1)
37 |     elif frame[1] == 0:
38 |         rolling = grouped.rolling(1 - frame[0], min_periods=1)  # frame[0] <= 0
39 |     elif frame == [None, None]:
40 |         rolling = grouped.rolling(2 * len(df), min_periods=1, center=True)
41 |     elif abs(frame[0]) == abs(frame[1]):
42 |         # TODO: duplicate values may increase the effective window size
43 |         rolling = grouped.rolling(2 * abs(frame[0]) + 1, min_periods=1, center=True)
44 |     else:
45 |         raise NotImplementedError("frame={}".format(frame))
46 | 
47 |     for w in window:
48 |         # TODO: if field not specified, must be count, rank, or dense_rank
49 |         if "param" in w:
50 |             raise NotImplementedError("window function with param")
51 |         col = w.get("field", df2.columns[0])
52 |         if col == "*" and col not in df2.columns:
53 |             col = df2.columns[0]
54 |         agg = w["op"]
55 |         agg = WINDOW_AGG_REPLACEMENTS.get(agg, agg)
56 |         df2[w["as"]] = rolling[col].aggregate(agg).reset_index(groupby, drop=True)
57 | 
58 |     return df2.loc[df.index]
59 | 
60 | 
61 | # TODO: implement these.
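#
# For orientation, the cumulative default case handled above (a hedged,
# minimal example; column names are illustrative):
#
#     >>> import pandas as pd
#     >>> import altair_transform
#     >>> df = pd.DataFrame({"x": [1, 2, 3]})
#     >>> transform = {"window": [{"op": "sum", "field": "x", "as": "xsum"}]}
#     >>> altair_transform.apply(df, transform)["xsum"].tolist()
#     [1.0, 3.0, 6.0]
#
# With the default frame [None, 0], each row aggregates over all preceding
# rows plus itself. A possible shape for one of the unimplemented ops below,
# assuming a Series-in/Series-out callable could be slotted into the table
# once the rolling machinery supports non-scalar aggregates:
#
#     def _dense_rank(s: pd.Series) -> pd.Series:
#         # Vega's dense_rank: tied values share a rank, with no gaps after.
#         return s.rank(method="dense")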
62 | WINDOW_AGG_REPLACEMENTS: Dict[str, object] = { 63 | "row_number": "row_number", 64 | "rank": "rank", 65 | "dense_rank": "dense_rank", 66 | "percent_rank": "percent_rank", 67 | "cume_dist": "cume_dist", 68 | "ntile": "ntile", 69 | "lag": "lag", 70 | "lead": "lead", 71 | "first_value": "first_value", 72 | "last_value": "last_value", 73 | "nth_value": "nth_value", 74 | } 75 | WINDOW_AGG_REPLACEMENTS.update(AGG_REPLACEMENTS) 76 | -------------------------------------------------------------------------------- /altair_transform/transform/tests/test_lookup.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Optional 2 | 3 | from altair.utils.data import to_values 4 | import numpy as np 5 | import pandas as pd 6 | from pandas.testing import assert_frame_equal 7 | import pytest 8 | 9 | import altair_transform 10 | 11 | 12 | @pytest.fixture 13 | def data() -> pd.DataFrame: 14 | rand = np.random.RandomState(42) 15 | return pd.DataFrame({"x": rand.randint(0, 100, 12), "c": list("AAABBBCCCDDD")}) 16 | 17 | 18 | @pytest.fixture 19 | def lookup_data() -> Dict[str, Any]: 20 | rand = np.random.RandomState(0) 21 | df = pd.DataFrame( 22 | {"y": rand.randint(0, 50, 4), "d": list("ABCD"), "e": list("ACDE")} 23 | ) 24 | return to_values(df) 25 | 26 | 27 | @pytest.mark.parametrize("lookup_key", ["c", "c2"]) 28 | def test_lookup_transform(data: pd.DataFrame, lookup_key: str) -> None: 29 | lookup = pd.DataFrame({lookup_key: list("ABCD"), "z": [3, 1, 4, 5]}) 30 | transform = { 31 | "lookup": "c", 32 | "from": {"data": to_values(lookup), "key": lookup_key, "fields": ["z"]}, 33 | } 34 | out1 = altair_transform.apply(data, transform) 35 | out2 = pd.merge(data, lookup, left_on="c", right_on=lookup_key) 36 | if lookup_key != "c": 37 | out2 = out2.drop(lookup_key, axis=1) 38 | assert_frame_equal(out1, out2) 39 | 40 | 41 | @pytest.mark.parametrize("lookup_key", ["c", "c2"]) 42 | @pytest.mark.parametrize("default", [None, "missing"]) 43 | def test_lookup_transform_default( 44 | data: pd.DataFrame, lookup_key: str, default: Optional[str] 45 | ) -> None: 46 | lookup = pd.DataFrame({lookup_key: list("ABC"), "z": [3, 1, 4]}) 47 | transform = { 48 | "lookup": "c", 49 | "from": {"data": to_values(lookup), "key": lookup_key, "fields": ["z"]}, 50 | } 51 | if default is not None: 52 | transform["default"] = default 53 | 54 | out = altair_transform.apply(data, transform) 55 | undef = out["c"] == "D" 56 | if default is None: 57 | assert out.loc[undef, "z"].isnull().all() 58 | else: 59 | assert (out.loc[undef, "z"] == default).all() 60 | 61 | 62 | @pytest.mark.parametrize("key", ["d", "e"]) 63 | @pytest.mark.parametrize("default", [None, "N/A"]) 64 | def test_lookup_against_js( 65 | driver, 66 | data: pd.DataFrame, 67 | lookup_data: Dict[str, Any], 68 | key: str, 69 | default: Optional[str], 70 | ) -> None: 71 | transform = { 72 | "lookup": "c", 73 | "from": {"data": lookup_data, "key": key, "fields": ["y"]}, 74 | } 75 | if default is not None: 76 | transform["default"] = default 77 | got = altair_transform.apply(data, transform) 78 | want = driver.apply(data, transform) 79 | 80 | assert_frame_equal( 81 | got[sorted(got.columns)], 82 | want[sorted(want.columns)], 83 | check_dtype=False, 84 | check_index_type=False, 85 | check_less_precise=True, 86 | ) 87 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import io 2 | import os 3 | 
import re
4 | import shutil
5 | 
6 | try:
7 |     from setuptools import setup
8 | except ImportError:
9 |     from distutils.core import setup
10 | 
11 | 
12 | def read(path, encoding="utf-8"):
13 |     path = os.path.join(os.path.dirname(__file__), path)
14 |     with io.open(path, encoding=encoding) as fp:
15 |         return fp.read()
16 | 
17 | 
18 | def get_install_requirements(path):
19 |     content = read(path)
20 |     return [req for req in content.split("\n") if req != "" and not req.startswith("#")]
21 | 
22 | 
23 | def version(path):
24 |     """Obtain the package version from a python file, e.g. pkg/__init__.py
25 | 
26 |     See the "single-sourcing the package version" guide at packaging.python.org.
27 |     """
28 |     version_file = read(path)
29 |     version_match = re.search(
30 |         r"""^__version__ = ['"]([^'"]*)['"]""", version_file, re.M
31 |     )
32 |     if version_match:
33 |         return version_match.group(1)
34 |     raise RuntimeError("Unable to find version string.")
35 | 
36 | 
37 | HERE = os.path.abspath(os.path.dirname(__file__))
38 | 
39 | 
40 | # From https://github.com/jupyterlab/jupyterlab/blob/master/setupbase.py,
41 | # BSD licensed
42 | def find_packages(top=HERE):
43 |     """
44 |     Find all of the packages.
45 |     """
46 |     packages = []
47 |     for d, dirs, _ in os.walk(top, followlinks=True):
48 |         if os.path.exists(os.path.join(d, "__init__.py")):
49 |             packages.append(os.path.relpath(d, top).replace(os.path.sep, "."))
50 |         elif d != top:
51 |             # Do not look for packages in subfolders
52 |             # if current is not a package
53 |             dirs[:] = []
54 |     return packages
55 | 
56 | 
57 | README_TEST_PATH = "altair_transform/tests/README.md"
58 | try:
59 |     shutil.copyfile("README.md", README_TEST_PATH)
60 |     setup(
61 |         name="altair_transform",
62 |         version=version("altair_transform/__init__.py"),
63 |         description="A python engine for evaluating Altair transforms.",
64 |         long_description=read("README.md"),
65 |         long_description_content_type="text/markdown",
66 |         author="Jake VanderPlas",
67 |         author_email="jakevdp@gmail.com",
68 |         url="http://github.com/altair-viz/altair-transform/",
69 |         download_url="http://github.com/altair-viz/altair-transform/",
70 |         license="MIT",
71 |         packages=find_packages(),
72 |         include_package_data=True,
73 |         install_requires=get_install_requirements("requirements.txt"),
74 |         python_requires=">=3.6",
75 |         classifiers=[
76 |             "Environment :: Console",
77 |             "Intended Audience :: Science/Research",
78 |             "License :: OSI Approved :: MIT License",
79 |             "Natural Language :: English",
80 |             "Programming Language :: Python :: 3.6",
81 |             "Programming Language :: Python :: 3.7",
82 |             "Programming Language :: Python :: 3.8",
83 |         ],
84 |     )
85 | finally:
86 |     os.remove(README_TEST_PATH)
87 | 
--------------------------------------------------------------------------------
/altair_transform/utils/tests/test_timeunit.py:
--------------------------------------------------------------------------------
1 | """Tests of the timeunit utilities"""
2 | from dateutil.tz import tzlocal
3 | import pytest
4 | 
5 | import pandas as pd
6 | 
7 | from altair_transform.utils import timeunit
8 | 
9 | 
10 | TIMEUNITS = [
11 |     "year",
12 |     "quarter",
13 |     "month",
14 |     "day",
15 |     "date",
16 |     "hours",
17 |     "minutes",
18 |     "seconds",
19 |     "milliseconds",
20 |     "yearquarter",
21 |     "yearquartermonth",
22 |     "yearmonth",
23 |     "yearmonthdate",
24 |     "yearmonthdatehours",
25 |     "yearmonthdatehoursminutes",
26 |     "yearmonthdatehoursminutesseconds",
27 |     "quartermonth",
28 |     "monthdate",
29 |     "hoursminutes",
30 |     "hoursminutesseconds",
31 |     "minutesseconds",
32 |     "secondsmilliseconds",
33 | ]
34 | TIMEUNITS += [f"utc{unit}" for unit in TIMEUNITS]
35
| TIMEZONES = [None, tzlocal(), "UTC", "US/Pacific", "US/Eastern"] 36 | 37 | 38 | @pytest.fixture 39 | def dates(): 40 | # Use dates on either side of a year boundary to hit corner cases. 41 | return pd.DatetimeIndex(["1999-12-31 23:59:55.050", "2000-01-01 00:00:05.050"]) 42 | 43 | 44 | @pytest.mark.parametrize("timezone", TIMEZONES) 45 | @pytest.mark.parametrize("unit", TIMEUNITS) 46 | def test_timeunit_input_types(dates, timezone, unit): 47 | dates = dates.tz_localize(timezone) 48 | 49 | timestamps = [timeunit.compute_timeunit(d, unit) for d in dates] 50 | series = timeunit.compute_timeunit(pd.Series(dates), unit) 51 | datetimeindex = timeunit.compute_timeunit(dates, unit) 52 | 53 | assert isinstance(timestamps[0], pd.Timestamp) 54 | assert isinstance(series, pd.Series) 55 | assert isinstance(datetimeindex, pd.DatetimeIndex) 56 | assert datetimeindex.equals(pd.DatetimeIndex(series)) 57 | assert datetimeindex.equals(pd.DatetimeIndex(timestamps)) 58 | 59 | 60 | @pytest.mark.parametrize("timezone", TIMEZONES) 61 | @pytest.mark.parametrize("timeunit_name", TIMEUNITS) 62 | def test_all_timeunits(dates, timezone, timeunit_name): 63 | timeunit_calc = timeunit.compute_timeunit( 64 | dates.tz_localize(timezone), timeunit_name 65 | ) 66 | 67 | tz = "UTC" if timeunit_name.startswith("utc") else tzlocal() 68 | dates = dates.tz_localize(timezone or tzlocal()).tz_convert(tz) 69 | 70 | to_check = [ 71 | ("year", "year", 2012), 72 | ("quarter", "quarter", None), 73 | ("month", "month", None if "quarter" in timeunit_name else 1), 74 | ("day", "dayofweek", None), 75 | ("date", "day", None if "day" in timeunit_name else 1), 76 | ("hours", "hour", 0), 77 | ("minutes", "minute", 0), 78 | ("seconds", "second", 0), 79 | ("milliseconds", "microsecond", 0), 80 | ] 81 | 82 | if timeunit_name.startswith("utc"): 83 | timeunit_name = timeunit_name[3:] 84 | 85 | for name, attr, default in to_check: 86 | if timeunit_name.startswith(name): 87 | timeunit_name = timeunit_name[len(name) :] 88 | assert getattr(dates, attr).equals(getattr(timeunit_calc, attr)) 89 | elif default is not None: 90 | assert (getattr(timeunit_calc, attr) == default).all() 91 | assert (timeunit_calc.nanosecond == 0).all() 92 | -------------------------------------------------------------------------------- /altair_transform/transform/tests/test_timeunit.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import numpy as np 4 | import pandas as pd 5 | from pandas.testing import assert_frame_equal 6 | 7 | import altair_transform 8 | 9 | TIMEUNITS = [ 10 | "year", 11 | "quarter", 12 | "month", 13 | "day", 14 | "date", 15 | "hours", 16 | "minutes", 17 | "seconds", 18 | "milliseconds", 19 | "yearquarter", 20 | "yearquartermonth", 21 | "yearmonth", 22 | "yearmonthdate", 23 | "yearmonthdatehours", 24 | "yearmonthdatehoursminutes", 25 | "yearmonthdatehoursminutesseconds", 26 | "quartermonth", 27 | "monthdate", 28 | "hoursminutes", 29 | "hoursminutesseconds", 30 | "minutesseconds", 31 | "secondsmilliseconds", 32 | ] 33 | 34 | 35 | @pytest.fixture 36 | def timezone(driver) -> str: 37 | return driver.get_tz_code() 38 | 39 | 40 | @pytest.fixture 41 | def data() -> pd.DataFrame: 42 | rand = np.random.RandomState(42) 43 | return pd.DataFrame( 44 | { 45 | "t": ( 46 | pd.to_datetime("2020-01-01") 47 | + pd.to_timedelta(rand.randint(0, 60_000_000, 50), unit="s") 48 | ).sort_values() 49 | } 50 | ) 51 | 52 | 53 | @pytest.mark.parametrize( 54 | "timeUnit,fmt", 55 | [ 56 | ("year", "%Y"), 57 | ("yearmonth", 
"%Y-%m"), 58 | ("yearmonthdate", "%Y-%m-%d"), 59 | ("monthdate", "2012-%m-%d"), 60 | ("date", "2012-01-%d"), 61 | ], 62 | ) 63 | def test_timeunit_transform(data: pd.DataFrame, timeUnit: str, fmt: str) -> None: 64 | transform = {"timeUnit": timeUnit, "field": "t", "as": "unit"} 65 | out = altair_transform.apply(data, transform) 66 | unit = pd.to_datetime(data.t.dt.strftime(fmt)) 67 | assert (out.unit == unit).all() 68 | 69 | 70 | @pytest.mark.parametrize("timeUnit", TIMEUNITS) 71 | def test_timeunit_against_js( 72 | driver, data: pd.DataFrame, timezone: str, timeUnit: str 73 | ) -> None: 74 | transform = {"timeUnit": timeUnit, "field": "t", "as": "unit"} 75 | 76 | got = altair_transform.apply(data, transform) 77 | 78 | data["t"] = data["t"].apply(lambda x: x.isoformat()) 79 | want = driver.apply(data, transform) 80 | 81 | want["t"] = ( 82 | pd.to_datetime(1e6 * want["t"]) 83 | .dt.tz_localize("UTC") 84 | .dt.tz_convert(timezone) 85 | .dt.tz_localize(None) 86 | ) 87 | want["unit"] = ( 88 | pd.to_datetime(want["unit"]).dt.tz_convert(timezone).dt.tz_localize(None) 89 | ) 90 | 91 | cols = ["t", "unit"] 92 | print(want[cols]) 93 | print(got[cols]) 94 | print(want[cols] - got[cols]) 95 | 96 | assert_frame_equal(want[cols], got[cols]) 97 | 98 | # want["t"] = pd.to_datetime(want["t"]) 99 | # want["unit"] = pd.to_datetime(want["unit"]) 100 | # want["unit_end"] = pd.to_datetime(want["unit_end"]) 101 | 102 | # assert_frame_equal( 103 | # got[sorted(got.columns)], 104 | # want[sorted(want.columns)], 105 | # check_dtype=False, 106 | # check_index_type=False, 107 | # check_less_precise=True, 108 | # ) 109 | -------------------------------------------------------------------------------- /altair_transform/transform/tests/test_aggregate.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Optional 2 | 3 | import numpy as np 4 | import pandas as pd 5 | from pandas.testing import assert_frame_equal 6 | import pytest 7 | 8 | import altair_transform 9 | from altair_transform.transform.aggregate import AGG_REPLACEMENTS 10 | 11 | AGGREGATES = [ 12 | "argmax", 13 | "argmin", 14 | "average", 15 | "count", 16 | "distinct", 17 | "max", 18 | "mean", 19 | "median", 20 | "min", 21 | "missing", 22 | "q1", 23 | "q3", 24 | "ci0", 25 | "ci1", 26 | "stderr", 27 | "stdev", 28 | "stdevp", 29 | "sum", 30 | "valid", 31 | "values", 32 | "variance", 33 | "variancep", 34 | ] 35 | 36 | AGG_SKIP = ["ci0", "ci1"] # These require scipy. 
37 | 38 | 39 | @pytest.fixture 40 | def data() -> pd.DataFrame: 41 | rand = np.random.RandomState(42) 42 | return pd.DataFrame({"x": rand.randint(0, 100, 12), "c": list("AAABBBCCCDDD")}) 43 | 44 | 45 | @pytest.mark.parametrize("groupby", [True, False]) 46 | @pytest.mark.parametrize("op", set(AGGREGATES) - set(AGG_SKIP)) 47 | def test_aggregate_transform(data: pd.DataFrame, groupby: bool, op: Any): 48 | field = "x" 49 | col = "z" 50 | group = "c" 51 | 52 | transform: Dict[str, Any] = {"aggregate": [{"op": op, "field": field, "as": col}]} 53 | if groupby: 54 | transform["groupby"] = [group] 55 | 56 | if op == "argmin": 57 | 58 | def op(col, df=data): 59 | return df.loc[col.idxmin()].to_dict() 60 | 61 | elif op == "argmax": 62 | 63 | def op(col, df=data): 64 | return df.loc[col.idxmax()].to_dict() 65 | 66 | else: 67 | op = AGG_REPLACEMENTS.get(op, op) 68 | 69 | out = altair_transform.apply(data, transform) 70 | 71 | data = data.reset_index(drop=True) 72 | 73 | if op == "values": 74 | if groupby: 75 | grouped = data.groupby(group).apply(lambda x: x.to_dict(orient="records")) 76 | grouped.name = col 77 | grouped = grouped.reset_index() 78 | else: 79 | grouped = pd.DataFrame({col: [data.to_dict(orient="records")]}) 80 | elif groupby: 81 | grouped = data.groupby(group)[field].aggregate(op) 82 | grouped.name = col 83 | grouped = grouped.reset_index() 84 | else: 85 | grouped = pd.DataFrame({col: [data[field].aggregate(op)]}) 86 | 87 | assert_frame_equal(grouped, out) 88 | 89 | 90 | @pytest.mark.parametrize("groupby", [None, ["c"]]) 91 | @pytest.mark.parametrize("op", set(AGGREGATES) - set(AGG_SKIP)) 92 | def test_aggregate_against_js( 93 | driver, data: pd.DataFrame, groupby: Optional[List[str]], op: str 94 | ) -> None: 95 | transform: Dict[str, Any] = {"aggregate": [{"op": op, "field": "x", "as": "z"}]} 96 | if groupby is not None: 97 | transform["groupby"] = groupby 98 | 99 | got = altair_transform.apply(data, transform) 100 | want = driver.apply(data, transform) 101 | 102 | print(data) 103 | print(got) 104 | print(want) 105 | 106 | assert_frame_equal( 107 | got[sorted(got.columns)], 108 | want[sorted(want.columns)], 109 | check_dtype=False, 110 | check_index_type=False, 111 | check_less_precise=True, 112 | ) 113 | -------------------------------------------------------------------------------- /altair_transform/transform/tests/test_bin.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List 2 | 3 | import numpy as np 4 | import pandas as pd 5 | from pandas.testing import assert_frame_equal 6 | import pytest 7 | 8 | import altair_transform 9 | 10 | 11 | @pytest.fixture 12 | def data() -> pd.DataFrame: 13 | rand = np.random.RandomState(42) 14 | return pd.DataFrame({"x": rand.randint(0, 100, 12)}) 15 | 16 | 17 | def test_bin_transform_simple(data: pd.DataFrame) -> None: 18 | transform = {"bin": True, "field": "x", "as": "xbin"} 19 | out = altair_transform.apply(data, transform) 20 | assert "xbin" in out.columns 21 | 22 | transform = {"bin": True, "field": "x", "as": ["xbin1", "xbin2"]} 23 | out = altair_transform.apply(data, transform) 24 | assert "xbin1" in out.columns 25 | assert "xbin2" in out.columns 26 | 27 | 28 | @pytest.mark.parametrize("maxbins", [5, 10, 20]) 29 | @pytest.mark.parametrize("nice", [True, False]) 30 | def test_bin_transform_maxbins(nice: bool, maxbins: int) -> None: 31 | data = pd.DataFrame({"x": np.arange(100)}) 32 | transform = {"bin": {"maxbins": maxbins, "nice": nice}, "field": "x", "as": "xbin"} 33 | out 
= altair_transform.apply(data, transform) 34 | assert "xbin" in out.columns 35 | assert "xbin_end" in out.columns 36 | bins = np.sort(out["xbin"].unique()) 37 | assert len(bins) - 1 <= maxbins 38 | assert not out.xbin.isnull().any() 39 | 40 | 41 | @pytest.mark.parametrize("step", [5, 10, 20]) 42 | @pytest.mark.parametrize("nice", [True, False]) 43 | def test_bin_transform_step(nice: bool, step: int) -> None: 44 | data = pd.DataFrame({"x": np.arange(100)}) 45 | transform = {"bin": {"step": step, "nice": nice}, "field": "x", "as": "xbin"} 46 | out = altair_transform.apply(data, transform) 47 | bins = np.sort(out.xbin.unique()) 48 | assert np.allclose(bins[1:] - bins[:-1], step) 49 | assert not out.xbin.isnull().any() 50 | 51 | 52 | @pytest.mark.parametrize("nice", [True, False]) 53 | def test_bin_transform_steps(nice: bool, steps: List[int] = [5, 10, 20]) -> None: 54 | data = pd.DataFrame({"x": range(100)}) 55 | transform = {"bin": {"steps": steps, "nice": nice}, "field": "x", "as": "xbin"} 56 | out = altair_transform.apply(data, transform) 57 | bins = np.sort(out.xbin.unique()) 58 | assert bins[1] - bins[0] in steps 59 | assert not out.xbin.isnull().any() 60 | 61 | 62 | @pytest.mark.parametrize( 63 | "transform", 64 | [ 65 | {"bin": True, "field": "x", "as": "xbin"}, 66 | {"bin": True, "field": "x", "as": ["xbin1", "xbin2"]}, 67 | {"bin": {"maxbins": 20}, "field": "x", "as": "xbin"}, 68 | {"bin": {"nice": False}, "field": "x", "as": "xbin"}, 69 | {"bin": {"anchor": 3.5}, "field": "x", "as": "xbin"}, 70 | {"bin": {"step": 20}, "field": "x", "as": "xbin"}, 71 | {"bin": {"base": 2}, "field": "x", "as": "xbin"}, 72 | {"bin": {"extent": [20, 80]}, "field": "x", "as": "xbin"}, 73 | ], 74 | ) 75 | def test_bin_against_js(driver, data: pd.DataFrame, transform: Dict[str, Any]) -> None: 76 | got = altair_transform.apply(data, transform) 77 | want = driver.apply(data, transform) 78 | assert_frame_equal( 79 | got[sorted(got.columns)], 80 | want[sorted(want.columns)], 81 | check_dtype=False, 82 | check_index_type=False, 83 | check_less_precise=True, 84 | ) 85 | -------------------------------------------------------------------------------- /altair_transform/utils/tests/_testcases.py: -------------------------------------------------------------------------------- 1 | """ 2 | Common test cases for parser & evaljs 3 | """ 4 | import functools 5 | import operator 6 | 7 | 8 | def extract(expressions): 9 | """Extract expressions from multi-line strings""" 10 | return ( 11 | line 12 | for line in expressions.splitlines() 13 | if line.strip() and not line.startswith("#") 14 | ) 15 | 16 | 17 | class Bunch: 18 | """A simple class to enable testing of attribute & item access""" 19 | 20 | def __init__(self, **kwargs): 21 | for key, val in kwargs.items(): 22 | setattr(self, key, val) 23 | 24 | def __getitem__(self, item): 25 | return getattr(self, item) 26 | 27 | 28 | NAMES = { 29 | "A": 10, 30 | "B": 20, 31 | "C": 30, 32 | "obj": Bunch(foo=1, bar=2, func=lambda x: x), 33 | "foo": "bar", 34 | "bar": "baz", 35 | "sum": lambda *args: sum(args), 36 | "prod": lambda *args: functools.reduce(operator.mul, args), 37 | "_123": 2.0, 38 | "abc_123": "hello", 39 | "true": True, 40 | "false": False, 41 | } 42 | 43 | EXPRESSIONS = r""" 44 | # Integers 45 | 0 46 | 12 47 | 234 48 | # Floats 49 | 3.14 50 | 0.10 51 | 10. 
52 | .1
53 | 1E5
54 | 2e6
55 | 3.7E02
56 | # Binary
57 | 0b0
58 | 0B10101
59 | # Octal
60 | 0o17
61 | 0O0
62 | # Hex
63 | 0xffaa11
64 | 0XF0c
65 | # Boolean
66 | true
67 | false
68 | # Strings
69 | 'abc123'
70 | 'a\'b\'c123'
71 | 'abc123\\'
72 | '\t""\n'
73 | "abc123"
74 | "a\"b\"c123"
75 | "abc123\\"
76 | "\t''\n"
77 | # Regex
78 | "/[0-9]+/"
79 | "/(?P<foo>foo)(?P<bar>bar)/g"
80 | # Globals
81 | A
82 | B
83 | C
84 | obj
85 | foo
86 | _123
87 | abc_123
88 | # Unary operations
89 | -1
90 | +3.5
91 | -A
92 | +B
93 | ~0b0101
94 | # Binary operations
95 | 1 + 1
96 | 2E3 - 1
97 | 0xF * 5.0
98 | A / B
99 | 2 ** 3
100 | # Comparisons
101 | 1 < 2
102 | A > B
103 | 5 <= 5
104 | A >= 10
105 | A == 10
106 | A != 10
107 | # Bitwise
108 | 3 | 4
109 | 3 & 4
110 | 3 ^ 4
111 | 4 << 2
112 | 4 >> 2
113 | # Compound operations
114 | 2 * 3 % 4 / 5
115 | 2 % 3 * 4 / 5
116 | 2 + 3 % 4
117 | 2 % 3 - 4
118 | 2.5 * 3 + 4 / 5.2
119 | 2.5 + 3 * 4 - 5.0
120 | 2.5 * (3 + 4)
121 | (2 * 3) + 4
122 | B * 3 ** 4
123 | 1.5 + 2. * .3
124 | -0.6 * (C / 1.5)
125 | 3 * (4 + C)
126 | A & B | ~C + 4 * 5
127 | # Functions
128 | prod(1, 2, 3)
129 | sum(1, 2, 3)
130 | prod(1, 2 * 4, -6)
131 | sum(1, (2 * 4), -6)
132 | A * prod(B, C)
133 | A * prod(B, sum(B, C))
134 | obj.func(2)
135 | # Lists
136 | []
137 | [2]
138 | [1 + 1]
139 | [A, 'foo', 23 * B, []]
140 | # Objects
141 | {}
142 | {'a': 4}
143 | {'a': 5, 'b': 5}
144 | # Attribute access
145 | obj.foo + C / 5
146 | obj["foo"] + C / 5
147 | (obj).bar + C * 2
148 | (obj)['bar'] + C * 2
149 | ['a', 'b', 'c'][1]
150 | """
151 | 
152 | BAD_EXPRESSIONS = r"""
153 | "'
154 | 1.B
155 | *24
156 | "\"
157 | (1, 2]
158 | [1, 2)
159 | B.1
160 | (1 + 2)[]
161 | [1;2]
162 | 009
163 | 0x01FG
164 | 00.56
165 | true : 3 ? 4
166 | """
167 | 
168 | JSONLY_EXPRESSIONS = [
169 |     ("{A, B, C: 3, 'd': 4, 1: 5}", {"A": 10, "B": 20, "C": 3, "d": 4, 1: 5}),
170 |     ("!true", False),
171 |     ("!false", True),
172 |     ("true && false", False),
173 |     ("true || false", True),
174 |     ("-12 >>> 2", 1073741821),
175 |     ("12 >>> 2", 3),
176 |     ("A === 10", True),
177 |     ("A !== 10", False),
178 |     ("true ? 0 + 1 : 1 + 1", 1),
179 |     ("!true ? 0 + 1 : 1 | 2", 3),
180 |     ("(true ? 1 : 2) ? 3 : 4", 3),
181 |     ("true ? 1 : (2 ? 3 : 4)", 1),
182 |     ("true ? 1 : 2 ? 3 : 4", 1),
183 | ]
184 | 
--------------------------------------------------------------------------------
/altair_transform/utils/timeunit.py:
--------------------------------------------------------------------------------
1 | """Utilities for working with pandas & JS datetimes."""
2 | import re
3 | from typing import Union, Set
4 | import pandas as pd
5 | from dateutil.tz import tzlocal
6 | 
7 | __all__ = ["compute_timeunit"]
8 | 
9 | Date = Union[pd.Series, pd.DatetimeIndex, pd.Timestamp]
10 | 
11 | 
12 | def compute_timeunit(date: Date, timeunit: str) -> Date:
13 |     """Evaluate a timeUnit transform.
14 | 
15 |     Parameters
16 |     ----------
17 |     date : pd.DatetimeIndex, pd.Series, or pd.Timestamp
18 |         The date to be converted
19 |     timeunit : string
20 |         The Altair timeUnit identifier.
21 | 
22 |     Returns
23 |     -------
24 |     date_tu : pd.DatetimeIndex, pd.Series, or pd.Timestamp
25 |         The converted date, of the same type as the input.
26 |     """
27 |     # Convert to either UTC or localtime as appropriate.
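    # (A hedged example of the end-to-end conversion, using a 'month' unit:
    #
    #     >>> import pandas as pd
    #     >>> compute_timeunit(pd.Timestamp("2019-04-15 11:30"), "month")
    #     Timestamp('2012-04-01 00:00:00')
    #
    # components not named by the unit collapse to Vega's reference year 2012.)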
28 |     def dt(date):
29 |         return date.dt if isinstance(date, pd.Series) else date
30 | 
31 |     if dt(date).tz is None:
32 |         date = dt(date).tz_localize(tzlocal())
33 |     date = dt(date).tz_convert("UTC" if timeunit.startswith("utc") else tzlocal())
34 | 
35 |     if isinstance(date, pd.Series):
36 |         return pd.Series(_compute_timeunit(timeunit, date.dt))
37 |     elif isinstance(date, pd.Timestamp):
38 |         return _compute_timeunit(timeunit, pd.DatetimeIndex([date]))[0]
39 |     else:
40 |         return _compute_timeunit(timeunit, date)
41 | 
42 | 
43 | _simple_timeunits = [
44 |     "utc",
45 |     "year",
46 |     "quarter",
47 |     "month",
48 |     "day",
49 |     "date",
50 |     "hours",
51 |     "minutes",
52 |     "seconds",
53 |     "milliseconds",
54 | ]
55 | _elements = "".join(f"(?P<{name}>{name})?" for name in _simple_timeunits)
56 | _timeunit_regex = re.compile(f"^{_elements}$")
57 | 
58 | 
59 | def _parse_timeunit_string(timeunit: str) -> Set[str]:
60 |     """Return the set of timeunit keys in a specification string."""
61 |     match = _timeunit_regex.match(timeunit)
62 |     if not match:
63 |         raise ValueError(f"Unrecognized timeUnit: {timeunit!r}")
64 |     return {k for k, v in match.groupdict().items() if v}
65 | 
66 | 
67 | def _compute_timeunit(name: str, date: pd.DatetimeIndex) -> pd.DatetimeIndex:
68 |     """Workhorse for compute_timeunit."""
69 |     if name in ["day", "utcday"]:
70 |         return pd.to_datetime("2012-01-01") + pd.to_timedelta(
71 |             (date.dayofweek + 1) % 7, "D"
72 |         )
73 |     units = _parse_timeunit_string(name)
74 |     if "day" in units:
75 |         raise NotImplementedError("quarter and day timeunit")
76 |     if not units:
77 |         raise ValueError(f"{name!r} is not a recognized timeunit")
78 | 
79 |     def quarter(month: pd.Int64Index) -> pd.Int64Index:
80 |         return month - (month - 1) % 3
81 | 
82 |     Y = date.year.astype(str) if "year" in units else "2012"
83 |     M = (
84 |         date.month.astype(str).str.zfill(2)
85 |         if "month" in units
86 |         else (
87 |             quarter(date.month).astype(str).str.zfill(2) if "quarter" in units else "01"
88 |         )
89 |     )
90 |     D = date.day.astype(str).str.zfill(2) if "date" in units else "01"
91 |     h = date.hour.astype(str).str.zfill(2) if "hours" in units else "00"
92 |     m = date.minute.astype(str).str.zfill(2) if "minutes" in units else "00"
93 |     s = date.second.astype(str).str.zfill(2) if "seconds" in units else "00"
94 |     ms = (
95 |         (date.microsecond // 1000).astype(str).str.zfill(3)
96 |         if "milliseconds" in units
97 |         else "00"
98 |     )
99 |     return pd.to_datetime(
100 |         Y + "-" + M + "-" + D + " " + h + ":" + m + ":" + s + "."
+ ms 101 | ) 102 | -------------------------------------------------------------------------------- /altair_transform/transform/tests/test_regression.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Optional 2 | 3 | import numpy as np 4 | import pandas as pd 5 | from pandas.testing import assert_frame_equal 6 | import pytest 7 | 8 | import altair_transform 9 | 10 | 11 | @pytest.fixture 12 | def data() -> pd.DataFrame: 13 | rand = np.random.RandomState(42) 14 | df = pd.DataFrame( 15 | { 16 | "x": rand.randint(0, 100, 12), 17 | "y": rand.randint(0, 100, 12), 18 | "g": list(6 * "AB"), 19 | } 20 | ) 21 | return df 22 | 23 | 24 | def test_linear() -> None: 25 | data = pd.DataFrame({"x": [0, 1, 2, 3, 4], "y": [2, 4, 6, 8, 10]}) 26 | transform = {"regression": "y", "on": "x"} 27 | out = altair_transform.apply(data, transform) 28 | assert_frame_equal( 29 | out, pd.DataFrame({"x": [0.0, 4.0], "y": [2.0, 10.0]}), check_dtype=False 30 | ) 31 | 32 | 33 | def test_linear_groupby() -> None: 34 | data = pd.DataFrame( 35 | { 36 | "x": [0, 1, 2, 3, 4, 1, 2, 3], 37 | "y": [2, 4, 6, 8, 10, 2, 3, 4], 38 | "g": [0, 0, 0, 0, 0, 1, 1, 1], 39 | } 40 | ) 41 | transform = {"regression": "y", "on": "x", "groupby": ["g"]} 42 | out = altair_transform.apply(data, transform) 43 | assert_frame_equal( 44 | out[out.g == 0].reset_index(drop=True), 45 | pd.DataFrame({"g": [0, 0], "x": [0.0, 4.0], "y": [2.0, 10.0]}), 46 | check_dtype=False, 47 | ) 48 | assert_frame_equal( 49 | out[out.g == 1].reset_index(drop=True), 50 | pd.DataFrame({"g": [1, 1], "x": [1.0, 3.0], "y": [2.0, 4.0]}), 51 | check_dtype=False, 52 | ) 53 | 54 | 55 | @pytest.mark.parametrize( 56 | "method,coef", [("linear", [1, 2]), ("quad", [1, 2, 0]), ("poly", [1, 2, 0, 0])] 57 | ) 58 | def test_linear_params(method: str, coef: List[int]) -> None: 59 | data = pd.DataFrame({"x": [0, 1, 2, 3, 4], "y": [1, 3, 5, 7, 9]}) 60 | transform = {"regression": "y", "on": "x", "params": True, "method": method} 61 | out = altair_transform.apply(data, transform) 62 | assert_frame_equal(out, pd.DataFrame({"coef": [coef], "rSquared": [1.0]})) 63 | 64 | 65 | @pytest.mark.parametrize("groupby", [None, ["g"]]) 66 | @pytest.mark.parametrize("method,order", [("linear", 1), ("quad", 2)]) 67 | def test_poly_vs_linear(groupby: List[str], method: str, order: int) -> None: 68 | data = pd.DataFrame( 69 | { 70 | "x": [0, 1, 2, 3, 4, 1, 2, 3], 71 | "y": [2, 4, 6, 8, 10, 2, 3, 4], 72 | "g": [0, 0, 0, 0, 0, 1, 1, 1], 73 | } 74 | ) 75 | kwds = {} if not groupby else {"groupby": groupby} 76 | out1 = altair_transform.apply( 77 | data, {"regression": "y", "on": "x", "method": method, **kwds} 78 | ) 79 | out2 = altair_transform.apply( 80 | data, {"regression": "y", "on": "x", "method": "poly", "order": order, **kwds} 81 | ) 82 | assert_frame_equal(out1, out2, check_dtype=False) 83 | 84 | 85 | @pytest.mark.parametrize("method", ["linear", "log", "exp", "pow", "quad", "poly"]) 86 | @pytest.mark.parametrize("params", [True, False]) 87 | @pytest.mark.parametrize("groupby", [None, ["g"]]) 88 | def test_regression_against_js( 89 | driver, data: pd.DataFrame, method: str, params: str, groupby: Optional[List[str]], 90 | ) -> None: 91 | transform: Dict[str, Any] = { 92 | "regression": "y", 93 | "on": "x", 94 | "method": method, 95 | "params": params, 96 | } 97 | if groupby: 98 | transform["groupby"] = groupby 99 | got = altair_transform.apply(data, transform) 100 | want = driver.apply(data, transform) 101 | 102 | # Account for 
differences in handling of undefined between browsers.
103 |     if params and not groupby and got.shape != want.shape:
104 |         got["keys"] = [None]
105 | 
106 |     assert_frame_equal(
107 |         got[sorted(got.columns)],
108 |         want[sorted(want.columns)],
109 |         check_dtype=False,
110 |         check_index_type=False,
111 |         check_less_precise=True,
112 |     )
113 | 
--------------------------------------------------------------------------------
/altair_transform/transform/filter.py:
--------------------------------------------------------------------------------
1 | from functools import singledispatch
2 | from typing import Any
3 | 
4 | import altair as alt
5 | import numpy as np
6 | import pandas as pd
7 | from .visitor import visit
8 | from ..vegaexpr import eval_vegajs
9 | 
10 | 
11 | @visit.register(alt.FilterTransform)
12 | def visit_filter(transform: alt.FilterTransform, df: pd.DataFrame) -> pd.DataFrame:
13 |     mask = eval_predicate(transform.filter, df).astype(bool)
14 |     return df[mask].reset_index(drop=True)
15 | 
16 | 
17 | def get_column(df: pd.DataFrame, predicate: Any) -> pd.Series:
18 |     """Get the transformed column from the predicate."""
19 |     if predicate.timeUnit is not alt.Undefined:
20 |         raise NotImplementedError("timeUnit Transform in Predicates")
21 |     return df[eval_value(predicate["field"])]
22 | 
23 | 
24 | @singledispatch
25 | def eval_predicate(predicate: Any, df: pd.DataFrame) -> pd.Series:
26 |     raise NotImplementedError(f"Evaluating predicate of type {type(predicate)}")
27 | 
28 | 
29 | @eval_predicate.register(dict)
30 | def eval_dict(predicate: dict, df: pd.DataFrame) -> pd.Series:
31 |     transform = alt.FilterTransform({"filter": predicate})
32 |     return eval_predicate(transform.filter, df)
33 | 
34 | 
35 | @eval_predicate.register(str)
36 | def eval_string(predicate: str, df: pd.DataFrame) -> pd.Series:
37 |     return df.apply(lambda datum: eval_vegajs(predicate, datum), axis=1)
38 | 
39 | 
40 | @eval_predicate.register(alt.FieldEqualPredicate)
41 | def eval_field_equal(predicate: alt.FieldEqualPredicate, df: pd.DataFrame) -> pd.Series:
42 |     return get_column(df, predicate) == eval_value(predicate.equal)
43 | 
44 | 
45 | @eval_predicate.register(alt.FieldRangePredicate)
46 | def eval_field_range(predicate: alt.FieldRangePredicate, df: pd.DataFrame) -> pd.Series:
47 |     min_, max_ = [eval_value(val) for val in predicate.range]
48 |     column = get_column(df, predicate)
49 |     if min_ is None:
50 |         min_ = column.min()
51 |     if max_ is None:
52 |         max_ = column.max()
53 |     return column.between(min_, max_, inclusive=True)
54 | 
55 | 
56 | @eval_predicate.register(alt.FieldOneOfPredicate)
57 | def eval_field_oneof(predicate: alt.FieldOneOfPredicate, df: pd.DataFrame) -> pd.Series:
58 |     options = [eval_value(val) for val in predicate.oneOf]
59 |     return get_column(df, predicate).isin(options)
60 | 
61 | 
62 | @eval_predicate.register(alt.FieldLTPredicate)
63 | def eval_field_lt(predicate: alt.FieldLTPredicate, df: pd.DataFrame) -> pd.Series:
64 |     return get_column(df, predicate) < eval_value(predicate.lt)
65 | 
66 | 
67 | @eval_predicate.register(alt.FieldLTEPredicate)
68 | def eval_field_lte(predicate: alt.FieldLTEPredicate, df: pd.DataFrame) -> pd.Series:
69 |     return get_column(df, predicate) <= eval_value(predicate.lte)
70 | 
71 | 
72 | @eval_predicate.register(alt.FieldGTPredicate)
73 | def eval_field_gt(predicate: alt.FieldGTPredicate, df: pd.DataFrame) -> pd.Series:
74 |     return get_column(df, predicate) > eval_value(predicate.gt)
75 | 
76 | 
77 | @eval_predicate.register(alt.FieldGTEPredicate)
78 | def eval_field_gte(predicate:
alt.FieldGTEPredicate, df: pd.DataFrame) -> pd.Series: 79 | return get_column(df, predicate) >= eval_value(predicate.gte) 80 | 81 | 82 | @eval_predicate.register(alt.LogicalNotPredicate) 83 | def eval_logical_not(predicate: alt.LogicalNotPredicate, df: pd.DataFrame) -> pd.Series: 84 | return ~eval_predicate(predicate["not"], df) 85 | 86 | 87 | @eval_predicate.register(alt.LogicalAndPredicate) 88 | def eval_logical_and(predicate: alt.LogicalAndPredicate, df: pd.DataFrame) -> pd.Series: 89 | return np.logical_and.reduce([eval_predicate(p, df) for p in predicate["and"]]) 90 | 91 | 92 | @eval_predicate.register(alt.LogicalOrPredicate) 93 | def eval_logical_or(predicate: alt.LogicalOrPredicate, df: pd.DataFrame) -> pd.Series: 94 | return np.logical_or.reduce([eval_predicate(p, df) for p in predicate["or"]]) 95 | 96 | 97 | @singledispatch 98 | def eval_value(value: Any) -> Any: 99 | return value 100 | 101 | 102 | @eval_value.register(alt.DateTime) 103 | def eval_datetime(value: alt.DateTime) -> pd.Series: 104 | # TODO: implement datetime conversion & comparison 105 | raise NotImplementedError("Evaluating alt.DateTime object") 106 | 107 | 108 | @eval_value.register(alt.SchemaBase) 109 | def eval_schemabase(value: alt.SchemaBase) -> dict: 110 | return value.to_dict() 111 | -------------------------------------------------------------------------------- /altair_transform/core.py: -------------------------------------------------------------------------------- 1 | """Core altair_transform routines.""" 2 | 3 | from typing import List, Union 4 | 5 | import pandas as pd 6 | import altair as alt 7 | 8 | from altair_transform.transform import visit 9 | from altair_transform.utils import to_dataframe 10 | from altair_transform.extract import extract_transform 11 | 12 | __all__ = ["apply", "extract_data", "transform_chart"] 13 | 14 | 15 | def apply( 16 | df: pd.DataFrame, 17 | transform: Union[alt.Transform, List[alt.Transform]], 18 | inplace: bool = False, 19 | ) -> pd.DataFrame: 20 | """Apply transform or transforms to dataframe. 21 | 22 | Parameters 23 | ---------- 24 | df : pd.DataFrame 25 | transform : list|dict 26 | A transform specification or list of transform specifications. 27 | Each specification must be valid according to Altair's transform 28 | schema. 29 | inplace : bool 30 | If True, then dataframe may be modified in-place. Default: False. 31 | 32 | Returns 33 | ------- 34 | df_transformed : pd.DataFrame 35 | The transformed dataframe. 36 | 37 | Example 38 | ------- 39 | >>> import pandas as pd 40 | >>> data = pd.DataFrame({'x': range(5), 'y': list('ABCAB')}) 41 | >>> chart = alt.Chart(data).transform_aggregate(sum_x='sum(x)', groupby=['y']) 42 | >>> apply(data, chart.transform) 43 | y sum_x 44 | 0 A 3 45 | 1 B 5 46 | 2 C 2 47 | """ 48 | if not inplace: 49 | df = df.copy() 50 | if transform is alt.Undefined: 51 | return df 52 | return visit(transform, df) 53 | 54 | 55 | def extract_data( 56 | chart: alt.Chart, apply_encoding_transforms: bool = True 57 | ) -> pd.DataFrame: 58 | """Extract transformed data from a chart. 59 | 60 | This only works with data and transform defined at the 61 | top level of the chart. 62 | 63 | Parameters 64 | ---------- 65 | chart : alt.Chart 66 | The chart instance from which the data and transform 67 | will be extracted 68 | apply_encoding_transforms : bool 69 | If True (default), then apply transforms specified within an 70 | encoding as well as those specified directly in the transforms 71 | attribute. 
72 | 73 | Returns 74 | ------- 75 | df_transformed : pd.DataFrame 76 | The extracted and transformed dataframe. 77 | 78 | Example 79 | ------- 80 | >>> import pandas as pd 81 | >>> data = pd.DataFrame({'x': range(5), 'y': list('ABCAB')}) 82 | >>> chart = alt.Chart(data).mark_bar().encode(x='sum(x)', y='y') 83 | >>> extract_data(chart) 84 | y sum_x 85 | 0 A 3 86 | 1 B 5 87 | 2 C 2 88 | """ 89 | if apply_encoding_transforms: 90 | chart = extract_transform(chart) 91 | return apply(to_dataframe(chart.data, chart), chart.transform) 92 | 93 | 94 | def transform_chart( 95 | chart: alt.Chart, extract_encoding_transforms: bool = True 96 | ) -> alt.Chart: 97 | """Return a chart with the transformed data 98 | 99 | Parameters 100 | ---------- 101 | chart : alt.Chart 102 | The chart instance from which the data and transform 103 | will be extracted. 104 | extract_encoding_transforms : bool 105 | If True (default), then also extract transforms from encodings. 106 | 107 | Returns 108 | ------- 109 | chart_out : alt.Chart 110 | A copy of the input chart with the transformed data. 111 | 112 | Example 113 | ------- 114 | >>> import pandas as pd 115 | >>> data = pd.DataFrame({'x': range(5), 'y': list('ABCAB')}) 116 | >>> chart = alt.Chart(data).mark_bar().encode(x='sum(x)', y='y') 117 | >>> new_chart = transform_chart(chart) 118 | >>> new_chart.data 119 | y sum_x 120 | 0 A 3 121 | 1 B 5 122 | 2 C 2 123 | >>> new_chart.encoding 124 | FacetedEncoding({ 125 | x: PositionFieldDef({ 126 | field: FieldName('sum_x'), 127 | title: 'Sum of x', 128 | type: StandardType('quantitative') 129 | }), 130 | y: PositionFieldDef({ 131 | field: FieldName('y'), 132 | type: StandardType('nominal') 133 | }) 134 | }) 135 | """ 136 | if extract_encoding_transforms: 137 | chart = extract_transform(chart) 138 | chart = chart.properties(data=extract_data(chart, apply_encoding_transforms=False)) 139 | chart.transform = alt.Undefined 140 | return chart 141 | -------------------------------------------------------------------------------- /altair_transform/transform/tests/test_transform.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import numpy as np 4 | import pandas as pd 5 | from pandas.testing import assert_frame_equal 6 | 7 | import altair_transform 8 | from altair_transform.transform.aggregate import AGG_REPLACEMENTS 9 | 10 | 11 | AGGREGATES = [ 12 | "argmax", 13 | "argmin", 14 | "average", 15 | "count", 16 | "distinct", 17 | "max", 18 | "mean", 19 | "median", 20 | "min", 21 | "missing", 22 | "q1", 23 | "q3", 24 | "ci0", 25 | "ci1", 26 | "stderr", 27 | "stdev", 28 | "stdevp", 29 | "sum", 30 | "valid", 31 | "values", 32 | "variance", 33 | "variancep", 34 | ] 35 | 36 | AGG_SKIP = ["ci0", "ci1", "values"] # These require scipy. 
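
# joinaggregate, exercised below, differs from aggregate in that the
# aggregated value is joined back onto every input row rather than
# collapsing the table (a hedged, minimal example):
#
#     >>> import pandas as pd
#     >>> import altair_transform
#     >>> df = pd.DataFrame({"x": [1, 2, 3]})
#     >>> transform = {"joinaggregate": [{"op": "sum", "field": "x", "as": "z"}]}
#     >>> altair_transform.apply(df, transform)["z"].tolist()
#     [6, 6, 6]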
37 | 38 | 39 | @pytest.fixture 40 | def data(): 41 | rand = np.random.RandomState(42) 42 | return pd.DataFrame( 43 | { 44 | "x": rand.randint(0, 100, 12), 45 | "y": rand.randint(0, 100, 12), 46 | "t": pd.date_range("2012-01-15", freq="M", periods=12), 47 | "i": range(12), 48 | "c": list("AAABBBCCCDDD"), 49 | "d": list("ABCABCABCABC"), 50 | } 51 | ) 52 | 53 | 54 | def test_calculate_transform(data): 55 | transform = {"calculate": "datum.x + datum.y", "as": "z"} 56 | out1 = altair_transform.apply(data, transform) 57 | 58 | out2 = data.copy() 59 | out2["z"] = data.x + data.y 60 | 61 | assert_frame_equal(out1, out2) 62 | 63 | 64 | @pytest.mark.parametrize("groupby", [True, False]) 65 | @pytest.mark.parametrize("op", set(AGGREGATES) - set(AGG_SKIP)) 66 | def test_joinaggregate_transform(data, groupby, op): 67 | field = "x" 68 | col = "z" 69 | group = "c" 70 | 71 | transform = {"joinaggregate": [{"op": op, "field": field, "as": col}]} 72 | if groupby: 73 | transform["groupby"] = [group] 74 | 75 | op = AGG_REPLACEMENTS.get(op, op) 76 | out = altair_transform.apply(data, transform) 77 | 78 | def validate(group): 79 | return np.allclose(group[field].aggregate(op), group[col]) 80 | 81 | if groupby: 82 | assert out.groupby(group).apply(validate).all() 83 | else: 84 | assert validate(out) 85 | 86 | 87 | def test_quantile_values(): 88 | np.random.seed(0) 89 | data = pd.DataFrame( 90 | {"x": np.random.randn(12), "C": np.random.choice(["A", "B"], 12)} 91 | ) 92 | transform = {"quantile": "x", "groupby": ["C"], "as": ["p", "v"], "step": 0.1} 93 | # Copied from vega editor for above data/transform 94 | expected = pd.DataFrame( 95 | [ 96 | ["A", 0.05, -0.853389779139604], 97 | ["A", 0.15, -0.6056135776659901], 98 | ["A", 0.25, -0.3578373761923762], 99 | ["A", 0.35, -0.12325942278589436], 100 | ["A", 0.45, 0.04532729028492671], 101 | ["A", 0.55, 0.21391400335574778], 102 | ["A", 0.65, 0.38250071642656897], 103 | ["A", 0.75, 0.7489619629456958], 104 | ["A", 0.85, 1.1549981161544833], 105 | ["A", 0.95, 1.5610342693632706], 106 | ["B", 0.05, -0.016677003759505288], 107 | ["B", 0.15, 0.15684925302119532], 108 | ["B", 0.25, 0.336128799065637], 109 | ["B", 0.35, 0.6476262524884882], 110 | ["B", 0.45, 0.9543858525126119], 111 | ["B", 0.55, 0.9744405491187167], 112 | ["B", 0.65, 1.2402825216772193], 113 | ["B", 0.75, 1.5575946277597235], 114 | ["B", 0.85, 1.8468937659906184], 115 | ["B", 0.95, 2.1102258760334363], 116 | ], 117 | columns=["C", "p", "v"], 118 | ) 119 | out = altair_transform.apply(data, transform) 120 | assert_frame_equal(out, expected) 121 | 122 | 123 | @pytest.mark.parametrize("N", [1, 5, 50]) 124 | def test_sample_transform(data, N): 125 | transform = {"sample": N} 126 | out = altair_transform.apply(data, transform) 127 | 128 | # Ensure the shape is correct 129 | assert out.shape == (min(N, data.shape[0]), data.shape[1]) 130 | 131 | # Ensure the content are correct 132 | assert_frame_equal(out, data.iloc[out.index]) 133 | 134 | 135 | def test_multiple_transforms(data): 136 | transform = [ 137 | {"calculate": "0.5 * (datum.x + datum.y)", "as": "xy_mean"}, 138 | {"filter": "datum.x < datum.xy_mean"}, 139 | ] 140 | out1 = altair_transform.apply(data, transform) 141 | out2 = data.copy() 142 | out2["xy_mean"] = 0.5 * (data.x + data.y) 143 | out2 = out2[out2.x < out2.xy_mean].reset_index(drop=True) 144 | 145 | assert_frame_equal(out1, out2) 146 | -------------------------------------------------------------------------------- /altair_transform/tests/test_vegaexpr.py: 
-------------------------------------------------------------------------------- 1 | import datetime as dt 2 | import pytest 3 | import numpy as np 4 | from altair_transform.vegaexpr import eval_vegajs, undefined, JSRegex 5 | 6 | # Most parsing is tested in the parser; here we just test a sampling of the 7 | # variables and functions defined in the vegaexpr namespace. 8 | 9 | EXPRESSIONS = { 10 | "null": None, 11 | "true": True, 12 | "false": False, 13 | "/[A-Za-z0-9]+/": JSRegex("[A-Za-z0-9]+"), 14 | "/.*/i": JSRegex(".*", "i"), 15 | "{}[1]": undefined, 16 | "{}.foo": undefined, 17 | "[][0]": undefined, 18 | "2 * PI": 2 * np.pi, 19 | "1 / SQRT2": 1.0 / np.sqrt(2), 20 | "LOG2E + LN10": np.log2(np.e) + np.log(10), 21 | "isArray([1, 2, 3])": True, 22 | "isBoolean(false)": True, 23 | "isBoolean(true)": True, 24 | "isBoolean(1)": False, 25 | "isDate(datetime(2019, 1, 1))": True, 26 | "isDate('2019-01-01')": False, 27 | "isDefined(null)": True, 28 | "isDefined({}[1])": False, 29 | "isNumber(3.5)": True, 30 | "isNumber(now())": True, 31 | "isString('abc')": True, 32 | 'isString("abc")': True, 33 | "isObject({a:2})": True, 34 | "isObject({'a':2})": True, 35 | "isRegExp(/[A-Z0-9]+/)": True, 36 | "isRegExp('[A-Z0-9]+')": False, 37 | "isValid(null)": False, 38 | "isValid(NaN)": False, 39 | "isValid({}[1])": False, 40 | "isValid(0)": True, 41 | "toBoolean(1)": True, 42 | "toBoolean(0)": False, 43 | "toDate('')": None, 44 | "toDate(null)": None, 45 | "toDate(1547510400000)": 1547510400000, 46 | "toDate('2019-01-15')": 1547510400000, 47 | "toNumber('1234.5')": 1234.5, 48 | "toNumber('')": None, 49 | "toNumber(null)": None, 50 | "toString(123)": "123", 51 | "toString(0.5)": "0.5", 52 | "toString('')": None, 53 | "toString(null)": None, 54 | "toString(123)": "123", 55 | "toString('123')": "123", 56 | 'if(4 > PI, "yes", "no")': "yes", 57 | "pow(sin(PI), 2) + pow(cos(PI), 2)": 1, 58 | "floor(1.5) == ceil(0.5)": True, 59 | "max(1, 2, 3) == min(3, 4, 5)": True, 60 | "time(datetime(1546338896789))": 1546338896789, 61 | "isDate(datetime())": True, 62 | "datetime(1546329600000)": dt.datetime.fromtimestamp(1546329600), 63 | "datetime(2019, 0, 1)": dt.datetime(2019, 1, 1), 64 | "year(datetime(2019, 0, 1, 2, 34, 56, 789))": 2019, 65 | "quarter(datetime(2019, 0, 1, 2, 34, 56, 789))": 0, 66 | "month(datetime(2019, 0, 1, 2, 34, 56, 789))": 0, 67 | "date(datetime(2019, 0, 1, 2, 34, 56, 789))": 1, 68 | "day(datetime(2019, 0, 1, 2, 34, 56, 789))": 2, 69 | "hours(datetime(2019, 0, 1, 2, 34, 56, 789))": 2, 70 | "minutes(datetime(2019, 0, 1, 2, 34, 56, 789))": 34, 71 | "seconds(datetime(2019, 0, 1, 2, 34, 56, 789))": 56, 72 | "milliseconds(datetime(2019, 0, 1, 2, 34, 56, 789))": 789, 73 | "utc(2019, 0, 1, 2, 34, 56, 789)": 1546310096789, 74 | "utcyear(datetime(utc(2019, 0, 1, 2, 34, 56, 789)))": 2019, 75 | "utcquarter(datetime(utc(2019, 0, 1, 2, 34, 56, 789)))": 0, 76 | "utcmonth(datetime(utc(2019, 0, 1, 2, 34, 56, 789)))": 0, 77 | "utcdate(datetime(utc(2019, 0, 1, 2, 34, 56, 789)))": 1, 78 | "utcday(datetime(utc(2019, 0, 1, 2, 34, 56, 789)))": 2, 79 | "utchours(datetime(utc(2019, 0, 1, 2, 34, 56, 789)))": 2, 80 | "utcminutes(datetime(utc(2019, 0, 1, 2, 34, 56, 789)))": 34, 81 | "utcseconds(datetime(utc(2019, 0, 1, 2, 34, 56, 789)))": 56, 82 | "utcmilliseconds(datetime(utc(2019, 0, 1, 2, 34, 56, 789)))": 789, 83 | "parseInt('1234 years')": 1234, 84 | "parseInt('2A', 16)": 42, 85 | "parseFloat(' 3.125 is close to pi')": 3.125, 86 | "indexof('ABCABC', 'C')": 2, 87 | "lastindexof('ABCABC', 'C')": 5, 88 | "length('ABCABC')": 
6,
89 |     "lower('AbC')": "abc",
90 |     "pad('abc', 6, 'x', 'left')": "xxxabc",
91 |     "pad('abc', 6, 'x', 'right')": "abcxxx",
92 |     "pad('abc', 6, 'x', 'center')": "xabcxx",
93 |     "replace('ABCDABCD', 'BC', 'xx')": "AxxDABCD",
94 |     "replace('ABCDABCD', /[B-D]+/, 'xxx')": "AxxxABCD",
95 |     "replace('ABCDABCD', /BC/g, 'xx')": "AxxDAxxD",
96 |     "split('AB CD EF', ' ')": ["AB", "CD", "EF"],
97 |     "substring('ABCDEF', 3, -1)": "ABC",
98 |     "slice('ABCDEF', 3, -1)": "DE",
99 |     "trim(' ABC ')": "ABC",
100 |     "truncate('1234567', 4, 'right', 'x')": "123x",
101 |     "truncate('1234567', 4, 'left', 'x')": "x567",
102 |     "truncate('1234567', 4, 'center', 'x')": "12x7",
103 |     "upper('AbC')": "ABC",
104 |     "extent([5, {}[1], 2, null, 4, NaN, 1])": [1, 5],
105 |     "clampRange([5, 2], 1, 7)": [2, 5],
106 |     "clampRange([5, 2], 3, 7)": [3, 6],
107 |     "clampRange([5, 2], 0, 4)": [1, 4],
108 |     "clampRange([5, 2], 3, 4)": [3, 4],
109 |     "inrange(4, [3, 4])": True,
110 |     "inrange(4, [4, 5])": True,
111 |     "inrange(4, [5, 7])": False,
112 |     "join(['a', 'b', 'c'])": "a,b,c",
113 |     "join(['a', 'b', 'c'], '-')": "a-b-c",
114 |     "lerp([0, 50], 0.5)": 25.0,
115 |     "peek([1, 2, 3])": 3,
116 |     "reverse([1, 2, 3])": [3, 2, 1],
117 |     "sequence(3)": [0, 1, 2],
118 |     "sequence(1, 4)": [1, 2, 3],
119 |     "sequence(0, 2, 0.5)": [0, 0.5, 1, 1.5],
120 |     "slice([1, 2, 3, 4], 1, 3)": [2, 3],
121 |     "span([0, 2, 4])": 4,
122 |     "regexp('[A-Z]?','g')": JSRegex("[A-Z]?", "g"),
123 |     "test(/[A-Z]+/, '123ABC')": True,
124 |     "test(/[A-Z]+/y, '123ABC')": False,
125 | }
126 | 
127 | 
128 | @pytest.mark.parametrize("expression,expected", EXPRESSIONS.items())
129 | def test_vegajs_expressions(expression, expected):
130 |     result = eval_vegajs(expression)
131 |     if isinstance(result, float):
132 |         assert np.allclose(result, expected)
133 |     else:
134 |         assert result == expected
135 | 
--------------------------------------------------------------------------------
/altair_transform/driver.py:
--------------------------------------------------------------------------------
1 | """Extract transformed data directly via a selenium webdriver."""
2 | import io
3 | import json
4 | from typing import Any, Dict, List, Optional, Union
5 | 
6 | import altair as alt
7 | import pandas as pd
8 | 
9 | JSON = Union[str, int, float, bool, None, Dict[str, Any], List[Any]]
10 | JSONDict = Dict[str, JSON]
11 | 
12 | CDN_URL = "https://cdn.jsdelivr.net/npm/{package}@{version}"
13 | 
14 | HTML_TEMPLATE = """
15 | <!DOCTYPE html>
16 | <html>
17 | <head>
18 |   <title>Embedding Vega-Lite</title>
19 |   <script src="{vega_url}"></script>
20 |   <script src="{vegalite_url}"></script>
21 |   <script src="{vegaembed_url}"></script>
22 | </head>
23 | <body>
24 |   <div id="vis"></div>
25 | </body>
26 | </html>
27 | """
28 | 
29 | EXTRACT_CODE = """
30 | var spec = arguments[0];
31 | var name = arguments[1];
32 | var done = arguments[2];
33 | 
34 | vegaEmbed("#vis", spec, {"mode": "vega-lite"})
35 |   .then(result => done({data: JSON.stringify(result.view.data(name))}))
36 |   .catch(error => done({error: error.toString()}));
37 | """
38 | 
39 | 
40 | def _serialize(df: pd.DataFrame) -> JSONDict:
41 |     """Serialize a dataframe to a JSON dict."""
42 |     return json.loads(df.to_json(orient="table"))
43 | 
44 | 
45 | def _load(serialized: JSONDict) -> pd.DataFrame:
46 |     """Load a dataframe from a JSON dict."""
47 |     return pd.read_json(io.StringIO(json.dumps(serialized)), orient="table")
48 | 
49 | 
50 | def _extract_data(spec: JSONDict, name: str = "data_0") -> pd.DataFrame:
51 |     """Extract named data from a Vega-Lite chart spec.
52 | 
53 |     Parameters
54 |     ----------
55 |     spec : dict
56 |         The Vega-Lite specification containing the data to extract
57 | 
58 |     name : string
59 |         The name of the data stream to extract
60 | 
61 |     Returns
62 |     -------
63 |     data : pd.DataFrame
64 |         The extracted data
65 |     """
66 |     # Optional deps
67 |     from selenium.common.exceptions import NoSuchElementException
68 |     from altair_saver import SeleniumSaver
69 |     from altair_viewer import get_bundled_script
70 | 
71 |     js_resources = {
72 |         "vega.js": get_bundled_script("vega", alt.VEGA_VERSION),
73 |         "vega-lite.js": get_bundled_script("vega-lite", alt.VEGALITE_VERSION),
74 |         "vega-embed.js": get_bundled_script("vega-embed", alt.VEGAEMBED_VERSION),
75 |     }
76 |     html = HTML_TEMPLATE.format(
77 |         vega_url="/vega.js",
78 |         vegalite_url="/vega-lite.js",
79 |         vegaembed_url="/vega-embed.js",
80 |     )
81 | 
82 |     url = SeleniumSaver._serve(html, js_resources)
83 |     driver_name = SeleniumSaver._select_webdriver(20)
84 |     driver = SeleniumSaver._registry.get(driver_name, 20)
85 | 
86 |     driver.get("about:blank")
87 |     driver.get(url)
88 | 
89 |     try:
90 |         driver.find_element_by_id("vis")
91 |     except NoSuchElementException:
92 |         raise RuntimeError(f"Could not load {url}")
93 | 
94 |     data = driver.execute_async_script(EXTRACT_CODE, spec, name)
95 | 
96 |     if "error" in data:
97 |         raise ValueError(f"Javascript Error: {data['error']}")
98 | 
99 |     return pd.DataFrame.from_records(json.loads(data["data"]))
100 | 
101 | 
102 | def apply(
103 |     df: pd.DataFrame,
104 |     transform: Union[
105 |         None, JSONDict, alt.Transform, List[Union[JSONDict, alt.Transform]]
106 |     ] = None,
107 | ) -> pd.DataFrame:
108 |     """Extract transformed data from a Javascript rendering.
109 | 
110 |     Parameters
111 |     ----------
112 |     df : pd.DataFrame
113 |     transform : list|dict
114 |         A transform specification or list of transform specifications.
115 |         Each specification must be valid according to Altair's transform
116 |         schema.
117 | 
118 |     Returns
119 |     -------
120 |     df_transformed : pd.DataFrame
121 |         The transformed dataframe.
122 |     """
123 |     if transform is None:
124 |         transform = []
125 |     elif not isinstance(transform, list):
126 |         transform = [transform]
127 |     chart = alt.Chart(df).mark_point()._add_transform(*transform)
128 |     with alt.data_transformers.enable(max_rows=None, consolidate_datasets=False):
129 |         spec = chart.to_dict()
130 |     return _extract_data(spec, "data_0")
131 | 
132 | 
133 | def get_tz_code() -> str:
134 |     """Get the timezone code used by chromedriver."""
135 |     # Optional deps
136 |     from selenium.common.exceptions import NoSuchElementException
137 |     from altair_saver import SeleniumSaver
138 | 
139 |     html = """<div id="vis"></div>"""
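    # Note: selenium's execute_async_script injects a done-callback as the
    # final entry of `arguments`; the script must invoke it to return a value.
    # No extra arguments are passed here, so the callback is arguments[0]
    # (in EXTRACT_CODE above, which receives spec and name, it is arguments[2]).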
""" 140 | script = "arguments[0](Intl.DateTimeFormat().resolvedOptions().timeZone)" 141 | url = SeleniumSaver._serve(html, {}) 142 | driver_name = SeleniumSaver._select_webdriver(20) 143 | driver = SeleniumSaver._registry.get(driver_name, 20) 144 | driver.get("about:blank") 145 | driver.get(url) 146 | try: 147 | driver.find_element_by_id("vis") 148 | except NoSuchElementException: 149 | raise RuntimeError(f"Could not load {url}") 150 | return driver.execute_async_script(script) 151 | 152 | 153 | def get_tz_offset(tz: Optional[str] = None) -> pd.Timedelta: 154 | """Get the timezone offset between Python and Javascript for dates with the given timezone. 155 | 156 | Parameters 157 | ---------- 158 | tz : string (optional) 159 | The timezone of the input dates 160 | 161 | Returns 162 | ------- 163 | offset : pd.Timedelta 164 | The offset between the Javasript representation and the Python representation 165 | of a date with the given timezone. 166 | """ 167 | ts = pd.to_datetime("2012-01-01").tz_localize(tz) 168 | df = pd.DataFrame({"t": [ts]}) 169 | out = apply(df, {"timeUnit": "year", "field": "t", "as": "year"}) 170 | 171 | date_in = df.t[0] 172 | date_out = pd.to_datetime(1e6 * out.t)[0].tz_localize(tz) 173 | 174 | return date_out - date_in 175 | -------------------------------------------------------------------------------- /altair_transform/extract.py: -------------------------------------------------------------------------------- 1 | """Tools for extracting transforms from encodings""" 2 | from collections import defaultdict 3 | import copy 4 | from typing import Any, Dict, List, Tuple 5 | 6 | import altair as alt 7 | 8 | _EncodingType = Dict[str, dict] 9 | _SpecType = Dict[str, Any] 10 | _TransformType = List[_SpecType] 11 | 12 | 13 | def extract_transform(chart: alt.Chart) -> alt.Chart: 14 | """Extract transforms from encodings 15 | 16 | This takes a chart with transforms specified within encodings, and returns 17 | an equivalent chart with transforms specified separately in the ``transform`` 18 | field. 
19 | 20 | Parameters 21 | ---------- 22 | chart : alt.Chart 23 | Input chart, which will not be modified 24 | 25 | Returns 26 | ------- 27 | chart : alt.Chart 28 | A copy of the input chart with any encoding-specified transforms moved 29 | to the transforms-attribute 30 | 31 | Example 32 | ------- 33 | >>> chart = alt.Chart('data.csv').mark_bar().encode(x='mean(x):Q', y='y:N') 34 | >>> new_chart = extract_transform(chart) 35 | >>> new_chart.transform 36 | [AggregateTransform({ 37 | aggregate: [AggregatedFieldDef({ 38 | as: FieldName('mean_x'), 39 | field: FieldName('x'), 40 | op: AggregateOp('mean') 41 | })], 42 | groupby: [FieldName('y')] 43 | })] 44 | >>> new_chart.encoding 45 | FacetedEncoding({ 46 | x: PositionFieldDef({ 47 | field: FieldName('mean_x'), 48 | title: 'Mean of x', 49 | type: StandardType('quantitative') 50 | }), 51 | y: PositionFieldDef({ 52 | field: FieldName('y'), 53 | type: StandardType('nominal') 54 | }) 55 | }) 56 | """ 57 | 58 | chart = chart.copy() 59 | encoding_dict = chart.encoding.copy().to_dict(context={"data": chart.data}) 60 | encoding, transform = _encoding_to_transform(encoding_dict) 61 | if transform: 62 | chart.encoding = alt.FacetedEncoding.from_dict(encoding) 63 | if chart.transform is alt.Undefined: 64 | chart.transform = [] 65 | chart.transform.extend(alt.Transform.from_dict(t) for t in transform) 66 | return chart 67 | 68 | 69 | def _encoding_to_transform( 70 | encoding: _EncodingType, 71 | ) -> Tuple[_EncodingType, _TransformType]: 72 | """Extract transforms from an encoding dict.""" 73 | # TODO: what if one encoding has multiple transforms? Is this valid? 74 | by_category: Dict[str, _EncodingType] = defaultdict(dict) 75 | new_encoding: _EncodingType = {} 76 | for channel, spec in encoding.items(): 77 | for key in ["impute", "bin", "aggregate", "timeUnit"]: 78 | if key in spec: 79 | by_category[key][channel] = copy.deepcopy(spec) 80 | break 81 | else: 82 | new_encoding[channel] = copy.deepcopy(spec) 83 | 84 | groupby: List[str] = [ 85 | enc["field"] for enc in new_encoding.values() if "field" in enc 86 | ] 87 | transforms: _TransformType = [] 88 | field: str = "" 89 | new_field: str = "" 90 | new_field2: str = "" 91 | 92 | for channel, spec in by_category["bin"].items(): 93 | if spec["bin"] == "binned": 94 | new_encoding[channel] = spec 95 | if "field" in spec: 96 | groupby.append(spec["field"]) 97 | continue 98 | field = spec.pop("field") 99 | new_field = f"{field}_binned" 100 | new_field2 = f"{field}_binned2" 101 | needs_upper_limit: bool = ( 102 | channel in ["x", "y"] 103 | and spec["type"] == "quantitative" 104 | and f"{channel}2" not in encoding 105 | ) 106 | bin_transform: _SpecType = { 107 | "field": field, 108 | "bin": spec.pop("bin"), 109 | "as": [new_field, new_field2] if needs_upper_limit else new_field, 110 | } 111 | spec["field"] = new_field 112 | spec.setdefault("title", f"{field} (binned)") 113 | new_encoding[channel] = spec 114 | groupby.append(new_field) 115 | 116 | if needs_upper_limit: 117 | spec["bin"] = "binned" 118 | new_encoding[f"{channel}2"] = {"field": new_field2} 119 | groupby.append(new_field2) 120 | transforms.append(bin_transform) 121 | 122 | for channel, spec in by_category["timeUnit"].items(): 123 | timeUnit: str = spec[ 124 | "timeUnit" 125 | ] # leave timeUnit in spec for the sake of formatting 126 | field = spec.pop("field") 127 | new_field = f"{timeUnit}_{field}" 128 | spec["field"] = new_field 129 | spec.setdefault("title", f"{field} ({timeUnit})") 130 | new_encoding[channel] = spec 131 | 
transforms.append({"timeUnit": timeUnit, "field": field, "as": new_field}) 132 | groupby.append(new_field) 133 | 134 | for channel, spec in by_category["impute"].items(): 135 | keychannel = "y" if channel == "x" else "x" 136 | key = encoding.get(keychannel, {}).get("field", spec["field"]) 137 | impute_transform: _SpecType = spec.pop("impute") 138 | impute_transform.update( 139 | { 140 | "impute": spec["field"], 141 | "key": key, 142 | "groupby": [field for field in groupby if field != key], 143 | } 144 | ) 145 | new_encoding[channel] = spec 146 | transforms.append(impute_transform) 147 | 148 | agg_transforms: _TransformType = [] 149 | for channel, spec in by_category["aggregate"].items(): 150 | aggregate: str = spec.pop("aggregate") 151 | field = spec.pop("field", None) 152 | new_field = "__count" if aggregate == "count" else f"{aggregate}_{field}" 153 | agg_dict: Dict[str, str] = {"op": aggregate, "as": new_field} 154 | if field is not None: 155 | agg_dict["field"] = field 156 | agg_transforms.append(agg_dict) 157 | spec["field"] = new_field 158 | spec.setdefault( 159 | "title", 160 | ( 161 | "Count of Records" 162 | if aggregate == "count" 163 | else f"{aggregate.title()} of {field}" 164 | ), 165 | ) 166 | new_encoding[channel] = spec 167 | if agg_transforms: 168 | transform: Dict[str, list] = {"aggregate": agg_transforms} 169 | if groupby: 170 | transform["groupby"] = groupby 171 | transforms.append(transform) 172 | 173 | return new_encoding, transforms 174 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # altair-transform 2 | 3 | Python evaluation of Altair/Vega-Lite transforms. 4 | 5 | [![build status](http://img.shields.io/travis/altair-viz/altair-transform/master.svg)](https://travis-ci.org/altair-viz/altair-transform) 6 | [![github actions](https://github.com/altair-viz/altair-transform/workflows/build/badge.svg)](https://github.com/altair-viz/altair-transform/actions?query=workflow%3Abuild) 7 | [![github actions](https://github.com/altair-viz/altair-transform/workflows/lint/badge.svg)](https://github.com/altair-viz/altair-transform/actions?query=workflow%3Alint) 8 | [![code style black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) 9 | 10 | ``altair-transform`` requires Python 3.6 or later. Install with: 11 | 12 | $ pip install altair_transform 13 | 14 | Altair-transform evaluates [Altair](http://altair-viz.github.io) and [Vega-Lite](http://vega.github.io/vega-lite) 15 | transforms directly in Python. This can be useful in a number of contexts, illustrated in the examples below. 16 | 17 | ## Example: Extracting Data 18 | 19 | The Vega-Lite specification includes the ability to apply a 20 | wide range of transformations to input data within the chart 21 | specification. 
As an example, here is a sliding window average
22 | of a Gaussian random walk, implemented in Altair:
23 | 
24 | ```python
25 | import altair as alt
26 | import numpy as np
27 | import pandas as pd
28 | 
29 | rand = np.random.RandomState(12345)
30 | 
31 | df = pd.DataFrame({
32 |     'x': np.arange(200),
33 |     'y': rand.randn(200).cumsum()
34 | })
35 | 
36 | points = alt.Chart(df).mark_point().encode(
37 |     x='x:Q',
38 |     y='y:Q'
39 | )
40 | 
41 | line = alt.Chart(df).transform_window(
42 |     ymean='mean(y)',
43 |     sort=[alt.SortField('x')],
44 |     frame=[5, 5]
45 | ).mark_line(color='red').encode(
46 |     x='x:Q',
47 |     y='ymean:Q'
48 | )
49 | 
50 | points + line
51 | ```
52 | ![Altair Visualization](https://raw.githubusercontent.com/altair-viz/altair-transform/master/images/random_walk.png)
53 | 
54 | Because the transform is encoded within the renderer, however, the
55 | computed values are not directly accessible from the Python layer.
56 | 
57 | This is where ``altair_transform`` comes in. It includes a (nearly)
58 | complete Python implementation of Vega-Lite's transform layer, so
59 | that you can easily extract a pandas dataframe with the computed
60 | values shown in the chart:
61 | 
62 | ```python
63 | from altair_transform import extract_data
64 | data = extract_data(line)
65 | data.head()
66 | ```
67 | 
|   | x | y | ymean |
|---|---|---|---|
| 0 | 0 | -0.204708 | 0.457749 |
| 1 | 1 | 0.274236 | 0.771093 |
| 2 | 2 | -0.245203 | 1.041320 |
| 3 | 3 | -0.800933 | 1.336943 |
| 4 | 4 | 1.164847 | 1.698085 |
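
As a quick sanity check, the extracted column matches an equivalent computation done with pandas alone. This is a sketch, assuming the `df` and `data` frames from the snippets above:

```python
import numpy as np

# frame=[5, 5] means five points on either side of the current row,
# i.e. a centered 11-point window, truncated at the series edges.
expected = df['y'].rolling(11, center=True, min_periods=1).mean()
assert np.allclose(data['ymean'], expected)
```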
109 | 
110 | From here, you can work with the transformed data directly
111 | in Python.
112 | 
113 | ## Example: Pre-Aggregating Large Datasets
114 | 
115 | Altair creates chart specifications containing the full dataset.
116 | The advantage of this is that the data used to make the chart is entirely transparent; the disadvantage is that it causes issues as datasets grow large.
117 | To prevent users from inadvertently crashing their browsers by trying to send too much data to the frontend, Altair limits the data size by default.
118 | For example, a histogram of 20000 points:
119 | 
120 | ```python
121 | import altair as alt
122 | import pandas as pd
123 | import numpy as np
124 | 
125 | np.random.seed(12345)
126 | 
127 | df = pd.DataFrame({
128 |     'x': np.random.randn(20000)
129 | })
130 | chart = alt.Chart(df).mark_bar().encode(
131 |     alt.X('x', bin=True),
132 |     y='count()'
133 | )
134 | chart
135 | ```
136 | ```pyerr
137 | MaxRowsError: The number of rows in your dataset is greater than the maximum allowed (5000). For information on how to plot larger datasets in Altair, see the documentation
138 | ```
139 | There are several possible ways around this, as mentioned in Altair's [FAQ](https://altair-viz.github.io/user_guide/faq.html#maxrowserror-how-can-i-plot-large-datasets).
140 | Altair-transform provides another option via the ``transform_chart()`` function, which pre-transforms the data according to the chart specification, so that the final specification holds the aggregated data rather than the full dataset:
141 | ```python
142 | from altair_transform import transform_chart
143 | new_chart = transform_chart(chart)
144 | new_chart
145 | ```
146 | ![Altair Visualization](https://raw.githubusercontent.com/altair-viz/altair-transform/master/images/histogram.png)
147 | 
148 | Examining the new chart specification, we can see that it contains the pre-aggregated dataset:
149 | ```python
150 | new_chart.data
151 | ```
152 | 
|   | x_binned | x_binned2 | count |
|---|---|---|---|
| 0 | -4.0 | -3.0 | 29 |
| 1 | -3.0 | -2.0 | 444 |
| 2 | -2.0 | -1.0 | 2703 |
| 3 | -1.0 | 0.0 | 6815 |
| 4 | 0.0 | 1.0 | 6858 |
| 5 | 1.0 | 2.0 | 2706 |
| 6 | 2.0 | 3.0 | 423 |
| 7 | 3.0 | 4.0 | 22 |
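
Since only these eight aggregated rows are embedded in the specification, the chart stays far below Altair's default row limit no matter how large the input grows. A quick check, assuming the `df` and `new_chart` objects from above:

```python
len(df), len(new_chart.data)  # (20000, 8)
```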
212 | 213 | ## Limitations 214 | 215 | ``altair_transform`` currently works only for non-compound charts; that is, it cannot transform or extract data from layered, faceted, repeated, or concatenated charts. 216 | 217 | There are also a number of less-used transform options that are not yet fully supported. These should explicitly raise a ``NotImplementedError`` if you attempt to use them. 218 | -------------------------------------------------------------------------------- /altair_transform/utils/_evaljs.py: -------------------------------------------------------------------------------- 1 | """Functionality to evaluate contents of the ast""" 2 | from functools import singledispatch, wraps 3 | import operator 4 | import re 5 | from typing import Any, Dict, List, Union 6 | 7 | from altair_transform.utils import ast, Parser 8 | 9 | __all__ = ["evaljs", "undefined", "JSRegex"] 10 | 11 | 12 | class _UndefinedType(object): 13 | def __repr__(self): 14 | return "undefined" 15 | 16 | 17 | undefined = _UndefinedType() 18 | 19 | 20 | class JSRegex: 21 | _flagmap: Dict[str, re.RegexFlag] = { 22 | "i": re.I, 23 | "m": re.M, 24 | "s": re.S, 25 | "u": re.U, 26 | } 27 | 28 | def __init__(self, pattern: str, flags: str = ""): 29 | self._pattern = pattern 30 | self._flags = flags 31 | self._regex = re.compile(pattern, self._reflags()) 32 | 33 | def __eq__(self, other): 34 | if isinstance(other, JSRegex): 35 | return (self._pattern, self._flags) == (other._pattern, other._flags) 36 | 37 | def _reflags(self) -> re.RegexFlag: 38 | flags = re.RegexFlag(0) 39 | for key, flag in self._flagmap.items(): 40 | if key in self._flags: 41 | flags |= flag 42 | return flags 43 | 44 | def test(self, string: str) -> bool: 45 | if "y" in self._flags: 46 | return bool(self._regex.match(string)) 47 | else: 48 | return bool(self._regex.search(string)) 49 | 50 | def replace(self, string: str, replacement: str) -> str: 51 | if "g" in self._flags: 52 | return self._regex.sub(replacement, string) 53 | else: 54 | return self._regex.sub(replacement, string, count=1) 55 | 56 | 57 | def evaljs(expression: Union[str, ast.Expr], namespace: dict = None) -> Any: 58 | """Evaluate a javascript expression, optionally with a namespace.""" 59 | if isinstance(expression, str): 60 | parser = Parser() 61 | expression = parser.parse(expression) 62 | return visit(expression, namespace or {}) 63 | 64 | 65 | @singledispatch 66 | def visit(obj: Any, namespace: dict) -> Any: 67 | return obj 68 | 69 | 70 | @visit.register(ast.Expr) 71 | def _visit_expr(obj: ast.Expr, namespace: dict) -> Any: 72 | return obj.value 73 | 74 | 75 | @visit.register(ast.BinOp) 76 | def _visit_binop(obj: ast.BinOp, namespace: dict) -> Any: 77 | if obj.op not in BINARY_OPERATORS: 78 | raise NotImplementedError(f"Binary Operator A {obj.op} B") 79 | op = BINARY_OPERATORS[obj.op] 80 | return op(visit(obj.lhs, namespace), visit(obj.rhs, namespace)) 81 | 82 | 83 | @visit.register(ast.UnOp) 84 | def _visit_unop(obj: ast.UnOp, namespace: dict) -> Any: 85 | if obj.op not in UNARY_OPERATORS: 86 | raise NotImplementedError(f"Unary Operator {obj.op}x") 87 | op = UNARY_OPERATORS[obj.op] 88 | return op(visit(obj.rhs, namespace)) 89 | 90 | 91 | @visit.register(ast.TernOp) 92 | def _visit_ternop(obj: ast.TernOp, namespace: dict) -> Any: 93 | if obj.op not in TERNARY_OPERATORS: 94 | raise NotImplementedError(f"Ternary Operator A {obj.op[0]} B {obj.op[1]} C") 95 | op = TERNARY_OPERATORS[obj.op] 96 | return op( 97 | visit(obj.lhs, namespace), visit(obj.mid, namespace), visit(obj.rhs, 
namespace) 98 | ) 99 | 100 | 101 | @visit.register(ast.Number) 102 | def _visit_number(obj: ast.Number, namespace: dict) -> Any: 103 | return obj.value 104 | 105 | 106 | @visit.register(ast.String) 107 | def _visit_string(obj: ast.String, namespace: dict) -> Any: 108 | return obj.value 109 | 110 | 111 | @visit.register(ast.Regex) 112 | def _visit_regex(obj: ast.Regex, namespace: dict) -> JSRegex: 113 | return JSRegex(obj.value["pattern"], obj.value["flags"]) 114 | 115 | 116 | @visit.register(ast.Global) 117 | def _visit_global(obj: ast.Global, namespace: dict) -> Any: 118 | if obj.name not in namespace: 119 | raise NameError("{0} is not a valid name".format(obj.name)) 120 | return namespace[obj.name] 121 | 122 | 123 | @visit.register(ast.Name) 124 | def _visit_name(obj: ast.Name, namespace: dict) -> str: 125 | return obj.name 126 | 127 | 128 | @visit.register(ast.List) 129 | def _visit_list(obj: ast.List, namespace: dict) -> List: 130 | return [visit(entry, namespace) for entry in obj.entries] 131 | 132 | 133 | @visit.register(ast.Object) 134 | def _visit_object(obj: ast.Object, namespace: dict) -> Any: 135 | def _visit(entry): 136 | if isinstance(entry, tuple): 137 | return tuple(visit(e, namespace) for e in entry) 138 | if isinstance(entry, ast.Name): 139 | return (visit(entry, namespace), visit(ast.Global(entry.name), namespace)) 140 | 141 | return dict(_visit(entry) for entry in obj.entries) 142 | 143 | 144 | @visit.register(ast.Attr) 145 | def _visit_attr(obj: ast.Attr, namespace: dict) -> Any: 146 | obj_ = visit(obj.obj, namespace) 147 | attr = visit(obj.attr, namespace) 148 | if isinstance(obj_, dict): 149 | return obj_.get(attr, undefined) 150 | else: 151 | return getattr(obj_, attr, undefined) 152 | 153 | 154 | @visit.register(ast.Item) 155 | def _visit_item(obj: ast.Item, namespace: dict) -> Any: 156 | obj_ = visit(obj.obj, namespace) 157 | item = visit(obj.item, namespace) 158 | if isinstance(obj_, list) and isinstance(item, float): 159 | item = int(item) 160 | try: 161 | return obj_[item] 162 | except (KeyError, IndexError): 163 | return undefined 164 | 165 | 166 | @visit.register(ast.Func) 167 | def _visit_func(obj: ast.Func, namespace: dict) -> Any: 168 | func = visit(obj.func, namespace) 169 | args = [visit(arg, namespace) for arg in obj.args] 170 | return func(*args) 171 | 172 | 173 | def int_inputs(func): 174 | @wraps(func) 175 | def wrapper(*args): 176 | return float(func(*map(int, args))) 177 | 178 | return wrapper 179 | 180 | 181 | @int_inputs 182 | def zerofill_rshift(lhs: int, rhs: int) -> int: 183 | if lhs < 0: 184 | lhs = lhs + 0x100000000 185 | return lhs >> rhs 186 | 187 | 188 | # TODO: do implicit type conversions ugh... 
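# The tables below map Javascript operator tokens onto Python callables.
# Javascript numbers are doubles, but its bitwise operators act on 32-bit
# integers, so those entries coerce their operands through int() first;
# ">>>" (zero-fill right shift) is emulated above by offsetting negative
# values into the unsigned 32-bit range before shifting.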
189 | UNARY_OPERATORS = { 190 | "~": int_inputs(operator.inv), 191 | "-": operator.neg, 192 | "+": operator.pos, 193 | "!": operator.not_, 194 | } 195 | 196 | 197 | BINARY_OPERATORS = { 198 | "+": operator.add, 199 | "-": operator.sub, 200 | "*": operator.mul, 201 | "/": operator.truediv, 202 | "**": operator.pow, 203 | "%": operator.mod, 204 | "&": int_inputs(operator.and_), 205 | "|": int_inputs(operator.or_), 206 | "^": int_inputs(operator.xor), 207 | "<<": int_inputs(operator.lshift), 208 | ">>": int_inputs(operator.rshift), 209 | ">>>": zerofill_rshift, 210 | "<": operator.lt, 211 | "<=": operator.le, 212 | ">": operator.gt, 213 | ">=": operator.ge, 214 | "==": operator.eq, 215 | "===": operator.eq, 216 | "!=": operator.ne, 217 | "!==": operator.ne, 218 | "&&": lambda a, b: a and b, 219 | "||": lambda a, b: a or b, 220 | } 221 | 222 | 223 | TERNARY_OPERATORS = {("?", ":"): lambda a, b, c: b if a else c} 224 | -------------------------------------------------------------------------------- /altair_transform/tests/test_extract.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from altair_transform.extract import _encoding_to_transform 4 | from typing import Any, Dict, List, NamedTuple 5 | 6 | 7 | class _TestCase(NamedTuple): 8 | encoding: Dict[str, Dict[str, Any]] 9 | expected_encoding: Dict[str, Dict[str, Any]] 10 | expected_transform: List[Dict[str, Any]] 11 | 12 | 13 | @pytest.mark.parametrize( 14 | _TestCase._fields, 15 | [ 16 | _TestCase( 17 | encoding={"x": {"aggregate": "count", "type": "quantitative"}}, 18 | expected_encoding={ 19 | "x": { 20 | "field": "__count", 21 | "type": "quantitative", 22 | "title": "Count of Records", 23 | } 24 | }, 25 | expected_transform=[{"aggregate": [{"op": "count", "as": "__count"}]}], 26 | ), 27 | _TestCase( 28 | encoding={"x": {"field": "foo", "bin": True, "type": "ordinal"}}, 29 | expected_encoding={ 30 | "x": {"field": "foo_binned", "type": "ordinal", "title": "foo (binned)"} 31 | }, 32 | expected_transform=[{"bin": True, "field": "foo", "as": "foo_binned"}], 33 | ), 34 | _TestCase( 35 | encoding={ 36 | "x": {"aggregate": "sum", "field": "people", "type": "quantitative"}, 37 | "y": {"field": "age", "type": "ordinal"}, 38 | }, 39 | expected_encoding={ 40 | "x": { 41 | "field": "sum_people", 42 | "type": "quantitative", 43 | "title": "Sum of people", 44 | }, 45 | "y": {"field": "age", "type": "ordinal"}, 46 | }, 47 | expected_transform=[ 48 | { 49 | "aggregate": [{"op": "sum", "field": "people", "as": "sum_people"}], 50 | "groupby": ["age"], 51 | } 52 | ], 53 | ), 54 | _TestCase( 55 | encoding={ 56 | "x": {"aggregate": "count", "type": "quantitative"}, 57 | "y": {"field": "age", "bin": {"maxbins": 10}, "type": "quantitative"}, 58 | }, 59 | expected_encoding={ 60 | "x": { 61 | "field": "__count", 62 | "type": "quantitative", 63 | "title": "Count of Records", 64 | }, 65 | "y": { 66 | "field": "age_binned", 67 | "bin": "binned", 68 | "type": "quantitative", 69 | "title": "age (binned)", 70 | }, 71 | "y2": {"field": "age_binned2"}, 72 | }, 73 | expected_transform=[ 74 | { 75 | "bin": {"maxbins": 10}, 76 | "field": "age", 77 | "as": ["age_binned", "age_binned2"], 78 | }, 79 | { 80 | "aggregate": [{"op": "count", "as": "__count"}], 81 | "groupby": ["age_binned", "age_binned2"], 82 | }, 83 | ], 84 | ), 85 | _TestCase( 86 | encoding={ 87 | "x": {"aggregate": "count", "type": "quantitative"}, 88 | "y": {"field": "age", "bin": True, "type": "ordinal"}, 89 | }, 90 | expected_encoding={ 91 | "x": { 
92 | "field": "__count", 93 | "type": "quantitative", 94 | "title": "Count of Records", 95 | }, 96 | "y": { 97 | "field": "age_binned", 98 | "type": "ordinal", 99 | "title": "age (binned)", 100 | }, 101 | }, 102 | expected_transform=[ 103 | {"bin": True, "field": "age", "as": "age_binned"}, 104 | { 105 | "aggregate": [{"op": "count", "as": "__count"}], 106 | "groupby": ["age_binned"], 107 | }, 108 | ], 109 | ), 110 | _TestCase( 111 | encoding={ 112 | "x": {"aggregate": "count", "field": "x", "type": "quantitative"}, 113 | "y": {"field": "y", "timeUnit": "day", "type": "ordinal"}, 114 | }, 115 | expected_encoding={ 116 | "x": { 117 | "field": "__count", 118 | "type": "quantitative", 119 | "title": "Count of Records", 120 | }, 121 | "y": { 122 | "field": "day_y", 123 | "timeUnit": "day", 124 | "type": "ordinal", 125 | "title": "y (day)", 126 | }, 127 | }, 128 | expected_transform=[ 129 | {"timeUnit": "day", "field": "y", "as": "day_y"}, 130 | { 131 | "aggregate": [{"field": "x", "op": "count", "as": "__count"}], 132 | "groupby": ["day_y"], 133 | }, 134 | ], 135 | ), 136 | _TestCase( 137 | encoding={ 138 | "x": {"field": "xval", "type": "ordinal"}, 139 | "y": { 140 | "field": "yval", 141 | "type": "quantitative", 142 | "impute": {"value": 0, "method": "mean", "keyvals": [1, 2, 3]}, 143 | }, 144 | "color": {"field": "cval", "type": "nominal"}, 145 | }, 146 | expected_encoding={ 147 | "x": {"field": "xval", "type": "ordinal"}, 148 | "y": {"field": "yval", "type": "quantitative"}, 149 | "color": {"field": "cval", "type": "nominal"}, 150 | }, 151 | expected_transform=[ 152 | { 153 | "impute": "yval", 154 | "key": "xval", 155 | "keyvals": [1, 2, 3], 156 | "groupby": ["cval"], 157 | "value": 0, 158 | "method": "mean", 159 | } 160 | ], 161 | ), 162 | _TestCase( 163 | encoding={ 164 | "x": {"field": "xval", "bin": "binned", "type": "ordinal"}, 165 | "y": {"aggregate": "count", "type": "quantitative"}, 166 | }, 167 | expected_encoding={ 168 | "x": {"field": "xval", "bin": "binned", "type": "ordinal"}, 169 | "y": { 170 | "field": "__count", 171 | "title": "Count of Records", 172 | "type": "quantitative", 173 | }, 174 | }, 175 | expected_transform=[ 176 | {"aggregate": [{"op": "count", "as": "__count"}], "groupby": ["xval"]} 177 | ], 178 | ), 179 | ], 180 | ) 181 | def test_extract_simple_aggregate(encoding, expected_encoding, expected_transform): 182 | encoding, transform = _encoding_to_transform(encoding) 183 | assert encoding == expected_encoding 184 | assert transform == expected_transform 185 | -------------------------------------------------------------------------------- /altair_transform/transform/vega_utils.py: -------------------------------------------------------------------------------- 1 | """Python ports of vega utilities""" 2 | 3 | from typing import Callable, List, Optional, Tuple, Union 4 | import numpy as np 5 | import math 6 | 7 | 8 | # subdivide up to accuracy of 0.1 degrees 9 | MIN_RADIANS = 0.1 * math.pi / 180 10 | 11 | Number = Union[int, float] 12 | 13 | 14 | def calculate_bins( 15 | extent: Tuple[Number, Number], 16 | anchor: Optional[Number] = None, 17 | base: Number = 10, 18 | divide: List[Number] = [5, 2], 19 | maxbins: Number = 10, 20 | minstep: Number = 0, 21 | nice: bool = True, 22 | step: Optional[Number] = None, 23 | steps: Optional[List[Number]] = None, 24 | span: Optional[Number] = None, 25 | ) -> np.ndarray: 26 | """Calculate the bins for a given dataset. 
27 | 
28 |     This is a Python translation of the Javascript function available at
29 |     https://github.com/vega/vega/blob/v5.9.1/packages/vega-statistics/src/bin.js
30 | 
31 |     Parameters
32 |     ----------
33 |     extent: Tuple[Number, Number]
34 |         A two-element ([min, max]) array indicating the range of desired bin values.
35 |     anchor: Number
36 |         A value in the binned domain at which to anchor the bins, shifting the bin boundaries
37 |         if necessary to ensure that a boundary aligns with the anchor value.
38 |         Default value: the minimum bin extent value
39 |     base: Number
40 |         The number base to use for automatic bin determination (default is base 10).
41 |         Default value: 10
42 |     divide: List[Number]
43 |         Scale factors indicating allowable subdivisions. The default value is [5, 2],
44 |         which indicates that for base 10 numbers (the default base), the method may
45 |         consider dividing bin sizes by 5 and/or 2. For example, for an initial step
46 |         size of 10, the method can check if bin sizes of 2 (= 10/5), 5 (= 10/2),
47 |         or 1 (= 10/(5*2)) might also satisfy the given constraints.
48 |         Default value: [5, 2]
49 |     maxbins: Number
50 |         Maximum number of bins.
51 |         Default value: 10
52 |     minstep: Number
53 |         A minimum allowable step size (particularly useful for integer values).
54 |     nice: boolean
55 |         If true, attempts to make the bin boundaries use human-friendly boundaries,
56 |         such as multiples of ten.
57 |         Default value: True
58 |     step: Number
59 |         An exact step size to use between bins.
60 |         Note: If provided, options such as maxbins will be ignored.
61 |     steps: List[Number]
62 |         An array of allowable step sizes to choose from.
63 | 
64 |     Returns
65 |     -------
66 |     bins : numpy.ndarray
67 |         array of bin edges.
68 |     """
69 |     start, stop, step = _bin(
70 |         extent=extent,
71 |         base=base,
72 |         divide=divide,
73 |         maxbins=maxbins,
74 |         minstep=minstep,
75 |         nice=nice,
76 |         step=step,
77 |         steps=steps,
78 |         span=span,
79 |     )
80 | 
81 |     N = math.ceil((stop - start) / step)
82 | 
83 |     if anchor is not None:
84 |         start += anchor - (start + step * math.floor((anchor - start) / step))
85 | 
86 |     return start + step * np.arange(N + 1)
87 | 
88 | 
89 | def _bin(
90 |     extent: Tuple[Number, Number],
91 |     base: Number = 10,
92 |     divide: List[Number] = [5, 2],
93 |     maxbins: Number = 10,
94 |     minstep: Number = 0,
95 |     nice: bool = True,
96 |     step: Optional[Number] = None,
97 |     steps: Optional[List[Number]] = None,
98 |     span: Optional[Number] = None,
99 | ) -> Tuple[Number, Number, Number]:
100 |     """Calculate the bins for a given dataset.
101 | 
102 |     This is a Python translation of the Javascript function available at
103 |     https://github.com/vega/vega/blob/v5.9.1/packages/vega-statistics/src/bin.js
104 |     """
105 |     min_, max_ = extent
106 |     assert max_ > min_
107 |     span = span or (max_ - min_) or abs(min_) or 1
108 |     logb = math.log(base)
109 | 
110 |     if step is not None:
111 |         # If step is provided, we use it.
112 |         pass
113 |     elif steps is not None:
114 |         # If steps provided, limit choice to acceptable sizes.
115 |         v = span / maxbins
116 |         small = [s for s in steps if s < v]
117 |         step = max(small) if small else min(steps)
118 |     else:
119 |         # Otherwise use span to determine step size.
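        # (start from the power of `base` at which roughly `maxbins` bins
        # cover the span; the loops below then widen or subdivide that step
        # until the bin-count constraint is met)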
120 | level = math.ceil(math.log(maxbins) / logb) 121 | step = max(minstep, pow(base, round(math.log(span) / logb) - level)) 122 | 123 | # increase step size if too many bins 124 | while math.ceil(span / step) > maxbins: 125 | step *= base 126 | 127 | # decrease step size if allowed 128 | for div in divide: 129 | v = step / div 130 | if v >= minstep and span / v <= maxbins: 131 | step = v 132 | 133 | # update precision of min_ and max_ 134 | v = math.log(step) 135 | precision = 0 if v >= 0 else math.floor(-v / logb) + 1 136 | eps = pow(base, -precision - 1) 137 | if nice: 138 | v = math.floor(min_ / step + eps) * step 139 | min_ = v - step if min_ < v else v 140 | max_ = math.ceil(max_ / step) * step 141 | 142 | start = min_ 143 | stop = max_ if max_ != min_ else min_ + step 144 | return start, stop, step 145 | 146 | 147 | def adaptive_sample( 148 | f: Callable[[np.ndarray], np.ndarray], 149 | extent: Tuple[float, float], 150 | min_steps: int = 25, 151 | max_steps: int = 200, 152 | ) -> Tuple[np.ndarray, np.ndarray]: 153 | """Adaptive sampling of a function. 154 | 155 | This is a Python translation of the Javascript function available at 156 | https://github.com/vega/vega/blob/v5.9.1/packages/vega-statistics/src/sampleCurve.js 157 | 158 | Parameters 159 | ---------- 160 | f : callable 161 | Function to be adaptively sampled 162 | extent : tuple 163 | The extent of the sampling 164 | min_steps : int 165 | The minimum number of steps to consider 166 | max_steps : int 167 | The maximum number of steps to consider 168 | 169 | Returns 170 | ------- 171 | x, y : np.ndarray 172 | The sampled function 173 | """ 174 | 175 | min_x, max_x = extent 176 | span = max_x - min_x 177 | stop = span / max_steps 178 | 179 | # sample minimum points on uniform grid 180 | x = min_x + (np.arange(min_steps + 1) / min_steps) * span 181 | y = f(x) 182 | 183 | if min_steps == max_steps: 184 | # no adaptation, sample uniform grid directly and return 185 | return x, y 186 | 187 | # move on to perform adaptive refinement 188 | start_grid = list(zip(x, y)) 189 | prev, next_ = start_grid[:1], start_grid[:0:-1] 190 | 191 | while next_: 192 | p0, p1 = prev[-1], next_[-1] 193 | 194 | # midpoint for potential curve subdivision 195 | xm = (p0[0] + p1[0]) / 2 196 | pm = (xm, f(xm)) 197 | 198 | if pm[0] - p0[0] >= stop and _angleDelta(p0, pm, p1) > MIN_RADIANS: 199 | # maximum resolution has not yet been met, and 200 | # subdivision midpoint sufficiently different from endpoint 201 | # save subdivision, push midpoint onto the visitation stack 202 | next_.append(pm) 203 | else: 204 | # subdivision midpoint sufficiently similar to endpoint 205 | # skip subdivision, store endpoint, move to next point on the stack 206 | prev.append(p1) 207 | next_.pop() 208 | out = np.array(prev) 209 | return out[:, 0], out[:, 1] 210 | 211 | 212 | def _angleDelta( 213 | p: Tuple[float, float], q: Tuple[float, float], r: Tuple[float, float] 214 | ) -> float: 215 | a0 = np.arctan2(r[1] - p[1], r[0] - p[0]) 216 | a1 = np.arctan2(q[1] - p[1], q[0] - p[0]) 217 | return abs(a0 - a1) 218 | -------------------------------------------------------------------------------- /altair_transform/transform/regression.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from typing import Dict, Optional, Tuple, Type 3 | 4 | import altair as alt 5 | import numpy as np 6 | from numpy.polynomial import Polynomial 7 | import pandas as pd 8 | from .visitor import visit 9 | from .vega_utils import adaptive_sample 10 | 11 | 
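# Pad a fitted coefficient array with trailing zeros up to length k, so that
# each model's params() always reports the expected number of coefficients
# even when numpy returns a shorter coefficient vector for a degenerate fit.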
12 | def _ensure_length(coef: np.ndarray, k: int) -> np.ndarray: 13 | return np.hstack([coef, np.zeros(k - len(coef), dtype=coef.dtype)]) 14 | 15 | 16 | @visit.register(alt.RegressionTransform) 17 | def visit_regression( 18 | transform: alt.RegressionTransform, df: pd.DataFrame 19 | ) -> pd.DataFrame: 20 | transform = transform.to_dict() 21 | reg = transform["regression"] 22 | on = transform["on"] 23 | extent = transform.get("extent") 24 | method = transform.get("method", "linear") 25 | as_ = transform.get("as", (on, reg)) 26 | groupby = transform.get("groupby") 27 | order = transform.get("order", 3) 28 | params = transform.get("params", False) 29 | 30 | models: Dict[str, Type[Model]] = { 31 | "exp": ExpModel, 32 | "linear": LinearModel, 33 | "log": LogModel, 34 | "poly": PolyModel, 35 | "pow": PowModel, 36 | "quad": QuadModel, 37 | } 38 | 39 | if method not in models: 40 | raise NotImplementedError(f"method={method}") 41 | 42 | M = models[method] 43 | model = M(on=on, reg=reg, extent=extent, as_=as_, order=order) 44 | 45 | if params: 46 | if groupby: 47 | params = df.groupby(groupby).apply(model.params) 48 | params["keys"] = [list(p)[:-1] for p in params.index] 49 | return params.reset_index(drop=True) 50 | else: 51 | return model.params(df) 52 | else: 53 | if groupby: 54 | return ( 55 | df.groupby(groupby) 56 | .apply(model.predict) 57 | .reset_index(groupby) 58 | .reset_index(drop=True) 59 | ) 60 | else: 61 | return model.predict(df) 62 | 63 | 64 | class Model(metaclass=abc.ABCMeta): 65 | _coef: Optional[np.ndarray] 66 | 67 | def __init__( 68 | self, 69 | reg: str, 70 | on: str, 71 | extent: Optional[Tuple[float, float]], 72 | as_: Tuple[str, str], 73 | order: int, 74 | ): 75 | self._reg = reg 76 | self._on = on 77 | self._extent = extent 78 | self._as = as_ 79 | self._order = order 80 | 81 | def params(self, df: pd.DataFrame) -> pd.DataFrame: 82 | """Return a dataframe with model parameters and r-square values. 83 | 84 | Parameters 85 | ---------- 86 | df : pd.DataFrame 87 | The input data to which the model will be fit. 88 | 89 | Returns 90 | ------- 91 | coef : pd.DataFrame 92 | DataFrame with model fit results. 93 | """ 94 | x = df[self._on].values 95 | y = df[self._reg].values 96 | self._fit(x, y) 97 | SS_tot = ((y - y.mean()) ** 2).sum() 98 | SS_res = ((y - self._predict(x)) ** 2).sum() 99 | rsquare = 1 - SS_res / SS_tot 100 | return pd.DataFrame({"coef": [list(self._params())], "rSquared": [rsquare]}) 101 | 102 | def predict(self, df: pd.DataFrame) -> pd.DataFrame: 103 | """Return the fit model 104 | 105 | Parameters 106 | ---------- 107 | df : pd.DataFrame 108 | The input data to which the model will be fit. 109 | 110 | Returns 111 | ------- 112 | model : pd.DataFrame 113 | DataFrame with model fit results. 114 | """ 115 | self._fit(df[self._on].values, df[self._reg].values) 116 | x, y = self._grid(df) 117 | on, reg = self._as 118 | return pd.DataFrame({on: x, reg: y}) 119 | 120 | def _grid(self, df: pd.DataFrame) -> Tuple[np.ndarray, np.ndarray]: 121 | extent = self._extent_from_data(df) 122 | return adaptive_sample(self._predict, extent) 123 | 124 | def _extent_from_data(self, df: pd.DataFrame) -> Tuple[float, float]: 125 | xmin: float = df[self._on].min() 126 | xmax: float = df[self._on].max() 127 | return self._extent or (xmin, xmax) 128 | 129 | @abc.abstractmethod 130 | def _fit(self, x: np.ndarray, y: np.ndarray) -> None: 131 | ... 132 | 133 | @abc.abstractmethod 134 | def _params(self) -> np.ndarray: 135 | ... 
136 | 137 | @abc.abstractmethod 138 | def _predict(self, x: np.ndarray) -> np.ndarray: 139 | ... 140 | 141 | 142 | class ExpModel(Model): 143 | """y = a * e ^ (b * x)""" 144 | 145 | _model: Optional[Polynomial] 146 | 147 | def _fit(self, x: np.ndarray, y: np.ndarray) -> None: 148 | self._model = Polynomial.fit(x, np.log(y), 1, w=np.sqrt(abs(y))) 149 | 150 | def _predict(self, x: np.ndarray) -> np.ndarray: 151 | assert self._model is not None 152 | return np.exp(self._model(x)) 153 | 154 | def _params(self) -> np.ndarray: 155 | assert self._model is not None 156 | log_a, b = _ensure_length( 157 | self._model.convert(domain=self._model.window).coef, 2 158 | ) 159 | return np.array([np.exp(log_a), b]) 160 | 161 | 162 | class LinearModel(Model): 163 | """y = a + b * x""" 164 | 165 | _model: Optional[Polynomial] 166 | 167 | def _grid(self, df: pd.DataFrame) -> Tuple[np.ndarray, np.ndarray]: 168 | extent = self._extent_from_data(df) 169 | x = np.array(extent) 170 | return x, self._predict(np.array(extent)) 171 | 172 | def _fit(self, x: np.ndarray, y: np.ndarray) -> None: 173 | self._model = Polynomial.fit(x, y, 1) 174 | 175 | def _predict(self, x: np.ndarray) -> np.ndarray: 176 | assert self._model is not None 177 | return self._model(x) 178 | 179 | def _params(self): 180 | assert self._model is not None 181 | return _ensure_length(self._model.convert(domain=self._model.window).coef, 2) 182 | 183 | 184 | class LogModel(Model): 185 | """y = a + b * log(x)""" 186 | 187 | _model: Optional[Polynomial] 188 | 189 | def _fit(self, x: np.ndarray, y: np.ndarray) -> None: 190 | self._model = Polynomial.fit(np.log(x), y, 1) 191 | 192 | def _predict(self, x: np.ndarray) -> np.ndarray: 193 | assert self._model is not None 194 | return self._model(np.log(x)) 195 | 196 | def _params(self) -> np.ndarray: 197 | assert self._model is not None 198 | return _ensure_length(self._model.convert(domain=self._model.window).coef, 2) 199 | 200 | 201 | class PolyModel(Model): 202 | """y = a + b * x + ... 
+ k * x^k""" 203 | 204 | _model: Optional[Polynomial] 205 | 206 | def _grid(self, df: pd.DataFrame) -> Tuple[np.ndarray, np.ndarray]: 207 | if self._order == 1: 208 | extent = self._extent_from_data(df) 209 | x = np.array(extent) 210 | return x, self._predict(np.array(extent)) 211 | else: 212 | return super()._grid(df) 213 | 214 | def _fit(self, x: np.ndarray, y: np.ndarray) -> None: 215 | self._model = Polynomial.fit(x, y, self._order) 216 | 217 | def _predict(self, x: np.ndarray) -> np.ndarray: 218 | assert self._model is not None 219 | return self._model(x) 220 | 221 | def _params(self): 222 | assert self._model is not None 223 | return _ensure_length( 224 | self._model.convert(domain=self._model.window).coef, self._order + 1 225 | ) 226 | 227 | 228 | class PowModel(Model): 229 | """y = a * x ^ b""" 230 | 231 | _model: Optional[Polynomial] 232 | 233 | def _fit(self, x: np.ndarray, y: np.ndarray) -> None: 234 | self._model = Polynomial.fit(np.log(x), np.log(y), 1) 235 | 236 | def _predict(self, x: np.ndarray) -> np.ndarray: 237 | assert self._model is not None 238 | return np.exp(self._model(np.log(x))) 239 | 240 | def _params(self) -> np.ndarray: 241 | assert self._model is not None 242 | log_a, b = _ensure_length( 243 | self._model.convert(domain=self._model.window).coef, 2 244 | ) 245 | return np.array([np.exp(log_a), b]) 246 | 247 | 248 | class QuadModel(Model): 249 | """y = a + b * x + c * x^2""" 250 | 251 | _model: Optional[Polynomial] 252 | 253 | def _fit(self, x: np.ndarray, y: np.ndarray) -> None: 254 | self._model = Polynomial.fit(x, y, 2) 255 | 256 | def _predict(self, x: np.ndarray) -> np.ndarray: 257 | assert self._model is not None 258 | return self._model(x) 259 | 260 | def _params(self): 261 | assert self._model is not None 262 | return _ensure_length(self._model.convert(domain=self._model.window).coef, 3) 263 | -------------------------------------------------------------------------------- /altair_transform/utils/_parser.py: -------------------------------------------------------------------------------- 1 | """ 2 | Simplified Javascript expression parser. 3 | """ 4 | # pylint: disable=W,C,R 5 | import os 6 | 7 | from typing import Tuple 8 | 9 | import ply.lex as lex 10 | import ply.yacc as yacc 11 | 12 | from altair_transform.utils import ast 13 | 14 | 15 | # TODO: regexp literals? 
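# The two classes below drive PLY (Python Lex-Yacc): token rules are the
# ``t_*`` attributes and methods, grammar productions are the ``p_*`` methods
# whose docstrings hold the BNF, and each production builds a node from
# ``altair_transform.utils.ast``.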
16 | 17 | 18 | class ParserBase: 19 | """ 20 | Base class for a lexer/parser that has the rules defined as methods 21 | """ 22 | 23 | tokens: Tuple = () 24 | precedence: Tuple = () 25 | 26 | def __init__(self, **kw): 27 | self.debug = kw.get("debug", 0) 28 | try: 29 | modname = ( 30 | os.path.split(os.path.splitext(__file__)[0])[1] 31 | + "_" 32 | + self.__class__.__name__ 33 | ) 34 | except ValueError: 35 | modname = "parser" + "_" + self.__class__.__name__ 36 | self.debugfile = modname + ".dbg" 37 | self.tabmodule = modname + "_" + "parsetab" 38 | 39 | # Build the lexer and parser 40 | lex.lex(module=self, debug=self.debug) 41 | yacc.yacc( 42 | module=self, 43 | debug=self.debug, 44 | debugfile=self.debugfile, 45 | tabmodule=self.tabmodule, 46 | ) 47 | 48 | def parse(self, expression): 49 | return yacc.parse(expression) 50 | 51 | 52 | class Parser(ParserBase): 53 | 54 | tokens = ( 55 | "NAME", 56 | "STRING", 57 | "FLOAT", 58 | "BINARY", 59 | "OCTAL", 60 | "HEX", 61 | "REGEX", 62 | "PLUS", 63 | "MINUS", 64 | "EXP", 65 | "TIMES", 66 | "DIVIDE", 67 | "MODULO", 68 | "PERIOD", 69 | "COMMA", 70 | "COLON", 71 | "QUESTION", 72 | "LPAREN", 73 | "RPAREN", 74 | "LBRACKET", 75 | "RBRACKET", 76 | "LBRACE", 77 | "RBRACE", 78 | "LOGICAL_OR", 79 | "LOGICAL_AND", 80 | "LOGICAL_NOT", 81 | "BITWISE_NOT", 82 | "BITWISE_OR", 83 | "BITWISE_AND", 84 | "BITWISE_XOR", 85 | "LSHIFT", 86 | "RSHIFT", 87 | "ZFRSHIFT", 88 | "GREATER_EQUAL", 89 | "GREATER", 90 | "LESS_EQUAL", 91 | "LESS", 92 | "IDENT", 93 | "NIDENT", 94 | "EQUAL", 95 | "NEQUAL", 96 | ) 97 | 98 | # Tokens 99 | 100 | t_PLUS = r"\+" 101 | t_MINUS = r"-" 102 | t_EXP = r"\*\*" 103 | t_TIMES = r"\*" 104 | t_DIVIDE = r"/" 105 | t_MODULO = r"%" 106 | t_LPAREN = r"\(" 107 | t_RPAREN = r"\)" 108 | t_LBRACKET = r"\[" 109 | t_RBRACKET = r"\]" 110 | t_LBRACE = r"\{" 111 | t_RBRACE = r"\}" 112 | t_PERIOD = r"\." 113 | t_COMMA = r"," 114 | t_COLON = r"\:" 115 | t_QUESTION = r"\?" 116 | t_LOGICAL_OR = r"\|\|" 117 | t_BITWISE_OR = r"\|" 118 | t_LOGICAL_AND = r"&&" 119 | t_BITWISE_AND = r"&" 120 | t_BITWISE_XOR = r"\^" 121 | t_BITWISE_NOT = r"~" 122 | t_LSHIFT = r"<<" 123 | t_ZFRSHIFT = r">>>" 124 | t_RSHIFT = r">>" 125 | t_GREATER_EQUAL = r">=" 126 | t_GREATER = r">" 127 | t_LESS_EQUAL = r"<=" 128 | t_LESS = r"<" 129 | t_IDENT = r"===" 130 | t_EQUAL = r"==" 131 | t_NIDENT = r"!==" 132 | t_NEQUAL = r"!=" 133 | t_LOGICAL_NOT = r"!" 134 | t_NAME = r"[a-zA-Z_][a-zA-Z0-9_]*" 135 | 136 | def t_BINARY(self, t): 137 | r"0[bB][01]+" 138 | t.value = int(t.value, 2) 139 | return t 140 | 141 | def t_OCTAL(self, t): 142 | r"0[oO]?[0-7]+" 143 | t.value = int(t.value, 8) 144 | return t 145 | 146 | def t_HEX(self, t): 147 | r"0[xX][0-9A-Fa-f]+" 148 | t.value = int(t.value, 16) 149 | return t 150 | 151 | def t_FLOAT(self, t): 152 | r"([1-9]\d*(\.\d*)?|0?\.\d+|0)([eE]\d+)?" 153 | t.value = float(t.value) 154 | return t 155 | 156 | def t_STRING(self, t): 157 | r"""(?P["'])((\\{2})*|(.*?[^\\](\\{2})*))(?P=openquote)""" 158 | t.value = bytes(t.value[1:-1], "utf-8").decode("unicode_escape") 159 | return t 160 | 161 | # TODO: actually parse & validate regexps? 
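    # A regex literal is tokenized as a raw pattern/flags pair and compiled
    # later (JSRegex in _evaljs wraps it with Python's ``re``), so malformed
    # patterns surface at evaluation time rather than at parse time.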
162 | def t_REGEX(self, t): 163 | r"\/(?P(?![*+?])(?:[^\r\n\[/\\]|\\.|\[(?:[^\r\n\]\\]|\\.)*\])+)\/(?P[gmisuy]{0,6})" 164 | groups = t.lexer.lexmatch.groupdict() 165 | t.value = {"pattern": groups["REGEX_pattern"], "flags": groups["REGEX_flags"]} 166 | return t 167 | 168 | t_ignore = " \t" 169 | 170 | def t_newline(self, t): 171 | r"\n+" 172 | t.lexer.lineno += t.value.count("\n") 173 | 174 | def t_error(self, t): 175 | raise ValueError("Illegal character '%s'" % t.value[0]) 176 | 177 | # Parsing rules 178 | 179 | precedence = ( 180 | ("right", "QUESTION"), 181 | ("left", "LOGICAL_OR"), 182 | ("left", "LOGICAL_AND"), 183 | ("left", "BITWISE_OR"), 184 | ("left", "BITWISE_XOR"), 185 | ("left", "BITWISE_AND"), 186 | ("left", "EQUAL", "NEQUAL", "IDENT", "NIDENT"), 187 | ("left", "LESS", "LESS_EQUAL", "GREATER", "GREATER_EQUAL"), 188 | ("left", "LSHIFT", "RSHIFT", "ZFRSHIFT"), 189 | ("left", "PLUS", "MINUS"), 190 | ("left", "TIMES", "DIVIDE", "MODULO"), 191 | ("left", "EXP"), 192 | ("right", "UMINUS", "UPLUS", "LOGICAL_NOT", "BITWISE_NOT"), 193 | ) 194 | 195 | def p_expression_binop(self, p): 196 | """ 197 | expression : expression PLUS expression 198 | | expression MINUS expression 199 | | expression TIMES expression 200 | | expression DIVIDE expression 201 | | expression EXP expression 202 | | expression MODULO expression 203 | | expression LESS expression 204 | | expression LESS_EQUAL expression 205 | | expression GREATER expression 206 | | expression GREATER_EQUAL expression 207 | | expression LSHIFT expression 208 | | expression RSHIFT expression 209 | | expression ZFRSHIFT expression 210 | | expression EQUAL expression 211 | | expression IDENT expression 212 | | expression NEQUAL expression 213 | | expression NIDENT expression 214 | | expression BITWISE_AND expression 215 | | expression BITWISE_OR expression 216 | | expression BITWISE_XOR expression 217 | | expression LOGICAL_OR expression 218 | | expression LOGICAL_AND expression 219 | """ 220 | p[0] = ast.BinOp(lhs=p[1], op=p[2], rhs=p[3]) 221 | 222 | def p_expression_ternary(self, p): 223 | "expression : expression QUESTION expression COLON expression" 224 | p[0] = ast.TernOp(op=(p[2], p[4]), lhs=p[1], mid=p[3], rhs=p[5]) 225 | 226 | def p_expression_unaryop(self, p): 227 | """ 228 | expression : MINUS expression %prec UMINUS 229 | | PLUS expression %prec UPLUS 230 | | BITWISE_NOT expression 231 | | LOGICAL_NOT expression 232 | """ 233 | p[0] = ast.UnOp(op=p[1], rhs=p[2]) 234 | 235 | def p_expression_atom(self, p): 236 | """ 237 | expression : atom 238 | """ 239 | p[0] = p[1] 240 | 241 | def p_atom(self, p): 242 | """ 243 | atom : number 244 | | string 245 | | regex 246 | | global 247 | | list 248 | | object 249 | | group 250 | | attraccess 251 | | functioncall 252 | | indexing 253 | """ 254 | p[0] = p[1] 255 | 256 | def p_number(self, p): 257 | """ 258 | number : HEX 259 | | OCTAL 260 | | BINARY 261 | | FLOAT 262 | """ 263 | p[0] = ast.Number(p[1]) 264 | 265 | def p_string(self, p): 266 | "string : STRING" 267 | p[0] = ast.String(p[1]) 268 | 269 | def p_regex(self, p): 270 | "regex : REGEX" 271 | p[0] = ast.Regex(p[1]) 272 | 273 | def p_global(self, p): 274 | "global : NAME" 275 | p[0] = ast.Global(p[1]) 276 | 277 | def p_name(self, p): 278 | "name : NAME" 279 | p[0] = ast.Name(p[1]) 280 | 281 | def p_list(self, p): 282 | """ 283 | list : LBRACKET RBRACKET 284 | | LBRACKET arglist RBRACKET 285 | """ 286 | if len(p) == 3: 287 | p[0] = ast.List([]) 288 | elif len(p) == 4: 289 | p[0] = ast.List(p[2]) 290 | 291 | def p_object(self, p): 
292 | """ 293 | object : LBRACE RBRACE 294 | | LBRACE objectarglist RBRACE 295 | """ 296 | if len(p) == 3: 297 | p[0] = ast.Object([]) 298 | elif len(p) == 4: 299 | p[0] = ast.Object(p[2]) 300 | 301 | def p_objectarglist(self, p): 302 | """ 303 | objectarglist : objectarglist COMMA objectarg 304 | | objectarg 305 | """ 306 | if len(p) == 4: 307 | p[0] = p[1] + [p[3]] 308 | else: 309 | p[0] = [p[1]] 310 | 311 | def p_objectarg(self, p): 312 | """ 313 | objectarg : objectkey COLON expression 314 | | name 315 | """ 316 | if len(p) == 4: 317 | p[0] = (p[1], p[3]) 318 | elif len(p) == 2: 319 | p[0] = p[1] 320 | 321 | def p_objectkey(self, p): 322 | """ 323 | objectkey : name 324 | | string 325 | | number 326 | """ 327 | p[0] = p[1] 328 | 329 | def p_group(self, p): 330 | "group : LPAREN expression RPAREN" 331 | p[0] = p[2] 332 | 333 | def p_attraccess(self, p): 334 | "attraccess : atom PERIOD NAME" 335 | p[0] = ast.Attr(obj=p[1], attr=p[3]) 336 | 337 | def p_indexing(self, p): 338 | "indexing : atom LBRACKET expression RBRACKET" 339 | p[0] = ast.Item(obj=p[1], item=p[3]) 340 | 341 | def p_functioncall(self, p): 342 | """ 343 | functioncall : atom LPAREN RPAREN 344 | | atom LPAREN arglist RPAREN 345 | """ 346 | if len(p) == 4: 347 | p[0] = ast.Func(func=p[1], args=[]) 348 | elif len(p) == 5: 349 | p[0] = ast.Func(func=p[1], args=p[3]) 350 | 351 | def p_arglist(self, p): 352 | """ 353 | arglist : arglist COMMA expression 354 | | expression 355 | """ 356 | if len(p) == 4: 357 | p[0] = p[1] + [p[3]] 358 | else: 359 | p[0] = [p[1]] 360 | 361 | def p_error(self, p): 362 | if p: 363 | raise ValueError(f"Syntax error at '{p.value}'") 364 | else: 365 | raise ValueError("Syntax error at EOF") 366 | 367 | 368 | parser = Parser() 369 | -------------------------------------------------------------------------------- /altair_transform/utils/_parser_Parser_parsetab.py: -------------------------------------------------------------------------------- 1 | 2 | # _parser_Parser_parsetab.py 3 | # This file is automatically generated. Do not edit. 
4 | # pylint: disable=W,C,R 5 | _tabversion = '3.10' 6 | 7 | _lr_method = 'LALR' 8 | 9 | _lr_signature = 'rightQUESTIONleftLOGICAL_ORleftLOGICAL_ANDleftBITWISE_ORleftBITWISE_XORleftBITWISE_ANDleftEQUALNEQUALIDENTNIDENTleftLESSLESS_EQUALGREATERGREATER_EQUALleftLSHIFTRSHIFTZFRSHIFTleftPLUSMINUSleftTIMESDIVIDEMODULOleftEXPrightUMINUSUPLUSLOGICAL_NOTBITWISE_NOTBINARY BITWISE_AND BITWISE_NOT BITWISE_OR BITWISE_XOR COLON COMMA DIVIDE EQUAL EXP FLOAT GREATER GREATER_EQUAL HEX IDENT LBRACE LBRACKET LESS LESS_EQUAL LOGICAL_AND LOGICAL_NOT LOGICAL_OR LPAREN LSHIFT MINUS MODULO NAME NEQUAL NIDENT OCTAL PERIOD PLUS QUESTION RBRACE RBRACKET REGEX RPAREN RSHIFT STRING TIMES ZFRSHIFT\n expression : expression PLUS expression\n | expression MINUS expression\n | expression TIMES expression\n | expression DIVIDE expression\n | expression EXP expression\n | expression MODULO expression\n | expression LESS expression\n | expression LESS_EQUAL expression\n | expression GREATER expression\n | expression GREATER_EQUAL expression\n | expression LSHIFT expression\n | expression RSHIFT expression\n | expression ZFRSHIFT expression\n | expression EQUAL expression\n | expression IDENT expression\n | expression NEQUAL expression\n | expression NIDENT expression\n | expression BITWISE_AND expression\n | expression BITWISE_OR expression\n | expression BITWISE_XOR expression\n | expression LOGICAL_OR expression\n | expression LOGICAL_AND expression\n expression : expression QUESTION expression COLON expression\n expression : MINUS expression %prec UMINUS\n | PLUS expression %prec UPLUS\n | BITWISE_NOT expression\n | LOGICAL_NOT expression\n \n expression : atom\n \n atom : number\n | string\n | regex\n | global\n | list\n | object\n | group\n | attraccess\n | functioncall\n | indexing\n \n number : HEX\n | OCTAL\n | BINARY\n | FLOAT\n string : STRINGregex : REGEXglobal : NAMEname : NAME\n list : LBRACKET RBRACKET\n | LBRACKET arglist RBRACKET\n \n object : LBRACE RBRACE\n | LBRACE objectarglist RBRACE\n \n objectarglist : objectarglist COMMA objectarg\n | objectarg\n \n objectarg : objectkey COLON expression\n | name\n \n objectkey : name\n | string\n | number\n group : LPAREN expression RPARENattraccess : atom PERIOD NAMEindexing : atom LBRACKET expression RBRACKET\n functioncall : atom LPAREN RPAREN\n | atom LPAREN arglist RPAREN\n \n arglist : arglist COMMA expression\n | expression\n ' 10 | 11 | _lr_action_items = 
{'MINUS':([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,55,56,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,97,98,100,101,102,103,104,105,107,108,],[3,28,3,3,3,3,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,-25,-24,-26,-27,3,3,-47,28,-49,28,-1,-2,-3,-4,-5,-6,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,-59,-61,28,-48,3,-50,3,-58,3,-62,-60,28,28,28,]),'PLUS':([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,55,56,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,97,98,100,101,102,103,104,105,107,108,],[2,27,2,2,2,2,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,-25,-24,-26,-27,2,2,-47,27,-49,27,-1,-2,-3,-4,-5,-6,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,-59,-61,27,-48,2,-50,2,-58,2,-62,-60,27,27,27,]),'BITWISE_NOT':([0,2,3,4,5,24,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,55,56,97,100,102,],[4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,]),'LOGICAL_NOT':([0,2,3,4,5,24,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,55,56,97,100,102,],[5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,]),'HEX':([0,2,3,4,5,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,55,56,97,99,100,102,],[17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,]),'OCTAL':([0,2,3,4,5,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,55,56,97,99,100,102,],[18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,]),'BINARY':([0,2,3,4,5,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,55,56,97,99,100,102,],[19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,]),'FLOAT':([0,2,3,4,5,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,55,56,97,99,100,102,],[20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,]),'STRING':([0,2,3,4,5,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,55,56,97,99,100,102,],[21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,]),'REGEX':([0,2,3,4,5,24,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,55,56,97,100,102,],[22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,]),'NAME':([0,2,3,4,5,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,54,55,56,97,99,100,102,],[23,23,23,23,23,23,67,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,92,23,23,23,67,23,23,]),'LBRACKET':([0,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,55,56,57,60,92,93,96,97,98,100,101,102,103,104,],[24,24,24,24,24,56,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,2
4,24,24,-47,-49,-59,-61,-48,24,-50,24,-58,24,-62,-60,]),'LBRACE':([0,2,3,4,5,24,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,55,56,97,100,102,],[25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,]),'LPAREN':([0,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,55,56,57,60,92,93,96,97,98,100,101,102,103,104,],[26,26,26,26,26,55,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,26,26,26,26,26,26,26,26,26,26,26,26,26,26,26,26,26,26,26,26,26,26,26,26,26,26,26,-47,-49,-59,-61,-48,26,-50,26,-58,26,-62,-60,]),'$end':([1,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,60,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,92,93,96,98,101,103,104,108,],[0,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,-47,-49,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-59,-61,-48,-50,-58,-62,-60,-23,]),'TIMES':([1,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,98,101,103,104,105,107,108,],[29,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,-47,29,-49,29,29,29,-3,-4,-5,-6,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,-59,-61,29,-48,-50,-58,-62,-60,29,29,29,]),'DIVIDE':([1,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,98,101,103,104,105,107,108,],[30,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,-47,30,-49,30,30,30,-3,-4,-5,-6,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,-59,-61,30,-48,-50,-58,-62,-60,30,30,30,]),'EXP':([1,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,98,101,103,104,105,107,108,],[31,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,-47,31,-49,31,31,31,31,31,-5,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,-59,-61,31,-48,-50,-58,-62,-60,31,31,31,]),'MODULO':([1,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,98,101,103,104,105,107,108,],[32,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,-47,32,-49,32,32,32,-3,-4,-5,-6,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,-59,-61,32,-48,-50,-58,-62,-60,32,32,32,]),'LESS':([1,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,98,101,103,104,105,107,108,],[33,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,-47,33,-49,33,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,33,33,33,33,33,33,33,33,33,33,-59,-61,33,-48,-50,-58,-62,-60,33,33,33,]),'LESS_EQUAL':([1,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,98,101,103,104,105,107,108,],[34,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,-47,34,-49,34,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,34,34,34,34,34,34,34,34,34,34,-59,-61,34,-48,-50,-58,
-62,-60,34,34,34,]),'GREATER':([1,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,98,101,103,104,105,107,108,],[35,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,-47,35,-49,35,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,35,35,35,35,35,35,35,35,35,35,-59,-61,35,-48,-50,-58,-62,-60,35,35,35,]),'GREATER_EQUAL':([1,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,98,101,103,104,105,107,108,],[36,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,-47,36,-49,36,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,36,36,36,36,36,36,36,36,36,36,-59,-61,36,-48,-50,-58,-62,-60,36,36,36,]),'LSHIFT':([1,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,98,101,103,104,105,107,108,],[37,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,-47,37,-49,37,-1,-2,-3,-4,-5,-6,37,37,37,37,-11,-12,-13,37,37,37,37,37,37,37,37,37,37,-59,-61,37,-48,-50,-58,-62,-60,37,37,37,]),'RSHIFT':([1,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,98,101,103,104,105,107,108,],[38,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,-47,38,-49,38,-1,-2,-3,-4,-5,-6,38,38,38,38,-11,-12,-13,38,38,38,38,38,38,38,38,38,38,-59,-61,38,-48,-50,-58,-62,-60,38,38,38,]),'ZFRSHIFT':([1,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,98,101,103,104,105,107,108,],[39,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,-47,39,-49,39,-1,-2,-3,-4,-5,-6,39,39,39,39,-11,-12,-13,39,39,39,39,39,39,39,39,39,39,-59,-61,39,-48,-50,-58,-62,-60,39,39,39,]),'EQUAL':([1,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,98,101,103,104,105,107,108,],[40,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,-47,40,-49,40,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,40,40,40,40,40,40,-59,-61,40,-48,-50,-58,-62,-60,40,40,40,]),'IDENT':([1,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,98,101,103,104,105,107,108,],[41,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,-47,41,-49,41,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,41,41,41,41,41,41,-59,-61,41,-48,-50,-58,-62,-60,41,41,41,]),'NEQUAL':([1,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,98,101,103,104,105,107,108,],[42,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,-47,42,-49,42,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,42,42,42,42,42,42,-59,-61,42,-48,-50,-58,-62,-60,42,42,42,]),'NIDENT':([1,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,9
6,98,101,103,104,105,107,108,],[43,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,-47,43,-49,43,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,43,43,43,43,43,43,-59,-61,43,-48,-50,-58,-62,-60,43,43,43,]),'BITWISE_AND':([1,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,98,101,103,104,105,107,108,],[44,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,-47,44,-49,44,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18,44,44,44,44,44,-59,-61,44,-48,-50,-58,-62,-60,44,44,44,]),'BITWISE_OR':([1,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,98,101,103,104,105,107,108,],[45,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,-47,45,-49,45,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,45,45,45,-59,-61,45,-48,-50,-58,-62,-60,45,45,45,]),'BITWISE_XOR':([1,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,98,101,103,104,105,107,108,],[46,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,-47,46,-49,46,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18,46,-20,46,46,46,-59,-61,46,-48,-50,-58,-62,-60,46,46,46,]),'LOGICAL_OR':([1,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,98,101,103,104,105,107,108,],[47,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,-47,47,-49,47,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,47,-59,-61,47,-48,-50,-58,-62,-60,47,47,47,]),'LOGICAL_AND':([1,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,98,101,103,104,105,107,108,],[48,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,-47,48,-49,48,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,48,-22,48,-59,-61,48,-48,-50,-58,-62,-60,48,48,48,]),'QUESTION':([1,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,98,101,103,104,105,107,108,],[49,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,-47,49,-49,49,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,49,-59,-61,49,-48,-50,-58,-62,-60,49,49,49,]),'RBRACKET':([6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,50,51,52,53,57,58,59,60,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,92,93,95,96,98,101,103,104,105,108,],[-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,57,-25,-24,-26,-27,-47,96,-64,-49,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-59,-61,104,-48,-50,-58,-62,-60,-63,-23,]),'COMMA':([6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,58,59,60,61,62,64,67,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,92,93,94,96,98,101,103,104,105,106,107,108,],[-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-
27,-47,97,-64,-49,99,-52,-54,-46,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-59,-61,97,-48,-50,-58,-62,-60,-63,-51,-53,-23,]),'RPAREN':([6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,55,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,92,93,94,96,98,101,103,104,105,108,],[-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,93,-47,-64,-49,101,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-59,-61,103,-48,-50,-58,-62,-60,-63,-23,]),'COLON':([6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,60,63,64,65,66,67,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,96,98,101,103,104,108,],[-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,-47,-49,100,-55,-56,-57,-46,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,102,-59,-61,-48,-50,-58,-62,-60,-23,]),'RBRACE':([6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,25,50,51,52,53,57,60,61,62,64,67,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,92,93,96,98,101,103,104,106,107,108,],[-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,60,-25,-24,-26,-27,-47,-49,98,-52,-54,-46,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-59,-61,-48,-50,-58,-62,-60,-51,-53,-23,]),'PERIOD':([6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,57,60,92,93,96,98,101,103,104,],[54,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-47,-49,-59,-61,-47,-50,-58,-62,-60,]),}
12 | 
13 | _lr_action = {}
14 | for _k, _v in _lr_action_items.items():
15 |    for _x,_y in zip(_v[0],_v[1]):
16 |       if not _x in _lr_action: _lr_action[_x] = {}
17 |       _lr_action[_x][_k] = _y
18 | del _lr_action_items
19 | 
20 | _lr_goto_items = {'expression':([0,2,3,4,5,24,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,55,56,97,100,102,],[1,50,51,52,53,59,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,59,95,105,107,108,]),'atom':([0,2,3,4,5,24,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,55,56,97,100,102,],[6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,]),'number':([0,2,3,4,5,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,55,56,97,99,100,102,],[7,7,7,7,7,7,66,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,66,7,7,]),'string':([0,2,3,4,5,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,55,56,97,99,100,102,],[8,8,8,8,8,8,65,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,65,8,8,]),'regex':([0,2,3,4,5,24,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,55,56,97,100,102,],[9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,]),'global':([0,2,3,4,5,24,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,55,56,97,100,102,],[10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,]),'list':([0,2,3,4,5,24,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,55,56,97,100,102,],[11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,]),'object':([0,2,3,4,5,24,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,55,56,97,100,102,],[12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,]),'group':([0,2,3,4,5,24,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,55,56,97,100,102,],[13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,]),'attraccess':([0,2,3,4,5,24,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,55,56,97,100,102,],[14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,]),'functioncall':([0,2,3,4,5,24,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,55,56,97,100,102,],[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,]),'indexing':([0,2,3,4,5,24,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,55,56,97,100,102,],[16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,]),'arglist':([24,55,],[58,94,]),'objectarglist':([25,],[61,]),'objectarg':([25,99,],[62,106,]),'objectkey':([25,99,],[63,63,]),'name':([25,99,],[64,64,]),}
21 | 
22 | _lr_goto = {}
23 | for _k, _v in _lr_goto_items.items():
24 |    for _x, _y in zip(_v[0], _v[1]):
25 |       if not _x in _lr_goto: _lr_goto[_x] = {}
26 |       _lr_goto[_x][_k] = _y
27 | del _lr_goto_items
28 | _lr_productions = [
29 |   ("S' -> expression","S'",1,None,None,None),
30 |   ('expression -> expression PLUS expression','expression',3,'p_expression_binop','_parser.py',197),
31 |   ('expression -> expression MINUS expression','expression',3,'p_expression_binop','_parser.py',198),
32 |   ('expression -> expression TIMES expression','expression',3,'p_expression_binop','_parser.py',199),
33 |   ('expression -> expression DIVIDE expression','expression',3,'p_expression_binop','_parser.py',200),
34 |   ('expression -> expression EXP expression','expression',3,'p_expression_binop','_parser.py',201),
35 |   ('expression -> expression MODULO expression','expression',3,'p_expression_binop','_parser.py',202),
36 |   ('expression -> expression LESS expression','expression',3,'p_expression_binop','_parser.py',203),
37 |   ('expression -> expression LESS_EQUAL expression','expression',3,'p_expression_binop','_parser.py',204),
38 |   ('expression -> expression GREATER expression','expression',3,'p_expression_binop','_parser.py',205),
39 |   ('expression -> expression GREATER_EQUAL expression','expression',3,'p_expression_binop','_parser.py',206),
40 |   ('expression -> expression LSHIFT expression','expression',3,'p_expression_binop','_parser.py',207),
41 |   ('expression -> expression RSHIFT expression','expression',3,'p_expression_binop','_parser.py',208),
42 |   ('expression -> expression ZFRSHIFT expression','expression',3,'p_expression_binop','_parser.py',209),
43 |   ('expression -> expression EQUAL expression','expression',3,'p_expression_binop','_parser.py',210),
44 |   ('expression -> expression IDENT expression','expression',3,'p_expression_binop','_parser.py',211),
45 |   ('expression -> expression NEQUAL expression','expression',3,'p_expression_binop','_parser.py',212),
46 |   ('expression -> expression NIDENT expression','expression',3,'p_expression_binop','_parser.py',213),
47 |   ('expression -> expression BITWISE_AND expression','expression',3,'p_expression_binop','_parser.py',214),
48 |   ('expression -> expression BITWISE_OR expression','expression',3,'p_expression_binop','_parser.py',215),
49 |   ('expression -> expression BITWISE_XOR expression','expression',3,'p_expression_binop','_parser.py',216),
50 |   ('expression -> expression LOGICAL_OR expression','expression',3,'p_expression_binop','_parser.py',217),
51 |   ('expression -> expression LOGICAL_AND expression','expression',3,'p_expression_binop','_parser.py',218),
52 |   ('expression -> expression QUESTION expression COLON expression','expression',5,'p_expression_ternary','_parser.py',223),
53 |   ('expression -> MINUS expression','expression',2,'p_expression_unaryop','_parser.py',228),
54 |   ('expression -> PLUS expression','expression',2,'p_expression_unaryop','_parser.py',229),
55 |   ('expression -> BITWISE_NOT expression','expression',2,'p_expression_unaryop','_parser.py',230),
56 |   ('expression -> LOGICAL_NOT expression','expression',2,'p_expression_unaryop','_parser.py',231),
57 |   ('expression -> atom','expression',1,'p_expression_atom','_parser.py',237),
58 |   ('atom -> number','atom',1,'p_atom','_parser.py',243),
59 |   ('atom -> string','atom',1,'p_atom','_parser.py',244),
60 |   ('atom -> regex','atom',1,'p_atom','_parser.py',245),
61 |   ('atom -> global','atom',1,'p_atom','_parser.py',246),
62 |   ('atom -> list','atom',1,'p_atom','_parser.py',247),
63 |   ('atom -> object','atom',1,'p_atom','_parser.py',248),
64 |   ('atom -> group','atom',1,'p_atom','_parser.py',249),
65 |   ('atom -> attraccess','atom',1,'p_atom','_parser.py',250),
66 |   ('atom -> functioncall','atom',1,'p_atom','_parser.py',251),
67 |   ('atom -> indexing','atom',1,'p_atom','_parser.py',252),
68 |   ('number -> HEX','number',1,'p_number','_parser.py',258),
69 |   ('number -> OCTAL','number',1,'p_number','_parser.py',259),
70 |   ('number -> BINARY','number',1,'p_number','_parser.py',260),
71 |   ('number -> FLOAT','number',1,'p_number','_parser.py',261),
72 |   ('string -> STRING','string',1,'p_string','_parser.py',266),
73 |   ('regex -> REGEX','regex',1,'p_regex','_parser.py',270),
74 |   ('global -> NAME','global',1,'p_global','_parser.py',274),
75 |   ('name -> NAME','name',1,'p_name','_parser.py',278),
76 |   ('list -> LBRACKET RBRACKET','list',2,'p_list','_parser.py',283),
77 |   ('list -> LBRACKET arglist RBRACKET','list',3,'p_list','_parser.py',284),
78 |   ('object -> LBRACE RBRACE','object',2,'p_object','_parser.py',293),
79 |   ('object -> LBRACE objectarglist RBRACE','object',3,'p_object','_parser.py',294),
80 |   ('objectarglist -> objectarglist COMMA objectarg','objectarglist',3,'p_objectarglist','_parser.py',303),
81 |   ('objectarglist -> objectarg','objectarglist',1,'p_objectarglist','_parser.py',304),
82 |   ('objectarg -> objectkey COLON expression','objectarg',3,'p_objectarg','_parser.py',313),
83 |   ('objectarg -> name','objectarg',1,'p_objectarg','_parser.py',314),
84 |   ('objectkey -> name','objectkey',1,'p_objectkey','_parser.py',323),
85 |   ('objectkey -> string','objectkey',1,'p_objectkey','_parser.py',324),
86 |   ('objectkey -> number','objectkey',1,'p_objectkey','_parser.py',325),
87 |   ('group -> LPAREN expression RPAREN','group',3,'p_group','_parser.py',330),
88 |   ('attraccess -> atom PERIOD NAME','attraccess',3,'p_attraccess','_parser.py',334),
89 |   ('indexing -> atom LBRACKET expression RBRACKET','indexing',4,'p_indexing','_parser.py',338),
90 |   ('functioncall -> atom LPAREN RPAREN','functioncall',3,'p_functioncall','_parser.py',343),
91 |   ('functioncall -> atom LPAREN arglist RPAREN','functioncall',4,'p_functioncall','_parser.py',344),
92 |   ('arglist -> arglist COMMA expression','arglist',3,'p_arglist','_parser.py',353),
93 |   ('arglist -> expression','arglist',1,'p_arglist','_parser.py',354),
94 | ]
95 | 
--------------------------------------------------------------------------------
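A note on how PLY-shaped tables like the ones in _parser_Parser_parsetab.py are consumed (this note and the sketch below are illustrative additions, not part of the generated file). _lr_action[state][token] steers the parse loop: a positive entry means "shift the token and enter that state", a negative entry means "reduce by production number -entry of _lr_productions", and 0 means "accept" (visible above as the 0 that '$end' maps to for state 1). After every reduction, _lr_goto[state][nonterminal] names the state to resume in. The parse function below is a hypothetical, minimal driver written only to make that table shape concrete; it assumes tables of exactly this form, substitutes plain (nonterminal, children) tuples for the p_* semantic actions named in _lr_productions, and is not how PLY's own driver (ply.yacc) is implemented.

    def parse(tokens, lr_action, lr_goto, productions):
        """Minimal LR driver over PLY-shaped tables (illustration only).

        ``tokens`` is a list of (type, value) pairs ending with ('$end', None).
        Reductions build (nonterminal, children) tuples rather than calling
        the grammar's p_* handlers, so the result is a bare parse tree.
        """
        states = [0]  # LR state stack; state 0 is always the start state
        values = []   # value stack, parallel to the symbols consumed so far
        i = 0
        while True:
            ttype, tvalue = tokens[i]
            action = lr_action[states[-1]].get(ttype)
            if action is None:
                raise SyntaxError("unexpected %r in state %d" % (ttype, states[-1]))
            if action > 0:
                # Positive entry: shift the lookahead and enter state ``action``.
                states.append(action)
                values.append(tvalue)
                i += 1
            elif action < 0:
                # Negative entry: reduce by production number ``-action``.
                _, nonterminal, length = productions[-action][:3]
                children = values[-length:] if length else []
                if length:
                    del states[-length:]
                    del values[-length:]
                values.append((nonterminal, children))
                # _lr_goto names the state to resume in after the reduction.
                states.append(lr_goto[states[-1]][nonterminal])
            else:
                # Zero entry: accept; the finished value sits on top of the stack.
                return values[-1]

Tracing an input such as 1+2 through this loop (assuming decimal literals lex as FLOAT, so the token stream is FLOAT PLUS FLOAT $end): the first FLOAT is shifted, then reduced through number -> FLOAT, atom -> number, and expression -> atom; PLUS is shifted; the second operand repeats the same reductions; and a final reduction by expression -> expression PLUS expression precedes the accept entry.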