├── altair_transform
    ├── tests
    │   ├── __init__.py
    │   ├── test_examples.py
    │   ├── test_driver.py
    │   ├── test_core.py
    │   ├── test_vegaexpr.py
    │   └── test_extract.py
    ├── utils
    │   ├── tests
    │   │   ├── __init__.py
    │   │   ├── test_evaljs.py
    │   │   ├── test_parser.py
    │   │   ├── test_data.py
    │   │   ├── test_timeunit.py
    │   │   └── _testcases.py
    │   ├── __init__.py
    │   ├── ast.py
    │   ├── data.py
    │   ├── timeunit.py
    │   ├── _evaljs.py
    │   ├── _parser.py
    │   └── _parser_Parser_parsetab.py
    ├── transform
    │   ├── tests
    │   │   ├── __init__.py
    │   │   ├── test_impute.py
    │   │   ├── test_fold.py
    │   │   ├── test_filter.py
    │   │   ├── test_quantile.py
    │   │   ├── test_pivot.py
    │   │   ├── test_flatten.py
    │   │   ├── test_window.py
    │   │   ├── test_lookup.py
    │   │   ├── test_timeunit.py
    │   │   ├── test_aggregate.py
    │   │   ├── test_bin.py
    │   │   ├── test_regression.py
    │   │   └── test_transform.py
    │   ├── __init__.py
    │   ├── timeunit.py
    │   ├── sample.py
    │   ├── calculate.py
    │   ├── visitor.py
    │   ├── pivot.py
    │   ├── joinaggregate.py
    │   ├── fold.py
    │   ├── flatten.py
    │   ├── quantile.py
    │   ├── bin.py
    │   ├── lookup.py
    │   ├── impute.py
    │   ├── aggregate.py
    │   ├── window.py
    │   ├── filter.py
    │   ├── vega_utils.py
    │   └── regression.py
    ├── __init__.py
    ├── conftest.py
    ├── core.py
    ├── driver.py
    └── extract.py
├── requirements_dev.txt
├── requirements_driver.txt
├── images
    ├── histogram.png
    └── random_walk.png
├── requirements.txt
├── pytest.ini
├── MANIFEST.in
├── setup.cfg
├── pyproject.toml
├── .travis.yml
├── Makefile
├── mypy.ini
├── CHANGES.md
├── .github
    └── workflows
        ├── lint.yml
        └── build.yml
├── RELEASING.md
├── LICENSE
├── .gitignore
├── setup.py
└── README.md

/altair_transform/tests/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/altair_transform/utils/tests/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/altair_transform/transform/tests/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/requirements_dev.txt:
--------------------------------------------------------------------------------
black
flake8
mypy
pytest
--------------------------------------------------------------------------------
/requirements_driver.txt:
--------------------------------------------------------------------------------
altair_saver
altair_viewer
selenium
--------------------------------------------------------------------------------
/images/histogram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/altair-viz/altair-transform/HEAD/images/histogram.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
ply
altair>=4.0
numpy
pandas
dataclasses; python_version < '3.7'
--------------------------------------------------------------------------------
/images/random_walk.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/altair-viz/altair-transform/HEAD/images/random_walk.png
--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
[pytest]
filterwarnings =
    error
    ignore::DeprecationWarning
    ignore::altair.utils.AltairDeprecationWarning
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
include *.md
include *.ini
include Makefile
include LICENSE
include requirements.txt
include requirements_dev.txt
include requirements_driver.txt
recursive-include altair_transform *.py *.md *.json
--------------------------------------------------------------------------------
/altair_transform/utils/__init__.py:
--------------------------------------------------------------------------------
from ._parser import parser, Parser
from ._evaljs import evaljs, undefined, JSRegex
from .data import to_dataframe

__all__ = ["parser", "Parser", "evaljs", "to_dataframe", "undefined", "JSRegex"]
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
[flake8]
exclude = altair_transform/utils/_parser_Parser_parsetab.py
max-line-length = 88
ignore = E203, E266, E501, W503
max-complexity = 18
select = B,C,E,F,W,T4,B9

[metadata]
description-file = README.md
license_file = LICENSE
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
[tool.black]
line-length = 88
target-version = ['py36', 'py37', 'py38']
include = '\.pyi?$'
exclude = '''
/(
    \.eggs
  | \.git
  | \.hg
  | \.mypy_cache
  | \.tox
  | \.venv
  | _build
  | build
  | dist
)/
| altair_transform/utils/_parser_Parser_parsetab.py
'''
--------------------------------------------------------------------------------
/altair_transform/transform/__init__.py:
--------------------------------------------------------------------------------
from .visitor import visit  # noqa: F401

# These submodules register appropriate visitors.
from . import (  # noqa: F401
    aggregate,
    bin,
    calculate,
    filter,
    flatten,
    fold,
    impute,
    joinaggregate,
    lookup,
    pivot,
    quantile,
    regression,
    sample,
    timeunit,
    window,
)
--------------------------------------------------------------------------------
5 | """ 6 | __version__ = "0.3.0.dev0" 7 | __all__ = ["apply", "extract_data", "transform_chart", "extract_transform"] 8 | 9 | from altair_transform.core import ( 10 | apply, 11 | extract_data, 12 | transform_chart, 13 | extract_transform, 14 | ) 15 | -------------------------------------------------------------------------------- /altair_transform/transform/timeunit.py: -------------------------------------------------------------------------------- 1 | import altair as alt 2 | import pandas as pd 3 | from .visitor import visit 4 | from ..utils.timeunit import compute_timeunit 5 | 6 | 7 | @visit.register(alt.TimeUnitTransform) 8 | def visit_timeunit(transform: alt.TimeUnitTransform, df: pd.DataFrame) -> pd.DataFrame: 9 | transform = transform.to_dict() 10 | df[transform["as"]] = compute_timeunit( 11 | df[transform["field"]], transform["timeUnit"] 12 | ) 13 | return df 14 | -------------------------------------------------------------------------------- /altair_transform/transform/sample.py: -------------------------------------------------------------------------------- 1 | import altair as alt 2 | import numpy as np 3 | import pandas as pd 4 | from .visitor import visit 5 | 6 | 7 | @visit.register(alt.SampleTransform) 8 | def visit_sample(transform: alt.SampleTransform, df: pd.DataFrame) -> pd.DataFrame: 9 | transform = transform.to_dict() 10 | sample = transform["sample"] 11 | 12 | if sample < df.shape[0]: 13 | index = np.sort(np.random.permutation(df.shape[0])[:sample]) 14 | df = df.iloc[index] 15 | return df 16 | -------------------------------------------------------------------------------- /altair_transform/transform/calculate.py: -------------------------------------------------------------------------------- 1 | import altair as alt 2 | import pandas as pd 3 | from .visitor import visit 4 | from ..vegaexpr import eval_vegajs 5 | 6 | 7 | @visit.register(alt.CalculateTransform) 8 | def visit_calculate( 9 | transform: alt.CalculateTransform, df: pd.DataFrame 10 | ) -> pd.DataFrame: 11 | transform = transform.to_dict() 12 | col = transform["as"] 13 | calc = transform["calculate"] 14 | df[col] = df.apply(lambda datum: eval_vegajs(calc, datum), axis=1) 15 | return df 16 | -------------------------------------------------------------------------------- /altair_transform/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import altair_transform.driver 3 | 4 | 5 | @pytest.fixture(scope="session") 6 | def driver(): 7 | try: 8 | from altair_saver import SeleniumSaver 9 | except (ImportError, ModuleNotFoundError): 10 | pytest.skip("altair_saver not importable; cannot run driver tests.") 11 | if not SeleniumSaver.enabled(): 12 | pytest.skip("selenium not properly configured; cannot run driver tests.") 13 | yield altair_transform.driver 14 | SeleniumSaver._stop_serving() 15 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | matrix: 4 | include: 5 | - python: 3.6 6 | - python: 3.7 7 | - python: 3.8 8 | 9 | env: 10 | global: 11 | - TEST_DIR=/tmp/_altair_transform/ 12 | 13 | before_install: 14 | - pip install pip --upgrade; 15 | - pip install -r requirements_dev.txt 16 | - mkdir -p $TEST_DIR 17 | 18 | install: 19 | - pip install .; 20 | 21 | script: 22 | - black --check . 
/.travis.yml:
--------------------------------------------------------------------------------
language: python

matrix:
  include:
    - python: 3.6
    - python: 3.7
    - python: 3.8

env:
  global:
    - TEST_DIR=/tmp/_altair_transform/

before_install:
  - pip install pip --upgrade;
  - pip install -r requirements_dev.txt
  - mkdir -p $TEST_DIR

install:
  - pip install .;

script:
  - black --check .
  - python -m flake8 altair_transform
  - python -m mypy altair_transform
  - cd $TEST_DIR && python -m pytest --pyargs --doctest-modules altair_transform;
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
test :
	black .
	python -m flake8 altair_transform
	python -m mypy altair_transform
	rm -rf build
	python setup.py build &&\
	cd build/lib &&\
	python -m pytest --pyargs --doctest-modules altair_transform

test-coverage:
	python setup.py build &&\
	cd build/lib &&\
	python -m pytest --pyargs --doctest-modules --cov=altair_transform --cov-report term altair_transform

test-coverage-html:
	python setup.py build &&\
	cd build/lib &&\
	python -m pytest --pyargs --doctest-modules --cov=altair_transform --cov-report html altair_transform
--------------------------------------------------------------------------------
/mypy.ini:
--------------------------------------------------------------------------------
[mypy]
python_version = 3.7

[mypy-altair_transform.utils._parser_Parser_parsetab]
ignore_errors = True

[mypy-altair.*]
ignore_missing_imports = True

[mypy-altair_saver.*]
ignore_missing_imports = True

[mypy-altair_viewer.*]
ignore_missing_imports = True

[mypy-numpy.*]
ignore_missing_imports = True

[mypy-pandas.*]
ignore_missing_imports = True

[mypy-ply.*]
ignore_missing_imports = True

[mypy-pytest.*]
ignore_missing_imports = True

[mypy-scipy.*]
ignore_missing_imports = True

[mypy-selenium.*]
ignore_missing_imports = True
--------------------------------------------------------------------------------
/altair_transform/transform/visitor.py:
--------------------------------------------------------------------------------
from functools import singledispatch
from typing import Any

import altair as alt
import pandas as pd


@singledispatch
def visit(transform: Any, df: pd.DataFrame) -> pd.DataFrame:
    raise NotImplementedError(f"transform of type {type(transform)}")


@visit.register(list)
def visit_list(transform: list, df: pd.DataFrame) -> pd.DataFrame:
    for t in transform:
        df = visit(t, df)
    return df


@visit.register(dict)
def visit_dict(transform: dict, df: pd.DataFrame) -> pd.DataFrame:
    transform = alt.Transform.from_dict(transform)
    return visit(transform, df)
--------------------------------------------------------------------------------
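visitor.py above is the dispatch hub: each transform module registers a
handler on the ``visit`` singledispatch function, and lists and dicts are
unwrapped recursively. A short sketch of that flow (assumes the package is
importable; importing the subpackage registers every visitor):

    import pandas as pd
    from altair_transform.transform import visit

    df = pd.DataFrame({"x": [1, 2, 3]})
    # A list dispatches to visit_list; each dict goes to visit_dict, which
    # parses it into an altair transform object and re-dispatches it.
    out = visit([{"calculate": "datum.x + 1", "as": "y"}], df)
    assert list(out["y"]) == [2, 3, 4]
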
/CHANGES.md:
--------------------------------------------------------------------------------
# Change Log

## Version 0.3 (unreleased)

## Version 0.2 (released 2019-12-03)

### Enhancements

- vegaexpr: support date functions (), string functions (#21, #22), stats functions (#24), array functions (#26, #27), regex functions (#28)
- support sequence generators (#25)
- Support full set of bin options (#14)
- Support extraction of implicit transforms from encodings (#10, #12, #13, #15, #29, #33)

### Bug Fixes

- Fix issue with fold transform (#5)

### Maintenance

- Compatibility with Python 3.6 & 3.8 (#16 & #17)
- Format package with [black](https://black.readthedocs.io/) (#11)

## Version 0.1 (released 2019-07-18)

Initial release, supporting most transforms from Altair 3.X
--------------------------------------------------------------------------------
/altair_transform/utils/tests/test_evaljs.py:
--------------------------------------------------------------------------------
import pytest

from altair_transform.utils import evaljs, parser
from ._testcases import extract
from ._testcases import EXPRESSIONS, JSONLY_EXPRESSIONS, NAMES


@pytest.fixture
def names():
    return NAMES


@pytest.mark.parametrize("expression", extract(EXPRESSIONS))
def test_expressions(expression, names):
    assert eval(expression, names) == evaljs(expression, names)


@pytest.mark.parametrize("expression,output", JSONLY_EXPRESSIONS)
def test_jsonly_expressions(expression, output, names):
    assert evaljs(expression, names) == output


def test_string_vs_ast():
    expression = "2 * (3 + 4)"
    parsed = parser.parse(expression)
    assert evaljs(expression) == evaljs(parsed)
--------------------------------------------------------------------------------
/altair_transform/transform/pivot.py:
--------------------------------------------------------------------------------
import altair as alt
import pandas as pd
from .visitor import visit
from .aggregate import AGG_REPLACEMENTS


@visit.register(alt.PivotTransform)
def visit_pivot(transform: alt.PivotTransform, df: pd.DataFrame) -> pd.DataFrame:
    transform = transform.to_dict()
    pivot = transform["pivot"]
    limit = transform.get("limit")
    if limit:
        vals = sorted(df[pivot].unique())[:limit]
        df = df[df[pivot].isin(vals)]
    groupby = transform.get("groupby")
    agg = transform.get("op", "sum")
    agg = AGG_REPLACEMENTS.get(agg, agg)
    out = df.pivot_table(
        columns=pivot, values=transform["value"], index=groupby, aggfunc=agg,
    ).reset_index(drop=not groupby)
    out.columns.names = [None]
    return out
--------------------------------------------------------------------------------
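A minimal sketch of the pivot visitor above (the default op is "sum";
assumes altair_transform is importable):

    import pandas as pd
    import altair_transform

    df = pd.DataFrame({"c": ["A", "A", "B"], "x": [1, 2, 5]})
    # Distinct values of "c" become columns; "x" is aggregated with sum.
    out = altair_transform.apply(df, {"pivot": "c", "value": "x"})
    # One row, with column "A" holding 1 + 2 = 3 and column "B" holding 5.
    assert out["A"].iloc[0] == 3 and out["B"].iloc[0] == 5
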
/.github/workflows/lint.yml:
--------------------------------------------------------------------------------
name: lint

on: [push, pull_request]

jobs:
  build:
    runs-on: ubuntu-latest
    name: flake8-black-mypy
    steps:
    - uses: actions/checkout@v1
    - name: Set up Python 3.8
      uses: actions/setup-python@v1
      with:
        python-version: 3.8
    - name: Lint with flake8
      run: |
        pip install flake8
        # stop the build if there are Python syntax errors or undefined names
        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
        # exit-zero treats all errors as warnings.
        flake8 . --count --exit-zero --max-complexity=10 --statistics
    - name: Check formatting with black
      run: |
        pip install black
        black --check .
    - name: Check types with mypy
      run: |
        pip install mypy
        mypy altair_transform
--------------------------------------------------------------------------------
/altair_transform/utils/tests/test_parser.py:
--------------------------------------------------------------------------------
import pytest

from altair_transform.utils import ast, Parser
from ._testcases import extract
from ._testcases import EXPRESSIONS, BAD_EXPRESSIONS, JSONLY_EXPRESSIONS


@pytest.fixture
def parser():
    return Parser()


@pytest.mark.parametrize("bad_expression", extract(BAD_EXPRESSIONS))
def test_bad_expressions(bad_expression, parser):
    with pytest.raises(ValueError):
        parser.parse(bad_expression)


@pytest.mark.parametrize("expression", extract(EXPRESSIONS))
def test_expressions(expression, parser):
    output = parser.parse(expression)
    assert isinstance(output, ast.Node)


@pytest.mark.parametrize("expression,output", JSONLY_EXPRESSIONS)
def test_jsonly_expressions(expression, output, parser):
    output = parser.parse(expression)
    assert isinstance(output, ast.Node)
--------------------------------------------------------------------------------
/altair_transform/tests/test_examples.py:
--------------------------------------------------------------------------------
import os
import re

import pytest


@pytest.fixture
def readme():
    possible_paths = [
        # Path within built distributions:
        os.path.join(os.path.dirname(__file__), "README.md"),
        # Path within source tree:
        os.path.join(os.path.dirname(__file__), "..", "..", "README.md"),
    ]

    for path in possible_paths:
        if os.path.exists(path):
            with open(path) as f:
                return f.read()

    raise ValueError("README file not found.")


def test_readme_snippets(readme):
    """Tests the code snippets from the package README."""
    regex = re.compile("```python\n(.*?)\n```", re.MULTILINE | re.DOTALL)

    codeblocks = regex.findall(readme)
    assert len(codeblocks) > 0

    namespace = {}
    for codeblock in codeblocks:
        exec(codeblock, namespace)
--------------------------------------------------------------------------------
/altair_transform/transform/joinaggregate.py:
--------------------------------------------------------------------------------
import altair as alt
import pandas as pd
from .visitor import visit
from .aggregate import AGG_REPLACEMENTS


@visit.register(alt.JoinAggregateTransform)
def visit_joinaggregate(
    transform: alt.JoinAggregateTransform, df: pd.DataFrame
) -> pd.DataFrame:
    transform = transform.to_dict()
    groupby = transform.get("groupby")
    for aggregate in transform["joinaggregate"]:
        op = aggregate["op"]
        field = aggregate["field"]
        col = aggregate["as"]

        op = AGG_REPLACEMENTS.get(op, op)
        if field == "*" and field not in df.columns:
            field = df.columns[0]

        if groupby is None:
            df[col] = df[field].aggregate(op)
        else:
            result = df.groupby(groupby)[field].aggregate(op)
            result.name = col
            df = df.join(result, on=groupby)
    return df
--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
name: build

on: [push, pull_request]

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: [ '3.6', '3.7', '3.8' ]
    name: Python ${{ matrix.python-version }}
    steps:
    - uses: actions/checkout@v1
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v1
      with:
        python-version: ${{ matrix.python-version }}
    - name: Set Up Chromedriver
      run: |
        sudo apt-get update
        sudo apt-get --only-upgrade install google-chrome-stable
        sudo apt-get -yqq install chromium-chromedriver
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install -r requirements.txt -r requirements_driver.txt
    - name: Test with pytest
      run: |
        pip install pytest
        pytest --doctest-modules altair_transform
--------------------------------------------------------------------------------
/altair_transform/transform/fold.py:
--------------------------------------------------------------------------------
import altair as alt
import pandas as pd
from .visitor import visit


@visit.register(alt.FoldTransform)
def visit_fold(transform: alt.FoldTransform, df: pd.DataFrame) -> pd.DataFrame:
    transform = transform.to_dict()
    fold = transform["fold"]
    var_name, value_name = transform.get("as", ("key", "value"))
    value_vars = [c for c in df.columns if c in fold]
    id_vars = [c for c in df.columns if c not in fold]

    # Add an index to track input order
    dfi = df.reset_index(drop=True).reset_index()
    index_name = dfi.columns[0]
    melted = dfi.melt(
        id_vars=[index_name] + id_vars,
        value_vars=value_vars,
        var_name=var_name,
        value_name=value_name,
    )
    return (
        pd.merge(melted, dfi, on=[index_name] + id_vars, how="left")
        .sort_values(index_name)
        .drop(index_name, axis=1)
        .reset_index(drop=True)
    )
--------------------------------------------------------------------------------
/altair_transform/transform/flatten.py:
--------------------------------------------------------------------------------
import altair as alt
import pandas as pd
from .visitor import visit


@visit.register(alt.FlattenTransform)
def visit_flatten(transform: alt.FlattenTransform, df: pd.DataFrame) -> pd.DataFrame:
    transform = transform.to_dict()

    fields = transform["flatten"]
    out = transform.get("as", [])

    if len(out) < len(fields):
        out = out + fields[len(out) :]
    if len(out) > len(fields):
        out = out[: len(fields)]

    if not fields:
        return df

    to_flatten = df[fields]
    others = df[[c for c in df.columns if c not in out]]

    def flatten_row(row):
        flattened = to_flatten.iloc[row].apply(pd.Series).T
        flattened.index = flattened.shape[0] * [row]
        return flattened

    flattened = pd.concat([flatten_row(i) for i in range(df.shape[0])], axis=0)
    flattened.columns = out

    return flattened.join(others).reset_index(drop=True)
--------------------------------------------------------------------------------
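A minimal sketch of the flatten visitor above (the same semantics the unit
tests further down verify; assumes altair_transform is importable):

    import pandas as pd
    import altair_transform

    df = pd.DataFrame({"x": [[1, 2], [3]], "cat": ["A", "B"]})
    # Each array element becomes its own row; scalar columns are repeated.
    out = altair_transform.apply(df, {"flatten": ["x"]})
    assert list(out["x"]) == [1, 2, 3]
    assert list(out["cat"]) == ["A", "A", "B"]
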
/altair_transform/transform/quantile.py:
--------------------------------------------------------------------------------
import altair as alt
import numpy as np
import pandas as pd
from .visitor import visit


@visit.register(alt.QuantileTransform)
def visit_quantile(transform: alt.QuantileTransform, df: pd.DataFrame) -> pd.DataFrame:
    transform = transform.to_dict()
    quantile = transform["quantile"]
    groupby = transform.get("groupby")
    pname, vname = transform.get("as", ["prob", "value"])
    probs = transform.get("probs")
    if probs is None:
        step = transform.get("step", 0.01)
        probs = np.arange(0.5 * step, 1.0, step)

    def qq(s: pd.Series) -> pd.DataFrame:
        return pd.DataFrame({pname: probs, vname: np.quantile(s, probs)})

    if groupby:
        return (
            df.groupby(groupby)[quantile]
            .apply(qq)
            .reset_index(groupby)
            .reset_index(drop=True)
        )
    else:
        return qq(df[quantile]).reset_index(drop=True)
--------------------------------------------------------------------------------
/RELEASING.md:
--------------------------------------------------------------------------------
1. Update version to, e.g. 1.0.0

   - in altair_transform/__init__.py

2. Make sure CHANGES.md is up to date for the release

3. Commit change and push to master

       git add . -u
       git commit -m "MAINT: bump version to 1.0.0"
       git push origin master

4. Tag the release:

       git tag -a v1.0.0 -m "version 1.0.0 release"
       git push origin v1.0.0

5. Build source & wheel distributions

       rm -r dist build  # clean old builds & distributions
       python setup.py sdist  # create a source distribution
       python setup.py bdist_wheel  # create a universal wheel

6. Publish to PyPI (requires correct PyPI owner permissions)

       twine upload dist/*

7. Update version to, e.g. 1.1.0dev

   - in altair_transform/__init__.py

8. Add a new changelog entry for the unreleased version

9. Commit change and push to master

       git add . -u
       git commit -m "MAINT: bump version to 1.1.0dev"
       git push origin master
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2019 Jake Vanderplas

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/altair_transform/tests/test_driver.py:
--------------------------------------------------------------------------------
import altair as alt
import pandas as pd
from pandas.testing import assert_frame_equal

# Note: driver fixture here comes from conftest.py
# These tests will be skipped if selenium driver is not available.


def test_extract_data_source(driver):
    df = pd.DataFrame({"x": [1, 2, 3], "y": ["A", "B", "C"]})
    chart = alt.Chart(df).mark_point()
    with alt.data_transformers.enable(consolidate_datasets=False):
        spec = chart.to_dict()
    df_out = driver._extract_data(spec, "source_0")
    assert_frame_equal(df, df_out)


def test_driver_apply(driver):
    df = pd.DataFrame({"x": [1, 2, 3]})
    transform = {"calculate": "2 * datum.x", "as": "y"}
    df_out = driver.apply(df, transform)

    df["y"] = 2 * df["x"]
    assert_frame_equal(df, df_out)


def test_utc_tz_offset(driver):
    # Test that UTC dates have zero offset between Python and JavaScript.
    assert driver.get_tz_offset("UTC") == pd.Timedelta(0)


def test_tz_code(driver):
    code = driver.get_tz_code()
    pd.to_datetime("2012-01-01").tz_localize(code)
--------------------------------------------------------------------------------
/altair_transform/transform/tests/test_impute.py:
--------------------------------------------------------------------------------
import altair as alt
import numpy as np
from numpy.testing import assert_equal
import pandas as pd
import pytest

import altair_transform


@pytest.mark.parametrize("method", ["value", "mean", "median", "max", "min"])
def test_impute_transform_no_groupby(method: str) -> None:
    data = pd.DataFrame({"x": [1, 2], "y": [2, 3]})
    transform = alt.ImputeTransform(
        impute="y", key="x", keyvals={"start": 0, "stop": 5}, value=0, method=method
    )
    if method == "value":
        value = 0
    else:
        value = data.y.agg(method)
    imputed = altair_transform.apply(data, transform)

    assert_equal(imputed.x.values, range(5))
    assert_equal(imputed.y[[1, 2]].values, data.y.values)
    assert_equal(imputed.y[[0, 3, 4]].values, value)


def test_impute_transform_with_groupby() -> None:
    data = pd.DataFrame(
        {"x": [1, 2, 4, 1, 3, 4], "y": [1, 2, 4, 2, 4, 5], "cat": list("AAABBB")}
    )

    transform = alt.ImputeTransform(impute="y", key="x", method="max", groupby=["cat"])

    imputed = altair_transform.apply(data, transform)
    assert_equal(imputed.x.values, np.tile(range(1, 5), 2))
    assert_equal(imputed.y.values, [1, 2, 4, 4, 2, 5, 4, 5])
--------------------------------------------------------------------------------
/altair_transform/utils/ast.py:
--------------------------------------------------------------------------------
"""Abstract syntax tree for parser"""
from dataclasses import dataclass
import typing


class Node:
    pass


@dataclass
class Expr(Node):
    value: Node


@dataclass
class BinOp(Node):
    op: str
    lhs: Expr
    rhs: Expr


@dataclass
class UnOp(Node):
    op: str
    rhs: Expr


@dataclass
class TernOp(Node):
    op: typing.Tuple[str, str]
    lhs: Expr
    mid: Expr
    rhs: Expr


@dataclass
class Number(Node):
    value: float


@dataclass
class String(Node):
    value: str


@dataclass
class Regex(Node):
    value: typing.Dict[str, str]


@dataclass
class Global(Node):
    name: str


@dataclass
class Name(Node):
    name: str


@dataclass
class List(Node):
    entries: typing.List[Expr]


@dataclass
class Object(Node):
    entries: typing.List[typing.Union[Name, typing.Tuple[Expr, Expr]]]


@dataclass
class Attr(Node):
    obj: Expr
    attr: Name


@dataclass
class Item(Node):
    obj: Expr
    item: Expr


@dataclass
class Func(Node):
    func: Expr
    args: typing.List[Expr]
--------------------------------------------------------------------------------
/altair_transform/transform/bin.py:
--------------------------------------------------------------------------------
"""Implementation of the bin transform."""
from typing import Tuple

import altair as alt
import pandas as pd
import numpy as np

from .visitor import visit
from .vega_utils import calculate_bins


def _cut(series: pd.Series, edges: np.ndarray) -> Tuple[pd.Series, pd.Series]:
    """Like pd.cut(), but include outliers in the outer bins."""
    bins = pd.cut(series, edges, labels=False, right=False)
    out_of_range = (series < edges[0]) | (series > edges[-1])
    bins[out_of_range] = -1
    bins = bins.astype(int)
    bins1 = pd.Series(edges[bins.values], index=bins.index, dtype=float)
    bins2 = pd.Series(edges[bins.values + 1], index=bins.index, dtype=float)
    bins1[out_of_range] = np.nan
    bins2[out_of_range] = np.nan
    return bins1, bins2


@visit.register(alt.BinTransform)
def visit_bin(transform: alt.BinTransform, df: pd.DataFrame) -> pd.DataFrame:
    transform_dct: dict = transform.to_dict()
    col = transform_dct["as"]
    bin_ = {} if transform_dct["bin"] is True else transform_dct["bin"]
    field = transform_dct["field"]

    bin_.setdefault("extent", [df[field].min(), df[field].max()])
    bins = calculate_bins(**bin_)

    if isinstance(col, str):
        df[col], df[col + "_end"] = _cut(df[field], bins)
    else:
        df[col[0]], df[col[1]] = _cut(df[field], bins)

    return df
--------------------------------------------------------------------------------
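A minimal sketch of the bin visitor above (bin edges come from
calculate_bins in vega_utils.py, which is not part of this listing, so the
exact edges are not asserted; assumes altair_transform is importable):

    import pandas as pd
    import altair_transform

    df = pd.DataFrame({"x": [1.0, 3.0, 5.0, 7.0]})
    # With a string "as", two columns are added: the bin start and "<as>_end".
    out = altair_transform.apply(df, {"bin": True, "field": "x", "as": "x_binned"})
    assert {"x_binned", "x_binned_end"} <= set(out.columns)
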
/altair_transform/transform/lookup.py:
--------------------------------------------------------------------------------
from typing import Union

import altair as alt
import pandas as pd
from .visitor import visit
from ..utils import to_dataframe


@visit.register(alt.LookupTransform)
def visit_lookup(transform: alt.LookupTransform, df: pd.DataFrame) -> pd.DataFrame:
    with alt.data_transformers.enable(consolidate_datasets=False):
        transform = transform.to_dict()
    lookup_data = transform["from"]
    data = lookup_data["data"]
    key = lookup_data["key"]
    fields = lookup_data.get("fields", alt.Undefined)

    other_df = to_dataframe(data)
    if fields is alt.Undefined:
        fields = list(other_df.columns)

    # Make sure the lookup key is among the columns we keep.
    if key not in fields:
        cols_to_use = fields + [key]
    else:
        cols_to_use = fields
    other_df = other_df[cols_to_use]

    lookup = transform["lookup"]
    default = transform.get("default")

    # TODO: use as_ if fields are not specified
    indicator: Union[str, bool]
    if default is None:
        indicator = False
    else:
        # TODO: make sure this doesn't conflict
        indicator = "__merge_indicator"

    # TODO: how to handle conflicting fields?
    merged = pd.merge(
        df, other_df, left_on=lookup, right_on=key, how="left", indicator=indicator
    )

    if key != lookup and key not in fields:
        merged = merged.drop(key, axis=1)
    if indicator:
        merged.loc[merged[indicator] == "left_only", fields] = default
        merged = merged.drop(indicator, axis=1)
    return merged
--------------------------------------------------------------------------------
/altair_transform/utils/data.py:
--------------------------------------------------------------------------------
from typing import Union, Optional

import altair as alt
import numpy as np
import pandas as pd

DataType = Union[dict, pd.DataFrame, alt.SchemaBase]
ChartType = Union[dict, alt.SchemaBase]


def to_dataframe(data: DataType, context: Optional[ChartType] = None) -> pd.DataFrame:
    if isinstance(data, pd.DataFrame):
        return data

    if not isinstance(data, dict):
        data = data.to_dict()

    if "values" in data:
        return pd.DataFrame(data["values"])

    if "url" in data:
        url = data["url"]
        fmt = data.get("format", url.split(".")[-1])
        if fmt == "csv":
            return pd.read_csv(url)
        elif fmt == "json":
            return pd.read_json(url)
        else:
            raise ValueError(f"Unknown format for UrlData: '{fmt}'")

    if "name" in data:
        name = data["name"]
        if context is None:
            raise ValueError("NamedData not supported.")
        if isinstance(context, dict):
            datasets = context.get("datasets", {})
        else:
            datasets = context._get("datasets", {})
        if name not in datasets:
            raise ValueError(f"dataset '{name}' not specified in chart.")
        return pd.DataFrame(datasets[name])

    if "sequence" in data:
        start = data["sequence"]["start"]
        stop = data["sequence"]["stop"]
        step = data["sequence"].get("step", 1)
        name = data["sequence"].get("as", "data")
        return pd.DataFrame({name: np.arange(start, stop, step)})

    data = alt.Data.from_dict(data)
    raise NotImplementedError(f"Data of type {type(data)}")
--------------------------------------------------------------------------------
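A quick sketch of to_dataframe above on two of the supported data shapes
(assumes altair_transform is importable):

    from altair_transform.utils import to_dataframe

    # Inline values become DataFrame rows.
    inline = to_dataframe({"values": [{"x": 1}, {"x": 2}]})
    assert list(inline["x"]) == [1, 2]

    # Sequence generators expand via np.arange (default step is 1).
    seq = to_dataframe({"sequence": {"start": 0, "stop": 3, "as": "t"}})
    assert list(seq["t"]) == [0, 1, 2]
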
/altair_transform/transform/tests/test_fold.py:
--------------------------------------------------------------------------------
from typing import Any, Dict, List, Optional

import numpy as np
import pandas as pd
from pandas.testing import assert_frame_equal
import pytest

import altair_transform


@pytest.fixture
def data() -> pd.DataFrame:
    return pd.DataFrame({"x": [1, 2, 2], "y1": ["A", "B", "C"], "y2": ["D", "E", "F"]})


@pytest.mark.parametrize("as_", (None, ["name", "val"]))
def test_fold_transform(data, as_: Optional[List[str]]):
    if as_ is None:
        out = altair_transform.apply(data, {"fold": ["y1", "y2"]})
        as_ = ["key", "value"]
    else:
        out = altair_transform.apply(data, {"fold": ["y1", "y2"], "as": as_})

    expected = pd.DataFrame(
        {
            "x": np.repeat(data["x"], 2),
            as_[0]: 3 * ["y1", "y2"],
            as_[1]: np.ravel((data["y1"], data["y2"]), "F"),
            "y1": np.repeat(data["y1"], 2),
            "y2": np.repeat(data["y2"], 2),
        }
    ).reset_index(drop=True)
    assert_frame_equal(out, expected)


@pytest.mark.parametrize("fold", [["y1"], ["y1", "y2"]])
@pytest.mark.parametrize("as_", [None, ["name", "val"]])
def test_fold_against_js(
    driver, data: pd.DataFrame, fold: List[str], as_: Optional[List[str]]
) -> None:
    transform: Dict[str, Any] = {"fold": fold}
    if as_ is not None:
        transform["as"] = as_

    got = altair_transform.apply(data, transform)
    want = driver.apply(data, transform)

    assert_frame_equal(
        got[sorted(got.columns)],
        want[sorted(want.columns)],
        check_dtype=False,
        check_index_type=False,
        check_less_precise=True,
    )
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

# ply files
altair_transform/parser.out

# emacs
*~

# jupyter
Untitled*.ipynb
--------------------------------------------------------------------------------
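The next file implements the impute transform, which fills in missing
key/value combinations. A minimal behavioral sketch (mirroring the impute
unit tests earlier in this listing; assumes altair_transform is importable):

    import pandas as pd
    import altair_transform

    df = pd.DataFrame({"x": [0, 2], "y": [10.0, 30.0]})
    transform = {"impute": "y", "key": "x", "keyvals": [0, 1, 2], "value": 0}
    out = altair_transform.apply(df, transform)
    # A row for x=1 is created, with y imputed to the supplied value 0.
    assert list(out["x"]) == [0, 1, 2]
    assert list(out["y"]) == [10.0, 0.0, 30.0]
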
/altair_transform/transform/impute.py:
--------------------------------------------------------------------------------
import altair as alt
import numpy as np
import pandas as pd
from .visitor import visit


@visit.register(alt.ImputeTransform)
def visit_impute(transform: alt.ImputeTransform, df: pd.DataFrame) -> pd.DataFrame:
    transform = transform.to_dict()

    field = transform["impute"]
    key = transform["key"]

    frame = transform.get("frame", None)
    if frame:
        raise NotImplementedError("Impute Transform frame argument.")

    # Keyvals are the values at which the field is imputed.
    keyvals = transform.get("keyvals", [])
    if isinstance(keyvals, dict):
        start = keyvals.get("start", 0)
        stop = keyvals["stop"]
        step = keyvals.get("step", np.sign(stop - start))
        keyvals = np.arange(start, stop, step)
    keyvals = np.sort(np.unique(np.concatenate([keyvals, df[key].values])))
    keyvals = pd.Series(keyvals, name=key)

    groupby = transform.get("groupby", [])

    method = transform.get("method", "value")
    value = transform.get("value", None)
    if "method" not in transform and "value" not in transform:
        raise ValueError("Must specify either method or value.")
    if method == "value" and "value" not in transform:
        raise ValueError("For method='value', must supply a value argument.")

    def _impute(group):
        imputed = pd.merge(keyvals, group, on=key, how="left")
        if method == "value":
            fill = value
        else:
            fill = group[field].agg(method)
        imputed[field].fillna(fill, inplace=True)
        for col in groupby:
            imputed[col].fillna(group[col].iloc[0], inplace=True)
        return imputed

    if groupby:
        imputed = df.groupby(groupby).apply(_impute).reset_index(drop=True)
    else:
        imputed = _impute(df)

    return imputed
--------------------------------------------------------------------------------
/altair_transform/tests/test_core.py:
--------------------------------------------------------------------------------
import altair as alt
from altair_transform import extract_data, transform_chart
import numpy as np
import pandas as pd
import pytest


@pytest.fixture
def data():
    rand = np.random.RandomState(42)
    return pd.DataFrame(
        {
            "x": rand.randint(0, 100, 12),
            "y": rand.randint(0, 100, 12),
            "t": pd.date_range("2012-01-15", freq="M", periods=12),
            "i": range(12),
            "c": list("AAABBBCCCDDD"),
        }
    )


@pytest.fixture
def chart(data):
    return (
        alt.Chart(data)
        .transform_calculate(xpy="datum.x + datum.y", xmy="datum.x - datum.y")
        .mark_point()
        .encode(x="xpy:Q", y="xmy:Q")
    )


def test_extract_data(data, chart):
    out1 = extract_data(chart)
    out2 = data.copy()
    out2["xpy"] = data.x + data.y
    out2["xmy"] = data.x - data.y
    assert out1.equals(out2)


def test_transform_chart(data, chart):
    original_chart = chart.copy()
    data_out = extract_data(chart)
    chart_out = transform_chart(chart)

    # Original chart not modified
    assert original_chart == chart

    # Transform applied to output chart
    assert chart_out.data.equals(data_out)
    assert chart_out.transform is alt.Undefined
    assert chart.mark == chart_out.mark
    assert chart.encoding == chart_out.encoding


def test_transform_chart_with_aggregate():
    data = pd.DataFrame({"x": list("AABBBCCCC")})
    chart = alt.Chart(data).mark_bar().encode(x="x:N", y="count():Q")
    chart_out = transform_chart(chart)
    assert chart_out.data.equals(pd.DataFrame({"x": list("ABC"), "__count": [2, 3, 4]}))
    assert chart_out.encoding.to_dict() == {
        "x": {"field": "x", "type": "nominal"},
        "y": {"field": "__count", "type": "quantitative", "title": "Count of Records"},
    }
--------------------------------------------------------------------------------
/altair_transform/utils/tests/test_data.py:
--------------------------------------------------------------------------------
import pandas as pd
import tempfile

import pytest

import altair as alt
from altair_transform.utils import to_dataframe


@pytest.fixture
def df():
    return pd.DataFrame({"x": [1, 2, 3], "y": ["A", "B", "C"]})


@pytest.fixture
def csv_data(df):
    with tempfile.NamedTemporaryFile("w+", suffix=".csv") as f:
        df.to_csv(f.name, index=False)
        yield {"url": f.name}


@pytest.fixture
def json_data(df):
    with tempfile.NamedTemporaryFile("w+", suffix=".json") as f:
        df.to_json(f.name, orient="records")
        yield {"url": f.name}


@pytest.fixture
def inline_data(df):
    return {"values": df.to_dict(orient="records")}


@pytest.fixture
def named_data(df):
    return {"name": "my-dataset"}


@pytest.fixture
def sequence_data(df):
    return {"sequence": {"start": 1, "stop": 4, "as": "x"}}


@pytest.fixture
def chart(named_data, inline_data):
    return alt.Chart(
        data=named_data,
        mark="bar",
        datasets={named_data["name"]: inline_data["values"]},
    )


@pytest.mark.parametrize("data_type", [dict, alt.Data])
def test_csv_to_dataframe(df, csv_data, data_type):
    data = data_type(csv_data)
    assert df.equals(to_dataframe(data))


@pytest.mark.parametrize("data_type", [dict, alt.Data])
def test_json_to_dataframe(df, json_data, data_type):
    data = data_type(json_data)
    assert df.equals(to_dataframe(data))


@pytest.mark.parametrize("data_type", [dict, alt.Data])
def test_inline_to_dataframe(df, inline_data, data_type):
    data = data_type(inline_data)
    assert df.equals(to_dataframe(data))


@pytest.mark.parametrize("data_type", [dict, alt.Data])
def test_named_to_dataframe(df, chart, named_data, data_type):
    data = data_type(named_data)
    assert df.equals(to_dataframe(data, context=chart))


@pytest.mark.parametrize("data_type", [dict, alt.Data])
def test_sequence_to_dataframe(df, sequence_data, data_type):
    data = data_type(sequence_data)
    assert df[["x"]].equals(to_dataframe(data))
--------------------------------------------------------------------------------
/altair_transform/transform/tests/test_filter.py:
--------------------------------------------------------------------------------
from typing import Any, Callable, Dict, List, Tuple, Union

import numpy as np
import pandas as pd
from pandas.testing import assert_frame_equal
import pytest

import altair_transform


@pytest.fixture
def data() -> pd.DataFrame:
    rand = np.random.RandomState(42)
    return pd.DataFrame(
        {
            "x": rand.randint(0, 100, 12),
            "y": rand.randint(0, 100, 12),
            "i": range(12),
            "c": list("AAABBBCCCDDD"),
        }
    )


FILTER_PREDICATES: List[
    Tuple[Union[str, Dict[str, Any]], Callable[[pd.DataFrame], pd.DataFrame]]
] = [
    ("datum.x < datum.y", lambda df: df[df.x < df.y]),
    ({"not": "datum.i < 5"}, lambda df: df[~(df.i < 5)]),
    (
        {"and": [{"field": "x", "lt": 50}, {"field": "i", "gte": 2}]},
        lambda df: df[(df.x < 50) & (df.i >= 2)],
    ),
    (
        {"or": [{"field": "y", "gt": 50}, {"field": "i", "lte": 4}]},
        lambda df: df[(df.y > 50) | (df.i <= 4)],
    ),
    ({"field": "c", "oneOf": ["A", "B"]}, lambda df: df[df.c.isin(["A", "B"])]),
    ({"field": "x", "range": [30, 60]}, lambda df: df[(df.x >= 30) & (df.x <= 60)]),
    ({"field": "c", "equal": "B"}, lambda df: df[df.c == "B"]),
]


@pytest.mark.parametrize("filter,calc", FILTER_PREDICATES)
def test_filter_transform(
    data: pd.DataFrame,
    filter: Union[str, Dict[str, Any]],
    calc: Callable[[pd.DataFrame], pd.DataFrame],
):
    out1 = altair_transform.apply(data, {"filter": filter})
    out2 = calc(data).reset_index(drop=True)
    assert_frame_equal(out1, out2)


@pytest.mark.parametrize("filter,_", FILTER_PREDICATES)
def test_filter_against_js(
    driver,
    data: pd.DataFrame,
    filter: Union[str, Dict[str, Any]],
    _: Callable[[pd.DataFrame], pd.DataFrame],
) -> None:
    transform = {"filter": filter}
    got = altair_transform.apply(data, transform)
    want = driver.apply(data, transform)

    assert_frame_equal(
        got[sorted(got.columns)],
        want[sorted(want.columns)],
        check_dtype=False,
        check_index_type=False,
        check_less_precise=True,
    )
--------------------------------------------------------------------------------
/altair_transform/transform/aggregate.py:
--------------------------------------------------------------------------------
import altair as alt
import numpy as np
import pandas as pd
from .visitor import visit


@visit.register(alt.AggregateTransform)
def visit_aggregate(
    transform: alt.AggregateTransform, df: pd.DataFrame
) -> pd.DataFrame:
    transform = transform.to_dict()
    groupby = transform.get("groupby", [])
    agg_cols = {}
    for aggregate in transform["aggregate"]:
        op = aggregate["op"]
        col = aggregate["as"]
        field = aggregate.get("field", df.columns[0])

        if op == "argmin":

            def op(col, df=df):
                return df.loc[col.idxmin()].to_dict()

        elif op == "argmax":

            def op(col, df=df):
                return df.loc[col.idxmax()].to_dict()

        else:
            op = AGG_REPLACEMENTS.get(op, op)

        if field == "*" and field not in df.columns:
            field = df.columns[0]

        if op == "values":
            if groupby:
                agg_cols[col] = df.groupby(groupby).apply(
                    lambda x: x.to_dict(orient="records")
                )
            else:
                agg_cols[col] = [df.to_dict(orient="records")]
        else:
            if groupby:
                agg_cols[col] = df.groupby(groupby)[field].aggregate(op)
            else:
                agg_cols[col] = [df[field].aggregate(op)]

    df = pd.DataFrame(agg_cols)
    if groupby:
        df = df.reset_index()
    return df


def confidence_interval(x: np.ndarray, level: float):
    from scipy import stats

    return stats.t.interval(level, len(x) - 1, loc=x.mean(), scale=x.sem())


AGG_REPLACEMENTS = {
    "argmin": "idxmin",
    "argmax": "idxmax",
    "average": "mean",
    "ci0": lambda x: confidence_interval(x, 0.05),
    "ci1": lambda x: confidence_interval(x, 0.95),
    "distinct": "nunique",
    "stderr": "sem",
    "stdev": "std",
    "stdevp": lambda x: x.std(ddof=0),
    "missing": lambda x: x.isnull().sum(),
    "q1": lambda x: x.quantile(0.25),
    "q3": lambda x: x.quantile(0.75),
    "valid": "count",
    "variance": "var",
    "variancep": lambda x: x.var(ddof=0),
}
--------------------------------------------------------------------------------
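The AGG_REPLACEMENTS table above maps Vega aggregate names onto pandas
equivalents. A short sketch of the effect (assumes altair_transform is
importable):

    import pandas as pd
    import altair_transform

    df = pd.DataFrame({"c": ["A", "A", "B"], "x": [1, 2, 5]})
    transform = {
        "aggregate": [{"op": "average", "field": "x", "as": "mean_x"}],
        "groupby": ["c"],
    }
    # Vega's "average" is translated to pandas' "mean" via AGG_REPLACEMENTS.
    out = altair_transform.apply(df, transform)
    assert list(out["mean_x"]) == [1.5, 5.0]
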
/altair_transform/transform/tests/test_quantile.py:
--------------------------------------------------------------------------------
from typing import Any, Dict, List, Optional

import numpy as np
from numpy.testing import assert_allclose
import pandas as pd
from pandas.testing import assert_frame_equal
import pytest

import altair_transform


@pytest.fixture
def data() -> pd.DataFrame:
    rand = np.random.RandomState(42)
    return pd.DataFrame({"x": rand.randint(0, 100, 12), "c": list("AAABBBCCCDDD")})


def test_quantile_transform(data: pd.DataFrame) -> None:
    transform = {"quantile": "x", "step": 0.1}
    out = altair_transform.apply(data, transform)
    assert list(out.columns) == ["prob", "value"]
    assert_allclose(out.prob, np.arange(0.05, 1, 0.1))
    assert_allclose(out.value, np.quantile(data.x, out.prob))


def test_quantile_transform_groupby(data: pd.DataFrame) -> None:
    group = "c"
    transform = {"quantile": "x", "step": 0.1, "groupby": [group]}
    out = altair_transform.apply(data, transform)
    assert list(out.columns) == ["c", "prob", "value"]

    for key in data[group].unique():
        out_group_1 = altair_transform.apply(data[data[group] == key], transform)
        out_group_2 = out[out[group] == key][out_group_1.columns].reset_index(drop=True)
        assert_frame_equal(out_group_1, out_group_2)


@pytest.mark.parametrize("step", [None, 0.1])
@pytest.mark.parametrize("groupby", [None, ["c"]])
@pytest.mark.parametrize("probs", [None, [0.2 * i for i in range(6)]])
@pytest.mark.parametrize("as_", [None, ["p", "q"]])
def test_quantile_against_js(
    driver,
    data: pd.DataFrame,
    step: Optional[float],
    groupby: Optional[List[str]],
    probs: Optional[List[float]],
    as_: Optional[List[str]],
) -> None:
    transform: Dict[str, Any] = {"quantile": "x"}
    if step is not None:
        transform["step"] = step
    if groupby is not None:
        transform["groupby"] = groupby
    if probs is not None:
        transform["probs"] = probs
    if as_ is not None:
        transform["as"] = as_
    got = altair_transform.apply(data, transform)
    want = driver.apply(data, transform)
    assert_frame_equal(
        got[sorted(got.columns)],
        want[sorted(want.columns)],
        check_dtype=False,
        check_index_type=False,
        check_less_precise=True,
    )
--------------------------------------------------------------------------------
/altair_transform/transform/tests/test_pivot.py:
--------------------------------------------------------------------------------
from typing import Any, Dict, List, Optional

import numpy as np
import pandas as pd
from pandas.testing import assert_frame_equal
import pytest

import altair_transform


@pytest.fixture
def data() -> pd.DataFrame:
    rand = np.random.RandomState(42)
    return pd.DataFrame(
        {
            "x": rand.randint(0, 100, 12),
            "c": list("AAABBBCCCDDD"),
            "d": list("ABCABCABCABC"),
        }
    )


def test_pivot_transform(data: pd.DataFrame) -> None:
    transform = {"pivot": "c", "value": "x"}
    expected = pd.DataFrame(
        {key: [data.x[data.c == key].sum()] for key in data.c.unique()},
    )
    out = altair_transform.apply(data, transform)
    assert_frame_equal(out, expected)


def test_pivot_transform_groupby(data: pd.DataFrame) -> None:
    transform = {"pivot": "c", "value": "x", "groupby": ["d"]}
    expected = data.pivot(values="x", index="d", columns="c").reset_index()
    expected.columns.names = [None]
    out = altair_transform.apply(data, transform)
    assert_frame_equal(out, expected)


def test_pivot_transform_limit(data: pd.DataFrame) -> None:
    transform = {"pivot": "c", "value": "x", "limit": 2}
    expected = pd.DataFrame(
        {key: [data.x[data.c == key].sum()] for key in sorted(data.c.unique())[:2]}
    )
    out = altair_transform.apply(data, transform)
    assert_frame_equal(out, expected)


@pytest.mark.parametrize("groupby", [None, ["d"]])
@pytest.mark.parametrize("limit", [None, 1])
@pytest.mark.parametrize("op", [None, "sum", "max"])
def test_pivot_against_js(
    driver,
    data: pd.DataFrame,
    groupby: Optional[List[str]],
    limit: Optional[int],
    op: Optional[str],
) -> None:
    transform: Dict[str, Any] = {"pivot": "c", "value": "x"}
    if groupby is not None:
        transform["groupby"] = groupby
    if limit is not None:
        transform["limit"] = limit
    if op is not None:
        transform["op"] = op
    got = altair_transform.apply(data, transform)
    want = driver.apply(data, transform)
    assert_frame_equal(
        got[sorted(got.columns)],
        want[sorted(want.columns)],
        check_dtype=False,
        check_index_type=False,
        check_less_precise=True,
    )
--------------------------------------------------------------------------------
/altair_transform/transform/tests/test_flatten.py:
--------------------------------------------------------------------------------
from typing import Dict, List

import numpy as np
from numpy.testing import assert_equal
import pandas as pd
from pandas.testing import assert_frame_equal
import pytest

import altair_transform


@pytest.fixture
def data() -> pd.DataFrame:
    return pd.DataFrame(
        {
            "x": [[1, 2, 3], [4, 5, 6, 7], [8, 9]],
            "y": [[1, 2], [3, 4], [5, 6]],
            "cat": list("ABC"),
        }
    )


def test_flatten_transform(data: pd.DataFrame) -> None:
    out = altair_transform.apply(data, {"flatten": ["x"]})
    assert out.shape == (9, 3)
    assert out.columns.tolist() == ["x", "y", "cat"]
    assert_equal(out.x.values, range(1, 10))
    assert_equal(out.cat.values, list("AAABBBBCC"))

    out = altair_transform.apply(data, {"flatten": ["x", "y"]})
    assert out.shape == (9, 3)
    assert out.columns.tolist() == ["x", "y", "cat"]
    assert_equal(out.x.values, range(1, 10))
    assert_equal(out.y.values, [1, 2, np.nan, 3, 4, np.nan, np.nan, 5, 6])
    assert_equal(out.cat.values, list("AAABBBBCC"))


def test_flatten_transform_with_as(data: pd.DataFrame):
    out = altair_transform.apply(data, {"flatten": ["y"], "as": ["yflat"]})
    assert out.shape == (6, 4)
    assert out.columns.tolist() == ["yflat", "x", "y", "cat"]
    assert_equal(out.yflat.values, range(1, 7))
    assert_equal(out.cat.values, list("AABBCC"))

    out = altair_transform.apply(
        data, {"flatten": ["x", "y"], "as": ["xflat", "yflat"]}
    )
    assert out.shape == (9, 5)
    assert out.columns.tolist() == ["xflat", "yflat", "x", "y", "cat"]
    assert_equal(out.xflat.values, range(1, 10))
    assert_equal(out.yflat.values, [1, 2, np.nan, 3, 4, np.nan, np.nan, 5, 6])
    assert_equal(out.cat.values, list("AAABBBBCC"))


@pytest.mark.parametrize(
    "transform",
    [
        {"flatten": ["x"]},
        {"flatten": ["x"], "as": ["xflat"]},
        {"flatten": ["x", "y"]},
        {"flatten": ["x", "y"], "as": ["xflat"]},
        {"flatten": ["x", "y"], "as": ["xflat", "yflat"]},
    ],
)
def test_flatten_against_js(
    driver, data: pd.DataFrame, transform: Dict[str, List[str]],
) -> None:
    got = altair_transform.apply(data, transform)
    want = driver.apply(data, transform)

    assert_frame_equal(
        got[sorted(got.columns)],
        want[sorted(want.columns)],
        check_dtype=False,
        check_index_type=False,
        check_less_precise=True,
    )
--------------------------------------------------------------------------------
) 65 | def test_flatten_against_js( 66 | driver, data: pd.DataFrame, transform: Dict[str, List[str]], 67 | ) -> None: 68 | got = altair_transform.apply(data, transform) 69 | want = driver.apply(data, transform) 70 | 71 | assert_frame_equal( 72 | got[sorted(got.columns)], 73 | want[sorted(want.columns)], 74 | check_dtype=False, 75 | check_index_type=False, 76 | check_less_precise=True, 77 | ) 78 | -------------------------------------------------------------------------------- /altair_transform/transform/tests/test_window.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Optional 2 | 3 | import numpy as np 4 | import pandas as pd 5 | from pandas.testing import assert_series_equal, assert_frame_equal 6 | import pytest 7 | 8 | import altair_transform 9 | 10 | 11 | @pytest.fixture 12 | def data() -> pd.DataFrame: 13 | rand = np.random.RandomState(1) 14 | return pd.DataFrame({"x": rand.randint(0, 100, 12), "c": list("AAABBBCCCDDD")}) 15 | 16 | 17 | def test_window_transform_basic(data: pd.DataFrame) -> None: 18 | transform = {"window": [{"op": "sum", "field": "x", "as": "xsum"}]} 19 | out = altair_transform.apply(data, transform) 20 | expected = data["x"].cumsum() 21 | expected.name = "xsum" 22 | assert_series_equal(out["xsum"], expected.astype(float)) 23 | 24 | 25 | def test_window_transform_sorted(data: pd.DataFrame) -> None: 26 | transform = { 27 | "window": [{"op": "sum", "field": "x", "as": "xsum"}], 28 | "sort": [{"field": "x"}], 29 | } 30 | out = altair_transform.apply(data, transform) 31 | expected = data["x"].sort_values().cumsum().sort_index() 32 | expected.name = "xsum" 33 | assert_series_equal(out["xsum"], expected.astype(float)) 34 | 35 | 36 | def test_window_transform_grouped(data: pd.DataFrame) -> None: 37 | transform = { 38 | "window": [{"op": "sum", "field": "x", "as": "xsum"}], 39 | "groupby": ["c"], 40 | } 41 | out = altair_transform.apply(data, transform) 42 | expected = data.groupby("c").rolling(len(data), min_periods=1) 43 | expected = expected["x"].sum().reset_index("c", drop=True).sort_index() 44 | expected.name = "xsum" 45 | assert_series_equal(out["xsum"], expected) 46 | 47 | 48 | @pytest.mark.parametrize("groupby", [None, ["c"]]) 49 | @pytest.mark.parametrize("sort", [None, "x"]) 50 | @pytest.mark.parametrize("frame", [None, [1, 1], [-2, 2], [None, None]]) 51 | def test_window_against_js( 52 | driver, 53 | data: pd.DataFrame, 54 | groupby: Optional[List[str]], 55 | sort: Optional[str], 56 | frame: Optional[List[Optional[int]]], 57 | ) -> None: 58 | transform: Dict[str, Any] = { 59 | "window": [{"op": "sum", "field": "x", "as": "xsum"}], 60 | "ignorePeers": False, 61 | } 62 | if groupby is not None: 63 | transform["groupby"] = groupby 64 | if sort is not None: 65 | transform["sort"] = [{"field": sort}] 66 | if frame is not None: 67 | transform["frame"] = frame 68 | got = altair_transform.apply(data, transform) 69 | want = driver.apply(data, transform) 70 | assert_frame_equal( 71 | got[sorted(got.columns)], 72 | want[sorted(want.columns)], 73 | check_dtype=False, 74 | check_index_type=False, 75 | check_less_precise=True, 76 | ) 77 | -------------------------------------------------------------------------------- /altair_transform/transform/window.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | 3 | import altair as alt 4 | import pandas as pd 5 | from .visitor import visit 6 | from .aggregate import AGG_REPLACEMENTS 7 | 8 | 9 | 
@visit.register(alt.WindowTransform)
10 | def visit_window(transform: alt.WindowTransform, df: pd.DataFrame) -> pd.DataFrame:
11 |     transform = transform.to_dict()
12 |     window = transform["window"]
13 |     frame = transform.get("frame", [None, 0])
14 |     groupby = transform.get("groupby", [])
15 |     ignorePeers = transform.get("ignorePeers", False)
16 |     sort = transform.get("sort", [])
17 | 
18 |     if ignorePeers:
19 |         raise NotImplementedError("Window transform with ignorePeers=True")
20 | 
21 |     # First sort the dataframe if required.
22 |     if sort:
23 |         fields = [s["field"] for s in sort]
24 |         ascending = [s.get("order", "ascending") == "ascending" for s in sort]
25 |         df2 = df.sort_values(fields, ascending=ascending)
26 |     else:
27 |         df2 = df
28 | 
29 |     if groupby:
30 |         grouped = df2.groupby(groupby)
31 |     else:
32 |         grouped = df2
33 | 
34 |     # TODO: implement other frame options
35 |     if frame == [None, 0]:
36 |         rolling = grouped.rolling(len(df), min_periods=1)
37 |     elif frame[1] == 0:
38 |         rolling = grouped.rolling(1 - frame[0], min_periods=1)  # frame[0] <= 0
39 |     elif frame == [None, None]:
40 |         rolling = grouped.rolling(2 * len(df), min_periods=1, center=True)
41 |     elif abs(frame[0]) == abs(frame[1]):
42 |         # TODO: duplicate values may increase the effective window size
43 |         rolling = grouped.rolling(2 * abs(frame[0]) + 1, min_periods=1, center=True)
44 |     else:
45 |         raise NotImplementedError("frame={}".format(frame))
46 | 
47 |     for w in window:
48 |         # TODO: if field not specified, must be count, rank, or dense_rank
49 |         if "param" in w:
50 |             raise NotImplementedError("window function with param")
51 |         col = w.get("field", df2.columns[0])
52 |         if col == "*" and col not in df2.columns:
53 |             col = df2.columns[0]
54 |         agg = w["op"]
55 |         agg = WINDOW_AGG_REPLACEMENTS.get(agg, agg)
56 |         df2[w["as"]] = rolling[col].aggregate(agg).reset_index(groupby, drop=True)
57 | 
58 |     return df2.loc[df.index]
59 | 
60 | 
61 | # TODO: implement these.
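#
# For orientation, the cumulative default case handled above (a hedged,
# minimal example; column names are illustrative):
#
#     >>> import pandas as pd
#     >>> import altair_transform
#     >>> df = pd.DataFrame({"x": [1, 2, 3]})
#     >>> transform = {"window": [{"op": "sum", "field": "x", "as": "xsum"}]}
#     >>> altair_transform.apply(df, transform)["xsum"].tolist()
#     [1.0, 3.0, 6.0]
#
# With the default frame [None, 0], each row aggregates over all preceding
# rows plus itself. A possible shape for one of the unimplemented ops below,
# assuming a Series-in/Series-out callable could be slotted into the table
# once the rolling machinery supports non-scalar aggregates:
#
#     def _dense_rank(s: pd.Series) -> pd.Series:
#         # Vega's dense_rank: tied values share a rank, with no gaps after.
#         return s.rank(method="dense")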
62 | WINDOW_AGG_REPLACEMENTS: Dict[str, object] = { 63 | "row_number": "row_number", 64 | "rank": "rank", 65 | "dense_rank": "dense_rank", 66 | "percent_rank": "percent_rank", 67 | "cume_dist": "cume_dist", 68 | "ntile": "ntile", 69 | "lag": "lag", 70 | "lead": "lead", 71 | "first_value": "first_value", 72 | "last_value": "last_value", 73 | "nth_value": "nth_value", 74 | } 75 | WINDOW_AGG_REPLACEMENTS.update(AGG_REPLACEMENTS) 76 | -------------------------------------------------------------------------------- /altair_transform/transform/tests/test_lookup.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Optional 2 | 3 | from altair.utils.data import to_values 4 | import numpy as np 5 | import pandas as pd 6 | from pandas.testing import assert_frame_equal 7 | import pytest 8 | 9 | import altair_transform 10 | 11 | 12 | @pytest.fixture 13 | def data() -> pd.DataFrame: 14 | rand = np.random.RandomState(42) 15 | return pd.DataFrame({"x": rand.randint(0, 100, 12), "c": list("AAABBBCCCDDD")}) 16 | 17 | 18 | @pytest.fixture 19 | def lookup_data() -> Dict[str, Any]: 20 | rand = np.random.RandomState(0) 21 | df = pd.DataFrame( 22 | {"y": rand.randint(0, 50, 4), "d": list("ABCD"), "e": list("ACDE")} 23 | ) 24 | return to_values(df) 25 | 26 | 27 | @pytest.mark.parametrize("lookup_key", ["c", "c2"]) 28 | def test_lookup_transform(data: pd.DataFrame, lookup_key: str) -> None: 29 | lookup = pd.DataFrame({lookup_key: list("ABCD"), "z": [3, 1, 4, 5]}) 30 | transform = { 31 | "lookup": "c", 32 | "from": {"data": to_values(lookup), "key": lookup_key, "fields": ["z"]}, 33 | } 34 | out1 = altair_transform.apply(data, transform) 35 | out2 = pd.merge(data, lookup, left_on="c", right_on=lookup_key) 36 | if lookup_key != "c": 37 | out2 = out2.drop(lookup_key, axis=1) 38 | assert_frame_equal(out1, out2) 39 | 40 | 41 | @pytest.mark.parametrize("lookup_key", ["c", "c2"]) 42 | @pytest.mark.parametrize("default", [None, "missing"]) 43 | def test_lookup_transform_default( 44 | data: pd.DataFrame, lookup_key: str, default: Optional[str] 45 | ) -> None: 46 | lookup = pd.DataFrame({lookup_key: list("ABC"), "z": [3, 1, 4]}) 47 | transform = { 48 | "lookup": "c", 49 | "from": {"data": to_values(lookup), "key": lookup_key, "fields": ["z"]}, 50 | } 51 | if default is not None: 52 | transform["default"] = default 53 | 54 | out = altair_transform.apply(data, transform) 55 | undef = out["c"] == "D" 56 | if default is None: 57 | assert out.loc[undef, "z"].isnull().all() 58 | else: 59 | assert (out.loc[undef, "z"] == default).all() 60 | 61 | 62 | @pytest.mark.parametrize("key", ["d", "e"]) 63 | @pytest.mark.parametrize("default", [None, "N/A"]) 64 | def test_lookup_against_js( 65 | driver, 66 | data: pd.DataFrame, 67 | lookup_data: Dict[str, Any], 68 | key: str, 69 | default: Optional[str], 70 | ) -> None: 71 | transform = { 72 | "lookup": "c", 73 | "from": {"data": lookup_data, "key": key, "fields": ["y"]}, 74 | } 75 | if default is not None: 76 | transform["default"] = default 77 | got = altair_transform.apply(data, transform) 78 | want = driver.apply(data, transform) 79 | 80 | assert_frame_equal( 81 | got[sorted(got.columns)], 82 | want[sorted(want.columns)], 83 | check_dtype=False, 84 | check_index_type=False, 85 | check_less_precise=True, 86 | ) 87 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import io 2 | import os 3 | 
import re
4 | import shutil
5 | 
6 | try:
7 |     from setuptools import setup
8 | except ImportError:
9 |     from distutils.core import setup
10 | 
11 | 
12 | def read(path, encoding="utf-8"):
13 |     path = os.path.join(os.path.dirname(__file__), path)
14 |     with io.open(path, encoding=encoding) as fp:
15 |         return fp.read()
16 | 
17 | 
18 | def get_install_requirements(path):
19 |     content = read(path)
20 |     return [req for req in content.split("\n") if req != "" and not req.startswith("#")]
21 | 
22 | 
23 | def version(path):
24 |     """Obtain the package version from a python file, e.g. pkg/__init__.py
25 | 
26 |     See the "single-sourcing the package version" guide at packaging.python.org.
27 |     """
28 |     version_file = read(path)
29 |     version_match = re.search(
30 |         r"""^__version__ = ['"]([^'"]*)['"]""", version_file, re.M
31 |     )
32 |     if version_match:
33 |         return version_match.group(1)
34 |     raise RuntimeError("Unable to find version string.")
35 | 
36 | 
37 | HERE = os.path.abspath(os.path.dirname(__file__))
38 | 
39 | 
40 | # From https://github.com/jupyterlab/jupyterlab/blob/master/setupbase.py,
41 | # BSD licensed
42 | def find_packages(top=HERE):
43 |     """
44 |     Find all of the packages.
45 |     """
46 |     packages = []
47 |     for d, dirs, _ in os.walk(top, followlinks=True):
48 |         if os.path.exists(os.path.join(d, "__init__.py")):
49 |             packages.append(os.path.relpath(d, top).replace(os.path.sep, "."))
50 |         elif d != top:
51 |             # Do not look for packages in subfolders
52 |             # if current is not a package
53 |             dirs[:] = []
54 |     return packages
55 | 
56 | 
57 | README_TEST_PATH = "altair_transform/tests/README.md"
58 | try:
59 |     shutil.copyfile("README.md", README_TEST_PATH)
60 |     setup(
61 |         name="altair_transform",
62 |         version=version("altair_transform/__init__.py"),
63 |         description="A python engine for evaluating Altair transforms.",
64 |         long_description=read("README.md"),
65 |         long_description_content_type="text/markdown",
66 |         author="Jake VanderPlas",
67 |         author_email="jakevdp@gmail.com",
68 |         url="http://github.com/altair-viz/altair-transform/",
69 |         download_url="http://github.com/altair-viz/altair-transform/",
70 |         license="MIT",
71 |         packages=find_packages(),
72 |         include_package_data=True,
73 |         install_requires=get_install_requirements("requirements.txt"),
74 |         python_requires=">=3.6",
75 |         classifiers=[
76 |             "Environment :: Console",
77 |             "Intended Audience :: Science/Research",
78 |             "License :: OSI Approved :: MIT License",
79 |             "Natural Language :: English",
80 |             "Programming Language :: Python :: 3.6",
81 |             "Programming Language :: Python :: 3.7",
82 |             "Programming Language :: Python :: 3.8",
83 |         ],
84 |     )
85 | finally:
86 |     os.remove(README_TEST_PATH)
87 | 
--------------------------------------------------------------------------------
/altair_transform/utils/tests/test_timeunit.py:
--------------------------------------------------------------------------------
1 | """Tests of the timeunit utilities"""
2 | from dateutil.tz import tzlocal
3 | import pytest
4 | 
5 | import pandas as pd
6 | 
7 | from altair_transform.utils import timeunit
8 | 
9 | 
10 | TIMEUNITS = [
11 |     "year",
12 |     "quarter",
13 |     "month",
14 |     "day",
15 |     "date",
16 |     "hours",
17 |     "minutes",
18 |     "seconds",
19 |     "milliseconds",
20 |     "yearquarter",
21 |     "yearquartermonth",
22 |     "yearmonth",
23 |     "yearmonthdate",
24 |     "yearmonthdatehours",
25 |     "yearmonthdatehoursminutes",
26 |     "yearmonthdatehoursminutesseconds",
27 |     "quartermonth",
28 |     "monthdate",
29 |     "hoursminutes",
30 |     "hoursminutesseconds",
31 |     "minutesseconds",
32 |     "secondsmilliseconds",
33 | ]
34 | TIMEUNITS += [f"utc{unit}" for unit in TIMEUNITS]
35
| TIMEZONES = [None, tzlocal(), "UTC", "US/Pacific", "US/Eastern"] 36 | 37 | 38 | @pytest.fixture 39 | def dates(): 40 | # Use dates on either side of a year boundary to hit corner cases. 41 | return pd.DatetimeIndex(["1999-12-31 23:59:55.050", "2000-01-01 00:00:05.050"]) 42 | 43 | 44 | @pytest.mark.parametrize("timezone", TIMEZONES) 45 | @pytest.mark.parametrize("unit", TIMEUNITS) 46 | def test_timeunit_input_types(dates, timezone, unit): 47 | dates = dates.tz_localize(timezone) 48 | 49 | timestamps = [timeunit.compute_timeunit(d, unit) for d in dates] 50 | series = timeunit.compute_timeunit(pd.Series(dates), unit) 51 | datetimeindex = timeunit.compute_timeunit(dates, unit) 52 | 53 | assert isinstance(timestamps[0], pd.Timestamp) 54 | assert isinstance(series, pd.Series) 55 | assert isinstance(datetimeindex, pd.DatetimeIndex) 56 | assert datetimeindex.equals(pd.DatetimeIndex(series)) 57 | assert datetimeindex.equals(pd.DatetimeIndex(timestamps)) 58 | 59 | 60 | @pytest.mark.parametrize("timezone", TIMEZONES) 61 | @pytest.mark.parametrize("timeunit_name", TIMEUNITS) 62 | def test_all_timeunits(dates, timezone, timeunit_name): 63 | timeunit_calc = timeunit.compute_timeunit( 64 | dates.tz_localize(timezone), timeunit_name 65 | ) 66 | 67 | tz = "UTC" if timeunit_name.startswith("utc") else tzlocal() 68 | dates = dates.tz_localize(timezone or tzlocal()).tz_convert(tz) 69 | 70 | to_check = [ 71 | ("year", "year", 2012), 72 | ("quarter", "quarter", None), 73 | ("month", "month", None if "quarter" in timeunit_name else 1), 74 | ("day", "dayofweek", None), 75 | ("date", "day", None if "day" in timeunit_name else 1), 76 | ("hours", "hour", 0), 77 | ("minutes", "minute", 0), 78 | ("seconds", "second", 0), 79 | ("milliseconds", "microsecond", 0), 80 | ] 81 | 82 | if timeunit_name.startswith("utc"): 83 | timeunit_name = timeunit_name[3:] 84 | 85 | for name, attr, default in to_check: 86 | if timeunit_name.startswith(name): 87 | timeunit_name = timeunit_name[len(name) :] 88 | assert getattr(dates, attr).equals(getattr(timeunit_calc, attr)) 89 | elif default is not None: 90 | assert (getattr(timeunit_calc, attr) == default).all() 91 | assert (timeunit_calc.nanosecond == 0).all() 92 | -------------------------------------------------------------------------------- /altair_transform/transform/tests/test_timeunit.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import numpy as np 4 | import pandas as pd 5 | from pandas.testing import assert_frame_equal 6 | 7 | import altair_transform 8 | 9 | TIMEUNITS = [ 10 | "year", 11 | "quarter", 12 | "month", 13 | "day", 14 | "date", 15 | "hours", 16 | "minutes", 17 | "seconds", 18 | "milliseconds", 19 | "yearquarter", 20 | "yearquartermonth", 21 | "yearmonth", 22 | "yearmonthdate", 23 | "yearmonthdatehours", 24 | "yearmonthdatehoursminutes", 25 | "yearmonthdatehoursminutesseconds", 26 | "quartermonth", 27 | "monthdate", 28 | "hoursminutes", 29 | "hoursminutesseconds", 30 | "minutesseconds", 31 | "secondsmilliseconds", 32 | ] 33 | 34 | 35 | @pytest.fixture 36 | def timezone(driver) -> str: 37 | return driver.get_tz_code() 38 | 39 | 40 | @pytest.fixture 41 | def data() -> pd.DataFrame: 42 | rand = np.random.RandomState(42) 43 | return pd.DataFrame( 44 | { 45 | "t": ( 46 | pd.to_datetime("2020-01-01") 47 | + pd.to_timedelta(rand.randint(0, 60_000_000, 50), unit="s") 48 | ).sort_values() 49 | } 50 | ) 51 | 52 | 53 | @pytest.mark.parametrize( 54 | "timeUnit,fmt", 55 | [ 56 | ("year", "%Y"), 57 | ("yearmonth", 
"%Y-%m"), 58 | ("yearmonthdate", "%Y-%m-%d"), 59 | ("monthdate", "2012-%m-%d"), 60 | ("date", "2012-01-%d"), 61 | ], 62 | ) 63 | def test_timeunit_transform(data: pd.DataFrame, timeUnit: str, fmt: str) -> None: 64 | transform = {"timeUnit": timeUnit, "field": "t", "as": "unit"} 65 | out = altair_transform.apply(data, transform) 66 | unit = pd.to_datetime(data.t.dt.strftime(fmt)) 67 | assert (out.unit == unit).all() 68 | 69 | 70 | @pytest.mark.parametrize("timeUnit", TIMEUNITS) 71 | def test_timeunit_against_js( 72 | driver, data: pd.DataFrame, timezone: str, timeUnit: str 73 | ) -> None: 74 | transform = {"timeUnit": timeUnit, "field": "t", "as": "unit"} 75 | 76 | got = altair_transform.apply(data, transform) 77 | 78 | data["t"] = data["t"].apply(lambda x: x.isoformat()) 79 | want = driver.apply(data, transform) 80 | 81 | want["t"] = ( 82 | pd.to_datetime(1e6 * want["t"]) 83 | .dt.tz_localize("UTC") 84 | .dt.tz_convert(timezone) 85 | .dt.tz_localize(None) 86 | ) 87 | want["unit"] = ( 88 | pd.to_datetime(want["unit"]).dt.tz_convert(timezone).dt.tz_localize(None) 89 | ) 90 | 91 | cols = ["t", "unit"] 92 | print(want[cols]) 93 | print(got[cols]) 94 | print(want[cols] - got[cols]) 95 | 96 | assert_frame_equal(want[cols], got[cols]) 97 | 98 | # want["t"] = pd.to_datetime(want["t"]) 99 | # want["unit"] = pd.to_datetime(want["unit"]) 100 | # want["unit_end"] = pd.to_datetime(want["unit_end"]) 101 | 102 | # assert_frame_equal( 103 | # got[sorted(got.columns)], 104 | # want[sorted(want.columns)], 105 | # check_dtype=False, 106 | # check_index_type=False, 107 | # check_less_precise=True, 108 | # ) 109 | -------------------------------------------------------------------------------- /altair_transform/transform/tests/test_aggregate.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Optional 2 | 3 | import numpy as np 4 | import pandas as pd 5 | from pandas.testing import assert_frame_equal 6 | import pytest 7 | 8 | import altair_transform 9 | from altair_transform.transform.aggregate import AGG_REPLACEMENTS 10 | 11 | AGGREGATES = [ 12 | "argmax", 13 | "argmin", 14 | "average", 15 | "count", 16 | "distinct", 17 | "max", 18 | "mean", 19 | "median", 20 | "min", 21 | "missing", 22 | "q1", 23 | "q3", 24 | "ci0", 25 | "ci1", 26 | "stderr", 27 | "stdev", 28 | "stdevp", 29 | "sum", 30 | "valid", 31 | "values", 32 | "variance", 33 | "variancep", 34 | ] 35 | 36 | AGG_SKIP = ["ci0", "ci1"] # These require scipy. 
37 | 38 | 39 | @pytest.fixture 40 | def data() -> pd.DataFrame: 41 | rand = np.random.RandomState(42) 42 | return pd.DataFrame({"x": rand.randint(0, 100, 12), "c": list("AAABBBCCCDDD")}) 43 | 44 | 45 | @pytest.mark.parametrize("groupby", [True, False]) 46 | @pytest.mark.parametrize("op", set(AGGREGATES) - set(AGG_SKIP)) 47 | def test_aggregate_transform(data: pd.DataFrame, groupby: bool, op: Any): 48 | field = "x" 49 | col = "z" 50 | group = "c" 51 | 52 | transform: Dict[str, Any] = {"aggregate": [{"op": op, "field": field, "as": col}]} 53 | if groupby: 54 | transform["groupby"] = [group] 55 | 56 | if op == "argmin": 57 | 58 | def op(col, df=data): 59 | return df.loc[col.idxmin()].to_dict() 60 | 61 | elif op == "argmax": 62 | 63 | def op(col, df=data): 64 | return df.loc[col.idxmax()].to_dict() 65 | 66 | else: 67 | op = AGG_REPLACEMENTS.get(op, op) 68 | 69 | out = altair_transform.apply(data, transform) 70 | 71 | data = data.reset_index(drop=True) 72 | 73 | if op == "values": 74 | if groupby: 75 | grouped = data.groupby(group).apply(lambda x: x.to_dict(orient="records")) 76 | grouped.name = col 77 | grouped = grouped.reset_index() 78 | else: 79 | grouped = pd.DataFrame({col: [data.to_dict(orient="records")]}) 80 | elif groupby: 81 | grouped = data.groupby(group)[field].aggregate(op) 82 | grouped.name = col 83 | grouped = grouped.reset_index() 84 | else: 85 | grouped = pd.DataFrame({col: [data[field].aggregate(op)]}) 86 | 87 | assert_frame_equal(grouped, out) 88 | 89 | 90 | @pytest.mark.parametrize("groupby", [None, ["c"]]) 91 | @pytest.mark.parametrize("op", set(AGGREGATES) - set(AGG_SKIP)) 92 | def test_aggregate_against_js( 93 | driver, data: pd.DataFrame, groupby: Optional[List[str]], op: str 94 | ) -> None: 95 | transform: Dict[str, Any] = {"aggregate": [{"op": op, "field": "x", "as": "z"}]} 96 | if groupby is not None: 97 | transform["groupby"] = groupby 98 | 99 | got = altair_transform.apply(data, transform) 100 | want = driver.apply(data, transform) 101 | 102 | print(data) 103 | print(got) 104 | print(want) 105 | 106 | assert_frame_equal( 107 | got[sorted(got.columns)], 108 | want[sorted(want.columns)], 109 | check_dtype=False, 110 | check_index_type=False, 111 | check_less_precise=True, 112 | ) 113 | -------------------------------------------------------------------------------- /altair_transform/transform/tests/test_bin.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List 2 | 3 | import numpy as np 4 | import pandas as pd 5 | from pandas.testing import assert_frame_equal 6 | import pytest 7 | 8 | import altair_transform 9 | 10 | 11 | @pytest.fixture 12 | def data() -> pd.DataFrame: 13 | rand = np.random.RandomState(42) 14 | return pd.DataFrame({"x": rand.randint(0, 100, 12)}) 15 | 16 | 17 | def test_bin_transform_simple(data: pd.DataFrame) -> None: 18 | transform = {"bin": True, "field": "x", "as": "xbin"} 19 | out = altair_transform.apply(data, transform) 20 | assert "xbin" in out.columns 21 | 22 | transform = {"bin": True, "field": "x", "as": ["xbin1", "xbin2"]} 23 | out = altair_transform.apply(data, transform) 24 | assert "xbin1" in out.columns 25 | assert "xbin2" in out.columns 26 | 27 | 28 | @pytest.mark.parametrize("maxbins", [5, 10, 20]) 29 | @pytest.mark.parametrize("nice", [True, False]) 30 | def test_bin_transform_maxbins(nice: bool, maxbins: int) -> None: 31 | data = pd.DataFrame({"x": np.arange(100)}) 32 | transform = {"bin": {"maxbins": maxbins, "nice": nice}, "field": "x", "as": "xbin"} 33 | out 
= altair_transform.apply(data, transform) 34 | assert "xbin" in out.columns 35 | assert "xbin_end" in out.columns 36 | bins = np.sort(out["xbin"].unique()) 37 | assert len(bins) - 1 <= maxbins 38 | assert not out.xbin.isnull().any() 39 | 40 | 41 | @pytest.mark.parametrize("step", [5, 10, 20]) 42 | @pytest.mark.parametrize("nice", [True, False]) 43 | def test_bin_transform_step(nice: bool, step: int) -> None: 44 | data = pd.DataFrame({"x": np.arange(100)}) 45 | transform = {"bin": {"step": step, "nice": nice}, "field": "x", "as": "xbin"} 46 | out = altair_transform.apply(data, transform) 47 | bins = np.sort(out.xbin.unique()) 48 | assert np.allclose(bins[1:] - bins[:-1], step) 49 | assert not out.xbin.isnull().any() 50 | 51 | 52 | @pytest.mark.parametrize("nice", [True, False]) 53 | def test_bin_transform_steps(nice: bool, steps: List[int] = [5, 10, 20]) -> None: 54 | data = pd.DataFrame({"x": range(100)}) 55 | transform = {"bin": {"steps": steps, "nice": nice}, "field": "x", "as": "xbin"} 56 | out = altair_transform.apply(data, transform) 57 | bins = np.sort(out.xbin.unique()) 58 | assert bins[1] - bins[0] in steps 59 | assert not out.xbin.isnull().any() 60 | 61 | 62 | @pytest.mark.parametrize( 63 | "transform", 64 | [ 65 | {"bin": True, "field": "x", "as": "xbin"}, 66 | {"bin": True, "field": "x", "as": ["xbin1", "xbin2"]}, 67 | {"bin": {"maxbins": 20}, "field": "x", "as": "xbin"}, 68 | {"bin": {"nice": False}, "field": "x", "as": "xbin"}, 69 | {"bin": {"anchor": 3.5}, "field": "x", "as": "xbin"}, 70 | {"bin": {"step": 20}, "field": "x", "as": "xbin"}, 71 | {"bin": {"base": 2}, "field": "x", "as": "xbin"}, 72 | {"bin": {"extent": [20, 80]}, "field": "x", "as": "xbin"}, 73 | ], 74 | ) 75 | def test_bin_against_js(driver, data: pd.DataFrame, transform: Dict[str, Any]) -> None: 76 | got = altair_transform.apply(data, transform) 77 | want = driver.apply(data, transform) 78 | assert_frame_equal( 79 | got[sorted(got.columns)], 80 | want[sorted(want.columns)], 81 | check_dtype=False, 82 | check_index_type=False, 83 | check_less_precise=True, 84 | ) 85 | -------------------------------------------------------------------------------- /altair_transform/utils/tests/_testcases.py: -------------------------------------------------------------------------------- 1 | """ 2 | Common test cases for parser & evaljs 3 | """ 4 | import functools 5 | import operator 6 | 7 | 8 | def extract(expressions): 9 | """Extract expressions from multi-line strings""" 10 | return ( 11 | line 12 | for line in expressions.splitlines() 13 | if line.strip() and not line.startswith("#") 14 | ) 15 | 16 | 17 | class Bunch: 18 | """A simple class to enable testing of attribute & item access""" 19 | 20 | def __init__(self, **kwargs): 21 | for key, val in kwargs.items(): 22 | setattr(self, key, val) 23 | 24 | def __getitem__(self, item): 25 | return getattr(self, item) 26 | 27 | 28 | NAMES = { 29 | "A": 10, 30 | "B": 20, 31 | "C": 30, 32 | "obj": Bunch(foo=1, bar=2, func=lambda x: x), 33 | "foo": "bar", 34 | "bar": "baz", 35 | "sum": lambda *args: sum(args), 36 | "prod": lambda *args: functools.reduce(operator.mul, args), 37 | "_123": 2.0, 38 | "abc_123": "hello", 39 | "true": True, 40 | "false": False, 41 | } 42 | 43 | EXPRESSIONS = r""" 44 | # Integers 45 | 0 46 | 12 47 | 234 48 | # Floats 49 | 3.14 50 | 0.10 51 | 10. 
52 | .1
53 | 1E5
54 | 2e6
55 | 3.7E02
56 | # Binary
57 | 0b0
58 | 0B10101
59 | # Octal
60 | 0o17
61 | 0O0
62 | # Hex
63 | 0xffaa11
64 | 0XF0c
65 | # Boolean
66 | true
67 | false
68 | # Strings
69 | 'abc123'
70 | 'a\'b\'c123'
71 | 'abc123\\'
72 | '\t""\n'
73 | "abc123"
74 | "a\"b\"c123"
75 | "abc123\\"
76 | "\t''\n"
77 | # Regex
78 | "/[0-9]+/"
79 | "/(?P<foo>foo)(?P<bar>bar)/g"
80 | # Globals
81 | A
82 | B
83 | C
84 | obj
85 | foo
86 | _123
87 | abc_123
88 | # Unary operations
89 | -1
90 | +3.5
91 | -A
92 | +B
93 | ~0b0101
94 | # Binary operations
95 | 1 + 1
96 | 2E3 - 1
97 | 0xF * 5.0
98 | A / B
99 | 2 ** 3
100 | # Comparisons
101 | 1 < 2
102 | A > B
103 | 5 <= 5
104 | A >= 10
105 | A == 10
106 | A != 10
107 | # Bitwise
108 | 3 | 4
109 | 3 & 4
110 | 3 ^ 4
111 | 4 << 2
112 | 4 >> 2
113 | # Compound operations
114 | 2 * 3 % 4 / 5
115 | 2 % 3 * 4 / 5
116 | 2 + 3 % 4
117 | 2 % 3 - 4
118 | 2.5 * 3 + 4 / 5.2
119 | 2.5 + 3 * 4 - 5.0
120 | 2.5 * (3 + 4)
121 | (2 * 3) + 4
122 | B * 3 ** 4
123 | 1.5 + 2. * .3
124 | -0.6 * (C / 1.5)
125 | 3 * (4 + C)
126 | A & B | ~C + 4 * 5
127 | # Functions
128 | prod(1, 2, 3)
129 | sum(1, 2, 3)
130 | prod(1, 2 * 4, -6)
131 | sum(1, (2 * 4), -6)
132 | A * prod(B, C)
133 | A * prod(B, sum(B, C))
134 | obj.func(2)
135 | # Lists
136 | []
137 | [2]
138 | [1 + 1]
139 | [A, 'foo', 23 * B, []]
140 | # Objects
141 | {}
142 | {'a': 4}
143 | {'a': 5, 'b': 5}
144 | # Attribute access
145 | obj.foo + C / 5
146 | obj["foo"] + C / 5
147 | (obj).bar + C * 2
148 | (obj)['bar'] + C * 2
149 | ['a', 'b', 'c'][1]
150 | """
151 | 
152 | BAD_EXPRESSIONS = r"""
153 | "'
154 | 1.B
155 | *24
156 | "\"
157 | (1, 2]
158 | [1, 2)
159 | B.1
160 | (1 + 2)[]
161 | [1;2]
162 | 009
163 | 0x01FG
164 | 00.56
165 | true : 3 ? 4
166 | """
167 | 
168 | JSONLY_EXPRESSIONS = [
169 |     ("{A, B, C: 3, 'd': 4, 1: 5}", {"A": 10, "B": 20, "C": 3, "d": 4, 1: 5}),
170 |     ("!true", False),
171 |     ("!false", True),
172 |     ("true && false", False),
173 |     ("true || false", True),
174 |     ("-12 >>> 2", 1073741821),
175 |     ("12 >>> 2", 3),
176 |     ("A === 10", True),
177 |     ("A !== 10", False),
178 |     ("true ? 0 + 1 : 1 + 1", 1),
179 |     ("!true ? 0 + 1 : 1 | 2", 3),
180 |     ("(true ? 1 : 2) ? 3 : 4", 3),
181 |     ("true ? 1 : (2 ? 3 : 4)", 1),
182 |     ("true ? 1 : 2 ? 3 : 4", 1),
183 | ]
184 | 
--------------------------------------------------------------------------------
/altair_transform/utils/timeunit.py:
--------------------------------------------------------------------------------
1 | """Utilities for working with pandas & JS datetimes."""
2 | import re
3 | from typing import Union, Set
4 | import pandas as pd
5 | from dateutil.tz import tzlocal
6 | 
7 | __all__ = ["compute_timeunit"]
8 | 
9 | Date = Union[pd.Series, pd.DatetimeIndex, pd.Timestamp]
10 | 
11 | 
12 | def compute_timeunit(date: Date, timeunit: str) -> Date:
13 |     """Evaluate a timeUnit transform.
14 | 
15 |     Parameters
16 |     ----------
17 |     date : pd.DatetimeIndex, pd.Series, or pd.Timestamp
18 |         The date to be converted
19 |     timeunit : string
20 |         The Altair timeUnit identifier.
21 | 
22 |     Returns
23 |     -------
24 |     date_tu : pd.DatetimeIndex, pd.Series, or pd.Timestamp
25 |         The converted date, of the same type as the input.
26 |     """
27 |     # Convert to either UTC or localtime as appropriate.
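    # (A hedged example of the end-to-end conversion, using a 'month' unit:
    #
    #     >>> import pandas as pd
    #     >>> compute_timeunit(pd.Timestamp("2019-04-15 11:30"), "month")
    #     Timestamp('2012-04-01 00:00:00')
    #
    # components not named by the unit collapse to Vega's reference year 2012.)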
28 |     def dt(date):
29 |         return date.dt if isinstance(date, pd.Series) else date
30 | 
31 |     if dt(date).tz is None:
32 |         date = dt(date).tz_localize(tzlocal())
33 |     date = dt(date).tz_convert("UTC" if timeunit.startswith("utc") else tzlocal())
34 | 
35 |     if isinstance(date, pd.Series):
36 |         return pd.Series(_compute_timeunit(timeunit, date.dt))
37 |     elif isinstance(date, pd.Timestamp):
38 |         return _compute_timeunit(timeunit, pd.DatetimeIndex([date]))[0]
39 |     else:
40 |         return _compute_timeunit(timeunit, date)
41 | 
42 | 
43 | _simple_timeunits = [
44 |     "utc",
45 |     "year",
46 |     "quarter",
47 |     "month",
48 |     "day",
49 |     "date",
50 |     "hours",
51 |     "minutes",
52 |     "seconds",
53 |     "milliseconds",
54 | ]
55 | _elements = "".join(f"(?P<{name}>{name})?" for name in _simple_timeunits)
56 | _timeunit_regex = re.compile(f"^{_elements}$")
57 | 
58 | 
59 | def _parse_timeunit_string(timeunit: str) -> Set[str]:
60 |     """Return the set of timeunit keys in a specification string."""
61 |     match = _timeunit_regex.match(timeunit)
62 |     if not match:
63 |         raise ValueError(f"Unrecognized timeUnit: {timeunit!r}")
64 |     return {k for k, v in match.groupdict().items() if v}
65 | 
66 | 
67 | def _compute_timeunit(name: str, date: pd.DatetimeIndex) -> pd.DatetimeIndex:
68 |     """Workhorse for compute_timeunit."""
69 |     if name in ["day", "utcday"]:
70 |         return pd.to_datetime("2012-01-01") + pd.to_timedelta(
71 |             (date.dayofweek + 1) % 7, "D"
72 |         )
73 |     units = _parse_timeunit_string(name)
74 |     if "day" in units:
75 |         raise NotImplementedError("quarter and day timeunit")
76 |     if not units:
77 |         raise ValueError(f"{name!r} is not a recognized timeunit")
78 | 
79 |     def quarter(month: pd.Int64Index) -> pd.Int64Index:
80 |         return month - (month - 1) % 3
81 | 
82 |     Y = date.year.astype(str) if "year" in units else "2012"
83 |     M = (
84 |         date.month.astype(str).str.zfill(2)
85 |         if "month" in units
86 |         else (
87 |             quarter(date.month).astype(str).str.zfill(2) if "quarter" in units else "01"
88 |         )
89 |     )
90 |     D = date.day.astype(str).str.zfill(2) if "date" in units else "01"
91 |     h = date.hour.astype(str).str.zfill(2) if "hours" in units else "00"
92 |     m = date.minute.astype(str).str.zfill(2) if "minutes" in units else "00"
93 |     s = date.second.astype(str).str.zfill(2) if "seconds" in units else "00"
94 |     ms = (
95 |         (date.microsecond // 1000).astype(str).str.zfill(3)
96 |         if "milliseconds" in units
97 |         else "00"
98 |     )
99 |     return pd.to_datetime(
100 |         Y + "-" + M + "-" + D + " " + h + ":" + m + ":" + s + "."
+ ms 101 | ) 102 | -------------------------------------------------------------------------------- /altair_transform/transform/tests/test_regression.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Optional 2 | 3 | import numpy as np 4 | import pandas as pd 5 | from pandas.testing import assert_frame_equal 6 | import pytest 7 | 8 | import altair_transform 9 | 10 | 11 | @pytest.fixture 12 | def data() -> pd.DataFrame: 13 | rand = np.random.RandomState(42) 14 | df = pd.DataFrame( 15 | { 16 | "x": rand.randint(0, 100, 12), 17 | "y": rand.randint(0, 100, 12), 18 | "g": list(6 * "AB"), 19 | } 20 | ) 21 | return df 22 | 23 | 24 | def test_linear() -> None: 25 | data = pd.DataFrame({"x": [0, 1, 2, 3, 4], "y": [2, 4, 6, 8, 10]}) 26 | transform = {"regression": "y", "on": "x"} 27 | out = altair_transform.apply(data, transform) 28 | assert_frame_equal( 29 | out, pd.DataFrame({"x": [0.0, 4.0], "y": [2.0, 10.0]}), check_dtype=False 30 | ) 31 | 32 | 33 | def test_linear_groupby() -> None: 34 | data = pd.DataFrame( 35 | { 36 | "x": [0, 1, 2, 3, 4, 1, 2, 3], 37 | "y": [2, 4, 6, 8, 10, 2, 3, 4], 38 | "g": [0, 0, 0, 0, 0, 1, 1, 1], 39 | } 40 | ) 41 | transform = {"regression": "y", "on": "x", "groupby": ["g"]} 42 | out = altair_transform.apply(data, transform) 43 | assert_frame_equal( 44 | out[out.g == 0].reset_index(drop=True), 45 | pd.DataFrame({"g": [0, 0], "x": [0.0, 4.0], "y": [2.0, 10.0]}), 46 | check_dtype=False, 47 | ) 48 | assert_frame_equal( 49 | out[out.g == 1].reset_index(drop=True), 50 | pd.DataFrame({"g": [1, 1], "x": [1.0, 3.0], "y": [2.0, 4.0]}), 51 | check_dtype=False, 52 | ) 53 | 54 | 55 | @pytest.mark.parametrize( 56 | "method,coef", [("linear", [1, 2]), ("quad", [1, 2, 0]), ("poly", [1, 2, 0, 0])] 57 | ) 58 | def test_linear_params(method: str, coef: List[int]) -> None: 59 | data = pd.DataFrame({"x": [0, 1, 2, 3, 4], "y": [1, 3, 5, 7, 9]}) 60 | transform = {"regression": "y", "on": "x", "params": True, "method": method} 61 | out = altair_transform.apply(data, transform) 62 | assert_frame_equal(out, pd.DataFrame({"coef": [coef], "rSquared": [1.0]})) 63 | 64 | 65 | @pytest.mark.parametrize("groupby", [None, ["g"]]) 66 | @pytest.mark.parametrize("method,order", [("linear", 1), ("quad", 2)]) 67 | def test_poly_vs_linear(groupby: List[str], method: str, order: int) -> None: 68 | data = pd.DataFrame( 69 | { 70 | "x": [0, 1, 2, 3, 4, 1, 2, 3], 71 | "y": [2, 4, 6, 8, 10, 2, 3, 4], 72 | "g": [0, 0, 0, 0, 0, 1, 1, 1], 73 | } 74 | ) 75 | kwds = {} if not groupby else {"groupby": groupby} 76 | out1 = altair_transform.apply( 77 | data, {"regression": "y", "on": "x", "method": method, **kwds} 78 | ) 79 | out2 = altair_transform.apply( 80 | data, {"regression": "y", "on": "x", "method": "poly", "order": order, **kwds} 81 | ) 82 | assert_frame_equal(out1, out2, check_dtype=False) 83 | 84 | 85 | @pytest.mark.parametrize("method", ["linear", "log", "exp", "pow", "quad", "poly"]) 86 | @pytest.mark.parametrize("params", [True, False]) 87 | @pytest.mark.parametrize("groupby", [None, ["g"]]) 88 | def test_regression_against_js( 89 | driver, data: pd.DataFrame, method: str, params: str, groupby: Optional[List[str]], 90 | ) -> None: 91 | transform: Dict[str, Any] = { 92 | "regression": "y", 93 | "on": "x", 94 | "method": method, 95 | "params": params, 96 | } 97 | if groupby: 98 | transform["groupby"] = groupby 99 | got = altair_transform.apply(data, transform) 100 | want = driver.apply(data, transform) 101 | 102 | # Account for 
differences in handling of undefined between browsers.
103 |     if params and not groupby and got.shape != want.shape:
104 |         got["keys"] = [None]
105 | 
106 |     assert_frame_equal(
107 |         got[sorted(got.columns)],
108 |         want[sorted(want.columns)],
109 |         check_dtype=False,
110 |         check_index_type=False,
111 |         check_less_precise=True,
112 |     )
113 | 
--------------------------------------------------------------------------------
/altair_transform/transform/filter.py:
--------------------------------------------------------------------------------
1 | from functools import singledispatch
2 | from typing import Any
3 | 
4 | import altair as alt
5 | import numpy as np
6 | import pandas as pd
7 | from .visitor import visit
8 | from ..vegaexpr import eval_vegajs
9 | 
10 | 
11 | @visit.register(alt.FilterTransform)
12 | def visit_filter(transform: alt.FilterTransform, df: pd.DataFrame) -> pd.DataFrame:
13 |     mask = eval_predicate(transform.filter, df).astype(bool)
14 |     return df[mask].reset_index(drop=True)
15 | 
16 | 
17 | def get_column(df: pd.DataFrame, predicate: Any) -> pd.Series:
18 |     """Get the transformed column from the predicate."""
19 |     if predicate.timeUnit is not alt.Undefined:
20 |         raise NotImplementedError("timeUnit Transform in Predicates")
21 |     return df[eval_value(predicate["field"])]
22 | 
23 | 
24 | @singledispatch
25 | def eval_predicate(predicate: Any, df: pd.DataFrame) -> pd.Series:
26 |     raise NotImplementedError(f"Evaluating predicate of type {type(predicate)}")
27 | 
28 | 
29 | @eval_predicate.register(dict)
30 | def eval_dict(predicate: dict, df: pd.DataFrame) -> pd.Series:
31 |     transform = alt.FilterTransform({"filter": predicate})
32 |     return eval_predicate(transform.filter, df)
33 | 
34 | 
35 | @eval_predicate.register(str)
36 | def eval_string(predicate: str, df: pd.DataFrame) -> pd.Series:
37 |     return df.apply(lambda datum: eval_vegajs(predicate, datum), axis=1)
38 | 
39 | 
40 | @eval_predicate.register(alt.FieldEqualPredicate)
41 | def eval_field_equal(predicate: alt.FieldEqualPredicate, df: pd.DataFrame) -> pd.Series:
42 |     return get_column(df, predicate) == eval_value(predicate.equal)
43 | 
44 | 
45 | @eval_predicate.register(alt.FieldRangePredicate)
46 | def eval_field_range(predicate: alt.FieldRangePredicate, df: pd.DataFrame) -> pd.Series:
47 |     min_, max_ = [eval_value(val) for val in predicate.range]
48 |     column = get_column(df, predicate)
49 |     if min_ is None:
50 |         min_ = column.min()
51 |     if max_ is None:
52 |         max_ = column.max()
53 |     return column.between(min_, max_, inclusive=True)
54 | 
55 | 
56 | @eval_predicate.register(alt.FieldOneOfPredicate)
57 | def eval_field_oneof(predicate: alt.FieldOneOfPredicate, df: pd.DataFrame) -> pd.Series:
58 |     options = [eval_value(val) for val in predicate.oneOf]
59 |     return get_column(df, predicate).isin(options)
60 | 
61 | 
62 | @eval_predicate.register(alt.FieldLTPredicate)
63 | def eval_field_lt(predicate: alt.FieldLTPredicate, df: pd.DataFrame) -> pd.Series:
64 |     return get_column(df, predicate) < eval_value(predicate.lt)
65 | 
66 | 
67 | @eval_predicate.register(alt.FieldLTEPredicate)
68 | def eval_field_lte(predicate: alt.FieldLTEPredicate, df: pd.DataFrame) -> pd.Series:
69 |     return get_column(df, predicate) <= eval_value(predicate.lte)
70 | 
71 | 
72 | @eval_predicate.register(alt.FieldGTPredicate)
73 | def eval_field_gt(predicate: alt.FieldGTPredicate, df: pd.DataFrame) -> pd.Series:
74 |     return get_column(df, predicate) > eval_value(predicate.gt)
75 | 
76 | 
77 | @eval_predicate.register(alt.FieldGTEPredicate)
78 | def eval_field_gte(predicate:
alt.FieldGTEPredicate, df: pd.DataFrame) -> pd.Series: 79 | return get_column(df, predicate) >= eval_value(predicate.gte) 80 | 81 | 82 | @eval_predicate.register(alt.LogicalNotPredicate) 83 | def eval_logical_not(predicate: alt.LogicalNotPredicate, df: pd.DataFrame) -> pd.Series: 84 | return ~eval_predicate(predicate["not"], df) 85 | 86 | 87 | @eval_predicate.register(alt.LogicalAndPredicate) 88 | def eval_logical_and(predicate: alt.LogicalAndPredicate, df: pd.DataFrame) -> pd.Series: 89 | return np.logical_and.reduce([eval_predicate(p, df) for p in predicate["and"]]) 90 | 91 | 92 | @eval_predicate.register(alt.LogicalOrPredicate) 93 | def eval_logical_or(predicate: alt.LogicalOrPredicate, df: pd.DataFrame) -> pd.Series: 94 | return np.logical_or.reduce([eval_predicate(p, df) for p in predicate["or"]]) 95 | 96 | 97 | @singledispatch 98 | def eval_value(value: Any) -> Any: 99 | return value 100 | 101 | 102 | @eval_value.register(alt.DateTime) 103 | def eval_datetime(value: alt.DateTime) -> pd.Series: 104 | # TODO: implement datetime conversion & comparison 105 | raise NotImplementedError("Evaluating alt.DateTime object") 106 | 107 | 108 | @eval_value.register(alt.SchemaBase) 109 | def eval_schemabase(value: alt.SchemaBase) -> dict: 110 | return value.to_dict() 111 | -------------------------------------------------------------------------------- /altair_transform/core.py: -------------------------------------------------------------------------------- 1 | """Core altair_transform routines.""" 2 | 3 | from typing import List, Union 4 | 5 | import pandas as pd 6 | import altair as alt 7 | 8 | from altair_transform.transform import visit 9 | from altair_transform.utils import to_dataframe 10 | from altair_transform.extract import extract_transform 11 | 12 | __all__ = ["apply", "extract_data", "transform_chart"] 13 | 14 | 15 | def apply( 16 | df: pd.DataFrame, 17 | transform: Union[alt.Transform, List[alt.Transform]], 18 | inplace: bool = False, 19 | ) -> pd.DataFrame: 20 | """Apply transform or transforms to dataframe. 21 | 22 | Parameters 23 | ---------- 24 | df : pd.DataFrame 25 | transform : list|dict 26 | A transform specification or list of transform specifications. 27 | Each specification must be valid according to Altair's transform 28 | schema. 29 | inplace : bool 30 | If True, then dataframe may be modified in-place. Default: False. 31 | 32 | Returns 33 | ------- 34 | df_transformed : pd.DataFrame 35 | The transformed dataframe. 36 | 37 | Example 38 | ------- 39 | >>> import pandas as pd 40 | >>> data = pd.DataFrame({'x': range(5), 'y': list('ABCAB')}) 41 | >>> chart = alt.Chart(data).transform_aggregate(sum_x='sum(x)', groupby=['y']) 42 | >>> apply(data, chart.transform) 43 | y sum_x 44 | 0 A 3 45 | 1 B 5 46 | 2 C 2 47 | """ 48 | if not inplace: 49 | df = df.copy() 50 | if transform is alt.Undefined: 51 | return df 52 | return visit(transform, df) 53 | 54 | 55 | def extract_data( 56 | chart: alt.Chart, apply_encoding_transforms: bool = True 57 | ) -> pd.DataFrame: 58 | """Extract transformed data from a chart. 59 | 60 | This only works with data and transform defined at the 61 | top level of the chart. 62 | 63 | Parameters 64 | ---------- 65 | chart : alt.Chart 66 | The chart instance from which the data and transform 67 | will be extracted 68 | apply_encoding_transforms : bool 69 | If True (default), then apply transforms specified within an 70 | encoding as well as those specified directly in the transforms 71 | attribute. 
72 | 73 | Returns 74 | ------- 75 | df_transformed : pd.DataFrame 76 | The extracted and transformed dataframe. 77 | 78 | Example 79 | ------- 80 | >>> import pandas as pd 81 | >>> data = pd.DataFrame({'x': range(5), 'y': list('ABCAB')}) 82 | >>> chart = alt.Chart(data).mark_bar().encode(x='sum(x)', y='y') 83 | >>> extract_data(chart) 84 | y sum_x 85 | 0 A 3 86 | 1 B 5 87 | 2 C 2 88 | """ 89 | if apply_encoding_transforms: 90 | chart = extract_transform(chart) 91 | return apply(to_dataframe(chart.data, chart), chart.transform) 92 | 93 | 94 | def transform_chart( 95 | chart: alt.Chart, extract_encoding_transforms: bool = True 96 | ) -> alt.Chart: 97 | """Return a chart with the transformed data 98 | 99 | Parameters 100 | ---------- 101 | chart : alt.Chart 102 | The chart instance from which the data and transform 103 | will be extracted. 104 | extract_encoding_transforms : bool 105 | If True (default), then also extract transforms from encodings. 106 | 107 | Returns 108 | ------- 109 | chart_out : alt.Chart 110 | A copy of the input chart with the transformed data. 111 | 112 | Example 113 | ------- 114 | >>> import pandas as pd 115 | >>> data = pd.DataFrame({'x': range(5), 'y': list('ABCAB')}) 116 | >>> chart = alt.Chart(data).mark_bar().encode(x='sum(x)', y='y') 117 | >>> new_chart = transform_chart(chart) 118 | >>> new_chart.data 119 | y sum_x 120 | 0 A 3 121 | 1 B 5 122 | 2 C 2 123 | >>> new_chart.encoding 124 | FacetedEncoding({ 125 | x: PositionFieldDef({ 126 | field: FieldName('sum_x'), 127 | title: 'Sum of x', 128 | type: StandardType('quantitative') 129 | }), 130 | y: PositionFieldDef({ 131 | field: FieldName('y'), 132 | type: StandardType('nominal') 133 | }) 134 | }) 135 | """ 136 | if extract_encoding_transforms: 137 | chart = extract_transform(chart) 138 | chart = chart.properties(data=extract_data(chart, apply_encoding_transforms=False)) 139 | chart.transform = alt.Undefined 140 | return chart 141 | -------------------------------------------------------------------------------- /altair_transform/transform/tests/test_transform.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import numpy as np 4 | import pandas as pd 5 | from pandas.testing import assert_frame_equal 6 | 7 | import altair_transform 8 | from altair_transform.transform.aggregate import AGG_REPLACEMENTS 9 | 10 | 11 | AGGREGATES = [ 12 | "argmax", 13 | "argmin", 14 | "average", 15 | "count", 16 | "distinct", 17 | "max", 18 | "mean", 19 | "median", 20 | "min", 21 | "missing", 22 | "q1", 23 | "q3", 24 | "ci0", 25 | "ci1", 26 | "stderr", 27 | "stdev", 28 | "stdevp", 29 | "sum", 30 | "valid", 31 | "values", 32 | "variance", 33 | "variancep", 34 | ] 35 | 36 | AGG_SKIP = ["ci0", "ci1", "values"] # These require scipy. 
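
# joinaggregate, exercised below, differs from aggregate in that the
# aggregated value is joined back onto every input row rather than
# collapsing the table (a hedged, minimal example):
#
#     >>> import pandas as pd
#     >>> import altair_transform
#     >>> df = pd.DataFrame({"x": [1, 2, 3]})
#     >>> transform = {"joinaggregate": [{"op": "sum", "field": "x", "as": "z"}]}
#     >>> altair_transform.apply(df, transform)["z"].tolist()
#     [6, 6, 6]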
37 | 38 | 39 | @pytest.fixture 40 | def data(): 41 | rand = np.random.RandomState(42) 42 | return pd.DataFrame( 43 | { 44 | "x": rand.randint(0, 100, 12), 45 | "y": rand.randint(0, 100, 12), 46 | "t": pd.date_range("2012-01-15", freq="M", periods=12), 47 | "i": range(12), 48 | "c": list("AAABBBCCCDDD"), 49 | "d": list("ABCABCABCABC"), 50 | } 51 | ) 52 | 53 | 54 | def test_calculate_transform(data): 55 | transform = {"calculate": "datum.x + datum.y", "as": "z"} 56 | out1 = altair_transform.apply(data, transform) 57 | 58 | out2 = data.copy() 59 | out2["z"] = data.x + data.y 60 | 61 | assert_frame_equal(out1, out2) 62 | 63 | 64 | @pytest.mark.parametrize("groupby", [True, False]) 65 | @pytest.mark.parametrize("op", set(AGGREGATES) - set(AGG_SKIP)) 66 | def test_joinaggregate_transform(data, groupby, op): 67 | field = "x" 68 | col = "z" 69 | group = "c" 70 | 71 | transform = {"joinaggregate": [{"op": op, "field": field, "as": col}]} 72 | if groupby: 73 | transform["groupby"] = [group] 74 | 75 | op = AGG_REPLACEMENTS.get(op, op) 76 | out = altair_transform.apply(data, transform) 77 | 78 | def validate(group): 79 | return np.allclose(group[field].aggregate(op), group[col]) 80 | 81 | if groupby: 82 | assert out.groupby(group).apply(validate).all() 83 | else: 84 | assert validate(out) 85 | 86 | 87 | def test_quantile_values(): 88 | np.random.seed(0) 89 | data = pd.DataFrame( 90 | {"x": np.random.randn(12), "C": np.random.choice(["A", "B"], 12)} 91 | ) 92 | transform = {"quantile": "x", "groupby": ["C"], "as": ["p", "v"], "step": 0.1} 93 | # Copied from vega editor for above data/transform 94 | expected = pd.DataFrame( 95 | [ 96 | ["A", 0.05, -0.853389779139604], 97 | ["A", 0.15, -0.6056135776659901], 98 | ["A", 0.25, -0.3578373761923762], 99 | ["A", 0.35, -0.12325942278589436], 100 | ["A", 0.45, 0.04532729028492671], 101 | ["A", 0.55, 0.21391400335574778], 102 | ["A", 0.65, 0.38250071642656897], 103 | ["A", 0.75, 0.7489619629456958], 104 | ["A", 0.85, 1.1549981161544833], 105 | ["A", 0.95, 1.5610342693632706], 106 | ["B", 0.05, -0.016677003759505288], 107 | ["B", 0.15, 0.15684925302119532], 108 | ["B", 0.25, 0.336128799065637], 109 | ["B", 0.35, 0.6476262524884882], 110 | ["B", 0.45, 0.9543858525126119], 111 | ["B", 0.55, 0.9744405491187167], 112 | ["B", 0.65, 1.2402825216772193], 113 | ["B", 0.75, 1.5575946277597235], 114 | ["B", 0.85, 1.8468937659906184], 115 | ["B", 0.95, 2.1102258760334363], 116 | ], 117 | columns=["C", "p", "v"], 118 | ) 119 | out = altair_transform.apply(data, transform) 120 | assert_frame_equal(out, expected) 121 | 122 | 123 | @pytest.mark.parametrize("N", [1, 5, 50]) 124 | def test_sample_transform(data, N): 125 | transform = {"sample": N} 126 | out = altair_transform.apply(data, transform) 127 | 128 | # Ensure the shape is correct 129 | assert out.shape == (min(N, data.shape[0]), data.shape[1]) 130 | 131 | # Ensure the content are correct 132 | assert_frame_equal(out, data.iloc[out.index]) 133 | 134 | 135 | def test_multiple_transforms(data): 136 | transform = [ 137 | {"calculate": "0.5 * (datum.x + datum.y)", "as": "xy_mean"}, 138 | {"filter": "datum.x < datum.xy_mean"}, 139 | ] 140 | out1 = altair_transform.apply(data, transform) 141 | out2 = data.copy() 142 | out2["xy_mean"] = 0.5 * (data.x + data.y) 143 | out2 = out2[out2.x < out2.xy_mean].reset_index(drop=True) 144 | 145 | assert_frame_equal(out1, out2) 146 | -------------------------------------------------------------------------------- /altair_transform/tests/test_vegaexpr.py: 
-------------------------------------------------------------------------------- 1 | import datetime as dt 2 | import pytest 3 | import numpy as np 4 | from altair_transform.vegaexpr import eval_vegajs, undefined, JSRegex 5 | 6 | # Most parsing is tested in the parser; here we just test a sampling of the 7 | # variables and functions defined in the vegaexpr namespace. 8 | 9 | EXPRESSIONS = { 10 | "null": None, 11 | "true": True, 12 | "false": False, 13 | "/[A-Za-z0-9]+/": JSRegex("[A-Za-z0-9]+"), 14 | "/.*/i": JSRegex(".*", "i"), 15 | "{}[1]": undefined, 16 | "{}.foo": undefined, 17 | "[][0]": undefined, 18 | "2 * PI": 2 * np.pi, 19 | "1 / SQRT2": 1.0 / np.sqrt(2), 20 | "LOG2E + LN10": np.log2(np.e) + np.log(10), 21 | "isArray([1, 2, 3])": True, 22 | "isBoolean(false)": True, 23 | "isBoolean(true)": True, 24 | "isBoolean(1)": False, 25 | "isDate(datetime(2019, 1, 1))": True, 26 | "isDate('2019-01-01')": False, 27 | "isDefined(null)": True, 28 | "isDefined({}[1])": False, 29 | "isNumber(3.5)": True, 30 | "isNumber(now())": True, 31 | "isString('abc')": True, 32 | 'isString("abc")': True, 33 | "isObject({a:2})": True, 34 | "isObject({'a':2})": True, 35 | "isRegExp(/[A-Z0-9]+/)": True, 36 | "isRegExp('[A-Z0-9]+')": False, 37 | "isValid(null)": False, 38 | "isValid(NaN)": False, 39 | "isValid({}[1])": False, 40 | "isValid(0)": True, 41 | "toBoolean(1)": True, 42 | "toBoolean(0)": False, 43 | "toDate('')": None, 44 | "toDate(null)": None, 45 | "toDate(1547510400000)": 1547510400000, 46 | "toDate('2019-01-15')": 1547510400000, 47 | "toNumber('1234.5')": 1234.5, 48 | "toNumber('')": None, 49 | "toNumber(null)": None, 50 | "toString(123)": "123", 51 | "toString(0.5)": "0.5", 52 | "toString('')": None, 53 | "toString(null)": None, 54 | "toString(123)": "123", 55 | "toString('123')": "123", 56 | 'if(4 > PI, "yes", "no")': "yes", 57 | "pow(sin(PI), 2) + pow(cos(PI), 2)": 1, 58 | "floor(1.5) == ceil(0.5)": True, 59 | "max(1, 2, 3) == min(3, 4, 5)": True, 60 | "time(datetime(1546338896789))": 1546338896789, 61 | "isDate(datetime())": True, 62 | "datetime(1546329600000)": dt.datetime.fromtimestamp(1546329600), 63 | "datetime(2019, 0, 1)": dt.datetime(2019, 1, 1), 64 | "year(datetime(2019, 0, 1, 2, 34, 56, 789))": 2019, 65 | "quarter(datetime(2019, 0, 1, 2, 34, 56, 789))": 0, 66 | "month(datetime(2019, 0, 1, 2, 34, 56, 789))": 0, 67 | "date(datetime(2019, 0, 1, 2, 34, 56, 789))": 1, 68 | "day(datetime(2019, 0, 1, 2, 34, 56, 789))": 2, 69 | "hours(datetime(2019, 0, 1, 2, 34, 56, 789))": 2, 70 | "minutes(datetime(2019, 0, 1, 2, 34, 56, 789))": 34, 71 | "seconds(datetime(2019, 0, 1, 2, 34, 56, 789))": 56, 72 | "milliseconds(datetime(2019, 0, 1, 2, 34, 56, 789))": 789, 73 | "utc(2019, 0, 1, 2, 34, 56, 789)": 1546310096789, 74 | "utcyear(datetime(utc(2019, 0, 1, 2, 34, 56, 789)))": 2019, 75 | "utcquarter(datetime(utc(2019, 0, 1, 2, 34, 56, 789)))": 0, 76 | "utcmonth(datetime(utc(2019, 0, 1, 2, 34, 56, 789)))": 0, 77 | "utcdate(datetime(utc(2019, 0, 1, 2, 34, 56, 789)))": 1, 78 | "utcday(datetime(utc(2019, 0, 1, 2, 34, 56, 789)))": 2, 79 | "utchours(datetime(utc(2019, 0, 1, 2, 34, 56, 789)))": 2, 80 | "utcminutes(datetime(utc(2019, 0, 1, 2, 34, 56, 789)))": 34, 81 | "utcseconds(datetime(utc(2019, 0, 1, 2, 34, 56, 789)))": 56, 82 | "utcmilliseconds(datetime(utc(2019, 0, 1, 2, 34, 56, 789)))": 789, 83 | "parseInt('1234 years')": 1234, 84 | "parseInt('2A', 16)": 42, 85 | "parseFloat(' 3.125 is close to pi')": 3.125, 86 | "indexof('ABCABC', 'C')": 2, 87 | "lastindexof('ABCABC', 'C')": 5, 88 | "length('ABCABC')": 
6,
89 |     "lower('AbC')": "abc",
90 |     "pad('abc', 6, 'x', 'left')": "xxxabc",
91 |     "pad('abc', 6, 'x', 'right')": "abcxxx",
92 |     "pad('abc', 6, 'x', 'center')": "xabcxx",
93 |     "replace('ABCDABCD', 'BC', 'xx')": "AxxDABCD",
94 |     "replace('ABCDABCD', /[B-D]+/, 'xxx')": "AxxxABCD",
95 |     "replace('ABCDABCD', /BC/g, 'xx')": "AxxDAxxD",
96 |     "split('AB CD EF', ' ')": ["AB", "CD", "EF"],
97 |     "substring('ABCDEF', 3, -1)": "ABC",
98 |     "slice('ABCDEF', 3, -1)": "DE",
99 |     "trim(' ABC ')": "ABC",
100 |     "truncate('1234567', 4, 'right', 'x')": "123x",
101 |     "truncate('1234567', 4, 'left', 'x')": "x567",
102 |     "truncate('1234567', 4, 'center', 'x')": "12x7",
103 |     "upper('AbC')": "ABC",
104 |     "extent([5, {}[1], 2, null, 4, NaN, 1])": [1, 5],
105 |     "clampRange([5, 2], 1, 7)": [2, 5],
106 |     "clampRange([5, 2], 3, 7)": [3, 6],
107 |     "clampRange([5, 2], 0, 4)": [1, 4],
108 |     "clampRange([5, 2], 3, 4)": [3, 4],
109 |     "inrange(4, [3, 4])": True,
110 |     "inrange(4, [4, 5])": True,
111 |     "inrange(4, [5, 7])": False,
112 |     "join(['a', 'b', 'c'])": "a,b,c",
113 |     "join(['a', 'b', 'c'], '-')": "a-b-c",
114 |     "lerp([0, 50], 0.5)": 25.0,
115 |     "peek([1, 2, 3])": 3,
116 |     "reverse([1, 2, 3])": [3, 2, 1],
117 |     "sequence(3)": [0, 1, 2],
118 |     "sequence(1, 4)": [1, 2, 3],
119 |     "sequence(0, 2, 0.5)": [0, 0.5, 1, 1.5],
120 |     "slice([1, 2, 3, 4], 1, 3)": [2, 3],
121 |     "span([0, 2, 4])": 4,
122 |     "regexp('[A-Z]?','g')": JSRegex("[A-Z]?", "g"),
123 |     "test(/[A-Z]+/, '123ABC')": True,
124 |     "test(/[A-Z]+/y, '123ABC')": False,
125 | }
126 | 
127 | 
128 | @pytest.mark.parametrize("expression,expected", EXPRESSIONS.items())
129 | def test_vegajs_expressions(expression, expected):
130 |     result = eval_vegajs(expression)
131 |     if isinstance(result, float):
132 |         assert np.allclose(result, expected)
133 |     else:
134 |         assert result == expected
135 | 
--------------------------------------------------------------------------------
/altair_transform/driver.py:
--------------------------------------------------------------------------------
1 | """Extract transformed data directly via a selenium webdriver."""
2 | import io
3 | import json
4 | from typing import Any, Dict, List, Optional, Union
5 | 
6 | import altair as alt
7 | import pandas as pd
8 | 
9 | JSON = Union[str, int, float, bool, None, Dict[str, Any], List[Any]]
10 | JSONDict = Dict[str, JSON]
11 | 
12 | CDN_URL = "https://cdn.jsdelivr.net/npm/{package}@{version}"
13 | 
14 | HTML_TEMPLATE = """
15 | <!DOCTYPE html>
16 | <html>
17 | <head>
18 |   <title>Embedding Vega-Lite</title>
19 |   <script src="{vega_url}"></script>
20 |   <script src="{vegalite_url}"></script>
21 |   <script src="{vegaembed_url}"></script>
22 | </head>
23 | <body>
24 |   <div id="vis"></div>
25 | </body>
26 | </html>
27 | """
28 | 
29 | EXTRACT_CODE = """
30 | var spec = arguments[0];
31 | var name = arguments[1];
32 | var done = arguments[2];
33 | 
34 | vegaEmbed("#vis", spec, {"mode": "vega-lite"})
35 |   .then(result => done({data: JSON.stringify(result.view.data(name))}))
36 |   .catch(error => done({error: error.toString()}));
37 | """
38 | 
39 | 
40 | def _serialize(df: pd.DataFrame) -> JSONDict:
41 |     """Serialize a dataframe to a JSON dict."""
42 |     return json.loads(df.to_json(orient="table"))
43 | 
44 | 
45 | def _load(serialized: JSONDict) -> pd.DataFrame:
46 |     """Load a dataframe from a JSON dict."""
47 |     return pd.read_json(io.StringIO(json.dumps(serialized)), orient="table")
48 | 
49 | 
50 | def _extract_data(spec: JSONDict, name: str = "data_0") -> pd.DataFrame:
51 |     """Extract named data from a Vega-Lite chart spec.
52 | 
53 |     Parameters
54 |     ----------
55 |     spec : dict
56 |         The Vega-Lite specification containing the data to extract
57 | 
58 |     name : string
59 |         The name of the data stream to extract
60 | 
61 |     Returns
62 |     -------
63 |     data : pd.DataFrame
64 |         The extracted data
65 |     """
66 |     # Optional deps
67 |     from selenium.common.exceptions import NoSuchElementException
68 |     from altair_saver import SeleniumSaver
69 |     from altair_viewer import get_bundled_script
70 | 
71 |     js_resources = {
72 |         "vega.js": get_bundled_script("vega", alt.VEGA_VERSION),
73 |         "vega-lite.js": get_bundled_script("vega-lite", alt.VEGALITE_VERSION),
74 |         "vega-embed.js": get_bundled_script("vega-embed", alt.VEGAEMBED_VERSION),
75 |     }
76 |     html = HTML_TEMPLATE.format(
77 |         vega_url="/vega.js",
78 |         vegalite_url="/vega-lite.js",
79 |         vegaembed_url="/vega-embed.js",
80 |     )
81 | 
82 |     url = SeleniumSaver._serve(html, js_resources)
83 |     driver_name = SeleniumSaver._select_webdriver(20)
84 |     driver = SeleniumSaver._registry.get(driver_name, 20)
85 | 
86 |     driver.get("about:blank")
87 |     driver.get(url)
88 | 
89 |     try:
90 |         driver.find_element_by_id("vis")
91 |     except NoSuchElementException:
92 |         raise RuntimeError(f"Could not load {url}")
93 | 
94 |     data = driver.execute_async_script(EXTRACT_CODE, spec, name)
95 | 
96 |     if "error" in data:
97 |         raise ValueError(f"Javascript Error: {data['error']}")
98 | 
99 |     return pd.DataFrame.from_records(json.loads(data["data"]))
100 | 
101 | 
102 | def apply(
103 |     df: pd.DataFrame,
104 |     transform: Union[
105 |         None, JSONDict, alt.Transform, List[Union[JSONDict, alt.Transform]]
106 |     ] = None,
107 | ) -> pd.DataFrame:
108 |     """Extract transformed data from a Javascript rendering.
109 | 
110 |     Parameters
111 |     ----------
112 |     df : pd.DataFrame
113 |     transform : list|dict
114 |         A transform specification or list of transform specifications.
115 |         Each specification must be valid according to Altair's transform
116 |         schema.
117 | 
118 |     Returns
119 |     -------
120 |     df_transformed : pd.DataFrame
121 |         The transformed dataframe.
122 |     """
123 |     if transform is None:
124 |         transform = []
125 |     elif not isinstance(transform, list):
126 |         transform = [transform]
127 |     chart = alt.Chart(df).mark_point()._add_transform(*transform)
128 |     with alt.data_transformers.enable(max_rows=None, consolidate_datasets=False):
129 |         spec = chart.to_dict()
130 |     return _extract_data(spec, "data_0")
131 | 
132 | 
133 | def get_tz_code() -> str:
134 |     """Get the timezone code used by chromedriver."""
135 |     # Optional deps
136 |     from selenium.common.exceptions import NoSuchElementException
137 |     from altair_saver import SeleniumSaver
138 | 
139 |     html = """<div id="vis"></div>"""
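    # Note: selenium's execute_async_script injects a done-callback as the
    # final entry of `arguments`; the script must invoke it to return a value.
    # No extra arguments are passed here, so the callback is arguments[0]
    # (in EXTRACT_CODE above, which receives spec and name, it is arguments[2]).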
""" 140 | script = "arguments[0](Intl.DateTimeFormat().resolvedOptions().timeZone)" 141 | url = SeleniumSaver._serve(html, {}) 142 | driver_name = SeleniumSaver._select_webdriver(20) 143 | driver = SeleniumSaver._registry.get(driver_name, 20) 144 | driver.get("about:blank") 145 | driver.get(url) 146 | try: 147 | driver.find_element_by_id("vis") 148 | except NoSuchElementException: 149 | raise RuntimeError(f"Could not load {url}") 150 | return driver.execute_async_script(script) 151 | 152 | 153 | def get_tz_offset(tz: Optional[str] = None) -> pd.Timedelta: 154 | """Get the timezone offset between Python and Javascript for dates with the given timezone. 155 | 156 | Parameters 157 | ---------- 158 | tz : string (optional) 159 | The timezone of the input dates 160 | 161 | Returns 162 | ------- 163 | offset : pd.Timedelta 164 | The offset between the Javasript representation and the Python representation 165 | of a date with the given timezone. 166 | """ 167 | ts = pd.to_datetime("2012-01-01").tz_localize(tz) 168 | df = pd.DataFrame({"t": [ts]}) 169 | out = apply(df, {"timeUnit": "year", "field": "t", "as": "year"}) 170 | 171 | date_in = df.t[0] 172 | date_out = pd.to_datetime(1e6 * out.t)[0].tz_localize(tz) 173 | 174 | return date_out - date_in 175 | -------------------------------------------------------------------------------- /altair_transform/extract.py: -------------------------------------------------------------------------------- 1 | """Tools for extracting transforms from encodings""" 2 | from collections import defaultdict 3 | import copy 4 | from typing import Any, Dict, List, Tuple 5 | 6 | import altair as alt 7 | 8 | _EncodingType = Dict[str, dict] 9 | _SpecType = Dict[str, Any] 10 | _TransformType = List[_SpecType] 11 | 12 | 13 | def extract_transform(chart: alt.Chart) -> alt.Chart: 14 | """Extract transforms from encodings 15 | 16 | This takes a chart with transforms specified within encodings, and returns 17 | an equivalent chart with transforms specified separately in the ``transform`` 18 | field. 
19 | 20 | Parameters 21 | ---------- 22 | chart : alt.Chart 23 | Input chart, which will not be modified 24 | 25 | Returns 26 | ------- 27 | chart : alt.Chart 28 | A copy of the input chart with any encoding-specified transforms moved 29 | to the transforms-attribute 30 | 31 | Example 32 | ------- 33 | >>> chart = alt.Chart('data.csv').mark_bar().encode(x='mean(x):Q', y='y:N') 34 | >>> new_chart = extract_transform(chart) 35 | >>> new_chart.transform 36 | [AggregateTransform({ 37 | aggregate: [AggregatedFieldDef({ 38 | as: FieldName('mean_x'), 39 | field: FieldName('x'), 40 | op: AggregateOp('mean') 41 | })], 42 | groupby: [FieldName('y')] 43 | })] 44 | >>> new_chart.encoding 45 | FacetedEncoding({ 46 | x: PositionFieldDef({ 47 | field: FieldName('mean_x'), 48 | title: 'Mean of x', 49 | type: StandardType('quantitative') 50 | }), 51 | y: PositionFieldDef({ 52 | field: FieldName('y'), 53 | type: StandardType('nominal') 54 | }) 55 | }) 56 | """ 57 | 58 | chart = chart.copy() 59 | encoding_dict = chart.encoding.copy().to_dict(context={"data": chart.data}) 60 | encoding, transform = _encoding_to_transform(encoding_dict) 61 | if transform: 62 | chart.encoding = alt.FacetedEncoding.from_dict(encoding) 63 | if chart.transform is alt.Undefined: 64 | chart.transform = [] 65 | chart.transform.extend(alt.Transform.from_dict(t) for t in transform) 66 | return chart 67 | 68 | 69 | def _encoding_to_transform( 70 | encoding: _EncodingType, 71 | ) -> Tuple[_EncodingType, _TransformType]: 72 | """Extract transforms from an encoding dict.""" 73 | # TODO: what if one encoding has multiple transforms? Is this valid? 74 | by_category: Dict[str, _EncodingType] = defaultdict(dict) 75 | new_encoding: _EncodingType = {} 76 | for channel, spec in encoding.items(): 77 | for key in ["impute", "bin", "aggregate", "timeUnit"]: 78 | if key in spec: 79 | by_category[key][channel] = copy.deepcopy(spec) 80 | break 81 | else: 82 | new_encoding[channel] = copy.deepcopy(spec) 83 | 84 | groupby: List[str] = [ 85 | enc["field"] for enc in new_encoding.values() if "field" in enc 86 | ] 87 | transforms: _TransformType = [] 88 | field: str = "" 89 | new_field: str = "" 90 | new_field2: str = "" 91 | 92 | for channel, spec in by_category["bin"].items(): 93 | if spec["bin"] == "binned": 94 | new_encoding[channel] = spec 95 | if "field" in spec: 96 | groupby.append(spec["field"]) 97 | continue 98 | field = spec.pop("field") 99 | new_field = f"{field}_binned" 100 | new_field2 = f"{field}_binned2" 101 | needs_upper_limit: bool = ( 102 | channel in ["x", "y"] 103 | and spec["type"] == "quantitative" 104 | and f"{channel}2" not in encoding 105 | ) 106 | bin_transform: _SpecType = { 107 | "field": field, 108 | "bin": spec.pop("bin"), 109 | "as": [new_field, new_field2] if needs_upper_limit else new_field, 110 | } 111 | spec["field"] = new_field 112 | spec.setdefault("title", f"{field} (binned)") 113 | new_encoding[channel] = spec 114 | groupby.append(new_field) 115 | 116 | if needs_upper_limit: 117 | spec["bin"] = "binned" 118 | new_encoding[f"{channel}2"] = {"field": new_field2} 119 | groupby.append(new_field2) 120 | transforms.append(bin_transform) 121 | 122 | for channel, spec in by_category["timeUnit"].items(): 123 | timeUnit: str = spec[ 124 | "timeUnit" 125 | ] # leave timeUnit in spec for the sake of formatting 126 | field = spec.pop("field") 127 | new_field = f"{timeUnit}_{field}" 128 | spec["field"] = new_field 129 | spec.setdefault("title", f"{field} ({timeUnit})") 130 | new_encoding[channel] = spec 131 | 
transforms.append({"timeUnit": timeUnit, "field": field, "as": new_field}) 132 | groupby.append(new_field) 133 | 134 | for channel, spec in by_category["impute"].items(): 135 | keychannel = "y" if channel == "x" else "x" 136 | key = encoding.get(keychannel, {}).get("field", spec["field"]) 137 | impute_transform: _SpecType = spec.pop("impute") 138 | impute_transform.update( 139 | { 140 | "impute": spec["field"], 141 | "key": key, 142 | "groupby": [field for field in groupby if field != key], 143 | } 144 | ) 145 | new_encoding[channel] = spec 146 | transforms.append(impute_transform) 147 | 148 | agg_transforms: _TransformType = [] 149 | for channel, spec in by_category["aggregate"].items(): 150 | aggregate: str = spec.pop("aggregate") 151 | field = spec.pop("field", None) 152 | new_field = "__count" if aggregate == "count" else f"{aggregate}_{field}" 153 | agg_dict: Dict[str, str] = {"op": aggregate, "as": new_field} 154 | if field is not None: 155 | agg_dict["field"] = field 156 | agg_transforms.append(agg_dict) 157 | spec["field"] = new_field 158 | spec.setdefault( 159 | "title", 160 | ( 161 | "Count of Records" 162 | if aggregate == "count" 163 | else f"{aggregate.title()} of {field}" 164 | ), 165 | ) 166 | new_encoding[channel] = spec 167 | if agg_transforms: 168 | transform: Dict[str, list] = {"aggregate": agg_transforms} 169 | if groupby: 170 | transform["groupby"] = groupby 171 | transforms.append(transform) 172 | 173 | return new_encoding, transforms 174 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # altair-transform 2 | 3 | Python evaluation of Altair/Vega-Lite transforms. 4 | 5 | [![build status](http://img.shields.io/travis/altair-viz/altair-transform/master.svg)](https://travis-ci.org/altair-viz/altair-transform) 6 | [![github actions](https://github.com/altair-viz/altair-transform/workflows/build/badge.svg)](https://github.com/altair-viz/altair-transform/actions?query=workflow%3Abuild) 7 | [![github actions](https://github.com/altair-viz/altair-transform/workflows/lint/badge.svg)](https://github.com/altair-viz/altair-transform/actions?query=workflow%3Alint) 8 | [![code style black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) 9 | 10 | ``altair-transform`` requires Python 3.6 or later. Install with: 11 | 12 | $ pip install altair_transform 13 | 14 | Altair-transform evaluates [Altair](http://altair-viz.github.io) and [Vega-Lite](http://vega.github.io/vega-lite) 15 | transforms directly in Python. This can be useful in a number of contexts, illustrated in the examples below. 16 | 17 | ## Example: Extracting Data 18 | 19 | The Vega-Lite specification includes the ability to apply a 20 | wide range of transformations to input data within the chart 21 | specification. 
As an example, here is a sliding window average
22 | of a Gaussian random walk, implemented in Altair:
23 | 
24 | ```python
25 | import altair as alt
26 | import numpy as np
27 | import pandas as pd
28 | 
29 | rand = np.random.RandomState(12345)
30 | 
31 | df = pd.DataFrame({
32 |     'x': np.arange(200),
33 |     'y': rand.randn(200).cumsum()
34 | })
35 | 
36 | points = alt.Chart(df).mark_point().encode(
37 |     x='x:Q',
38 |     y='y:Q'
39 | )
40 | 
41 | line = alt.Chart(df).transform_window(
42 |     ymean='mean(y)',
43 |     sort=[alt.SortField('x')],
44 |     frame=[5, 5]
45 | ).mark_line(color='red').encode(
46 |     x='x:Q',
47 |     y='ymean:Q'
48 | )
49 | 
50 | points + line
51 | ```
52 | ![Altair Visualization](https://raw.githubusercontent.com/altair-viz/altair-transform/master/images/random_walk.png)
53 | 
54 | Because the transform is encoded within the renderer, however, the
55 | computed values are not directly accessible from the Python layer.
56 | 
57 | This is where ``altair_transform`` comes in. It includes a (nearly)
58 | complete Python implementation of Vega-Lite's transform layer, so
59 | that you can easily extract a pandas dataframe with the computed
60 | values shown in the chart:
61 | 
62 | ```python
63 | from altair_transform import extract_data
64 | data = extract_data(line)
65 | data.head()
66 | ```
67 | 
|   | x | y | ymean |
|---|---|---|---|
| 0 | 0 | -0.204708 | 0.457749 |
| 1 | 1 | 0.274236 | 0.771093 |
| 2 | 2 | -0.245203 | 1.041320 |
| 3 | 3 | -0.800933 | 1.336943 |
| 4 | 4 | 1.164847 | 1.698085 |
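
As a quick sanity check, the extracted column matches an equivalent computation done with pandas alone. This is a sketch, assuming the `df` and `data` frames from the snippets above:

```python
import numpy as np

# frame=[5, 5] means five points on either side of the current row,
# i.e. a centered 11-point window, truncated at the series edges.
expected = df['y'].rolling(11, center=True, min_periods=1).mean()
assert np.allclose(data['ymean'], expected)
```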
109 | 
110 | From here, you can work with the transformed data directly
111 | in Python.
112 | 
113 | ## Example: Pre-Aggregating Large Datasets
114 | 
115 | Altair creates chart specifications containing the full dataset.
116 | The advantage of this is that the data used to make the chart is entirely transparent; the disadvantage is that it causes issues as datasets grow large.
117 | To prevent users from inadvertently crashing their browsers by trying to send too much data to the frontend, Altair limits the data size by default.
118 | For example, a histogram of 20000 points:
119 | 
120 | ```python
121 | import altair as alt
122 | import pandas as pd
123 | import numpy as np
124 | 
125 | np.random.seed(12345)
126 | 
127 | df = pd.DataFrame({
128 |     'x': np.random.randn(20000)
129 | })
130 | chart = alt.Chart(df).mark_bar().encode(
131 |     alt.X('x', bin=True),
132 |     y='count()'
133 | )
134 | chart
135 | ```
136 | ```pyerr
137 | MaxRowsError: The number of rows in your dataset is greater than the maximum allowed (5000). For information on how to plot larger datasets in Altair, see the documentation
138 | ```
139 | There are several possible ways around this, as mentioned in Altair's [FAQ](https://altair-viz.github.io/user_guide/faq.html#maxrowserror-how-can-i-plot-large-datasets).
140 | Altair-transform provides another option via the ``transform_chart()`` function, which pre-transforms the data according to the chart specification, so that the final specification holds the aggregated data rather than the full dataset:
141 | ```python
142 | from altair_transform import transform_chart
143 | new_chart = transform_chart(chart)
144 | new_chart
145 | ```
146 | ![Altair Visualization](https://raw.githubusercontent.com/altair-viz/altair-transform/master/images/histogram.png)
147 | 
148 | Examining the new chart specification, we can see that it contains the pre-aggregated dataset:
149 | ```python
150 | new_chart.data
151 | ```
152 | 
|   | x_binned | x_binned2 | count |
|---|---|---|---|
| 0 | -4.0 | -3.0 | 29 |
| 1 | -3.0 | -2.0 | 444 |
| 2 | -2.0 | -1.0 | 2703 |
| 3 | -1.0 | 0.0 | 6815 |
| 4 | 0.0 | 1.0 | 6858 |
| 5 | 1.0 | 2.0 | 2706 |
| 6 | 2.0 | 3.0 | 423 |
| 7 | 3.0 | 4.0 | 22 |
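
Since only these eight aggregated rows are embedded in the specification, the chart stays far below Altair's default row limit no matter how large the input grows. A quick check, assuming the `df` and `new_chart` objects from above:

```python
len(df), len(new_chart.data)  # (20000, 8)
```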
212 | 213 | ## Limitations 214 | 215 | ``altair_transform`` currently works only for non-compound charts; that is, it cannot transform or extract data from layered, faceted, repeated, or concatenated charts. 216 | 217 | There are also a number of less-used transform options that are not yet fully supported. These should explicitly raise a ``NotImplementedError`` if you attempt to use them. 218 | -------------------------------------------------------------------------------- /altair_transform/utils/_evaljs.py: -------------------------------------------------------------------------------- 1 | """Functionality to evaluate contents of the ast""" 2 | from functools import singledispatch, wraps 3 | import operator 4 | import re 5 | from typing import Any, Dict, List, Union 6 | 7 | from altair_transform.utils import ast, Parser 8 | 9 | __all__ = ["evaljs", "undefined", "JSRegex"] 10 | 11 | 12 | class _UndefinedType(object): 13 | def __repr__(self): 14 | return "undefined" 15 | 16 | 17 | undefined = _UndefinedType() 18 | 19 | 20 | class JSRegex: 21 | _flagmap: Dict[str, re.RegexFlag] = { 22 | "i": re.I, 23 | "m": re.M, 24 | "s": re.S, 25 | "u": re.U, 26 | } 27 | 28 | def __init__(self, pattern: str, flags: str = ""): 29 | self._pattern = pattern 30 | self._flags = flags 31 | self._regex = re.compile(pattern, self._reflags()) 32 | 33 | def __eq__(self, other): 34 | if isinstance(other, JSRegex): 35 | return (self._pattern, self._flags) == (other._pattern, other._flags) 36 | 37 | def _reflags(self) -> re.RegexFlag: 38 | flags = re.RegexFlag(0) 39 | for key, flag in self._flagmap.items(): 40 | if key in self._flags: 41 | flags |= flag 42 | return flags 43 | 44 | def test(self, string: str) -> bool: 45 | if "y" in self._flags: 46 | return bool(self._regex.match(string)) 47 | else: 48 | return bool(self._regex.search(string)) 49 | 50 | def replace(self, string: str, replacement: str) -> str: 51 | if "g" in self._flags: 52 | return self._regex.sub(replacement, string) 53 | else: 54 | return self._regex.sub(replacement, string, count=1) 55 | 56 | 57 | def evaljs(expression: Union[str, ast.Expr], namespace: dict = None) -> Any: 58 | """Evaluate a javascript expression, optionally with a namespace.""" 59 | if isinstance(expression, str): 60 | parser = Parser() 61 | expression = parser.parse(expression) 62 | return visit(expression, namespace or {}) 63 | 64 | 65 | @singledispatch 66 | def visit(obj: Any, namespace: dict) -> Any: 67 | return obj 68 | 69 | 70 | @visit.register(ast.Expr) 71 | def _visit_expr(obj: ast.Expr, namespace: dict) -> Any: 72 | return obj.value 73 | 74 | 75 | @visit.register(ast.BinOp) 76 | def _visit_binop(obj: ast.BinOp, namespace: dict) -> Any: 77 | if obj.op not in BINARY_OPERATORS: 78 | raise NotImplementedError(f"Binary Operator A {obj.op} B") 79 | op = BINARY_OPERATORS[obj.op] 80 | return op(visit(obj.lhs, namespace), visit(obj.rhs, namespace)) 81 | 82 | 83 | @visit.register(ast.UnOp) 84 | def _visit_unop(obj: ast.UnOp, namespace: dict) -> Any: 85 | if obj.op not in UNARY_OPERATORS: 86 | raise NotImplementedError(f"Unary Operator {obj.op}x") 87 | op = UNARY_OPERATORS[obj.op] 88 | return op(visit(obj.rhs, namespace)) 89 | 90 | 91 | @visit.register(ast.TernOp) 92 | def _visit_ternop(obj: ast.TernOp, namespace: dict) -> Any: 93 | if obj.op not in TERNARY_OPERATORS: 94 | raise NotImplementedError(f"Ternary Operator A {obj.op[0]} B {obj.op[1]} C") 95 | op = TERNARY_OPERATORS[obj.op] 96 | return op( 97 | visit(obj.lhs, namespace), visit(obj.mid, namespace), visit(obj.rhs, 
namespace) 98 | ) 99 | 100 | 101 | @visit.register(ast.Number) 102 | def _visit_number(obj: ast.Number, namespace: dict) -> Any: 103 | return obj.value 104 | 105 | 106 | @visit.register(ast.String) 107 | def _visit_string(obj: ast.String, namespace: dict) -> Any: 108 | return obj.value 109 | 110 | 111 | @visit.register(ast.Regex) 112 | def _visit_regex(obj: ast.Regex, namespace: dict) -> JSRegex: 113 | return JSRegex(obj.value["pattern"], obj.value["flags"]) 114 | 115 | 116 | @visit.register(ast.Global) 117 | def _visit_global(obj: ast.Global, namespace: dict) -> Any: 118 | if obj.name not in namespace: 119 | raise NameError("{0} is not a valid name".format(obj.name)) 120 | return namespace[obj.name] 121 | 122 | 123 | @visit.register(ast.Name) 124 | def _visit_name(obj: ast.Name, namespace: dict) -> str: 125 | return obj.name 126 | 127 | 128 | @visit.register(ast.List) 129 | def _visit_list(obj: ast.List, namespace: dict) -> List: 130 | return [visit(entry, namespace) for entry in obj.entries] 131 | 132 | 133 | @visit.register(ast.Object) 134 | def _visit_object(obj: ast.Object, namespace: dict) -> Any: 135 | def _visit(entry): 136 | if isinstance(entry, tuple): 137 | return tuple(visit(e, namespace) for e in entry) 138 | if isinstance(entry, ast.Name): 139 | return (visit(entry, namespace), visit(ast.Global(entry.name), namespace)) 140 | 141 | return dict(_visit(entry) for entry in obj.entries) 142 | 143 | 144 | @visit.register(ast.Attr) 145 | def _visit_attr(obj: ast.Attr, namespace: dict) -> Any: 146 | obj_ = visit(obj.obj, namespace) 147 | attr = visit(obj.attr, namespace) 148 | if isinstance(obj_, dict): 149 | return obj_.get(attr, undefined) 150 | else: 151 | return getattr(obj_, attr, undefined) 152 | 153 | 154 | @visit.register(ast.Item) 155 | def _visit_item(obj: ast.Item, namespace: dict) -> Any: 156 | obj_ = visit(obj.obj, namespace) 157 | item = visit(obj.item, namespace) 158 | if isinstance(obj_, list) and isinstance(item, float): 159 | item = int(item) 160 | try: 161 | return obj_[item] 162 | except (KeyError, IndexError): 163 | return undefined 164 | 165 | 166 | @visit.register(ast.Func) 167 | def _visit_func(obj: ast.Func, namespace: dict) -> Any: 168 | func = visit(obj.func, namespace) 169 | args = [visit(arg, namespace) for arg in obj.args] 170 | return func(*args) 171 | 172 | 173 | def int_inputs(func): 174 | @wraps(func) 175 | def wrapper(*args): 176 | return float(func(*map(int, args))) 177 | 178 | return wrapper 179 | 180 | 181 | @int_inputs 182 | def zerofill_rshift(lhs: int, rhs: int) -> int: 183 | if lhs < 0: 184 | lhs = lhs + 0x100000000 185 | return lhs >> rhs 186 | 187 | 188 | # TODO: do implicit type conversions ugh... 
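# The tables below map Javascript operator tokens onto Python callables.
# Javascript numbers are doubles, but its bitwise operators act on 32-bit
# integers, so those entries coerce their operands through int() first;
# ">>>" (zero-fill right shift) is emulated above by offsetting negative
# values into the unsigned 32-bit range before shifting.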
189 | UNARY_OPERATORS = { 190 | "~": int_inputs(operator.inv), 191 | "-": operator.neg, 192 | "+": operator.pos, 193 | "!": operator.not_, 194 | } 195 | 196 | 197 | BINARY_OPERATORS = { 198 | "+": operator.add, 199 | "-": operator.sub, 200 | "*": operator.mul, 201 | "/": operator.truediv, 202 | "**": operator.pow, 203 | "%": operator.mod, 204 | "&": int_inputs(operator.and_), 205 | "|": int_inputs(operator.or_), 206 | "^": int_inputs(operator.xor), 207 | "<<": int_inputs(operator.lshift), 208 | ">>": int_inputs(operator.rshift), 209 | ">>>": zerofill_rshift, 210 | "<": operator.lt, 211 | "<=": operator.le, 212 | ">": operator.gt, 213 | ">=": operator.ge, 214 | "==": operator.eq, 215 | "===": operator.eq, 216 | "!=": operator.ne, 217 | "!==": operator.ne, 218 | "&&": lambda a, b: a and b, 219 | "||": lambda a, b: a or b, 220 | } 221 | 222 | 223 | TERNARY_OPERATORS = {("?", ":"): lambda a, b, c: b if a else c} 224 | -------------------------------------------------------------------------------- /altair_transform/tests/test_extract.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from altair_transform.extract import _encoding_to_transform 4 | from typing import Any, Dict, List, NamedTuple 5 | 6 | 7 | class _TestCase(NamedTuple): 8 | encoding: Dict[str, Dict[str, Any]] 9 | expected_encoding: Dict[str, Dict[str, Any]] 10 | expected_transform: List[Dict[str, Any]] 11 | 12 | 13 | @pytest.mark.parametrize( 14 | _TestCase._fields, 15 | [ 16 | _TestCase( 17 | encoding={"x": {"aggregate": "count", "type": "quantitative"}}, 18 | expected_encoding={ 19 | "x": { 20 | "field": "__count", 21 | "type": "quantitative", 22 | "title": "Count of Records", 23 | } 24 | }, 25 | expected_transform=[{"aggregate": [{"op": "count", "as": "__count"}]}], 26 | ), 27 | _TestCase( 28 | encoding={"x": {"field": "foo", "bin": True, "type": "ordinal"}}, 29 | expected_encoding={ 30 | "x": {"field": "foo_binned", "type": "ordinal", "title": "foo (binned)"} 31 | }, 32 | expected_transform=[{"bin": True, "field": "foo", "as": "foo_binned"}], 33 | ), 34 | _TestCase( 35 | encoding={ 36 | "x": {"aggregate": "sum", "field": "people", "type": "quantitative"}, 37 | "y": {"field": "age", "type": "ordinal"}, 38 | }, 39 | expected_encoding={ 40 | "x": { 41 | "field": "sum_people", 42 | "type": "quantitative", 43 | "title": "Sum of people", 44 | }, 45 | "y": {"field": "age", "type": "ordinal"}, 46 | }, 47 | expected_transform=[ 48 | { 49 | "aggregate": [{"op": "sum", "field": "people", "as": "sum_people"}], 50 | "groupby": ["age"], 51 | } 52 | ], 53 | ), 54 | _TestCase( 55 | encoding={ 56 | "x": {"aggregate": "count", "type": "quantitative"}, 57 | "y": {"field": "age", "bin": {"maxbins": 10}, "type": "quantitative"}, 58 | }, 59 | expected_encoding={ 60 | "x": { 61 | "field": "__count", 62 | "type": "quantitative", 63 | "title": "Count of Records", 64 | }, 65 | "y": { 66 | "field": "age_binned", 67 | "bin": "binned", 68 | "type": "quantitative", 69 | "title": "age (binned)", 70 | }, 71 | "y2": {"field": "age_binned2"}, 72 | }, 73 | expected_transform=[ 74 | { 75 | "bin": {"maxbins": 10}, 76 | "field": "age", 77 | "as": ["age_binned", "age_binned2"], 78 | }, 79 | { 80 | "aggregate": [{"op": "count", "as": "__count"}], 81 | "groupby": ["age_binned", "age_binned2"], 82 | }, 83 | ], 84 | ), 85 | _TestCase( 86 | encoding={ 87 | "x": {"aggregate": "count", "type": "quantitative"}, 88 | "y": {"field": "age", "bin": True, "type": "ordinal"}, 89 | }, 90 | expected_encoding={ 91 | "x": { 
92 | "field": "__count", 93 | "type": "quantitative", 94 | "title": "Count of Records", 95 | }, 96 | "y": { 97 | "field": "age_binned", 98 | "type": "ordinal", 99 | "title": "age (binned)", 100 | }, 101 | }, 102 | expected_transform=[ 103 | {"bin": True, "field": "age", "as": "age_binned"}, 104 | { 105 | "aggregate": [{"op": "count", "as": "__count"}], 106 | "groupby": ["age_binned"], 107 | }, 108 | ], 109 | ), 110 | _TestCase( 111 | encoding={ 112 | "x": {"aggregate": "count", "field": "x", "type": "quantitative"}, 113 | "y": {"field": "y", "timeUnit": "day", "type": "ordinal"}, 114 | }, 115 | expected_encoding={ 116 | "x": { 117 | "field": "__count", 118 | "type": "quantitative", 119 | "title": "Count of Records", 120 | }, 121 | "y": { 122 | "field": "day_y", 123 | "timeUnit": "day", 124 | "type": "ordinal", 125 | "title": "y (day)", 126 | }, 127 | }, 128 | expected_transform=[ 129 | {"timeUnit": "day", "field": "y", "as": "day_y"}, 130 | { 131 | "aggregate": [{"field": "x", "op": "count", "as": "__count"}], 132 | "groupby": ["day_y"], 133 | }, 134 | ], 135 | ), 136 | _TestCase( 137 | encoding={ 138 | "x": {"field": "xval", "type": "ordinal"}, 139 | "y": { 140 | "field": "yval", 141 | "type": "quantitative", 142 | "impute": {"value": 0, "method": "mean", "keyvals": [1, 2, 3]}, 143 | }, 144 | "color": {"field": "cval", "type": "nominal"}, 145 | }, 146 | expected_encoding={ 147 | "x": {"field": "xval", "type": "ordinal"}, 148 | "y": {"field": "yval", "type": "quantitative"}, 149 | "color": {"field": "cval", "type": "nominal"}, 150 | }, 151 | expected_transform=[ 152 | { 153 | "impute": "yval", 154 | "key": "xval", 155 | "keyvals": [1, 2, 3], 156 | "groupby": ["cval"], 157 | "value": 0, 158 | "method": "mean", 159 | } 160 | ], 161 | ), 162 | _TestCase( 163 | encoding={ 164 | "x": {"field": "xval", "bin": "binned", "type": "ordinal"}, 165 | "y": {"aggregate": "count", "type": "quantitative"}, 166 | }, 167 | expected_encoding={ 168 | "x": {"field": "xval", "bin": "binned", "type": "ordinal"}, 169 | "y": { 170 | "field": "__count", 171 | "title": "Count of Records", 172 | "type": "quantitative", 173 | }, 174 | }, 175 | expected_transform=[ 176 | {"aggregate": [{"op": "count", "as": "__count"}], "groupby": ["xval"]} 177 | ], 178 | ), 179 | ], 180 | ) 181 | def test_extract_simple_aggregate(encoding, expected_encoding, expected_transform): 182 | encoding, transform = _encoding_to_transform(encoding) 183 | assert encoding == expected_encoding 184 | assert transform == expected_transform 185 | -------------------------------------------------------------------------------- /altair_transform/transform/vega_utils.py: -------------------------------------------------------------------------------- 1 | """Python ports of vega utilities""" 2 | 3 | from typing import Callable, List, Optional, Tuple, Union 4 | import numpy as np 5 | import math 6 | 7 | 8 | # subdivide up to accuracy of 0.1 degrees 9 | MIN_RADIANS = 0.1 * math.pi / 180 10 | 11 | Number = Union[int, float] 12 | 13 | 14 | def calculate_bins( 15 | extent: Tuple[Number, Number], 16 | anchor: Optional[Number] = None, 17 | base: Number = 10, 18 | divide: List[Number] = [5, 2], 19 | maxbins: Number = 10, 20 | minstep: Number = 0, 21 | nice: bool = True, 22 | step: Optional[Number] = None, 23 | steps: Optional[List[Number]] = None, 24 | span: Optional[Number] = None, 25 | ) -> np.ndarray: 26 | """Calculate the bins for a given dataset. 
27 | 
28 |     This is a Python translation of the Javascript function available at
29 |     https://github.com/vega/vega/blob/v5.9.1/packages/vega-statistics/src/bin.js
30 | 
31 |     Parameters
32 |     ----------
33 |     extent: Tuple[Number, Number]
34 |         A two-element ([min, max]) array indicating the range of desired bin values.
35 |     anchor: Number
36 |         A value in the binned domain at which to anchor the bins, shifting the bin boundaries
37 |         if necessary to ensure that a boundary aligns with the anchor value.
38 |         Default value: the minimum bin extent value
39 |     base: Number
40 |         The number base to use for automatic bin determination (default is base 10).
41 |         Default value: 10
42 |     divide: List[Number]
43 |         Scale factors indicating allowable subdivisions. The default value is [5, 2],
44 |         which indicates that for base 10 numbers (the default base), the method may
45 |         consider dividing bin sizes by 5 and/or 2. For example, for an initial step
46 |         size of 10, the method can check if bin sizes of 2 (= 10/5), 5 (= 10/2),
47 |         or 1 (= 10/(5*2)) might also satisfy the given constraints.
48 |         Default value: [5, 2]
49 |     maxbins: Number
50 |         Maximum number of bins.
51 |         Default value: 10
52 |     minstep: Number
53 |         A minimum allowable step size (particularly useful for integer values).
54 |     nice: boolean
55 |         If true, attempts to make the bin boundaries use human-friendly boundaries,
56 |         such as multiples of ten.
57 |         Default value: True
58 |     step: Number
59 |         An exact step size to use between bins.
60 |         Note: If provided, options such as maxbins will be ignored.
61 |     steps: List[Number]
62 |         An array of allowable step sizes to choose from.
63 | 
64 |     Returns
65 |     -------
66 |     bins : numpy.ndarray
67 |         array of bin edges.
68 |     """
69 |     start, stop, step = _bin(
70 |         extent=extent,
71 |         base=base,
72 |         divide=divide,
73 |         maxbins=maxbins,
74 |         minstep=minstep,
75 |         nice=nice,
76 |         step=step,
77 |         steps=steps,
78 |         span=span,
79 |     )
80 | 
81 |     N = math.ceil((stop - start) / step)
82 | 
83 |     if anchor is not None:
84 |         start += anchor - (start + step * math.floor((anchor - start) / step))
85 | 
86 |     return start + step * np.arange(N + 1)
87 | 
88 | 
89 | def _bin(
90 |     extent: Tuple[Number, Number],
91 |     base: Number = 10,
92 |     divide: List[Number] = [5, 2],
93 |     maxbins: Number = 10,
94 |     minstep: Number = 0,
95 |     nice: bool = True,
96 |     step: Optional[Number] = None,
97 |     steps: Optional[List[Number]] = None,
98 |     span: Optional[Number] = None,
99 | ) -> Tuple[Number, Number, Number]:
100 |     """Calculate the bins for a given dataset.
101 | 
102 |     This is a Python translation of the Javascript function available at
103 |     https://github.com/vega/vega/blob/v5.9.1/packages/vega-statistics/src/bin.js
104 |     """
105 |     min_, max_ = extent
106 |     assert max_ > min_
107 |     span = span or (max_ - min_) or abs(min_) or 1
108 |     logb = math.log(base)
109 | 
110 |     if step is not None:
111 |         # If step is provided, we use it.
112 |         pass
113 |     elif steps is not None:
114 |         # If steps provided, limit choice to acceptable sizes.
115 |         v = span / maxbins
116 |         small = [s for s in steps if s < v]
117 |         step = max(small) if small else min(steps)
118 |     else:
119 |         # Otherwise use span to determine step size.
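        # (start from the power of `base` at which roughly `maxbins` bins
        # cover the span; the loops below then widen or subdivide that step
        # until the bin-count constraint is met)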
120 | level = math.ceil(math.log(maxbins) / logb) 121 | step = max(minstep, pow(base, round(math.log(span) / logb) - level)) 122 | 123 | # increase step size if too many bins 124 | while math.ceil(span / step) > maxbins: 125 | step *= base 126 | 127 | # decrease step size if allowed 128 | for div in divide: 129 | v = step / div 130 | if v >= minstep and span / v <= maxbins: 131 | step = v 132 | 133 | # update precision of min_ and max_ 134 | v = math.log(step) 135 | precision = 0 if v >= 0 else math.floor(-v / logb) + 1 136 | eps = pow(base, -precision - 1) 137 | if nice: 138 | v = math.floor(min_ / step + eps) * step 139 | min_ = v - step if min_ < v else v 140 | max_ = math.ceil(max_ / step) * step 141 | 142 | start = min_ 143 | stop = max_ if max_ != min_ else min_ + step 144 | return start, stop, step 145 | 146 | 147 | def adaptive_sample( 148 | f: Callable[[np.ndarray], np.ndarray], 149 | extent: Tuple[float, float], 150 | min_steps: int = 25, 151 | max_steps: int = 200, 152 | ) -> Tuple[np.ndarray, np.ndarray]: 153 | """Adaptive sampling of a function. 154 | 155 | This is a Python translation of the Javascript function available at 156 | https://github.com/vega/vega/blob/v5.9.1/packages/vega-statistics/src/sampleCurve.js 157 | 158 | Parameters 159 | ---------- 160 | f : callable 161 | Function to be adaptively sampled 162 | extent : tuple 163 | The extent of the sampling 164 | min_steps : int 165 | The minimum number of steps to consider 166 | max_steps : int 167 | The maximum number of steps to consider 168 | 169 | Returns 170 | ------- 171 | x, y : np.ndarray 172 | The sampled function 173 | """ 174 | 175 | min_x, max_x = extent 176 | span = max_x - min_x 177 | stop = span / max_steps 178 | 179 | # sample minimum points on uniform grid 180 | x = min_x + (np.arange(min_steps + 1) / min_steps) * span 181 | y = f(x) 182 | 183 | if min_steps == max_steps: 184 | # no adaptation, sample uniform grid directly and return 185 | return x, y 186 | 187 | # move on to perform adaptive refinement 188 | start_grid = list(zip(x, y)) 189 | prev, next_ = start_grid[:1], start_grid[:0:-1] 190 | 191 | while next_: 192 | p0, p1 = prev[-1], next_[-1] 193 | 194 | # midpoint for potential curve subdivision 195 | xm = (p0[0] + p1[0]) / 2 196 | pm = (xm, f(xm)) 197 | 198 | if pm[0] - p0[0] >= stop and _angleDelta(p0, pm, p1) > MIN_RADIANS: 199 | # maximum resolution has not yet been met, and 200 | # subdivision midpoint sufficiently different from endpoint 201 | # save subdivision, push midpoint onto the visitation stack 202 | next_.append(pm) 203 | else: 204 | # subdivision midpoint sufficiently similar to endpoint 205 | # skip subdivision, store endpoint, move to next point on the stack 206 | prev.append(p1) 207 | next_.pop() 208 | out = np.array(prev) 209 | return out[:, 0], out[:, 1] 210 | 211 | 212 | def _angleDelta( 213 | p: Tuple[float, float], q: Tuple[float, float], r: Tuple[float, float] 214 | ) -> float: 215 | a0 = np.arctan2(r[1] - p[1], r[0] - p[0]) 216 | a1 = np.arctan2(q[1] - p[1], q[0] - p[0]) 217 | return abs(a0 - a1) 218 | -------------------------------------------------------------------------------- /altair_transform/transform/regression.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from typing import Dict, Optional, Tuple, Type 3 | 4 | import altair as alt 5 | import numpy as np 6 | from numpy.polynomial import Polynomial 7 | import pandas as pd 8 | from .visitor import visit 9 | from .vega_utils import adaptive_sample 10 | 11 | 
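# Pad a fitted coefficient array with trailing zeros up to length k, so that
# each model's params() always reports the expected number of coefficients
# even when numpy returns a shorter coefficient vector for a degenerate fit.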
12 | def _ensure_length(coef: np.ndarray, k: int) -> np.ndarray: 13 | return np.hstack([coef, np.zeros(k - len(coef), dtype=coef.dtype)]) 14 | 15 | 16 | @visit.register(alt.RegressionTransform) 17 | def visit_regression( 18 | transform: alt.RegressionTransform, df: pd.DataFrame 19 | ) -> pd.DataFrame: 20 | transform = transform.to_dict() 21 | reg = transform["regression"] 22 | on = transform["on"] 23 | extent = transform.get("extent") 24 | method = transform.get("method", "linear") 25 | as_ = transform.get("as", (on, reg)) 26 | groupby = transform.get("groupby") 27 | order = transform.get("order", 3) 28 | params = transform.get("params", False) 29 | 30 | models: Dict[str, Type[Model]] = { 31 | "exp": ExpModel, 32 | "linear": LinearModel, 33 | "log": LogModel, 34 | "poly": PolyModel, 35 | "pow": PowModel, 36 | "quad": QuadModel, 37 | } 38 | 39 | if method not in models: 40 | raise NotImplementedError(f"method={method}") 41 | 42 | M = models[method] 43 | model = M(on=on, reg=reg, extent=extent, as_=as_, order=order) 44 | 45 | if params: 46 | if groupby: 47 | params = df.groupby(groupby).apply(model.params) 48 | params["keys"] = [list(p)[:-1] for p in params.index] 49 | return params.reset_index(drop=True) 50 | else: 51 | return model.params(df) 52 | else: 53 | if groupby: 54 | return ( 55 | df.groupby(groupby) 56 | .apply(model.predict) 57 | .reset_index(groupby) 58 | .reset_index(drop=True) 59 | ) 60 | else: 61 | return model.predict(df) 62 | 63 | 64 | class Model(metaclass=abc.ABCMeta): 65 | _coef: Optional[np.ndarray] 66 | 67 | def __init__( 68 | self, 69 | reg: str, 70 | on: str, 71 | extent: Optional[Tuple[float, float]], 72 | as_: Tuple[str, str], 73 | order: int, 74 | ): 75 | self._reg = reg 76 | self._on = on 77 | self._extent = extent 78 | self._as = as_ 79 | self._order = order 80 | 81 | def params(self, df: pd.DataFrame) -> pd.DataFrame: 82 | """Return a dataframe with model parameters and r-square values. 83 | 84 | Parameters 85 | ---------- 86 | df : pd.DataFrame 87 | The input data to which the model will be fit. 88 | 89 | Returns 90 | ------- 91 | coef : pd.DataFrame 92 | DataFrame with model fit results. 93 | """ 94 | x = df[self._on].values 95 | y = df[self._reg].values 96 | self._fit(x, y) 97 | SS_tot = ((y - y.mean()) ** 2).sum() 98 | SS_res = ((y - self._predict(x)) ** 2).sum() 99 | rsquare = 1 - SS_res / SS_tot 100 | return pd.DataFrame({"coef": [list(self._params())], "rSquared": [rsquare]}) 101 | 102 | def predict(self, df: pd.DataFrame) -> pd.DataFrame: 103 | """Return the fit model 104 | 105 | Parameters 106 | ---------- 107 | df : pd.DataFrame 108 | The input data to which the model will be fit. 109 | 110 | Returns 111 | ------- 112 | model : pd.DataFrame 113 | DataFrame with model fit results. 114 | """ 115 | self._fit(df[self._on].values, df[self._reg].values) 116 | x, y = self._grid(df) 117 | on, reg = self._as 118 | return pd.DataFrame({on: x, reg: y}) 119 | 120 | def _grid(self, df: pd.DataFrame) -> Tuple[np.ndarray, np.ndarray]: 121 | extent = self._extent_from_data(df) 122 | return adaptive_sample(self._predict, extent) 123 | 124 | def _extent_from_data(self, df: pd.DataFrame) -> Tuple[float, float]: 125 | xmin: float = df[self._on].min() 126 | xmax: float = df[self._on].max() 127 | return self._extent or (xmin, xmax) 128 | 129 | @abc.abstractmethod 130 | def _fit(self, x: np.ndarray, y: np.ndarray) -> None: 131 | ... 132 | 133 | @abc.abstractmethod 134 | def _params(self) -> np.ndarray: 135 | ... 
136 | 137 | @abc.abstractmethod 138 | def _predict(self, x: np.ndarray) -> np.ndarray: 139 | ... 140 | 141 | 142 | class ExpModel(Model): 143 | """y = a * e ^ (b * x)""" 144 | 145 | _model: Optional[Polynomial] 146 | 147 | def _fit(self, x: np.ndarray, y: np.ndarray) -> None: 148 | self._model = Polynomial.fit(x, np.log(y), 1, w=np.sqrt(abs(y))) 149 | 150 | def _predict(self, x: np.ndarray) -> np.ndarray: 151 | assert self._model is not None 152 | return np.exp(self._model(x)) 153 | 154 | def _params(self) -> np.ndarray: 155 | assert self._model is not None 156 | log_a, b = _ensure_length( 157 | self._model.convert(domain=self._model.window).coef, 2 158 | ) 159 | return np.array([np.exp(log_a), b]) 160 | 161 | 162 | class LinearModel(Model): 163 | """y = a + b * x""" 164 | 165 | _model: Optional[Polynomial] 166 | 167 | def _grid(self, df: pd.DataFrame) -> Tuple[np.ndarray, np.ndarray]: 168 | extent = self._extent_from_data(df) 169 | x = np.array(extent) 170 | return x, self._predict(np.array(extent)) 171 | 172 | def _fit(self, x: np.ndarray, y: np.ndarray) -> None: 173 | self._model = Polynomial.fit(x, y, 1) 174 | 175 | def _predict(self, x: np.ndarray) -> np.ndarray: 176 | assert self._model is not None 177 | return self._model(x) 178 | 179 | def _params(self): 180 | assert self._model is not None 181 | return _ensure_length(self._model.convert(domain=self._model.window).coef, 2) 182 | 183 | 184 | class LogModel(Model): 185 | """y = a + b * log(x)""" 186 | 187 | _model: Optional[Polynomial] 188 | 189 | def _fit(self, x: np.ndarray, y: np.ndarray) -> None: 190 | self._model = Polynomial.fit(np.log(x), y, 1) 191 | 192 | def _predict(self, x: np.ndarray) -> np.ndarray: 193 | assert self._model is not None 194 | return self._model(np.log(x)) 195 | 196 | def _params(self) -> np.ndarray: 197 | assert self._model is not None 198 | return _ensure_length(self._model.convert(domain=self._model.window).coef, 2) 199 | 200 | 201 | class PolyModel(Model): 202 | """y = a + b * x + ... 
+ k * x^k""" 203 | 204 | _model: Optional[Polynomial] 205 | 206 | def _grid(self, df: pd.DataFrame) -> Tuple[np.ndarray, np.ndarray]: 207 | if self._order == 1: 208 | extent = self._extent_from_data(df) 209 | x = np.array(extent) 210 | return x, self._predict(np.array(extent)) 211 | else: 212 | return super()._grid(df) 213 | 214 | def _fit(self, x: np.ndarray, y: np.ndarray) -> None: 215 | self._model = Polynomial.fit(x, y, self._order) 216 | 217 | def _predict(self, x: np.ndarray) -> np.ndarray: 218 | assert self._model is not None 219 | return self._model(x) 220 | 221 | def _params(self): 222 | assert self._model is not None 223 | return _ensure_length( 224 | self._model.convert(domain=self._model.window).coef, self._order + 1 225 | ) 226 | 227 | 228 | class PowModel(Model): 229 | """y = a * x ^ b""" 230 | 231 | _model: Optional[Polynomial] 232 | 233 | def _fit(self, x: np.ndarray, y: np.ndarray) -> None: 234 | self._model = Polynomial.fit(np.log(x), np.log(y), 1) 235 | 236 | def _predict(self, x: np.ndarray) -> np.ndarray: 237 | assert self._model is not None 238 | return np.exp(self._model(np.log(x))) 239 | 240 | def _params(self) -> np.ndarray: 241 | assert self._model is not None 242 | log_a, b = _ensure_length( 243 | self._model.convert(domain=self._model.window).coef, 2 244 | ) 245 | return np.array([np.exp(log_a), b]) 246 | 247 | 248 | class QuadModel(Model): 249 | """y = a + b * x + c * x^2""" 250 | 251 | _model: Optional[Polynomial] 252 | 253 | def _fit(self, x: np.ndarray, y: np.ndarray) -> None: 254 | self._model = Polynomial.fit(x, y, 2) 255 | 256 | def _predict(self, x: np.ndarray) -> np.ndarray: 257 | assert self._model is not None 258 | return self._model(x) 259 | 260 | def _params(self): 261 | assert self._model is not None 262 | return _ensure_length(self._model.convert(domain=self._model.window).coef, 3) 263 | -------------------------------------------------------------------------------- /altair_transform/utils/_parser.py: -------------------------------------------------------------------------------- 1 | """ 2 | Simplified Javascript expression parser. 3 | """ 4 | # pylint: disable=W,C,R 5 | import os 6 | 7 | from typing import Tuple 8 | 9 | import ply.lex as lex 10 | import ply.yacc as yacc 11 | 12 | from altair_transform.utils import ast 13 | 14 | 15 | # TODO: regexp literals? 
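# The two classes below drive PLY (Python Lex-Yacc): token rules are the
# ``t_*`` attributes and methods, grammar productions are the ``p_*`` methods
# whose docstrings hold the BNF, and each production builds a node from
# ``altair_transform.utils.ast``.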
16 | 17 | 18 | class ParserBase: 19 | """ 20 | Base class for a lexer/parser that has the rules defined as methods 21 | """ 22 | 23 | tokens: Tuple = () 24 | precedence: Tuple = () 25 | 26 | def __init__(self, **kw): 27 | self.debug = kw.get("debug", 0) 28 | try: 29 | modname = ( 30 | os.path.split(os.path.splitext(__file__)[0])[1] 31 | + "_" 32 | + self.__class__.__name__ 33 | ) 34 | except ValueError: 35 | modname = "parser" + "_" + self.__class__.__name__ 36 | self.debugfile = modname + ".dbg" 37 | self.tabmodule = modname + "_" + "parsetab" 38 | 39 | # Build the lexer and parser 40 | lex.lex(module=self, debug=self.debug) 41 | yacc.yacc( 42 | module=self, 43 | debug=self.debug, 44 | debugfile=self.debugfile, 45 | tabmodule=self.tabmodule, 46 | ) 47 | 48 | def parse(self, expression): 49 | return yacc.parse(expression) 50 | 51 | 52 | class Parser(ParserBase): 53 | 54 | tokens = ( 55 | "NAME", 56 | "STRING", 57 | "FLOAT", 58 | "BINARY", 59 | "OCTAL", 60 | "HEX", 61 | "REGEX", 62 | "PLUS", 63 | "MINUS", 64 | "EXP", 65 | "TIMES", 66 | "DIVIDE", 67 | "MODULO", 68 | "PERIOD", 69 | "COMMA", 70 | "COLON", 71 | "QUESTION", 72 | "LPAREN", 73 | "RPAREN", 74 | "LBRACKET", 75 | "RBRACKET", 76 | "LBRACE", 77 | "RBRACE", 78 | "LOGICAL_OR", 79 | "LOGICAL_AND", 80 | "LOGICAL_NOT", 81 | "BITWISE_NOT", 82 | "BITWISE_OR", 83 | "BITWISE_AND", 84 | "BITWISE_XOR", 85 | "LSHIFT", 86 | "RSHIFT", 87 | "ZFRSHIFT", 88 | "GREATER_EQUAL", 89 | "GREATER", 90 | "LESS_EQUAL", 91 | "LESS", 92 | "IDENT", 93 | "NIDENT", 94 | "EQUAL", 95 | "NEQUAL", 96 | ) 97 | 98 | # Tokens 99 | 100 | t_PLUS = r"\+" 101 | t_MINUS = r"-" 102 | t_EXP = r"\*\*" 103 | t_TIMES = r"\*" 104 | t_DIVIDE = r"/" 105 | t_MODULO = r"%" 106 | t_LPAREN = r"\(" 107 | t_RPAREN = r"\)" 108 | t_LBRACKET = r"\[" 109 | t_RBRACKET = r"\]" 110 | t_LBRACE = r"\{" 111 | t_RBRACE = r"\}" 112 | t_PERIOD = r"\." 113 | t_COMMA = r"," 114 | t_COLON = r"\:" 115 | t_QUESTION = r"\?" 116 | t_LOGICAL_OR = r"\|\|" 117 | t_BITWISE_OR = r"\|" 118 | t_LOGICAL_AND = r"&&" 119 | t_BITWISE_AND = r"&" 120 | t_BITWISE_XOR = r"\^" 121 | t_BITWISE_NOT = r"~" 122 | t_LSHIFT = r"<<" 123 | t_ZFRSHIFT = r">>>" 124 | t_RSHIFT = r">>" 125 | t_GREATER_EQUAL = r">=" 126 | t_GREATER = r">" 127 | t_LESS_EQUAL = r"<=" 128 | t_LESS = r"<" 129 | t_IDENT = r"===" 130 | t_EQUAL = r"==" 131 | t_NIDENT = r"!==" 132 | t_NEQUAL = r"!=" 133 | t_LOGICAL_NOT = r"!" 134 | t_NAME = r"[a-zA-Z_][a-zA-Z0-9_]*" 135 | 136 | def t_BINARY(self, t): 137 | r"0[bB][01]+" 138 | t.value = int(t.value, 2) 139 | return t 140 | 141 | def t_OCTAL(self, t): 142 | r"0[oO]?[0-7]+" 143 | t.value = int(t.value, 8) 144 | return t 145 | 146 | def t_HEX(self, t): 147 | r"0[xX][0-9A-Fa-f]+" 148 | t.value = int(t.value, 16) 149 | return t 150 | 151 | def t_FLOAT(self, t): 152 | r"([1-9]\d*(\.\d*)?|0?\.\d+|0)([eE]\d+)?" 153 | t.value = float(t.value) 154 | return t 155 | 156 | def t_STRING(self, t): 157 | r"""(?P["'])((\\{2})*|(.*?[^\\](\\{2})*))(?P=openquote)""" 158 | t.value = bytes(t.value[1:-1], "utf-8").decode("unicode_escape") 159 | return t 160 | 161 | # TODO: actually parse & validate regexps? 
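    # A regex literal is tokenized as a raw pattern/flags pair and compiled
    # later (JSRegex in _evaljs wraps it with Python's ``re``), so malformed
    # patterns surface at evaluation time rather than at parse time.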
162 | def t_REGEX(self, t): 163 | r"\/(?P(?![*+?])(?:[^\r\n\[/\\]|\\.|\[(?:[^\r\n\]\\]|\\.)*\])+)\/(?P[gmisuy]{0,6})" 164 | groups = t.lexer.lexmatch.groupdict() 165 | t.value = {"pattern": groups["REGEX_pattern"], "flags": groups["REGEX_flags"]} 166 | return t 167 | 168 | t_ignore = " \t" 169 | 170 | def t_newline(self, t): 171 | r"\n+" 172 | t.lexer.lineno += t.value.count("\n") 173 | 174 | def t_error(self, t): 175 | raise ValueError("Illegal character '%s'" % t.value[0]) 176 | 177 | # Parsing rules 178 | 179 | precedence = ( 180 | ("right", "QUESTION"), 181 | ("left", "LOGICAL_OR"), 182 | ("left", "LOGICAL_AND"), 183 | ("left", "BITWISE_OR"), 184 | ("left", "BITWISE_XOR"), 185 | ("left", "BITWISE_AND"), 186 | ("left", "EQUAL", "NEQUAL", "IDENT", "NIDENT"), 187 | ("left", "LESS", "LESS_EQUAL", "GREATER", "GREATER_EQUAL"), 188 | ("left", "LSHIFT", "RSHIFT", "ZFRSHIFT"), 189 | ("left", "PLUS", "MINUS"), 190 | ("left", "TIMES", "DIVIDE", "MODULO"), 191 | ("left", "EXP"), 192 | ("right", "UMINUS", "UPLUS", "LOGICAL_NOT", "BITWISE_NOT"), 193 | ) 194 | 195 | def p_expression_binop(self, p): 196 | """ 197 | expression : expression PLUS expression 198 | | expression MINUS expression 199 | | expression TIMES expression 200 | | expression DIVIDE expression 201 | | expression EXP expression 202 | | expression MODULO expression 203 | | expression LESS expression 204 | | expression LESS_EQUAL expression 205 | | expression GREATER expression 206 | | expression GREATER_EQUAL expression 207 | | expression LSHIFT expression 208 | | expression RSHIFT expression 209 | | expression ZFRSHIFT expression 210 | | expression EQUAL expression 211 | | expression IDENT expression 212 | | expression NEQUAL expression 213 | | expression NIDENT expression 214 | | expression BITWISE_AND expression 215 | | expression BITWISE_OR expression 216 | | expression BITWISE_XOR expression 217 | | expression LOGICAL_OR expression 218 | | expression LOGICAL_AND expression 219 | """ 220 | p[0] = ast.BinOp(lhs=p[1], op=p[2], rhs=p[3]) 221 | 222 | def p_expression_ternary(self, p): 223 | "expression : expression QUESTION expression COLON expression" 224 | p[0] = ast.TernOp(op=(p[2], p[4]), lhs=p[1], mid=p[3], rhs=p[5]) 225 | 226 | def p_expression_unaryop(self, p): 227 | """ 228 | expression : MINUS expression %prec UMINUS 229 | | PLUS expression %prec UPLUS 230 | | BITWISE_NOT expression 231 | | LOGICAL_NOT expression 232 | """ 233 | p[0] = ast.UnOp(op=p[1], rhs=p[2]) 234 | 235 | def p_expression_atom(self, p): 236 | """ 237 | expression : atom 238 | """ 239 | p[0] = p[1] 240 | 241 | def p_atom(self, p): 242 | """ 243 | atom : number 244 | | string 245 | | regex 246 | | global 247 | | list 248 | | object 249 | | group 250 | | attraccess 251 | | functioncall 252 | | indexing 253 | """ 254 | p[0] = p[1] 255 | 256 | def p_number(self, p): 257 | """ 258 | number : HEX 259 | | OCTAL 260 | | BINARY 261 | | FLOAT 262 | """ 263 | p[0] = ast.Number(p[1]) 264 | 265 | def p_string(self, p): 266 | "string : STRING" 267 | p[0] = ast.String(p[1]) 268 | 269 | def p_regex(self, p): 270 | "regex : REGEX" 271 | p[0] = ast.Regex(p[1]) 272 | 273 | def p_global(self, p): 274 | "global : NAME" 275 | p[0] = ast.Global(p[1]) 276 | 277 | def p_name(self, p): 278 | "name : NAME" 279 | p[0] = ast.Name(p[1]) 280 | 281 | def p_list(self, p): 282 | """ 283 | list : LBRACKET RBRACKET 284 | | LBRACKET arglist RBRACKET 285 | """ 286 | if len(p) == 3: 287 | p[0] = ast.List([]) 288 | elif len(p) == 4: 289 | p[0] = ast.List(p[2]) 290 | 291 | def p_object(self, p): 
292 | """ 293 | object : LBRACE RBRACE 294 | | LBRACE objectarglist RBRACE 295 | """ 296 | if len(p) == 3: 297 | p[0] = ast.Object([]) 298 | elif len(p) == 4: 299 | p[0] = ast.Object(p[2]) 300 | 301 | def p_objectarglist(self, p): 302 | """ 303 | objectarglist : objectarglist COMMA objectarg 304 | | objectarg 305 | """ 306 | if len(p) == 4: 307 | p[0] = p[1] + [p[3]] 308 | else: 309 | p[0] = [p[1]] 310 | 311 | def p_objectarg(self, p): 312 | """ 313 | objectarg : objectkey COLON expression 314 | | name 315 | """ 316 | if len(p) == 4: 317 | p[0] = (p[1], p[3]) 318 | elif len(p) == 2: 319 | p[0] = p[1] 320 | 321 | def p_objectkey(self, p): 322 | """ 323 | objectkey : name 324 | | string 325 | | number 326 | """ 327 | p[0] = p[1] 328 | 329 | def p_group(self, p): 330 | "group : LPAREN expression RPAREN" 331 | p[0] = p[2] 332 | 333 | def p_attraccess(self, p): 334 | "attraccess : atom PERIOD NAME" 335 | p[0] = ast.Attr(obj=p[1], attr=p[3]) 336 | 337 | def p_indexing(self, p): 338 | "indexing : atom LBRACKET expression RBRACKET" 339 | p[0] = ast.Item(obj=p[1], item=p[3]) 340 | 341 | def p_functioncall(self, p): 342 | """ 343 | functioncall : atom LPAREN RPAREN 344 | | atom LPAREN arglist RPAREN 345 | """ 346 | if len(p) == 4: 347 | p[0] = ast.Func(func=p[1], args=[]) 348 | elif len(p) == 5: 349 | p[0] = ast.Func(func=p[1], args=p[3]) 350 | 351 | def p_arglist(self, p): 352 | """ 353 | arglist : arglist COMMA expression 354 | | expression 355 | """ 356 | if len(p) == 4: 357 | p[0] = p[1] + [p[3]] 358 | else: 359 | p[0] = [p[1]] 360 | 361 | def p_error(self, p): 362 | if p: 363 | raise ValueError(f"Syntax error at '{p.value}'") 364 | else: 365 | raise ValueError("Syntax error at EOF") 366 | 367 | 368 | parser = Parser() 369 | -------------------------------------------------------------------------------- /altair_transform/utils/_parser_Parser_parsetab.py: -------------------------------------------------------------------------------- 1 | 2 | # _parser_Parser_parsetab.py 3 | # This file is automatically generated. Do not edit. 
4 | # pylint: disable=W,C,R 5 | _tabversion = '3.10' 6 | 7 | _lr_method = 'LALR' 8 | 9 | _lr_signature = 'rightQUESTIONleftLOGICAL_ORleftLOGICAL_ANDleftBITWISE_ORleftBITWISE_XORleftBITWISE_ANDleftEQUALNEQUALIDENTNIDENTleftLESSLESS_EQUALGREATERGREATER_EQUALleftLSHIFTRSHIFTZFRSHIFTleftPLUSMINUSleftTIMESDIVIDEMODULOleftEXPrightUMINUSUPLUSLOGICAL_NOTBITWISE_NOTBINARY BITWISE_AND BITWISE_NOT BITWISE_OR BITWISE_XOR COLON COMMA DIVIDE EQUAL EXP FLOAT GREATER GREATER_EQUAL HEX IDENT LBRACE LBRACKET LESS LESS_EQUAL LOGICAL_AND LOGICAL_NOT LOGICAL_OR LPAREN LSHIFT MINUS MODULO NAME NEQUAL NIDENT OCTAL PERIOD PLUS QUESTION RBRACE RBRACKET REGEX RPAREN RSHIFT STRING TIMES ZFRSHIFT\n expression : expression PLUS expression\n | expression MINUS expression\n | expression TIMES expression\n | expression DIVIDE expression\n | expression EXP expression\n | expression MODULO expression\n | expression LESS expression\n | expression LESS_EQUAL expression\n | expression GREATER expression\n | expression GREATER_EQUAL expression\n | expression LSHIFT expression\n | expression RSHIFT expression\n | expression ZFRSHIFT expression\n | expression EQUAL expression\n | expression IDENT expression\n | expression NEQUAL expression\n | expression NIDENT expression\n | expression BITWISE_AND expression\n | expression BITWISE_OR expression\n | expression BITWISE_XOR expression\n | expression LOGICAL_OR expression\n | expression LOGICAL_AND expression\n expression : expression QUESTION expression COLON expression\n expression : MINUS expression %prec UMINUS\n | PLUS expression %prec UPLUS\n | BITWISE_NOT expression\n | LOGICAL_NOT expression\n \n expression : atom\n \n atom : number\n | string\n | regex\n | global\n | list\n | object\n | group\n | attraccess\n | functioncall\n | indexing\n \n number : HEX\n | OCTAL\n | BINARY\n | FLOAT\n string : STRINGregex : REGEXglobal : NAMEname : NAME\n list : LBRACKET RBRACKET\n | LBRACKET arglist RBRACKET\n \n object : LBRACE RBRACE\n | LBRACE objectarglist RBRACE\n \n objectarglist : objectarglist COMMA objectarg\n | objectarg\n \n objectarg : objectkey COLON expression\n | name\n \n objectkey : name\n | string\n | number\n group : LPAREN expression RPARENattraccess : atom PERIOD NAMEindexing : atom LBRACKET expression RBRACKET\n functioncall : atom LPAREN RPAREN\n | atom LPAREN arglist RPAREN\n \n arglist : arglist COMMA expression\n | expression\n ' 10 | 11 | _lr_action_items = 
{'MINUS':([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,55,56,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,97,98,100,101,102,103,104,105,107,108,],[3,28,3,3,3,3,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,-25,-24,-26,-27,3,3,-47,28,-49,28,-1,-2,-3,-4,-5,-6,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,-59,-61,28,-48,3,-50,3,-58,3,-62,-60,28,28,28,]),'PLUS':([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,55,56,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,97,98,100,101,102,103,104,105,107,108,],[2,27,2,2,2,2,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,-25,-24,-26,-27,2,2,-47,27,-49,27,-1,-2,-3,-4,-5,-6,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,-59,-61,27,-48,2,-50,2,-58,2,-62,-60,27,27,27,]),'BITWISE_NOT':([0,2,3,4,5,24,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,55,56,97,100,102,],[4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,]),'LOGICAL_NOT':([0,2,3,4,5,24,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,55,56,97,100,102,],[5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,]),'HEX':([0,2,3,4,5,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,55,56,97,99,100,102,],[17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,]),'OCTAL':([0,2,3,4,5,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,55,56,97,99,100,102,],[18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,18,]),'BINARY':([0,2,3,4,5,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,55,56,97,99,100,102,],[19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,19,]),'FLOAT':([0,2,3,4,5,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,55,56,97,99,100,102,],[20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,]),'STRING':([0,2,3,4,5,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,55,56,97,99,100,102,],[21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,]),'REGEX':([0,2,3,4,5,24,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,55,56,97,100,102,],[22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,]),'NAME':([0,2,3,4,5,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,54,55,56,97,99,100,102,],[23,23,23,23,23,23,67,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,23,92,23,23,23,67,23,23,]),'LBRACKET':([0,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,55,56,57,60,92,93,96,97,98,100,101,102,103,104,],[24,24,24,24,24,56,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,2
4,24,24,-47,-49,-59,-61,-48,24,-50,24,-58,24,-62,-60,]),'LBRACE':([0,2,3,4,5,24,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,55,56,97,100,102,],[25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,25,]),'LPAREN':([0,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,55,56,57,60,92,93,96,97,98,100,101,102,103,104,],[26,26,26,26,26,55,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,26,26,26,26,26,26,26,26,26,26,26,26,26,26,26,26,26,26,26,26,26,26,26,26,26,26,26,-47,-49,-59,-61,-48,26,-50,26,-58,26,-62,-60,]),'$end':([1,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,60,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,92,93,96,98,101,103,104,108,],[0,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,-47,-49,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-59,-61,-48,-50,-58,-62,-60,-23,]),'TIMES':([1,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,98,101,103,104,105,107,108,],[29,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,-47,29,-49,29,29,29,-3,-4,-5,-6,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,-59,-61,29,-48,-50,-58,-62,-60,29,29,29,]),'DIVIDE':([1,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,98,101,103,104,105,107,108,],[30,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,-47,30,-49,30,30,30,-3,-4,-5,-6,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,30,-59,-61,30,-48,-50,-58,-62,-60,30,30,30,]),'EXP':([1,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,98,101,103,104,105,107,108,],[31,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,-47,31,-49,31,31,31,31,31,-5,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,-59,-61,31,-48,-50,-58,-62,-60,31,31,31,]),'MODULO':([1,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,98,101,103,104,105,107,108,],[32,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,-47,32,-49,32,32,32,-3,-4,-5,-6,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,-59,-61,32,-48,-50,-58,-62,-60,32,32,32,]),'LESS':([1,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,98,101,103,104,105,107,108,],[33,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,-47,33,-49,33,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,33,33,33,33,33,33,33,33,33,33,-59,-61,33,-48,-50,-58,-62,-60,33,33,33,]),'LESS_EQUAL':([1,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,98,101,103,104,105,107,108,],[34,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,-47,34,-49,34,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,34,34,34,34,34,34,34,34,34,34,-59,-61,34,-48,-50,-58,
-62,-60,34,34,34,]),'GREATER':([1,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,98,101,103,104,105,107,108,],[35,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,-47,35,-49,35,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,35,35,35,35,35,35,35,35,35,35,-59,-61,35,-48,-50,-58,-62,-60,35,35,35,]),'GREATER_EQUAL':([1,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,98,101,103,104,105,107,108,],[36,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,-47,36,-49,36,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,36,36,36,36,36,36,36,36,36,36,-59,-61,36,-48,-50,-58,-62,-60,36,36,36,]),'LSHIFT':([1,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,98,101,103,104,105,107,108,],[37,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,-47,37,-49,37,-1,-2,-3,-4,-5,-6,37,37,37,37,-11,-12,-13,37,37,37,37,37,37,37,37,37,37,-59,-61,37,-48,-50,-58,-62,-60,37,37,37,]),'RSHIFT':([1,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,98,101,103,104,105,107,108,],[38,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,-47,38,-49,38,-1,-2,-3,-4,-5,-6,38,38,38,38,-11,-12,-13,38,38,38,38,38,38,38,38,38,38,-59,-61,38,-48,-50,-58,-62,-60,38,38,38,]),'ZFRSHIFT':([1,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,98,101,103,104,105,107,108,],[39,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,-47,39,-49,39,-1,-2,-3,-4,-5,-6,39,39,39,39,-11,-12,-13,39,39,39,39,39,39,39,39,39,39,-59,-61,39,-48,-50,-58,-62,-60,39,39,39,]),'EQUAL':([1,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,98,101,103,104,105,107,108,],[40,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,-47,40,-49,40,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,40,40,40,40,40,40,-59,-61,40,-48,-50,-58,-62,-60,40,40,40,]),'IDENT':([1,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,98,101,103,104,105,107,108,],[41,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,-47,41,-49,41,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,41,41,41,41,41,41,-59,-61,41,-48,-50,-58,-62,-60,41,41,41,]),'NEQUAL':([1,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,98,101,103,104,105,107,108,],[42,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,-47,42,-49,42,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,42,42,42,42,42,42,-59,-61,42,-48,-50,-58,-62,-60,42,42,42,]),'NIDENT':([1,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,9
6,98,101,103,104,105,107,108,],[43,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,-47,43,-49,43,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,43,43,43,43,43,43,-59,-61,43,-48,-50,-58,-62,-60,43,43,43,]),'BITWISE_AND':([1,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,98,101,103,104,105,107,108,],[44,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,-47,44,-49,44,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18,44,44,44,44,44,-59,-61,44,-48,-50,-58,-62,-60,44,44,44,]),'BITWISE_OR':([1,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,98,101,103,104,105,107,108,],[45,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,-47,45,-49,45,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,45,45,45,-59,-61,45,-48,-50,-58,-62,-60,45,45,45,]),'BITWISE_XOR':([1,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,98,101,103,104,105,107,108,],[46,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,-47,46,-49,46,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18,46,-20,46,46,46,-59,-61,46,-48,-50,-58,-62,-60,46,46,46,]),'LOGICAL_OR':([1,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,98,101,103,104,105,107,108,],[47,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,-47,47,-49,47,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,47,-59,-61,47,-48,-50,-58,-62,-60,47,47,47,]),'LOGICAL_AND':([1,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,98,101,103,104,105,107,108,],[48,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,-47,48,-49,48,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,48,-22,48,-59,-61,48,-48,-50,-58,-62,-60,48,48,48,]),'QUESTION':([1,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,95,96,98,101,103,104,105,107,108,],[49,-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,-47,49,-49,49,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,49,-59,-61,49,-48,-50,-58,-62,-60,49,49,49,]),'RBRACKET':([6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,50,51,52,53,57,58,59,60,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,92,93,95,96,98,101,103,104,105,108,],[-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,57,-25,-24,-26,-27,-47,96,-64,-49,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-59,-61,104,-48,-50,-58,-62,-60,-63,-23,]),'COMMA':([6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,58,59,60,61,62,64,67,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,92,93,94,96,98,101,103,104,105,106,107,108,],[-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-
27,-47,97,-64,-49,99,-52,-54,-46,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-59,-61,97,-48,-50,-58,-62,-60,-63,-51,-53,-23,]),'RPAREN':([6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,55,57,59,60,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,92,93,94,96,98,101,103,104,105,108,],[-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,93,-47,-64,-49,101,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-59,-61,103,-48,-50,-58,-62,-60,-63,-23,]),'COLON':([6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,50,51,52,53,57,60,63,64,65,66,67,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,96,98,101,103,104,108,],[-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-25,-24,-26,-27,-47,-49,100,-55,-56,-57,-46,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,102,-59,-61,-48,-50,-58,-62,-60,-23,]),'RBRACE':([6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,25,50,51,52,53,57,60,61,62,64,67,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,92,93,96,98,101,103,104,106,107,108,],[-28,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,60,-25,-24,-26,-27,-47,-49,98,-52,-54,-46,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-59,-61,-48,-50,-58,-62,-60,-51,-53,-23,]),'PERIOD':([6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,57,60,92,93,96,98,101,103,104,],[54,-29,-30,-31,-32,-33,-34,-35,-36,-37,-38,-39,-40,-41,-42,-43,-44,-45,-47,-49,-59,-61,-47,-50,-58,-62,-60,]),}
12 | 
13 | _lr_action = {}
14 | for _k, _v in _lr_action_items.items():
15 |    for _x,_y in zip(_v[0],_v[1]):
16 |       if not _x in _lr_action: _lr_action[_x] = {}
17 |       _lr_action[_x][_k] = _y
18 | del _lr_action_items
19 | 
20 | _lr_goto_items = {'expression':([0,2,3,4,5,24,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,55,56,97,100,102,],[1,50,51,52,53,59,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,59,95,105,107,108,]),'atom':([0,2,3,4,5,24,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,55,56,97,100,102,],[6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,]),'number':([0,2,3,4,5,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,55,56,97,99,100,102,],[7,7,7,7,7,7,66,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,66,7,7,]),'string':([0,2,3,4,5,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,55,56,97,99,100,102,],[8,8,8,8,8,8,65,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,65,8,8,]),'regex':([0,2,3,4,5,24,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,55,56,97,100,102,],[9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,]),'global':([0,2,3,4,5,24,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,55,56,97,100,102,],[10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,]),'list':([0,2,3,4,5,24,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,55,56,97,100,102,],[11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,]),'object':([0,2,3,4,5,24,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,55,56,97,100,102,],[12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,]),'group':([0,2,3,4,5,24,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,55,56,97,100,102,],[13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,]),'attraccess':([0,2,3,4,5,24,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,55,56,97,100,102,],[14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,]),'functioncall':([0,2,3,4,5,24,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,55,56,97,100,102,],[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,]),'indexing':([0,2,3,4,5,24,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,55,56,97,100,102,],[16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,]),'arglist':([24,55,],[58,94,]),'objectarglist':([25,],[61,]),'objectarg':([25,99,],[62,106,]),'objectkey':([25,99,],[63,63,]),'name':([25,99,],[64,64,]),}
21 | 
22 | _lr_goto = {}
23 | for _k, _v in _lr_goto_items.items():
24 |    for _x, _y in zip(_v[0], _v[1]):
25 |       if not _x in _lr_goto: _lr_goto[_x] = {}
26 |       _lr_goto[_x][_k] = _y
27 | del _lr_goto_items
28 | _lr_productions = [
29 |   ("S' -> expression","S'",1,None,None,None),
30 |   ('expression -> expression PLUS expression','expression',3,'p_expression_binop','_parser.py',197),
31 |   ('expression -> expression MINUS expression','expression',3,'p_expression_binop','_parser.py',198),
32 |   ('expression -> expression TIMES expression','expression',3,'p_expression_binop','_parser.py',199),
33 |   ('expression -> expression DIVIDE expression','expression',3,'p_expression_binop','_parser.py',200),
34 |   ('expression -> expression EXP expression','expression',3,'p_expression_binop','_parser.py',201),
35 |   ('expression -> expression MODULO expression','expression',3,'p_expression_binop','_parser.py',202),
36 |   ('expression -> expression LESS expression','expression',3,'p_expression_binop','_parser.py',203),
37 |   ('expression -> expression LESS_EQUAL expression','expression',3,'p_expression_binop','_parser.py',204),
38 |   ('expression -> expression GREATER expression','expression',3,'p_expression_binop','_parser.py',205),
39 |   ('expression -> expression GREATER_EQUAL expression','expression',3,'p_expression_binop','_parser.py',206),
40 |   ('expression -> expression LSHIFT expression','expression',3,'p_expression_binop','_parser.py',207),
41 |   ('expression -> expression RSHIFT expression','expression',3,'p_expression_binop','_parser.py',208),
42 |   ('expression -> expression ZFRSHIFT expression','expression',3,'p_expression_binop','_parser.py',209),
43 |   ('expression -> expression EQUAL expression','expression',3,'p_expression_binop','_parser.py',210),
44 |   ('expression -> expression IDENT expression','expression',3,'p_expression_binop','_parser.py',211),
45 |   ('expression -> expression NEQUAL expression','expression',3,'p_expression_binop','_parser.py',212),
46 |   ('expression -> expression NIDENT expression','expression',3,'p_expression_binop','_parser.py',213),
47 |   ('expression -> expression BITWISE_AND expression','expression',3,'p_expression_binop','_parser.py',214),
48 |   ('expression -> expression BITWISE_OR expression','expression',3,'p_expression_binop','_parser.py',215),
49 |   ('expression -> expression BITWISE_XOR expression','expression',3,'p_expression_binop','_parser.py',216),
50 |   ('expression -> expression LOGICAL_OR expression','expression',3,'p_expression_binop','_parser.py',217),
51 |   ('expression -> expression LOGICAL_AND expression','expression',3,'p_expression_binop','_parser.py',218),
52 |   ('expression -> expression QUESTION expression COLON expression','expression',5,'p_expression_ternary','_parser.py',223),
53 |   ('expression -> MINUS expression','expression',2,'p_expression_unaryop','_parser.py',228),
54 |   ('expression -> PLUS expression','expression',2,'p_expression_unaryop','_parser.py',229),
55 |   ('expression -> BITWISE_NOT expression','expression',2,'p_expression_unaryop','_parser.py',230),
56 |   ('expression -> LOGICAL_NOT expression','expression',2,'p_expression_unaryop','_parser.py',231),
57 |   ('expression -> atom','expression',1,'p_expression_atom','_parser.py',237),
58 |   ('atom -> number','atom',1,'p_atom','_parser.py',243),
59 |   ('atom -> string','atom',1,'p_atom','_parser.py',244),
60 |   ('atom -> regex','atom',1,'p_atom','_parser.py',245),
61 |   ('atom -> global','atom',1,'p_atom','_parser.py',246),
62 |   ('atom -> list','atom',1,'p_atom','_parser.py',247),
63 |   ('atom -> object','atom',1,'p_atom','_parser.py',248),
64 |   ('atom -> group','atom',1,'p_atom','_parser.py',249),
65 |   ('atom -> attraccess','atom',1,'p_atom','_parser.py',250),
66 |   ('atom -> functioncall','atom',1,'p_atom','_parser.py',251),
67 |   ('atom -> indexing','atom',1,'p_atom','_parser.py',252),
68 |   ('number -> HEX','number',1,'p_number','_parser.py',258),
69 |   ('number -> OCTAL','number',1,'p_number','_parser.py',259),
70 |   ('number -> BINARY','number',1,'p_number','_parser.py',260),
71 |   ('number -> FLOAT','number',1,'p_number','_parser.py',261),
72 |   ('string -> STRING','string',1,'p_string','_parser.py',266),
73 |   ('regex -> REGEX','regex',1,'p_regex','_parser.py',270),
74 |   ('global -> NAME','global',1,'p_global','_parser.py',274),
75 |   ('name -> NAME','name',1,'p_name','_parser.py',278),
76 |   ('list -> LBRACKET RBRACKET','list',2,'p_list','_parser.py',283),
77 |   ('list -> LBRACKET arglist RBRACKET','list',3,'p_list','_parser.py',284),
78 |   ('object -> LBRACE RBRACE','object',2,'p_object','_parser.py',293),
79 |   ('object -> LBRACE objectarglist RBRACE','object',3,'p_object','_parser.py',294),
80 |   ('objectarglist -> objectarglist COMMA objectarg','objectarglist',3,'p_objectarglist','_parser.py',303),
81 |   ('objectarglist -> objectarg','objectarglist',1,'p_objectarglist','_parser.py',304),
82 |   ('objectarg -> objectkey COLON expression','objectarg',3,'p_objectarg','_parser.py',313),
83 |   ('objectarg -> name','objectarg',1,'p_objectarg','_parser.py',314),
84 |   ('objectkey -> name','objectkey',1,'p_objectkey','_parser.py',323),
85 |   ('objectkey -> string','objectkey',1,'p_objectkey','_parser.py',324),
86 |   ('objectkey -> number','objectkey',1,'p_objectkey','_parser.py',325),
87 |   ('group -> LPAREN expression RPAREN','group',3,'p_group','_parser.py',330),
88 |   ('attraccess -> atom PERIOD NAME','attraccess',3,'p_attraccess','_parser.py',334),
89 |   ('indexing -> atom LBRACKET expression RBRACKET','indexing',4,'p_indexing','_parser.py',338),
90 |   ('functioncall -> atom LPAREN RPAREN','functioncall',3,'p_functioncall','_parser.py',343),
91 |   ('functioncall -> atom LPAREN arglist RPAREN','functioncall',4,'p_functioncall','_parser.py',344),
92 |   ('arglist -> arglist COMMA expression','arglist',3,'p_arglist','_parser.py',353),
93 |   ('arglist -> expression','arglist',1,'p_arglist','_parser.py',354),
94 | ]
95 | 
--------------------------------------------------------------------------------
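A note on how PLY-shaped tables like the ones in _parser_Parser_parsetab.py are consumed (this note and the sketch below are illustrative additions, not part of the generated file). _lr_action[state][token] steers the parse loop: a positive entry means "shift the token and enter that state", a negative entry means "reduce by production number -entry of _lr_productions", and 0 means "accept" (visible above as the 0 that '$end' maps to for state 1). After every reduction, _lr_goto[state][nonterminal] names the state to resume in. The parse function below is a hypothetical, minimal driver written only to make that table shape concrete; it assumes tables of exactly this form, substitutes plain (nonterminal, children) tuples for the p_* semantic actions named in _lr_productions, and is not how PLY's own driver (ply.yacc) is implemented.

    def parse(tokens, lr_action, lr_goto, productions):
        """Minimal LR driver over PLY-shaped tables (illustration only).

        ``tokens`` is a list of (type, value) pairs ending with ('$end', None).
        Reductions build (nonterminal, children) tuples rather than calling
        the grammar's p_* handlers, so the result is a bare parse tree.
        """
        states = [0]  # LR state stack; state 0 is always the start state
        values = []   # value stack, parallel to the symbols consumed so far
        i = 0
        while True:
            ttype, tvalue = tokens[i]
            action = lr_action[states[-1]].get(ttype)
            if action is None:
                raise SyntaxError("unexpected %r in state %d" % (ttype, states[-1]))
            if action > 0:
                # Positive entry: shift the lookahead and enter state ``action``.
                states.append(action)
                values.append(tvalue)
                i += 1
            elif action < 0:
                # Negative entry: reduce by production number ``-action``.
                _, nonterminal, length = productions[-action][:3]
                children = values[-length:] if length else []
                if length:
                    del states[-length:]
                    del values[-length:]
                values.append((nonterminal, children))
                # _lr_goto names the state to resume in after the reduction.
                states.append(lr_goto[states[-1]][nonterminal])
            else:
                # Zero entry: accept; the finished value sits on top of the stack.
                return values[-1]

Tracing an input such as 1+2 through this loop (assuming decimal literals lex as FLOAT, so the token stream is FLOAT PLUS FLOAT $end): the first FLOAT is shifted, then reduced through number -> FLOAT, atom -> number, and expression -> atom; PLUS is shifted; the second operand repeats the same reductions; and a final reduction by expression -> expression PLUS expression precedes the accept entry.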