├── pdvega ├── tests │ ├── __init__.py │ ├── test_advanced.py │ ├── utils.py │ ├── test_utils.py │ ├── test_plotting.py │ ├── test_core_common.py │ └── test_core.py ├── themes.py ├── __init__.py ├── _pandas_internals.py ├── _utils.py ├── plotting.py └── _core.py ├── doc ├── sphinxext │ └── pdvega_ext │ │ ├── __init__.py │ │ ├── utils.py │ │ └── pdvegaplot.py ├── requirements.txt ├── _static │ ├── favicon.ico │ ├── pdvega-plot.css │ └── theme_overrides.css ├── .gitignore ├── API.rst ├── Makefile ├── sync_website.sh ├── installation.rst ├── plotting.rst ├── advanced.rst ├── index.rst ├── make.bat ├── core.rst └── conf.py ├── requirements_dev.txt ├── requirements.txt ├── binder ├── postBuild └── requirements.txt ├── images ├── mpl-scatter.png └── vg-scatter.png ├── MANIFEST.in ├── setup.cfg ├── Makefile ├── .travis.yml ├── CHANGES.md ├── RELEASING.md ├── LICENSE ├── .gitignore ├── setup.py └── README.md /pdvega/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /doc/sphinxext/pdvega_ext/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements_dev.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | flake8 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | altair 2 | numpy 3 | pandas 4 | scipy 5 | -------------------------------------------------------------------------------- /binder/postBuild: -------------------------------------------------------------------------------- 1 | jupyter nbextension install --sys-prefix --py vega3 2 | -------------------------------------------------------------------------------- /binder/requirements.txt: -------------------------------------------------------------------------------- 1 | scipy 2 | matplotlib 3 | pdvega==0.1 4 | vega_datasets 5 | -------------------------------------------------------------------------------- /doc/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | jinja2 3 | numpydoc 4 | vega3 5 | vega_datasets 6 | pandas 7 | -------------------------------------------------------------------------------- /images/mpl-scatter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/altair-viz/pdvega/HEAD/images/mpl-scatter.png -------------------------------------------------------------------------------- /images/vg-scatter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/altair-viz/pdvega/HEAD/images/vg-scatter.png -------------------------------------------------------------------------------- /doc/_static/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/altair-viz/pdvega/HEAD/doc/_static/favicon.ico -------------------------------------------------------------------------------- /doc/.gitignore: -------------------------------------------------------------------------------- 1 | _build 2 | 3 | # gallery is auto-generated; don't version-control it 4 | gallery 5 | _images -------------------------------------------------------------------------------- /pdvega/themes.py: -------------------------------------------------------------------------------- 1 | import altair as alt 2 | 3 | 4 | def enable(theme): 5 | '''set altair to use''' 6 | alt.themes.enable(theme) 7 | -------------------------------------------------------------------------------- /doc/API.rst: -------------------------------------------------------------------------------- 1 | API Reference 2 | ============= 3 | 4 | .. automodule:: pdvega 5 | :members: 6 | :imported-members: 7 | :undoc-members: 8 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.md 2 | include LICENSE 3 | include Makefile 4 | recursive-include images/*.png 5 | recursive-include pdvega *.py *.json *.csv *.tsv 6 | -------------------------------------------------------------------------------- /doc/_static/pdvega-plot.css: -------------------------------------------------------------------------------- 1 | .vega-actions a { 2 | margin-right: 12px; 3 | color: #757575; 4 | font-weight: normal; 5 | font-size: 13px; 6 | } 7 | 8 | .vega-embed { 9 | margin-bottom: 20px; 10 | margin-top: 20px; 11 | } 12 | 13 | -------------------------------------------------------------------------------- /pdvega/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | import altair as alt 3 | from . import plotting, themes 4 | from ._core import FramePlotMethods, SeriesPlotMethods 5 | from .plotting import scatter_matrix, andrews_curves, parallel_coordinates, lag_plot 6 | 7 | __version__ = '0.2.01.dev0' 8 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = 3 | E # Ignore all PEP8 rules 4 | W # Ignore all whitespace rules 5 | F811 # Ignore variable redefinitions until https://github.com/altair-viz/altair/issues/734 is resolved 6 | 7 | [metadata] 8 | description-file = README.md 9 | license_file = LICENSE 10 | 11 | 12 | [bdist_wheel] 13 | universal = 1 -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | install 3 | 4 | install: 5 | python setup.py install 6 | 7 | test: 8 | python -m pytest --pyargs --doctest-modules pdvega 9 | 10 | test-coverage: 11 | python -m pytest --pyargs --doctest-modules --cov=pdvega --cov-report term pdvega 12 | 13 | test-coverage-html: 14 | python -m pytest --pyargs --doctest-modules --cov=pdvega --cov-report html pdvega 15 | -------------------------------------------------------------------------------- /pdvega/tests/test_advanced.py: -------------------------------------------------------------------------------- 1 | import pdvega # noqa 2 | import pandas as pd 3 | 4 | 5 | def test_advanced(): 6 | df = pd.Series(range(10)) 7 | plot = df.vgplot.line() 8 | 9 | plot['encoding']['x']['scale'] = {'zero': False} 10 | spec = plot.to_dict() 11 | assert 'scale' in spec['encoding']['x'] 12 | assert spec['encoding']['x']['scale']['zero'] is False 13 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 4 | - 2.7 5 | - 3.5 6 | - 3.6 7 | 8 | env: 9 | global: 10 | - TEST_DIR=/tmp/_pdvega/ 11 | 12 | before_install: 13 | - pip install pip --upgrade 14 | - pip install -U pytest 15 | 16 | install: 17 | - pip install -e .[dev] 18 | 19 | script: 20 | - flake8 ./ 21 | - mkdir -p $TEST_DIR 22 | - cd $TEST_DIR && python -m pytest --pyargs --doctest-modules pdvega 23 | -------------------------------------------------------------------------------- /doc/_static/theme_overrides.css: -------------------------------------------------------------------------------- 1 | /* override table width restrictions */ 2 | @media screen and (min-width: 767px) { 3 | 4 | .wy-table-responsive table td { 5 | /* !important prevents the common CSS stylesheets from overriding 6 | this as on RTD they are loaded after this stylesheet */ 7 | white-space: normal !important; 8 | } 9 | 10 | .wy-table-responsive { 11 | overflow: visible !important; 12 | } 13 | } 14 | 15 | .rst-content dl:not(.docutils) dt em { 16 | font-style: normal; !important; 17 | line-height: 1.4em; !important; 18 | } -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = pdvega 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /pdvega/tests/utils.py: -------------------------------------------------------------------------------- 1 | IGNORE = object() 2 | 3 | 4 | def check_encodings(chart, **fields): 5 | edict = chart.encoding.to_dict() 6 | assert set(edict.keys()) == set(fields.keys()) 7 | for encoding, expected_field in fields.items(): 8 | if expected_field is IGNORE: 9 | continue 10 | 11 | actual_field = edict[encoding]['field'] 12 | if actual_field != expected_field: 13 | raise ValueError("Expected '{0}' encoding to be '{1}'; got '{2}'" 14 | "".format(encoding, expected_field, actual_field)) 15 | 16 | 17 | def get_data(chart): 18 | return chart.data 19 | 20 | 21 | def validate_vegalite(chart): 22 | assert chart.to_dict(validate=True) 23 | -------------------------------------------------------------------------------- /CHANGES.md: -------------------------------------------------------------------------------- 1 | Change Log 2 | ========== 3 | 4 | 5 | Release v0.2 (Unreleased) 6 | ------------------------- 7 | - Fixed x-axis of Andrews curves 8 | - Added layering support via the ``ax`` argument to all non-compound plot types 9 | 10 | Release v0.1 (January 31, 2018) 11 | ------------------------------- 12 | 13 | - Initial release: 14 | 15 | Basic plot methods: 16 | 17 | - data.vgplot.line() 18 | - data.vgplot.scatter() 19 | - data.vgplot.area() 20 | - data.vgplot.bar() 21 | - data.vgplot.barh() 22 | - data.vgplot.hist() 23 | - data.vgplot.kde() 24 | - data.vgplot.denity() 25 | - data.vgplot.hexbin() 26 | - data.vgplot.heatmap() 27 | 28 | Specialized plot methods 29 | 30 | - pdvega.plotting.scatter_matrix() 31 | - pdvega.plotting.andrews_curves() 32 | - pdvega.plotting.parallel_coordinates() 33 | - pdvega.plotting.lag_plot() 34 | 35 | Plot objects: 36 | 37 | - pdvega.Axes 38 | -------------------------------------------------------------------------------- /doc/sync_website.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # clean the build directory & make the website with the right internal addresses 4 | make clean 5 | sphinx-build -M html . _build -D pdvegaplot_url_root="https://altair-viz.github.io/pdvega/" 6 | 7 | # get git hash for commit message 8 | GITHASH=$(git rev-parse HEAD) 9 | MSG="doc build for commit $GITHASH" 10 | cd _build 11 | 12 | # clone the repo if needed 13 | if test -d pdvega; 14 | then echo "using existing cloned pdvega directory"; 15 | else git clone git@github.com:altair-viz/pdvega.git; 16 | fi 17 | 18 | # sync the website 19 | cd pdvega 20 | git checkout gh-pages 21 | git pull 22 | 23 | # remove all tracked files 24 | git ls-files -z | xargs -0 rm -f 25 | 26 | # sync files from html build 27 | rsync -r ../html/ ./ 28 | 29 | # ensure there is a nojekyl file for github pages 30 | touch .nojekyll 31 | 32 | # add commit, and push to github 33 | git add . --all 34 | git commit -m "$MSG" 35 | git push origin gh-pages 36 | -------------------------------------------------------------------------------- /RELEASING.md: -------------------------------------------------------------------------------- 1 | 1. Update version in pdvega/__init__.py to, e.g. 0.2 2 | 3 | 2. Update version in doc/conf.py (in two places!) 4 | 5 | 3. Make sure CHANGES.md is up to date for the release 6 | 7 | 4. Commit change and push to master 8 | 9 | git add . -u 10 | git commit -m "MAINT: bump version to 0.2" 11 | git push origin master 12 | 13 | 5. Tag the release: 14 | 15 | git tag -a v0.2 -m "version 0.2 release" 16 | git push origin v0.2 17 | 18 | 6. publish to PyPI (Requires correct PyPI owner permissions) 19 | 20 | python setup.py sdist upload 21 | 22 | 7. Build and push the docs website: 23 | 24 | python setup.py install 25 | cd doc 26 | bash sync_website.sh 27 | 28 | 8. update version in pdvega/__init__.py to, e.g. 0.3.0dev0 29 | 30 | 9. update version in doc/conf.py (in two places!) 31 | 32 | 10. add a new changelog entry for the unreleased version 33 | 34 | 11. Commit change and push to master 35 | 36 | git add . -u 37 | git commit -m "MAINT: bump version to 0.3.0dev" 38 | git push origin master 39 | 40 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Jake Vanderplas 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /pdvega/_pandas_internals.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from pandas.core.base import PandasObject 3 | 4 | try: 5 | from pandas.api.types import infer_dtype as infer_dtype 6 | _infer_dtype_kwds = {'skipna': False} 7 | except ImportError: # Pandas before 0.20.0 8 | from pandas.lib import infer_dtype as infer_dtype 9 | _infer_dtype_kwds = {} 10 | 11 | from pandas import DataFrame, Series 12 | 13 | try: 14 | # Import register decorators from pandas >= 0.23 15 | from pandas.api.extensions import (register_dataframe_accessor, 16 | register_series_accessor) 17 | except ImportError: 18 | try: 19 | from pandas.core.accessor import AccessorProperty 20 | except ImportError: # Pandas before 0.22.0 21 | from pandas.core.base import AccessorProperty 22 | 23 | # Define register decorators for pandas < 0.23 24 | class register_dataframe_accessor(object): 25 | """Register custom accessor on DataFrame.""" 26 | 27 | def __init__(self, name): 28 | self.name = name 29 | 30 | def __call__(self, accessor): 31 | setattr(DataFrame, self.name, AccessorProperty(accessor, accessor)) 32 | return accessor 33 | 34 | class register_series_accessor(object): 35 | """Register custom accessor on Series.""" 36 | 37 | def __init__(self, name): 38 | self.name = name 39 | 40 | def __call__(self, accessor): 41 | setattr(Series, self.name, AccessorProperty(accessor, accessor)) 42 | return accessor 43 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .pytest_cache/* 48 | **/.pytest_cache/* 49 | .hypothesis/ 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | local_settings.py 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # dotenv 85 | .env 86 | 87 | # virtualenv 88 | .venv 89 | venv/ 90 | ENV/ 91 | 92 | # Spyder project settings 93 | .spyderproject 94 | .spyproject 95 | 96 | # Rope project settings 97 | .ropeproject 98 | 99 | # mkdocs documentation 100 | /site 101 | 102 | # mypy 103 | .mypy_cache/ 104 | 105 | # emacs 106 | *~ 107 | 108 | # notebooks 109 | Untitled*.ipynb 110 | 111 | # macs 112 | **/.DS_Store 113 | .DS_Store 114 | -------------------------------------------------------------------------------- /doc/sphinxext/pdvega_ext/utils.py: -------------------------------------------------------------------------------- 1 | import ast 2 | import json 3 | import importlib 4 | from hashlib import md5 5 | 6 | from itertools import tee, chain 7 | 8 | 9 | def dict_hash(dct): 10 | """Return a hash of the contents of a dictionary""" 11 | dct_s = json.dumps(dct, sort_keys=True) 12 | 13 | try: 14 | m = md5(dct_s) 15 | except TypeError: 16 | m = md5(dct_s.encode()) 17 | 18 | return m.hexdigest() 19 | 20 | 21 | def exec_then_eval(code, namespace=None): 22 | """Exec a code block & return evaluation of the last line""" 23 | # TODO: make this less brittle. 24 | namespace = namespace or {} 25 | 26 | block = ast.parse(code, mode='exec') 27 | last = ast.Expression(block.body.pop().value) 28 | 29 | exec(compile(block, '', mode='exec'), namespace) 30 | return eval(compile(last, '', mode='eval'), namespace) 31 | 32 | 33 | def import_obj(clsname, default_module=None): 34 | """ 35 | Import the object given by clsname. 36 | If default_module is specified, import from this module. 37 | """ 38 | if default_module is not None: 39 | if not clsname.startswith(default_module + '.'): 40 | clsname = '{0}.{1}'.format(default_module, clsname) 41 | mod, clsname = clsname.rsplit('.', 1) 42 | mod = importlib.import_module(mod) 43 | try: 44 | obj = getattr(mod, clsname) 45 | except AttributeError: 46 | raise ImportError('Cannot import {0} from {1}'.format(clsname, mod)) 47 | return obj 48 | 49 | 50 | 51 | def strip_vl_extension(filename): 52 | """Strip the vega-lite extension (either vl.json or json) from filename""" 53 | for ext in ['.vl.json', '.json']: 54 | if filename.endswith(ext): 55 | return filename[:-len(ext)] 56 | else: 57 | return filename 58 | 59 | 60 | def prev_this_next(it, sentinel=None): 61 | """Utility to return (prev, this, next) tuples from an iterator""" 62 | i1, i2, i3 = tee(it, 3) 63 | next(i3, None) 64 | return zip(chain([sentinel], i1), i2, chain(i3, [sentinel])) 65 | -------------------------------------------------------------------------------- /pdvega/tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import pandas as pd 4 | import numpy as np 5 | 6 | from pdvega._utils import infer_vegalite_type, unpivot_frame, validate_aggregation 7 | 8 | test_cases = [ 9 | (pd.Series(np.random.rand(20)), 'quantitative'), 10 | (pd.Series(range(4)), 'ordinal'), 11 | (pd.Series(range(40)), 'quantitative'), 12 | (pd.Series(['A', 'B', 'C', 'D']), 'nominal'), 13 | (pd.Categorical(['a', 'b', 'c']), 'nominal'), 14 | (pd.date_range('2017', freq='D', periods=10), 'temporal'), 15 | (pd.timedelta_range(0, periods=7), 'temporal') 16 | ] 17 | 18 | 19 | @pytest.mark.parametrize('data,type', test_cases) 20 | def test_infer_vegalite_type(data, type): 21 | assert infer_vegalite_type(data) == type 22 | 23 | 24 | def test_unpivot(): 25 | frame = pd.DataFrame({'x': range(10), 'y': range(10), 'z': range(10)}) 26 | df = unpivot_frame(frame, var_name='foo', value_name='bar') 27 | assert list(df.columns) == ['index', 'foo', 'bar'] 28 | assert set(pd.unique(df['foo'])) == {'x', 'y', 'z'} 29 | 30 | df = unpivot_frame(frame, x='x') 31 | assert list(df.columns) == ['x', 'variable', 'value'] 32 | assert set(pd.unique(df['variable'])) == {'y', 'z'} 33 | 34 | df = unpivot_frame(frame, y='y') 35 | assert list(df.columns) == ['index', 'variable', 'value'] 36 | assert set(pd.unique(df['variable'])) == {'y'} 37 | 38 | df = unpivot_frame(frame, y=('y', 'z')) 39 | assert list(df.columns) == ['index', 'variable', 'value'] 40 | assert set(pd.unique(df['variable'])) == {'y', 'z'} 41 | 42 | df = unpivot_frame(frame, x=('x', 'y'), y='z') 43 | assert list(df.columns) == ['x', 'y', 'variable', 'value'] 44 | assert set(pd.unique(df['variable'])) == {'z'} 45 | 46 | 47 | def test_unpivot_bad_cols(): 48 | frame = pd.DataFrame({'x': range(10), 'y': range(10)}) 49 | 50 | with pytest.raises(KeyError): 51 | unpivot_frame(frame, x='foo') 52 | 53 | with pytest.raises(KeyError): 54 | unpivot_frame(frame, y='foo') 55 | 56 | with pytest.raises(KeyError): 57 | unpivot_frame(frame, x=('x', 'foo')) 58 | 59 | with pytest.raises(KeyError): 60 | unpivot_frame(frame, y=('y', 'foo')) 61 | 62 | 63 | def test_validate_aggregation(): 64 | string_cases = ['max', 'min', 'mean', 'median', 'count', 'sum'] 65 | func_cases = {np.min: 'min', min: 'min', 66 | np.max: 'max', max: 'max', 67 | np.sum: 'sum', sum: 'sum', 68 | np.median: 'median', np.mean: 'mean'} 69 | 70 | for case in string_cases: 71 | assert validate_aggregation(case) == case 72 | 73 | for case, result in func_cases.items(): 74 | assert validate_aggregation(case) == result 75 | 76 | assert validate_aggregation(None) is None 77 | 78 | with pytest.raises(ValueError) as err: 79 | validate_aggregation('blah') 80 | assert str(err.value).startswith("Unrecognized Vega-Lite aggregation") 81 | 82 | with pytest.raises(ValueError) as err: 83 | validate_aggregation(np.array) 84 | assert str(err.value).startswith("Unrecognized Vega-Lite aggregation") 85 | -------------------------------------------------------------------------------- /doc/installation.rst: -------------------------------------------------------------------------------- 1 | .. _installation: 2 | 3 | Installing and Using ``pdvega`` 4 | =============================== 5 | 6 | To install and use ``pdvega`` run the following commands: 7 | 8 | .. code-block:: bash 9 | 10 | $ pip install pdvega 11 | $ jupyter nbextension install --sys-prefix --py vega3 12 | 13 | The first command installs the `pdvega `_ 14 | Python package along with its dependencies (`Pandas`_ and `vega3`_). 15 | The second command above installs the `vega3`_ Jupyter notebook extension, which 16 | is required for ``pdvega`` plots to display automatically in the notebook. 17 | 18 | Using ``pdvega`` in the Jupyter Notebook 19 | ---------------------------------------- 20 | When ``pdvega`` and ``vega3`` are correctly installed, you can create a 21 | visualization within the Jupyter notebook by executing a cell with a plot 22 | command as the last statement in the cell. For example: 23 | 24 | .. pdvega-plot:: 25 | 26 | import pandas as pd 27 | import pdvega # adds vgplot attribute to Pandas objects 28 | 29 | data = pd.Series([1,2,3,2,3,4,3,4,5]) 30 | data.vgplot() 31 | 32 | You can also explicitly call the ``plot.display()`` method to display a plot 33 | saved in a variable: 34 | 35 | .. code-block:: python 36 | 37 | plot = data.vgplot() 38 | plot.display() 39 | 40 | .. pdvega-plot:: 41 | :hide-code: 42 | 43 | import pandas as pd 44 | import pdvega # adds vgplot attribute to Pandas objects 45 | 46 | data = pd.Series([1,2,3,2,3,4,3,4,5]) 47 | data.vgplot() 48 | 49 | 50 | Using ``pdvega`` in JupyterLab 51 | ------------------------------ 52 | `JupyterLab`_ is the next phase 53 | of evolution for the Jupyter notebook. For reasons related to its under-the-hood 54 | implementation, the current version of ``pdvega`` will not work in JupyterLab: the 55 | main reason is that the new MIME-based rendering used by JupyterLab is not yet supported 56 | in the `vega3`_ library that ``pdvega`` depends on. 57 | 58 | We hope to address this incompatibility soon! 59 | 60 | 61 | Using ``pdvega`` Outside Jupyter 62 | -------------------------------- 63 | If you wish to use ``pdvega`` outside the Jupyter notebook, you can save the 64 | plot specification to a JSON file: 65 | 66 | .. code-block:: python 67 | 68 | import json 69 | plot = data.vgplot() 70 | json.dump(plot.spec, 'plot.json') 71 | 72 | The resulting plot specification can then be rendered within an HTML page 73 | using the `vega-embed`_ Javascript package. 74 | 75 | Saving Visualizations to PNG or SVG 76 | ----------------------------------- 77 | To save a visualization to PNG, you can use the link generated below the 78 | rendered plot. Programmatic saving of figures is not currently supported 79 | from within Python, though it is possible using the ``vl2png`` and ``vl2svg`` 80 | command-line tools provided in the `vega-lite`_ npm package. 81 | 82 | 83 | .. _Jupyter notebook: http://jupyter.org/ 84 | .. _JupyterLab: http://jupyterlab-tutorial.readthedocs.io/en/latest/ 85 | .. _Pandas: http://pandas.pydata.org/ 86 | .. _vega3: http://pypi.python.org/pypi/vega3/ 87 | .. _vega-embed: https://vega.github.io/vega-lite/usage/embed.html 88 | .. _vega-lite: https://github.com/vega/vega-lite 89 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import io 2 | import os 3 | import re 4 | 5 | from setuptools import setup 6 | 7 | 8 | def read(path, encoding='utf-8'): 9 | path = os.path.join(os.path.dirname(__file__), path) 10 | with io.open(path, encoding=encoding) as fp: 11 | return fp.read() 12 | 13 | 14 | def get_install_requirements(path): 15 | content = read(path) 16 | return [ 17 | req 18 | for req in content.split("\n") 19 | if req != '' and not req.startswith('#') 20 | ] 21 | 22 | 23 | def version(path): 24 | """Obtain the packge version from a python file e.g. pkg/__init__.py 25 | See . 26 | """ 27 | version_file = read(path) 28 | version_match = re.search(r"""^__version__ = ['"]([^'"]*)['"]""", 29 | version_file, re.M) 30 | if version_match: 31 | return version_match.group(1) 32 | raise RuntimeError("Unable to find version string.") 33 | 34 | 35 | HERE = os.path.abspath(os.path.dirname(__file__)) 36 | 37 | # From https://github.com/jupyterlab/jupyterlab/blob/master/setupbase.py, BSD licensed 38 | def find_packages(top=HERE): 39 | """ 40 | Find all of the packages. 41 | """ 42 | packages = [] 43 | for d, dirs, _ in os.walk(top, followlinks=True): 44 | if os.path.exists(os.path.join(d, '__init__.py')): 45 | packages.append(os.path.relpath(d, top).replace(os.path.sep, '.')) 46 | elif d != top: 47 | # Do not look for packages in subfolders if current is not a package 48 | dirs[:] = [] 49 | return packages 50 | 51 | 52 | DESCRIPTION = "Pandas plotting interface to Vega and Vega-Lite" 53 | LONG_DESCRIPTION = """ 54 | pdvega makes it easy to create Vega-Lite plots from pandas dataframes, 55 | using the familiar pandas visualization API. For more information, see 56 | the `pdvega documentation `_. 57 | """ 58 | NAME = "pdvega" 59 | AUTHOR = "Jake VanderPlas" 60 | AUTHOR_EMAIL = "jakevdp@gmail.com" 61 | MAINTAINER = "Jake VanderPlas" 62 | MAINTAINER_EMAIL = "jakevdp@gmail.com" 63 | URL = 'http://altair-viz.github.io/pdvega/' 64 | DOWNLOAD_URL = 'http://github.com/altair-viz/pdvega/' 65 | LICENSE = 'MIT' 66 | INSTALL_REQUIRES = get_install_requirements("requirements.txt") 67 | DEV_REQUIRES = get_install_requirements("requirements_dev.txt") 68 | PACKAGES = find_packages() 69 | VERSION = version('pdvega/__init__.py') 70 | 71 | setup(name=NAME, 72 | version=VERSION, 73 | description=DESCRIPTION, 74 | long_description=LONG_DESCRIPTION, 75 | author=AUTHOR, 76 | author_email=AUTHOR_EMAIL, 77 | maintainer=MAINTAINER, 78 | maintainer_email=MAINTAINER_EMAIL, 79 | url=URL, 80 | download_url=DOWNLOAD_URL, 81 | license=LICENSE, 82 | install_requires=INSTALL_REQUIRES, 83 | extras_require={ 84 | 'dev': DEV_REQUIRES 85 | }, 86 | packages=PACKAGES, 87 | include_package_data=True, 88 | classifiers=[ 89 | 'Development Status :: 4 - Beta', 90 | 'Environment :: Console', 91 | 'Intended Audience :: Science/Research', 92 | 'License :: OSI Approved :: BSD License', 93 | 'Natural Language :: English', 94 | 'Programming Language :: Python :: 2.7', 95 | 'Programming Language :: Python :: 3.5', 96 | 'Programming Language :: Python :: 3.6'], 97 | ) 98 | -------------------------------------------------------------------------------- /doc/plotting.rst: -------------------------------------------------------------------------------- 1 | .. _statistical-plotting: 2 | 3 | Statistical Visualization with ``pdvega.plotting`` 4 | ================================================== 5 | 6 | In addition to the basic plots made available by the ``vgplot`` interface, 7 | ``pdvega.plotting`` makes available some more sophisticated plotting types 8 | that mirror those available in `pandas.plotting`_. 9 | 10 | This section will outline a few of these. 11 | 12 | .. pdvega-setup:: 13 | 14 | import pdvega 15 | from vega_datasets import data 16 | iris = data.iris() 17 | stocks = data.stocks(pivoted=True) 18 | 19 | .. _pdvega-scatter-matrix: 20 | 21 | Scatter Matrix 22 | -------------- 23 | 24 | For multi-dimensional data, it is difficult to capture all the relevant data 25 | features using a simple scatter plot. For data with several attributes, it can 26 | be useful to visualize the pairwise relationships between all pairs of dimensions. 27 | This is done by ``pdvega.scatter_matrix``, which has an API based on 28 | :func:`pandas.plotting.scatter_matrix`: 29 | 30 | .. pdvega-plot:: 31 | 32 | pdvega.scatter_matrix(iris, "species", figsize=(7, 7)) 33 | 34 | Notice that this version is interactive in two ways: if you click and drag on 35 | any frame of the plot, all frames scales are dynamically adjusted in concert. 36 | Further, if you hold the SHIFT key while clicking and dragging, it enables a 37 | linked-brushing operation that allows you to track points between panels. 38 | 39 | 40 | .. _pdvega-parallel-coordinates: 41 | 42 | Parallel Coordinates 43 | -------------------- 44 | 45 | Another way to visualize multi-dimensional data is to look at each dimension 46 | independently, using a *parallel coordinates* plot. This can be done using 47 | :func:`pdvega.parallel_coordinates`, which follows the API of 48 | :func:`pandas.plotting.parallel_coordinates`: 49 | 50 | .. pdvega-plot:: 51 | 52 | pdvega.parallel_coordinates(iris, "species") 53 | 54 | In one glance, this lets you see relationships between points, and in particular 55 | makes clear that the "setosa" species is well-separated from the other two 56 | in the dimensions of petal width and length. 57 | 58 | .. _pdvega-andrews-curves: 59 | 60 | Andrews Curves 61 | -------------- 62 | 63 | A similar approach to visualizing data dimensions is known as *Andrews curves*: 64 | the idea is to construct a Fourier series from the features of each object, 65 | in order to qualitatively visualize the aggregate differences between classes. 66 | This can be done with the :func:`pdvega.andrews_curves` function, which follows 67 | the API of :func:`pandas.plotting.andrews_curves`: 68 | 69 | .. pdvega-plot:: 70 | 71 | pdvega.andrews_curves(iris, "species") 72 | 73 | This gives us a similar impression to what we saw in the parallel coordinates 74 | plot -- that setosa is somehow distinct from the other species -- but gives 75 | less quantitative insight into just which features lead to that distinction. 76 | 77 | .. _pdvega-lag-plot: 78 | 79 | Lag Plot 80 | -------- 81 | 82 | Finally, for time series, an interesting type of plot is known as a *lag plot*. 83 | This is implemented by the :func:`pdvega.plotting.lag_plot` function, which follows 84 | the API of :func:`pandas.plotting.lag_plot`. 85 | 86 | Here we'll visualize the stock prices of Amazon and Microsoft from 1998-2010, 87 | using a lag of 12 months: 88 | 89 | .. pdvega-plot:: 90 | 91 | pdvega.lag_plot(stocks[['AMZN', 'MSFT']], lag=12) 92 | 93 | It's immediately apparent from this plot that Amazon was far more volitile 94 | during that period: its price at any point during this period showed very 95 | little correlation with the price a year later. By contrast, it's clear that 96 | Microsoft's price was much more stable through this decade. 97 | 98 | We can see that interpretation as well in the simple time-series plot of each 99 | company's stock price: 100 | 101 | .. pdvega-plot:: 102 | 103 | stocks[['AMZN', 'MSFT']].vgplot.line() 104 | 105 | 106 | 107 | .. _pandas.plotting: http://pandas.pydata.org/ 108 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ``pdvega``: Vega-Lite plotting for Pandas Dataframes 2 | 3 | [![build status](http://img.shields.io/travis/altair-viz/pdvega/master.svg?style=flat)](https://travis-ci.org/altair-viz/pdvega) 4 | [![Binder](https://mybinder.org/badge.svg)](https://mybinder.org/v2/gh/altair-viz/pdvega/master?filepath=examples%2Fpdvega_example.ipynb) 5 | 6 | ``pdvega`` is a library that allows you to quickly create interactive 7 | [Vega-Lite](https://vega.github.io/vega-lite/) plots from Pandas dataframes, 8 | using an API that is nearly identical to Pandas' built-in 9 | [visualization tools](https://pandas.pydata.org/pandas-docs/stable/visualization.html), and designed for easy use within the [Jupyter notebook](http://jupyter.org). 10 | 11 | - [Full Documentation](http://altair-viz.github.io/pdvega/) 12 | 13 | Pandas currently has some basic plotting capabilities based on 14 | [matplotlib](http://matplotlib.org). So, for example, you can create 15 | a scatter plot this way: 16 | 17 | ```python 18 | import numpy as np 19 | import pandas as pd 20 | 21 | df = pd.DataFrame({'x': np.random.randn(100), 'y': np.random.randn(100)}) 22 | df.plot.scatter(x='x', y='y') 23 | ``` 24 | 25 | ![matplotlib scatter output](images/mpl-scatter.png?raw=true) 26 | 27 | The goal of ``pdvega`` is that any time you use ``dataframe.plot``, you'll be 28 | able to replace it with ``dataframe.vgplot`` and instead get a similar 29 | (but prettier and more interactive) visualization output in Vega-Lite that you can easily export to share or customize: 30 | 31 | ```python 32 | import pdvega # import adds vgplot attribute to pandas 33 | 34 | df.vgplot.scatter(x='x', y='y') 35 | ``` 36 | 37 | ![vega-lite scatter output](images/vg-scatter.png?raw=true) 38 | 39 | The above image is a static screenshot of the interactive output; please see the 40 | [Documentation](http://altair-viz.github.io/pdvega/) for a full set of live 41 | usage examples. 42 | 43 | ## Installation 44 | 45 | You can get started with ``pdvega`` using pip: 46 | 47 | ``` 48 | $ pip install jupyter pdvega 49 | $ jupyter nbextension install --sys-prefix --py vega3 50 | ``` 51 | 52 | The first line installs ``pdvega`` and its dependencies; the second installs 53 | the Jupyter extensions that allows plots to be displayed in the Jupyter 54 | notebook. For more information on installation and dependencies, see the 55 | [Installation docs](https://altair-viz.github.io/pdvega/installation.html). 56 | 57 | ## Why Vega-Lite? 58 | When working with data, one of the biggest challenges is ensuring reproducibility of results. 59 | When you create a figure and export it to PNG or PDF, the data become baked-in to the rendering in a 60 | way that is difficult or impossible for others to extract. [Vega](http://vega.github.io/vega) and 61 | [Vega-Lite](http://vega.github.io/vega-lite) change this: instead of packaging a figure by encoding its 62 | pixel values, they package a figure by describing, in a declarative manner, the relationship between 63 | data values and visual encodings through a JSON specification. 64 | 65 | This means that the Vega-Lite figures produced by ``pdvega`` are portable: you can send someone the 66 | resulting JSON specification and they can choose whether to render it interactively online, convert it to 67 | a PNG or EPS for static publication, or even enhance and extend the figure to learn more about the data. 68 | 69 | ``pdvega`` is a step in bringing this vision of figure portability and reproducibility to the Python world. 70 | 71 | ### Relationship to Altair 72 | 73 | [Altair](http://altair-viz.github.io) is a project that seeks to design an intuitive declarative API for generating Vega-Lite and Vega visualizations, using Pandas dataframes as data sources. 74 | 75 | By contrast, ``pdvega`` seeks not to design new visualization APIs, but to use the existing ``DataFrame.plot`` [visualization api](https://pandas.pydata.org/pandas-docs/stable/visualization.html) and output visualizations with Vega/Vega-Lite rather than with matplotlib. 76 | 77 | In this respect, ``pdvega`` is quite similar in spirit to the now-defunct [mpld3](http://mpld3.github.io) project, though the scope is smaller and (hopefully) **much** more manageable. 78 | -------------------------------------------------------------------------------- /pdvega/_utils.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | import numpy as np 3 | import pandas as pd 4 | 5 | from ._pandas_internals import infer_dtype as pd_infer_dtype 6 | from ._pandas_internals import _infer_dtype_kwds 7 | 8 | 9 | def infer_vegalite_type(data, ordinal_threshold=6): 10 | """ 11 | From an array-like input, infer the correct vega typecode 12 | ('ordinal', 'nominal', 'quantitative', or 'temporal') 13 | 14 | Parameters 15 | ---------- 16 | data: Numpy array or Pandas Series 17 | data for which the type will be inferred 18 | ordinal_threshold: integer (default: 0) 19 | integer data will result in a 'quantitative' type, unless the 20 | number of unique values is smaller than ordinal_threshold. 21 | 22 | Adapted from code at http://github.com/altair-viz/altair/ 23 | Licence: BSD-3 24 | """ 25 | # infer based on the dtype of the input 26 | typ = pd_infer_dtype(data, **_infer_dtype_kwds) 27 | 28 | # TODO: Once this returns 'O', please update test_select_x and test_select_y in test_api.py 29 | 30 | if typ in ('mixed-integer', 'integer'): 31 | if ordinal_threshold and pd.Series(data).nunique() <= ordinal_threshold: 32 | return 'ordinal' 33 | else: 34 | return 'quantitative' 35 | elif typ in ('floating', 'mixed-integer-float', 'complex'): 36 | return 'quantitative' 37 | elif typ in ('string', 'bytes', 'categorical', 'boolean', 'mixed', 'unicode', 'object'): 38 | return 'nominal' 39 | elif typ in ('datetime', 'datetime64', 'timedelta', 40 | 'timedelta64', 'date', 'time', 'period'): 41 | return 'temporal' 42 | else: 43 | warnings.warn("I don't know how to infer vegalite type from '{0}'. " 44 | "Defaulting to nominal.".format(typ)) 45 | return 'nominal' 46 | 47 | 48 | def unpivot_frame(frame, x=None, y=None, 49 | var_name='variable', value_name='value'): 50 | """Unpivot a dataframe for use with Vega/Vega-Lite 51 | 52 | The input is a frame with any number of columns, 53 | output is a frame with three columns: x value, y values, 54 | and variable names. 55 | """ 56 | if x is None: 57 | cols = frame.columns 58 | frame = frame.reset_index() 59 | x = (set(frame.columns) - set(cols)).pop() 60 | # frame.melt doesn't properly check for nonexisting columns, so we 61 | # start by indexing here. Tuples of column names also need to be 62 | # converted to lists for checking indexing 63 | if isinstance(x, tuple): 64 | x = list(x) 65 | if isinstance(y, tuple): 66 | y = list(y) 67 | if x is not None: 68 | _ = frame[x] # noqa 69 | if y is not None: 70 | _ = frame[y] # noqa 71 | return frame.melt(id_vars=x, value_vars=y, 72 | var_name=var_name, value_name=value_name) 73 | 74 | 75 | def warn_if_keywords_unused(kind, kwds): 76 | if kwds: 77 | if len(kwds) == 1: 78 | keys = tuple(kwds.keys())[0] 79 | else: 80 | keys = tuple(kwds.keys()) 81 | warnings.warn("Unrecognized keywords in vgplot.{0}(): {1}" 82 | "".format(kind, repr(keys))) 83 | 84 | 85 | def validate_aggregation(agg): 86 | """Validate an aggregation for use in Vega-Lite. 87 | 88 | Translate agg to one of the following supported named aggregations: 89 | ['mean', 'sum', 'median', 'min', 'max', 'count'] 90 | 91 | Parameters 92 | ---------- 93 | agg : string or callable 94 | A string 95 | 96 | Supported reductions are ['mean', 'sum', 'median', 'min', 'max', 'count']. 97 | 98 | If agg is a numpy function, the return value is the string representation. 99 | 100 | If agg is unrecognized, raise a ValueError 101 | """ 102 | if agg is None: 103 | return agg 104 | supported_aggs = ['mean', 'sum', 'median', 'min', 'max', 'count'] 105 | numpy_aggs = {getattr(np, a): a 106 | for a in ['mean', 'sum', 'median', 'min', 'max']} 107 | builtin_aggs = {min: 'min', max: 'max', sum: 'sum'} 108 | 109 | agg = numpy_aggs.get(agg, agg) 110 | agg = builtin_aggs.get(agg, agg) 111 | 112 | if agg not in supported_aggs: 113 | raise ValueError("Unrecognized Vega-Lite aggregation: {0}".format(agg)) 114 | 115 | return agg 116 | -------------------------------------------------------------------------------- /doc/advanced.rst: -------------------------------------------------------------------------------- 1 | .. _advanced-plotting: 2 | 3 | Advanced Plotting: Using Vega-Lite Directly 4 | =========================================== 5 | 6 | The ``pdvega`` API is rather simplistic at the moment; it doesn't give easy 7 | access to many of the features that Vega-Lite supports. 8 | In the future, we would like to tie ``pdvega`` to the `Altair`_ project, which 9 | would allow plot outputs to be adjusted flexibly from within a Python API. 10 | 11 | In the meantime, it is possible to make more fine-tuned adjustments to your 12 | plot specifications by working directly in the specification dictionary. 13 | 14 | For example, consider this plot: 15 | 16 | .. pdvega-setup:: 17 | 18 | import pdvega 19 | import pandas 20 | 21 | .. pdvega-plot:: 22 | 23 | from vega_datasets import data 24 | iris = data.iris() 25 | 26 | iris.vgplot(kind='scatter', x='sepalLength', y='petalLength', c='species') 27 | 28 | Vega-Lite's default behavior is to include the zero-value in the scale, unless 29 | the user explicitly turns that requirement off in the JSON spec. 30 | 31 | ``pdvega`` is not designed to give easy access to every option available in the 32 | Vega-Lite schema, but it is possible to modify the specification manually. 33 | We can access the raw Vega-Lite specification from any plot using the ``spec`` 34 | attribute. For convenience, there is also a ``spec_no_data`` attribute that 35 | returns the spec without the the embedded data: 36 | 37 | .. code-block:: python 38 | 39 | >>> plot = iris.vgplot(kind='scatter', x='sepalLength', y='petalLength', c='species') 40 | >>> plot.spec_no_data 41 | {'$schema': 'https://vega.github.io/schema/vega-lite/v2.json', 42 | 'encoding': {'color': {'field': 'species', 'type': 'nominal'}, 43 | 'x': {'field': 'sepalLength', 'type': 'quantitative'}, 44 | 'y': {'field': 'petalLength', 'type': 'quantitative'}}, 45 | 'height': 300, 46 | 'mark': 'circle', 47 | 'selection': {'grid': {'bind': 'scales', 'type': 'interval'}}, 48 | 'width': 450} 49 | 50 | This dictionary contains the specification that tells the vega-lite renderer 51 | how to map data to visual components in the plot. You can read more details on 52 | the `Vega-Lite`_ website. In particular, if you look at the options for 53 | `Vega-Lite scales`_, you can see that there is a ``"scale"`` property of the "x" 54 | encoding which allows turning off the zero behavior. 55 | Knowing this, we can update the specification manually to get the desired result: 56 | 57 | .. pdvega-setup:: 58 | 59 | from vega_datasets import data 60 | iris = data.iris() 61 | plot = iris.vgplot(kind='scatter', x='sepalLength', y='petalLength', c='species') 62 | 63 | .. pdvega-plot:: 64 | 65 | plot.spec['encoding']['x']['scale'] = {'zero': False} 66 | plot 67 | 68 | Using this type of approach, you can customize your plots in any way that Vega-Lite 69 | allows. 70 | 71 | This is admittedly a bit of a clumsy solution for plot customization; mucking around 72 | in the internals of the JSON specification requires a deep knowledge of the vega-lite 73 | schema, and the renderer is not very forgiving if and when you 74 | make an error or typo. 75 | In the future, we plan to make ``pdvega`` plots output `Altair`_ 76 | objects, which will allow this sort of customization to be done much more cleanly 77 | with Altair's Python API. 78 | 79 | Skipping ``vgplot`` entirely 80 | ---------------------------- 81 | If you would like to skip pdvega's vgplot API entirely and build your Vega-Lite plot 82 | from scratch, pdvega's :class:`~pdvega.Axes` object lets you do this directly. 83 | For example: 84 | 85 | .. pdvega-plot:: 86 | 87 | from pdvega import Axes 88 | 89 | spec = { 90 | '$schema': 'https://vega.github.io/schema/vega-lite/v2.json', 91 | 'mark': 'point', 92 | 'encoding': { 93 | 'color': {'field': 'species', 'type': 'nominal'}, 94 | 'x': {'field': 'petalWidth', 'type': 'quantitative'}, 95 | 'y': {'field': 'petalLength', 'type': 'quantitative'} 96 | }, 97 | 'height': 300, 98 | 'width': 450, 99 | # this selection is what makes the plot interactive 100 | 'selection': {'grid': {'bind': 'scales', 'type': 'interval'}}, 101 | } 102 | 103 | # Build the vgplot specification 104 | Axes(spec, iris) 105 | 106 | For ideas on what sort of visualizations you can create in this way, 107 | check out the specifications on the `Vega-Lite examples`_ page. 108 | The `Vega online editor`_ is also a useful resource for developing visualizations 109 | directly in Vega or Vega-Lite. 110 | 111 | .. _Vega-Lite: http://vega.github.io/vega-lite/ 112 | .. _Altair: http://altair-viz.github.io/ 113 | .. _Vega-Lite scales: https://vega.github.io/vega-lite/docs/scale.html 114 | .. _Vega-Lite examples: https://vega.github.io/vega-lite/examples/ 115 | .. _Vega online editor: https://vega.github.io/editor/#/custom/vega-lite 116 | -------------------------------------------------------------------------------- /pdvega/tests/test_plotting.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import numpy as np 4 | import pandas as pd 5 | 6 | import pdvega 7 | from pdvega.tests import utils 8 | 9 | 10 | def test_scatter_matrix(): 11 | df = pd.DataFrame({'x': range(5), 12 | 'y': range(5), 13 | 'label': list('ABABA')}) 14 | # no color or size specified 15 | plot = pdvega.scatter_matrix(df) 16 | utils.validate_vegalite(plot) 17 | spec = plot.to_dict() 18 | assert spec['repeat']['row'] == ['x', 'y'] 19 | assert spec['repeat']['column'] == ['y', 'x'] 20 | assert spec['spec']['encoding']['color']['condition']['value'] == 'steelblue' 21 | 22 | # with color specified 23 | plot = pdvega.scatter_matrix(df, c='label') 24 | utils.validate_vegalite(plot) 25 | spec = plot.to_dict() 26 | assert spec['repeat']['row'] == ['x', 'y'] 27 | assert spec['repeat']['column'] == ['y', 'x'] 28 | assert spec['spec']['encoding']['color']['condition']['field'] == 'label' 29 | 30 | # with size specified 31 | plot = pdvega.scatter_matrix(df, s='label') 32 | utils.validate_vegalite(plot) 33 | spec = plot.to_dict() 34 | assert spec['repeat']['row'] == ['x', 'y'] 35 | assert spec['repeat']['column'] == ['y', 'x'] 36 | assert spec['spec']['encoding']['color']['condition']['value'] == 'steelblue' 37 | assert spec['spec']['encoding']['size']['field'] == 'label' 38 | 39 | # test figsize keyword 40 | figsize = (8, 6) 41 | dpi = 40 42 | ncols = 2 43 | plot = pdvega.scatter_matrix(df, figsize=figsize, dpi=dpi) 44 | utils.validate_vegalite(plot) 45 | spec = plot.to_dict() 46 | assert np.allclose(spec['spec']['width'], 47 | 0.8 * dpi * figsize[0] / ncols) 48 | assert np.allclose(spec['spec']['height'], 49 | 0.8 * dpi * figsize[1] / ncols) 50 | 51 | 52 | def test_parallel_coordinates(): 53 | data = pd.DataFrame({'x': range(10), 54 | 'y': range(10), 55 | 'z': range(10), 56 | 'c': list('ABABABABAB')}) 57 | plot = pdvega.parallel_coordinates(data, 'c', alpha=0.5) 58 | utils.validate_vegalite(plot) 59 | utils.check_encodings(plot, x='variable', y='value', 60 | color='c', detail='index', opacity=utils.IGNORE) 61 | 62 | spec = plot.to_dict() 63 | enc = spec['encoding'] 64 | assert spec['mark'] == 'line' 65 | assert enc['x']['type'] == 'nominal' 66 | assert enc['y']['type'] == 'quantitative' 67 | assert enc['color']['type'] == 'nominal' 68 | assert enc['detail']['type'] == 'quantitative' 69 | assert enc['opacity']['value'] == 0.5 70 | 71 | df = utils.get_data(plot) 72 | assert set(pd.unique(df['variable'])) == {'x', 'y', 'z'} 73 | 74 | plot = pdvega.parallel_coordinates(data, 'c', cols=['x', 'y']) 75 | utils.validate_vegalite(plot) 76 | utils.check_encodings(plot, x='variable', y='value', 77 | color='c', detail='index') 78 | spec = plot.to_dict() 79 | enc = spec['encoding'] 80 | assert spec['mark'] == 'line' 81 | assert enc['x']['type'] == 'nominal' 82 | assert enc['y']['type'] == 'quantitative' 83 | assert enc['color']['type'] == 'nominal' 84 | assert enc['detail']['type'] == 'quantitative' 85 | df = utils.get_data(plot) 86 | assert set(pd.unique(df['variable'])) == {'x', 'y'} 87 | 88 | 89 | def test_andrews_curves(): 90 | data = pd.DataFrame({'x': range(10), 91 | 'y': range(10), 92 | 'z': range(10), 93 | 'c': list('ABABABABAB')}) 94 | n_samples = 120 95 | n_points = len(data) 96 | plot = pdvega.andrews_curves(data, 'c', samples=120, alpha=0.5) 97 | utils.validate_vegalite(plot) 98 | utils.check_encodings(plot, x='t', y=' ', 99 | color='c', detail='sample', opacity=utils.IGNORE) 100 | 101 | spec = plot.to_dict() 102 | enc = spec['encoding'] 103 | assert spec['mark'] == 'line' 104 | assert enc['x']['type'] == 'quantitative' 105 | assert enc['y']['type'] == 'quantitative' 106 | assert enc['color']['type'] == 'nominal' 107 | assert enc['detail']['type'] == 'quantitative' 108 | assert enc['opacity']['value'] == 0.5 109 | 110 | df = utils.get_data(plot) 111 | assert len(df) == n_samples * n_points 112 | 113 | 114 | @pytest.mark.parametrize('lag', [1, 5]) 115 | def test_lag_plot(lag): 116 | data = pd.DataFrame({'x': range(10), 117 | 'y': range(10)}) 118 | 119 | # test series input 120 | plot = pdvega.lag_plot(data['x'], lag=lag) 121 | lag_data = utils.get_data(plot) 122 | 123 | spec = plot.to_dict() 124 | assert spec['mark'] == 'point' 125 | assert spec['encoding']['x']['type'] == 'quantitative' 126 | assert spec['encoding']['y']['type'] == 'quantitative' 127 | 128 | utils.check_encodings(plot, x='y(t)', y='y(t + {0})'.format(lag)) 129 | assert lag_data.shape == (data.shape[0] - lag, 2) 130 | 131 | # test dataframe input 132 | plot = pdvega.lag_plot(data, lag=lag) 133 | lag_data = utils.get_data(plot) 134 | spec = plot.to_dict() 135 | 136 | assert spec['mark'] == 'point' 137 | assert spec['encoding']['x']['type'] == 'quantitative' 138 | assert spec['encoding']['y']['type'] == 'quantitative' 139 | assert spec['encoding']['color']['type'] == 'nominal' 140 | utils.check_encodings(plot, x='y(t)', y='y(t + {0})'.format(lag), 141 | color='variable') 142 | assert lag_data.shape == (2 * (data.shape[0] - lag), 3) 143 | -------------------------------------------------------------------------------- /doc/index.rst: -------------------------------------------------------------------------------- 1 | .. raw :: html 2 | 3 | Fork me on GitHub 4 | 5 | PdVega: Interactive Vega-Lite Plots for Pandas 6 | ============================================== 7 | 8 | ``pdvega`` is a library that allows you to quickly create interactive 9 | `Vega-Lite`_ plots from Pandas dataframes, using an API that is nearly 10 | identical to Pandas' built-in `plotting API `_, 11 | and designed for easy use within the `Jupyter notebook`_. 12 | 13 | .. pdvega-plot:: 14 | 15 | import pandas as pd 16 | import numpy as np 17 | data = pd.DataFrame({'x': np.random.randn(200), 18 | 'y': np.random.randn(200)}) 19 | 20 | import pdvega # adds vgplot attribute to pandas 21 | data.vgplot.scatter('x', 'y') 22 | 23 | The result is an interactive plot rendered using `Vega-Lite`_, a visualization 24 | specification that allows users to declaratively describe which 25 | data features should map to which visualization features using a well-defined 26 | JSON schema. The result is beautiful and dynamic data visualizations with a 27 | minimum of boiler-plate. 28 | 29 | ``pdvega`` aims to make the construction of these specifications 30 | more accessible to Python users, via a familiar plotting API. 31 | 32 | Quick Start 33 | ----------- 34 | ``pdvega`` is designed to be used primarily with the `Jupyter notebook`_. 35 | To get started, first install ``pdvega`` with the following commands:: 36 | 37 | $ pip install pdvega 38 | $ jupyter nbextension install --sys-prefix --py vega3 39 | 40 | (for details on installation and dependencies, see :ref:`installation`). 41 | 42 | With the package installed and imported, you can use the ``vgplot`` attribute 43 | of Pandas ``Series`` and ``DataFrame`` objects to quickly create a Vega-Lite 44 | plot. For convenience here, we will load example datasets using the 45 | `vega_datasets`_ package: 46 | 47 | .. pdvega-plot:: 48 | 49 | # load a dataframe containing stock price time-series 50 | from vega_datasets import data 51 | stocks = data.stocks(pivoted=True) 52 | 53 | # importing pdvega adds the `vgplot` attribute to pandas objects 54 | import pdvega 55 | 56 | stocks.vgplot.line() 57 | 58 | Notice that by default plots created with ``pdvega`` are interactive: you can 59 | use your mouse or track pad to pan and zoom the plot. 60 | 61 | By design, ``pdvega`` has a plotting API that is nearly identical to Pandas' 62 | existing `matplotlib API `_; 63 | just replace ``data.plot`` with ``data.vgplot``, where 64 | ``data`` refers to any Pandas ``Series`` or ``DataFrame`` object: 65 | 66 | .. plot:: 67 | :context: 68 | :nofigs: 69 | 70 | from vega_datasets import data 71 | stocks = data.stocks(pivoted=True) 72 | 73 | .. plot:: 74 | :include-source: 75 | :context: 76 | 77 | # create a matplotlib line plot 78 | stocks.plot.line(y='AAPL', alpha=0.5) 79 | 80 | 81 | .. pdvega-setup:: 82 | 83 | from vega_datasets import data 84 | stocks = data.stocks(pivoted=True) 85 | import pdvega 86 | 87 | .. pdvega-plot:: 88 | 89 | # create a vega line plot 90 | stocks.vgplot.line(y='AAPL', alpha=0.5) 91 | 92 | ``pdvega`` does not (yet?) support every available argument supported by 93 | ``DataFrame.plot`` methods, but it covers the most commonly-used arguments. 94 | 95 | To see more examples of visualizations created using the ``vgplot`` attribute 96 | of pandas ``Series`` and ``DataFrame`` objects, see :ref:`core-plotting`. 97 | 98 | More Complex Plots 99 | ------------------ 100 | 101 | The ``pdvega`` package additionally supports many of the more sophisticated 102 | plotting routines available in the 103 | `pandas.plotting `_ 104 | submodule; for example, here is a multi-panel scatter-plot matrix of Fisher's 105 | `Iris dataset`_: 106 | 107 | .. pdvega-setup:: 108 | 109 | import pdvega 110 | from vega_datasets import data 111 | 112 | .. pdvega-plot:: 113 | 114 | iris = data.iris() 115 | pdvega.scatter_matrix(iris, 'species', figsize=(7, 7)) 116 | 117 | In this plot, you can click and drag for linked panning and zooming, or you can 118 | click and drag while holding the SHIFT key to do linked brushing of the points. 119 | 120 | For more examples of statistical visualizations available in 121 | ``pdvega.plotting``, see :ref:`statistical-plotting`. 122 | 123 | 124 | Documentation 125 | ------------- 126 | 127 | .. toctree:: 128 | :maxdepth: 2 129 | 130 | installation 131 | core 132 | plotting 133 | advanced 134 | API 135 | 136 | `pdvega` is MIT-licensed and the source is available on `GitHub `_. 137 | If any questions or issues come up as you use it, please get in touch via 138 | `Git Issues `_. 139 | 140 | Indices and tables 141 | ------------------ 142 | 143 | * :ref:`genindex` 144 | * :ref:`modindex` 145 | * :ref:`search` 146 | 147 | 148 | .. _Vega-Lite: http://vega.github.io/vega-lite 149 | .. _Jupyter notebook: http://jupyter.org/ 150 | .. _vega_datasets: http://github.com/altair-viz/vega_datasets 151 | .. _Iris dataset: https://en.wikipedia.org/wiki/Iris_flower_data_set 152 | -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=_build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 10 | set I18NSPHINXOPTS=%SPHINXOPTS% . 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. epub3 to make an epub3 31 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 32 | echo. text to make text files 33 | echo. man to make manual pages 34 | echo. texinfo to make Texinfo files 35 | echo. gettext to make PO message catalogs 36 | echo. changes to make an overview over all changed/added/deprecated items 37 | echo. xml to make Docutils-native XML files 38 | echo. pseudoxml to make pseudoxml-XML files for display purposes 39 | echo. linkcheck to check all external links for integrity 40 | echo. doctest to run all doctests embedded in the documentation if enabled 41 | echo. coverage to run coverage check of the documentation if enabled 42 | echo. dummy to check syntax errors of document sources 43 | goto end 44 | ) 45 | 46 | if "%1" == "clean" ( 47 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 48 | del /q /s %BUILDDIR%\* 49 | goto end 50 | ) 51 | 52 | 53 | REM Check if sphinx-build is available and fallback to Python version if any 54 | %SPHINXBUILD% 1>NUL 2>NUL 55 | if errorlevel 9009 goto sphinx_python 56 | goto sphinx_ok 57 | 58 | :sphinx_python 59 | 60 | set SPHINXBUILD=python -m sphinx.__init__ 61 | %SPHINXBUILD% 2> nul 62 | if errorlevel 9009 ( 63 | echo. 64 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 65 | echo.installed, then set the SPHINXBUILD environment variable to point 66 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 67 | echo.may add the Sphinx directory to PATH. 68 | echo. 69 | echo.If you don't have Sphinx installed, grab it from 70 | echo.http://sphinx-doc.org/ 71 | exit /b 1 72 | ) 73 | 74 | :sphinx_ok 75 | 76 | 77 | if "%1" == "html" ( 78 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 79 | if errorlevel 1 exit /b 1 80 | echo. 81 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 82 | goto end 83 | ) 84 | 85 | if "%1" == "dirhtml" ( 86 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 87 | if errorlevel 1 exit /b 1 88 | echo. 89 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 90 | goto end 91 | ) 92 | 93 | if "%1" == "singlehtml" ( 94 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 95 | if errorlevel 1 exit /b 1 96 | echo. 97 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 98 | goto end 99 | ) 100 | 101 | if "%1" == "pickle" ( 102 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 103 | if errorlevel 1 exit /b 1 104 | echo. 105 | echo.Build finished; now you can process the pickle files. 106 | goto end 107 | ) 108 | 109 | if "%1" == "json" ( 110 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 111 | if errorlevel 1 exit /b 1 112 | echo. 113 | echo.Build finished; now you can process the JSON files. 114 | goto end 115 | ) 116 | 117 | if "%1" == "htmlhelp" ( 118 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 119 | if errorlevel 1 exit /b 1 120 | echo. 121 | echo.Build finished; now you can run HTML Help Workshop with the ^ 122 | .hhp project file in %BUILDDIR%/htmlhelp. 123 | goto end 124 | ) 125 | 126 | if "%1" == "qthelp" ( 127 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 128 | if errorlevel 1 exit /b 1 129 | echo. 130 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 131 | .qhcp project file in %BUILDDIR%/qthelp, like this: 132 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\altair.qhcp 133 | echo.To view the help file: 134 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\altair.ghc 135 | goto end 136 | ) 137 | 138 | if "%1" == "devhelp" ( 139 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 140 | if errorlevel 1 exit /b 1 141 | echo. 142 | echo.Build finished. 143 | goto end 144 | ) 145 | 146 | if "%1" == "epub" ( 147 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 148 | if errorlevel 1 exit /b 1 149 | echo. 150 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 151 | goto end 152 | ) 153 | 154 | if "%1" == "epub3" ( 155 | %SPHINXBUILD% -b epub3 %ALLSPHINXOPTS% %BUILDDIR%/epub3 156 | if errorlevel 1 exit /b 1 157 | echo. 158 | echo.Build finished. The epub3 file is in %BUILDDIR%/epub3. 159 | goto end 160 | ) 161 | 162 | if "%1" == "latex" ( 163 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 164 | if errorlevel 1 exit /b 1 165 | echo. 166 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 167 | goto end 168 | ) 169 | 170 | if "%1" == "latexpdf" ( 171 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 172 | cd %BUILDDIR%/latex 173 | make all-pdf 174 | cd %~dp0 175 | echo. 176 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 177 | goto end 178 | ) 179 | 180 | if "%1" == "latexpdfja" ( 181 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 182 | cd %BUILDDIR%/latex 183 | make all-pdf-ja 184 | cd %~dp0 185 | echo. 186 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 187 | goto end 188 | ) 189 | 190 | if "%1" == "text" ( 191 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 192 | if errorlevel 1 exit /b 1 193 | echo. 194 | echo.Build finished. The text files are in %BUILDDIR%/text. 195 | goto end 196 | ) 197 | 198 | if "%1" == "man" ( 199 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 200 | if errorlevel 1 exit /b 1 201 | echo. 202 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 203 | goto end 204 | ) 205 | 206 | if "%1" == "texinfo" ( 207 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 208 | if errorlevel 1 exit /b 1 209 | echo. 210 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 211 | goto end 212 | ) 213 | 214 | if "%1" == "gettext" ( 215 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 216 | if errorlevel 1 exit /b 1 217 | echo. 218 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 219 | goto end 220 | ) 221 | 222 | if "%1" == "changes" ( 223 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 224 | if errorlevel 1 exit /b 1 225 | echo. 226 | echo.The overview file is in %BUILDDIR%/changes. 227 | goto end 228 | ) 229 | 230 | if "%1" == "linkcheck" ( 231 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 232 | if errorlevel 1 exit /b 1 233 | echo. 234 | echo.Link check complete; look for any errors in the above output ^ 235 | or in %BUILDDIR%/linkcheck/output.txt. 236 | goto end 237 | ) 238 | 239 | if "%1" == "doctest" ( 240 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 241 | if errorlevel 1 exit /b 1 242 | echo. 243 | echo.Testing of doctests in the sources finished, look at the ^ 244 | results in %BUILDDIR%/doctest/output.txt. 245 | goto end 246 | ) 247 | 248 | if "%1" == "coverage" ( 249 | %SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage 250 | if errorlevel 1 exit /b 1 251 | echo. 252 | echo.Testing of coverage in the sources finished, look at the ^ 253 | results in %BUILDDIR%/coverage/python.txt. 254 | goto end 255 | ) 256 | 257 | if "%1" == "xml" ( 258 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 259 | if errorlevel 1 exit /b 1 260 | echo. 261 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 262 | goto end 263 | ) 264 | 265 | if "%1" == "pseudoxml" ( 266 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 267 | if errorlevel 1 exit /b 1 268 | echo. 269 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 270 | goto end 271 | ) 272 | 273 | if "%1" == "dummy" ( 274 | %SPHINXBUILD% -b dummy %ALLSPHINXOPTS% %BUILDDIR%/dummy 275 | if errorlevel 1 exit /b 1 276 | echo. 277 | echo.Build finished. Dummy builder generates no files. 278 | goto end 279 | ) 280 | 281 | :end 282 | -------------------------------------------------------------------------------- /pdvega/tests/test_core_common.py: -------------------------------------------------------------------------------- 1 | """Common tests for all plotting routines""" 2 | import pytest 3 | 4 | import pandas as pd 5 | import pdvega 6 | 7 | from .utils import validate_vegalite 8 | 9 | 10 | @pytest.fixture 11 | def data(): 12 | """A dataframe with quantitative and nominal columns""" 13 | return pd.DataFrame({ 14 | 'x': range(10), 15 | 'y': range(10), 16 | 'z': range(10), 17 | 'a': list('ABCABCABCA'), 18 | 'b': list('ABCABCABCA') 19 | }) 20 | 21 | 22 | other_chart = pd.Series(range(10)).vgplot(kind='line') 23 | AXES = [ 24 | (None, pdvega.alt.Chart), 25 | (other_chart, pdvega.alt.LayerChart), 26 | (pdvega.alt.layer(other_chart), pdvega.alt.LayerChart) 27 | ] 28 | 29 | FRAME_TEST_CASES = { 30 | 'line': { 31 | 'usecols': ['x', 'y', 'z'], 32 | }, 33 | 'bar': { 34 | 'usecols': ['x', 'y', 'z'], 35 | }, 36 | 'barh': { 37 | 'usecols': ['x', 'y', 'z'], 38 | }, 39 | 'area': { 40 | 'usecols': ['x', 'y', 'z'], 41 | }, 42 | 'scatter': { 43 | 'usecols': ['x', 'y', 'a', 'b'], 44 | 'kwds': {'x': 'x', 'y': 'y', 'c': 'a', 's': 'b'} 45 | }, 46 | 'hist': { 47 | 'usecols': ['x', 'y', 'z'], 48 | }, 49 | 'hexbin': { 50 | 'usecols': ['x', 'y', 'z'], 51 | 'kwds': {'x': 'x', 'y': 'y'} 52 | }, 53 | 'kde': { 54 | 'usecols': ['x', 'y', 'z'], 55 | }, 56 | 'density': { 57 | 'usecols': ['x', 'y', 'z'], 58 | } 59 | } 60 | 61 | SERIES_TEST_CASES = { 62 | 'line': { 63 | 'col': 'x' 64 | }, 65 | 'bar': { 66 | 'col': 'x' 67 | }, 68 | 'barh': { 69 | 'col': 'x' 70 | }, 71 | 'area': { 72 | 'col': 'x' 73 | }, 74 | 'hist': { 75 | 'col': 'x' 76 | }, 77 | 'kde': { 78 | 'col': 'x' 79 | }, 80 | 'density': { 81 | 'col': 'x' 82 | } 83 | } 84 | 85 | 86 | def is_stackable(kind): 87 | return kind in {'bar', 'barh', 'area', 'hist'} 88 | 89 | 90 | @pytest.mark.parametrize('kind,info', SERIES_TEST_CASES.items()) 91 | def test_series_plot_interactive(data, kind, info): 92 | col = info['col'] 93 | kwds = info.get('kwds', {}) 94 | data = data[col] 95 | 96 | spec = data.vgplot(kind=kind, **kwds) 97 | validate_vegalite(spec) 98 | assert 'selection' not in spec.to_dict() 99 | 100 | spec = data.vgplot(kind=kind, **kwds).interactive() 101 | validate_vegalite(spec) 102 | s = spec.to_dict() 103 | assert next(iter(s['selection'].values())) == {'bind': 'scales', 'encodings': ['x', 'y'], 'type': 'interval'} 104 | 105 | 106 | @pytest.mark.parametrize('kind,info', FRAME_TEST_CASES.items()) 107 | def test_frame_plot_interactive(data, kind, info): 108 | cols = info['usecols'] 109 | kwds = info.get('kwds', {}) 110 | data = data[cols] 111 | 112 | chart = data.vgplot(kind=kind, **kwds) 113 | validate_vegalite(chart) 114 | assert 'selection' not in chart.to_dict() 115 | 116 | chart = data.vgplot(kind=kind, **kwds).interactive() 117 | validate_vegalite(chart) 118 | s = chart.to_dict() 119 | assert next(iter(s['selection'].values())) == {'bind': 'scales', 'encodings': ['x', 'y'], 'type': 'interval'} 120 | 121 | 122 | @pytest.mark.parametrize('kind,info', SERIES_TEST_CASES.items()) 123 | def test_series_plot_alpha(data, kind, info): 124 | col = info['col'] 125 | kwds = info.get('kwds', {}) 126 | data = data[col] 127 | 128 | chart = data.vgplot(kind=kind, alpha=0.5, **kwds) 129 | validate_vegalite(chart) 130 | encoding = chart['encoding'].to_dict() 131 | assert 'opacity' in encoding, encoding.keys() 132 | assert encoding['opacity']['value'] == 0.5 133 | 134 | chart = data.vgplot(kind=kind, **kwds) 135 | validate_vegalite(chart) 136 | assert 'opacity' not in chart['encoding'].to_dict() 137 | 138 | 139 | @pytest.mark.parametrize('kind,info', SERIES_TEST_CASES.items()) 140 | @pytest.mark.parametrize('ax', AXES) 141 | def test_series_plot_ax(data, kind, info, ax): 142 | col = info['col'] 143 | kwds = info.get('kwds', {}) 144 | data = data[col] 145 | 146 | chart = data.vgplot(kind=kind, ax=ax[0], **kwds) 147 | validate_vegalite(chart) 148 | assert isinstance(chart, ax[1]) 149 | 150 | 151 | @pytest.mark.parametrize('kind,info', FRAME_TEST_CASES.items()) 152 | def test_frame_plot_alpha(data, kind, info): 153 | cols = info['usecols'] 154 | kwds = info.get('kwds', {}) 155 | data = data[cols] 156 | 157 | # if alpha is explicitly specified, then opacity should be in the spec 158 | chart = data.vgplot(kind=kind, alpha=0.5, **kwds) 159 | validate_vegalite(chart) 160 | assert chart['encoding'].to_dict()['opacity']['value'] == 0.5 161 | 162 | if is_stackable(kind): 163 | # stackable plots have a default opacity when not stacked 164 | chart = data.vgplot(kind=kind, stacked=False, **kwds) 165 | validate_vegalite(chart) 166 | assert chart['encoding'].to_dict()['opacity']['value'] == 0.7 167 | 168 | # if only one column is being plotted, then should have no opacity 169 | chart = data[cols[:1]].vgplot(kind=kind, stacked=False, **kwds) 170 | validate_vegalite(chart) 171 | assert 'opacity' not in chart['encoding'].to_dict() 172 | 173 | # if stacked, then should have no opacity 174 | chart = data.vgplot(kind=kind, stacked=True, **kwds) 175 | validate_vegalite(chart) 176 | assert 'opacity' not in chart['encoding'].to_dict() 177 | else: 178 | # non-stackable plots have no default opacity 179 | chart = data.vgplot(kind=kind, **kwds) 180 | validate_vegalite(chart) 181 | assert 'opacity' not in chart['encoding'].to_dict() 182 | 183 | 184 | @pytest.mark.parametrize('kind,info', FRAME_TEST_CASES.items()) 185 | @pytest.mark.parametrize('ax', AXES) 186 | def test_frame_plot_ax(data, kind, info, ax): 187 | cols = info['usecols'] 188 | kwds = info.get('kwds', {}) 189 | data = data[cols] 190 | 191 | chart = data.vgplot(kind=kind, ax=ax[0], **kwds) 192 | validate_vegalite(chart) 193 | assert isinstance(chart, ax[1]) 194 | 195 | 196 | @pytest.mark.parametrize('kind,info', SERIES_TEST_CASES.items()) 197 | def test_series_plot_width_height(data, kind, info): 198 | col = info['col'] 199 | kwds = info.get('kwds', {}) 200 | data = data[col] 201 | 202 | spec = data.vgplot(kind=kind, width=300, height=200, **kwds) 203 | validate_vegalite(spec) 204 | assert (spec['width'], spec['height']) == (300, 200) 205 | 206 | spec = data.vgplot(kind=kind, **kwds) 207 | validate_vegalite(spec) 208 | s = spec.to_dict() 209 | assert (s['width'], s['height']) == (450, 300) 210 | 211 | 212 | @pytest.mark.parametrize('kind,info', FRAME_TEST_CASES.items()) 213 | def test_frame_plot_width_height(data, kind, info): 214 | cols = info['usecols'] 215 | kwds = info.get('kwds', {}) 216 | data = data[cols] 217 | 218 | spec = data.vgplot(kind=kind, width=300, height=200, **kwds) 219 | validate_vegalite(spec) 220 | s = spec.to_dict() 221 | assert (s['width'], s['height']) == (300, 200) 222 | 223 | spec = data.vgplot(kind=kind, **kwds) 224 | validate_vegalite(spec) 225 | s = spec.to_dict() 226 | assert (s['width'], s['height']) == (450, 300) 227 | 228 | 229 | @pytest.mark.parametrize('kind,info', SERIES_TEST_CASES.items()) 230 | def test_series_plot_kwd_warnings(data, kind, info): 231 | col = info['col'] 232 | kwds = info.get('kwds', {}) 233 | data = data[col] 234 | 235 | with pytest.warns(UserWarning, match="Unrecognized keywords in vgplot.[a-z]+\(\): 'unrecognized_arg'"): 236 | data.vgplot(kind=kind, unrecognized_arg=None, **kwds) 237 | 238 | with pytest.warns(UserWarning): 239 | data.vgplot(kind=kind, unrecognized1=None, unrecognized2=None, **kwds) 240 | 241 | 242 | @pytest.mark.parametrize('kind,info', FRAME_TEST_CASES.items()) 243 | def test_frame_plot_kwd_warnings(data, kind, info): 244 | cols = info['usecols'] 245 | kwds = info.get('kwds', {}) 246 | data = data[cols] 247 | 248 | with pytest.warns(UserWarning, match="Unrecognized keywords in vgplot.[a-z]+\(\): 'unrecognized_arg'"): 249 | data.vgplot(kind=kind, unrecognized_arg=None, **kwds) 250 | 251 | with pytest.warns(UserWarning): 252 | data.vgplot(kind=kind, unrecognized1=None, unrecognized2=None, **kwds) 253 | 254 | 255 | @pytest.mark.parametrize('kind,info', SERIES_TEST_CASES.items()) 256 | def test_series_figsize(data, kind, info): 257 | col = info['col'] 258 | kwds = info.get('kwds', {}) 259 | data = data[col] 260 | 261 | chart = data.vgplot(kind=kind, figsize=(10, 10), dpi=72, **kwds) 262 | assert chart.height == 10 * 72 * 0.8 263 | assert chart.width == 10 * 72 * 0.8 264 | 265 | chart = data.vgplot(kind=kind, width=100, height=100, **kwds) 266 | assert chart.height == 100 267 | assert chart.width == 100 268 | 269 | 270 | @pytest.mark.parametrize('kind,info', FRAME_TEST_CASES.items()) 271 | def test_frame_plot_figsize(data, kind, info): 272 | cols = info['usecols'] 273 | kwds = info.get('kwds', {}) 274 | data = data[cols] 275 | 276 | chart = data.vgplot(kind=kind, figsize=(10, 10), dpi=72, **kwds) 277 | assert chart.height == 10 * 72 * 0.8 278 | assert chart.width == 10 * 72 * 0.8 279 | 280 | chart = data.vgplot(kind=kind, width=100, height=100, **kwds) 281 | assert chart.height == 100 282 | assert chart.width == 100 283 | 284 | 285 | @pytest.mark.parametrize('kind,info', SERIES_TEST_CASES.items()) 286 | def test_series_title(data, kind, info): 287 | col = info['col'] 288 | kwds = info.get('kwds', {}) 289 | data = data[col] 290 | 291 | title = 'Test' 292 | chart = data.vgplot(kind=kind, title=title, **kwds) 293 | assert chart.title == title 294 | 295 | 296 | @pytest.mark.parametrize('kind,info', FRAME_TEST_CASES.items()) 297 | def test_frame_title(data, kind, info): 298 | cols = info['usecols'] 299 | kwds = info.get('kwds', {}) 300 | data = data[cols] 301 | 302 | title = 'Test' 303 | chart = data.vgplot(kind=kind, title=title, **kwds) 304 | assert chart.title == title 305 | -------------------------------------------------------------------------------- /pdvega/plotting.py: -------------------------------------------------------------------------------- 1 | """Core plotting routines""" 2 | import warnings 3 | import altair as alt 4 | import numpy as np 5 | import pandas as pd 6 | 7 | from ._utils import infer_vegalite_type 8 | 9 | __all__ = ["scatter_matrix", "andrews_curves", "parallel_coordinates", "lag_plot"] 10 | 11 | 12 | def scatter_matrix(frame, c=None, s=None, figsize=None, dpi=72.0, **kwds): 13 | """Draw a matrix of scatter plots. 14 | 15 | The result is an interactive pan/zoomable plot, with linked-brushing 16 | enabled by holding the shift key. 17 | 18 | Parameters 19 | ---------- 20 | frame : DataFrame 21 | The dataframe for which to draw the scatter matrix. 22 | c : string (optional) 23 | If specified, the name of the column to be used to determine the 24 | color of each point. 25 | s : string (optional) 26 | If specified, the name of the column to be used to determine the 27 | size of each point, 28 | figsize : tuple (optional) 29 | A length-2 tuple speficying the size of the figure in inches 30 | dpi : float (default=72) 31 | The dots (i.e. pixels) per inch used to convert the figure size from 32 | inches to pixels. 33 | 34 | Returns 35 | ------- 36 | chart: alt.Chart object 37 | The alt.Chart representation of the plot. 38 | 39 | See Also 40 | -------- 41 | pandas.plotting.scatter_matrix : matplotlib version of this routine 42 | """ 43 | if kwds: 44 | warnings.warn( 45 | "Unrecognized keywords in pdvega.scatter_matrix: {0}" 46 | "".format(list(kwds.keys())) 47 | ) 48 | 49 | cols = [ 50 | col 51 | for col in frame.columns 52 | if col not in [c, s] 53 | if infer_vegalite_type(frame[col], ordinal_threshold=0) == "quantitative" 54 | ] 55 | 56 | spec = { 57 | "$schema": "https://vega.github.io/schema/vega-lite/v2.json", 58 | "repeat": {"row": cols, "column": cols[::-1]}, 59 | "spec": { 60 | "mark": "point", 61 | "selection": { 62 | "brush": { 63 | "type": "interval", 64 | "resolve": "union", 65 | "on": "[mousedown[event.shiftKey], window:mouseup] > window:mousemove!", 66 | "translate": "[mousedown[event.shiftKey], window:mouseup] > window:mousemove!", 67 | "zoom": "wheel![event.shiftKey]", 68 | }, 69 | "grid": { 70 | "type": "interval", 71 | "resolve": "global", 72 | "bind": "scales", 73 | "translate": "[mousedown[!event.shiftKey], window:mouseup] > window:mousemove!", 74 | "zoom": "wheel![!event.shiftKey]", 75 | }, 76 | }, 77 | "encoding": { 78 | "x": {"field": {"repeat": "column"}, "type": "quantitative"}, 79 | "y": {"field": {"repeat": "row"}, "type": "quantitative"}, 80 | "color": {"condition": {"selection": "brush"}, "value": "grey"}, 81 | }, 82 | }, 83 | } 84 | 85 | if figsize is not None: 86 | width_inches, height_inches = figsize 87 | spec["spec"]["width"] = 0.8 * dpi * width_inches / len(cols) 88 | spec["spec"]["height"] = 0.8 * dpi * height_inches / len(cols) 89 | 90 | if s is not None: 91 | spec["spec"]["encoding"]["size"] = { 92 | "field": s, "type": infer_vegalite_type(frame[s]) 93 | } 94 | 95 | cond = spec["spec"]["encoding"]["color"]["condition"] 96 | if c is None: 97 | cond["value"] = "steelblue" 98 | else: 99 | cond["field"] = c 100 | cond["type"] = infer_vegalite_type(frame[c]) 101 | 102 | chart = alt.Chart().from_dict(spec) 103 | chart.data = frame 104 | return chart 105 | 106 | 107 | def andrews_curves( 108 | data, class_column, samples=200, alpha=None, width=450, height=300, **kwds 109 | ): 110 | """ 111 | Generates an Andrews curves visualization for visualising clusters of 112 | multivariate data. 113 | 114 | Andrews curves have the functional form: 115 | 116 | f(t) = x_1/sqrt(2) + x_2 sin(t) + x_3 cos(t) + 117 | x_4 sin(2t) + x_5 cos(2t) + ... 118 | 119 | Where x coefficients correspond to the values of each dimension and t is 120 | linearly spaced between -pi and +pi. Each row of frame then corresponds to 121 | a single curve. 122 | 123 | Parameters: 124 | ----------- 125 | data : DataFrame 126 | Data to be plotted, preferably normalized to (0.0, 1.0) 127 | class_column : string 128 | Name of the column containing class names 129 | samples : integer 130 | Number of points to plot in each curve 131 | alpha: float, optional 132 | The transparency of the lines 133 | width : int, optional 134 | the width of the plot in pixels 135 | height : int, optional 136 | the height of the plot in pixels 137 | **kwds: keywords 138 | Additional options 139 | 140 | Returns: 141 | -------- 142 | chart: alt.Chart object 143 | 144 | """ 145 | if kwds: 146 | warnings.warn( 147 | "Unrecognized keywords in pdvega.andrews_curves(): {0}" 148 | "".format(list(kwds.keys())) 149 | ) 150 | 151 | t = np.linspace(-np.pi, np.pi, samples) 152 | vals = data.drop(class_column, axis=1).values.T 153 | 154 | curves = np.outer(vals[0], np.ones_like(t)) 155 | for i in range(1, len(vals)): 156 | ft = ((i + 1) // 2) * t 157 | if i % 2 == 1: 158 | curves += np.outer(vals[i], np.sin(ft)) 159 | else: 160 | curves += np.outer(vals[i], np.cos(ft)) 161 | 162 | df = pd.DataFrame( 163 | { 164 | "t": np.tile(t, curves.shape[0]), 165 | "sample": np.repeat(np.arange(curves.shape[0]), curves.shape[1]), 166 | " ": curves.ravel(), 167 | class_column: np.repeat(data[class_column], samples), 168 | } 169 | ) 170 | 171 | chart = alt.Chart(df).properties(width=width, height=height).mark_line() 172 | chart = chart.encode( 173 | x=alt.X(field="t", type="quantitative"), 174 | y=alt.Y(field=" ", type="quantitative"), 175 | color=alt.Color(field=class_column, type=infer_vegalite_type(df[class_column])), 176 | detail=alt.Detail(field='sample', type="quantitative") 177 | ) 178 | 179 | if alpha is None and df[class_column].nunique() > 20: 180 | alpha = 0.5 181 | 182 | if alpha is not None: 183 | assert 0 <= alpha <= 1 184 | return chart.encode(opacity=alt.value(alpha)) 185 | 186 | return chart 187 | 188 | 189 | def parallel_coordinates( 190 | data, 191 | class_column, 192 | cols=None, 193 | alpha=None, 194 | width=450, 195 | height=300, 196 | interactive=True, 197 | var_name="variable", 198 | value_name="value", 199 | **kwds 200 | ): 201 | """ 202 | Parallel coordinates plotting. 203 | 204 | Parameters 205 | ---------- 206 | frame: DataFrame 207 | class_column: str 208 | Column name containing class names 209 | cols: list, optional 210 | A list of column names to use 211 | alpha: float, optional 212 | The transparency of the lines 213 | interactive : bool, optional 214 | if True (default) then produce an interactive plot 215 | width : int, optional 216 | the width of the plot in pixels 217 | height : int, optional 218 | the height of the plot in pixels 219 | var_name : string, optional 220 | the legend title 221 | value_name : string, optional 222 | the y-axis label 223 | 224 | Returns 225 | ------- 226 | chart: alt.Chart object 227 | The altair representation of the plot. 228 | 229 | See Also 230 | -------- 231 | pandas.plotting.parallel_coordinates : matplotlib version of this routine 232 | """ 233 | if kwds: 234 | warnings.warn( 235 | "Unrecognized keywords in pdvega.scatter_matrix: {0}" 236 | "".format(list(kwds.keys())) 237 | ) 238 | 239 | # Transform the dataframe to be used in Vega-Lite 240 | if cols is not None: 241 | data = data[list(cols) + [class_column]] 242 | cols = data.columns 243 | df = data.reset_index() 244 | index = (set(df.columns) - set(cols)).pop() 245 | assert index in df.columns 246 | df = df.melt([index, class_column], var_name=var_name, value_name=value_name) 247 | 248 | chart = alt.Chart(df).properties(width=width, height=height) 249 | chart = chart.mark_line().encode( 250 | x=alt.X(field=var_name, type=infer_vegalite_type(df[var_name])), 251 | y=alt.Y(field=value_name, type=infer_vegalite_type(df[value_name])), 252 | color=alt.Color(field=class_column, type=infer_vegalite_type(df[class_column])), 253 | detail=alt.Detail(field=index, type=infer_vegalite_type(df[index])) 254 | ) 255 | 256 | if alpha is None and df[class_column].nunique() > 20: 257 | alpha = 0.3 258 | 259 | if alpha is not None: 260 | assert 0 <= alpha <= 1 261 | return chart.encode(opacity=alt.value(alpha)) 262 | return chart 263 | 264 | 265 | def lag_plot(data, lag=1, kind="scatter", **kwds): 266 | """Lag plot for time series. 267 | 268 | Parameters 269 | ---------- 270 | data: pandas.Series 271 | the time series to plot 272 | lag: integer 273 | The lag of the scatter plot, default=1 274 | kind: string 275 | The kind of plot to use (e.g. 'scatter', 'line') 276 | **kwds: 277 | Additional keywords passed to data.vgplot.scatter 278 | 279 | Returns 280 | ------- 281 | chart: alt.Chart object 282 | """ 283 | if lag != int(lag) or int(lag) <= 0: 284 | raise ValueError("lag must be a positive integer") 285 | lag = int(lag) 286 | 287 | values = data.values 288 | y1 = "y(t)" 289 | y2 = "y(t + {0})".format(lag) 290 | lags = pd.DataFrame({y1: values[:-lag].T.ravel(), y2: values[lag:].T.ravel()}) 291 | 292 | if isinstance(data, pd.DataFrame): 293 | lags["variable"] = np.repeat(data.columns, lags.shape[0] / data.shape[1]) 294 | kwds["c"] = "variable" 295 | 296 | return lags.vgplot(kind=kind, x=y1, y=y2, **kwds) 297 | -------------------------------------------------------------------------------- /doc/core.rst: -------------------------------------------------------------------------------- 1 | .. _core-plotting: 2 | 3 | Simple Visualizations with ``data.vgplot`` 4 | ========================================== 5 | 6 | The core interface of ``pdvega`` is the ``vgplot`` attribute that it adds to 7 | Pandas ``DataFrame`` and ``Series`` objects:: 8 | 9 | import pdvega 10 | 11 | .. pdvega-setup:: 12 | 13 | import pdvega 14 | 15 | from vega_datasets import data 16 | iris = data.iris() 17 | 18 | Like the ``plot`` attribute that is built-in to Pandas, there are two ways of 19 | creating plots with ``vgplot``: first, you can call the ``vgplot`` attribute 20 | of a Pandas object directly: 21 | 22 | .. pdvega-plot:: 23 | 24 | from vega_datasets import data 25 | iris = data.iris() 26 | 27 | iris.vgplot(kind='scatter', x='sepalLength', y='petalLength', c='species') 28 | 29 | Equivalently, you can call the specific method associated with each plot type: 30 | 31 | .. pdvega-plot:: 32 | 33 | iris.vgplot.scatter(x='sepalLength', y='petalLength', c='species') 34 | 35 | The benefit of the second approach is that it allows exploration of available 36 | plot types via tab completion, and the individual functions also provide more 37 | detailed documentation of the arguments available for each method. 38 | 39 | The ``vgplot`` interface exposes nine basic plot types; we will show examples 40 | of these below. 41 | 42 | Datasets 43 | -------- 44 | For the examples on this page, we will use a number of datasets made available 45 | in the `vega_datasets`_ package: 46 | 47 | .. pdvega-setup:: 48 | :show: 49 | 50 | iris = data.iris() 51 | stocks = data.stocks(pivoted=True) 52 | cars = data.cars() 53 | 54 | These datasets are stored in the form of pandas dataframes:: 55 | 56 | >>> iris.head() 57 | petalLength petalWidth sepalLength sepalWidth species 58 | 0 1.4 0.2 5.1 3.5 setosa 59 | 1 1.4 0.2 4.9 3.0 setosa 60 | 2 1.3 0.2 4.7 3.2 setosa 61 | 3 1.5 0.2 4.6 3.1 setosa 62 | 4 1.4 0.2 5.0 3.6 setosa 63 | 64 | >>> stocks.head() 65 | symbol AAPL AMZN GOOG IBM MSFT 66 | date 67 | 2000-01-01 25.94 64.56 NaN 100.52 39.81 68 | 2000-02-01 28.66 68.87 NaN 92.11 36.35 69 | 2000-03-01 33.95 67.00 NaN 106.11 43.22 70 | 2000-04-01 31.01 55.19 NaN 99.95 28.37 71 | 2000-05-01 21.00 48.31 NaN 96.31 25.45 72 | 73 | 74 | >>> cars.head() 75 | Acceleration Cylinders Displacement Horsepower Miles_per_Gallon \ 76 | 0 12.0 8 307.0 130.0 18.0 77 | 1 11.5 8 350.0 165.0 15.0 78 | 2 11.0 8 318.0 150.0 18.0 79 | 3 12.0 8 304.0 150.0 16.0 80 | 4 10.5 8 302.0 140.0 17.0 81 | 82 | Name Origin Weight_in_lbs Year 83 | 0 chevrolet chevelle malibu USA 3504 1970-01-01 84 | 1 buick skylark 320 USA 3693 1970-01-01 85 | 2 plymouth satellite USA 3436 1970-01-01 86 | 3 amc rebel sst USA 3433 1970-01-01 87 | 4 ford torino USA 3449 1970-01-01 88 | 89 | .. _vgplot-line: 90 | 91 | Line Plots with ``vgplot.line`` 92 | ------------------------------- 93 | The default plot type for ``vgplot`` is a line plot: 94 | 95 | .. pdvega-plot:: 96 | 97 | stocks.vgplot() 98 | 99 | Unless otherwise specified, the index of the DataFrame or series is used as the 100 | x-axis variable, and a separate line will be created for the y-values in each 101 | column in the dataframe. If you'd like to plot a subset of the columns, you can use 102 | pandas indexing to select the columns you are interested in: 103 | 104 | .. pdvega-plot:: 105 | 106 | stocks[['AAPL', 'AMZN']].vgplot.line() 107 | 108 | Optionally, you can specify the column names to use for the x-axis and y-axis: 109 | 110 | .. pdvega-plot:: 111 | 112 | stocks.vgplot.line(x='AAPL', y='AMZN') 113 | 114 | Line plots can be further customized; see the function documentation for 115 | more information: 116 | 117 | - Series line plot: :meth:`pdvega.SeriesPlotMethods.line` 118 | - DataFrame line plot: :meth:`pdvega.FramePlotMethods.line` 119 | 120 | .. _vgplot-scatter: 121 | 122 | Scatter Plots with ``vgplot.scatter`` 123 | ------------------------------------- 124 | The previous plot might make more sense in the form of a scatter plot. 125 | This can be done with ``vgplot.scatter()``: 126 | 127 | .. pdvega-plot:: 128 | 129 | stocks.vgplot.scatter(x='AAPL', y='AMZN') 130 | 131 | You can also encode the color and size of scatter plots; let's switch to the 132 | cars dataset to see the relationship between some of these variables: 133 | 134 | .. pdvega-plot:: 135 | 136 | cars.vgplot.scatter(x='Horsepower', y='Miles_per_Gallon', 137 | c='Origin', s='Weight_in_lbs', alpha=0.5) 138 | 139 | This is one slight difference from the Pandas plot interface: in Pandas the 140 | ``c`` and ``s`` parameters must be passed as arrays, while here we pass them 141 | as column names. 142 | 143 | Scatter plots can be further customized; see :meth:`pdvega.FramePlotMethods.scatter` 144 | for more information. 145 | 146 | .. _vgplot-area: 147 | 148 | Area Plots with ``vgplot.area`` 149 | ------------------------------- 150 | Area plots are quite similar to line plots, but curves are filled and stacked, 151 | meaning the top curve reflects the sum of all the ones below: 152 | 153 | .. pdvega-plot:: 154 | 155 | stocks[['MSFT', 'AAPL', 'AMZN']].vgplot.area() 156 | 157 | 158 | Area charts can also be unstacked and overlaid, in which case transparency 159 | can be useful: 160 | 161 | .. pdvega-plot:: 162 | 163 | stocks[['MSFT', 'AAPL', 'AMZN']].vgplot.area(stacked=False, alpha=0.4) 164 | 165 | Area plots can be further customized; see the function documentation for 166 | more information: 167 | 168 | - Series area plot: :meth:`pdvega.SeriesPlotMethods.area` 169 | - DataFrame area plot: :meth:`pdvega.FramePlotMethods.area` 170 | 171 | .. _vgplot-bar: 172 | 173 | Bar Charts with ``vgplot.bar`` 174 | ------------------------------ 175 | 176 | Bar charts are supported using ``vgplot.bar()``. Let's create a small dataset 177 | to use for this: 178 | 179 | .. pdvega-setup:: 180 | :show: 181 | 182 | import numpy as np 183 | import pandas as pd 184 | np.random.seed(1234) 185 | 186 | df = pd.DataFrame(np.random.rand(10, 2), columns=['a', 'b']) 187 | 188 | .. pdvega-plot:: 189 | 190 | df.vgplot.bar() 191 | 192 | Multiple bar plots will be layered on top of each other; like with area charts, 193 | they can be stacked using the ``stacked=True`` option: 194 | 195 | .. pdvega-plot:: 196 | 197 | df.vgplot.bar(stacked=True) 198 | 199 | Additionally, horizontal bar plots can be created with ``barh``: 200 | 201 | .. pdvega-plot:: 202 | 203 | df.vgplot.barh(stacked=True) 204 | 205 | Bar charts can be further customized; see the function documentation for 206 | more information: 207 | 208 | - Series bar plots: :meth:`pdvega.SeriesPlotMethods.bar`, :meth:`pdvega.SeriesPlotMethods.barh` 209 | - DataFrame bar plots: :meth:`pdvega.FramePlotMethods.bar`, :meth:`pdvega.FramePlotMethods.barh` 210 | 211 | 212 | .. _vgplot-hist: 213 | 214 | Histograms with ``vgplot.hist`` 215 | ------------------------------- 216 | Histograms can be created with the ``vgplot.hist()`` method. 217 | 218 | Let's create some data to make some distributions: 219 | 220 | .. pdvega-setup:: 221 | :show: 222 | 223 | import pandas as pd 224 | import numpy as np 225 | df = pd.DataFrame({'a': np.random.randn(1000) + 1, 226 | 'b': np.random.randn(1000), 227 | 'c': np.random.randn(1000) - 1}, 228 | columns=['a', 'b', 'c']) 229 | 230 | We'll specify 50 bins, and create a layered histogram with a 50% transparency: 231 | 232 | .. pdvega-plot:: 233 | 234 | df.vgplot.hist(bins=50, alpha=0.5) 235 | 236 | Alternatively, we can stack the histogram, and use ``histtype`` to specify that 237 | we want a filled step chart rather than a bar chart: 238 | 239 | .. pdvega-plot:: 240 | 241 | df.vgplot.hist(histtype='stepfilled', stacked=True, bins=50) 242 | 243 | Histograms can be further customized; see the function documentation for 244 | more information: 245 | 246 | - Series histogram: :meth:`pdvega.SeriesPlotMethods.hist` 247 | - DataFrame histogram: :meth:`pdvega.FramePlotMethods.hist` 248 | 249 | .. _vgplot-kde: 250 | 251 | KDE/Density plots with ``vgplot.kde`` 252 | ------------------------------------- 253 | Similar to a histogram is a kernel density estimation plot (kde) which creates 254 | a smooth curve representing the density of points. This can be created with 255 | the ``vgplot.kde`` method. We'll use the same data we did in the histogram 256 | section: 257 | 258 | .. pdvega-plot:: 259 | 260 | df.vgplot.kde() 261 | 262 | KDE plots can be further customized; see the function documentation for 263 | more information: 264 | 265 | - Series kde plots: :meth:`pdvega.SeriesPlotMethods.kde` 266 | - DataFrame kde plots: :meth:`pdvega.FramePlotMethods.kde` 267 | 268 | 269 | .. _vgplot-pie-chart: 270 | 271 | Pie Charts 272 | ---------- 273 | No. 274 | 275 | .. _vgplot-heatmap: 276 | 277 | Heatmaps 278 | -------- 279 | Pandas plotting has a function to create a hexagonally-binned heatmap of 280 | two-dimensional data. Unfortunately neither Vega nor Vega-Lite currently 281 | support hexagonal binning. But they do support cartesian heatmaps, and this 282 | functionality is included in ``pdvega``: 283 | 284 | .. pdvega-plot:: 285 | 286 | df.vgplot.heatmap(x='a', y='b', gridsize=20) 287 | 288 | Here the ``gridsize`` parameter indicates approximately how many grid points 289 | span the plot. Alternatively, instead of computing the count within each bin, 290 | we can compute the mean of a third column, specified by the ``C`` parameter: 291 | 292 | .. pdvega-plot:: 293 | 294 | df.vgplot.heatmap(x='a', y='b', C='c', gridsize=20) 295 | 296 | 297 | Heatmap plots can be further customized; see :meth:`pdvega.FramePlotMethods.heatmap` 298 | for more information. 299 | 300 | Other Plot Types 301 | ---------------- 302 | The above plots are the basic plot types supported by ``pdvega``; more sophisticated 303 | plots are available in the :mod:`pdvega.plotting` module. 304 | For examples of these, refer to :ref:`statistical-plotting`. 305 | 306 | 307 | 308 | .. _vega_datasets: http://github.com/altair-viz/vega_datasets 309 | -------------------------------------------------------------------------------- /doc/sphinxext/pdvega_ext/pdvegaplot.py: -------------------------------------------------------------------------------- 1 | """ 2 | PdVega Plot Sphinx Extension 3 | ============================ 4 | 5 | This extension provides a means of inserting live-rendered PdVega plots within 6 | sphinx documentation. There are two directives defined: ``pdvega-setup`` and 7 | ``altiar-plot``. ``pdvega-setup`` code is used to set-up various options 8 | prior to running the plot code. For example:: 9 | 10 | .. pdvega-setup:: 11 | 12 | import pdvega 13 | import pandas as pd 14 | data = pd.Series([1, 2, 3, 2, 1, 2, 3]) 15 | 16 | .. pdvega-plot:: 17 | 18 | data.plot.line() 19 | 20 | 21 | In the case of the ``pdvega-plot`` code, the *last statement* of the code-block 22 | should evaluate to a pdvega Axes object. 23 | 24 | Options 25 | ------- 26 | The directives have the following options:: 27 | 28 | .. pdvega-setup:: 29 | :show: # if set, then show the setup code as a code block 30 | 31 | pass 32 | 33 | .. pdvega-plot:: 34 | :hide-code: # if set, then hide the code and only show the plot 35 | :code-below: # if set, then code is below rather than above the figure 36 | :alt: text # Alternate text when plot cannot be rendered 37 | :links: editor source export # specify one or more of these options 38 | 39 | Chart() 40 | 41 | Additionally, this extension introduces a global configuration 42 | ``pdvegaplot_links``, set in your ``conf.py`` which is a dictionary 43 | of links that will appear below plots, unless the ``:links:`` option 44 | again overrides it. It should look something like this:: 45 | 46 | # conf.py 47 | # ... 48 | pdvegaplot_links = {'editor': True, 'source': True, 'export': True} 49 | # ... 50 | 51 | If this configuration is not specified, all are set to True. 52 | """ 53 | 54 | import os 55 | import json 56 | import warnings 57 | 58 | import jinja2 59 | 60 | from docutils import nodes 61 | from docutils.parsers.rst import Directive 62 | from docutils.parsers.rst.directives import flag, unchanged 63 | 64 | from sphinx.locale import _ 65 | 66 | from pdvega import Axes 67 | from .utils import exec_then_eval 68 | 69 | # These default URLs can be changed in conf.py; see setup() below. 70 | VEGA_JS_URL_DEFAULT = "https://cdn.jsdelivr.net/npm/vega" 71 | VEGALITE_JS_URL_DEFAULT = "https://cdn.jsdelivr.net/npm/vega-lite" 72 | VEGAEMBED_JS_URL_DEFAULT = "https://cdn.jsdelivr.net/npm/vega-embed" 73 | 74 | 75 | VGL_TEMPLATE = jinja2.Template(""" 76 |
77 | 86 |
87 | """) 88 | 89 | 90 | class pdvega_plot(nodes.General, nodes.Element): 91 | pass 92 | 93 | 94 | class PdVegaSetupDirective(Directive): 95 | has_content = True 96 | 97 | option_spec = {'show': flag} 98 | 99 | def run(self): 100 | env = self.state.document.settings.env 101 | 102 | targetid = "pdvega-plot-{0}".format(env.new_serialno('pdvega-plot')) 103 | targetnode = nodes.target('', '', ids=[targetid]) 104 | 105 | code = '\n'.join(self.content) 106 | 107 | # Here we cache the code for use in later setup 108 | if not hasattr(env, 'pdvega_plot_setup'): 109 | env.pdvega_plot_setup = [] 110 | env.pdvega_plot_setup.append({ 111 | 'docname': env.docname, 112 | 'lineno': self.lineno, 113 | 'code': code, 114 | 'target': targetnode, 115 | }) 116 | 117 | result = [targetnode] 118 | 119 | if 'show' in self.options: 120 | source_literal = nodes.literal_block(code, code) 121 | source_literal['language'] = 'python' 122 | result.append(source_literal) 123 | 124 | return result 125 | 126 | 127 | def purge_pdvega_plot_setup(app, env, docname): 128 | if not hasattr(env, 'pdvega_plot_setup'): 129 | return 130 | env.pdvega_plot_setup = [item for item in env.pdvega_plot_setup 131 | if item['docname'] != docname] 132 | 133 | 134 | DEFAULT_PDVEGAPLOT_LINKS = {'editor': True, 'source': True, 'export': True} 135 | 136 | 137 | def validate_links(links): 138 | if links.strip().lower() == 'none': 139 | return {} 140 | 141 | links = links.strip().split() 142 | diff = set(links) - set(DEFAULT_PDVEGAPLOT_LINKS.keys()) 143 | if diff: 144 | raise ValueError("Following links are invalid: {0}".format(list(diff))) 145 | return dict((link, link in links) for link in DEFAULT_PDVEGAPLOT_LINKS) 146 | 147 | 148 | class PdVegaPlotDirective(Directive): 149 | 150 | has_content = True 151 | 152 | option_spec = {'hide-code': flag, 153 | 'code-below': flag, 154 | 'alt': unchanged, 155 | 'links': validate_links} 156 | 157 | def run(self): 158 | env = self.state.document.settings.env 159 | app = env.app 160 | 161 | show_code = 'hide-code' not in self.options 162 | code_below = 'code-below' in self.options 163 | 164 | setupcode = '\n'.join(item['code'] 165 | for item in getattr(env, 'pdvega_plot_setup', []) 166 | if item['docname'] == env.docname) 167 | 168 | code = '\n'.join(self.content) 169 | 170 | if show_code: 171 | source_literal = nodes.literal_block(code, code) 172 | source_literal['language'] = 'python' 173 | 174 | #get the name of the source file we are currently processing 175 | rst_source = self.state_machine.document['source'] 176 | rst_dir = os.path.dirname(rst_source) 177 | rst_filename = os.path.basename(rst_source) 178 | 179 | # use the source file name to construct a friendly target_id 180 | serialno = env.new_serialno('pdvega-plot') 181 | rst_base = rst_filename.replace('.', '-') 182 | div_id = "{0}-pdvega-plot-{1}".format(rst_base, serialno) 183 | target_id = "{0}-pdvega-source-{1}".format(rst_base, serialno) 184 | target_node = nodes.target('', '', ids=[target_id]) 185 | 186 | # create the node in which the plot will appear; 187 | # this will be processed by html_visit_pdvega_plot 188 | plot_node = pdvega_plot() 189 | plot_node['target_id'] = target_id 190 | plot_node['div_id'] = div_id 191 | plot_node['code'] = code 192 | plot_node['setupcode'] = setupcode 193 | plot_node['relpath'] = os.path.relpath(rst_dir, env.srcdir) 194 | plot_node['rst_source'] = rst_source 195 | plot_node['rst_lineno'] = self.lineno 196 | plot_node['links'] = self.options.get('links', app.builder.config.pdvegaplot_links) 197 | plot_node['url_root'] = app.config.pdvegaplot_url_root 198 | 199 | if 'alt' in self.options: 200 | plot_node['alt'] = self.options['alt'] 201 | 202 | result = [target_node] 203 | 204 | if code_below: 205 | result += [plot_node] 206 | if show_code: 207 | result += [source_literal] 208 | if not code_below: 209 | result += [plot_node] 210 | 211 | return result 212 | 213 | 214 | def html_visit_pdvega_plot(self, node): 215 | # Execute the setup code, saving the global & local state 216 | 217 | namespace = {} 218 | if node['setupcode']: 219 | exec(node['setupcode'], namespace) 220 | 221 | # Execute the plot code in this context, evaluating the last line 222 | try: 223 | output = exec_then_eval(node['code'], namespace) 224 | except Exception as e: 225 | warnings.warn("pdvega-plot: {0}:{1} Code Execution failed:" 226 | "{2}: {3}".format(node['rst_source'], node['rst_lineno'], 227 | e.__class__.__name__, str(e))) 228 | raise nodes.SkipNode 229 | 230 | if isinstance(output, Axes): 231 | # Last line should be a Vega-Lite chart; get the spec: 232 | spec = output.spec 233 | 234 | # Create the vega-lite spec to embed 235 | # embed_spec = json.dumps({'mode': 'vega-lite', 236 | # 'actions': node['links'], 237 | # 'spec': spec}) 238 | 239 | # Previously we did this, but after github migrated to https only 240 | # it started causing issues for some http clients such as localhost. 241 | #embed_spec = embed_spec.replace('http://', '//') 242 | #embed_spec = embed_spec.replace('https://', '//') 243 | 244 | # Write embed_spec to a *.vl.json file 245 | dest_dir = os.path.join(self.builder.outdir, node['relpath']) 246 | if not os.path.exists(dest_dir): 247 | os.makedirs(dest_dir) 248 | filename = "{0}.vl.json".format(node['div_id']) 249 | # TODO: let this url be configured 250 | url = "{0}{1}.vl.json".format(node['url_root'], node['div_id']) 251 | dest_path = os.path.join(dest_dir, filename) 252 | with open(dest_path, 'w') as f: 253 | json.dump(spec, f) 254 | 255 | # Pass relevant info into the template and append to the output 256 | html = VGL_TEMPLATE.render(div_id=node['div_id'], url=url) 257 | self.body.append(html) 258 | else: 259 | warnings.warn('pdvega-plot: {0}:{1} Malformed block. Last line of ' 260 | 'code block should define a valid pdvega object.' 261 | ''.format(node['rst_source'], node['rst_lineno'])) 262 | raise nodes.SkipNode 263 | 264 | 265 | def generic_visit_pdvega_plot(self, node): 266 | # TODO: generate PNGs and insert them here 267 | if 'alt' in node.attributes: 268 | self.body.append(_('[ graph: %s ]') % node['alt']) 269 | else: 270 | self.body.append(_('[ graph ]')) 271 | raise nodes.SkipNode 272 | 273 | 274 | def builder_inited(app): 275 | app.add_javascript(app.config.pdvegaplot_vega_js_url) 276 | app.add_javascript(app.config.pdvegaplot_vegalite_js_url) 277 | app.add_javascript(app.config.pdvegaplot_vegaembed_js_url) 278 | 279 | 280 | def setup(app): 281 | setup.app = app 282 | setup.config = app.config 283 | setup.confdir = app.confdir 284 | 285 | app.add_config_value('pdvegaplot_links', DEFAULT_PDVEGAPLOT_LINKS, 'env') 286 | 287 | app.add_config_value('pdvegaplot_vega_js_url', VEGA_JS_URL_DEFAULT, 'html') 288 | app.add_config_value('pdvegaplot_vegalite_js_url', VEGALITE_JS_URL_DEFAULT, 'html') 289 | app.add_config_value('pdvegaplot_vegaembed_js_url', VEGAEMBED_JS_URL_DEFAULT, 'html') 290 | 291 | app.add_config_value('pdvegaplot_url_root', '/', 'html') 292 | 293 | app.add_directive('pdvega-plot', PdVegaPlotDirective) 294 | app.add_directive('pdvega-setup', PdVegaSetupDirective) 295 | 296 | app.add_stylesheet('pdvega-plot.css') 297 | 298 | app.add_node(pdvega_plot, 299 | html=(html_visit_pdvega_plot, None), 300 | latex=(generic_visit_pdvega_plot, None), 301 | texinfo=(generic_visit_pdvega_plot, None), 302 | text=(generic_visit_pdvega_plot, None), 303 | man=(generic_visit_pdvega_plot, None)) 304 | 305 | app.connect('env-purge-doc', purge_pdvega_plot_setup) 306 | app.connect('builder-inited', builder_inited) 307 | 308 | return {'version': '0.1'} 309 | -------------------------------------------------------------------------------- /doc/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # pdvega documentation build configuration file, created by 5 | # sphinx-quickstart on Wed Sep 7 12:52:48 2016. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | import sys 17 | import os 18 | 19 | # If extensions (or modules to document with autodoc) are in another directory, 20 | # add these directories to sys.path here. If the directory is relative to the 21 | # documentation root, use os.path.abspath to make it absolute, like shown here. 22 | sys.path.insert(0, os.path.abspath('sphinxext')) 23 | 24 | # -- General configuration ------------------------------------------------ 25 | 26 | # If your documentation needs a minimal Sphinx version, state it here. 27 | #needs_sphinx = '1.0' 28 | 29 | # Add any Sphinx extension module names here, as strings. They can be 30 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 31 | # ones. 32 | extensions = [ 33 | 'sphinx.ext.autodoc', 34 | 'sphinx.ext.autosummary', 35 | 'sphinx.ext.doctest', 36 | 'sphinx.ext.coverage', 37 | 'sphinx.ext.githubpages', 38 | 'numpydoc.numpydoc', 39 | 'matplotlib.sphinxext.plot_directive', 40 | 'pdvega_ext.pdvegaplot', 41 | ] 42 | 43 | pdvega_plot_links = {'editor': True, 'source': False, 'export': False} 44 | 45 | autodoc_default_flags = ['members'] 46 | autodoc_member_order = 'groupwise' 47 | 48 | # Add any paths that contain templates here, relative to this directory. 49 | templates_path = ['_templates'] 50 | 51 | # The suffix(es) of source filenames. 52 | # You can specify multiple suffix as a list of string: 53 | # source_suffix = ['.rst', '.md'] 54 | source_suffix = '.rst' 55 | 56 | # The encoding of source files. 57 | #source_encoding = 'utf-8-sig' 58 | 59 | # The master toctree document. 60 | master_doc = 'index' 61 | 62 | # General information about the project. 63 | project = 'pdvega' 64 | copyright = '2018, Jake VanderPlas' 65 | author = 'Jake VanderPlas' 66 | 67 | # The version info for the project you're documenting, acts as replacement for 68 | # |version| and |release|, also used in various other places throughout the 69 | # built documents. 70 | # 71 | # The short X.Y version. 72 | version = '0.2' 73 | # The full version, including alpha/beta/rc tags. 74 | release = '0.2.0.dev0' 75 | 76 | # The language for content autogenerated by Sphinx. Refer to documentation 77 | # for a list of supported languages. 78 | # 79 | # This is also used if you do content translation via gettext catalogs. 80 | # Usually you set "language" from the command line for these cases. 81 | language = None 82 | 83 | # There are two options for replacing |today|: either, you set today to some 84 | # non-false value, then it is used: 85 | #today = '' 86 | # Else, today_fmt is used as the format for a strftime call. 87 | #today_fmt = '%B %d, %Y' 88 | 89 | # List of patterns, relative to source directory, that match files and 90 | # directories to ignore when looking for source files. 91 | # This patterns also effect to html_static_path and html_extra_path 92 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 93 | 94 | # The reST default role (used for this markup: `text`) to use for all 95 | # documents. 96 | #default_role = None 97 | 98 | # If true, '()' will be appended to :func: etc. cross-reference text. 99 | #add_function_parentheses = True 100 | 101 | # If true, the current module name will be prepended to all description 102 | # unit titles (such as .. function::). 103 | #add_module_names = True 104 | 105 | # If true, sectionauthor and moduleauthor directives will be shown in the 106 | # output. They are ignored by default. 107 | #show_authors = False 108 | 109 | # The name of the Pygments (syntax highlighting) style to use. 110 | pygments_style = 'sphinx' 111 | 112 | # A list of ignored prefixes for module index sorting. 113 | #modindex_common_prefix = [] 114 | 115 | # If true, keep warnings as "system message" paragraphs in the built documents. 116 | #keep_warnings = False 117 | 118 | # If true, `todo` and `todoList` produce output, else they produce nothing. 119 | todo_include_todos = False 120 | 121 | 122 | # -- Options for HTML output ---------------------------------------------- 123 | 124 | # The theme to use for HTML and HTML Help pages. See the documentation for 125 | # a list of builtin themes. 126 | html_theme = 'sphinx_rtd_theme' 127 | 128 | # Theme options are theme-specific and customize the look and feel of a theme 129 | # further. For a list of options available for each theme, see the 130 | # documentation. 131 | #html_theme_options = {} 132 | 133 | # Add any paths that contain custom themes here, relative to this directory. 134 | #html_theme_path = [] 135 | 136 | # The name for this set of Sphinx documents. 137 | # " v documentation" by default. 138 | #html_title = 'pdvega v0.1' 139 | 140 | # A shorter title for the navigation bar. Default is the same as html_title. 141 | html_short_title = 'pdvega' 142 | 143 | # The name of an image file (relative to this directory) to place at the top 144 | # of the sidebar. 145 | #html_logo = None 146 | 147 | # The name of an image file (relative to this directory) to use as a favicon of 148 | # the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 149 | # pixels large. 150 | html_favicon = '_static/favicon.ico' 151 | 152 | # Add any paths that contain custom static files (such as style sheets) here, 153 | # relative to this directory. They are copied after the builtin static files, 154 | # so a file named "default.css" will overwrite the builtin "default.css". 155 | html_static_path = ['_static', '_images'] 156 | 157 | # adapted from: http://rackerlabs.github.io/docs-rackspace/tools/rtd-tables.html 158 | # and 159 | # https://github.com/rtfd/sphinx_rtd_theme/issues/117 160 | def setup(app): 161 | app.add_stylesheet('theme_overrides.css') 162 | 163 | # Add any extra paths that contain custom files (such as robots.txt or 164 | # .htaccess) here, relative to this directory. These files are copied 165 | # directly to the root of the documentation. 166 | #html_extra_path = [] 167 | 168 | # If not None, a 'Last updated on:' timestamp is inserted at every page 169 | # bottom, using the given strftime format. 170 | # The empty string is equivalent to '%b %d, %Y'. 171 | #html_last_updated_fmt = None 172 | 173 | # If true, SmartyPants will be used to convert quotes and dashes to 174 | # typographically correct entities. 175 | #html_use_smartypants = True 176 | 177 | # Custom sidebar templates, maps document names to template names. 178 | #html_sidebars = {} 179 | 180 | # Additional templates that should be rendered to pages, maps page names to 181 | # template names. 182 | #html_additional_pages = {} 183 | 184 | # If false, no module index is generated. 185 | #html_domain_indices = True 186 | 187 | # If false, no index is generated. 188 | #html_use_index = True 189 | 190 | # If true, the index is split into individual pages for each letter. 191 | #html_split_index = False 192 | 193 | # If true, links to the reST sources are added to the pages. 194 | #html_show_sourcelink = True 195 | 196 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 197 | #html_show_sphinx = True 198 | 199 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 200 | #html_show_copyright = True 201 | 202 | # If true, an OpenSearch description file will be output, and all pages will 203 | # contain a tag referring to it. The value of this option must be the 204 | # base URL from which the finished HTML is served. 205 | #html_use_opensearch = '' 206 | 207 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 208 | #html_file_suffix = None 209 | 210 | # Language to be used for generating the HTML full-text search index. 211 | # Sphinx supports the following languages: 212 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja' 213 | # 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr', 'zh' 214 | #html_search_language = 'en' 215 | 216 | # A dictionary with options for the search language support, empty by default. 217 | # 'ja' uses this config value. 218 | # 'zh' user can custom change `jieba` dictionary path. 219 | #html_search_options = {'type': 'default'} 220 | 221 | # The name of a javascript file (relative to the configuration directory) that 222 | # implements a search results scorer. If empty, the default will be used. 223 | #html_search_scorer = 'scorer.js' 224 | 225 | # Output file base name for HTML help builder. 226 | htmlhelp_basename = 'pdvegadoc' 227 | 228 | # -- Options for LaTeX output --------------------------------------------- 229 | 230 | latex_elements = { 231 | # The paper size ('letterpaper' or 'a4paper'). 232 | #'papersize': 'letterpaper', 233 | 234 | # The font size ('10pt', '11pt' or '12pt'). 235 | #'pointsize': '10pt', 236 | 237 | # Additional stuff for the LaTeX preamble. 238 | #'preamble': '', 239 | 240 | # Latex figure (float) alignment 241 | #'figure_align': 'htbp', 242 | } 243 | 244 | # Grouping the document tree into LaTeX files. List of tuples 245 | # (source start file, target name, title, 246 | # author, documentclass [howto, manual, or own class]). 247 | latex_documents = [ 248 | (master_doc, 'pdvega.tex', 'pdvega Documentation', 249 | 'Brian Granger and Jake VanderPlas', 'manual'), 250 | ] 251 | 252 | # The name of an image file (relative to this directory) to place at the top of 253 | # the title page. 254 | #latex_logo = None 255 | 256 | # For "manual" documents, if this is true, then toplevel headings are parts, 257 | # not chapters. 258 | #latex_use_parts = False 259 | 260 | # If true, show page references after internal links. 261 | #latex_show_pagerefs = False 262 | 263 | # If true, show URL addresses after external links. 264 | #latex_show_urls = False 265 | 266 | # Documents to append as an appendix to all manuals. 267 | #latex_appendices = [] 268 | 269 | # If false, no module index is generated. 270 | #latex_domain_indices = True 271 | 272 | 273 | # -- Options for manual page output --------------------------------------- 274 | 275 | # One entry per manual page. List of tuples 276 | # (source start file, name, description, authors, manual section). 277 | man_pages = [ 278 | (master_doc, 'pdvega', 'pdvega Documentation', 279 | [author], 1) 280 | ] 281 | 282 | # If true, show URL addresses after external links. 283 | #man_show_urls = False 284 | 285 | 286 | # -- Options for Texinfo output ------------------------------------------- 287 | 288 | # Grouping the document tree into Texinfo files. List of tuples 289 | # (source start file, target name, title, author, 290 | # dir menu entry, description, category) 291 | texinfo_documents = [ 292 | (master_doc, 'pdvega', 'pdvega Documentation', 293 | author, 'pdvega', 'One line description of project.', 294 | 'Miscellaneous'), 295 | ] 296 | 297 | # Documents to append as an appendix to all manuals. 298 | #texinfo_appendices = [] 299 | 300 | # If false, no module index is generated. 301 | #texinfo_domain_indices = True 302 | 303 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 304 | #texinfo_show_urls = 'footnote' 305 | 306 | # If true, do not generate a @detailmenu in the "Top" node's menu. 307 | #texinfo_no_detailmenu = False 308 | 309 | # Hide extra class members 310 | numpydoc_show_class_members = False 311 | 312 | # For the matplotlib plot extension 313 | plot_html_show_source_link = False 314 | plot_html_show_formats = False 315 | 316 | # For the pdvegaplot extension 317 | # pdvegaplot_links = {'editor': True, 'source': True, 'export': True} 318 | pdvegaplot_vega_js_url = "https://cdn.jsdelivr.net/npm/vega@3.0.10" 319 | pdvegaplot_vegalite_js_url = "https://cdn.jsdelivr.net/npm/vega-lite@2.1.2" 320 | pdvegaplot_vegaembed_js_url = "https://cdn.jsdelivr.net/npm/vega-embed@3.0.0" 321 | pdvegaplot_url_root = "//0.0.0.0:8000/" 322 | -------------------------------------------------------------------------------- /pdvega/tests/test_core.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import pandas as pd 4 | 5 | import altair as alt 6 | 7 | from pdvega.tests import utils 8 | 9 | 10 | def test_line_simple(): 11 | df = pd.DataFrame({"x": [1, 4, 2, 3, 5], "y": [6, 3, 4, 5, 2]}) 12 | 13 | plot = df.vgplot.line() 14 | utils.validate_vegalite(plot) 15 | 16 | assert plot.mark == "line" 17 | 18 | utils.check_encodings(plot, x="index", y="value", 19 | color="variable") 20 | data = plot.data 21 | assert set(pd.unique(data["variable"])) == {"x", "y"} 22 | 23 | 24 | def test_line_xy(): 25 | df = pd.DataFrame({"x": [1, 4, 2, 3, 5], "y": [6, 3, 4, 5, 2], "z": range(5)}) 26 | 27 | plot = df.vgplot.line(x="x", y="y") 28 | utils.validate_vegalite(plot) 29 | assert plot.mark == "line" 30 | 31 | utils.check_encodings(plot, x="x", y="value", 32 | color="variable", order="index") 33 | data = plot.data 34 | assert set(pd.unique(data["variable"])) == {"y"} 35 | 36 | 37 | def test_series_line(): 38 | ser = pd.Series([3, 2, 3, 2, 3]) 39 | plot = ser.vgplot.line() 40 | utils.validate_vegalite(plot) 41 | assert plot.mark == "line" 42 | utils.check_encodings(plot, x="index", y="0") 43 | 44 | 45 | def test_scatter_simple(): 46 | df = pd.DataFrame({"x": [1, 4, 2, 3, 5], "y": [6, 3, 4, 5, 2]}) 47 | 48 | plot = df.vgplot.scatter(x="x", y="y") 49 | utils.validate_vegalite(plot) 50 | assert plot.mark == "point" 51 | utils.check_encodings(plot, x="x", y="y") 52 | 53 | 54 | def test_scatter_color_size(): 55 | df = pd.DataFrame( 56 | {"x": [1, 4, 2, 3, 5], "y": [6, 3, 4, 5, 2], "c": range(5), "s": range(5)} 57 | ) 58 | 59 | plot = df.vgplot.scatter(x="x", y="y", c="c", s="s") 60 | utils.validate_vegalite(plot) 61 | assert plot.mark == "point" 62 | utils.check_encodings(plot, x="x", y="y", color="c", size="s") 63 | 64 | 65 | def test_scatter_common_columns(): 66 | df = pd.DataFrame({"x": [1, 4, 2, 3, 5], "y": [6, 3, 4, 5, 2]}) 67 | 68 | plot = df.vgplot.scatter(x="x", y="y", c="y") 69 | utils.validate_vegalite(plot) 70 | assert plot.mark == "point" 71 | utils.check_encodings(plot, x="x", y="y", color="y") 72 | 73 | 74 | def test_bar_simple(): 75 | df = pd.DataFrame({"x": [1, 4, 2, 3, 5], "y": [6, 3, 4, 5, 2]}) 76 | 77 | plot = df.vgplot.bar() 78 | utils.validate_vegalite(plot) 79 | assert plot.mark == "bar" 80 | utils.check_encodings( 81 | plot, x="index", y="value", color="variable", 82 | opacity=utils.IGNORE 83 | ) 84 | data = plot.data 85 | assert set(pd.unique(data["variable"])) == {"x", "y"} 86 | assert plot["encoding"]["y"]["stack"] is None 87 | 88 | 89 | def test_bar_stacked(): 90 | df = pd.DataFrame({"x": [1, 4, 2, 3, 5], "y": [6, 3, 4, 5, 2]}) 91 | 92 | plot = df.vgplot.bar(stacked=True) 93 | utils.validate_vegalite(plot) 94 | assert plot.mark == "bar" 95 | utils.check_encodings(plot, x="index", y="value", color="variable") 96 | data = plot.data 97 | assert set(pd.unique(data["variable"])) == {"x", "y"} 98 | assert plot["encoding"]["y"]["stack"] == "zero" 99 | 100 | 101 | def test_bar_xy(): 102 | df = pd.DataFrame({"x": [1, 4, 2, 3, 5], "y": [6, 3, 4, 5, 2]}) 103 | 104 | plot = df.vgplot.bar(x="x", y="y") 105 | utils.validate_vegalite(plot) 106 | assert plot.mark == "bar" 107 | utils.check_encodings(plot, x="x", y="value", color="variable") 108 | data = plot.data 109 | assert set(pd.unique(data["variable"])) == {"y"} 110 | assert plot["encoding"]["y"]["stack"] is None 111 | 112 | 113 | def test_bar_xy_stacked(): 114 | df = pd.DataFrame({"x": [1, 4, 2, 3, 5], "y": [6, 3, 4, 5, 2]}) 115 | 116 | plot = df.vgplot.bar(x="x", y="y", stacked=True) 117 | utils.validate_vegalite(plot) 118 | assert plot.mark == "bar" 119 | utils.check_encodings(plot, x="x", y="value", color="variable") 120 | data = plot.data 121 | assert set(pd.unique(data["variable"])) == {"y"} 122 | assert plot["encoding"]["y"]["stack"] == "zero" 123 | 124 | 125 | def test_series_bar(): 126 | ser = pd.Series([4, 5, 4, 5], index=["A", "B", "C", "D"]) 127 | plot = ser.vgplot.bar() 128 | utils.validate_vegalite(plot) 129 | assert plot.mark == "bar" 130 | utils.check_encodings(plot, x="index", y="0") 131 | 132 | 133 | def test_barh_simple(): 134 | df = pd.DataFrame({"x": [1, 4, 2, 3, 5], "y": [6, 3, 4, 5, 2]}) 135 | 136 | plot = df.vgplot.barh() 137 | utils.validate_vegalite(plot) 138 | assert plot.mark == "bar" 139 | utils.check_encodings( 140 | plot, y="index", x="value", color="variable", 141 | opacity=utils.IGNORE 142 | ) 143 | data = plot.data 144 | assert set(pd.unique(data["variable"])) == {"x", "y"} 145 | assert plot["encoding"]["x"]["stack"] is None 146 | 147 | 148 | def test_barh_stacked(): 149 | df = pd.DataFrame({"x": [1, 4, 2, 3, 5], "y": [6, 3, 4, 5, 2]}) 150 | 151 | plot = df.vgplot.barh(stacked=True) 152 | utils.validate_vegalite(plot) 153 | assert plot.mark == "bar" 154 | utils.check_encodings(plot, y="index", x="value", color="variable") 155 | data = plot.data 156 | assert set(pd.unique(data["variable"])) == {"x", "y"} 157 | assert plot["encoding"]["x"]["stack"] == "zero" 158 | 159 | 160 | def test_barh_xy(): 161 | df = pd.DataFrame({"x": [1, 4, 2, 3, 5], "y": [6, 3, 4, 5, 2]}) 162 | 163 | plot = df.vgplot.barh(x="x", y="y") 164 | utils.validate_vegalite(plot) 165 | assert plot.mark == "bar" 166 | utils.check_encodings(plot, x="value", y="x", color="variable") 167 | data = plot.data 168 | assert set(pd.unique(data["variable"])) == {"y"} 169 | assert plot["encoding"]["x"]["stack"] is None 170 | 171 | 172 | def test_barh_xy_stacked(): 173 | df = pd.DataFrame({"x": [1, 4, 2, 3, 5], "y": [6, 3, 4, 5, 2]}) 174 | 175 | plot = df.vgplot.barh(x="x", y="y", stacked=True) 176 | utils.validate_vegalite(plot) 177 | assert plot.mark == "bar" 178 | utils.check_encodings(plot, x="value", y="x", color="variable") 179 | data = plot.data 180 | assert set(pd.unique(data["variable"])) == {"y"} 181 | assert plot["encoding"]["x"]["stack"] == "zero" 182 | 183 | 184 | def test_series_barh(): 185 | ser = pd.Series([4, 5, 4, 5], index=["A", "B", "C", "D"]) 186 | plot = ser.vgplot.barh() 187 | utils.validate_vegalite(plot) 188 | assert plot.mark == "bar" 189 | utils.check_encodings(plot, y="index", x="0") 190 | 191 | 192 | def test_df_area_simple(): 193 | df = pd.DataFrame({"x": [1, 4, 2, 3, 5], "y": [6, 3, 4, 5, 2]}) 194 | 195 | plot = df.vgplot.area() 196 | utils.validate_vegalite(plot) 197 | assert plot.mark == "area" 198 | utils.check_encodings(plot, x="index", y="value", 199 | color="variable") 200 | data = plot.data 201 | assert set(pd.unique(data["variable"])) == {"x", "y"} 202 | assert plot["encoding"]["y"]["stack"] == "zero" 203 | 204 | 205 | def test_df_area_unstacked(): 206 | df = pd.DataFrame({"x": [1, 4, 2, 3, 5], "y": [6, 3, 4, 5, 2]}) 207 | 208 | plot = df.vgplot.area(stacked=False) 209 | utils.validate_vegalite(plot) 210 | assert plot.mark == "area" 211 | utils.check_encodings( 212 | plot, x="index", y="value", color="variable", opacity=utils.IGNORE 213 | ) 214 | data = plot.data 215 | assert set(pd.unique(data["variable"])) == {"x", "y"} 216 | assert plot["encoding"]["y"]["stack"] is None 217 | assert plot["encoding"]["opacity"]["value"] == 0.7 218 | 219 | 220 | def test_df_area_xy(): 221 | df = pd.DataFrame({"x": [1, 4, 2, 3, 5], "y": [6, 3, 4, 5, 2], "z": range(5)}) 222 | 223 | plot = df.vgplot.area(x="x", y="y") 224 | utils.validate_vegalite(plot) 225 | assert plot.mark == "area" 226 | utils.check_encodings(plot, x="x", y="value", color="variable") 227 | data = plot.data 228 | assert set(pd.unique(data["variable"])) == {"y"} 229 | assert plot["encoding"]["y"]["stack"] == "zero" 230 | 231 | 232 | def test_df_area_xy_unstacked(): 233 | df = pd.DataFrame({"x": [1, 4, 2, 3, 5], "y": [6, 3, 4, 5, 2], "z": range(5)}) 234 | 235 | plot = df.vgplot.area(x="x", y="y", stacked=False) 236 | utils.validate_vegalite(plot) 237 | assert plot.mark == "area" 238 | utils.check_encodings(plot, x="x", y="value", color="variable") 239 | data = plot.data 240 | assert set(pd.unique(data["variable"])) == {"y"} 241 | assert plot["encoding"]["y"]["stack"] is None 242 | 243 | 244 | def test_series_area(): 245 | ser = pd.Series([3, 2, 3, 2, 3]) 246 | plot = ser.vgplot.area() 247 | utils.validate_vegalite(plot) 248 | assert plot.mark == "area" 249 | utils.check_encodings(plot, x="index", y="0") 250 | 251 | 252 | @pytest.mark.parametrize("stacked", [True, False]) 253 | @pytest.mark.parametrize("histtype", ["bar", "step", "stepfilled"]) 254 | @pytest.mark.parametrize("maxbins", [3, 5, 10]) 255 | def test_df_hist(stacked, histtype, maxbins): 256 | df = pd.DataFrame({"x": range(10), "y": range(10)}) 257 | 258 | marks = { 259 | "bar": "bar", 260 | "step": {"type": "line", "interpolate": "step"}, 261 | "stepfilled": {"type": "area", "interpolate": "step"}, 262 | } 263 | 264 | # bar histogram 265 | plot = df.vgplot.hist(bins=maxbins, stacked=stacked, histtype=histtype) 266 | assert plot.mark == marks[histtype] 267 | if stacked: 268 | # No default opacity for a stacked histogram 269 | utils.check_encodings(plot, x="value", y=utils.IGNORE, 270 | color="variable") 271 | else: 272 | utils.check_encodings( 273 | plot, x="value", y=utils.IGNORE, color="variable", 274 | opacity=utils.IGNORE 275 | ) 276 | assert plot["encoding"]["x"]["bin"] == {"maxbins": maxbins} 277 | assert plot["encoding"]["y"]["aggregate"] == "count" 278 | assert plot["encoding"]["y"]["stack"] == ("zero" if stacked else None) 279 | 280 | 281 | @pytest.mark.parametrize("histtype", ["bar", "step", "stepfilled"]) 282 | @pytest.mark.parametrize("maxbins", [3, 5, 10]) 283 | def test_series_hist(histtype, maxbins): 284 | ser = pd.Series(range(10)) 285 | 286 | marks = { 287 | "bar": "bar", 288 | "step": {"type": "line", "interpolate": "step"}, 289 | "stepfilled": {"type": "area", "interpolate": "step"}, 290 | } 291 | plot = ser.vgplot.hist(bins=maxbins, histtype=histtype) 292 | assert plot.mark == marks[histtype] 293 | 294 | utils.check_encodings(plot, x="0", y=utils.IGNORE) 295 | assert plot["encoding"]["x"]["bin"] == {"maxbins": maxbins} 296 | assert plot["encoding"]["y"]["aggregate"] == "count" 297 | 298 | 299 | def test_df_hexbin(): 300 | df = pd.DataFrame({"x": range(10), "y": range(10), "C": range(10)}) 301 | gridsize = 10 302 | plot = df.vgplot.hexbin(x="x", y="y", gridsize=gridsize) 303 | assert plot.mark == "rect" 304 | utils.check_encodings(plot, x="x", y="y", color=utils.IGNORE) 305 | assert plot["encoding"]["x"]["bin"] == alt.Bin(maxbins=gridsize) 306 | assert plot["encoding"]["y"]["bin"] == alt.Bin(maxbins=gridsize) 307 | assert plot["encoding"]["color"]["aggregate"] == "count" 308 | 309 | 310 | def test_df_hexbin_C(): 311 | df = pd.DataFrame({"x": range(10), "y": range(10), "C": range(10)}) 312 | gridsize = 10 313 | plot = df.vgplot.hexbin(x="x", y="y", C="C", gridsize=gridsize) 314 | assert plot.mark == "rect" 315 | utils.check_encodings(plot, x="x", y="y", color="C") 316 | assert plot["encoding"]["x"]["bin"] == alt.Bin(maxbins=gridsize) 317 | assert plot["encoding"]["y"]["bin"] == alt.Bin(maxbins=gridsize) 318 | assert plot["encoding"]["color"]["aggregate"] == "mean" 319 | 320 | 321 | def test_df_hexbin_Cfunc(): 322 | df = pd.DataFrame({"x": range(10), "y": range(10), "C": range(10)}) 323 | plot = df.vgplot.hexbin(x="x", y="y", C="C", reduce_C_function=min) 324 | assert plot["encoding"]["color"]["aggregate"] == "min" 325 | utils.check_encodings(plot, x="x", y="y", color="C") 326 | 327 | 328 | def test_df_kde(): 329 | df = pd.DataFrame({"x": range(10), "y": range(10)}) 330 | plot = df.vgplot.kde(bw_method="scott") 331 | assert plot.mark == "line" 332 | utils.check_encodings(plot, x=" ", y="Density", color=utils.IGNORE) 333 | data = plot.data 334 | assert set(pd.unique(data["variable"])) == {"x", "y"} 335 | 336 | 337 | def test_df_kde_y(): 338 | df = pd.DataFrame({"x": range(10), "y": range(10)}) 339 | plot = df.vgplot.kde(y="y", bw_method="scott") 340 | assert plot.mark == "line" 341 | utils.check_encodings(plot, x=" ", y="Density", color=utils.IGNORE) 342 | data = plot.data 343 | assert set(pd.unique(data["variable"])) == {"y"} 344 | 345 | 346 | def test_ser_kde(): 347 | ser = pd.Series(range(10), name="x") 348 | plot = ser.vgplot.kde(bw_method="scott") 349 | assert plot.mark == "line" 350 | utils.check_encodings( 351 | plot, 352 | x=' ', 353 | y='x', 354 | ) 355 | -------------------------------------------------------------------------------- /pdvega/_core.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import altair as alt 4 | 5 | from ._utils import ( 6 | infer_vegalite_type, 7 | unpivot_frame, 8 | warn_if_keywords_unused, 9 | validate_aggregation, 10 | ) 11 | from ._pandas_internals import ( 12 | PandasObject, 13 | register_dataframe_accessor, 14 | register_series_accessor, 15 | ) 16 | 17 | 18 | def _x(x, df, ordinal_threshold=6, **kwargs): 19 | return alt.X( 20 | field=x, 21 | type=infer_vegalite_type(df[x], ordinal_threshold=ordinal_threshold), 22 | **kwargs 23 | ) 24 | 25 | 26 | def _y(y, df, ordinal_threshold=6, **kwargs): 27 | return alt.Y( 28 | field=y, 29 | type=infer_vegalite_type(df[y], ordinal_threshold=ordinal_threshold), 30 | **kwargs 31 | ) 32 | 33 | 34 | class BasePlotMethods(PandasObject): 35 | 36 | def __init__(self, data): 37 | self._data = data 38 | 39 | def __call__(self, kind, *args, **kwargs): 40 | raise NotImplementedError() 41 | 42 | def _plot(self, data=None, width=450, height=300, title=None, figsize=None, dpi=75): 43 | if data is None: 44 | data = self._data 45 | 46 | if title is None: 47 | title = "" 48 | 49 | if figsize is not None: 50 | width_inches, height_inches = figsize 51 | width = 0.8 * dpi * width_inches 52 | height = 0.8 * dpi * height_inches 53 | 54 | chart = alt.Chart(data=data).properties(width=width, height=height, title=title) 55 | return chart 56 | 57 | 58 | @register_series_accessor("vgplot") 59 | class SeriesPlotMethods(BasePlotMethods): 60 | """Series Accessor & Method for creating Vega-Lite visualizations. 61 | 62 | Examples 63 | -------- 64 | >>> s.vgplot.line() # doctest: +SKIP 65 | >>> s.vgplot.area() # doctest: +SKIP 66 | >>> s.vgplot.bar() # doctest: +SKIP 67 | >>> s.vgplot.barh() # doctest: +SKIP 68 | >>> s.vgplot.hist() # doctest: +SKIP 69 | >>> s.vgplot.kde() # doctest: +SKIP 70 | >>> s.vgplot.density() # doctest: +SKIP 71 | 72 | Plotting methods can also be accessed by calling the accessor as a method 73 | with the ``kind`` argument: ``s.vgplot(kind='line', **kwds)`` 74 | is equivalent to ``s.vgplot.line(**kwds)`` 75 | """ 76 | 77 | def __call__(self, kind="line", **kwargs): 78 | try: 79 | plot_method = getattr(self, kind) 80 | except AttributeError: 81 | raise ValueError( 82 | "kind='{0}' not valid for {1}" "".format(kind, self.__class__.__name__) 83 | ) 84 | return plot_method(**kwargs) 85 | 86 | def line(self, alpha=None, width=450, height=300, ax=None, **kwds): 87 | """Line plot for Series data 88 | 89 | >>> series.vgplot.line() # doctest: +SKIP 90 | 91 | Parameters 92 | ---------- 93 | alpha : float, optional 94 | transparency level, 0 <= alpha <= 1 95 | width : int, optional 96 | the width of the plot in pixels 97 | height : int, optional 98 | the height of the plot in pixels 99 | ax: altair.Chart, optional 100 | chart to be overlayed with this vis (convinience method for `chart1 + chart2`) 101 | 102 | Returns 103 | ------- 104 | chart : altair.Chart 105 | The altair plot representation 106 | """ 107 | df = self._data.reset_index() 108 | df.columns = map(str, df.columns) 109 | x, y = df.columns 110 | 111 | chart = self._plot( 112 | data=df, 113 | width=width, 114 | height=height, 115 | title=kwds.pop("title", ""), 116 | figsize=kwds.pop("figsize", None), 117 | dpi=kwds.pop("dpi", None), 118 | ) 119 | 120 | chart = chart.mark_line().encode(x=_x(x, df), y=_y(y, df)) 121 | 122 | if alpha is not None: 123 | assert 0 <= alpha <= 1 124 | chart = chart.encode(opacity=alt.value(alpha)) 125 | 126 | if ax is not None: 127 | return ax + chart 128 | 129 | warn_if_keywords_unused("line", kwds) 130 | return chart 131 | 132 | def area(self, alpha=None, width=450, height=300, ax=None, **kwds): 133 | """Area plot for Series data 134 | 135 | >>> series.vgplot.area() # doctest: +SKIP 136 | 137 | Parameters 138 | ---------- 139 | alpha : float, optional 140 | transparency level, 0 <= alpha <= 1 141 | width : int, optional 142 | the width of the plot in pixels 143 | height : int, optional 144 | the height of the plot in pixels 145 | ax: altair.Chart, optional 146 | chart to be overlayed with this vis (convinience method for `chart1 + chart2`) 147 | 148 | Returns 149 | ------- 150 | chart : alt.Chart 151 | altair chart representation 152 | """ 153 | df = self._data.reset_index() 154 | df.columns = map(str, df.columns) 155 | x, y = df.columns 156 | 157 | chart = self._plot( 158 | data=df, 159 | width=width, 160 | height=height, 161 | title=kwds.pop("title", ""), 162 | figsize=kwds.pop("figsize", None), 163 | dpi=kwds.pop("dpi", None), 164 | ).mark_area().encode( 165 | x=_x(x, df), y=_y(y, df) 166 | ) 167 | 168 | if alpha is not None: 169 | assert 0 <= alpha <= 1 170 | chart = chart.encode(opacity=alt.value(alpha)) 171 | 172 | if ax is not None: 173 | return ax + chart 174 | 175 | warn_if_keywords_unused("area", kwds) 176 | return chart 177 | 178 | def bar(self, alpha=None, width=450, height=300, ax=None, **kwds): 179 | """Bar plot for Series data 180 | 181 | >>> series.vgplot.bar() # doctest: +SKIP 182 | 183 | Parameters 184 | ---------- 185 | alpha : float, optional 186 | transparency level, 0 <= alpha <= 1 187 | width : int, optional 188 | the width of the plot in pixels 189 | height : int, optional 190 | the height of the plot in pixels 191 | ax: altair.Chart, optional 192 | chart to be overlayed with this vis (convinience method for `chart1 + chart2`) 193 | 194 | Returns 195 | ------- 196 | chart : alt.Chart 197 | altair chart representation 198 | """ 199 | 200 | df = self._data.reset_index() 201 | df.columns = map(str, df.columns) 202 | x, y = df.columns 203 | 204 | chart = self._plot( 205 | data=df, 206 | width=width, 207 | height=height, 208 | title=kwds.pop("title", ""), 209 | figsize=kwds.pop("figsize", None), 210 | dpi=kwds.pop("dpi", None), 211 | ).mark_bar().encode( 212 | x=_x(x, df), y=_y(y, df) 213 | ) 214 | 215 | if alpha is not None: 216 | assert 0 <= alpha <= 1 217 | chart = chart.encode(opacity=alt.value(alpha)) 218 | 219 | if ax is not None: 220 | return ax + chart 221 | 222 | warn_if_keywords_unused("bar", kwds) 223 | return chart 224 | 225 | def barh(self, alpha=None, width=450, height=300, ax=None, **kwds): 226 | """Horizontal bar plot for Series data 227 | 228 | >>> series.vgplot.barh() # doctest: +SKIP 229 | 230 | Parameters 231 | ---------- 232 | alpha : float, optional 233 | transparency level, 0 <= alpha <= 1 234 | width : int, optional 235 | the width of the plot in pixels 236 | height : int, optional 237 | the height of the plot in pixels 238 | ax: altair.Chart, optional 239 | chart to be overlayed with this vis (convinience method for `chart1 + chart2`) 240 | 241 | Returns 242 | ------- 243 | chart : alt.Chart 244 | altair chart representation 245 | """ 246 | chart = self.bar(alpha=alpha, width=width, height=height, **kwds) 247 | 248 | enc = chart.encoding 249 | enc["x"], enc["y"] = enc["y"], enc["x"] 250 | 251 | if ax is not None: 252 | return ax + chart 253 | return chart 254 | 255 | def hist( 256 | self, 257 | bins=10, 258 | alpha=None, 259 | histtype="bar", 260 | width=450, 261 | height=300, 262 | ax=None, 263 | **kwds 264 | ): 265 | """Histogram plot for Series data 266 | 267 | >>> series.vgplot.hist() # doctest: +SKIP 268 | 269 | Parameters 270 | ---------- 271 | bins : integer, optional 272 | the maximum number of bins to use for the histogram (default: 10) 273 | alpha : float, optional 274 | transparency level, 0 <= alpha <= 1 275 | histtype : string, {'bar', 'step', 'stepfilled'} 276 | The type of histogram to generate. Default is 'bar'. 277 | width : int, optional 278 | the width of the plot in pixels 279 | height : int, optional 280 | the height of the plot in pixels 281 | ax: altair.Chart, optional 282 | chart to be overlayed with this vis (convinience method for `chart1 + chart2`) 283 | 284 | Returns 285 | ------- 286 | chart : alt.Chart 287 | altair chart representation 288 | """ 289 | df = self._data.to_frame().reset_index(drop=False) 290 | df.columns = df.columns.astype(str) 291 | y, x = df.columns 292 | 293 | marks = { 294 | "bar": "bar", 295 | "barstacked": "bar", 296 | "stepfilled": {"type": "area", "interpolate": "step"}, 297 | "step": {"type": "line", "interpolate": "step"}, 298 | } 299 | 300 | if histtype in marks: 301 | mark = marks[histtype] 302 | else: 303 | raise ValueError("histtype '{0}' is not recognized" "".format(histtype)) 304 | 305 | chart = self._plot( 306 | data=df, 307 | width=width, 308 | height=height, 309 | title=kwds.pop("title", ""), 310 | figsize=kwds.pop("figsize", None), 311 | dpi=kwds.pop("dpi", None), 312 | ) 313 | 314 | chart.mark = mark 315 | chart = chart.encode( 316 | x=_x(x, df, bin={"maxbins": bins}), 317 | y=_y(y, df, aggregate="count") 318 | ) 319 | 320 | if alpha is not None: 321 | assert 0 <= alpha <= 1 322 | chart = chart.encode(opacity=alt.value(alpha)) 323 | 324 | if ax is not None: 325 | return ax + chart 326 | 327 | warn_if_keywords_unused("hist", kwds) 328 | return chart 329 | 330 | def kde(self, bw_method=None, alpha=None, width=450, height=300, ax=None, **kwds): 331 | """Kernel Density Estimation plot for Series data 332 | 333 | >>> series.vgplot.kde() # doctest: +SKIP 334 | 335 | Parameters 336 | ---------- 337 | bw_method : str, scalar or callable, optional 338 | The method used to calculate the estimator bandwidth. This can be 339 | 'scott', 'silverman', a scalar constant or a callable. 340 | See `scipy.stats.gaussian_kde` for more details. 341 | alpha : float, optional 342 | transparency level, 0 <= alpha <= 1 343 | width : int, optional 344 | the width of the plot in pixels 345 | height : int, optional 346 | the height of the plot in pixels 347 | ax: altair.Chart, optional 348 | chart to be overlayed with this vis (convinience method for `chart1 + chart2`) 349 | 350 | Returns 351 | ------- 352 | chart : alt.Chart 353 | altair chart representation 354 | """ 355 | from scipy.stats import gaussian_kde 356 | 357 | data = self._data 358 | tmin, tmax = data.min(), data.max() 359 | trange = tmax - tmin 360 | t = np.linspace(tmin - 0.5 * trange, tmax + 0.5 * trange, 1000) 361 | 362 | kde_ser = pd.Series( 363 | gaussian_kde(data, bw_method=bw_method).evaluate(t), index=t, name=data.name 364 | ) 365 | 366 | kde_ser.index.name = " " 367 | f = self.__class__(kde_ser) 368 | return f.line(alpha=alpha, width=width, height=height, ax=ax, **kwds) 369 | 370 | density = kde 371 | 372 | 373 | @register_dataframe_accessor("vgplot") 374 | class FramePlotMethods(BasePlotMethods): 375 | """DataFrame Accessor & Method for creating Vega-Lite visualizations. 376 | 377 | Examples 378 | -------- 379 | >>> df.vgplot.line() # doctest: +SKIP 380 | >>> df.vgplot.area() # doctest: +SKIP 381 | >>> df.vgplot.bar() # doctest: +SKIP 382 | >>> df.vgplot.barh() # doctest: +SKIP 383 | >>> df.vgplot.hist() # doctest: +SKIP 384 | >>> df.vgplot.kde() # doctest: +SKIP 385 | >>> df.vgplot.density() # doctest: +SKIP 386 | >>> df.vgplot.scatter(x, y) # doctest: +SKIP 387 | >>> df.vgplot.hexbin(x, y) # doctest: +SKIP 388 | 389 | Plotting methods can also be accessed by calling the accessor as a method 390 | with the ``kind`` argument: ``df.vgplot(kind='line', **kwds)`` 391 | is equivalent to ``df.vgplot.line(**kwds)`` 392 | """ 393 | 394 | def __call__(self, x=None, y=None, kind="line", **kwargs): 395 | try: 396 | plot_method = getattr(self, kind) 397 | except AttributeError: 398 | raise ValueError( 399 | "kind='{0}' not valid for {1}" "".format(kind, self.__class__.__name__) 400 | ) 401 | return plot_method(x=x, y=y, **kwargs) 402 | 403 | def line( 404 | self, 405 | x=None, 406 | y=None, 407 | alpha=None, 408 | var_name="variable", 409 | value_name="value", 410 | width=450, 411 | height=300, 412 | ax=None, 413 | **kwds 414 | ): 415 | """Line plot for DataFrame data 416 | 417 | >>> dataframe.vgplot.line() # doctest: +SKIP 418 | 419 | Parameters 420 | ---------- 421 | x : string, optional 422 | the column to use as the x-axis variable. If not specified, the 423 | index will be used. 424 | y : string, optional 425 | the column to use as the y-axis variable. If not specified, all 426 | columns (except x if specified) will be used. 427 | alpha : float, optional 428 | transparency level, 0 <= alpha <= 1 429 | var_name : string, optional 430 | the legend title 431 | value_name : string, optional 432 | the y-axis label 433 | width : int, optional 434 | the width of the plot in pixels 435 | height : int, optional 436 | the height of the plot in pixels 437 | ax: altair.Chart, optional 438 | chart to be overlayed with this vis (convinience method for `chart1 + chart2`) 439 | 440 | Returns 441 | ------- 442 | chart : alt.Chart 443 | altair chart representation 444 | """ 445 | use_order = (x is not None) 446 | 447 | if use_order: 448 | df = self._data.reset_index() 449 | order = df.columns[0] 450 | df = unpivot_frame( 451 | df, x=(x, order), y=y, var_name=var_name, value_name=value_name 452 | ) 453 | else: 454 | df = unpivot_frame( 455 | self._data, x=x, y=y, var_name=var_name, value_name=value_name 456 | ) 457 | x = df.columns[0] 458 | 459 | chart = self._plot( 460 | data=df, 461 | width=width, 462 | height=height, 463 | title=kwds.pop("title", ""), 464 | figsize=kwds.pop("figsize", None), 465 | dpi=kwds.pop("dpi", None), 466 | ).mark_line().encode( 467 | x=_x(x, df), y=_y(value_name, df), color=alt.Color(var_name, type="nominal") 468 | ) 469 | 470 | if alpha is not None: 471 | assert 0 <= alpha <= 1 472 | chart = chart.encode(opacity=alt.value(alpha)) 473 | 474 | if use_order: 475 | chart.encoding["order"] = { 476 | "field": order, "type": infer_vegalite_type(df[order]) 477 | } 478 | 479 | if ax is not None: 480 | return ax + chart 481 | 482 | warn_if_keywords_unused("line", kwds) 483 | return chart 484 | 485 | def scatter( 486 | self, x, y, c=None, s=None, alpha=None, width=450, height=300, ax=None, **kwds 487 | ): 488 | """Scatter plot for DataFrame data 489 | 490 | >>> dataframe.vgplot.scatter(x, y) # doctest: +SKIP 491 | 492 | Parameters 493 | ---------- 494 | x : string 495 | the column to use as the x-axis variable. 496 | y : string 497 | the column to use as the y-axis variable. 498 | c : string, optional 499 | the column to use to encode the color of the points 500 | s : string, optional 501 | the column to use to encode the size of the points 502 | alpha : float, optional 503 | transparency level, 0 <= alpha <= 1 504 | width : int, optional 505 | the width of the plot in pixels 506 | height : int, optional 507 | the height of the plot in pixels 508 | ax: altair.Chart, optional 509 | chart to be overlayed with this vis (convinience method for `chart1 + chart2`) 510 | 511 | Returns 512 | ------- 513 | chart : alt.Chart 514 | altair chart representation 515 | """ 516 | df = self._data 517 | 518 | chart = self._plot( 519 | width=width, 520 | height=height, 521 | title=kwds.pop("title", ""), 522 | figsize=kwds.pop("figsize", None), 523 | dpi=kwds.pop("dpi", None), 524 | ).mark_point().encode( 525 | x=_x(x, df, ordinal_threshold=0), y=_y(y, df, ordinal_threshold=0) 526 | ) 527 | 528 | if alpha is not None: 529 | assert 0 <= alpha <= 1 530 | chart = chart.encode(opacity=alt.value(alpha)) 531 | 532 | if c is not None: 533 | chart.encoding["color"] = {"field": c, "type": infer_vegalite_type(df[c])} 534 | 535 | if s is not None: 536 | chart.encoding["size"] = {"field": s, "type": infer_vegalite_type(df[s])} 537 | 538 | if ax is not None: 539 | return ax + chart 540 | 541 | warn_if_keywords_unused("scatter", kwds) 542 | return chart 543 | 544 | def area( 545 | self, 546 | x=None, 547 | y=None, 548 | stacked=True, 549 | alpha=None, 550 | var_name="variable", 551 | value_name="value", 552 | width=450, 553 | height=300, 554 | ax=None, 555 | **kwds 556 | ): 557 | """Area plot for DataFrame data 558 | 559 | >>> dataframe.vgplot.area() # doctest: +SKIP 560 | 561 | Parameters 562 | ---------- 563 | x : string, optional 564 | the column to use as the x-axis variable. If not specified, the 565 | index will be used. 566 | y : string, optional 567 | the column to use as the y-axis variable. If not specified, all 568 | columns (except x if specified) will be used. 569 | stacked : bool, optional 570 | if True (default) then create a stacked area chart. Otherwise, 571 | areas will overlap 572 | alpha : float, optional 573 | transparency level, 0 <= alpha <= 1 574 | var_name : string, optional 575 | the legend title 576 | value_name : string, optional 577 | the y-axis label 578 | width : int, optional 579 | the width of the plot in pixels 580 | height : int, optional 581 | the height of the plot in pixels 582 | ax: altair.Chart, optional 583 | chart to be overlayed with this vis (convinience method for `chart1 + chart2`) 584 | 585 | Returns 586 | ------- 587 | chart : alt.Chart 588 | altair chart representation 589 | """ 590 | df = unpivot_frame( 591 | self._data, x=x, y=y, var_name=var_name, value_name=value_name 592 | ) 593 | 594 | x = df.columns[0] 595 | 596 | if alpha is None and not stacked and df[var_name].nunique() > 1: 597 | alpha = 0.7 598 | 599 | chart = self._plot( 600 | data=df, 601 | width=width, 602 | height=height, 603 | title=kwds.pop("title", ""), 604 | figsize=kwds.pop("figsize", None), 605 | dpi=kwds.pop("dpi", None), 606 | ).mark_area().encode( 607 | x=_x(x, df), 608 | y=alt.Y( 609 | value_name, 610 | type=infer_vegalite_type(df[value_name]), 611 | stack=(None, "zero")[stacked], 612 | ), 613 | color=alt.Color(field=var_name, type=infer_vegalite_type(df[var_name])), 614 | ) 615 | 616 | if alpha is not None: 617 | assert 0 <= alpha <= 1 618 | chart = chart.encode(opacity=alt.value(alpha)) 619 | 620 | if ax is not None: 621 | return ax + chart 622 | 623 | warn_if_keywords_unused("area", kwds) 624 | return chart 625 | 626 | def bar( 627 | self, 628 | x=None, 629 | y=None, 630 | stacked=False, 631 | alpha=None, 632 | var_name="variable", 633 | value_name="value", 634 | width=450, 635 | height=300, 636 | ax=None, 637 | **kwds 638 | ): 639 | """Bar plot for DataFrame data 640 | 641 | >>> dataframe.vgplot.bar() # doctest: +SKIP 642 | 643 | Parameters 644 | ---------- 645 | x : string, optional 646 | the column to use as the x-axis variable. If not specified, the 647 | index will be used. 648 | y : string, optional 649 | the column to use as the y-axis variable. If not specified, all 650 | columns (except x if specified) will be used. 651 | stacked : bool, optional 652 | if True (default) then create a stacked area chart. Otherwise, 653 | areas will overlap 654 | alpha : float, optional 655 | transparency level, 0 <= alpha <= 1 656 | var_name : string, optional 657 | the legend title 658 | value_name : string, optional 659 | the y-axis label 660 | width : int, optional 661 | the width of the plot in pixels 662 | height : int, optional 663 | the height of the plot in pixels 664 | ax: altair.Chart, optional 665 | chart to be overlayed with this vis (convinience method for `chart1 + chart2`) 666 | 667 | Returns 668 | ------- 669 | chart : alt.Chart 670 | altair chart representation 671 | """ 672 | df = unpivot_frame( 673 | self._data, x=x, y=y, var_name=var_name, value_name=value_name 674 | ) 675 | x = df.columns[0] 676 | 677 | if alpha is None and not stacked and df[var_name].nunique() > 1: 678 | alpha = 0.7 679 | 680 | chart = self._plot( 681 | data=df, 682 | width=width, 683 | height=height, 684 | title=kwds.pop("title", ""), 685 | figsize=kwds.pop("figsize", None), 686 | dpi=kwds.pop("dpi", None), 687 | ).mark_bar().encode( 688 | x=alt.X(x, type=infer_vegalite_type(df[x], ordinal_threshold=50)), 689 | y=alt.Y( 690 | "value", 691 | type=infer_vegalite_type(df["value"]), 692 | stack=(None, "zero")[stacked], 693 | ), 694 | color=alt.Color(field="variable", type=infer_vegalite_type(df["variable"])), 695 | ) 696 | 697 | if alpha is not None: 698 | assert 0 <= alpha <= 1 699 | chart = chart.encode(opacity=alt.value(alpha)) 700 | 701 | if ax is not None: 702 | return ax + chart 703 | 704 | warn_if_keywords_unused("bar", kwds) 705 | return chart 706 | 707 | def barh( 708 | self, 709 | x=None, 710 | y=None, 711 | stacked=False, 712 | alpha=None, 713 | var_name="variable", 714 | value_name="value", 715 | width=450, 716 | height=300, 717 | ax=None, 718 | **kwds 719 | ): 720 | """Horizontal bar plot for DataFrame data 721 | 722 | >>> dataframe.vgplot.barh() # doctest: +SKIP 723 | 724 | Parameters 725 | ---------- 726 | x : string, optional 727 | the column to use as the x-axis variable. If not specified, the 728 | index will be used. 729 | y : string, optional 730 | the column to use as the y-axis variable. If not specified, all 731 | columns (except x if specified) will be used. 732 | stacked : bool, optional 733 | if True (default) then create a stacked area chart. Otherwise, 734 | areas will overlap 735 | alpha : float, optional 736 | transparency level, 0 <= alpha <= 1 737 | var_name : string, optional 738 | the legend title 739 | value_name : string, optional 740 | the y-axis label 741 | width : int, optional 742 | the width of the plot in pixels 743 | height : int, optional 744 | the height of the plot in pixels 745 | ax: altair.Chart, optional 746 | chart to be overlayed with this vis (convinience method for `chart1 + chart2`) 747 | 748 | Returns 749 | ------- 750 | chart : alt.Chart 751 | altair chart representation 752 | """ 753 | chart = self.bar( 754 | x=x, 755 | y=y, 756 | stacked=stacked, 757 | alpha=alpha, 758 | var_name=var_name, 759 | value_name=value_name, 760 | width=width, 761 | height=height, 762 | **kwds 763 | ) 764 | 765 | enc = chart.encoding 766 | enc["x"], enc["y"] = enc["y"], enc["x"] 767 | if ax is not None: 768 | return ax + chart 769 | return chart 770 | 771 | def hist( 772 | self, 773 | x=None, 774 | y=None, 775 | by=None, 776 | bins=10, 777 | stacked=False, 778 | alpha=None, 779 | histtype="bar", 780 | var_name="variable", 781 | value_name="value", 782 | width=450, 783 | height=300, 784 | ax=None, 785 | **kwds 786 | ): 787 | """Histogram plot for DataFrame data 788 | 789 | >>> dataframe.vgplot.hist() # doctest: +SKIP 790 | 791 | Parameters 792 | ---------- 793 | x : string, optional 794 | the column to use as the x-axis variable. If not specified, the 795 | index will be used. 796 | y : string, optional 797 | the column to use as the y-axis variable. If not specified, all 798 | columns (except x if specified) will be used. 799 | by : string, optional 800 | the column by which to group the results 801 | bins : integer, optional 802 | the maximum number of bins to use for the histogram (default: 10) 803 | stacked : bool, optional 804 | if True (default) then create a stacked area chart. Otherwise, 805 | areas will overlap 806 | alpha : float, optional 807 | transparency level, 0 <= alpha <= 1 808 | histtype : string, {'bar', 'step', 'stepfilled'} 809 | The type of histogram to generate. Default is 'bar'. 810 | var_name : string, optional 811 | the legend title 812 | value_name : string, optional 813 | the y-axis label 814 | width : int, optional 815 | the width of the plot in pixels 816 | height : int, optional 817 | the height of the plot in pixels 818 | ax: altair.Chart, optional 819 | chart to be overlayed with this vis (convinience method for `chart1 + chart2`) 820 | 821 | Returns 822 | ------- 823 | chart : alt.Chart 824 | altair chart representation 825 | """ 826 | if by is not None: 827 | raise NotImplementedError("vgplot.hist `by` keyword") 828 | if x is not None or y is not None: 829 | raise NotImplementedError('"x" and "y" args to hist()') 830 | df = self._data.melt(var_name=var_name, value_name=value_name) 831 | 832 | marks = { 833 | "bar": "bar", 834 | "barstacked": "bar", 835 | "stepfilled": {"type": "area", "interpolate": "step"}, 836 | "step": {"type": "line", "interpolate": "step"}, 837 | } 838 | 839 | if histtype in marks: 840 | mark = marks[histtype] 841 | else: 842 | raise ValueError("histtype '{0}' is not recognized" "".format(histtype)) 843 | 844 | if alpha is None and not stacked and df[var_name].nunique() > 1: 845 | alpha = 0.7 846 | 847 | chart = self._plot( 848 | data=df, 849 | width=width, 850 | height=height, 851 | title=kwds.pop("title", ""), 852 | figsize=kwds.pop("figsize", None), 853 | dpi=kwds.pop("dpi", None), 854 | ) 855 | 856 | chart.mark = mark 857 | chart = chart.encode( 858 | x=alt.X(value_name, bin={"maxbins": bins}, type="quantitative"), 859 | y=alt.Y( 860 | aggregate="count", 861 | type="quantitative", 862 | stack=("zero" if stacked else None), 863 | ), 864 | color=alt.Color(field=var_name, type="nominal"), 865 | ) 866 | 867 | if alpha is not None: 868 | assert 0 <= alpha <= 1 869 | chart = chart.encode(opacity=alt.value(alpha)) 870 | 871 | if ax is not None: 872 | return ax + chart 873 | 874 | warn_if_keywords_unused("hist", kwds) 875 | return chart 876 | 877 | def heatmap( 878 | self, 879 | x, 880 | y, 881 | C=None, 882 | reduce_C_function="mean", 883 | gridsize=100, 884 | alpha=None, 885 | width=450, 886 | height=300, 887 | ax=None, 888 | **kwds 889 | ): 890 | """Heatmap plot for DataFrame data 891 | 892 | Note that Vega-Lite does not support hexagonal binning, so this method 893 | returns a cartesian heatmap. 894 | 895 | >>> dataframe.vgplot.hexbin() # doctest: +SKIP 896 | 897 | Parameters 898 | ---------- 899 | x : string 900 | the column to use as the x-axis variable. 901 | y : string 902 | the column to use as the y-axis variable. 903 | C : string, optional 904 | the column to use to compute the mean within each bin. If not 905 | specified, the count within each bin will be used. 906 | reduce_C_function : string, default = 'mean' 907 | One of ['mean', 'sum', 'median', 'min', 'max', 'count'], or 908 | associated numpy or python builtin functions. Note that arbitrary 909 | callable functions are not supported. 910 | gridsize : int, optional 911 | the number of divisions in the x and y axis (default=100) 912 | alpha : float, optional 913 | transparency level, 0 <= alpha <= 1 914 | width : int, optional 915 | the width of the plot in pixels 916 | height : int, optional 917 | the height of the plot in pixels 918 | ax: altair.Chart, optional 919 | chart to be overlayed with this vis (convinience method for `chart1 + chart2`) 920 | 921 | Returns 922 | ------- 923 | chart : alt.Chart 924 | altair chart representation 925 | """ 926 | # TODO: Use actual hexbins rather than a grid heatmap 927 | reduce_C_function = validate_aggregation(reduce_C_function) 928 | 929 | if C is None: 930 | df = self._data[[x, y]] 931 | else: 932 | df = self._data[[x, y, C]] 933 | 934 | if C is None: 935 | color = alt.Color(aggregate="count", type="quantitative") 936 | else: 937 | color = alt.Color(field=C, aggregate=reduce_C_function, type="quantitative") 938 | 939 | color.scale = alt.Scale(scheme="greens") 940 | 941 | chart = self._plot( 942 | data=df, 943 | width=width, 944 | height=height, 945 | title=kwds.pop("title", ""), 946 | figsize=kwds.pop("figsize", None), 947 | dpi=kwds.pop("dpi", None), 948 | ).mark_rect().encode( 949 | x=alt.X(x, bin=alt.Bin(maxbins=gridsize), type="quantitative"), 950 | y=alt.Y(y, bin=alt.Bin(maxbins=gridsize), type="quantitative"), 951 | color=color, 952 | ) 953 | 954 | if alpha is not None: 955 | assert 0 <= alpha <= 1 956 | chart = chart.encode(opacity=alt.value(alpha)) 957 | 958 | if ax is not None: 959 | return ax + chart 960 | 961 | warn_if_keywords_unused("hexbin", kwds) 962 | return chart 963 | 964 | hexbin = heatmap 965 | 966 | def kde( 967 | self, 968 | x=None, 969 | y=None, 970 | bw_method=None, 971 | alpha=None, 972 | width=450, 973 | height=300, 974 | ax=None, 975 | **kwds 976 | ): 977 | """Kernel Density Estimate plot for DataFrame data 978 | 979 | >>> dataframe.vgplot.kde() # doctest: +SKIP 980 | 981 | Parameters 982 | ---------- 983 | x : string, optional 984 | the column to use as the x-axis variable. If not specified, the 985 | index will be used. 986 | y : string, optional 987 | the column to use as the y-axis variable. If not specified, all 988 | columns (except x if specified) will be used. 989 | bw_method : str, scalar or callable, optional 990 | The method used to calculate the estimator bandwidth. This can be 991 | 'scott', 'silverman', a scalar constant or a callable. 992 | See `scipy.stats.gaussian_kde` for more details. 993 | alpha : float, optional 994 | transparency level, 0 <= alpha <= 1 995 | width : int, optional 996 | the width of the plot in pixels 997 | height : int, optional 998 | the height of the plot in pixels 999 | ax: altair.Chart, optional 1000 | chart to be overlayed with this vis (convinience method for `chart1 + chart2`) 1001 | 1002 | Returns 1003 | ------- 1004 | chart : alt.Chart 1005 | altair chart representation 1006 | """ 1007 | from scipy.stats import gaussian_kde as kde 1008 | 1009 | if x is not None: # ?? 1010 | raise NotImplementedError('"x" argument to df.vgplot.kde()') 1011 | 1012 | if y is not None: 1013 | df = self._data[y].to_frame() 1014 | else: 1015 | df = self._data 1016 | 1017 | tmin, tmax = df.min().min(), df.max().max() 1018 | trange = tmax - tmin 1019 | t = np.linspace(tmin - 0.5 * trange, tmax + 0.5 * trange, 1000) 1020 | 1021 | kde_df = pd.DataFrame( 1022 | {col: kde(df[col], bw_method=bw_method).evaluate(t) for col in df}, index=t 1023 | ) 1024 | kde_df.index.name = " " 1025 | 1026 | f = FramePlotMethods(kde_df) 1027 | return f.line( 1028 | value_name="Density", alpha=alpha, width=width, height=height, ax=ax, **kwds 1029 | ) 1030 | 1031 | density = kde 1032 | --------------------------------------------------------------------------------