├── pdvega
    ├── tests
    │   ├── __init__.py
    │   ├── test_advanced.py
    │   ├── utils.py
    │   ├── test_utils.py
    │   ├── test_plotting.py
    │   ├── test_core_common.py
    │   └── test_core.py
    ├── themes.py
    ├── __init__.py
    ├── _pandas_internals.py
    ├── _utils.py
    ├── plotting.py
    └── _core.py
├── doc
    ├── sphinxext
    │   └── pdvega_ext
    │   │   ├── __init__.py
    │   │   ├── utils.py
    │   │   └── pdvegaplot.py
    ├── requirements.txt
    ├── _static
    │   ├── favicon.ico
    │   ├── pdvega-plot.css
    │   └── theme_overrides.css
    ├── .gitignore
    ├── API.rst
    ├── Makefile
    ├── sync_website.sh
    ├── installation.rst
    ├── plotting.rst
    ├── advanced.rst
    ├── index.rst
    ├── make.bat
    ├── core.rst
    └── conf.py
├── requirements_dev.txt
├── requirements.txt
├── binder
    ├── postBuild
    └── requirements.txt
├── images
    ├── mpl-scatter.png
    └── vg-scatter.png
├── MANIFEST.in
├── setup.cfg
├── Makefile
├── .travis.yml
├── CHANGES.md
├── RELEASING.md
├── LICENSE
├── .gitignore
├── setup.py
└── README.md


/pdvega/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/doc/sphinxext/pdvega_ext/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/requirements_dev.txt:
--------------------------------------------------------------------------------
1 | pytest
2 | flake8
3 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | altair
2 | numpy
3 | pandas
4 | scipy
5 | 


--------------------------------------------------------------------------------
/binder/postBuild:
--------------------------------------------------------------------------------
1 | jupyter nbextension install --sys-prefix --py vega3
2 | 


--------------------------------------------------------------------------------
/binder/requirements.txt:
--------------------------------------------------------------------------------
1 | scipy
2 | matplotlib
3 | pdvega==0.1
4 | vega_datasets
5 | 


--------------------------------------------------------------------------------
/doc/requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx
2 | jinja2
3 | numpydoc
4 | vega3
5 | vega_datasets
6 | pandas
7 | 


--------------------------------------------------------------------------------
/images/mpl-scatter.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/altair-viz/pdvega/HEAD/images/mpl-scatter.png


--------------------------------------------------------------------------------
/images/vg-scatter.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/altair-viz/pdvega/HEAD/images/vg-scatter.png


--------------------------------------------------------------------------------
/doc/_static/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/altair-viz/pdvega/HEAD/doc/_static/favicon.ico


--------------------------------------------------------------------------------
/doc/.gitignore:
--------------------------------------------------------------------------------
1 | _build
2 | 
3 | # gallery is auto-generated; don't version-control it
4 | gallery
5 | _images


--------------------------------------------------------------------------------
/pdvega/themes.py:
--------------------------------------------------------------------------------
1 | import altair as alt
2 | 
3 | 
def enable(theme):
    """Activate an altair theme.

    ``theme`` is forwarded unchanged to ``alt.themes.enable``; this wrapper
    itself returns nothing.
    """
    registry = alt.themes
    registry.enable(theme)
7 | 


--------------------------------------------------------------------------------
/doc/API.rst:
--------------------------------------------------------------------------------
1 | API Reference
2 | =============
3 | 
4 | .. automodule:: pdvega
5 |    :members:
6 |    :imported-members:
7 |    :undoc-members:
8 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include *.md
2 | include LICENSE
3 | include Makefile
4 | recursive-include images *.png
5 | recursive-include pdvega *.py *.json *.csv *.tsv
6 | 


--------------------------------------------------------------------------------
/doc/_static/pdvega-plot.css:
--------------------------------------------------------------------------------
 1 | .vega-actions a {
 2 |     margin-right: 12px;
 3 |     color: #757575;
 4 |     font-weight: normal;
 5 |     font-size: 13px;
 6 | }
 7 | 
 8 | .vega-embed {
 9 |     margin-bottom: 20px;
10 |     margin-top: 20px;
11 | }
12 | 
13 | 


--------------------------------------------------------------------------------
/pdvega/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 | import altair as alt
3 | from . import plotting, themes
4 | from ._core import FramePlotMethods, SeriesPlotMethods
5 | from .plotting import scatter_matrix, andrews_curves, parallel_coordinates, lag_plot
6 | 
7 | __version__ = '0.2.01.dev0'
8 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
 1 | [flake8]
 2 | ignore =
 3 |   E  # Ignore all PEP8 rules
 4 |   W  # Ignore all whitespace rules
 5 |   F811  # Ignore variable redefinitions until https://github.com/altair-viz/altair/issues/734 is resolved
 6 | 
 7 | [metadata]
 8 | description-file = README.md
 9 | license_file = LICENSE
10 | 
11 | 
12 | [bdist_wheel]
13 | universal = 1


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | all:
 2 | 	install
 3 | 
 4 | install:
 5 | 	python setup.py install
 6 | 
 7 | test:
 8 | 	python -m pytest --pyargs --doctest-modules pdvega
 9 | 
10 | test-coverage:
11 | 	python -m pytest --pyargs --doctest-modules --cov=pdvega --cov-report term pdvega
12 | 
13 | test-coverage-html:
14 | 	python -m pytest --pyargs --doctest-modules --cov=pdvega --cov-report html pdvega
15 | 


--------------------------------------------------------------------------------
/pdvega/tests/test_advanced.py:
--------------------------------------------------------------------------------
 1 | import pdvega  # noqa
 2 | import pandas as pd
 3 | 
 4 | 
 5 | def test_advanced():
 6 |     df = pd.Series(range(10))
 7 |     plot = df.vgplot.line()
 8 | 
 9 |     plot['encoding']['x']['scale'] = {'zero': False}
10 |     spec = plot.to_dict()
11 |     assert 'scale' in spec['encoding']['x']
12 |     assert spec['encoding']['x']['scale']['zero'] is False
13 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | 
 3 | python:
 4 |   - 2.7
 5 |   - 3.5
 6 |   - 3.6
 7 | 
 8 | env:
 9 |   global:
10 |     - TEST_DIR=/tmp/_pdvega/
11 | 
12 | before_install:
13 |   - pip install pip --upgrade
14 |   - pip install -U pytest
15 | 
16 | install:
17 |   - pip install -e .[dev]
18 | 
19 | script:
20 |   - flake8 ./
21 |   - mkdir -p $TEST_DIR
22 |   - cd $TEST_DIR && python -m pytest --pyargs  --doctest-modules pdvega
23 | 


--------------------------------------------------------------------------------
/doc/_static/theme_overrides.css:
--------------------------------------------------------------------------------
 1 | /* override table width restrictions */
 2 | @media screen and (min-width: 767px) {
 3 | 
 4 |     .wy-table-responsive table td {
 5 |         /* !important prevents the common CSS stylesheets from overriding
 6 |            this as on RTD they are loaded after this stylesheet */
 7 |         white-space: normal !important;
 8 |     }
 9 | 
10 |     .wy-table-responsive {
11 |         overflow: visible !important;
12 |     }
13 | }
14 | 
/* Render emphasized text inside definition terms upright. The !important
   flag must sit inside the declaration, before the semicolon, to apply. */
.rst-content dl:not(.docutils) dt em {
    font-style: normal !important;
    line-height: 1.4em !important;
}


--------------------------------------------------------------------------------
/doc/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line.
 5 | SPHINXOPTS    =
 6 | SPHINXBUILD   = sphinx-build
 7 | SPHINXPROJ    = pdvega
 8 | SOURCEDIR     = .
 9 | BUILDDIR      = _build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)


--------------------------------------------------------------------------------
/pdvega/tests/utils.py:
--------------------------------------------------------------------------------
 1 | IGNORE = object()
 2 | 
 3 | 
 4 | def check_encodings(chart, **fields):
 5 |     edict = chart.encoding.to_dict()
 6 |     assert set(edict.keys()) == set(fields.keys())
 7 |     for encoding, expected_field in fields.items():
 8 |         if expected_field is IGNORE:
 9 |             continue
10 | 
11 |         actual_field = edict[encoding]['field']
12 |         if actual_field != expected_field:
13 |             raise ValueError("Expected '{0}' encoding to be '{1}'; got '{2}'"
14 |                              "".format(encoding, expected_field, actual_field))
15 | 
16 | 
def get_data(chart):
    """Return the ``data`` attribute of ``chart``."""
    data = chart.data
    return data
19 | 
20 | 
def validate_vegalite(chart):
    """Serialise ``chart`` with ``validate=True`` and require a truthy result."""
    spec = chart.to_dict(validate=True)
    assert spec
23 | 


--------------------------------------------------------------------------------
/CHANGES.md:
--------------------------------------------------------------------------------
 1 | Change Log
 2 | ==========
 3 | 
 4 | 
 5 | Release v0.2 (Unreleased)
 6 | -------------------------
 7 | - Fixed x-axis of Andrews curves
 8 | - Added layering support via the ``ax`` argument to all non-compound plot types
 9 | 
10 | Release v0.1 (January 31, 2018)
11 | -------------------------------
12 | 
13 | - Initial release:
14 | 
15 |   Basic plot methods:
16 | 
17 |   - data.vgplot.line()
18 |   - data.vgplot.scatter()
19 |   - data.vgplot.area()
20 |   - data.vgplot.bar()
21 |   - data.vgplot.barh()
22 |   - data.vgplot.hist()
23 |   - data.vgplot.kde()
24 |   - data.vgplot.density()
25 |   - data.vgplot.hexbin()
26 |   - data.vgplot.heatmap()
27 | 
28 |   Specialized plot methods
29 | 
30 |   - pdvega.plotting.scatter_matrix()
31 |   - pdvega.plotting.andrews_curves()
32 |   - pdvega.plotting.parallel_coordinates()
33 |   - pdvega.plotting.lag_plot()
34 | 
35 |   Plot objects:
36 | 
37 |   - pdvega.Axes
38 | 


--------------------------------------------------------------------------------
/doc/sync_website.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # clean the build directory & make the website with the right internal addresses
 4 | make clean
 5 | sphinx-build -M html . _build -D pdvegaplot_url_root="https://altair-viz.github.io/pdvega/"
 6 | 
 7 | # get git hash for commit message
 8 | GITHASH=$(git rev-parse HEAD)
 9 | MSG="doc build for commit $GITHASH"
10 | cd _build
11 | 
12 | # clone the repo if needed
13 | if test -d pdvega;
14 | then echo "using existing cloned pdvega directory";
15 | else git clone git@github.com:altair-viz/pdvega.git;
16 | fi
17 | 
18 | # sync the website
19 | cd pdvega
20 | git checkout gh-pages
21 | git pull
22 | 
23 | # remove all tracked files
24 | git ls-files -z | xargs -0 rm -f
25 | 
26 | # sync files from html build
27 | rsync -r ../html/ ./
28 | 
29 | # ensure there is a .nojekyll file for github pages
30 | touch .nojekyll
31 | 
32 | # add commit, and push to github
33 | git add . --all
34 | git commit -m "$MSG"
35 | git push origin gh-pages
36 | 


--------------------------------------------------------------------------------
/RELEASING.md:
--------------------------------------------------------------------------------
 1 | 1. Update version in pdvega/__init__.py to, e.g. 0.2
 2 | 
 3 | 2. Update version in doc/conf.py (in two places!)
 4 | 
 5 | 3. Make sure CHANGES.md is up to date for the release
 6 | 
 7 | 4. Commit change and push to master
 8 | 
 9 |        git add . -u
10 |        git commit -m "MAINT: bump version to 0.2"
11 |        git push origin master
12 | 
13 | 5. Tag the release:
14 | 
15 |        git tag -a v0.2 -m "version 0.2 release"
16 |        git push origin v0.2
17 | 
18 | 6. publish to PyPI (Requires correct PyPI owner permissions)
19 | 
20 |        python setup.py sdist upload
21 | 
22 | 7. Build and push the docs website:
23 | 
24 |        python setup.py install
25 |        cd doc
26 |        bash sync_website.sh
27 | 
28 | 8. update version in pdvega/__init__.py to, e.g. 0.3.0dev0
29 | 
30 | 9. update version in doc/conf.py (in two places!)
31 | 
32 | 10. add a new changelog entry for the unreleased version
33 | 
34 | 11. Commit change and push to master
35 | 
36 |        git add . -u
37 |        git commit -m "MAINT: bump version to 0.3.0dev"
38 |        git push origin master
39 | 
40 |     


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 Jake Vanderplas
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/pdvega/_pandas_internals.py:
--------------------------------------------------------------------------------
 1 | # flake8: noqa
 2 | from pandas.core.base import PandasObject
 3 | 
 4 | try:
 5 |     from pandas.api.types import infer_dtype as infer_dtype
 6 |     _infer_dtype_kwds = {'skipna': False}
 7 | except ImportError:  # Pandas before 0.20.0
 8 |     from pandas.lib import infer_dtype as infer_dtype
 9 |     _infer_dtype_kwds = {}
10 | 
11 | from pandas import DataFrame, Series
12 | 
try:
    # pandas >= 0.23 provides the accessor-registration decorators itself
    from pandas.api.extensions import (register_dataframe_accessor,
                                       register_series_accessor)
except ImportError:
    # Fallback for pandas < 0.23: find AccessorProperty, then define
    # decorators with the same names.
    try:
        from pandas.core.accessor import AccessorProperty
    except ImportError:  # Pandas before 0.22.0
        from pandas.core.base import AccessorProperty

    class register_dataframe_accessor(object):
        """Decorator that installs an accessor class on DataFrame as ``name``."""

        def __init__(self, name):
            self.name = name

        def __call__(self, accessor):
            prop = AccessorProperty(accessor, accessor)
            setattr(DataFrame, self.name, prop)
            # Hand the class back so this can be used as a class decorator
            return accessor

    class register_series_accessor(object):
        """Decorator that installs an accessor class on Series as ``name``."""

        def __init__(self, name):
            self.name = name

        def __call__(self, accessor):
            prop = AccessorProperty(accessor, accessor)
            setattr(Series, self.name, prop)
            # Hand the class back so this can be used as a class decorator
            return accessor
43 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | env/
 12 | build/
 13 | develop-eggs/
 14 | dist/
 15 | downloads/
 16 | eggs/
 17 | .eggs/
 18 | lib/
 19 | lib64/
 20 | parts/
 21 | sdist/
 22 | var/
 23 | wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .pytest_cache/*
 48 | **/.pytest_cache/*
 49 | .hypothesis/
 50 | 
 51 | # Translations
 52 | *.mo
 53 | *.pot
 54 | 
 55 | # Django stuff:
 56 | *.log
 57 | local_settings.py
 58 | 
 59 | # Flask stuff:
 60 | instance/
 61 | .webassets-cache
 62 | 
 63 | # Scrapy stuff:
 64 | .scrapy
 65 | 
 66 | # Sphinx documentation
 67 | docs/_build/
 68 | 
 69 | # PyBuilder
 70 | target/
 71 | 
 72 | # Jupyter Notebook
 73 | .ipynb_checkpoints
 74 | 
 75 | # pyenv
 76 | .python-version
 77 | 
 78 | # celery beat schedule file
 79 | celerybeat-schedule
 80 | 
 81 | # SageMath parsed files
 82 | *.sage.py
 83 | 
 84 | # dotenv
 85 | .env
 86 | 
 87 | # virtualenv
 88 | .venv
 89 | venv/
 90 | ENV/
 91 | 
 92 | # Spyder project settings
 93 | .spyderproject
 94 | .spyproject
 95 | 
 96 | # Rope project settings
 97 | .ropeproject
 98 | 
 99 | # mkdocs documentation
100 | /site
101 | 
102 | # mypy
103 | .mypy_cache/
104 | 
105 | # emacs
106 | *~
107 | 
108 | # notebooks
109 | Untitled*.ipynb
110 | 
111 | # macs
112 | **/.DS_Store
113 | .DS_Store
114 | 


--------------------------------------------------------------------------------
/doc/sphinxext/pdvega_ext/utils.py:
--------------------------------------------------------------------------------
 1 | import ast
 2 | import json
 3 | import importlib
 4 | from hashlib import md5
 5 | 
 6 | from itertools import tee, chain
 7 | 
 8 | 
 9 | def dict_hash(dct):
10 |     """Return a hash of the contents of a dictionary"""
11 |     dct_s = json.dumps(dct, sort_keys=True)
12 | 
13 |     try:
14 |         m = md5(dct_s)
15 |     except TypeError:
16 |         m = md5(dct_s.encode())
17 | 
18 |     return m.hexdigest()
19 | 
20 | 
def exec_then_eval(code, namespace=None):
    """Exec a code block & return evaluation of the last line.

    Parameters
    ----------
    code : str
        Python source to run.
    namespace : dict, optional
        Globals used for execution. It is updated in place, so a
        caller-supplied dict (even an empty one) receives every name the
        code defines. A fresh dict is used when omitted.

    Returns
    -------
    The value of the final statement when it is a bare expression.
    Otherwise (empty code, or a final statement such as an assignment or
    import) the whole block is executed and None is returned.

    Notes
    -----
    This runs arbitrary code through ``exec``/``eval``; only pass trusted
    source to it.
    """
    # Test against None rather than truthiness: an empty dict passed by the
    # caller must still be the namespace that gets populated.
    if namespace is None:
        namespace = {}

    block = ast.parse(code, mode='exec')

    # Split off the last statement only when it is an expression; any other
    # statement kind stays in the block so that it is actually executed.
    last = None
    if block.body and isinstance(block.body[-1], ast.Expr):
        last = ast.Expression(block.body.pop().value)

    exec(compile(block, '<string>', mode='exec'), namespace)
    if last is None:
        return None
    return eval(compile(last, '<string>', mode='eval'), namespace)
31 | 
32 | 
def import_obj(clsname, default_module=None):
    """
    Resolve a dotted name to the object it refers to.

    When ``default_module`` is given and ``clsname`` does not already start
    with ``default_module + '.'``, the name is looked up inside that module.
    Raises ImportError when the module has no attribute of that name.
    """
    needs_prefix = (default_module is not None
                    and not clsname.startswith(default_module + '.'))
    if needs_prefix:
        clsname = '{0}.{1}'.format(default_module, clsname)

    # Everything before the last dot is the module, the rest the attribute
    module_path, attr_name = clsname.rsplit('.', 1)
    module = importlib.import_module(module_path)
    try:
        return getattr(module, attr_name)
    except AttributeError:
        raise ImportError('Cannot import {0} from {1}'.format(attr_name, module))
48 | 
49 | 
50 | 
def strip_vl_extension(filename):
    """Return ``filename`` without a trailing '.vl.json' or '.json', if present."""
    # '.vl.json' is tried first so the longer suffix wins over plain '.json'
    candidates = (ext for ext in ('.vl.json', '.json')
                  if filename.endswith(ext))
    suffix = next(candidates, None)
    if suffix is None:
        return filename
    return filename[:len(filename) - len(suffix)]
58 | 
59 | 
def prev_this_next(it, sentinel=None):
    """Zip an iterable into (previous, current, next) triples.

    ``sentinel`` stands in for the missing neighbour of the first and of the
    last item.
    """
    before, current, after = tee(it, 3)
    # Advance the look-ahead copy by one item
    next(after, None)
    lagging = chain([sentinel], before)
    leading = chain(after, [sentinel])
    return zip(lagging, current, leading)
65 | 


--------------------------------------------------------------------------------
/pdvega/tests/test_utils.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | import pandas as pd
 4 | import numpy as np
 5 | 
 6 | from pdvega._utils import infer_vegalite_type, unpivot_frame, validate_aggregation
 7 | 
 8 | test_cases = [
 9 |     (pd.Series(np.random.rand(20)), 'quantitative'),
10 |     (pd.Series(range(4)), 'ordinal'),
11 |     (pd.Series(range(40)), 'quantitative'),
12 |     (pd.Series(['A', 'B', 'C', 'D']), 'nominal'),
13 |     (pd.Categorical(['a', 'b', 'c']), 'nominal'),
14 |     (pd.date_range('2017', freq='D', periods=10), 'temporal'),
15 |     (pd.timedelta_range(0, periods=7), 'temporal')
16 | ]
17 | 
18 | 
19 | @pytest.mark.parametrize('data,type', test_cases)
20 | def test_infer_vegalite_type(data, type):
21 |     assert infer_vegalite_type(data) == type
22 | 
23 | 
def test_unpivot():
    """unpivot_frame yields the expected columns and variable names."""
    frame = pd.DataFrame({'x': range(10), 'y': range(10), 'z': range(10)})

    # Custom names for the variable and value columns
    melted = unpivot_frame(frame, var_name='foo', value_name='bar')
    assert list(melted.columns) == ['index', 'foo', 'bar']
    assert set(pd.unique(melted['foo'])) == {'x', 'y', 'z'}

    # (keyword arguments, expected columns, expected 'variable' entries)
    scenarios = [
        ({'x': 'x'}, ['x', 'variable', 'value'], {'y', 'z'}),
        ({'y': 'y'}, ['index', 'variable', 'value'], {'y'}),
        ({'y': ('y', 'z')}, ['index', 'variable', 'value'], {'y', 'z'}),
        ({'x': ('x', 'y'), 'y': 'z'},
         ['x', 'y', 'variable', 'value'], {'z'}),
    ]
    for kwargs, columns, variables in scenarios:
        melted = unpivot_frame(frame, **kwargs)
        assert list(melted.columns) == columns
        assert set(pd.unique(melted['variable'])) == variables
45 | 
46 | 
def test_unpivot_bad_cols():
    """Naming a column that is not in the frame raises KeyError."""
    frame = pd.DataFrame({'x': range(10), 'y': range(10)})

    bad_selections = [
        {'x': 'foo'},
        {'y': 'foo'},
        {'x': ('x', 'foo')},
        {'y': ('y', 'foo')},
    ]
    for kwargs in bad_selections:
        with pytest.raises(KeyError):
            unpivot_frame(frame, **kwargs)
61 | 
62 | 
def test_validate_aggregation():
    """validate_aggregation accepts known names/callables and rejects others."""
    # Known aggregation names come back unchanged
    for name in ['max', 'min', 'mean', 'median', 'count', 'sum']:
        assert validate_aggregation(name) == name

    # Builtin and numpy reducers are translated to their string names
    reducers = [
        (np.min, 'min'), (min, 'min'),
        (np.max, 'max'), (max, 'max'),
        (np.sum, 'sum'), (sum, 'sum'),
        (np.median, 'median'), (np.mean, 'mean'),
    ]
    for func, name in reducers:
        assert validate_aggregation(func) == name

    assert validate_aggregation(None) is None

    # An unknown name and an unknown callable are both rejected
    for bad in ('blah', np.array):
        with pytest.raises(ValueError) as err:
            validate_aggregation(bad)
        assert str(err.value).startswith("Unrecognized Vega-Lite aggregation")
85 | 


--------------------------------------------------------------------------------
/doc/installation.rst:
--------------------------------------------------------------------------------
 1 | .. _installation:
 2 | 
 3 | Installing and Using ``pdvega``
 4 | ===============================
 5 | 
 6 | To install and use ``pdvega`` run the following commands:
 7 | 
 8 | .. code-block:: bash
 9 | 
10 |     $ pip install pdvega
11 |     $ jupyter nbextension install --sys-prefix --py vega3
12 | 
13 | The first command installs the `pdvega <https://pypi.python.org/pypi/pdvega>`_
14 | Python package along with its dependencies (`Pandas`_ and `vega3`_).
15 | The second command above installs the `vega3`_ Jupyter notebook extension, which
16 | is required for ``pdvega`` plots to display automatically in the notebook.
17 | 
18 | Using ``pdvega`` in the Jupyter Notebook
19 | ----------------------------------------
20 | When ``pdvega`` and ``vega3`` are correctly installed, you can create a
21 | visualization within the Jupyter notebook by executing a cell with a plot
22 | command as the last statement in the cell. For example:
23 | 
24 | .. pdvega-plot::
25 | 
26 |    import pandas as pd
27 |    import pdvega  # adds vgplot attribute to Pandas objects
28 | 
29 |    data = pd.Series([1,2,3,2,3,4,3,4,5])
30 |    data.vgplot()
31 | 
32 | You can also explicitly call the ``plot.display()`` method to display a plot
33 | saved in a variable:
34 | 
35 | .. code-block:: python
36 | 
37 |    plot = data.vgplot()
38 |    plot.display()
39 | 
40 | .. pdvega-plot::
41 |     :hide-code:
42 | 
43 |     import pandas as pd
44 |     import pdvega  # adds vgplot attribute to Pandas objects
45 | 
46 |     data = pd.Series([1,2,3,2,3,4,3,4,5])
47 |     data.vgplot()
48 | 
49 | 
50 | Using ``pdvega`` in JupyterLab
51 | ------------------------------
52 | `JupyterLab`_ is the next phase
53 | of evolution for the Jupyter notebook. For reasons related to its under-the-hood
54 | implementation, the current version of ``pdvega`` will not work in JupyterLab: the
55 | main reason is that the new MIME-based rendering used by JupyterLab is not yet supported
56 | in the `vega3`_ library that ``pdvega`` depends on.
57 | 
58 | We hope to address this incompatibility soon!
59 | 
60 | 
61 | Using ``pdvega`` Outside Jupyter
62 | --------------------------------
63 | If you wish to use ``pdvega`` outside the Jupyter notebook, you can save the
64 | plot specification to a JSON file:
65 | 
66 | .. code-block:: python
67 | 
68 |     import json
69 |     plot = data.vgplot()
70 |     with open('plot.json', 'w') as f: json.dump(plot.spec, f)
71 | 
72 | The resulting plot specification can then be rendered within an HTML page
73 | using the `vega-embed`_ Javascript package.
74 | 
75 | Saving Visualizations to PNG or SVG
76 | -----------------------------------
77 | To save a visualization to PNG, you can use the link generated below the
78 | rendered plot. Programmatic saving of figures is not currently supported
79 | from within Python, though it is possible using the ``vl2png`` and ``vl2svg``
80 | command-line tools provided in the `vega-lite`_ npm package.
81 | 
82 | 
83 | .. _Jupyter notebook: http://jupyter.org/
84 | .. _JupyterLab: http://jupyterlab-tutorial.readthedocs.io/en/latest/
85 | .. _Pandas: http://pandas.pydata.org/
86 | .. _vega3: http://pypi.python.org/pypi/vega3/
87 | .. _vega-embed: https://vega.github.io/vega-lite/usage/embed.html
88 | .. _vega-lite: https://github.com/vega/vega-lite
89 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | import io
 2 | import os
 3 | import re
 4 | 
 5 | from setuptools import setup
 6 | 
 7 | 
 8 | def read(path, encoding='utf-8'):
 9 |     path = os.path.join(os.path.dirname(__file__), path)
10 |     with io.open(path, encoding=encoding) as fp:
11 |         return fp.read()
12 | 
13 | 
def get_install_requirements(path):
    """Return the lines of ``path`` that are neither empty nor start with '#'."""
    requirements = []
    for line in read(path).split("\n"):
        if line == '' or line.startswith('#'):
            continue
        requirements.append(line)
    return requirements
21 | 
22 | 
def version(path):
    """Extract the package version from a Python file, e.g. pkg/__init__.py.

    The file must contain a line of the form ``__version__ = '<version>'``;
    RuntimeError is raised when no such line is found.
    See <https://packaging.python.org/en/latest/single_source_version.html>.
    """
    pattern = r"""^__version__ = ['"]([^'"]*)['"]"""
    found = re.search(pattern, read(path), re.M)
    if found is None:
        raise RuntimeError("Unable to find version string.")
    return found.group(1)
33 | 
34 | 
# Absolute path of the directory that holds this file
HERE = os.path.abspath(os.path.dirname(__file__))


# From https://github.com/jupyterlab/jupyterlab/blob/master/setupbase.py, BSD licensed
def find_packages(top=HERE):
    """
    Return dotted names of the packages found beneath ``top``.

    A directory counts as a package when it contains ``__init__.py``.
    Directories other than ``top`` that are not packages are not descended
    into.
    """
    found = []
    for current, subdirs, _files in os.walk(top, followlinks=True):
        marker = os.path.join(current, '__init__.py')
        if os.path.exists(marker):
            dotted = os.path.relpath(current, top).replace(os.path.sep, '.')
            found.append(dotted)
            continue
        if current != top:
            # Emptying the list in place stops os.walk from recursing here
            del subdirs[:]
    return found
50 | 
51 | 
# Package metadata handed to setup() below
DESCRIPTION = "Pandas plotting interface to Vega and Vega-Lite"
LONG_DESCRIPTION = """
pdvega makes it easy to create Vega-Lite plots from pandas dataframes,
using the familiar pandas visualization API. For more information, see
the `pdvega documentation <http://altair-viz.github.io/pdvega/>`_.
"""
NAME = "pdvega"
AUTHOR = "Jake VanderPlas"
AUTHOR_EMAIL = "jakevdp@gmail.com"
MAINTAINER = "Jake VanderPlas"
MAINTAINER_EMAIL = "jakevdp@gmail.com"
URL = 'http://altair-viz.github.io/pdvega/'
DOWNLOAD_URL = 'http://github.com/altair-viz/pdvega/'
LICENSE = 'MIT'
# Dependency lists, package list and version are derived from the
# repository files rather than repeated here.
INSTALL_REQUIRES = get_install_requirements("requirements.txt")
DEV_REQUIRES = get_install_requirements("requirements_dev.txt")
PACKAGES = find_packages()
VERSION = version('pdvega/__init__.py')

setup(name=NAME,
      version=VERSION,
      description=DESCRIPTION,
      long_description=LONG_DESCRIPTION,
      author=AUTHOR,
      author_email=AUTHOR_EMAIL,
      maintainer=MAINTAINER,
      maintainer_email=MAINTAINER_EMAIL,
      url=URL,
      download_url=DOWNLOAD_URL,
      license=LICENSE,
      install_requires=INSTALL_REQUIRES,
      # Installed with ``pip install -e .[dev]``
      extras_require={
        'dev': DEV_REQUIRES
      },
      packages=PACKAGES,
      include_package_data=True,
      classifiers=[
        'Development Status :: 4 - Beta',
        'Environment :: Console',
        'Intended Audience :: Science/Research',
        # Must agree with LICENSE above (MIT), not BSD
        'License :: OSI Approved :: MIT License',
        'Natural Language :: English',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3.5',
        'Programming Language :: Python :: 3.6'],
     )
98 | 


--------------------------------------------------------------------------------
/doc/plotting.rst:
--------------------------------------------------------------------------------
  1 | .. _statistical-plotting:
  2 | 
  3 | Statistical Visualization with ``pdvega.plotting``
  4 | ==================================================
  5 | 
  6 | In addition to the basic plots made available by the ``vgplot`` interface,
  7 | ``pdvega.plotting`` makes available some more sophisticated plotting types
  8 | that mirror those available in `pandas.plotting`_.
  9 | 
 10 | This section will outline a few of these.
 11 | 
 12 | .. pdvega-setup::
 13 | 
 14 |    import pdvega
 15 |    from vega_datasets import data
 16 |    iris = data.iris()
 17 |    stocks = data.stocks(pivoted=True)
 18 | 
 19 | .. _pdvega-scatter-matrix:
 20 | 
 21 | Scatter Matrix
 22 | --------------
 23 | 
 24 | For multi-dimensional data, it is difficult to capture all the relevant data
 25 | features using a simple scatter plot. For data with several attributes, it can
 26 | be useful to visualize the pairwise relationships between all pairs of dimensions.
 27 | This is done by ``pdvega.scatter_matrix``, which has an API based on
 28 | :func:`pandas.plotting.scatter_matrix`:
 29 | 
 30 | .. pdvega-plot::
 31 | 
 32 |    pdvega.scatter_matrix(iris, "species", figsize=(7, 7))
 33 | 
 34 | Notice that this version is interactive in two ways: if you click and drag on
 35 | any frame of the plot, the scales of all frames are dynamically adjusted in concert.
 36 | Further, if you hold the SHIFT key while clicking and dragging, it enables a
 37 | linked-brushing operation that allows you to track points between panels.
 38 | 
 39 | 
 40 | .. _pdvega-parallel-coordinates:
 41 | 
 42 | Parallel Coordinates
 43 | --------------------
 44 | 
 45 | Another way to visualize multi-dimensional data is to look at each dimension
 46 | independently, using a *parallel coordinates* plot. This can be done using
 47 | :func:`pdvega.parallel_coordinates`, which follows the API of
 48 | :func:`pandas.plotting.parallel_coordinates`:
 49 | 
 50 | .. pdvega-plot::
 51 | 
 52 |    pdvega.parallel_coordinates(iris, "species")
 53 | 
 54 | In one glance, this lets you see relationships between points, and in particular
 55 | makes clear that the "setosa" species is well-separated from the other two
 56 | in the dimensions of petal width and length.
 57 | 
 58 | .. _pdvega-andrews-curves:
 59 | 
 60 | Andrews Curves
 61 | --------------
 62 | 
 63 | A similar approach to visualizing data dimensions is known as *Andrews curves*:
 64 | the idea is to construct a Fourier series from the features of each object,
 65 | in order to qualitatively visualize the aggregate differences between classes.
 66 | This can be done with the :func:`pdvega.andrews_curves` function, which follows
 67 | the API of :func:`pandas.plotting.andrews_curves`:
 68 | 
 69 | .. pdvega-plot::
 70 | 
 71 |    pdvega.andrews_curves(iris, "species")
 72 | 
 73 | This gives us a similar impression to what we saw in the parallel coordinates
 74 | plot -- that setosa is somehow distinct from the other species -- but gives
 75 | less quantitative insight into just which features lead to that distinction.
 76 | 
 77 | .. _pdvega-lag-plot:
 78 | 
 79 | Lag Plot
 80 | --------
 81 | 
 82 | Finally, for time series, an interesting type of plot is known as a *lag plot*.
 83 | This is implemented by the :func:`pdvega.plotting.lag_plot` function, which follows
 84 | the API of :func:`pandas.plotting.lag_plot`.
 85 | 
 86 | Here we'll visualize the stock prices of Amazon and Microsoft from 1998-2010,
 87 | using a lag of 12 months:
 88 | 
 89 | .. pdvega-plot::
 90 | 
 91 |    pdvega.lag_plot(stocks[['AMZN', 'MSFT']], lag=12)
 92 | 
 93 | It's immediately apparent from this plot that Amazon was far more volatile
 94 | during that period: its price at any point during this period showed very
 95 | little correlation with the price a year later. By contrast, it's clear that
 96 | Microsoft's price was much more stable through this decade.
 97 | 
 98 | We can see that interpretation as well in the simple time-series plot of each
 99 | company's stock price:
100 | 
101 | .. pdvega-plot::
102 | 
103 |    stocks[['AMZN', 'MSFT']].vgplot.line()
104 | 
105 | 
106 | 
107 | .. _pandas.plotting: http://pandas.pydata.org/
108 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # ``pdvega``: Vega-Lite plotting for Pandas Dataframes
 2 | 
 3 | [![build status](http://img.shields.io/travis/altair-viz/pdvega/master.svg?style=flat)](https://travis-ci.org/altair-viz/pdvega)
 4 | [![Binder](https://mybinder.org/badge.svg)](https://mybinder.org/v2/gh/altair-viz/pdvega/master?filepath=examples%2Fpdvega_example.ipynb)
 5 | 
 6 | ``pdvega`` is a library that allows you to quickly create interactive
 7 | [Vega-Lite](https://vega.github.io/vega-lite/) plots from Pandas dataframes,
 8 | using an API that is nearly identical to Pandas' built-in
 9 | [visualization tools](https://pandas.pydata.org/pandas-docs/stable/visualization.html), and designed for easy use within the [Jupyter notebook](http://jupyter.org).
10 | 
11 | - [Full Documentation](http://altair-viz.github.io/pdvega/)
12 | 
13 | Pandas currently has some basic plotting capabilities based on
14 | [matplotlib](http://matplotlib.org). So, for example, you can create
15 | a scatter plot this way:
16 | 
17 | ```python
18 | import numpy as np
19 | import pandas as pd
20 | 
21 | df = pd.DataFrame({'x': np.random.randn(100), 'y': np.random.randn(100)})
22 | df.plot.scatter(x='x', y='y')
23 | ```
24 | 
25 | ![matplotlib scatter output](images/mpl-scatter.png?raw=true)
26 | 
27 | The goal of ``pdvega`` is that any time you use ``dataframe.plot``, you'll be
28 | able to replace it with ``dataframe.vgplot`` and instead get a similar
29 | (but prettier and more interactive) visualization output in Vega-Lite that you can easily export to share or customize:
30 | 
31 | ```python
32 | import pdvega  # import adds vgplot attribute to pandas
33 | 
34 | df.vgplot.scatter(x='x', y='y')
35 | ```
36 | 
37 | ![vega-lite scatter output](images/vg-scatter.png?raw=true)
38 | 
39 | The above image is a static screenshot of the interactive output; please see the
40 | [Documentation](http://altair-viz.github.io/pdvega/) for a full set of live
41 | usage examples.
42 | 
43 | ## Installation
44 | 
45 | You can get started with ``pdvega`` using pip:
46 | 
47 | ```
48 | $ pip install jupyter pdvega
49 | $ jupyter nbextension install --sys-prefix --py vega3
50 | ```
51 | 
52 | The first line installs ``pdvega`` and its dependencies; the second installs
53 | the Jupyter extension that allows plots to be displayed in the Jupyter
54 | notebook. For more information on installation and dependencies, see the
55 | [Installation docs](https://altair-viz.github.io/pdvega/installation.html).
56 | 
57 | ## Why Vega-Lite?
58 | When working with data, one of the biggest challenges is ensuring reproducibility of results.
59 | When you create a figure and export it to PNG or PDF, the data become baked-in to the rendering in a
60 | way that is difficult or impossible for others to extract. [Vega](http://vega.github.io/vega) and
61 | [Vega-Lite](http://vega.github.io/vega-lite) change this: instead of packaging a figure by encoding its
62 | pixel values, they package a figure by describing, in a declarative manner, the relationship between
63 | data values and visual encodings through a JSON specification.
64 | 
65 | This means that the Vega-Lite figures produced by ``pdvega`` are portable: you can send someone the
66 | resulting JSON specification and they can choose whether to render it interactively online, convert it to
67 | a PNG or EPS for static publication, or even enhance and extend the figure to learn more about the data.
68 | 
69 | ``pdvega`` is a step in bringing this vision of figure portability and reproducibility to the Python world.
70 | 
71 | ### Relationship to Altair
72 | 
73 | [Altair](http://altair-viz.github.io) is a project that seeks to design an intuitive declarative API for generating Vega-Lite and Vega visualizations, using Pandas dataframes as data sources.
74 | 
75 | By contrast, ``pdvega`` seeks not to design new visualization APIs, but to use the existing ``DataFrame.plot`` [visualization api](https://pandas.pydata.org/pandas-docs/stable/visualization.html) and output visualizations with Vega/Vega-Lite rather than with matplotlib.
76 | 
77 | In this respect, ``pdvega`` is quite similar in spirit to the now-defunct [mpld3](http://mpld3.github.io) project, though the scope is smaller and (hopefully) **much** more manageable.
78 | 


--------------------------------------------------------------------------------
/pdvega/_utils.py:
--------------------------------------------------------------------------------
  1 | import warnings
  2 | import numpy as np
  3 | import pandas as pd
  4 | 
  5 | from ._pandas_internals import infer_dtype as pd_infer_dtype
  6 | from ._pandas_internals import _infer_dtype_kwds
  7 | 
  8 | 
  9 | def infer_vegalite_type(data, ordinal_threshold=6):
 10 |     """
 11 |     From an array-like input, infer the correct vega typecode
 12 |     ('ordinal', 'nominal', 'quantitative', or 'temporal')
 13 | 
 14 |     Parameters
 15 |     ----------
 16 |     data: Numpy array or Pandas Series
 17 |         data for which the type will be inferred
 18 |     ordinal_threshold: integer (default: 0)
 19 |         integer data will result in a 'quantitative' type, unless the
 20 |         number of unique values is smaller than ordinal_threshold.
 21 | 
 22 |     Adapted from code at http://github.com/altair-viz/altair/
 23 |     Licence: BSD-3
 24 |     """
 25 |     # infer based on the dtype of the input
 26 |     typ = pd_infer_dtype(data, **_infer_dtype_kwds)
 27 | 
 28 |     # TODO: Once this returns 'O', please update test_select_x and test_select_y in test_api.py
 29 | 
 30 |     if typ in ('mixed-integer', 'integer'):
 31 |         if ordinal_threshold and pd.Series(data).nunique() <= ordinal_threshold:
 32 |             return 'ordinal'
 33 |         else:
 34 |             return 'quantitative'
 35 |     elif typ in ('floating', 'mixed-integer-float', 'complex'):
 36 |         return 'quantitative'
 37 |     elif typ in ('string', 'bytes', 'categorical', 'boolean', 'mixed', 'unicode', 'object'):
 38 |         return 'nominal'
 39 |     elif typ in ('datetime', 'datetime64', 'timedelta',
 40 |                  'timedelta64', 'date', 'time', 'period'):
 41 |         return 'temporal'
 42 |     else:
 43 |         warnings.warn("I don't know how to infer vegalite type from '{0}'.  "
 44 |                       "Defaulting to nominal.".format(typ))
 45 |         return 'nominal'
 46 | 
 47 | 
 48 | def unpivot_frame(frame, x=None, y=None,
 49 |                   var_name='variable', value_name='value'):
 50 |     """Unpivot a dataframe for use with Vega/Vega-Lite
 51 | 
 52 |     The input is a frame with any number of columns,
 53 |     output is a frame with three columns: x value, y values,
 54 |     and variable names.
 55 |     """
 56 |     if x is None:
 57 |         cols = frame.columns
 58 |         frame = frame.reset_index()
 59 |         x = (set(frame.columns) - set(cols)).pop()
 60 |     # frame.melt doesn't properly check for nonexisting columns, so we
 61 |     # start by indexing here. Tuples of column names also need to be
 62 |     # converted to lists for checking indexing
 63 |     if isinstance(x, tuple):
 64 |         x = list(x)
 65 |     if isinstance(y, tuple):
 66 |         y = list(y)
 67 |     if x is not None:
 68 |         _ = frame[x] # noqa
 69 |     if y is not None:
 70 |         _ = frame[y] # noqa
 71 |     return frame.melt(id_vars=x, value_vars=y,
 72 |                       var_name=var_name, value_name=value_name)
 73 | 
 74 | 
 75 | def warn_if_keywords_unused(kind, kwds):
 76 |     if kwds:
 77 |         if len(kwds) == 1:
 78 |             keys = tuple(kwds.keys())[0]
 79 |         else:
 80 |             keys = tuple(kwds.keys())
 81 |         warnings.warn("Unrecognized keywords in vgplot.{0}(): {1}"
 82 |                       "".format(kind, repr(keys)))
 83 | 
 84 | 
 85 | def validate_aggregation(agg):
 86 |     """Validate an aggregation for use in Vega-Lite.
 87 | 
 88 |     Translate agg to one of the following supported named aggregations:
 89 |     ['mean', 'sum', 'median', 'min', 'max', 'count']
 90 | 
 91 |     Parameters
 92 |     ----------
 93 |     agg : string or callable
 94 |         A string
 95 | 
 96 |     Supported reductions are ['mean', 'sum', 'median', 'min', 'max', 'count'].
 97 | 
 98 |     If agg is a numpy function, the return value is the string representation.
 99 | 
100 |     If agg is unrecognized, raise a ValueError
101 |     """
102 |     if agg is None:
103 |         return agg
104 |     supported_aggs = ['mean', 'sum', 'median', 'min', 'max', 'count']
105 |     numpy_aggs = {getattr(np, a): a
106 |                   for a in ['mean', 'sum', 'median', 'min', 'max']}
107 |     builtin_aggs = {min: 'min', max: 'max', sum: 'sum'}
108 | 
109 |     agg = numpy_aggs.get(agg, agg)
110 |     agg = builtin_aggs.get(agg, agg)
111 | 
112 |     if agg not in supported_aggs:
113 |         raise ValueError("Unrecognized Vega-Lite aggregation: {0}".format(agg))
114 | 
115 |     return agg
116 | 


--------------------------------------------------------------------------------
/doc/advanced.rst:
--------------------------------------------------------------------------------
  1 | .. _advanced-plotting:
  2 | 
  3 | Advanced Plotting: Using Vega-Lite Directly
  4 | ===========================================
  5 | 
  6 | The ``pdvega`` API is rather simplistic at the moment; it doesn't give easy
  7 | access to many of the features that Vega-Lite supports.
  8 | In the future, we would like to tie ``pdvega`` to the `Altair`_ project, which
  9 | would allow plot outputs to be adjusted flexibly from within a Python API.
 10 | 
 11 | In the meantime, it is possible to make more fine-tuned adjustments to your
 12 | plot specifications by working directly in the specification dictionary.
 13 | 
 14 | For example, consider this plot:
 15 | 
 16 | .. pdvega-setup::
 17 | 
 18 |    import pdvega
 19 |    import pandas
 20 | 
 21 | .. pdvega-plot::
 22 | 
 23 |    from vega_datasets import data
 24 |    iris = data.iris()
 25 | 
 26 |    iris.vgplot(kind='scatter', x='sepalLength', y='petalLength', c='species')
 27 | 
 28 | Vega-Lite's default behavior is to include the zero-value in the scale, unless
 29 | the user explicitly turns that requirement off in the JSON spec.
 30 | 
 31 | ``pdvega`` is not designed to give easy access to every option available in the
 32 | Vega-Lite schema, but it is possible to modify the specification manually.
 33 | We can access the raw Vega-Lite specification from any plot using the ``spec``
 34 | attribute. For convenience, there is also a ``spec_no_data`` attribute that
 35 | returns the spec without the embedded data:
 36 | 
 37 | .. code-block:: python
 38 | 
 39 |    >>> plot = iris.vgplot(kind='scatter', x='sepalLength', y='petalLength', c='species')
 40 |    >>> plot.spec_no_data
 41 |    {'$schema': 'https://vega.github.io/schema/vega-lite/v2.json',
 42 |    'encoding': {'color': {'field': 'species', 'type': 'nominal'},
 43 |     'x': {'field': 'sepalLength', 'type': 'quantitative'},
 44 |     'y': {'field': 'petalLength', 'type': 'quantitative'}},
 45 |    'height': 300,
 46 |    'mark': 'circle',
 47 |    'selection': {'grid': {'bind': 'scales', 'type': 'interval'}},
 48 |    'width': 450}
 49 | 
 50 | This dictionary contains the specification that tells the vega-lite renderer
 51 | how to map data to visual components in the plot. You can read more details on
 52 | the `Vega-Lite`_ website. In particular, if you look at the options for
 53 | `Vega-Lite scales`_, you can see that there is a ``"scale"`` property of the "x"
 54 | encoding which allows turning off the zero behavior.
 55 | Knowing this, we can update the specification manually to get the desired result:
 56 | 
 57 | .. pdvega-setup::
 58 | 
 59 |     from vega_datasets import data
 60 |     iris = data.iris()
 61 |     plot = iris.vgplot(kind='scatter', x='sepalLength', y='petalLength', c='species')
 62 | 
 63 | .. pdvega-plot::
 64 | 
 65 |     plot.spec['encoding']['x']['scale'] = {'zero': False}
 66 |     plot
 67 | 
 68 | Using this type of approach, you can customize your plots in any way that Vega-Lite
 69 | allows.
 70 | 
 71 | This is admittedly a bit of a clumsy solution for plot customization; mucking around
 72 | in the internals of the JSON specification requires a deep knowledge of the vega-lite
 73 | schema, and the renderer is not very forgiving if and when you
 74 | make an error or typo.
 75 | In the future, we plan to make ``pdvega`` plots output `Altair`_
 76 | objects, which will allow this sort of customization to be done much more cleanly
 77 | with Altair's Python API.
 78 | 
 79 | Skipping ``vgplot`` entirely
 80 | ----------------------------
 81 | If you would like to skip pdvega's vgplot API entirely and build your Vega-Lite plot
 82 | from scratch, pdvega's :class:`~pdvega.Axes` object lets you do this directly.
 83 | For example:
 84 | 
 85 | .. pdvega-plot::
 86 | 
 87 |    from pdvega import Axes
 88 | 
 89 |    spec = {
 90 |      '$schema': 'https://vega.github.io/schema/vega-lite/v2.json',
 91 |      'mark': 'point',
 92 |      'encoding': {
 93 |        'color': {'field': 'species', 'type': 'nominal'},
 94 |        'x': {'field': 'petalWidth', 'type': 'quantitative'},
 95 |        'y': {'field': 'petalLength', 'type': 'quantitative'}
 96 |      },
 97 |      'height': 300,
 98 |      'width': 450,
 99 |      # this selection is what makes the plot interactive
100 |      'selection': {'grid': {'bind': 'scales', 'type': 'interval'}},
101 |    }
102 | 
103 |    # Build the vgplot specification
104 |    Axes(spec, iris)
105 | 
106 | For ideas on what sort of visualizations you can create in this way,
107 | check out the specifications on the `Vega-Lite examples`_ page.
108 | The `Vega online editor`_ is also a useful resource for developing visualizations
109 | directly in Vega or Vega-Lite.
110 | 
111 | .. _Vega-Lite: http://vega.github.io/vega-lite/
112 | .. _Altair: http://altair-viz.github.io/
113 | .. _Vega-Lite scales: https://vega.github.io/vega-lite/docs/scale.html
114 | .. _Vega-Lite examples: https://vega.github.io/vega-lite/examples/
115 | .. _Vega online editor: https://vega.github.io/editor/#/custom/vega-lite
116 | 


--------------------------------------------------------------------------------
/pdvega/tests/test_plotting.py:
--------------------------------------------------------------------------------
  1 | import pytest
  2 | 
  3 | import numpy as np
  4 | import pandas as pd
  5 | 
  6 | import pdvega
  7 | from pdvega.tests import utils
  8 | 
  9 | 
 10 | def test_scatter_matrix():
 11 |     df = pd.DataFrame({'x': range(5),
 12 |                        'y': range(5),
 13 |                        'label': list('ABABA')})
 14 |     # no color or size specified
 15 |     plot = pdvega.scatter_matrix(df)
 16 |     utils.validate_vegalite(plot)
 17 |     spec = plot.to_dict()
 18 |     assert spec['repeat']['row'] == ['x', 'y']
 19 |     assert spec['repeat']['column'] == ['y', 'x']
 20 |     assert spec['spec']['encoding']['color']['condition']['value'] == 'steelblue'
 21 | 
 22 |     # with color specified
 23 |     plot = pdvega.scatter_matrix(df, c='label')
 24 |     utils.validate_vegalite(plot)
 25 |     spec = plot.to_dict()
 26 |     assert spec['repeat']['row'] == ['x', 'y']
 27 |     assert spec['repeat']['column'] == ['y', 'x']
 28 |     assert spec['spec']['encoding']['color']['condition']['field'] == 'label'
 29 | 
 30 |     # with size specified
 31 |     plot = pdvega.scatter_matrix(df, s='label')
 32 |     utils.validate_vegalite(plot)
 33 |     spec = plot.to_dict()
 34 |     assert spec['repeat']['row'] == ['x', 'y']
 35 |     assert spec['repeat']['column'] == ['y', 'x']
 36 |     assert spec['spec']['encoding']['color']['condition']['value'] == 'steelblue'
 37 |     assert spec['spec']['encoding']['size']['field'] == 'label'
 38 | 
 39 |     # test figsize keyword
 40 |     figsize = (8, 6)
 41 |     dpi = 40
 42 |     ncols = 2
 43 |     plot = pdvega.scatter_matrix(df, figsize=figsize, dpi=dpi)
 44 |     utils.validate_vegalite(plot)
 45 |     spec = plot.to_dict()
 46 |     assert np.allclose(spec['spec']['width'],
 47 |                        0.8 * dpi * figsize[0] / ncols)
 48 |     assert np.allclose(spec['spec']['height'],
 49 |                        0.8 * dpi * figsize[1] / ncols)
 50 | 
 51 | 
 52 | def test_parallel_coordinates():
 53 |     data = pd.DataFrame({'x': range(10),
 54 |                          'y': range(10),
 55 |                          'z': range(10),
 56 |                          'c': list('ABABABABAB')})
 57 |     plot = pdvega.parallel_coordinates(data, 'c', alpha=0.5)
 58 |     utils.validate_vegalite(plot)
 59 |     utils.check_encodings(plot, x='variable', y='value',
 60 |                           color='c', detail='index', opacity=utils.IGNORE)
 61 | 
 62 |     spec = plot.to_dict()
 63 |     enc = spec['encoding']
 64 |     assert spec['mark'] == 'line'
 65 |     assert enc['x']['type'] == 'nominal'
 66 |     assert enc['y']['type'] == 'quantitative'
 67 |     assert enc['color']['type'] == 'nominal'
 68 |     assert enc['detail']['type'] == 'quantitative'
 69 |     assert enc['opacity']['value'] == 0.5
 70 | 
 71 |     df = utils.get_data(plot)
 72 |     assert set(pd.unique(df['variable'])) == {'x', 'y', 'z'}
 73 | 
 74 |     plot = pdvega.parallel_coordinates(data, 'c', cols=['x', 'y'])
 75 |     utils.validate_vegalite(plot)
 76 |     utils.check_encodings(plot, x='variable', y='value',
 77 |                           color='c', detail='index')
 78 |     spec = plot.to_dict()
 79 |     enc = spec['encoding']
 80 |     assert spec['mark'] == 'line'
 81 |     assert enc['x']['type'] == 'nominal'
 82 |     assert enc['y']['type'] == 'quantitative'
 83 |     assert enc['color']['type'] == 'nominal'
 84 |     assert enc['detail']['type'] == 'quantitative'
 85 |     df = utils.get_data(plot)
 86 |     assert set(pd.unique(df['variable'])) == {'x', 'y'}
 87 | 
 88 | 
 89 | def test_andrews_curves():
 90 |     data = pd.DataFrame({'x': range(10),
 91 |                          'y': range(10),
 92 |                          'z': range(10),
 93 |                          'c': list('ABABABABAB')})
 94 |     n_samples = 120
 95 |     n_points = len(data)
 96 |     plot = pdvega.andrews_curves(data, 'c', samples=120, alpha=0.5)
 97 |     utils.validate_vegalite(plot)
 98 |     utils.check_encodings(plot, x='t', y=' ',
 99 |                           color='c', detail='sample', opacity=utils.IGNORE)
100 | 
101 |     spec = plot.to_dict()
102 |     enc = spec['encoding']
103 |     assert spec['mark'] == 'line'
104 |     assert enc['x']['type'] == 'quantitative'
105 |     assert enc['y']['type'] == 'quantitative'
106 |     assert enc['color']['type'] == 'nominal'
107 |     assert enc['detail']['type'] == 'quantitative'
108 |     assert enc['opacity']['value'] == 0.5
109 | 
110 |     df = utils.get_data(plot)
111 |     assert len(df) == n_samples * n_points
112 | 
113 | 
@pytest.mark.parametrize('lag', [1, 5])
def test_lag_plot(lag):
    """Exercise lag_plot with Series and DataFrame input for a given lag."""
    frame = pd.DataFrame({'x': range(10),
                          'y': range(10)})
    n_rows = frame.shape[0]
    lagged_label = 'y(t + {0})'.format(lag)

    def assert_point_spec(spec):
        # Both input kinds produce a quantitative/quantitative point chart.
        assert spec['mark'] == 'point'
        assert spec['encoding']['x']['type'] == 'quantitative'
        assert spec['encoding']['y']['type'] == 'quantitative'

    # test series input
    chart = pdvega.lag_plot(frame['x'], lag=lag)
    lagged = utils.get_data(chart)
    assert_point_spec(chart.to_dict())
    utils.check_encodings(chart, x='y(t)', y=lagged_label)
    assert lagged.shape == (n_rows - lag, 2)

    # test dataframe input
    chart = pdvega.lag_plot(frame, lag=lag)
    lagged = utils.get_data(chart)
    spec = chart.to_dict()
    assert_point_spec(spec)
    assert spec['encoding']['color']['type'] == 'nominal'
    utils.check_encodings(chart, x='y(t)', y=lagged_label,
                          color='variable')
    assert lagged.shape == (2 * (n_rows - lag), 3)
143 | 


--------------------------------------------------------------------------------
/doc/index.rst:
--------------------------------------------------------------------------------
  1 | .. raw :: html
  2 | 
  3 |    <a href="https://github.com/altair-viz/pdvega"><img style="position: absolute; top: 0; right: 0; border: 0;" src="https://camo.githubusercontent.com/a6677b08c955af8400f44c6298f40e7d19cc5b2d/68747470733a2f2f73332e616d617a6f6e6177732e636f6d2f6769746875622f726962626f6e732f666f726b6d655f72696768745f677261795f3664366436642e706e67" alt="Fork me on GitHub" data-canonical-src="https://s3.amazonaws.com/github/ribbons/forkme_right_gray_6d6d6d.png"></a>
  4 | 
  5 | PdVega: Interactive Vega-Lite Plots for Pandas
  6 | ==============================================
  7 | 
  8 | ``pdvega`` is a library that allows you to quickly create interactive
  9 | `Vega-Lite`_ plots from Pandas dataframes, using an API that is nearly
 10 | identical to Pandas' built-in `plotting API <https://pandas.pydata.org/pandas-docs/stable/visualization.html>`_,
 11 | and designed for easy use within the `Jupyter notebook`_.
 12 | 
 13 | .. pdvega-plot::
 14 | 
 15 |     import pandas as pd
 16 |     import numpy as np
 17 |     data = pd.DataFrame({'x': np.random.randn(200),
 18 |                          'y': np.random.randn(200)})
 19 | 
 20 |     import pdvega  # adds vgplot attribute to pandas
 21 |     data.vgplot.scatter('x', 'y')
 22 | 
 23 | The result is an interactive plot rendered using `Vega-Lite`_, a visualization
 24 | specification that allows users to declaratively describe which
 25 | data features should map to which visualization features using a well-defined
 26 | JSON schema. The result is beautiful and dynamic data visualizations with a
 27 | minimum of boiler-plate.
 28 | 
 29 | ``pdvega`` aims to make the construction of these specifications
 30 | more accessible to Python users, via a familiar plotting API.
 31 | 
 32 | Quick Start
 33 | -----------
 34 | ``pdvega`` is designed to be used primarily with the `Jupyter notebook`_.
 35 | To get started, first install ``pdvega`` with the following commands::
 36 | 
 37 |     $ pip install pdvega
 38 |     $ jupyter nbextension install --sys-prefix --py vega3
 39 | 
 40 | (for details on installation and dependencies, see :ref:`installation`).
 41 | 
 42 | With the package installed and imported, you can use the ``vgplot`` attribute
 43 | of Pandas ``Series`` and ``DataFrame`` objects to quickly create a Vega-Lite
 44 | plot. For convenience here, we will load example datasets using the
 45 | `vega_datasets`_ package:
 46 | 
 47 | .. pdvega-plot::
 48 | 
 49 |     # load a dataframe containing stock price time-series
 50 |     from vega_datasets import data
 51 |     stocks = data.stocks(pivoted=True)
 52 | 
 53 |     # importing pdvega adds the `vgplot` attribute to pandas objects
 54 |     import pdvega
 55 | 
 56 |     stocks.vgplot.line()
 57 | 
 58 | Notice that by default plots created with ``pdvega`` are interactive: you can
 59 | use your mouse or track pad to pan and zoom the plot.
 60 | 
 61 | By design, ``pdvega`` has a plotting API that is nearly identical to Pandas'
 62 | existing `matplotlib API <https://pandas.pydata.org/pandas-docs/stable/visualization.html>`_;
 63 | just replace ``data.plot`` with ``data.vgplot``, where
 64 | ``data`` refers to any Pandas ``Series`` or ``DataFrame`` object:
 65 | 
 66 | .. plot::
 67 |     :context:
 68 |     :nofigs:
 69 | 
 70 |     from vega_datasets import data
 71 |     stocks = data.stocks(pivoted=True)
 72 | 
 73 | .. plot::
 74 |     :include-source:
 75 |     :context:
 76 | 
 77 |     # create a matplotlib line plot
 78 |     stocks.plot.line(y='AAPL', alpha=0.5)
 79 | 
 80 | 
 81 | .. pdvega-setup::
 82 | 
 83 |     from vega_datasets import data
 84 |     stocks = data.stocks(pivoted=True)
 85 |     import pdvega
 86 | 
 87 | .. pdvega-plot::
 88 | 
 89 |     # create a vega line plot
 90 |     stocks.vgplot.line(y='AAPL', alpha=0.5)
 91 | 
 92 | ``pdvega`` does not (yet?) support every argument accepted by
 93 | ``DataFrame.plot`` methods, but it covers the most commonly-used arguments.
 94 | 
 95 | To see more examples of visualizations created using the ``vgplot`` attribute
 96 | of pandas ``Series`` and ``DataFrame`` objects, see :ref:`core-plotting`.
 97 | 
 98 | More Complex Plots
 99 | ------------------
100 | 
101 | The ``pdvega`` package additionally supports many of the more sophisticated
102 | plotting routines available in the
103 | `pandas.plotting <https://pandas.pydata.org/pandas-docs/stable/visualization.html#plotting-tools>`_
104 | submodule; for example, here is a multi-panel scatter-plot matrix of Fisher's
105 | `Iris dataset`_:
106 | 
107 | .. pdvega-setup::
108 | 
109 |     import pdvega
110 |     from vega_datasets import data
111 | 
112 | .. pdvega-plot::
113 | 
114 |     iris = data.iris()
115 |     pdvega.scatter_matrix(iris, 'species', figsize=(7, 7))
116 | 
117 | In this plot, you can click and drag for linked panning and zooming, or you can
118 | click and drag while holding the SHIFT key to do linked brushing of the points.
119 | 
120 | For more examples of statistical visualizations available in
121 | ``pdvega.plotting``, see :ref:`statistical-plotting`.
122 | 
123 | 
124 | Documentation
125 | -------------
126 | 
127 | .. toctree::
128 |    :maxdepth: 2
129 | 
130 |    installation
131 |    core
132 |    plotting
133 |    advanced
134 |    API
135 | 
136 | ``pdvega`` is MIT-licensed and the source is available on `GitHub <http://github.com/altair-viz/pdvega>`_.
137 | If any questions or issues come up as you use it, please get in touch via
138 | `GitHub Issues <http://github.com/altair-viz/pdvega/issues>`_.
139 | 
140 | Indices and tables
141 | ------------------
142 | 
143 | * :ref:`genindex`
144 | * :ref:`modindex`
145 | * :ref:`search`
146 | 
147 | 
148 | .. _Vega-Lite: http://vega.github.io/vega-lite
149 | .. _Jupyter notebook: http://jupyter.org/
150 | .. _vega_datasets: http://github.com/altair-viz/vega_datasets
151 | .. _Iris dataset: https://en.wikipedia.org/wiki/Iris_flower_data_set
152 | 


--------------------------------------------------------------------------------
/doc/make.bat:
--------------------------------------------------------------------------------
@ECHO OFF

REM Command file for Sphinx documentation
REM Usage: make TARGET, where TARGET is one of the names printed by "make help".

REM Use the sphinx-build found on PATH unless SPHINXBUILD is already set.
if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
REM Output directory, plus the option strings shared by the build targets.
set BUILDDIR=_build
set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
set I18NSPHINXOPTS=%SPHINXOPTS% .
REM When PAPER is set, pass it on to Sphinx as latex_paper_size.
if NOT "%PAPER%" == "" (
	set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
	set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
)

REM No target given: show the usage text.
if "%1" == "" goto help

if "%1" == "help" (
	:help
	echo.Please use `make ^<target^>` where ^<target^> is one of
	echo.  html       to make standalone HTML files
	echo.  dirhtml    to make HTML files named index.html in directories
	echo.  singlehtml to make a single large HTML file
	echo.  pickle     to make pickle files
	echo.  json       to make JSON files
	echo.  htmlhelp   to make HTML files and a HTML help project
	echo.  qthelp     to make HTML files and a qthelp project
	echo.  devhelp    to make HTML files and a Devhelp project
	echo.  epub       to make an epub
	echo.  epub3      to make an epub3
	echo.  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter
	echo.  text       to make text files
	echo.  man        to make manual pages
	echo.  texinfo    to make Texinfo files
	echo.  gettext    to make PO message catalogs
	echo.  changes    to make an overview over all changed/added/deprecated items
	echo.  xml        to make Docutils-native XML files
	echo.  pseudoxml  to make pseudoxml-XML files for display purposes
	echo.  linkcheck  to check all external links for integrity
	echo.  doctest    to run all doctests embedded in the documentation if enabled
	echo.  coverage   to run coverage check of the documentation if enabled
	echo.  dummy      to check syntax errors of document sources
	goto end
)

REM clean: remove every subdirectory and file under BUILDDIR.
if "%1" == "clean" (
	for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
	del /q /s %BUILDDIR%\*
	goto end
)
 51 | 
 52 | 
 53 | REM Check if sphinx-build is available and fallback to Python version if any
 54 | %SPHINXBUILD% 1>NUL 2>NUL
 55 | if errorlevel 9009 goto sphinx_python
 56 | goto sphinx_ok
 57 | 
 58 | :sphinx_python
 59 | 
 60 | set SPHINXBUILD=python -m sphinx.__init__
 61 | %SPHINXBUILD% 2> nul
 62 | if errorlevel 9009 (
 63 | 	echo.
 64 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
 65 | 	echo.installed, then set the SPHINXBUILD environment variable to point
 66 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
 67 | 	echo.may add the Sphinx directory to PATH.
 68 | 	echo.
 69 | 	echo.If you don't have Sphinx installed, grab it from
 70 | 	echo.http://sphinx-doc.org/
 71 | 	exit /b 1
 72 | )
 73 | 
 74 | :sphinx_ok
 75 | 
 76 | 
REM Builder targets. Each one runs the matching Sphinx builder into its own
REM subdirectory of BUILDDIR and exits with status 1 if the build fails.
if "%1" == "html" (
	%SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The HTML pages are in %BUILDDIR%/html.
	goto end
)

if "%1" == "dirhtml" (
	%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
	goto end
)

if "%1" == "singlehtml" (
	%SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
	goto end
)

if "%1" == "pickle" (
	%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished; now you can process the pickle files.
	goto end
)

if "%1" == "json" (
	%SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished; now you can process the JSON files.
	goto end
)

if "%1" == "htmlhelp" (
	%SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished; now you can run HTML Help Workshop with the ^
.hhp project file in %BUILDDIR%/htmlhelp.
	goto end
)
125 | 
REM qthelp: HTML files plus a Qt help project. The file names in the hint
REM below use this project's name (pdvega), not the project this script was
REM copied from.
if "%1" == "qthelp" (
	%SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished; now you can run "qcollectiongenerator" with the ^
.qhcp project file in %BUILDDIR%/qthelp, like this:
	echo.^> qcollectiongenerator %BUILDDIR%\qthelp\pdvega.qhcp
	echo.To view the help file:
	echo.^> assistant -collectionFile %BUILDDIR%\qthelp\pdvega.qhc
	goto end
)
137 | 
REM Remaining builder targets; same pattern as the targets above.
if "%1" == "devhelp" (
	%SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished.
	goto end
)

if "%1" == "epub" (
	%SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The epub file is in %BUILDDIR%/epub.
	goto end
)

if "%1" == "epub3" (
	%SPHINXBUILD% -b epub3 %ALLSPHINXOPTS% %BUILDDIR%/epub3
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The epub3 file is in %BUILDDIR%/epub3.
	goto end
)

if "%1" == "latex" (
	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
	goto end
)

REM latexpdf and latexpdfja run "make" inside the generated latex directory
REM and then return to this script's directory. Unlike the other targets they
REM do not check the sphinx-build exit status first.
if "%1" == "latexpdf" (
	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
	cd %BUILDDIR%/latex
	make all-pdf
	cd %~dp0
	echo.
	echo.Build finished; the PDF files are in %BUILDDIR%/latex.
	goto end
)

if "%1" == "latexpdfja" (
	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
	cd %BUILDDIR%/latex
	make all-pdf-ja
	cd %~dp0
	echo.
	echo.Build finished; the PDF files are in %BUILDDIR%/latex.
	goto end
)

if "%1" == "text" (
	%SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The text files are in %BUILDDIR%/text.
	goto end
)

if "%1" == "man" (
	%SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The manual pages are in %BUILDDIR%/man.
	goto end
)

if "%1" == "texinfo" (
	%SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
	goto end
)

REM gettext uses I18NSPHINXOPTS, which omits the doctree cache option.
if "%1" == "gettext" (
	%SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
	goto end
)

if "%1" == "changes" (
	%SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
	if errorlevel 1 exit /b 1
	echo.
	echo.The overview file is in %BUILDDIR%/changes.
	goto end
)

if "%1" == "linkcheck" (
	%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
	if errorlevel 1 exit /b 1
	echo.
	echo.Link check complete; look for any errors in the above output ^
or in %BUILDDIR%/linkcheck/output.txt.
	goto end
)

if "%1" == "doctest" (
	%SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
	if errorlevel 1 exit /b 1
	echo.
	echo.Testing of doctests in the sources finished, look at the ^
results in %BUILDDIR%/doctest/output.txt.
	goto end
)

if "%1" == "coverage" (
	%SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage
	if errorlevel 1 exit /b 1
	echo.
	echo.Testing of coverage in the sources finished, look at the ^
results in %BUILDDIR%/coverage/python.txt.
	goto end
)

if "%1" == "xml" (
	%SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The XML files are in %BUILDDIR%/xml.
	goto end
)

if "%1" == "pseudoxml" (
	%SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml.
	goto end
)

if "%1" == "dummy" (
	%SPHINXBUILD% -b dummy %ALLSPHINXOPTS% %BUILDDIR%/dummy
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. Dummy builder generates no files.
	goto end
)

REM Common exit point for every target.
:end


--------------------------------------------------------------------------------
/pdvega/tests/test_core_common.py:
--------------------------------------------------------------------------------
  1 | """Common tests for all plotting routines"""
  2 | import pytest
  3 | 
  4 | import pandas as pd
  5 | import pdvega
  6 | 
  7 | from .utils import validate_vegalite
  8 | 
  9 | 
 10 | @pytest.fixture
 11 | def data():
 12 |     """A dataframe with quantitative and nominal columns"""
 13 |     return pd.DataFrame({
 14 |         'x': range(10),
 15 |         'y': range(10),
 16 |         'z': range(10),
 17 |         'a': list('ABCABCABCA'),
 18 |         'b': list('ABCABCABCA')
 19 |     })
 20 | 
 21 | 
# A chart built up front, passed to the ``ax`` tests below as an existing chart.
other_chart = pd.Series(range(10)).vgplot(kind='line')
# Pairs of (value passed as ``ax``, chart class the result is asserted to be).
AXES = [
    (None, pdvega.alt.Chart),
    (other_chart, pdvega.alt.LayerChart),
    (pdvega.alt.layer(other_chart), pdvega.alt.LayerChart)
]
 28 | 
 29 | FRAME_TEST_CASES = {
 30 |     'line': {
 31 |         'usecols': ['x', 'y', 'z'],
 32 |     },
 33 |     'bar': {
 34 |         'usecols': ['x', 'y', 'z'],
 35 |     },
 36 |     'barh': {
 37 |         'usecols': ['x', 'y', 'z'],
 38 |     },
 39 |     'area': {
 40 |         'usecols': ['x', 'y', 'z'],
 41 |     },
 42 |     'scatter': {
 43 |         'usecols': ['x', 'y', 'a', 'b'],
 44 |         'kwds': {'x': 'x', 'y': 'y', 'c': 'a', 's': 'b'}
 45 |     },
 46 |     'hist': {
 47 |         'usecols': ['x', 'y', 'z'],
 48 |     },
 49 |     'hexbin': {
 50 |         'usecols': ['x', 'y', 'z'],
 51 |         'kwds': {'x': 'x', 'y': 'y'}
 52 |     },
 53 |     'kde': {
 54 |         'usecols': ['x', 'y', 'z'],
 55 |     },
 56 |     'density': {
 57 |         'usecols': ['x', 'y', 'z'],
 58 |     }
 59 | }
 60 | 
 61 | SERIES_TEST_CASES = {
 62 |     'line': {
 63 |         'col': 'x'
 64 |     },
 65 |     'bar': {
 66 |         'col': 'x'
 67 |     },
 68 |     'barh': {
 69 |         'col': 'x'
 70 |     },
 71 |     'area': {
 72 |         'col': 'x'
 73 |     },
 74 |     'hist': {
 75 |         'col': 'x'
 76 |     },
 77 |     'kde': {
 78 |         'col': 'x'
 79 |     },
 80 |     'density': {
 81 |         'col': 'x'
 82 |     }
 83 | }
 84 | 
 85 | 
 86 | def is_stackable(kind):
 87 |     return kind in {'bar', 'barh', 'area', 'hist'}
 88 | 
 89 | 
 90 | @pytest.mark.parametrize('kind,info', SERIES_TEST_CASES.items())
 91 | def test_series_plot_interactive(data, kind, info):
 92 |     col = info['col']
 93 |     kwds = info.get('kwds', {})
 94 |     data = data[col]
 95 | 
 96 |     spec = data.vgplot(kind=kind, **kwds)
 97 |     validate_vegalite(spec)
 98 |     assert 'selection' not in spec.to_dict()
 99 | 
100 |     spec = data.vgplot(kind=kind, **kwds).interactive()
101 |     validate_vegalite(spec)
102 |     s = spec.to_dict()
103 |     assert next(iter(s['selection'].values())) == {'bind': 'scales', 'encodings': ['x', 'y'], 'type': 'interval'}
104 | 
105 | 
@pytest.mark.parametrize('kind,info', FRAME_TEST_CASES.items())
def test_frame_plot_interactive(data, kind, info):
    """A frame chart has a scale-bound selection only after ``.interactive()``."""
    frame = data[info['usecols']]
    plot_kwds = info.get('kwds', {})
    expected_selection = {
        'type': 'interval',
        'bind': 'scales',
        'encodings': ['x', 'y'],
    }

    static_chart = frame.vgplot(kind=kind, **plot_kwds)
    validate_vegalite(static_chart)
    assert 'selection' not in static_chart.to_dict()

    interactive_chart = frame.vgplot(kind=kind, **plot_kwds).interactive()
    validate_vegalite(interactive_chart)
    selections = interactive_chart.to_dict()['selection']
    first_selection = next(iter(selections.values()))
    assert first_selection == expected_selection
120 | 
121 | 
@pytest.mark.parametrize('kind,info', SERIES_TEST_CASES.items())
def test_series_plot_alpha(data, kind, info):
    """``alpha`` becomes a constant opacity encoding; without it there is none."""
    series = data[info['col']]
    plot_kwds = info.get('kwds', {})

    translucent = series.vgplot(kind=kind, alpha=0.5, **plot_kwds)
    validate_vegalite(translucent)
    encoding = translucent['encoding'].to_dict()
    assert 'opacity' in encoding, encoding.keys()
    assert encoding['opacity']['value'] == 0.5

    default_chart = series.vgplot(kind=kind, **plot_kwds)
    validate_vegalite(default_chart)
    assert 'opacity' not in default_chart['encoding'].to_dict()
137 | 
138 | 
@pytest.mark.parametrize('kind,info', SERIES_TEST_CASES.items())
@pytest.mark.parametrize('ax', AXES)
def test_series_plot_ax(data, kind, info, ax):
    """Passing ``ax`` to a series plot yields the chart class paired with it."""
    existing_chart, expected_type = ax[0], ax[1]
    series = data[info['col']]
    plot_kwds = info.get('kwds', {})

    result = series.vgplot(kind=kind, ax=existing_chart, **plot_kwds)
    validate_vegalite(result)
    assert isinstance(result, expected_type)
149 | 
150 | 
@pytest.mark.parametrize('kind,info', FRAME_TEST_CASES.items())
def test_frame_plot_alpha(data, kind, info):
    """Check explicit and default opacity for frame plots of every kind."""
    cols = info['usecols']
    plot_kwds = info.get('kwds', {})
    frame = data[cols]

    def validated_encoding(chart):
        # Validate the chart, then hand back its encoding as a plain dict.
        validate_vegalite(chart)
        return chart['encoding'].to_dict()

    # An explicit alpha always ends up as the opacity value.
    explicit = validated_encoding(frame.vgplot(kind=kind, alpha=0.5, **plot_kwds))
    assert explicit['opacity']['value'] == 0.5

    if not is_stackable(kind):
        # Non-stackable plots have no default opacity.
        plain = validated_encoding(frame.vgplot(kind=kind, **plot_kwds))
        assert 'opacity' not in plain
        return

    # Stackable plots get a default opacity when drawn unstacked...
    unstacked = validated_encoding(
        frame.vgplot(kind=kind, stacked=False, **plot_kwds)
    )
    assert unstacked['opacity']['value'] == 0.7

    # ...unless only a single column is being plotted...
    single_column = validated_encoding(
        frame[cols[:1]].vgplot(kind=kind, stacked=False, **plot_kwds)
    )
    assert 'opacity' not in single_column

    # ...or the plot is stacked.
    stacked = validated_encoding(
        frame.vgplot(kind=kind, stacked=True, **plot_kwds)
    )
    assert 'opacity' not in stacked
182 | 
183 | 
@pytest.mark.parametrize('kind,info', FRAME_TEST_CASES.items())
@pytest.mark.parametrize('ax', AXES)
def test_frame_plot_ax(data, kind, info, ax):
    """Passing ``ax`` to a frame plot yields the chart class paired with it."""
    existing_chart, expected_type = ax[0], ax[1]
    frame = data[info['usecols']]
    plot_kwds = info.get('kwds', {})

    result = frame.vgplot(kind=kind, ax=existing_chart, **plot_kwds)
    validate_vegalite(result)
    assert isinstance(result, expected_type)
194 | 
195 | 
@pytest.mark.parametrize('kind,info', SERIES_TEST_CASES.items())
def test_series_plot_width_height(data, kind, info):
    """Explicit width/height are kept; otherwise the chart is 450 x 300."""
    series = data[info['col']]
    plot_kwds = info.get('kwds', {})

    sized = series.vgplot(kind=kind, width=300, height=200, **plot_kwds)
    validate_vegalite(sized)
    assert (sized['width'], sized['height']) == (300, 200)

    default_chart = series.vgplot(kind=kind, **plot_kwds)
    validate_vegalite(default_chart)
    as_dict = default_chart.to_dict()
    assert (as_dict['width'], as_dict['height']) == (450, 300)
210 | 
211 | 
@pytest.mark.parametrize('kind,info', FRAME_TEST_CASES.items())
def test_frame_plot_width_height(data, kind, info):
    """Explicit width/height are kept; otherwise the chart is 450 x 300."""
    frame = data[info['usecols']]
    plot_kwds = info.get('kwds', {})

    def dimensions(chart):
        # Validate the chart and return its (width, height) from the dict form.
        validate_vegalite(chart)
        as_dict = chart.to_dict()
        return (as_dict['width'], as_dict['height'])

    sized = frame.vgplot(kind=kind, width=300, height=200, **plot_kwds)
    assert dimensions(sized) == (300, 200)

    default_chart = frame.vgplot(kind=kind, **plot_kwds)
    assert dimensions(default_chart) == (450, 300)
227 | 
228 | 
@pytest.mark.parametrize('kind,info', SERIES_TEST_CASES.items())
def test_series_plot_kwd_warnings(data, kind, info):
    """Unknown keyword arguments to a series plot raise a ``UserWarning``."""
    col = info['col']
    kwds = info.get('kwds', {})
    data = data[col]

    # Raw string: ``\(`` is a regex escape, but an invalid escape sequence
    # inside an ordinary (non-raw) string literal.
    expected = r"Unrecognized keywords in vgplot.[a-z]+\(\): 'unrecognized_arg'"
    with pytest.warns(UserWarning, match=expected):
        data.vgplot(kind=kind, unrecognized_arg=None, **kwds)

    # Several unknown keywords at once still warn.
    with pytest.warns(UserWarning):
        data.vgplot(kind=kind, unrecognized1=None, unrecognized2=None, **kwds)
240 | 
241 | 
@pytest.mark.parametrize('kind,info', FRAME_TEST_CASES.items())
def test_frame_plot_kwd_warnings(data, kind, info):
    """Unknown keyword arguments to a frame plot raise a ``UserWarning``."""
    cols = info['usecols']
    kwds = info.get('kwds', {})
    data = data[cols]

    # Raw string: ``\(`` is a regex escape, but an invalid escape sequence
    # inside an ordinary (non-raw) string literal.
    expected = r"Unrecognized keywords in vgplot.[a-z]+\(\): 'unrecognized_arg'"
    with pytest.warns(UserWarning, match=expected):
        data.vgplot(kind=kind, unrecognized_arg=None, **kwds)

    # Several unknown keywords at once still warn.
    with pytest.warns(UserWarning):
        data.vgplot(kind=kind, unrecognized1=None, unrecognized2=None, **kwds)
253 | 
254 | 
@pytest.mark.parametrize('kind,info', SERIES_TEST_CASES.items())
def test_series_figsize(data, kind, info):
    """``figsize`` and ``dpi`` set the pixel size; width/height set it directly."""
    series = data[info['col']]
    plot_kwds = info.get('kwds', {})

    from_inches = series.vgplot(kind=kind, figsize=(10, 10), dpi=72, **plot_kwds)
    expected_pixels = 10 * 72 * 0.8
    assert from_inches.height == expected_pixels
    assert from_inches.width == expected_pixels

    from_pixels = series.vgplot(kind=kind, width=100, height=100, **plot_kwds)
    assert from_pixels.height == 100
    assert from_pixels.width == 100
268 | 
269 | 
@pytest.mark.parametrize('kind,info', FRAME_TEST_CASES.items())
def test_frame_plot_figsize(data, kind, info):
    """``figsize`` and ``dpi`` set the pixel size; width/height set it directly."""
    frame = data[info['usecols']]
    plot_kwds = info.get('kwds', {})

    from_inches = frame.vgplot(kind=kind, figsize=(10, 10), dpi=72, **plot_kwds)
    expected_pixels = 10 * 72 * 0.8
    assert from_inches.height == expected_pixels
    assert from_inches.width == expected_pixels

    from_pixels = frame.vgplot(kind=kind, width=100, height=100, **plot_kwds)
    assert from_pixels.height == 100
    assert from_pixels.width == 100
283 | 
284 | 
@pytest.mark.parametrize('kind,info', SERIES_TEST_CASES.items())
def test_series_title(data, kind, info):
    """The ``title`` keyword is stored on the chart of a series plot."""
    series = data[info['col']]
    plot_kwds = info.get('kwds', {})
    expected_title = 'Test'

    titled = series.vgplot(kind=kind, title=expected_title, **plot_kwds)
    assert titled.title == expected_title
294 | 
295 | 
@pytest.mark.parametrize('kind,info', FRAME_TEST_CASES.items())
def test_frame_title(data, kind, info):
    """The ``title`` keyword is stored on the chart of a frame plot."""
    frame = data[info['usecols']]
    plot_kwds = info.get('kwds', {})
    expected_title = 'Test'

    titled = frame.vgplot(kind=kind, title=expected_title, **plot_kwds)
    assert titled.title == expected_title
305 | 


--------------------------------------------------------------------------------
/pdvega/plotting.py:
--------------------------------------------------------------------------------
  1 | """Core plotting routines"""
  2 | import warnings
  3 | import altair as alt
  4 | import numpy as np
  5 | import pandas as pd
  6 | 
  7 | from ._utils import infer_vegalite_type
  8 | 
  9 | __all__ = ["scatter_matrix", "andrews_curves", "parallel_coordinates", "lag_plot"]
 10 | 
 11 | 
 12 | def scatter_matrix(frame, c=None, s=None, figsize=None, dpi=72.0, **kwds):
 13 |     """Draw a matrix of scatter plots.
 14 | 
 15 |     The result is an interactive pan/zoomable plot, with linked-brushing
 16 |     enabled by holding the shift key.
 17 | 
 18 |     Parameters
 19 |     ----------
 20 |     frame : DataFrame
 21 |         The dataframe for which to draw the scatter matrix.
 22 |     c : string (optional)
 23 |         If specified, the name of the column to be used to determine the
 24 |         color of each point.
 25 |     s : string (optional)
 26 |         If specified, the name of the column to be used to determine the
 27 |         size of each point,
 28 |     figsize : tuple (optional)
 29 |         A length-2 tuple speficying the size of the figure in inches
 30 |     dpi : float (default=72)
 31 |         The dots (i.e. pixels) per inch used to convert the figure size from
 32 |         inches to pixels.
 33 | 
 34 |     Returns
 35 |     -------
 36 |     chart: alt.Chart object
 37 |         The alt.Chart representation of the plot.
 38 | 
 39 |     See Also
 40 |     --------
 41 |     pandas.plotting.scatter_matrix : matplotlib version of this routine
 42 |     """
 43 |     if kwds:
 44 |         warnings.warn(
 45 |             "Unrecognized keywords in pdvega.scatter_matrix: {0}"
 46 |             "".format(list(kwds.keys()))
 47 |         )
 48 | 
 49 |     cols = [
 50 |         col
 51 |         for col in frame.columns
 52 |         if col not in [c, s]
 53 |         if infer_vegalite_type(frame[col], ordinal_threshold=0) == "quantitative"
 54 |     ]
 55 | 
 56 |     spec = {
 57 |         "$schema": "https://vega.github.io/schema/vega-lite/v2.json",
 58 |         "repeat": {"row": cols, "column": cols[::-1]},
 59 |         "spec": {
 60 |             "mark": "point",
 61 |             "selection": {
 62 |                 "brush": {
 63 |                     "type": "interval",
 64 |                     "resolve": "union",
 65 |                     "on": "[mousedown[event.shiftKey], window:mouseup] > window:mousemove!",
 66 |                     "translate": "[mousedown[event.shiftKey], window:mouseup] > window:mousemove!",
 67 |                     "zoom": "wheel![event.shiftKey]",
 68 |                 },
 69 |                 "grid": {
 70 |                     "type": "interval",
 71 |                     "resolve": "global",
 72 |                     "bind": "scales",
 73 |                     "translate": "[mousedown[!event.shiftKey], window:mouseup] > window:mousemove!",
 74 |                     "zoom": "wheel![!event.shiftKey]",
 75 |                 },
 76 |             },
 77 |             "encoding": {
 78 |                 "x": {"field": {"repeat": "column"}, "type": "quantitative"},
 79 |                 "y": {"field": {"repeat": "row"}, "type": "quantitative"},
 80 |                 "color": {"condition": {"selection": "brush"}, "value": "grey"},
 81 |             },
 82 |         },
 83 |     }
 84 | 
 85 |     if figsize is not None:
 86 |         width_inches, height_inches = figsize
 87 |         spec["spec"]["width"] = 0.8 * dpi * width_inches / len(cols)
 88 |         spec["spec"]["height"] = 0.8 * dpi * height_inches / len(cols)
 89 | 
 90 |     if s is not None:
 91 |         spec["spec"]["encoding"]["size"] = {
 92 |             "field": s, "type": infer_vegalite_type(frame[s])
 93 |         }
 94 | 
 95 |     cond = spec["spec"]["encoding"]["color"]["condition"]
 96 |     if c is None:
 97 |         cond["value"] = "steelblue"
 98 |     else:
 99 |         cond["field"] = c
100 |         cond["type"] = infer_vegalite_type(frame[c])
101 | 
102 |     chart = alt.Chart().from_dict(spec)
103 |     chart.data = frame
104 |     return chart
105 | 
106 | 
def andrews_curves(
    data, class_column, samples=200, alpha=None, width=450, height=300, **kwds
):
    """
    Generates an Andrews curves visualization for visualising clusters of
    multivariate data.

    Andrews curves have the functional form:

    f(t) = x_1/sqrt(2) + x_2 sin(t) + x_3 cos(t) +
           x_4 sin(2t) + x_5 cos(2t) + ...

    Where x coefficients correspond to the values of each dimension and t is
    linearly spaced between -pi and +pi. Each row of data then corresponds to
    a single curve.

    Parameters
    ----------
    data : DataFrame
        Data to be plotted, preferably normalized to (0.0, 1.0). Every column
        other than ``class_column`` is used as a coefficient.
    class_column : string
        Name of the column containing class names
    samples : integer
        Number of points to plot in each curve
    alpha: float, optional
        The transparency of the lines. If not given, 0.5 is used when there
        are more than 20 distinct classes.
    width : int, optional
        the width of the plot in pixels
    height : int, optional
        the height of the plot in pixels
    **kwds: keywords
        Any other keyword is ignored, with a warning.

    Returns
    -------
    chart: alt.Chart object

    """
    if kwds:
        warnings.warn(
            "Unrecognized keywords in pdvega.andrews_curves(): {0}"
            "".format(list(kwds.keys()))
        )

    t = np.linspace(-np.pi, np.pi, samples)
    # One row per coefficient x_i, one column per row of ``data``.
    vals = data.drop(class_column, axis=1).values.T

    # Constant term x_1 / sqrt(2), as in the formula above.
    curves = np.outer(vals[0], np.ones_like(t)) / np.sqrt(2.0)
    for i in range(1, len(vals)):
        # Coefficients alternate sin, cos and the frequency rises every
        # second term: sin(t), cos(t), sin(2t), cos(2t), ...
        ft = ((i + 1) // 2) * t
        if i % 2 == 1:
            curves += np.outer(vals[i], np.sin(ft))
        else:
            curves += np.outer(vals[i], np.cos(ft))

    # Long form: one row per (curve, t) pair. The curve values live in a
    # column named " ".
    df = pd.DataFrame(
        {
            "t": np.tile(t, curves.shape[0]),
            "sample": np.repeat(np.arange(curves.shape[0]), curves.shape[1]),
            " ": curves.ravel(),
            class_column: np.repeat(data[class_column], samples),
        }
    )

    chart = alt.Chart(df).properties(width=width, height=height).mark_line()
    chart = chart.encode(
        x=alt.X(field="t", type="quantitative"),
        y=alt.Y(field=" ", type="quantitative"),
        color=alt.Color(field=class_column, type=infer_vegalite_type(df[class_column])),
        # ``detail`` keeps each input row as its own line.
        detail=alt.Detail(field='sample', type="quantitative")
    )

    if alpha is None and df[class_column].nunique() > 20:
        alpha = 0.5

    if alpha is not None:
        assert 0 <= alpha <= 1
        return chart.encode(opacity=alt.value(alpha))

    return chart
187 | 
188 | 
def parallel_coordinates(
    data,
    class_column,
    cols=None,
    alpha=None,
    width=450,
    height=300,
    interactive=True,
    var_name="variable",
    value_name="value",
    **kwds
):
    """
    Parallel coordinates plotting.

    Parameters
    ----------
    data: DataFrame
        The data to plot; each row becomes one line.
    class_column: str
        Column name containing class names
    cols: list, optional
        A list of column names to use
    alpha: float, optional
        The transparency of the lines. If not given, 0.3 is used when there
        are more than 20 distinct classes.
    interactive : bool, optional
        Accepted for API compatibility; this function does not currently
        use it.
    width : int, optional
        the width of the plot in pixels
    height : int, optional
        the height of the plot in pixels
    var_name : string, optional
        the name of the melted variable column, plotted on the x-axis
    value_name : string, optional
        the name of the melted value column, plotted on the y-axis
    **kwds :
        Any other keyword is ignored, with a warning.

    Returns
    -------
    chart: alt.Chart object
        The altair representation of the plot.

    See Also
    --------
    pandas.plotting.parallel_coordinates : matplotlib version of this routine
    """
    if kwds:
        warnings.warn(
            "Unrecognized keywords in pdvega.parallel_coordinates(): {0}"
            "".format(list(kwds.keys()))
        )

    # Transform the dataframe to be used in Vega-Lite
    if cols is not None:
        data = data[list(cols) + [class_column]]
    cols = data.columns
    df = data.reset_index()
    # The column that reset_index() added; it identifies the original row.
    index = (set(df.columns) - set(cols)).pop()
    assert index in df.columns
    df = df.melt([index, class_column], var_name=var_name, value_name=value_name)

    chart = alt.Chart(df).properties(width=width, height=height)
    chart = chart.mark_line().encode(
         x=alt.X(field=var_name, type=infer_vegalite_type(df[var_name])),
         y=alt.Y(field=value_name, type=infer_vegalite_type(df[value_name])),
         color=alt.Color(field=class_column, type=infer_vegalite_type(df[class_column])),
         # ``detail`` keeps each original row as its own line.
         detail=alt.Detail(field=index, type=infer_vegalite_type(df[index]))
    )

    if alpha is None and df[class_column].nunique() > 20:
        alpha = 0.3

    if alpha is not None:
        assert 0 <= alpha <= 1
        return chart.encode(opacity=alt.value(alpha))
    return chart
263 | 
264 | 
def lag_plot(data, lag=1, kind="scatter", **kwds):
    """Lag plot for time series.

    Parameters
    ----------
    data: pandas.Series or pandas.DataFrame
        the time series to plot. For a DataFrame, every column is plotted
        and the points are colored by column name.
    lag: integer
        The lag of the scatter plot, default=1
    kind: string
        The kind of plot to use (e.g. 'scatter', 'line')
    **kwds:
        Additional keywords passed on to the ``vgplot`` call. For DataFrame
        input, ``c`` is overwritten with the column-name variable.

    Returns
    -------
    chart: alt.Chart object

    Raises
    ------
    ValueError
        If ``lag`` is not a positive integer.
    """
    if lag != int(lag) or int(lag) <= 0:
        raise ValueError("lag must be a positive integer")
    lag = int(lag)

    values = data.values
    y1 = "y(t)"
    y2 = "y(t + {0})".format(lag)
    # Pair each value with the one ``lag`` steps later; the transpose keeps
    # each column of 2D input contiguous in the flattened result.
    lags = pd.DataFrame({y1: values[:-lag].T.ravel(), y2: values[lag:].T.ravel()})

    if isinstance(data, pd.DataFrame):
        # Integer division: the repeat count passed to np.repeat must be an
        # int, and true division would produce a float.
        rows_per_column = lags.shape[0] // data.shape[1]
        lags["variable"] = np.repeat(data.columns, rows_per_column)
        kwds["c"] = "variable"

    return lags.vgplot(kind=kind, x=y1, y=y2, **kwds)
297 | 


--------------------------------------------------------------------------------
/doc/core.rst:
--------------------------------------------------------------------------------
  1 | .. _core-plotting:
  2 | 
  3 | Simple Visualizations with ``data.vgplot``
  4 | ==========================================
  5 | 
  6 | The core interface of ``pdvega`` is the ``vgplot`` attribute that it adds to
  7 | Pandas ``DataFrame`` and ``Series`` objects::
  8 | 
  9 |     import pdvega
 10 | 
 11 | .. pdvega-setup::
 12 | 
 13 |    import pdvega
 14 | 
 15 |    from vega_datasets import data
 16 |    iris = data.iris()
 17 | 
 18 | Like the ``plot`` attribute that is built-in to Pandas, there are two ways of
 19 | creating plots with ``vgplot``: first, you can call the ``vgplot`` attribute
 20 | of a Pandas object directly:
 21 | 
 22 | .. pdvega-plot::
 23 | 
 24 |    from vega_datasets import data
 25 |    iris = data.iris()
 26 | 
 27 |    iris.vgplot(kind='scatter', x='sepalLength', y='petalLength', c='species')
 28 | 
 29 | Equivalently, you can call the specific method associated with each plot type:
 30 | 
 31 | .. pdvega-plot::
 32 | 
 33 |    iris.vgplot.scatter(x='sepalLength', y='petalLength', c='species')
 34 | 
 35 | The benefit of the second approach is that it allows exploration of available
 36 | plot types via tab completion, and the individual functions also provide more
 37 | detailed documentation of the arguments available for each method.
 38 | 
 39 | The ``vgplot`` interface exposes nine basic plot types; we will show examples
 40 | of these below.
 41 | 
 42 | Datasets
 43 | --------
 44 | For the examples on this page, we will use a number of datasets made available
 45 | in the `vega_datasets`_ package:
 46 | 
 47 | .. pdvega-setup::
 48 |    :show:
 49 | 
 50 |    iris = data.iris()
 51 |    stocks = data.stocks(pivoted=True)
 52 |    cars = data.cars()
 53 | 
 54 | These datasets are stored in the form of pandas dataframes::
 55 | 
 56 |      >>> iris.head()
 57 |         petalLength  petalWidth  sepalLength  sepalWidth species
 58 |      0          1.4         0.2          5.1         3.5  setosa
 59 |      1          1.4         0.2          4.9         3.0  setosa
 60 |      2          1.3         0.2          4.7         3.2  setosa
 61 |      3          1.5         0.2          4.6         3.1  setosa
 62 |      4          1.4         0.2          5.0         3.6  setosa
 63 | 
 64 |      >>> stocks.head()
 65 |      symbol       AAPL   AMZN  GOOG     IBM   MSFT
 66 |      date
 67 |      2000-01-01  25.94  64.56   NaN  100.52  39.81
 68 |      2000-02-01  28.66  68.87   NaN   92.11  36.35
 69 |      2000-03-01  33.95  67.00   NaN  106.11  43.22
 70 |      2000-04-01  31.01  55.19   NaN   99.95  28.37
 71 |      2000-05-01  21.00  48.31   NaN   96.31  25.45
 72 | 
 73 | 
 74 |      >>> cars.head()
 75 |         Acceleration  Cylinders  Displacement  Horsepower  Miles_per_Gallon  \
 76 |      0          12.0          8         307.0       130.0              18.0
 77 |      1          11.5          8         350.0       165.0              15.0
 78 |      2          11.0          8         318.0       150.0              18.0
 79 |      3          12.0          8         304.0       150.0              16.0
 80 |      4          10.5          8         302.0       140.0              17.0
 81 | 
 82 |                              Name Origin  Weight_in_lbs        Year
 83 |      0  chevrolet chevelle malibu    USA           3504  1970-01-01
 84 |      1          buick skylark 320    USA           3693  1970-01-01
 85 |      2         plymouth satellite    USA           3436  1970-01-01
 86 |      3              amc rebel sst    USA           3433  1970-01-01
 87 |      4                ford torino    USA           3449  1970-01-01
 88 | 
 89 | .. _vgplot-line:
 90 | 
 91 | Line Plots with ``vgplot.line``
 92 | -------------------------------
 93 | The default plot type for ``vgplot`` is a line plot:
 94 | 
 95 | .. pdvega-plot::
 96 | 
 97 |    stocks.vgplot()
 98 | 
 99 | Unless otherwise specified, the index of the DataFrame or Series is used as the
100 | x-axis variable, and a separate line will be created for the y-values in each
101 | column in the dataframe. If you'd like to plot a subset of the columns, you can use
102 | pandas indexing to select the columns you are interested in:
103 | 
104 | .. pdvega-plot::
105 | 
106 |    stocks[['AAPL', 'AMZN']].vgplot.line()
107 | 
108 | Optionally, you can specify the column names to use for the x-axis and y-axis:
109 | 
110 | .. pdvega-plot::
111 | 
112 |   stocks.vgplot.line(x='AAPL', y='AMZN')
113 | 
114 | Line plots can be further customized; see the function documentation for
115 | more information:
116 | 
117 | - Series line plot: :meth:`pdvega.SeriesPlotMethods.line`
118 | - DataFrame line plot: :meth:`pdvega.FramePlotMethods.line`
119 | 
120 | .. _vgplot-scatter:
121 | 
122 | Scatter Plots with ``vgplot.scatter``
123 | -------------------------------------
124 | The previous plot might make more sense in the form of a scatter plot.
125 | This can be done with ``vgplot.scatter()``:
126 | 
127 | .. pdvega-plot::
128 | 
129 |     stocks.vgplot.scatter(x='AAPL', y='AMZN')
130 | 
131 | You can also encode the color and size of scatter plots; let's switch to the
132 | cars dataset to see the relationship between some of these variables:
133 | 
134 | .. pdvega-plot::
135 | 
136 |     cars.vgplot.scatter(x='Horsepower', y='Miles_per_Gallon',
137 |                         c='Origin', s='Weight_in_lbs', alpha=0.5)
138 | 
139 | This is one slight difference from the Pandas plot interface: in Pandas the
140 | ``c`` and ``s`` parameters must be passed as arrays, while here we pass them
141 | as column names.
142 | 
143 | Scatter plots can be further customized; see :meth:`pdvega.FramePlotMethods.scatter`
144 | for more information.
145 | 
146 | .. _vgplot-area:
147 | 
148 | Area Plots with ``vgplot.area``
149 | -------------------------------
150 | Area plots are quite similar to line plots, but curves are filled and stacked,
151 | meaning the top curve reflects the sum of all the ones below:
152 | 
153 | .. pdvega-plot::
154 | 
155 |    stocks[['MSFT', 'AAPL', 'AMZN']].vgplot.area()
156 | 
157 | 
158 | Area charts can also be unstacked and overlaid, in which case transparency
159 | can be useful:
160 | 
161 | .. pdvega-plot::
162 | 
163 |    stocks[['MSFT', 'AAPL', 'AMZN']].vgplot.area(stacked=False, alpha=0.4)
164 | 
165 | Area plots can be further customized; see the function documentation for
166 | more information:
167 | 
168 | - Series area plot: :meth:`pdvega.SeriesPlotMethods.area`
169 | - DataFrame area plot: :meth:`pdvega.FramePlotMethods.area`
170 | 
171 | .. _vgplot-bar:
172 | 
173 | Bar Charts with ``vgplot.bar``
174 | ------------------------------
175 | 
176 | Bar charts are supported using ``vgplot.bar()``. Let's create a small dataset
177 | to use for this:
178 | 
179 | .. pdvega-setup::
180 |    :show:
181 | 
182 |    import numpy as np
183 |    import pandas as pd
184 |    np.random.seed(1234)
185 | 
186 |    df = pd.DataFrame(np.random.rand(10, 2), columns=['a', 'b'])
187 | 
188 | .. pdvega-plot::
189 | 
190 |    df.vgplot.bar()
191 | 
192 | Multiple bar plots will be layered on top of each other; like with area charts,
193 | they can be stacked using the ``stacked=True`` option:
194 | 
195 | .. pdvega-plot::
196 | 
197 |    df.vgplot.bar(stacked=True)
198 | 
199 | Additionally, horizontal bar plots can be created with ``barh``:
200 | 
201 | .. pdvega-plot::
202 | 
203 |    df.vgplot.barh(stacked=True)
204 | 
205 | Bar charts can be further customized; see the function documentation for
206 | more information:
207 | 
208 | - Series bar plots: :meth:`pdvega.SeriesPlotMethods.bar`, :meth:`pdvega.SeriesPlotMethods.barh`
209 | - DataFrame bar plots: :meth:`pdvega.FramePlotMethods.bar`, :meth:`pdvega.FramePlotMethods.barh`
210 | 
211 | 
212 | .. _vgplot-hist:
213 | 
214 | Histograms with ``vgplot.hist``
215 | -------------------------------
216 | Histograms can be created with the ``vgplot.hist()`` method.
217 | 
218 | Let's create some data to make some distributions:
219 | 
220 | .. pdvega-setup::
221 |    :show:
222 | 
223 |    import pandas as pd
224 |    import numpy as np
225 |    df = pd.DataFrame({'a': np.random.randn(1000) + 1,
226 |                       'b': np.random.randn(1000),
227 |                       'c': np.random.randn(1000) - 1},
228 |                      columns=['a', 'b', 'c'])
229 | 
230 | We'll specify 50 bins, and create a layered histogram with a 50% transparency:
231 | 
232 | .. pdvega-plot::
233 | 
234 |    df.vgplot.hist(bins=50, alpha=0.5)
235 | 
236 | Alternatively, we can stack the histogram, and use ``histtype`` to specify that
237 | we want a filled step chart rather than a bar chart:
238 | 
239 | .. pdvega-plot::
240 | 
241 |    df.vgplot.hist(histtype='stepfilled', stacked=True, bins=50)
242 | 
243 | Histograms can be further customized; see the function documentation for
244 | more information:
245 | 
246 | - Series histogram: :meth:`pdvega.SeriesPlotMethods.hist`
247 | - DataFrame histogram: :meth:`pdvega.FramePlotMethods.hist`
248 | 
249 | .. _vgplot-kde:
250 | 
251 | KDE/Density plots with ``vgplot.kde``
252 | -------------------------------------
253 | Similar to a histogram is a kernel density estimation plot (kde) which creates
254 | a smooth curve representing the density of points. This can be created with
255 | the ``vgplot.kde`` method. We'll use the same data we did in the histogram
256 | section:
257 | 
258 | .. pdvega-plot::
259 | 
260 |    df.vgplot.kde()
261 | 
262 | KDE plots can be further customized; see the function documentation for
263 | more information:
264 | 
265 | - Series kde plots: :meth:`pdvega.SeriesPlotMethods.kde`
266 | - DataFrame kde plots: :meth:`pdvega.FramePlotMethods.kde`
267 | 
268 | 
269 | .. _vgplot-pie-chart:
270 | 
271 | Pie Charts
272 | ----------
273 | No.
274 | 
275 | .. _vgplot-heatmap:
276 | 
277 | Heatmaps
278 | --------
279 | Pandas plotting has a function to create a hexagonally-binned heatmap of
280 | two-dimensional data. Unfortunately neither Vega nor Vega-Lite currently
281 | support hexagonal binning. But they do support cartesian heatmaps, and this
282 | functionality is included in ``pdvega``:
283 | 
284 | .. pdvega-plot::
285 | 
286 |    df.vgplot.heatmap(x='a', y='b', gridsize=20)
287 | 
288 | Here the ``gridsize`` parameter indicates approximately how many grid points
289 | span the plot. Alternatively, instead of computing the count within each bin,
290 | we can compute the mean of a third column, specified by the ``C`` parameter:
291 | 
292 | .. pdvega-plot::
293 | 
294 |    df.vgplot.heatmap(x='a', y='b', C='c', gridsize=20)
295 | 
296 | 
297 | Heatmap plots can be further customized; see :meth:`pdvega.FramePlotMethods.heatmap`
298 | for more information.
299 | 
300 | Other Plot Types
301 | ----------------
302 | The above plots are the basic plot types supported by ``pdvega``; more sophisticated
303 | plots are available in the :mod:`pdvega.plotting` module.
304 | For examples of these, refer to :ref:`statistical-plotting`.
305 | 
306 | 
307 | 
308 | .. _vega_datasets: http://github.com/altair-viz/vega_datasets
309 | 


--------------------------------------------------------------------------------
/doc/sphinxext/pdvega_ext/pdvegaplot.py:
--------------------------------------------------------------------------------
  1 | """
  2 | PdVega Plot Sphinx Extension
  3 | ============================
  4 | 
  5 | This extension provides a means of inserting live-rendered PdVega plots within
  6 | sphinx documentation. There are two directives defined: ``pdvega-setup`` and
  7 | ``pdvega-plot``. ``pdvega-setup`` code is used to set-up various options
  8 | prior to running the plot code. For example::
  9 | 
 10 |     .. pdvega-setup::
 11 | 
 12 |         import pdvega
 13 |         import pandas as pd
 14 |         data = pd.Series([1, 2, 3, 2, 1, 2, 3])
 15 | 
 16 |     .. pdvega-plot::
 17 | 
 18 |         data.vgplot.line()
 19 | 
 20 | 
 21 | In the case of the ``pdvega-plot`` code, the *last statement* of the code-block
 22 | should evaluate to a pdvega Axes object.
 23 | 
 24 | Options
 25 | -------
 26 | The directives have the following options::
 27 | 
 28 |     .. pdvega-setup::
 29 |         :show: # if set, then show the setup code as a code block
 30 | 
 31 |         pass
 32 | 
 33 |     .. pdvega-plot::
 34 |         :hide-code:  # if set, then hide the code and only show the plot
 35 |         :code-below:  # if set, then code is below rather than above the figure
 36 |         :alt: text  # Alternate text when plot cannot be rendered
 37 |         :links: editor source export  # specify one or more of these options
 38 | 
 39 |         Chart()
 40 | 
 41 | Additionally, this extension introduces a global configuration
 42 | ``pdvegaplot_links``, set in your ``conf.py`` which is a dictionary
 43 | of links that will appear below plots, unless the ``:links:`` option
 44 | again overrides it. It should look something like this::
 45 | 
 46 |     # conf.py
 47 |     # ...
 48 |     pdvegaplot_links = {'editor': True, 'source': True, 'export': True}
 49 |     # ...
 50 | 
 51 | If this configuration is not specified, all are set to True.
 52 | """
 53 | 
 54 | import os
 55 | import json
 56 | import warnings
 57 | 
 58 | import jinja2
 59 | 
 60 | from docutils import nodes
 61 | from docutils.parsers.rst import Directive
 62 | from docutils.parsers.rst.directives import flag, unchanged
 63 | 
 64 | from sphinx.locale import _
 65 | 
 66 | from pdvega import Axes
 67 | from .utils import exec_then_eval
 68 | 
# Default locations of the Vega, Vega-Lite and vega-embed JavaScript libraries.
# These default URLs can be changed in conf.py; see setup() below, which
# registers them as the ``pdvegaplot_vega_js_url``,
# ``pdvegaplot_vegalite_js_url`` and ``pdvegaplot_vegaembed_js_url`` values.
VEGA_JS_URL_DEFAULT = "https://cdn.jsdelivr.net/npm/vega"
VEGALITE_JS_URL_DEFAULT = "https://cdn.jsdelivr.net/npm/vega-lite"
VEGAEMBED_JS_URL_DEFAULT = "https://cdn.jsdelivr.net/npm/vega-embed"


# HTML snippet appended to the page body by html_visit_pdvega_plot() for each
# plot.  It is rendered with two variables: ``div_id`` (id of the container
# element that vegaEmbed targets) and ``url`` (location of the ``*.vl.json``
# spec file written by the visitor).
VGL_TEMPLATE = jinja2.Template("""
<div id="{{ div_id }}">
<script>
  // embed when document is loaded, to ensure vega library is available
  // this works on all modern browsers, except IE8 and older
  document.addEventListener("DOMContentLoaded", function(event) {
    vegaEmbed("#{{ div_id }}", "{{ url }}").then(function(result) {
      console.log(result);
    }).catch(console.error);
  });
</script>
</div>
""")
 88 | 
 89 | 
 90 | class pdvega_plot(nodes.General, nodes.Element):
 91 |     pass
 92 | 
 93 | 
 94 | class PdVegaSetupDirective(Directive):
 95 |     has_content = True
 96 | 
 97 |     option_spec = {'show': flag}
 98 | 
 99 |     def run(self):
100 |         env = self.state.document.settings.env
101 | 
102 |         targetid = "pdvega-plot-{0}".format(env.new_serialno('pdvega-plot'))
103 |         targetnode = nodes.target('', '', ids=[targetid])
104 | 
105 |         code = '\n'.join(self.content)
106 | 
107 |         # Here we cache the code for use in later setup
108 |         if not hasattr(env, 'pdvega_plot_setup'):
109 |             env.pdvega_plot_setup = []
110 |         env.pdvega_plot_setup.append({
111 |             'docname': env.docname,
112 |             'lineno': self.lineno,
113 |             'code': code,
114 |             'target': targetnode,
115 |         })
116 | 
117 |         result = [targetnode]
118 | 
119 |         if 'show' in self.options:
120 |             source_literal = nodes.literal_block(code, code)
121 |             source_literal['language'] = 'python'
122 |             result.append(source_literal)
123 | 
124 |         return result
125 | 
126 | 
def purge_pdvega_plot_setup(app, env, docname):
    """Drop the cached setup snippets that were recorded for ``docname``.

    Does nothing when the environment has no ``pdvega_plot_setup`` cache yet.
    ``app`` is accepted for the event-handler signature but is not used.
    """
    try:
        cached = env.pdvega_plot_setup
    except AttributeError:
        return
    env.pdvega_plot_setup = list(
        filter(lambda entry: entry['docname'] != docname, cached))
132 | 
133 | 
# Link names accepted by the ``:links:`` option, with their default values.
DEFAULT_PDVEGAPLOT_LINKS = {'editor': True, 'source': True, 'export': True}


def validate_links(links):
    """Convert the ``:links:`` option string into a dict of booleans.

    Parameters
    ----------
    links : str
        Whitespace-separated link names, or ``none`` (case-insensitive) to
        disable every link.

    Returns
    -------
    dict
        ``{}`` for ``none``; otherwise every key of
        ``DEFAULT_PDVEGAPLOT_LINKS`` mapped to whether it was listed.

    Raises
    ------
    ValueError
        If no argument is supplied or an unknown link name is given.
    """
    if links is None:
        # docutils hands None to option converters when the option has no
        # argument; ValueError is reported as a directive error, whereas the
        # AttributeError from ``None.strip()`` would abort the build.
        raise ValueError("argument required but none supplied")

    links = links.strip()
    if links.lower() == 'none':
        return {}

    requested = links.split()
    # Sorted so the error message is deterministic.
    unknown = sorted(set(requested) - set(DEFAULT_PDVEGAPLOT_LINKS))
    if unknown:
        raise ValueError("Following links are invalid: {0}".format(unknown))
    return {name: name in requested for name in DEFAULT_PDVEGAPLOT_LINKS}
146 | 
147 | 
class PdVegaPlotDirective(Directive):
    """Directive ``pdvega-plot``: queue a code block for rendering as a plot.

    ``run`` does not execute the code itself; it stores the code, the setup
    code cached for the current document and assorted metadata on a
    ``pdvega_plot`` node, which the visitor functions process later.

    Options: ``:hide-code:`` suppresses the literal code block,
    ``:code-below:`` places the code after the plot, ``:alt:`` sets
    alternate text and ``:links:`` is parsed by ``validate_links``.
    """

    has_content = True

    option_spec = {
        'hide-code': flag,
        'code-below': flag,
        'alt': unchanged,
        'links': validate_links,
    }

    def run(self):
        """Build the target, plot and (optionally) source nodes."""
        env = self.state.document.settings.env
        app = env.app
        options = self.options

        plot_code = '\n'.join(self.content)

        # Concatenate every setup snippet recorded for this document.
        cached_setup = getattr(env, 'pdvega_plot_setup', [])
        setup_code = '\n'.join(entry['code'] for entry in cached_setup
                               if entry['docname'] == env.docname)

        code_nodes = []
        if 'hide-code' not in options:
            literal = nodes.literal_block(plot_code, plot_code)
            literal['language'] = 'python'
            code_nodes.append(literal)

        # Directory and file name of the reST source being processed.
        rst_source = self.state_machine.document['source']
        rst_dir, rst_filename = os.path.split(rst_source)

        # Derive readable ids from the source file name and a serial number.
        serialno = env.new_serialno('pdvega-plot')
        id_prefix = rst_filename.replace('.', '-')
        div_id = "{0}-pdvega-plot-{1}".format(id_prefix, serialno)
        target_id = "{0}-pdvega-source-{1}".format(id_prefix, serialno)
        anchor = nodes.target('', '', ids=[target_id])

        # Node in which the plot will appear; it is processed by
        # html_visit_pdvega_plot (or the generic visitor for other builders).
        plot_node = pdvega_plot()
        attributes = [
            ('target_id', target_id),
            ('div_id', div_id),
            ('code', plot_code),
            ('setupcode', setup_code),
            ('relpath', os.path.relpath(rst_dir, env.srcdir)),
            ('rst_source', rst_source),
            ('rst_lineno', self.lineno),
            ('links', options.get('links',
                                  app.builder.config.pdvegaplot_links)),
            ('url_root', app.config.pdvegaplot_url_root),
        ]
        for key, value in attributes:
            plot_node[key] = value

        if 'alt' in options:
            plot_node['alt'] = options['alt']

        if 'code-below' in options:
            return [anchor, plot_node] + code_nodes
        return [anchor] + code_nodes + [plot_node]
212 | 
213 | 
def html_visit_pdvega_plot(self, node):
    """HTML visitor: execute the plot code and embed the resulting chart.

    The node's setup code is executed in a fresh namespace, then the plot
    code is run in that namespace with its last statement evaluated.  When
    the result is an ``Axes`` instance, its ``spec`` is written to
    ``<outdir>/<relpath>/<div_id>.vl.json`` and the rendered ``VGL_TEMPLATE``
    is appended to the page body.  Failures in the plot code, or a result of
    another type, only emit a warning.  ``nodes.SkipNode`` is always raised
    at the end so that the node's children are not visited.
    """
    # Run the setup code first; the plot code shares its namespace.
    namespace = {}
    setup_code = node['setupcode']
    if setup_code:
        exec(setup_code, namespace)

    try:
        output = exec_then_eval(node['code'], namespace)
    except Exception as err:
        message = ("pdvega-plot: {0}:{1} Code Execution failed:"
                   "{2}: {3}")
        warnings.warn(message.format(node['rst_source'], node['rst_lineno'],
                                     err.__class__.__name__, str(err)))
        raise nodes.SkipNode

    if not isinstance(output, Axes):
        warnings.warn('pdvega-plot: {0}:{1} Malformed block. Last line of '
                      'code block should define a valid pdvega object.'
                      ''.format(node['rst_source'], node['rst_lineno']))
        raise nodes.SkipNode

    # The last statement produced a chart; grab its Vega-Lite spec.
    spec = output.spec
    div_id = node['div_id']

    # The spec is stored next to the generated page as a *.vl.json file.
    out_dir = os.path.join(self.builder.outdir, node['relpath'])
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    json_path = os.path.join(out_dir, "{0}.vl.json".format(div_id))
    # TODO: let this url be configured
    url = "{0}{1}.vl.json".format(node['url_root'], div_id)
    with open(json_path, 'w') as f:
        json.dump(spec, f)

    # Emit the HTML that loads the spec through vegaEmbed.
    self.body.append(VGL_TEMPLATE.render(div_id=div_id, url=url))
    raise nodes.SkipNode
263 | 
264 | 
def generic_visit_pdvega_plot(self, node):
    """Fallback visitor: append a text placeholder instead of a plot.

    The placeholder includes the node's ``alt`` text when one was given.
    ``nodes.SkipNode`` is always raised afterwards.
    """
    # TODO: generate PNGs and insert them here
    if 'alt' not in node.attributes:
        placeholder = _('[ graph ]')
    else:
        placeholder = _('[ graph: %s ]') % node['alt']
    self.body.append(placeholder)
    raise nodes.SkipNode
272 | 
273 | 
def builder_inited(app):
    """Register the Vega, Vega-Lite and vega-embed scripts with Sphinx.

    The script URLs are read from the ``pdvegaplot_vega_js_url``,
    ``pdvegaplot_vegalite_js_url`` and ``pdvegaplot_vegaembed_js_url``
    config values and registered in that order.
    """
    # Sphinx 1.8 renamed ``add_javascript`` to ``add_js_file`` and Sphinx 4.0
    # removed the old name; prefer the new API, fall back on older Sphinx.
    add_js = getattr(app, 'add_js_file', None) or app.add_javascript
    config = app.config
    for url in (config.pdvegaplot_vega_js_url,
                config.pdvegaplot_vegalite_js_url,
                config.pdvegaplot_vegaembed_js_url):
        add_js(url)
278 | 
279 | 
def setup(app):
    """Sphinx extension entry point.

    Registers the extension's config values, the ``pdvega-plot`` and
    ``pdvega-setup`` directives, the ``pdvega-plot.css`` stylesheet, the
    ``pdvega_plot`` node with its per-builder visitors, and the event
    handlers.  The application, its config and its confdir are also stored
    as attributes on this function.

    Returns
    -------
    dict
        Extension metadata: ``{'version': '0.1'}``.
    """
    setup.app = app
    setup.config = app.config
    setup.confdir = app.confdir

    app.add_config_value('pdvegaplot_links', DEFAULT_PDVEGAPLOT_LINKS, 'env')

    app.add_config_value('pdvegaplot_vega_js_url', VEGA_JS_URL_DEFAULT, 'html')
    app.add_config_value('pdvegaplot_vegalite_js_url', VEGALITE_JS_URL_DEFAULT, 'html')
    app.add_config_value('pdvegaplot_vegaembed_js_url', VEGAEMBED_JS_URL_DEFAULT, 'html')

    app.add_config_value('pdvegaplot_url_root', '/', 'html')

    app.add_directive('pdvega-plot', PdVegaPlotDirective)
    app.add_directive('pdvega-setup', PdVegaSetupDirective)

    # Sphinx 1.8 renamed ``add_stylesheet`` to ``add_css_file`` and Sphinx 4.0
    # removed the old name; prefer the new API, fall back on older Sphinx.
    add_css = getattr(app, 'add_css_file', None) or app.add_stylesheet
    add_css('pdvega-plot.css')

    # Only the HTML builder renders real plots; the other builders emit a
    # text placeholder.
    app.add_node(pdvega_plot,
                 html=(html_visit_pdvega_plot, None),
                 latex=(generic_visit_pdvega_plot, None),
                 texinfo=(generic_visit_pdvega_plot, None),
                 text=(generic_visit_pdvega_plot, None),
                 man=(generic_visit_pdvega_plot, None))

    app.connect('env-purge-doc', purge_pdvega_plot_setup)
    app.connect('builder-inited', builder_inited)

    return {'version': '0.1'}
309 | 


--------------------------------------------------------------------------------
/doc/conf.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | # pdvega documentation build configuration file, created by
  5 | # sphinx-quickstart on Wed Sep  7 12:52:48 2016.
  6 | #
  7 | # This file is execfile()d with the current directory set to its
  8 | # containing dir.
  9 | #
 10 | # Note that not all possible configuration values are present in this
 11 | # autogenerated file.
 12 | #
 13 | # All configuration values have a default; values that are commented out
 14 | # serve to show the default.
 15 | 
 16 | import sys
 17 | import os
 18 | 
 19 | # If extensions (or modules to document with autodoc) are in another directory,
 20 | # add these directories to sys.path here. If the directory is relative to the
 21 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 22 | sys.path.insert(0, os.path.abspath('sphinxext'))
 23 | 
 24 | # -- General configuration ------------------------------------------------
 25 | 
 26 | # If your documentation needs a minimal Sphinx version, state it here.
 27 | #needs_sphinx = '1.0'
 28 | 
 29 | # Add any Sphinx extension module names here, as strings. They can be
 30 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 31 | # ones.
 32 | extensions = [
 33 |     'sphinx.ext.autodoc',
 34 |     'sphinx.ext.autosummary',
 35 |     'sphinx.ext.doctest',
 36 |     'sphinx.ext.coverage',
 37 |     'sphinx.ext.githubpages',
 38 |     'numpydoc.numpydoc',
 39 |     'matplotlib.sphinxext.plot_directive',
 40 |     'pdvega_ext.pdvegaplot',
 41 | ]
 42 | 
 43 | pdvega_plot_links = {'editor': True, 'source': False, 'export': False}
 44 | 
 45 | autodoc_default_flags = ['members']
 46 | autodoc_member_order = 'groupwise'
 47 | 
 48 | # Add any paths that contain templates here, relative to this directory.
 49 | templates_path = ['_templates']
 50 | 
 51 | # The suffix(es) of source filenames.
 52 | # You can specify multiple suffix as a list of string:
 53 | # source_suffix = ['.rst', '.md']
 54 | source_suffix = '.rst'
 55 | 
 56 | # The encoding of source files.
 57 | #source_encoding = 'utf-8-sig'
 58 | 
 59 | # The master toctree document.
 60 | master_doc = 'index'
 61 | 
 62 | # General information about the project.
 63 | project = 'pdvega'
 64 | copyright = '2018, Jake VanderPlas'
 65 | author = 'Jake VanderPlas'
 66 | 
 67 | # The version info for the project you're documenting, acts as replacement for
 68 | # |version| and |release|, also used in various other places throughout the
 69 | # built documents.
 70 | #
 71 | # The short X.Y version.
 72 | version = '0.2'
 73 | # The full version, including alpha/beta/rc tags.
 74 | release = '0.2.0.dev0'
 75 | 
 76 | # The language for content autogenerated by Sphinx. Refer to documentation
 77 | # for a list of supported languages.
 78 | #
 79 | # This is also used if you do content translation via gettext catalogs.
 80 | # Usually you set "language" from the command line for these cases.
 81 | language = None
 82 | 
 83 | # There are two options for replacing |today|: either, you set today to some
 84 | # non-false value, then it is used:
 85 | #today = ''
 86 | # Else, today_fmt is used as the format for a strftime call.
 87 | #today_fmt = '%B %d, %Y'
 88 | 
 89 | # List of patterns, relative to source directory, that match files and
 90 | # directories to ignore when looking for source files.
 91 | # These patterns also affect html_static_path and html_extra_path
 92 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
 93 | 
 94 | # The reST default role (used for this markup: `text`) to use for all
 95 | # documents.
 96 | #default_role = None
 97 | 
 98 | # If true, '()' will be appended to :func: etc. cross-reference text.
 99 | #add_function_parentheses = True
100 | 
101 | # If true, the current module name will be prepended to all description
102 | # unit titles (such as .. function::).
103 | #add_module_names = True
104 | 
105 | # If true, sectionauthor and moduleauthor directives will be shown in the
106 | # output. They are ignored by default.
107 | #show_authors = False
108 | 
109 | # The name of the Pygments (syntax highlighting) style to use.
110 | pygments_style = 'sphinx'
111 | 
112 | # A list of ignored prefixes for module index sorting.
113 | #modindex_common_prefix = []
114 | 
115 | # If true, keep warnings as "system message" paragraphs in the built documents.
116 | #keep_warnings = False
117 | 
118 | # If true, `todo` and `todoList` produce output, else they produce nothing.
119 | todo_include_todos = False
120 | 
121 | 
122 | # -- Options for HTML output ----------------------------------------------
123 | 
124 | # The theme to use for HTML and HTML Help pages.  See the documentation for
125 | # a list of builtin themes.
126 | html_theme = 'sphinx_rtd_theme'
127 | 
128 | # Theme options are theme-specific and customize the look and feel of a theme
129 | # further.  For a list of options available for each theme, see the
130 | # documentation.
131 | #html_theme_options = {}
132 | 
133 | # Add any paths that contain custom themes here, relative to this directory.
134 | #html_theme_path = []
135 | 
136 | # The name for this set of Sphinx documents.
137 | # "<project> v<release> documentation" by default.
138 | #html_title = 'pdvega v0.1'
139 | 
140 | # A shorter title for the navigation bar.  Default is the same as html_title.
141 | html_short_title = 'pdvega'
142 | 
143 | # The name of an image file (relative to this directory) to place at the top
144 | # of the sidebar.
145 | #html_logo = None
146 | 
147 | # The name of an image file (relative to this directory) to use as a favicon of
148 | # the docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
149 | # pixels large.
150 | html_favicon = '_static/favicon.ico'
151 | 
152 | # Add any paths that contain custom static files (such as style sheets) here,
153 | # relative to this directory. They are copied after the builtin static files,
154 | # so a file named "default.css" will overwrite the builtin "default.css".
155 | html_static_path = ['_static', '_images']
156 | 
157 | # adapted from: http://rackerlabs.github.io/docs-rackspace/tools/rtd-tables.html
158 | # and
159 | # https://github.com/rtfd/sphinx_rtd_theme/issues/117
160 | def setup(app):
161 |     app.add_stylesheet('theme_overrides.css')
162 | 
163 | # Add any extra paths that contain custom files (such as robots.txt or
164 | # .htaccess) here, relative to this directory. These files are copied
165 | # directly to the root of the documentation.
166 | #html_extra_path = []
167 | 
168 | # If not None, a 'Last updated on:' timestamp is inserted at every page
169 | # bottom, using the given strftime format.
170 | # The empty string is equivalent to '%b %d, %Y'.
171 | #html_last_updated_fmt = None
172 | 
173 | # If true, SmartyPants will be used to convert quotes and dashes to
174 | # typographically correct entities.
175 | #html_use_smartypants = True
176 | 
177 | # Custom sidebar templates, maps document names to template names.
178 | #html_sidebars = {}
179 | 
180 | # Additional templates that should be rendered to pages, maps page names to
181 | # template names.
182 | #html_additional_pages = {}
183 | 
184 | # If false, no module index is generated.
185 | #html_domain_indices = True
186 | 
187 | # If false, no index is generated.
188 | #html_use_index = True
189 | 
190 | # If true, the index is split into individual pages for each letter.
191 | #html_split_index = False
192 | 
193 | # If true, links to the reST sources are added to the pages.
194 | #html_show_sourcelink = True
195 | 
196 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
197 | #html_show_sphinx = True
198 | 
199 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
200 | #html_show_copyright = True
201 | 
202 | # If true, an OpenSearch description file will be output, and all pages will
203 | # contain a <link> tag referring to it.  The value of this option must be the
204 | # base URL from which the finished HTML is served.
205 | #html_use_opensearch = ''
206 | 
207 | # This is the file name suffix for HTML files (e.g. ".xhtml").
208 | #html_file_suffix = None
209 | 
210 | # Language to be used for generating the HTML full-text search index.
211 | # Sphinx supports the following languages:
212 | #   'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja'
213 | #   'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr', 'zh'
214 | #html_search_language = 'en'
215 | 
216 | # A dictionary with options for the search language support, empty by default.
217 | # 'ja' uses this config value.
218 | # 'zh' user can custom change `jieba` dictionary path.
219 | #html_search_options = {'type': 'default'}
220 | 
221 | # The name of a javascript file (relative to the configuration directory) that
222 | # implements a search results scorer. If empty, the default will be used.
223 | #html_search_scorer = 'scorer.js'
224 | 
225 | # Output file base name for HTML help builder.
226 | htmlhelp_basename = 'pdvegadoc'
227 | 
228 | # -- Options for LaTeX output ---------------------------------------------
229 | 
230 | latex_elements = {
231 | # The paper size ('letterpaper' or 'a4paper').
232 | #'papersize': 'letterpaper',
233 | 
234 | # The font size ('10pt', '11pt' or '12pt').
235 | #'pointsize': '10pt',
236 | 
237 | # Additional stuff for the LaTeX preamble.
238 | #'preamble': '',
239 | 
240 | # Latex figure (float) alignment
241 | #'figure_align': 'htbp',
242 | }
243 | 
244 | # Grouping the document tree into LaTeX files. List of tuples
245 | # (source start file, target name, title,
246 | #  author, documentclass [howto, manual, or own class]).
247 | latex_documents = [
248 |     (master_doc, 'pdvega.tex', 'pdvega Documentation',
249 |      'Brian Granger and Jake VanderPlas', 'manual'),
250 | ]
251 | 
252 | # The name of an image file (relative to this directory) to place at the top of
253 | # the title page.
254 | #latex_logo = None
255 | 
256 | # For "manual" documents, if this is true, then toplevel headings are parts,
257 | # not chapters.
258 | #latex_use_parts = False
259 | 
260 | # If true, show page references after internal links.
261 | #latex_show_pagerefs = False
262 | 
263 | # If true, show URL addresses after external links.
264 | #latex_show_urls = False
265 | 
266 | # Documents to append as an appendix to all manuals.
267 | #latex_appendices = []
268 | 
269 | # If false, no module index is generated.
270 | #latex_domain_indices = True
271 | 
272 | 
273 | # -- Options for manual page output ---------------------------------------
274 | 
275 | # One entry per manual page. List of tuples
276 | # (source start file, name, description, authors, manual section).
277 | man_pages = [
278 |     (master_doc, 'pdvega', 'pdvega Documentation',
279 |      [author], 1)
280 | ]
281 | 
282 | # If true, show URL addresses after external links.
283 | #man_show_urls = False
284 | 
285 | 
286 | # -- Options for Texinfo output -------------------------------------------
287 | 
288 | # Grouping the document tree into Texinfo files. List of tuples
289 | # (source start file, target name, title, author,
290 | #  dir menu entry, description, category)
291 | texinfo_documents = [
292 |     (master_doc, 'pdvega', 'pdvega Documentation',
293 |      author, 'pdvega', 'One line description of project.',
294 |      'Miscellaneous'),
295 | ]
296 | 
297 | # Documents to append as an appendix to all manuals.
298 | #texinfo_appendices = []
299 | 
300 | # If false, no module index is generated.
301 | #texinfo_domain_indices = True
302 | 
303 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
304 | #texinfo_show_urls = 'footnote'
305 | 
306 | # If true, do not generate a @detailmenu in the "Top" node's menu.
307 | #texinfo_no_detailmenu = False
308 | 
309 | # Hide extra class members
310 | numpydoc_show_class_members = False
311 | 
312 | # For the matplotlib plot extension
313 | plot_html_show_source_link = False
314 | plot_html_show_formats = False
315 | 
316 | # For the pdvegaplot extension
317 | # pdvegaplot_links = {'editor': True, 'source': True, 'export': True}
318 | pdvegaplot_vega_js_url = "https://cdn.jsdelivr.net/npm/vega@3.0.10"
319 | pdvegaplot_vegalite_js_url = "https://cdn.jsdelivr.net/npm/vega-lite@2.1.2"
320 | pdvegaplot_vegaembed_js_url = "https://cdn.jsdelivr.net/npm/vega-embed@3.0.0"
321 | pdvegaplot_url_root = "//0.0.0.0:8000/"
322 | 


--------------------------------------------------------------------------------
/pdvega/tests/test_core.py:
--------------------------------------------------------------------------------
  1 | import pytest
  2 | 
  3 | import pandas as pd
  4 | 
  5 | import altair as alt
  6 | 
  7 | from pdvega.tests import utils
  8 | 
  9 | 
 10 | def test_line_simple():
 11 |     df = pd.DataFrame({"x": [1, 4, 2, 3, 5], "y": [6, 3, 4, 5, 2]})
 12 | 
 13 |     plot = df.vgplot.line()
 14 |     utils.validate_vegalite(plot)
 15 | 
 16 |     assert plot.mark == "line"
 17 | 
 18 |     utils.check_encodings(plot, x="index", y="value",
 19 |                           color="variable")
 20 |     data = plot.data
 21 |     assert set(pd.unique(data["variable"])) == {"x", "y"}
 22 | 
 23 | 
 24 | def test_line_xy():
 25 |     df = pd.DataFrame({"x": [1, 4, 2, 3, 5], "y": [6, 3, 4, 5, 2], "z": range(5)})
 26 | 
 27 |     plot = df.vgplot.line(x="x", y="y")
 28 |     utils.validate_vegalite(plot)
 29 |     assert plot.mark == "line"
 30 | 
 31 |     utils.check_encodings(plot, x="x", y="value",
 32 |                           color="variable", order="index")
 33 |     data = plot.data
 34 |     assert set(pd.unique(data["variable"])) == {"y"}
 35 | 
 36 | 
 37 | def test_series_line():
 38 |     ser = pd.Series([3, 2, 3, 2, 3])
 39 |     plot = ser.vgplot.line()
 40 |     utils.validate_vegalite(plot)
 41 |     assert plot.mark == "line"
 42 |     utils.check_encodings(plot, x="index", y="0")
 43 | 
 44 | 
 45 | def test_scatter_simple():
 46 |     df = pd.DataFrame({"x": [1, 4, 2, 3, 5], "y": [6, 3, 4, 5, 2]})
 47 | 
 48 |     plot = df.vgplot.scatter(x="x", y="y")
 49 |     utils.validate_vegalite(plot)
 50 |     assert plot.mark == "point"
 51 |     utils.check_encodings(plot, x="x", y="y")
 52 | 
 53 | 
 54 | def test_scatter_color_size():
 55 |     df = pd.DataFrame(
 56 |         {"x": [1, 4, 2, 3, 5], "y": [6, 3, 4, 5, 2], "c": range(5), "s": range(5)}
 57 |     )
 58 | 
 59 |     plot = df.vgplot.scatter(x="x", y="y", c="c", s="s")
 60 |     utils.validate_vegalite(plot)
 61 |     assert plot.mark == "point"
 62 |     utils.check_encodings(plot, x="x", y="y", color="c", size="s")
 63 | 
 64 | 
 65 | def test_scatter_common_columns():
 66 |     df = pd.DataFrame({"x": [1, 4, 2, 3, 5], "y": [6, 3, 4, 5, 2]})
 67 | 
 68 |     plot = df.vgplot.scatter(x="x", y="y", c="y")
 69 |     utils.validate_vegalite(plot)
 70 |     assert plot.mark == "point"
 71 |     utils.check_encodings(plot, x="x", y="y", color="y")
 72 | 
 73 | 
 74 | def test_bar_simple():
 75 |     df = pd.DataFrame({"x": [1, 4, 2, 3, 5], "y": [6, 3, 4, 5, 2]})
 76 | 
 77 |     plot = df.vgplot.bar()
 78 |     utils.validate_vegalite(plot)
 79 |     assert plot.mark == "bar"
 80 |     utils.check_encodings(
 81 |         plot, x="index", y="value", color="variable",
 82 |         opacity=utils.IGNORE
 83 |     )
 84 |     data = plot.data
 85 |     assert set(pd.unique(data["variable"])) == {"x", "y"}
 86 |     assert plot["encoding"]["y"]["stack"] is None
 87 | 
 88 | 
 89 | def test_bar_stacked():
 90 |     df = pd.DataFrame({"x": [1, 4, 2, 3, 5], "y": [6, 3, 4, 5, 2]})
 91 | 
 92 |     plot = df.vgplot.bar(stacked=True)
 93 |     utils.validate_vegalite(plot)
 94 |     assert plot.mark == "bar"
 95 |     utils.check_encodings(plot, x="index", y="value", color="variable")
 96 |     data = plot.data
 97 |     assert set(pd.unique(data["variable"])) == {"x", "y"}
 98 |     assert plot["encoding"]["y"]["stack"] == "zero"
 99 | 
100 | 
def test_bar_xy():
    """``bar(x, y)`` is unstacked and keeps only the requested y column."""
    frame = pd.DataFrame({"x": [1, 4, 2, 3, 5], "y": [6, 3, 4, 5, 2]})
    chart = frame.vgplot.bar(x="x", y="y")

    utils.validate_vegalite(chart)
    assert chart.mark == "bar"
    utils.check_encodings(chart, x="x", y="value", color="variable")

    variables = set(pd.unique(chart.data["variable"]))
    assert variables == {"y"}
    assert chart["encoding"]["y"]["stack"] is None
111 | 
112 | 
def test_bar_xy_stacked():
    """``bar(x, y, stacked=True)`` stacks the y channel from zero."""
    frame = pd.DataFrame({"x": [1, 4, 2, 3, 5], "y": [6, 3, 4, 5, 2]})
    chart = frame.vgplot.bar(x="x", y="y", stacked=True)

    utils.validate_vegalite(chart)
    assert chart.mark == "bar"
    utils.check_encodings(chart, x="x", y="value", color="variable")

    variables = set(pd.unique(chart.data["variable"]))
    assert variables == {"y"}
    assert chart["encoding"]["y"]["stack"] == "zero"
123 | 
124 | 
def test_series_bar():
    """``Series.vgplot.bar()`` puts the index on x and the values on y."""
    labels = ["A", "B", "C", "D"]
    series = pd.Series([4, 5, 4, 5], index=labels)
    chart = series.vgplot.bar()

    utils.validate_vegalite(chart)
    assert chart.mark == "bar"
    utils.check_encodings(chart, x="index", y="0")
131 | 
132 | 
def test_barh_simple():
    """Default ``barh()`` mirrors ``bar()`` with the x and y channels swapped."""
    frame = pd.DataFrame({"x": [1, 4, 2, 3, 5], "y": [6, 3, 4, 5, 2]})
    chart = frame.vgplot.barh()

    utils.validate_vegalite(chart)
    assert chart.mark == "bar"

    # The opacity channel is expected to exist, but its value is not checked.
    expected = dict(y="index", x="value", color="variable", opacity=utils.IGNORE)
    utils.check_encodings(chart, **expected)

    variables = set(pd.unique(chart.data["variable"]))
    assert variables == {"x", "y"}
    assert chart["encoding"]["x"]["stack"] is None
146 | 
147 | 
def test_barh_stacked():
    """``barh(stacked=True)`` stacks the x channel from zero."""
    frame = pd.DataFrame({"x": [1, 4, 2, 3, 5], "y": [6, 3, 4, 5, 2]})
    chart = frame.vgplot.barh(stacked=True)

    utils.validate_vegalite(chart)
    assert chart.mark == "bar"
    utils.check_encodings(chart, y="index", x="value", color="variable")

    variables = set(pd.unique(chart.data["variable"]))
    assert variables == {"x", "y"}
    assert chart["encoding"]["x"]["stack"] == "zero"
158 | 
159 | 
def test_barh_xy():
    """``barh(x, y)`` puts the ``x`` column on the vertical axis, unstacked."""
    frame = pd.DataFrame({"x": [1, 4, 2, 3, 5], "y": [6, 3, 4, 5, 2]})
    chart = frame.vgplot.barh(x="x", y="y")

    utils.validate_vegalite(chart)
    assert chart.mark == "bar"
    utils.check_encodings(chart, x="value", y="x", color="variable")

    variables = set(pd.unique(chart.data["variable"]))
    assert variables == {"y"}
    assert chart["encoding"]["x"]["stack"] is None
170 | 
171 | 
def test_barh_xy_stacked():
    """``barh(x, y, stacked=True)`` stacks the x channel from zero."""
    frame = pd.DataFrame({"x": [1, 4, 2, 3, 5], "y": [6, 3, 4, 5, 2]})
    chart = frame.vgplot.barh(x="x", y="y", stacked=True)

    utils.validate_vegalite(chart)
    assert chart.mark == "bar"
    utils.check_encodings(chart, x="value", y="x", color="variable")

    variables = set(pd.unique(chart.data["variable"]))
    assert variables == {"y"}
    assert chart["encoding"]["x"]["stack"] == "zero"
182 | 
183 | 
def test_series_barh():
    """``Series.vgplot.barh()`` puts the index on y and the values on x."""
    labels = ["A", "B", "C", "D"]
    series = pd.Series([4, 5, 4, 5], index=labels)
    chart = series.vgplot.barh()

    utils.validate_vegalite(chart)
    assert chart.mark == "bar"
    utils.check_encodings(chart, y="index", x="0")
190 | 
191 | 
def test_df_area_simple():
    """Default ``area()`` is stacked and encodes every column."""
    frame = pd.DataFrame({"x": [1, 4, 2, 3, 5], "y": [6, 3, 4, 5, 2]})
    chart = frame.vgplot.area()

    utils.validate_vegalite(chart)
    assert chart.mark == "area"
    utils.check_encodings(chart, x="index", y="value", color="variable")

    variables = set(pd.unique(chart.data["variable"]))
    assert variables == {"x", "y"}
    assert chart["encoding"]["y"]["stack"] == "zero"
203 | 
204 | 
def test_df_area_unstacked():
    """``area(stacked=False)`` overlays the areas with opacity 0.7."""
    frame = pd.DataFrame({"x": [1, 4, 2, 3, 5], "y": [6, 3, 4, 5, 2]})
    chart = frame.vgplot.area(stacked=False)

    utils.validate_vegalite(chart)
    assert chart.mark == "area"

    # Opacity is checked separately below, so only its presence matters here.
    expected = dict(x="index", y="value", color="variable", opacity=utils.IGNORE)
    utils.check_encodings(chart, **expected)

    variables = set(pd.unique(chart.data["variable"]))
    assert variables == {"x", "y"}

    encoding = chart["encoding"]
    assert encoding["y"]["stack"] is None
    assert encoding["opacity"]["value"] == 0.7
218 | 
219 | 
def test_df_area_xy():
    """``area(x, y)`` is stacked and keeps only the requested y column."""
    columns = {"x": [1, 4, 2, 3, 5], "y": [6, 3, 4, 5, 2], "z": range(5)}
    frame = pd.DataFrame(columns)
    chart = frame.vgplot.area(x="x", y="y")

    utils.validate_vegalite(chart)
    assert chart.mark == "area"
    utils.check_encodings(chart, x="x", y="value", color="variable")

    variables = set(pd.unique(chart.data["variable"]))
    assert variables == {"y"}
    assert chart["encoding"]["y"]["stack"] == "zero"
230 | 
231 | 
def test_df_area_xy_unstacked():
    """``area(x, y, stacked=False)`` leaves the y channel unstacked."""
    columns = {"x": [1, 4, 2, 3, 5], "y": [6, 3, 4, 5, 2], "z": range(5)}
    frame = pd.DataFrame(columns)
    chart = frame.vgplot.area(x="x", y="y", stacked=False)

    utils.validate_vegalite(chart)
    assert chart.mark == "area"
    utils.check_encodings(chart, x="x", y="value", color="variable")

    variables = set(pd.unique(chart.data["variable"]))
    assert variables == {"y"}
    assert chart["encoding"]["y"]["stack"] is None
242 | 
243 | 
def test_series_area():
    """``Series.vgplot.area()`` plots the values against the index."""
    series = pd.Series([3, 2, 3, 2, 3])
    chart = series.vgplot.area()

    utils.validate_vegalite(chart)
    assert chart.mark == "area"
    utils.check_encodings(chart, x="index", y="0")
250 | 
251 | 
@pytest.mark.parametrize("stacked", [True, False])
@pytest.mark.parametrize("histtype", ["bar", "step", "stepfilled"])
@pytest.mark.parametrize("maxbins", [3, 5, 10])
def test_df_hist(stacked, histtype, maxbins):
    """``DataFrame.vgplot.hist`` for each histtype, bin count and stacking mode."""
    expected_marks = {
        "bar": "bar",
        "step": {"type": "line", "interpolate": "step"},
        "stepfilled": {"type": "area", "interpolate": "step"},
    }
    frame = pd.DataFrame({"x": range(10), "y": range(10)})

    chart = frame.vgplot.hist(bins=maxbins, stacked=stacked, histtype=histtype)
    assert chart.mark == expected_marks[histtype]

    # Only the unstacked histogram is expected to carry an opacity channel.
    expected_channels = dict(x="value", y=utils.IGNORE, color="variable")
    if not stacked:
        expected_channels["opacity"] = utils.IGNORE
    utils.check_encodings(chart, **expected_channels)

    encoding = chart["encoding"]
    expected_stack = "zero" if stacked else None
    assert encoding["x"]["bin"] == {"maxbins": maxbins}
    assert encoding["y"]["aggregate"] == "count"
    assert encoding["y"]["stack"] == expected_stack
279 | 
280 | 
@pytest.mark.parametrize("histtype", ["bar", "step", "stepfilled"])
@pytest.mark.parametrize("maxbins", [3, 5, 10])
def test_series_hist(histtype, maxbins):
    """``Series.vgplot.hist`` for each histtype and bin count."""
    expected_marks = {
        "bar": "bar",
        "step": {"type": "line", "interpolate": "step"},
        "stepfilled": {"type": "area", "interpolate": "step"},
    }
    series = pd.Series(range(10))

    chart = series.vgplot.hist(bins=maxbins, histtype=histtype)
    assert chart.mark == expected_marks[histtype]

    utils.check_encodings(chart, x="0", y=utils.IGNORE)

    encoding = chart["encoding"]
    assert encoding["x"]["bin"] == {"maxbins": maxbins}
    assert encoding["y"]["aggregate"] == "count"
297 | 
298 | 
def test_df_hexbin():
    """``hexbin(x, y)`` without ``C`` colours the binned cells by count."""
    gridsize = 10
    frame = pd.DataFrame({"x": range(10), "y": range(10), "C": range(10)})
    chart = frame.vgplot.hexbin(x="x", y="y", gridsize=gridsize)

    assert chart.mark == "rect"
    utils.check_encodings(chart, x="x", y="y", color=utils.IGNORE)

    encoding = chart["encoding"]
    assert encoding["x"]["bin"] == alt.Bin(maxbins=gridsize)
    assert encoding["y"]["bin"] == alt.Bin(maxbins=gridsize)
    assert encoding["color"]["aggregate"] == "count"
308 | 
309 | 
def test_df_hexbin_C():
    """``hexbin(x, y, C=...)`` colours the cells by the mean of ``C``."""
    gridsize = 10
    frame = pd.DataFrame({"x": range(10), "y": range(10), "C": range(10)})
    chart = frame.vgplot.hexbin(x="x", y="y", C="C", gridsize=gridsize)

    assert chart.mark == "rect"
    utils.check_encodings(chart, x="x", y="y", color="C")

    encoding = chart["encoding"]
    assert encoding["x"]["bin"] == alt.Bin(maxbins=gridsize)
    assert encoding["y"]["bin"] == alt.Bin(maxbins=gridsize)
    assert encoding["color"]["aggregate"] == "mean"
319 | 
320 | 
def test_df_hexbin_Cfunc():
    """``reduce_C_function=min`` selects the ``"min"`` colour aggregate."""
    columns = {"x": range(10), "y": range(10), "C": range(10)}
    frame = pd.DataFrame(columns)
    chart = frame.vgplot.hexbin(x="x", y="y", C="C", reduce_C_function=min)

    color_aggregate = chart["encoding"]["color"]["aggregate"]
    assert color_aggregate == "min"
    utils.check_encodings(chart, x="x", y="y", color="C")
326 | 
327 | 
def test_df_kde():
    """``DataFrame.vgplot.kde()`` draws one density line per column."""
    frame = pd.DataFrame({"x": range(10), "y": range(10)})
    chart = frame.vgplot.kde(bw_method="scott")

    assert chart.mark == "line"
    # The x field is expected to be named with a single space.
    utils.check_encodings(chart, x=" ", y="Density", color=utils.IGNORE)

    variables = set(pd.unique(chart.data["variable"]))
    assert variables == {"x", "y"}
335 | 
336 | 
def test_df_kde_y():
    """``kde(y=...)`` restricts the density plot to the requested column."""
    frame = pd.DataFrame({"x": range(10), "y": range(10)})
    chart = frame.vgplot.kde(y="y", bw_method="scott")

    assert chart.mark == "line"
    utils.check_encodings(chart, x=" ", y="Density", color=utils.IGNORE)

    variables = set(pd.unique(chart.data["variable"]))
    assert variables == {"y"}
344 | 
345 | 
def test_ser_kde():
    """``Series.vgplot.kde()`` uses the Series name for the y field."""
    series = pd.Series(range(10), name="x")
    chart = series.vgplot.kde(bw_method="scott")

    assert chart.mark == "line"
    utils.check_encodings(chart, x=' ', y='x')
355 | 


--------------------------------------------------------------------------------
/pdvega/_core.py:
--------------------------------------------------------------------------------
   1 | import numpy as np
   2 | import pandas as pd
   3 | import altair as alt
   4 | 
   5 | from ._utils import (
   6 |     infer_vegalite_type,
   7 |     unpivot_frame,
   8 |     warn_if_keywords_unused,
   9 |     validate_aggregation,
  10 | )
  11 | from ._pandas_internals import (
  12 |     PandasObject,
  13 |     register_dataframe_accessor,
  14 |     register_series_accessor,
  15 | )
  16 | 
  17 | 
  18 | def _x(x, df, ordinal_threshold=6, **kwargs):
  19 |     return alt.X(
  20 |         field=x,
  21 |         type=infer_vegalite_type(df[x], ordinal_threshold=ordinal_threshold),
  22 |         **kwargs
  23 |     )
  24 | 
  25 | 
  26 | def _y(y, df, ordinal_threshold=6, **kwargs):
  27 |     return alt.Y(
  28 |         field=y,
  29 |         type=infer_vegalite_type(df[y], ordinal_threshold=ordinal_threshold),
  30 |         **kwargs
  31 |     )
  32 | 
  33 | 
  34 | class BasePlotMethods(PandasObject):
  35 | 
  36 |     def __init__(self, data):
  37 |         self._data = data
  38 | 
  39 |     def __call__(self, kind, *args, **kwargs):
  40 |         raise NotImplementedError()
  41 | 
  42 |     def _plot(self, data=None, width=450, height=300, title=None, figsize=None, dpi=75):
  43 |         if data is None:
  44 |             data = self._data
  45 | 
  46 |         if title is None:
  47 |             title = ""
  48 | 
  49 |         if figsize is not None:
  50 |             width_inches, height_inches = figsize
  51 |             width = 0.8 * dpi * width_inches
  52 |             height = 0.8 * dpi * height_inches
  53 | 
  54 |         chart = alt.Chart(data=data).properties(width=width, height=height, title=title)
  55 |         return chart
  56 | 
  57 | 
  58 | @register_series_accessor("vgplot")
  59 | class SeriesPlotMethods(BasePlotMethods):
  60 |     """Series Accessor & Method for creating Vega-Lite visualizations.
  61 | 
  62 |     Examples
  63 |     --------
  64 |     >>> s.vgplot.line()  # doctest: +SKIP
  65 |     >>> s.vgplot.area()  # doctest: +SKIP
  66 |     >>> s.vgplot.bar()  # doctest: +SKIP
  67 |     >>> s.vgplot.barh()  # doctest: +SKIP
  68 |     >>> s.vgplot.hist()  # doctest: +SKIP
  69 |     >>> s.vgplot.kde()  # doctest: +SKIP
  70 |     >>> s.vgplot.density()  # doctest: +SKIP
  71 | 
  72 |     Plotting methods can also be accessed by calling the accessor as a method
  73 |     with the ``kind`` argument: ``s.vgplot(kind='line', **kwds)``
  74 |     is equivalent to ``s.vgplot.line(**kwds)``
  75 |     """
  76 | 
  77 |     def __call__(self, kind="line", **kwargs):
  78 |         try:
  79 |             plot_method = getattr(self, kind)
  80 |         except AttributeError:
  81 |             raise ValueError(
  82 |                 "kind='{0}' not valid for {1}" "".format(kind, self.__class__.__name__)
  83 |             )
  84 |         return plot_method(**kwargs)
  85 | 
  86 |     def line(self, alpha=None, width=450, height=300, ax=None, **kwds):
  87 |         """Line plot for Series data
  88 | 
  89 |         >>> series.vgplot.line()  # doctest: +SKIP
  90 | 
  91 |         Parameters
  92 |         ----------
  93 |         alpha : float, optional
  94 |             transparency level, 0 <= alpha <= 1
  95 |         width : int, optional
  96 |             the width of the plot in pixels
  97 |         height : int, optional
  98 |             the height of the plot in pixels
  99 |         ax: altair.Chart, optional
 100 |             chart to be overlayed with this vis (convinience method for `chart1 + chart2`)
 101 | 
 102 |         Returns
 103 |         -------
 104 |         chart : altair.Chart
 105 |             The altair plot representation
 106 |         """
 107 |         df = self._data.reset_index()
 108 |         df.columns = map(str, df.columns)
 109 |         x, y = df.columns
 110 | 
 111 |         chart = self._plot(
 112 |             data=df,
 113 |             width=width,
 114 |             height=height,
 115 |             title=kwds.pop("title", ""),
 116 |             figsize=kwds.pop("figsize", None),
 117 |             dpi=kwds.pop("dpi", None),
 118 |         )
 119 | 
 120 |         chart = chart.mark_line().encode(x=_x(x, df), y=_y(y, df))
 121 | 
 122 |         if alpha is not None:
 123 |             assert 0 <= alpha <= 1
 124 |             chart = chart.encode(opacity=alt.value(alpha))
 125 | 
 126 |         if ax is not None:
 127 |             return ax + chart
 128 | 
 129 |         warn_if_keywords_unused("line", kwds)
 130 |         return chart
 131 | 
 132 |     def area(self, alpha=None, width=450, height=300, ax=None, **kwds):
 133 |         """Area plot for Series data
 134 | 
 135 |         >>> series.vgplot.area()  # doctest: +SKIP
 136 | 
 137 |         Parameters
 138 |         ----------
 139 |         alpha : float, optional
 140 |             transparency level, 0 <= alpha <= 1
 141 |         width : int, optional
 142 |             the width of the plot in pixels
 143 |         height : int, optional
 144 |             the height of the plot in pixels
 145 |         ax: altair.Chart, optional
 146 |             chart to be overlayed with this vis (convinience method for `chart1 + chart2`)
 147 | 
 148 |         Returns
 149 |         -------
 150 |         chart : alt.Chart
 151 |             altair chart representation
 152 |         """
 153 |         df = self._data.reset_index()
 154 |         df.columns = map(str, df.columns)
 155 |         x, y = df.columns
 156 | 
 157 |         chart = self._plot(
 158 |             data=df,
 159 |             width=width,
 160 |             height=height,
 161 |             title=kwds.pop("title", ""),
 162 |             figsize=kwds.pop("figsize", None),
 163 |             dpi=kwds.pop("dpi", None),
 164 |         ).mark_area().encode(
 165 |             x=_x(x, df), y=_y(y, df)
 166 |         )
 167 | 
 168 |         if alpha is not None:
 169 |             assert 0 <= alpha <= 1
 170 |             chart = chart.encode(opacity=alt.value(alpha))
 171 | 
 172 |         if ax is not None:
 173 |             return ax + chart
 174 | 
 175 |         warn_if_keywords_unused("area", kwds)
 176 |         return chart
 177 | 
 178 |     def bar(self, alpha=None, width=450, height=300, ax=None, **kwds):
 179 |         """Bar plot for Series data
 180 | 
 181 |         >>> series.vgplot.bar()  # doctest: +SKIP
 182 | 
 183 |         Parameters
 184 |         ----------
 185 |         alpha : float, optional
 186 |             transparency level, 0 <= alpha <= 1
 187 |         width : int, optional
 188 |             the width of the plot in pixels
 189 |         height : int, optional
 190 |             the height of the plot in pixels
 191 |         ax: altair.Chart, optional
 192 |             chart to be overlayed with this vis (convinience method for `chart1 + chart2`)
 193 | 
 194 |         Returns
 195 |         -------
 196 |         chart : alt.Chart
 197 |             altair chart representation
 198 |         """
 199 | 
 200 |         df = self._data.reset_index()
 201 |         df.columns = map(str, df.columns)
 202 |         x, y = df.columns
 203 | 
 204 |         chart = self._plot(
 205 |             data=df,
 206 |             width=width,
 207 |             height=height,
 208 |             title=kwds.pop("title", ""),
 209 |             figsize=kwds.pop("figsize", None),
 210 |             dpi=kwds.pop("dpi", None),
 211 |         ).mark_bar().encode(
 212 |             x=_x(x, df), y=_y(y, df)
 213 |         )
 214 | 
 215 |         if alpha is not None:
 216 |             assert 0 <= alpha <= 1
 217 |             chart = chart.encode(opacity=alt.value(alpha))
 218 | 
 219 |         if ax is not None:
 220 |             return ax + chart
 221 | 
 222 |         warn_if_keywords_unused("bar", kwds)
 223 |         return chart
 224 | 
 225 |     def barh(self, alpha=None, width=450, height=300, ax=None, **kwds):
 226 |         """Horizontal bar plot for Series data
 227 | 
 228 |         >>> series.vgplot.barh()  # doctest: +SKIP
 229 | 
 230 |         Parameters
 231 |         ----------
 232 |         alpha : float, optional
 233 |             transparency level, 0 <= alpha <= 1
 234 |         width : int, optional
 235 |             the width of the plot in pixels
 236 |         height : int, optional
 237 |             the height of the plot in pixels
 238 |         ax: altair.Chart, optional
 239 |             chart to be overlayed with this vis (convinience method for `chart1 + chart2`)
 240 | 
 241 |         Returns
 242 |         -------
 243 |         chart : alt.Chart
 244 |             altair chart representation
 245 |         """
 246 |         chart = self.bar(alpha=alpha, width=width, height=height, **kwds)
 247 | 
 248 |         enc = chart.encoding
 249 |         enc["x"], enc["y"] = enc["y"], enc["x"]
 250 | 
 251 |         if ax is not None:
 252 |             return ax + chart
 253 |         return chart
 254 | 
 255 |     def hist(
 256 |         self,
 257 |         bins=10,
 258 |         alpha=None,
 259 |         histtype="bar",
 260 |         width=450,
 261 |         height=300,
 262 |         ax=None,
 263 |         **kwds
 264 |     ):
 265 |         """Histogram plot for Series data
 266 | 
 267 |         >>> series.vgplot.hist()  # doctest: +SKIP
 268 | 
 269 |         Parameters
 270 |         ----------
 271 |         bins : integer, optional
 272 |             the maximum number of bins to use for the histogram (default: 10)
 273 |         alpha : float, optional
 274 |             transparency level, 0 <= alpha <= 1
 275 |         histtype : string, {'bar', 'step', 'stepfilled'}
 276 |             The type of histogram to generate. Default is 'bar'.
 277 |         width : int, optional
 278 |             the width of the plot in pixels
 279 |         height : int, optional
 280 |             the height of the plot in pixels
 281 |         ax: altair.Chart, optional
 282 |             chart to be overlayed with this vis (convinience method for `chart1 + chart2`)
 283 | 
 284 |         Returns
 285 |         -------
 286 |         chart : alt.Chart
 287 |             altair chart representation
 288 |         """
 289 |         df = self._data.to_frame().reset_index(drop=False)
 290 |         df.columns = df.columns.astype(str)
 291 |         y, x = df.columns
 292 | 
 293 |         marks = {
 294 |             "bar": "bar",
 295 |             "barstacked": "bar",
 296 |             "stepfilled": {"type": "area", "interpolate": "step"},
 297 |             "step": {"type": "line", "interpolate": "step"},
 298 |         }
 299 | 
 300 |         if histtype in marks:
 301 |             mark = marks[histtype]
 302 |         else:
 303 |             raise ValueError("histtype '{0}' is not recognized" "".format(histtype))
 304 | 
 305 |         chart = self._plot(
 306 |             data=df,
 307 |             width=width,
 308 |             height=height,
 309 |             title=kwds.pop("title", ""),
 310 |             figsize=kwds.pop("figsize", None),
 311 |             dpi=kwds.pop("dpi", None),
 312 |         )
 313 | 
 314 |         chart.mark = mark
 315 |         chart = chart.encode(
 316 |             x=_x(x, df, bin={"maxbins": bins}), 
 317 |             y=_y(y, df, aggregate="count")
 318 |         )
 319 | 
 320 |         if alpha is not None:
 321 |             assert 0 <= alpha <= 1
 322 |             chart = chart.encode(opacity=alt.value(alpha))
 323 | 
 324 |         if ax is not None:
 325 |             return ax + chart
 326 | 
 327 |         warn_if_keywords_unused("hist", kwds)
 328 |         return chart
 329 | 
 330 |     def kde(self, bw_method=None, alpha=None, width=450, height=300, ax=None, **kwds):
 331 |         """Kernel Density Estimation plot for Series data
 332 | 
 333 |         >>> series.vgplot.kde()  # doctest: +SKIP
 334 | 
 335 |         Parameters
 336 |         ----------
 337 |         bw_method : str, scalar or callable, optional
 338 |             The method used to calculate the estimator bandwidth. This can be
 339 |             'scott', 'silverman', a scalar constant or a callable.
 340 |             See `scipy.stats.gaussian_kde` for more details.
 341 |         alpha : float, optional
 342 |             transparency level, 0 <= alpha <= 1
 343 |         width : int, optional
 344 |             the width of the plot in pixels
 345 |         height : int, optional
 346 |             the height of the plot in pixels
 347 |         ax: altair.Chart, optional
 348 |             chart to be overlayed with this vis (convinience method for `chart1 + chart2`)
 349 | 
 350 |         Returns
 351 |         -------
 352 |         chart : alt.Chart
 353 |             altair chart representation
 354 |         """
 355 |         from scipy.stats import gaussian_kde
 356 | 
 357 |         data = self._data
 358 |         tmin, tmax = data.min(), data.max()
 359 |         trange = tmax - tmin
 360 |         t = np.linspace(tmin - 0.5 * trange, tmax + 0.5 * trange, 1000)
 361 | 
 362 |         kde_ser = pd.Series(
 363 |             gaussian_kde(data, bw_method=bw_method).evaluate(t), index=t, name=data.name
 364 |         )
 365 | 
 366 |         kde_ser.index.name = " "
 367 |         f = self.__class__(kde_ser)
 368 |         return f.line(alpha=alpha, width=width, height=height, ax=ax, **kwds)
 369 | 
 370 |     density = kde
 371 | 
 372 | 
 373 | @register_dataframe_accessor("vgplot")
 374 | class FramePlotMethods(BasePlotMethods):
 375 |     """DataFrame Accessor & Method for creating Vega-Lite visualizations.
 376 | 
 377 |     Examples
 378 |     --------
 379 |     >>> df.vgplot.line()  # doctest: +SKIP
 380 |     >>> df.vgplot.area()  # doctest: +SKIP
 381 |     >>> df.vgplot.bar()  # doctest: +SKIP
 382 |     >>> df.vgplot.barh()  # doctest: +SKIP
 383 |     >>> df.vgplot.hist()  # doctest: +SKIP
 384 |     >>> df.vgplot.kde()  # doctest: +SKIP
 385 |     >>> df.vgplot.density()  # doctest: +SKIP
 386 |     >>> df.vgplot.scatter(x, y)  # doctest: +SKIP
 387 |     >>> df.vgplot.hexbin(x, y)  # doctest: +SKIP
 388 | 
 389 |     Plotting methods can also be accessed by calling the accessor as a method
 390 |     with the ``kind`` argument: ``df.vgplot(kind='line', **kwds)``
 391 |     is equivalent to ``df.vgplot.line(**kwds)``
 392 |     """
 393 | 
 394 |     def __call__(self, x=None, y=None, kind="line", **kwargs):
 395 |         try:
 396 |             plot_method = getattr(self, kind)
 397 |         except AttributeError:
 398 |             raise ValueError(
 399 |                 "kind='{0}' not valid for {1}" "".format(kind, self.__class__.__name__)
 400 |             )
 401 |         return plot_method(x=x, y=y, **kwargs)
 402 | 
 403 |     def line(
 404 |         self,
 405 |         x=None,
 406 |         y=None,
 407 |         alpha=None,
 408 |         var_name="variable",
 409 |         value_name="value",
 410 |         width=450,
 411 |         height=300,
 412 |         ax=None,
 413 |         **kwds
 414 |     ):
 415 |         """Line plot for DataFrame data
 416 | 
 417 |         >>> dataframe.vgplot.line()  # doctest: +SKIP
 418 | 
 419 |         Parameters
 420 |         ----------
 421 |         x : string, optional
 422 |             the column to use as the x-axis variable. If not specified, the
 423 |             index will be used.
 424 |         y : string, optional
 425 |             the column to use as the y-axis variable. If not specified, all
 426 |             columns (except x if specified) will be used.
 427 |         alpha : float, optional
 428 |             transparency level, 0 <= alpha <= 1
 429 |         var_name : string, optional
 430 |             the legend title
 431 |         value_name : string, optional
 432 |             the y-axis label
 433 |         width : int, optional
 434 |             the width of the plot in pixels
 435 |         height : int, optional
 436 |             the height of the plot in pixels
 437 |         ax: altair.Chart, optional
 438 |             chart to be overlayed with this vis (convinience method for `chart1 + chart2`)
 439 | 
 440 |         Returns
 441 |         -------
 442 |         chart : alt.Chart
 443 |             altair chart representation
 444 |         """
 445 |         use_order = (x is not None)
 446 | 
 447 |         if use_order:
 448 |             df = self._data.reset_index()
 449 |             order = df.columns[0]
 450 |             df = unpivot_frame(
 451 |                 df, x=(x, order), y=y, var_name=var_name, value_name=value_name
 452 |             )
 453 |         else:
 454 |             df = unpivot_frame(
 455 |                 self._data, x=x, y=y, var_name=var_name, value_name=value_name
 456 |             )
 457 |             x = df.columns[0]
 458 | 
 459 |         chart = self._plot(
 460 |             data=df,
 461 |             width=width,
 462 |             height=height,
 463 |             title=kwds.pop("title", ""),
 464 |             figsize=kwds.pop("figsize", None),
 465 |             dpi=kwds.pop("dpi", None),
 466 |         ).mark_line().encode(
 467 |             x=_x(x, df), y=_y(value_name, df), color=alt.Color(var_name, type="nominal")
 468 |         )
 469 | 
 470 |         if alpha is not None:
 471 |             assert 0 <= alpha <= 1
 472 |             chart = chart.encode(opacity=alt.value(alpha))
 473 | 
 474 |         if use_order:
 475 |             chart.encoding["order"] = {
 476 |                 "field": order, "type": infer_vegalite_type(df[order])
 477 |             }
 478 | 
 479 |         if ax is not None:
 480 |             return ax + chart
 481 | 
 482 |         warn_if_keywords_unused("line", kwds)
 483 |         return chart
 484 | 
 485 |     def scatter(
 486 |         self, x, y, c=None, s=None, alpha=None, width=450, height=300, ax=None, **kwds
 487 |     ):
 488 |         """Scatter plot for DataFrame data
 489 | 
 490 |         >>> dataframe.vgplot.scatter(x, y)  # doctest: +SKIP
 491 | 
 492 |         Parameters
 493 |         ----------
 494 |         x : string
 495 |             the column to use as the x-axis variable.
 496 |         y : string
 497 |             the column to use as the y-axis variable.
 498 |         c : string, optional
 499 |             the column to use to encode the color of the points
 500 |         s : string, optional
 501 |             the column to use to encode the size of the points
 502 |         alpha : float, optional
 503 |             transparency level, 0 <= alpha <= 1
 504 |         width : int, optional
 505 |             the width of the plot in pixels
 506 |         height : int, optional
 507 |             the height of the plot in pixels
 508 |         ax: altair.Chart, optional
 509 |             chart to be overlayed with this vis (convinience method for `chart1 + chart2`)
 510 | 
 511 |         Returns
 512 |         -------
 513 |         chart : alt.Chart
 514 |             altair chart representation
 515 |         """
 516 |         df = self._data
 517 | 
 518 |         chart = self._plot(
 519 |             width=width,
 520 |             height=height,
 521 |             title=kwds.pop("title", ""),
 522 |             figsize=kwds.pop("figsize", None),
 523 |             dpi=kwds.pop("dpi", None),
 524 |         ).mark_point().encode(
 525 |             x=_x(x, df, ordinal_threshold=0), y=_y(y, df, ordinal_threshold=0)
 526 |         )
 527 | 
 528 |         if alpha is not None:
 529 |             assert 0 <= alpha <= 1
 530 |             chart = chart.encode(opacity=alt.value(alpha))
 531 | 
 532 |         if c is not None:
 533 |             chart.encoding["color"] = {"field": c, "type": infer_vegalite_type(df[c])}
 534 | 
 535 |         if s is not None:
 536 |             chart.encoding["size"] = {"field": s, "type": infer_vegalite_type(df[s])}
 537 | 
 538 |         if ax is not None:
 539 |             return ax + chart
 540 | 
 541 |         warn_if_keywords_unused("scatter", kwds)
 542 |         return chart
 543 | 
 544 |     def area(
 545 |         self,
 546 |         x=None,
 547 |         y=None,
 548 |         stacked=True,
 549 |         alpha=None,
 550 |         var_name="variable",
 551 |         value_name="value",
 552 |         width=450,
 553 |         height=300,
 554 |         ax=None,
 555 |         **kwds
 556 |     ):
 557 |         """Area plot for DataFrame data
 558 | 
 559 |         >>> dataframe.vgplot.area()  # doctest: +SKIP
 560 | 
 561 |         Parameters
 562 |         ----------
 563 |         x : string, optional
 564 |             the column to use as the x-axis variable. If not specified, the
 565 |             index will be used.
 566 |         y : string, optional
 567 |             the column to use as the y-axis variable. If not specified, all
 568 |             columns (except x if specified) will be used.
 569 |         stacked : bool, optional
 570 |             if True (default) then create a stacked area chart. Otherwise,
 571 |             areas will overlap
 572 |         alpha : float, optional
 573 |             transparency level, 0 <= alpha <= 1
 574 |         var_name : string, optional
 575 |             the legend title
 576 |         value_name : string, optional
 577 |             the y-axis label
 578 |         width : int, optional
 579 |             the width of the plot in pixels
 580 |         height : int, optional
 581 |             the height of the plot in pixels
 582 |         ax: altair.Chart, optional
 583 |             chart to be overlayed with this vis (convinience method for `chart1 + chart2`)
 584 | 
 585 |         Returns
 586 |         -------
 587 |         chart : alt.Chart
 588 |             altair chart representation
 589 |         """
 590 |         df = unpivot_frame(
 591 |             self._data, x=x, y=y, var_name=var_name, value_name=value_name
 592 |         )
 593 | 
 594 |         x = df.columns[0]
 595 | 
 596 |         if alpha is None and not stacked and df[var_name].nunique() > 1:
 597 |             alpha = 0.7
 598 | 
 599 |         chart = self._plot(
 600 |             data=df,
 601 |             width=width,
 602 |             height=height,
 603 |             title=kwds.pop("title", ""),
 604 |             figsize=kwds.pop("figsize", None),
 605 |             dpi=kwds.pop("dpi", None),
 606 |         ).mark_area().encode(
 607 |             x=_x(x, df),
 608 |             y=alt.Y(
 609 |                 value_name,
 610 |                 type=infer_vegalite_type(df[value_name]),
 611 |                 stack=(None, "zero")[stacked],
 612 |             ),
 613 |             color=alt.Color(field=var_name, type=infer_vegalite_type(df[var_name])),
 614 |         )
 615 | 
 616 |         if alpha is not None:
 617 |             assert 0 <= alpha <= 1
 618 |             chart = chart.encode(opacity=alt.value(alpha))
 619 | 
 620 |         if ax is not None:
 621 |             return ax + chart
 622 | 
 623 |         warn_if_keywords_unused("area", kwds)
 624 |         return chart
 625 | 
 626 |     def bar(
 627 |         self,
 628 |         x=None,
 629 |         y=None,
 630 |         stacked=False,
 631 |         alpha=None,
 632 |         var_name="variable",
 633 |         value_name="value",
 634 |         width=450,
 635 |         height=300,
 636 |         ax=None,
 637 |         **kwds
 638 |     ):
 639 |         """Bar plot for DataFrame data
 640 | 
 641 |         >>> dataframe.vgplot.bar()  # doctest: +SKIP
 642 | 
 643 |         Parameters
 644 |         ----------
 645 |         x : string, optional
 646 |             the column to use as the x-axis variable. If not specified, the
 647 |             index will be used.
 648 |         y : string, optional
 649 |             the column to use as the y-axis variable. If not specified, all
 650 |             columns (except x if specified) will be used.
 651 |         stacked : bool, optional
 652 |             if True (default) then create a stacked area chart. Otherwise,
 653 |             areas will overlap
 654 |         alpha : float, optional
 655 |             transparency level, 0 <= alpha <= 1
 656 |         var_name : string, optional
 657 |             the legend title
 658 |         value_name : string, optional
 659 |             the y-axis label
 660 |         width : int, optional
 661 |             the width of the plot in pixels
 662 |         height : int, optional
 663 |             the height of the plot in pixels
 664 |         ax: altair.Chart, optional
 665 |             chart to be overlayed with this vis (convinience method for `chart1 + chart2`)
 666 | 
 667 |         Returns
 668 |         -------
 669 |         chart : alt.Chart
 670 |             altair chart representation
 671 |         """
 672 |         df = unpivot_frame(
 673 |             self._data, x=x, y=y, var_name=var_name, value_name=value_name
 674 |         )
 675 |         x = df.columns[0]
 676 | 
 677 |         if alpha is None and not stacked and df[var_name].nunique() > 1:
 678 |             alpha = 0.7
 679 | 
 680 |         chart = self._plot(
 681 |             data=df,
 682 |             width=width,
 683 |             height=height,
 684 |             title=kwds.pop("title", ""),
 685 |             figsize=kwds.pop("figsize", None),
 686 |             dpi=kwds.pop("dpi", None),
 687 |         ).mark_bar().encode(
 688 |             x=alt.X(x, type=infer_vegalite_type(df[x], ordinal_threshold=50)),
 689 |             y=alt.Y(
 690 |                 "value",
 691 |                 type=infer_vegalite_type(df["value"]),
 692 |                 stack=(None, "zero")[stacked],
 693 |             ),
 694 |             color=alt.Color(field="variable", type=infer_vegalite_type(df["variable"])),
 695 |         )
 696 | 
 697 |         if alpha is not None:
 698 |             assert 0 <= alpha <= 1
 699 |             chart = chart.encode(opacity=alt.value(alpha))
 700 | 
 701 |         if ax is not None:
 702 |             return ax + chart
 703 | 
 704 |         warn_if_keywords_unused("bar", kwds)
 705 |         return chart
 706 | 
 707 |     def barh(
 708 |         self,
 709 |         x=None,
 710 |         y=None,
 711 |         stacked=False,
 712 |         alpha=None,
 713 |         var_name="variable",
 714 |         value_name="value",
 715 |         width=450,
 716 |         height=300,
 717 |         ax=None,
 718 |         **kwds
 719 |     ):
 720 |         """Horizontal bar plot for DataFrame data
 721 | 
 722 |         >>> dataframe.vgplot.barh()  # doctest: +SKIP
 723 | 
 724 |         Parameters
 725 |         ----------
 726 |         x : string, optional
 727 |             the column to use as the x-axis variable. If not specified, the
 728 |             index will be used.
 729 |         y : string, optional
 730 |             the column to use as the y-axis variable. If not specified, all
 731 |             columns (except x if specified) will be used.
 732 |         stacked : bool, optional
 733 |             if True (default) then create a stacked area chart. Otherwise,
 734 |             areas will overlap
 735 |         alpha : float, optional
 736 |             transparency level, 0 <= alpha <= 1
 737 |         var_name : string, optional
 738 |             the legend title
 739 |         value_name : string, optional
 740 |             the y-axis label
 741 |         width : int, optional
 742 |             the width of the plot in pixels
 743 |         height : int, optional
 744 |             the height of the plot in pixels
 745 |         ax: altair.Chart, optional
 746 |             chart to be overlayed with this vis (convinience method for `chart1 + chart2`)
 747 | 
 748 |         Returns
 749 |         -------
 750 |         chart : alt.Chart
 751 |             altair chart representation
 752 |         """
 753 |         chart = self.bar(
 754 |             x=x,
 755 |             y=y,
 756 |             stacked=stacked,
 757 |             alpha=alpha,
 758 |             var_name=var_name,
 759 |             value_name=value_name,
 760 |             width=width,
 761 |             height=height,
 762 |             **kwds
 763 |         )
 764 | 
 765 |         enc = chart.encoding
 766 |         enc["x"], enc["y"] = enc["y"], enc["x"]
 767 |         if ax is not None:
 768 |             return ax + chart
 769 |         return chart
 770 | 
 771 |     def hist(
 772 |         self,
 773 |         x=None,
 774 |         y=None,
 775 |         by=None,
 776 |         bins=10,
 777 |         stacked=False,
 778 |         alpha=None,
 779 |         histtype="bar",
 780 |         var_name="variable",
 781 |         value_name="value",
 782 |         width=450,
 783 |         height=300,
 784 |         ax=None,
 785 |         **kwds
 786 |     ):
 787 |         """Histogram plot for DataFrame data
 788 | 
 789 |         >>> dataframe.vgplot.hist()  # doctest: +SKIP
 790 | 
 791 |         Parameters
 792 |         ----------
 793 |         x : string, optional
 794 |             the column to use as the x-axis variable. If not specified, the
 795 |             index will be used.
 796 |         y : string, optional
 797 |             the column to use as the y-axis variable. If not specified, all
 798 |             columns (except x if specified) will be used.
 799 |         by : string, optional
 800 |             the column by which to group the results
 801 |         bins : integer, optional
 802 |             the maximum number of bins to use for the histogram (default: 10)
 803 |         stacked : bool, optional
 804 |             if True (default) then create a stacked area chart. Otherwise,
 805 |             areas will overlap
 806 |         alpha : float, optional
 807 |             transparency level, 0 <= alpha <= 1
 808 |         histtype : string, {'bar', 'step', 'stepfilled'}
 809 |             The type of histogram to generate. Default is 'bar'.
 810 |         var_name : string, optional
 811 |             the legend title
 812 |         value_name : string, optional
 813 |             the y-axis label
 814 |         width : int, optional
 815 |             the width of the plot in pixels
 816 |         height : int, optional
 817 |             the height of the plot in pixels
 818 |         ax: altair.Chart, optional
 819 |             chart to be overlayed with this vis (convinience method for `chart1 + chart2`)
 820 | 
 821 |         Returns
 822 |         -------
 823 |         chart : alt.Chart
 824 |             altair chart representation
 825 |         """
 826 |         if by is not None:
 827 |             raise NotImplementedError("vgplot.hist `by` keyword")
 828 |         if x is not None or y is not None:
 829 |             raise NotImplementedError('"x" and "y" args to hist()')
 830 |         df = self._data.melt(var_name=var_name, value_name=value_name)
 831 | 
 832 |         marks = {
 833 |             "bar": "bar",
 834 |             "barstacked": "bar",
 835 |             "stepfilled": {"type": "area", "interpolate": "step"},
 836 |             "step": {"type": "line", "interpolate": "step"},
 837 |         }
 838 | 
 839 |         if histtype in marks:
 840 |             mark = marks[histtype]
 841 |         else:
 842 |             raise ValueError("histtype '{0}' is not recognized" "".format(histtype))
 843 | 
 844 |         if alpha is None and not stacked and df[var_name].nunique() > 1:
 845 |             alpha = 0.7
 846 | 
 847 |         chart = self._plot(
 848 |             data=df,
 849 |             width=width,
 850 |             height=height,
 851 |             title=kwds.pop("title", ""),
 852 |             figsize=kwds.pop("figsize", None),
 853 |             dpi=kwds.pop("dpi", None),
 854 |         )
 855 | 
 856 |         chart.mark = mark
 857 |         chart = chart.encode(
 858 |             x=alt.X(value_name, bin={"maxbins": bins}, type="quantitative"),
 859 |             y=alt.Y(
 860 |                 aggregate="count",
 861 |                 type="quantitative",
 862 |                 stack=("zero" if stacked else None),
 863 |             ),
 864 |             color=alt.Color(field=var_name, type="nominal"),
 865 |         )
 866 | 
 867 |         if alpha is not None:
 868 |             assert 0 <= alpha <= 1
 869 |             chart = chart.encode(opacity=alt.value(alpha))
 870 | 
 871 |         if ax is not None:
 872 |             return ax + chart
 873 | 
 874 |         warn_if_keywords_unused("hist", kwds)
 875 |         return chart
 876 | 
 877 |     def heatmap(
 878 |         self,
 879 |         x,
 880 |         y,
 881 |         C=None,
 882 |         reduce_C_function="mean",
 883 |         gridsize=100,
 884 |         alpha=None,
 885 |         width=450,
 886 |         height=300,
 887 |         ax=None,
 888 |         **kwds
 889 |     ):
 890 |         """Heatmap plot for DataFrame data
 891 | 
 892 |         Note that Vega-Lite does not support hexagonal binning, so this method
 893 |         returns a cartesian heatmap.
 894 | 
 895 |         >>> dataframe.vgplot.hexbin()  # doctest: +SKIP
 896 | 
 897 |         Parameters
 898 |         ----------
 899 |         x : string
 900 |             the column to use as the x-axis variable.
 901 |         y : string
 902 |             the column to use as the y-axis variable.
 903 |         C : string, optional
 904 |             the column to use to compute the mean within each bin. If not
 905 |             specified, the count within each bin will be used.
 906 |         reduce_C_function : string, default = 'mean'
 907 |             One of ['mean', 'sum', 'median', 'min', 'max', 'count'], or
 908 |             associated numpy or python builtin functions. Note that arbitrary
 909 |             callable functions are not supported.
 910 |         gridsize : int, optional
 911 |             the number of divisions in the x and y axis (default=100)
 912 |         alpha : float, optional
 913 |             transparency level, 0 <= alpha <= 1
 914 |         width : int, optional
 915 |             the width of the plot in pixels
 916 |         height : int, optional
 917 |             the height of the plot in pixels
 918 |         ax: altair.Chart, optional
 919 |             chart to be overlayed with this vis (convinience method for `chart1 + chart2`)
 920 | 
 921 |         Returns
 922 |         -------
 923 |         chart : alt.Chart
 924 |             altair chart representation
 925 |         """
 926 |         # TODO: Use actual hexbins rather than a grid heatmap
 927 |         reduce_C_function = validate_aggregation(reduce_C_function)
 928 | 
 929 |         if C is None:
 930 |             df = self._data[[x, y]]
 931 |         else:
 932 |             df = self._data[[x, y, C]]
 933 | 
 934 |         if C is None:
 935 |             color = alt.Color(aggregate="count", type="quantitative")
 936 |         else:
 937 |             color = alt.Color(field=C, aggregate=reduce_C_function, type="quantitative")
 938 | 
 939 |         color.scale = alt.Scale(scheme="greens")
 940 | 
 941 |         chart = self._plot(
 942 |             data=df,
 943 |             width=width,
 944 |             height=height,
 945 |             title=kwds.pop("title", ""),
 946 |             figsize=kwds.pop("figsize", None),
 947 |             dpi=kwds.pop("dpi", None),
 948 |         ).mark_rect().encode(
 949 |             x=alt.X(x, bin=alt.Bin(maxbins=gridsize), type="quantitative"),
 950 |             y=alt.Y(y, bin=alt.Bin(maxbins=gridsize), type="quantitative"),
 951 |             color=color,
 952 |         )
 953 | 
 954 |         if alpha is not None:
 955 |             assert 0 <= alpha <= 1
 956 |             chart = chart.encode(opacity=alt.value(alpha))
 957 | 
 958 |         if ax is not None:
 959 |             return ax + chart
 960 | 
 961 |         warn_if_keywords_unused("hexbin", kwds)
 962 |         return chart
 963 | 
 964 |     hexbin = heatmap
 965 | 
 966 |     def kde(
 967 |         self,
 968 |         x=None,
 969 |         y=None,
 970 |         bw_method=None,
 971 |         alpha=None,
 972 |         width=450,
 973 |         height=300,
 974 |         ax=None,
 975 |         **kwds
 976 |     ):
 977 |         """Kernel Density Estimate plot for DataFrame data
 978 | 
 979 |         >>> dataframe.vgplot.kde()  # doctest: +SKIP
 980 | 
 981 |         Parameters
 982 |         ----------
 983 |         x : string, optional
 984 |             the column to use as the x-axis variable. If not specified, the
 985 |             index will be used.
 986 |         y : string, optional
 987 |             the column to use as the y-axis variable. If not specified, all
 988 |             columns (except x if specified) will be used.
 989 |         bw_method : str, scalar or callable, optional
 990 |             The method used to calculate the estimator bandwidth. This can be
 991 |             'scott', 'silverman', a scalar constant or a callable.
 992 |             See `scipy.stats.gaussian_kde` for more details.
 993 |         alpha : float, optional
 994 |             transparency level, 0 <= alpha <= 1
 995 |         width : int, optional
 996 |             the width of the plot in pixels
 997 |         height : int, optional
 998 |             the height of the plot in pixels
 999 |         ax: altair.Chart, optional
1000 |             chart to be overlayed with this vis (convinience method for `chart1 + chart2`)
1001 | 
1002 |         Returns
1003 |         -------
1004 |         chart : alt.Chart
1005 |             altair chart representation
1006 |         """
1007 |         from scipy.stats import gaussian_kde as kde
1008 | 
1009 |         if x is not None:  # ??
1010 |             raise NotImplementedError('"x" argument to df.vgplot.kde()')
1011 | 
1012 |         if y is not None:
1013 |             df = self._data[y].to_frame()
1014 |         else:
1015 |             df = self._data
1016 | 
1017 |         tmin, tmax = df.min().min(), df.max().max()
1018 |         trange = tmax - tmin
1019 |         t = np.linspace(tmin - 0.5 * trange, tmax + 0.5 * trange, 1000)
1020 | 
1021 |         kde_df = pd.DataFrame(
1022 |             {col: kde(df[col], bw_method=bw_method).evaluate(t) for col in df}, index=t
1023 |         )
1024 |         kde_df.index.name = " "
1025 | 
1026 |         f = FramePlotMethods(kde_df)
1027 |         return f.line(
1028 |             value_name="Density", alpha=alpha, width=width, height=height, ax=ax, **kwds
1029 |         )
1030 | 
1031 |     density = kde
1032 | 


--------------------------------------------------------------------------------