├── altair_data_server
├── tests
│ ├── __init__.py
│ ├── test_entrypoint.py
│ ├── test_altair_server.py
│ └── test_provider.py
├── __init__.py
├── _altair_server.py
├── _background_server.py
└── _provide.py
├── requirements.txt
├── requirements_dev.txt
├── MANIFEST.in
├── setup.cfg
├── mypy.ini
├── postBuild
├── .travis.yml
├── Makefile
├── .github
└── workflows
│ ├── build.yml
│ └── lint.yml
├── RELEASING.md
├── CHANGES.md
├── LICENSE
├── .gitignore
├── setup.py
├── README.md
└── AltairDataServer.ipynb
/altair_data_server/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | altair
2 | portpicker
3 | tornado
4 |
--------------------------------------------------------------------------------
/requirements_dev.txt:
--------------------------------------------------------------------------------
1 | black
2 | flake8
3 | mypy
4 | pytest
5 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include *.md
2 | include Makefile
3 | include LICENSE
4 | include CHANGES.md
5 | include requirements.txt
6 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [flake8]
2 | max-line-length = 88
3 | ignore = E203, E266, E501, W503
4 | max-complexity = 18
5 | select = B,C,E,F,W,T4,B9
6 |
7 | [metadata]
8 | description-file = README.md
9 | license_file = LICENSE
10 |
--------------------------------------------------------------------------------
/altair_data_server/__init__.py:
--------------------------------------------------------------------------------
1 | """Altair data server"""
2 | __version__ = "0.5.0.dev0"
3 | __all__ = [
4 | "AltairDataServer",
5 | "data_server",
6 | "data_server_proxied",
7 | "Provider",
8 | "Resource",
9 | ]
10 |
11 | from ._altair_server import AltairDataServer, data_server, data_server_proxied
12 | from ._provide import Provider, Resource
13 |
--------------------------------------------------------------------------------
/mypy.ini:
--------------------------------------------------------------------------------
1 | [mypy]
2 | python_version = 3.8
3 |
4 | [mypy-altair.*]
5 | ignore_missing_imports = True
6 |
7 | [mypy-numpy.*]
8 | ignore_missing_imports = True
9 |
10 | [mypy-pandas.*]
11 | ignore_missing_imports = True
12 |
13 | [mypy-portpicker.*]
14 | ignore_missing_imports = True
15 |
16 | [mypy-pytest.*]
17 | ignore_missing_imports = True
18 |
19 | [mypy-tornado.*]
20 | ignore_missing_imports = True
--------------------------------------------------------------------------------
/postBuild:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # This is the binder-specific post-build file; it ensures that the notebook runs
3 | # nicely. See https://mybinder.readthedocs.io/en/latest/config_files.html for more details.
4 |
5 | set -euo pipefail
6 |
7 | # To reach the data server behind the binder firewall, we need to inject a proxy.
8 | pip install jupyter-server-proxy
9 |
10 | # Jupyterlab 1.0 or newer required for Altair 3.
11 | pip install -U jupyterlab
12 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 |
3 | cache: pip
4 |
5 | matrix:
6 | include:
7 | - python: 3.6
8 | - python: 3.7
9 | - python: 3.8
10 | - name: "lint"
11 | python: 3.8
12 | install:
13 | script:
14 | - black --check .
15 | - flake8 altair_data_server
16 | - mypy altair_data_server
17 |
18 | env:
19 | global:
20 | - TEST_DIR=/tmp/_altair_data_server/
21 |
22 | before_install:
23 | - pip install pip --upgrade
24 | - pip install -r requirements_dev.txt
25 |
26 | install:
27 | - pip install -e .
28 |
29 | script:
30 | - mkdir -p $TEST_DIR
31 | - cd $TEST_DIR && python -m pytest --pyargs altair_data_server
32 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | all: install
2 |
3 | PACKAGE = altair_data_server
4 |
5 | install:
6 | pip install .
7 |
# Format, lint and type-check the sources, then run the test suite against a
# freshly built copy of the package.
test:
	python -m black .
	python -m flake8 $(PACKAGE)
	python -m mypy $(PACKAGE)
	# -f: do not fail when there is no previous build directory to remove
	rm -rf build
	python setup.py build &&\
	cd build/lib &&\
	python -m pytest --pyargs --doctest-modules $(PACKAGE)
16 |
17 | test-coverage:
18 | python setup.py build &&\
19 | cd build/lib &&\
20 | python -m pytest --pyargs --doctest-modules --cov=$(PACKAGE) --cov-report term $(PACKAGE)
21 |
22 | test-coverage-html:
23 | python setup.py build &&\
24 | cd build/lib &&\
25 | python -m pytest --pyargs --doctest-modules --cov=$(PACKAGE) --cov-report html $(PACKAGE)
26 |
--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
1 | name: build
2 |
3 | on: [push, pull_request]
4 |
5 | jobs:
6 | build:
7 | runs-on: ubuntu-latest
8 | strategy:
9 | matrix:
10 | python-version: [ '3.6', '3.7', '3.8' ]
11 | name: Python ${{ matrix.python-version }}
12 | steps:
13 | - uses: actions/checkout@v1
14 | - name: Set up Python ${{ matrix.python-version }}
15 | uses: actions/setup-python@v1
16 | with:
17 | python-version: ${{ matrix.python-version }}
18 | - name: Install dependencies
19 | run: |
20 | python -m pip install --upgrade pip
21 | pip install -e .
22 | - name: Test with pytest
23 | run: |
24 | pip install pytest
25 | pytest --doctest-modules altair_data_server
26 |
--------------------------------------------------------------------------------
/altair_data_server/tests/test_entrypoint.py:
--------------------------------------------------------------------------------
1 | from typing import Callable
2 |
3 | import altair as alt
4 | from altair_data_server import data_server, data_server_proxied
5 |
6 | import pytest
7 |
8 |
def test_entrypoint_exists() -> None:
    """Both transformers must be listed in Altair's data transformer registry."""
    registered = alt.data_transformers.names()
    for expected in ("data_server", "data_server_proxied"):
        assert expected in registered
12 |
13 |
@pytest.mark.parametrize(
    "name,server_function",
    [("data_server", data_server), ("data_server_proxied", data_server_proxied)],
)
def test_entrypoint_identity(name: str, server_function: Callable) -> None:
    """Enabling a transformer by name must activate the exported singleton."""
    with alt.data_transformers.enable(name):
        active = alt.data_transformers.get()
        assert active is server_function
22 |
--------------------------------------------------------------------------------
/.github/workflows/lint.yml:
--------------------------------------------------------------------------------
1 | name: lint
2 |
3 | on: [push, pull_request]
4 |
5 | jobs:
6 | build:
7 | runs-on: ubuntu-latest
8 | name: flake8-black-mypy
9 | steps:
10 | - uses: actions/checkout@v1
11 | - name: Set up Python 3.8
12 | uses: actions/setup-python@v1
13 | with:
14 | python-version: 3.8
15 | - name: Install Requirements
16 | run: |
17 | pip install -r requirements.txt
18 | - name: Lint with flake8
19 | run: |
20 | pip install flake8
21 | flake8 . --count --statistics
22 | - name: Check formatting with black
23 | run: |
24 | pip install black
25 | black --check .
26 | - name: Check types with mypy
27 | run: |
28 | pip install mypy
29 | mypy altair_data_server
30 |
--------------------------------------------------------------------------------
/RELEASING.md:
--------------------------------------------------------------------------------
1 | 1. Update version to, e.g. 1.0.0 in ``altair_data_server/__init__.py``
2 |
3 | 2. Make sure ``CHANGES.md`` is up to date for the release
4 |
5 | 3. Commit change and push to master
6 |
7 | git add . -u
8 | git commit -m "MAINT: bump version to 1.0.0"
9 | git push origin master
10 |
11 | 4. Tag the release:
12 |
13 | git tag -a v1.0.0 -m "version 1.0.0 release"
14 | git push origin v1.0.0
15 |
16 | 5. Build source & wheel distributions
17 |
18 | rm -r dist build # clean old builds & distributions
19 | python setup.py sdist # create a source distribution
20 | python setup.py bdist_wheel # create a universal wheel
21 |
22 | 6. publish to PyPI (Requires correct PyPI owner permissions)
23 |
24 | twine upload dist/*
25 |
26 | 7. update version to, e.g. 1.1.0.dev0 in ``altair_data_server/__init__.py``
27 |
28 | 8. add a new changelog entry for the unreleased version
29 |
30 | 9. Commit change and push to master
31 |
32 | git add . -u
33 | git commit -m "MAINT: bump version to 1.1.0.dev0"
34 | git push origin master
35 |
--------------------------------------------------------------------------------
/CHANGES.md:
--------------------------------------------------------------------------------
1 | # Altair Data Server Change Log
2 |
3 | ## Version 0.5.0 (unreleased)
4 |
5 | ## Version 0.4.1
6 |
7 | - Allow content to be served from root URL
8 | - Fix some testing & distribution configurations
9 |
10 | ## Version 0.4.0
11 |
12 | - Make ``Provider`` and ``Resource`` top-level imports (#21).
13 | - Use a daemonic thread by default, so that server will automatically shut down
14 | when the parent python process terminates (#24).
15 | - Facilitate subclassing of ``Provider`` class (#27).
16 | - Add ability to specify port when enabling altair data server (#28).
17 | - Many minor bug fixes and improvements to testing, type hints, and CI.
18 |
19 | ## Version 0.3.0
20 |
21 | - Add support for Python 3.8
22 | - Drop support for Python 3.5 and lower
23 | - Format code with [black](https://black.readthedocs.io/)
24 | - Add static type checking with [mypy](http://mypy-lang.org/)
25 |
26 | ## Version 0.2.1
27 |
28 | - Add altair v4 entrypoint
29 |
30 | ## Version 0.2.0
31 |
32 | - Add `data_server_proxied` entrypoint for use with [jupyter-server-proxy](https://github.com/jupyterhub/jupyter-server-proxy) ([#5](https://github.com/altair-viz/altair_data_server/pull/5))
33 | - Update implementation to support Tornado 6.0 ([#6](https://github.com/altair-viz/altair_data_server/pull/6))
34 |
35 | ## Version 0.1.0
36 |
37 | Initial release: basic Altair data server implementation with the following
38 | entrypoints:
39 |
40 | - ``altair.vegalite.v2.data_transformer``
41 | - ``altair.vegalite.v3.data_transformer``
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright (c) 2019, altair-viz
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | * Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 |
12 | * Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | * Neither the name of the copyright holder nor the names of its
17 | contributors may be used to endorse or promote products derived from
18 | this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | MANIFEST
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 | .pytest_cache/
49 |
50 | # Translations
51 | *.mo
52 | *.pot
53 |
54 | # Django stuff:
55 | *.log
56 | local_settings.py
57 | db.sqlite3
58 |
59 | # Flask stuff:
60 | instance/
61 | .webassets-cache
62 |
63 | # Scrapy stuff:
64 | .scrapy
65 |
66 | # Sphinx documentation
67 | docs/_build/
68 |
69 | # PyBuilder
70 | target/
71 |
72 | # Jupyter Notebook
73 | .ipynb_checkpoints
74 |
75 | # pyenv
76 | .python-version
77 |
78 | # celery beat schedule file
79 | celerybeat-schedule
80 |
81 | # SageMath parsed files
82 | *.sage.py
83 |
84 | # Environments
85 | .env
86 | .venv
87 | env/
88 | venv/
89 | ENV/
90 | env.bak/
91 | venv.bak/
92 |
93 | # Spyder project settings
94 | .spyderproject
95 | .spyproject
96 |
97 | # Rope project settings
98 | .ropeproject
99 |
100 | # mkdocs documentation
101 | /site
102 |
103 | # mypy
104 | .mypy_cache/
105 |
106 | # emacs
107 | *~
108 |
109 | # Jupyter
110 | Untitled*.ipynb
--------------------------------------------------------------------------------
/altair_data_server/tests/test_altair_server.py:
--------------------------------------------------------------------------------
1 | import portpicker
2 | import re
3 | from typing import Any, Callable
4 |
5 | import numpy as np
6 | import pandas as pd
7 | import pytest
8 | from altair_data_server import data_server, data_server_proxied
9 |
10 |
@pytest.fixture(scope="session")
def session_context(request: Any) -> None:
    """Register finalizers that reset both data servers when the session ends."""
    for server in (data_server, data_server_proxied):
        request.addfinalizer(server.reset)
16 |
17 |
@pytest.fixture
def data() -> pd.DataFrame:
    """Small two-column frame (integer ``x``, string ``y``) used by the tests."""
    columns = {"x": np.arange(5), "y": list("ABCDE")}
    return pd.DataFrame(columns)
21 |
22 |
23 | def _decode_normal_url(url: str, fmt: str) -> str:
24 | return url
25 |
26 |
27 | def _decode_proxied_url(url: str, fmt: str) -> str:
28 | match = re.match(r"^\.\./proxy/([0-9]+)/([a-f0-9-]*\.([a-z]+))$", url)
29 | assert match
30 | assert match.group(3) == fmt
31 |
32 | # proxy only works when running under jupyter, use direct access here
33 | return f"http://localhost:{match.group(1)}/{match.group(2)}"
34 |
35 |
@pytest.mark.parametrize(
    "fmt,parse_function", [("json", pd.read_json), ("csv", pd.read_csv)]
)
@pytest.mark.parametrize(
    "server_function,url_decoder",
    [(data_server, _decode_normal_url), (data_server_proxied, _decode_proxied_url)],
)
def test_data_server(
    data: pd.DataFrame,
    session_context: Any,
    fmt: str,
    parse_function: Callable,
    server_function: Callable,
    url_decoder: Callable,
) -> None:
    """Serve a frame in each format and check it parses back unchanged."""
    spec = server_function(data, fmt=fmt)
    assert isinstance(spec, dict)
    assert list(spec) == ["url"]

    direct_url = url_decoder(spec["url"], fmt)
    round_tripped = parse_function(direct_url)
    assert data.equals(round_tripped)
58 |
59 |
@pytest.mark.parametrize(
    "server_function,url_decoder",
    [(data_server, _decode_normal_url), (data_server_proxied, _decode_proxied_url)],
)
@pytest.mark.parametrize("fmt", ["json", "csv"])
def test_data_server_port(
    data: pd.DataFrame,
    session_context: Any,
    fmt: str,
    server_function: Callable,
    url_decoder: Callable,
) -> None:
    """An explicitly requested port must show up in the served URL."""
    requested_port = portpicker.pick_unused_port()
    spec = server_function(data, port=requested_port, fmt=fmt)
    direct_url = url_decoder(spec["url"], fmt=fmt)
    assert str(requested_port) in direct_url
76 |
--------------------------------------------------------------------------------
/altair_data_server/_altair_server.py:
--------------------------------------------------------------------------------
1 | """Altair data server."""
2 |
3 | from typing import Dict, Optional, Tuple
4 | from urllib import parse
5 |
6 | from altair_data_server._provide import Provider, Resource
7 | from altair.utils.data import (
8 | _data_to_json_string,
9 | _data_to_csv_string,
10 | _compute_data_hash,
11 | )
12 | import pandas as pd
13 |
14 |
class AltairDataServer:
    """Data transformer that serves Altair datasets from a background provider.

    Calling an instance serializes the data, registers it with the provider
    (once per distinct content hash) and returns a ``{"url": ...}`` mapping.
    """

    def __init__(self) -> None:
        # Strong references to served resources are held here because the
        # background server only keeps weakrefs to them.
        self._resources: Dict[str, Resource] = {}
        self._provider: Optional[Provider] = None

    def reset(self) -> None:
        """Stop the provider, if one exists, and forget all cached resources."""
        provider = self._provider
        if provider is not None:
            provider.stop()
        self._resources = {}

    @staticmethod
    def _serialize(data: pd.DataFrame, fmt: str) -> Tuple[str, str]:
        """Return ``(content, content_hash)`` for *data* encoded as *fmt*.

        Raises:
            ValueError: if *fmt* is neither ``"json"`` nor ``"csv"``.
        """
        if fmt not in ("json", "csv"):
            raise ValueError(f"Unrecognized format: {fmt!r}")
        encode = _data_to_json_string if fmt == "json" else _data_to_csv_string
        content = encode(data)
        return content, _compute_data_hash(content)

    def __call__(
        self, data: pd.DataFrame, fmt: str = "json", port: Optional[int] = None
    ) -> Dict[str, str]:
        """Serve *data* and return a mapping holding the resource URL.

        The provider is created on first use.  When *port* is given and differs
        from the provider's current port, the provider is stopped and started
        again on the requested port.
        """
        provider = self._provider
        if provider is None:
            provider = self._provider = Provider().start(port=port)
        if port is not None and port != provider.port:
            provider.stop().start(port=port)

        content, resource_id = self._serialize(data, fmt)
        # Identical content hashes to the same id and is only registered once.
        if resource_id not in self._resources:
            self._resources[resource_id] = provider.create(
                content=content,
                extension=fmt,
                headers={"Access-Control-Allow-Origin": "*"},
            )
        return {"url": self._resources[resource_id].url}
55 |
56 |
class AltairDataServerProxied(AltairDataServer):
    """Data server whose returned URLs are rewritten to go through ``/proxy``."""

    def __call__(
        self,
        data: pd.DataFrame,
        fmt: str = "json",
        port: Optional[int] = None,
        urlpath: str = "..",
    ) -> Dict[str, str]:
        """Serve *data* and return its URL in ``<urlpath>/proxy/<port><path>`` form.

        Trailing slashes on *urlpath* are removed before it is used as prefix.
        """
        spec = super().__call__(data, fmt=fmt, port=port)

        direct = parse.urlparse(spec["url"])
        prefix = urlpath.rstrip("/")
        # vega defaults to /files, redirect it to /proxy/<port>/<path>
        spec["url"] = f"{prefix}/proxy/{direct.port}{direct.path}"

        return spec
73 |
74 |
# Singleton instances.  These are the objects re-exported by the package
# ``__init__`` and named by the ``data_transformer`` entry points in setup.py.
data_server = AltairDataServer()
data_server_proxied = AltairDataServerProxied()
78 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import io
2 | import os
3 | import re
4 |
5 | try:
6 | from setuptools import setup
7 | except ImportError:
8 | from distutils.core import setup
9 |
10 | # ==============================================================================
11 | # Utilities
12 | # ==============================================================================
13 |
14 |
def read(path, encoding="utf-8"):
    """Return the text content of *path*, resolved against this file's directory."""
    base_dir = os.path.dirname(__file__)
    full_path = os.path.join(base_dir, path)
    with io.open(full_path, encoding=encoding) as handle:
        text = handle.read()
    return text
19 |
20 |
def get_install_requirements(path):
    """Return the requirement specifiers listed in the file at *path*.

    Each line is stripped of surrounding whitespace first, so blank lines,
    whitespace-only lines and comment lines (including indented ones) are
    skipped and specifiers never carry stray leading/trailing whitespace.
    """
    stripped_lines = (line.strip() for line in read(path).splitlines())
    return [req for req in stripped_lines if req and not req.startswith("#")]
24 |
25 |
def version(path):
    """Obtain the package version from a Python file, e.g. ``pkg/__init__.py``.

    The file is searched for a line that starts with
    ``__version__ = "<version>"`` (single or double quotes) and the quoted
    value is returned.

    Raises:
        RuntimeError: if no such line is found.
    """
    pattern = r"""^__version__ = ['"]([^'"]*)['"]"""
    found = re.search(pattern, read(path), re.M)
    if found is None:
        raise RuntimeError("Unable to find version string.")
    return found.group(1)
38 |
39 |
HERE = os.path.abspath(os.path.dirname(__file__))


# From https://github.com/jupyterlab/jupyterlab/blob/master/setupbase.py, BSD licensed
def find_packages(top=HERE):
    """Return the dotted names of all packages found below *top*.

    A directory counts as a package when it contains ``__init__.py``; its name
    is its path relative to *top* with path separators replaced by dots.
    """
    found = []
    for current, subdirs, _files in os.walk(top, followlinks=True):
        if os.path.exists(os.path.join(current, "__init__.py")):
            relative = os.path.relpath(current, top)
            found.append(relative.replace(os.path.sep, "."))
            continue
        if current != top:
            # Not a package: prune the walk so nothing below it is visited.
            del subdirs[:]
    return found
56 |
57 |
# Package metadata and distribution configuration.
setup(
    name="altair_data_server",
    # Single-sourced from the ``__version__`` assignment in the package.
    version=version("altair_data_server/__init__.py"),
    description="A background data server for Altair charts.",
    long_description=read("README.md"),
    long_description_content_type="text/markdown",
    author="Jake VanderPlas",
    author_email="jakevdp@google.com",
    url="http://github.com/altair-viz/altair_data_server/",
    download_url="http://github.com/altair-viz/altair_data_server/",
    license="BSD 3-clause",
    packages=find_packages(),
    include_package_data=True,
    # Runtime dependencies are read from requirements.txt.
    install_requires=get_install_requirements("requirements.txt"),
    python_requires=">=3.6",
    # The same two transformers are declared under the vegalite v2, v3 and v4
    # ``data_transformer`` entry-point groups.
    entry_points={
        "altair.vegalite.v2.data_transformer": [
            "data_server=altair_data_server:data_server",
            "data_server_proxied=altair_data_server:data_server_proxied",
        ],
        "altair.vegalite.v3.data_transformer": [
            "data_server=altair_data_server:data_server",
            "data_server_proxied=altair_data_server:data_server_proxied",
        ],
        "altair.vegalite.v4.data_transformer": [
            "data_server=altair_data_server:data_server",
            "data_server_proxied=altair_data_server:data_server_proxied",
        ],
    },
    classifiers=[
        "Development Status :: 5 - Production/Stable",
        "Environment :: Console",
        "Intended Audience :: Science/Research",
        "License :: OSI Approved :: BSD License",
        "Natural Language :: English",
        "Programming Language :: Python :: 3.6",
        "Programming Language :: Python :: 3.7",
        "Programming Language :: Python :: 3.8",
    ],
)
98 |
--------------------------------------------------------------------------------
/altair_data_server/tests/test_provider.py:
--------------------------------------------------------------------------------
1 | import tempfile
2 | from typing import Iterator
3 |
4 | import pytest
5 | from tornado.httpclient import HTTPClient, HTTPClientError
6 | import tornado.web
7 |
8 | from altair_data_server import Provider, Resource
9 |
10 |
class RootHandler(tornado.web.RequestHandler):
    """Request handler that answers GET with the fixed ``content`` payload."""

    content: bytes = b"root content"

    def get(self) -> None:
        payload = self.content
        self.write(payload)
16 |
17 |
class ProviderSubclass(Provider):
    """Provider subclass that adds a root ("/") route, used to test subclassing."""

    def _handlers(self) -> list:
        inherited = super()._handlers()
        root_route = [("/", RootHandler)]
        return root_route + inherited
24 |
25 |
@pytest.fixture
def http_client() -> Iterator[HTTPClient]:
    """Yield a blocking Tornado HTTP client and close it once the test is done."""
    client = HTTPClient()
    yield client
    # Release the client's resources instead of leaking one client per test.
    client.close()
29 |
30 |
@pytest.fixture(scope="module")
def provider() -> Iterator[Provider]:
    """Module-scoped ``Provider`` that is stopped after the module's tests ran."""
    instance = Provider()
    yield instance
    instance.stop()
36 |
37 |
@pytest.fixture(scope="module")
def provider_subclass() -> Iterator[Provider]:
    """Module-scoped, already started ``ProviderSubclass``; stopped on teardown."""
    instance = ProviderSubclass().start()
    yield instance
    instance.stop()
43 |
44 |
def test_content_resource(provider: Provider, http_client: HTTPClient) -> None:
    """String content is served back verbatim under a URL ending in the extension."""
    expected = "testing content resource"
    served = provider.create(content=expected, extension="txt")
    assert isinstance(served, Resource)
    assert served.url.endswith("txt")
    response = http_client.fetch(served.url)
    assert response.body.decode() == expected
51 |
52 |
def test_content_default_url(provider: Provider) -> None:
    """Creating the same content twice must yield the same generated URL."""
    content = "testing default url"
    first = provider.create(content=content, extension="txt")
    second = provider.create(content=content, extension="txt")
    filename = first.url.split("/")[-1]
    assert filename.endswith(".txt")
    # More than just the ".txt" suffix: a non-empty name was generated.
    assert len(filename) > 4
    assert first.url == second.url
61 |
62 |
@pytest.mark.parametrize("route", ["/content", "hello_world.txt", ""])
def test_content_route(provider: Provider, http_client: HTTPClient, route: str) -> None:
    """An explicit route (with or without leading slash) is used as the URL path."""
    expected_text = f"testing route {route!r}"
    served = provider.create(content=expected_text, route=route)
    last_segment = served.url.split("/")[-1]
    assert last_segment == route.lstrip("/")
    assert http_client.fetch(served.url).body == expected_text.encode()
69 |
70 |
def test_handler_resource(provider: Provider, http_client: HTTPClient) -> None:
    """A callable handler is invoked anew for every request to its resource."""

    class Handler:
        """Callable that reports how many times it has been called."""

        def __init__(self) -> None:
            self.count = 0

        def __call__(self) -> str:
            self.count += 1
            return f"Testing handler resource {self.count}\n"

    served = provider.create(handler=Handler(), extension="txt")
    assert isinstance(served, Resource)
    for call_number in (1, 2):
        body = http_client.fetch(served.url).body.decode()
        assert body == f"Testing handler resource {call_number}\n"
87 |
88 |
def test_file_resource(provider: Provider, http_client: HTTPClient) -> None:
    """A resource created from a file path serves that file's bytes."""
    expected = b"file content"
    with tempfile.NamedTemporaryFile(suffix=".txt") as tmp:
        tmp.write(expected)
        # Make sure the bytes are on disk before the file is read via its path.
        tmp.flush()

        served = provider.create(filepath=tmp.name)
        assert isinstance(served, Resource)
        response = http_client.fetch(served.url)
        assert response.body == expected
98 |
99 |
def test_provider_subclass(
    provider_subclass: Provider, http_client: HTTPClient
) -> None:
    """The extra root route added by the subclass is reachable at the base URL."""
    response = http_client.fetch(provider_subclass.url)
    assert response.body == RootHandler.content
106 |
107 |
def test_expected_404(provider: Provider, http_client: HTTPClient) -> None:
    """Fetching a URL that matches no resource fails with HTTP 404."""
    served = provider.create(content="some new content")
    missing_url = served.url + ".html"
    with pytest.raises(HTTPClientError) as excinfo:
        http_client.fetch(missing_url)
    assert excinfo.value.code == 404
114 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Altair data server
2 |
3 | **This repo is no longer maintained.**
4 | In Altair 5,
5 | the recommended way to work with large datasets is via Vegafusion.
6 | The documentation contains a section on [how to use the Vegafusion renderer in Altair](https://altair-viz.github.io/user_guide/large_datasets.html#vegafusion-data-transformer)
7 | and if you need additional performance you can [use the Vegafusion widget renderer](https://vegafusion.io/widget_renderer.html) instead.
8 |
9 | ---
10 |
11 | [](https://travis-ci.org/altair-viz/altair_data_server)
12 | [](https://github.com/altair-viz/altair_data_server/actions?query=workflow%3Abuild)
13 | [](https://github.com/altair-viz/altair_data_server/actions?query=workflow%3Alint)
14 | [](https://github.com/psf/black)
15 | [](https://mybinder.org/v2/gh/altair-viz/altair_data_server/master?urlpath=lab/tree/AltairDataServer.ipynb)
16 | [](https://colab.research.google.com/github/altair-viz/altair_data_server/blob/master/AltairDataServer.ipynb)
17 |
18 |
19 | This is a data transformer plugin for [Altair](http://altair-viz.github.io)
20 | that transparently serves data for Altair charts via a background WSGI server.
21 |
22 | Note that charts will only render as long as your Python session is active.
23 |
24 | The data server is a good option when you'll be **generating multiple charts as
25 | part of an exploration of data**.
26 |
27 | ## Usage
28 |
29 | First install the package and its dependencies:
30 |
31 | ```
32 | $ pip install altair_data_server
33 | ```
34 |
35 | Next import altair and enable the data server:
36 |
37 | ```python
38 | import altair as alt
39 | alt.data_transformers.enable('data_server')
40 | ```
41 | Now when you create an Altair chart, the data will be served in the background
42 | rather than embedded in the chart specification.
43 |
44 | Once you are finished with exploration and want to generate charts that
45 | will have their data fully embedded in the notebook, you can restore the
46 | default data transformer:
47 |
48 | ```python
49 | alt.data_transformers.enable('default')
50 | ```
51 |
52 | and carry on from there.
53 |
54 | ## Remote Systems
55 | Remotely-hosted notebooks (like JupyterHub or Binder) usually do not allow the end
56 | user to access arbitrary ports. To enable users to work on that setup, make sure
57 | [jupyter-server-proxy](https://github.com/jupyterhub/jupyter-server-proxy) is
58 | installed on the jupyter server, and use the proxied data server transformer:
59 |
60 | ```python
61 | alt.data_transformers.enable('data_server_proxied')
62 | ```
63 |
64 | The `urlpath` parameter allows you to override the prefix of the proxy URL. By
65 | default, it's set to `..`, which is currently the only way to make it work for
66 | arbitrary users when running inside the classic notebook on Binder. If you
67 | intend your notebooks to be run on Binder but inside JupyterLab, change it to
68 | `.` instead, which will work provided JupyterLab is in the [default
69 | workspace](https://jupyterlab.readthedocs.io/en/stable/user/urls.html#managing-workspaces-ui).
70 |
71 | ```python
72 | # for notebooks intended for JupyterLab on Binder
73 | alt.data_transformers.enable('data_server_proxied', urlpath='.')
74 | ```
75 |
76 | On a custom JupyterHub instance, a much more robust option is to take advantage
77 | of JupyterHub's [`/user-redirect`](https://jupyterhub.readthedocs.io/en/stable/reference/urls.html#user-redirect)
78 | feature (which is not available on Binder):
79 |
80 | ```python
81 | # this will work for any JupyterHub user, whether they're using the classic
82 | # notebook, JupyterLab in the default workspace, or JupyterLab in a named
83 | # workspace
84 | alt.data_transformers.enable('data_server_proxied', urlpath='/user-redirect')
85 | ```
86 |
87 | If your JupyterHub lives somewhere else than at your server's root, add the
88 | appropriate prefix to `urlpath`.
89 |
90 | ## Example
91 |
92 | [](https://mybinder.org/v2/gh/altair-viz/altair_data_server/master?urlpath=lab/tree/AltairDataServer.ipynb)
93 | [](https://colab.research.google.com/github/altair-viz/altair_data_server/blob/master/AltairDataServer.ipynb)
94 |
95 | You can see this in action, as well as read some of the motivation for this
96 | plugin, in the example notebook: [AltairDataServer.ipynb](AltairDataServer.ipynb).
97 | Click the Binder or Colab links above to try it out in your browser.
98 |
99 | ## Known Issues
100 |
101 | Because [jupyter-server-proxy](https://github.com/jupyterhub/jupyter-server-proxy)
102 | requires at least Python 3.5, the methods described in
103 | [Remote Systems](#remote-systems) do not work for older versions of Python.
104 |
--------------------------------------------------------------------------------
/altair_data_server/_background_server.py:
--------------------------------------------------------------------------------
1 | # Note: the code in this file is adapted from source at
2 | # https://github.com/googlecolab/colabtools/blob/master/google/colab/html/_background_server.py
3 | # The following is its original license:
4 |
5 | # Copyright 2018 Google LLC
6 | #
7 | # Licensed under the Apache License, Version 2.0 (the "License");
8 | # you may not use this file except in compliance with the License.
9 | # You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 | """Tornado server utilities to run an application in a background thread."""
19 |
20 | import threading
21 |
22 | import portpicker
23 | import tornado
24 | import tornado.web
25 | import tornado.ioloop
26 | import tornado.httpserver
27 | from typing import Callable, Optional, Tuple, TypeVar
28 |
29 |
def _build_server(
    started: threading.Event,
    stopped: threading.Event,
    ioloop: tornado.ioloop.IOLoop,
    app: tornado.web.Application,
    port: int,
    timeout: int,
) -> Tuple[tornado.httpserver.HTTPServer, Callable[[], None]]:
    """Closure to build the server function to be passed to the thread.

    NOTE(review): this function currently has no body other than this
    docstring, so calling it returns None rather than the annotated tuple.
    It looks like a leftover stub from the module this file was adapted
    from -- confirm it is unused before relying on or removing it.

    Args:
      started: Threading event to notify when started.
      stopped: Presumably a threading event to notify when stopped
        (not used by any visible code -- TODO confirm).
      ioloop: IOLoop
      app: tornado application to serve.
      port: Port number to serve on.
      timeout: Http timeout in seconds.
    Returns:
      Documented intent: a function that takes a port and app and notifies
      about its status via the threading events provided. As written,
      nothing is returned.
    """
50 |
51 |
T = TypeVar("T", bound="_BackgroundServer")


class _BackgroundServer:
    """Tornado server running in a background thread.

    The server gets its own IOLoop, which is driven by a dedicated thread
    created in `start` and torn down in `stop`.
    """

    _app: tornado.web.Application
    _port: Optional[int]
    _server_thread: Optional[threading.Thread]
    _ioloop: Optional[tornado.ioloop.IOLoop]
    _server: Optional[tornado.httpserver.HTTPServer]

    def __init__(self: T, app: tornado.web.Application) -> None:
        """Initialize the BackgroundServer.

        Parameters
        ----------
        app: tornado.web.Application
            application to run in the background thread.
        """
        self._app = app
        self._port = None
        self._server_thread = None
        self._ioloop = None
        self._server = None

    @property
    def app(self: T) -> tornado.web.Application:
        """Returns the app instance."""
        return self._app

    @property
    def port(self: T) -> int:
        """Returns the current port or error if the server is not started.

        Returns
        -------
        port: int
            The port being used by the server.

        Raises
        ------
        RuntimeError: If server has not been started yet.
        """
        if self._server_thread is None or self._port is None:
            raise RuntimeError("Server not running.")
        return self._port

    def stop(self: T) -> T:
        """Stops the server thread.

        If server thread is already stopped, this is a no-op.

        Returns
        -------
        self :
            Returns self for chaining.
        """
        if self._server_thread is None:
            return self
        assert self._ioloop is not None
        assert self._server is not None

        def shutdown() -> None:
            # Runs on the IOLoop thread: stop accepting connections first,
            # then stop the loop so the server thread can exit.
            if self._server is not None:
                self._server.stop()
            if self._ioloop is not None:
                self._ioloop.stop()

        try:
            # The shutdown is scheduled onto the loop rather than called
            # directly, because the loop is running on the server thread.
            self._ioloop.add_callback(shutdown)
            self._server_thread.join()
            self._ioloop.close(all_fds=True)
        finally:
            # Always reset state so a later start() begins from scratch.
            self._server_thread = None
            self._ioloop = None
            self._server = None

        return self

    def start(
        self: T, port: Optional[int] = None, timeout: int = 1, daemon: bool = True
    ) -> T:
        """Starts a server in a thread serving the tornado application.

        Will wait until the thread has started to return. If the server
        thread fails before its IOLoop starts (for example because the port
        cannot be bound), the failure is re-raised here instead of leaving
        the caller waiting forever.

        Parameters
        ----------
        port: int
            Number of the port to use for the application, will find an open
            port if one is not provided.
        timeout: int
            HTTP timeout in seconds. Default = 1.
        daemon: bool
            If True (default) use a daemon thread that will automatically terminate when
            the main process terminates.

        Returns
        -------
        self :
            Returns self for chaining.

        Raises
        ------
        Exception: Whatever error the server thread hit while preparing the
            IOLoop or binding the port.
        """
        if self._server_thread is not None:
            return self

        self._port = port

        if self._port is None:
            self._port = portpicker.pick_unused_port()

        loop = tornado.ioloop.IOLoop()
        server = tornado.httpserver.HTTPServer(
            self._app, idle_connection_timeout=timeout, body_timeout=timeout
        )

        started = threading.Event()
        # Filled by the server thread when it fails before the loop runs.
        startup_errors: list = []

        def start_server(
            ioloop: tornado.ioloop.IOLoop,
            httpd: tornado.httpserver.HTTPServer,
            port: int,
        ) -> None:
            try:
                ioloop.make_current()
                httpd.listen(port=port)
            except Exception as exc:
                # The loop will never run, so the `started.set` callback
                # queued on it would never fire: wake the caller ourselves.
                startup_errors.append(exc)
                started.set()
                return
            ioloop.start()

        thread = threading.Thread(
            target=start_server,
            daemon=daemon,
            kwargs={"ioloop": loop, "httpd": server, "port": self._port},
        )

        self._ioloop = loop
        self._server = server
        self._server_thread = thread

        # Queued before the thread starts; it runs once the loop is spinning.
        loop.add_callback(started.set)
        thread.start()
        started.wait()

        if startup_errors:
            # Roll back to the "not running" state before reporting.
            self._server_thread = None
            self._ioloop = None
            self._server = None
            thread.join()
            loop.close(all_fds=True)
            raise startup_errors[0]

        return self
189 |
--------------------------------------------------------------------------------
/altair_data_server/_provide.py:
--------------------------------------------------------------------------------
1 | # Note: the code in this file is adapted from source at
2 | # https://github.com/googlecolab/colabtools/blob/master/google/colab/html/_provide.py
3 | # The following is its original license:
4 |
5 | # Copyright 2018 Google LLC
6 | #
7 | # Licensed under the Apache License, Version 2.0 (the "License");
8 | # you may not use this file except in compliance with the License.
9 | # You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 | """Helper to provide resources via the colab service worker."""
19 |
20 | import abc
21 | import collections
22 | import hashlib
23 | import mimetypes
24 | from typing import Callable, Dict, MutableMapping, Optional
25 | import uuid
26 | import weakref
27 |
28 | import tornado.web
29 | import tornado.wsgi
30 |
31 | from altair_data_server._background_server import _BackgroundServer
32 |
33 |
class Resource(metaclass=abc.ABCMeta):
    """Base class for anything a `Provider` can serve under a route."""

    def __init__(
        self,
        provider: "Provider",
        headers: Dict[str, str],
        extension: Optional[str] = None,
        route: Optional[str] = None,
    ):
        """Validate the arguments and derive the resource's guid.

        Args:
          provider: The provider whose url is used to build this resource's url.
          headers: Mapping of header names to values set on each response.
          extension: Optional extension appended to a generated route.
          route: Optional explicit route; mutually exclusive with extension.
        Raises:
          ValueError: If headers is not a mapping, or if both route and
            extension are given.
        """
        headers_ok = isinstance(headers, collections.abc.Mapping)
        if not headers_ok:
            raise ValueError("headers must be a dict")

        route_given = route is not None
        extension_given = extension is not None
        if route_given and extension_given:
            raise ValueError("Should only provide one of route or extension.")

        self.headers = headers
        self._provider = provider

        if route is None:
            # No explicit route: make up a unique one.
            route = str(uuid.uuid4())
            if extension:
                route = route + "." + extension

        # Leading slashes are dropped so the guid matches the lookup key
        # used when the request path is resolved.
        self._guid = route.lstrip("/")

    @abc.abstractmethod
    def get(self, handler: tornado.web.RequestHandler) -> None:
        """Gets the resource using the tornado handler passed in.

        The base implementation only copies the configured headers onto the
        handler; subclasses call it and then write their content.

        Args:
          handler: Tornado handler to be used.
        """
        for header_name, header_value in self.headers.items():
            handler.set_header(header_name, header_value)

    @property
    def guid(self) -> str:
        """Unique id used to serve and reference the resource."""
        return self._guid

    @property
    def url(self) -> str:
        """Url to fetch the resource at."""
        base_url = self._provider.url
        return f"{base_url}/{self._guid}"
75 |
76 |
class _ContentResource(Resource):
    """Resource that serves a fixed string held in memory."""

    def __init__(
        self,
        content: str,
        provider: "Provider",
        headers: Dict[str, str],
        extension: Optional[str] = None,
        route: Optional[str] = None,
    ):
        """Store the content and, if no route is given, derive one from it.

        Args:
          content: The string to serve.
          provider: The provider that serves this resource.
          headers: Mapping of header names to values set on each response.
          extension: Optional extension appended to the derived route.
          route: Optional explicit route to serve on.
        """
        self.content = content
        if route is None:
            # Content-addressed route: identical content maps to one route.
            digest = hashlib.md5(self.content.encode()).hexdigest()
            if extension is None:
                route = digest
            else:
                route = digest + "." + extension
            # The extension is already part of the route, so it is not
            # forwarded to the base class alongside it.
            extension = None
        super().__init__(
            provider=provider, headers=headers, extension=extension, route=route
        )

    def get(self, handler: tornado.web.RequestHandler) -> None:
        """Set the headers, then write the stored content to the handler."""
        super().get(handler)
        body = self.content
        handler.write(body)
101 |
102 |
class _FileResource(Resource):
    """Resource that serves the contents of a file on disk."""

    def __init__(
        self,
        filepath: str,
        provider: "Provider",
        headers: Dict[str, str],
        extension: Optional[str] = None,
        route: Optional[str] = None,
    ):
        """Remember the file path and register the route with the base class.

        Args:
          filepath: Path of the file whose contents are served.
          provider: The provider that serves this resource.
          headers: Mapping of header names to values set on each response.
          extension: Optional extension appended to a generated route.
          route: Optional explicit route to serve on.
        """
        super_kwargs = dict(
            provider=provider, headers=headers, extension=extension, route=route
        )
        self.filepath = filepath
        super().__init__(**super_kwargs)

    def get(self, handler: tornado.web.RequestHandler) -> None:
        """Set the headers, then write the file's current contents.

        The file is re-read on every request, in text mode with the
        default encoding.
        """
        super().get(handler)
        with open(self.filepath) as source:
            text = source.read()
        handler.write(text)
124 |
125 |
class _HandlerResource(Resource):
    """Resource whose content is produced by calling a function per request."""

    def __init__(
        self,
        func: Callable[[], str],
        provider: "Provider",
        headers: Dict[str, str],
        extension: Optional[str] = None,
        route: Optional[str] = None,
    ):
        """Remember the callable and register the route with the base class.

        Args:
          func: Zero-argument callable returning the content to serve.
          provider: The provider that serves this resource.
          headers: Mapping of header names to values set on each response.
          extension: Optional extension appended to a generated route.
          route: Optional explicit route to serve on.
        """
        super_kwargs = dict(
            provider=provider, headers=headers, extension=extension, route=route
        )
        self.func = func
        super().__init__(**super_kwargs)

    def get(self, handler: tornado.web.RequestHandler) -> None:
        """Set the headers, then write whatever the callable returns."""
        super().get(handler)
        handler.write(self.func())
146 |
147 |
class ResourceHandler(tornado.web.RequestHandler):
    """Tornado handler that resolves request paths to `Resource` objects."""

    def initialize(self, resources: Dict[str, Resource]) -> None:
        """Keep the mapping of routes to resources for later lookups."""
        self.resources = resources

    def get(self) -> None:
        """Serve the resource registered for the request path, or 404."""
        request_path = self.request.path
        # Resources are keyed without the leading slash.
        lookup_key = request_path.lstrip("/")
        found = self.resources.get(lookup_key)
        if not found:
            raise tornado.web.HTTPError(404)

        # Guess a content type from the path first; the resource's own
        # headers are applied afterwards by its get().
        guessed_type = mimetypes.guess_type(request_path)[0]
        if guessed_type:
            self.set_header("Content-Type", guessed_type)
        found.get(self)
163 |
164 |
class Provider(_BackgroundServer):
    """Background server which can provide a set of resources.

    Resources are kept in a weak-value mapping keyed by their guid, so the
    provider itself does not keep a created resource alive.
    """

    _resources: MutableMapping[str, Resource]

    def __init__(self) -> None:
        """Initialize the server with a ResourceHandler script."""
        self._resources = weakref.WeakValueDictionary()
        app = tornado.web.Application(self._handlers())
        super().__init__(app)

    def _handlers(self) -> list:
        """Return the tornado routing table: every path goes to ResourceHandler."""
        return [(r".*", ResourceHandler, dict(resources=self._resources))]

    @property
    def url(self) -> str:
        """Base url of the running server; raises if the server is not running."""
        return f"http://localhost:{self.port}"

    def create(
        self,
        content: str = "",
        filepath: str = "",
        handler: Optional[Callable[[], str]] = None,
        headers: Optional[Dict[str, str]] = None,
        extension: Optional[str] = None,
        route: Optional[str] = None,
    ) -> Resource:
        """Creates and provides a new resource to be served.

        Exactly one of content, filepath, or handler must be provided.
        The server is started if it is not already running.

        Args:
          content: The string content to return.
          filepath: The filepath to a file whose contents should be returned.
          handler: A function which will be executed and returned on each request.
          headers: A dict of header values to return.
          extension: Optional extension to add to the url.
          route: Optional route to serve on.
        Returns:
          The `Resource` object which will be served and will provide its url.
        Raises:
          ValueError: If you don't provide exactly one of content, filepath,
            or handler.
        """
        sources = sum(map(bool, (content, filepath, handler)))
        if sources != 1:
            raise ValueError(
                "Must provide exactly one of content, filepath, or handler"
            )

        headers = headers or {}
        resource: Resource

        if content:
            resource = _ContentResource(
                content,
                headers=headers,
                extension=extension,
                provider=self,
                route=route,
            )
        elif filepath:
            resource = _FileResource(
                filepath,
                headers=headers,
                extension=extension,
                provider=self,
                route=route,
            )
        else:
            # Exactly one source is truthy and it is neither content nor
            # filepath, so it must be the handler.
            assert handler is not None
            resource = _HandlerResource(
                handler,
                headers=headers,
                extension=extension,
                provider=self,
                route=route,
            )

        self._resources[resource.guid] = resource
        self.start()
        return resource
248 |
--------------------------------------------------------------------------------
/AltairDataServer.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "colab_type": "text",
7 | "id": "zLWJIthRefRt"
8 | },
9 | "source": [
10 | "# Altair Data Server\n",
11 | "\n",
12 | "This notebook shows an example of using the [Altair data server](https://github.com/altair-viz/altair_data_server), a lightweight plugin for [Altair](http://altair-viz.github.io) that lets you efficiently and transparently work with larger datasets.\n",
13 | "\n",
14 | "Altair data server can be installed with pip:"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 1,
20 | "metadata": {
21 | "colab": {
22 | "base_uri": "https://localhost:8080/",
23 | "height": 394
24 | },
25 | "colab_type": "code",
26 | "id": "A1uXsLU5fJnq",
27 | "outputId": "a447c3f8-48f0-4be8-daf1-c4d66ea00208"
28 | },
29 | "outputs": [],
30 | "source": [
31 | "!pip install altair_data_server"
32 | ]
33 | },
34 | {
35 | "cell_type": "markdown",
36 | "metadata": {
37 | "colab_type": "text",
38 | "id": "JZQsUiLgfNwM"
39 | },
40 | "source": [
41 | "## Motivation\n",
42 | "\n",
43 | "Altair charts are built on [vega-lite](http://vega.github.io/vega-lite), a visualization grammar that encodes charts in JSON before rendering them in your browser with Javascript.\n",
44 | "\n",
45 | "For example, consider the following chart:"
46 | ]
47 | },
48 | {
49 | "cell_type": "code",
50 | "execution_count": 2,
51 | "metadata": {
52 | "colab": {
53 | "base_uri": "https://localhost:8080/",
54 | "height": 171
55 | },
56 | "colab_type": "code",
57 | "id": "z4hH7BTsjuDX",
58 | "outputId": "701f7198-378f-4983-fef3-4e1658b72853"
59 | },
60 | "outputs": [
61 | {
62 | "data": {
63 | "application/vnd.vegalite.v2+json": {
64 | "$schema": "https://vega.github.io/schema/vega-lite/v2.6.0.json",
65 | "config": {
66 | "view": {
67 | "height": 300,
68 | "width": 400
69 | }
70 | },
71 | "data": {
72 | "name": "data-ce24079b8164f6736574dbd5cd2f7d05"
73 | },
74 | "datasets": {
75 | "data-ce24079b8164f6736574dbd5cd2f7d05": [
76 | {
77 | "category": "A",
78 | "value": 2
79 | },
80 | {
81 | "category": "B",
82 | "value": 6
83 | },
84 | {
85 | "category": "C",
86 | "value": 4
87 | },
88 | {
89 | "category": "D",
90 | "value": 7
91 | },
92 | {
93 | "category": "E",
94 | "value": 6
95 | }
96 | ]
97 | },
98 | "encoding": {
99 | "x": {
100 | "field": "value",
101 | "type": "quantitative"
102 | },
103 | "y": {
104 | "field": "category",
105 | "type": "nominal"
106 | }
107 | },
108 | "mark": "bar"
109 | },
110 | "image/png": "",
111 | "text/plain": [
112 | "\n",
113 | "\n",
114 | "If you see this message, it means the renderer has not been properly enabled\n",
115 | "for the frontend that you are using. For more information, see\n",
116 | "https://altair-viz.github.io/user_guide/troubleshooting.html\n"
117 | ]
118 | },
119 | "execution_count": 2,
120 | "metadata": {},
121 | "output_type": "execute_result"
122 | }
123 | ],
124 | "source": [
125 | "import pandas as pd\n",
126 | "import numpy as np\n",
127 | "import altair as alt\n",
128 | "\n",
129 | "data = pd.DataFrame({\n",
130 | " 'value': [2, 6, 4, 7, 6],\n",
131 | " 'category': list('ABCDE'),\n",
132 | "})\n",
133 | "\n",
134 | "chart = alt.Chart(data).mark_bar().encode(\n",
135 | " x='value',\n",
136 | " y='category'\n",
137 | ")\n",
138 | "\n",
139 | "chart"
140 | ]
141 | },
142 | {
143 | "cell_type": "markdown",
144 | "metadata": {
145 | "colab_type": "text",
146 | "id": "jJ5TXmMpkIuu"
147 | },
148 | "source": [
149 | "The chart itself, including the data, is encoded to a JSON specification that you can inspect:"
150 | ]
151 | },
152 | {
153 | "cell_type": "code",
154 | "execution_count": 3,
155 | "metadata": {
156 | "colab": {
157 | "base_uri": "https://localhost:8080/",
158 | "height": 816
159 | },
160 | "colab_type": "code",
161 | "id": "VcslPA_AkX8Y",
162 | "outputId": "fe9da48b-49fa-421f-8f5a-6966560b8afa"
163 | },
164 | "outputs": [
165 | {
166 | "name": "stdout",
167 | "output_type": "stream",
168 | "text": [
169 | "{\n",
170 | " \"$schema\": \"https://vega.github.io/schema/vega-lite/v2.6.0.json\",\n",
171 | " \"config\": {\n",
172 | " \"view\": {\n",
173 | " \"height\": 300,\n",
174 | " \"width\": 400\n",
175 | " }\n",
176 | " },\n",
177 | " \"data\": {\n",
178 | " \"name\": \"data-ce24079b8164f6736574dbd5cd2f7d05\"\n",
179 | " },\n",
180 | " \"datasets\": {\n",
181 | " \"data-ce24079b8164f6736574dbd5cd2f7d05\": [\n",
182 | " {\n",
183 | " \"category\": \"A\",\n",
184 | " \"value\": 2\n",
185 | " },\n",
186 | " {\n",
187 | " \"category\": \"B\",\n",
188 | " \"value\": 6\n",
189 | " },\n",
190 | " {\n",
191 | " \"category\": \"C\",\n",
192 | " \"value\": 4\n",
193 | " },\n",
194 | " {\n",
195 | " \"category\": \"D\",\n",
196 | " \"value\": 7\n",
197 | " },\n",
198 | " {\n",
199 | " \"category\": \"E\",\n",
200 | " \"value\": 6\n",
201 | " }\n",
202 | " ]\n",
203 | " },\n",
204 | " \"encoding\": {\n",
205 | " \"x\": {\n",
206 | " \"field\": \"value\",\n",
207 | " \"type\": \"quantitative\"\n",
208 | " },\n",
209 | " \"y\": {\n",
210 | " \"field\": \"category\",\n",
211 | " \"type\": \"nominal\"\n",
212 | " }\n",
213 | " },\n",
214 | " \"mark\": \"bar\"\n",
215 | "}\n"
216 | ]
217 | }
218 | ],
219 | "source": [
220 | "print(chart.to_json())"
221 | ]
222 | },
223 | {
224 | "cell_type": "markdown",
225 | "metadata": {
226 | "colab_type": "text",
227 | "id": "1Rd9e1jHkZBm"
228 | },
229 | "source": [
230 | "Notice that the data is encoded in the chart specification itself: this is very convenient because it results in a single, well-defined specification that contains **everything** required to recreate the chart.\n",
231 | "\n",
232 | "However, this leads to issues for larger datasets. For example:"
233 | ]
234 | },
235 | {
236 | "cell_type": "code",
237 | "execution_count": 4,
238 | "metadata": {
239 | "colab": {
240 | "base_uri": "https://localhost:8080/",
241 | "height": 34
242 | },
243 | "colab_type": "code",
244 | "id": "QmYK5_8xlDNW",
245 | "outputId": "b5f64ac9-8b81-4a08-fcfc-84cf0ad7c193"
246 | },
247 | "outputs": [
248 | {
249 | "name": "stdout",
250 | "output_type": "stream",
251 | "text": [
252 | "Size of chart spec: 500.7 KB\n"
253 | ]
254 | }
255 | ],
256 | "source": [
257 | "df = pd.DataFrame({\n",
258 | " 'timepoint': np.arange(5000),\n",
259 | " 'value': np.random.randn(5000),\n",
260 | " 'label': np.random.choice(list('ABCDE'), 5000)\n",
261 | "})\n",
262 | "\n",
263 | "chart = alt.Chart(df).mark_line().encode(\n",
264 | " x='timepoint',\n",
265 | " y='value',\n",
266 | " color='label'\n",
267 | ")\n",
268 | "\n",
269 | "\n",
270 | "def print_size_of(chart):\n",
271 | " spec = chart.to_json()\n",
272 | " print(f\"Size of chart spec: {len(spec) / 1024:.1f} KB\")\n",
273 | " \n",
274 | "print_size_of(chart)"
275 | ]
276 | },
277 | {
278 | "cell_type": "markdown",
279 | "metadata": {
280 | "colab_type": "text",
281 | "id": "anh5j_S0mZoo"
282 | },
283 | "source": [
284 | "If we had rendered this chart, it would have resulted in about half a megabyte of JSON text being embedded into the notebook. If your notebook contains many charts, this can quickly lead to large and unwieldy notebooks, and in the worst cases to crashing the browser.\n",
285 | "\n",
286 | "For this reason, Altair builds in a protection that prevents you from embedding extremely large data. Here's what happens when you use a dataset with a large number of rows:"
287 | ]
288 | },
289 | {
290 | "cell_type": "code",
291 | "execution_count": 5,
292 | "metadata": {
293 | "colab": {
294 | "base_uri": "https://localhost:8080/",
295 | "height": 2360
296 | },
297 | "colab_type": "code",
298 | "id": "dMa55WLmn2m2",
299 | "outputId": "35d3c60b-ad3d-4abb-c913-68071687ee66"
300 | },
301 | "outputs": [
302 | {
303 | "ename": "MaxRowsError",
304 | "evalue": "The number of rows in your dataset is greater than the maximum allowed (5000). For information on how to plot larger datasets in Altair, see the documentation",
305 | "output_type": "error",
306 | "traceback": [
307 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
308 | "\u001b[0;31mMaxRowsError\u001b[0m Traceback (most recent call last)",
309 | "\u001b[0;32m~/anaconda/lib/python3.6/site-packages/altair/vegalite/v2/api.py\u001b[0m in \u001b[0;36mto_dict\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 374\u001b[0m \u001b[0mcopy\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 375\u001b[0m \u001b[0moriginal_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'data'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mUndefined\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 376\u001b[0;31m \u001b[0mcopy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_prepare_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moriginal_data\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcontext\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 377\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 378\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0moriginal_data\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mUndefined\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
310 | "\u001b[0;32m~/anaconda/lib/python3.6/site-packages/altair/vegalite/v2/api.py\u001b[0m in \u001b[0;36m_prepare_data\u001b[0;34m(data, context)\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 63\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 64\u001b[0;31m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpipe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata_transformers\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 65\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdata_transformers\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconsolidate_datasets\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdict\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;34m'values'\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 66\u001b[0m \u001b[0mname\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_dataset_name\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
311 | "\u001b[0;32m~/anaconda/lib/python3.6/site-packages/toolz/functoolz.py\u001b[0m in \u001b[0;36mpipe\u001b[0;34m(data, *funcs)\u001b[0m\n\u001b[1;32m 550\u001b[0m \"\"\"\n\u001b[1;32m 551\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mfunc\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mfuncs\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 552\u001b[0;31m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 553\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 554\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
312 | "\u001b[0;32m~/anaconda/lib/python3.6/site-packages/toolz/functoolz.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 281\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__call__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 282\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 283\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_partial\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 284\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mTypeError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mexc\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 285\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_should_curry\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mexc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
313 | "\u001b[0;32m~/anaconda/lib/python3.6/site-packages/altair/vegalite/data.py\u001b[0m in \u001b[0;36mdefault_data_transformer\u001b[0;34m(data, max_rows)\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0;34m@\u001b[0m\u001b[0mcurry\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mdefault_data_transformer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmax_rows\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m5000\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 12\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mpipe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlimit_rows\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmax_rows\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmax_rows\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mto_values\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 13\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
314 | "\u001b[0;32m~/anaconda/lib/python3.6/site-packages/toolz/functoolz.py\u001b[0m in \u001b[0;36mpipe\u001b[0;34m(data, *funcs)\u001b[0m\n\u001b[1;32m 550\u001b[0m \"\"\"\n\u001b[1;32m 551\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mfunc\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mfuncs\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 552\u001b[0;31m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 553\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 554\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
315 | "\u001b[0;32m~/anaconda/lib/python3.6/site-packages/toolz/functoolz.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 281\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__call__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 282\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 283\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_partial\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 284\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mTypeError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mexc\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 285\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_should_curry\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mexc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
316 | "\u001b[0;32m~/anaconda/lib/python3.6/site-packages/altair/utils/data.py\u001b[0m in \u001b[0;36mlimit_rows\u001b[0;34m(data, max_rows)\u001b[0m\n\u001b[1;32m 70\u001b[0m \u001b[0;34m'than the maximum allowed ({0}). '\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 71\u001b[0m \u001b[0;34m'For information on how to plot larger datasets '\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 72\u001b[0;31m 'in Altair, see the documentation'.format(max_rows))\n\u001b[0m\u001b[1;32m 73\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 74\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
317 | "\u001b[0;31mMaxRowsError\u001b[0m: The number of rows in your dataset is greater than the maximum allowed (5000). For information on how to plot larger datasets in Altair, see the documentation"
318 | ]
319 | },
320 | {
321 | "data": {
322 | "text/plain": [
323 | "Chart({\n",
324 | " data: x y\n",
325 | " 0 0 -0.187207\n",
326 | " 1 1 0.236761\n",
327 | " 2 2 0.204160\n",
328 | " 3 3 1.510038\n",
329 | " 4 4 0.855619\n",
330 | " 5 5 -1.630006\n",
331 | " 6 6 -1.017096\n",
332 | " 7 7 -1.100416\n",
333 | " 8 8 -2.332064\n",
334 | " 9 9 0.979970\n",
335 | " 10 10 1.274392\n",
336 | " 11 11 -0.500785\n",
337 | " 12 12 -0.567306\n",
338 | " 13 13 -1.946447\n",
339 | " 14 14 -0.984827\n",
340 | " 15 15 -0.045963\n",
341 | " 16 16 2.503872\n",
342 | " 17 17 0.481599\n",
343 | " 18 18 0.872812\n",
344 | " 19 19 -0.280674\n",
345 | " 20 20 0.587096\n",
346 | " 21 21 0.345636\n",
347 | " 22 22 0.226660\n",
348 | " 23 23 -0.103801\n",
349 | " 24 24 -1.070477\n",
350 | " 25 25 -0.391096\n",
351 | " 26 26 1.324717\n",
352 | " 27 27 -2.045824\n",
353 | " 28 28 -0.858693\n",
354 | " 29 29 -0.024313\n",
355 | " ... ... ...\n",
356 | " 49970 49970 1.778256\n",
357 | " 49971 49971 0.334451\n",
358 | " 49972 49972 -0.788223\n",
359 | " 49973 49973 -1.012857\n",
360 | " 49974 49974 -0.019542\n",
361 | " 49975 49975 -1.143520\n",
362 | " 49976 49976 1.499920\n",
363 | " 49977 49977 -1.715173\n",
364 | " 49978 49978 -1.476321\n",
365 | " 49979 49979 1.200555\n",
366 | " 49980 49980 -0.738952\n",
367 | " 49981 49981 -0.228170\n",
368 | " 49982 49982 -0.456527\n",
369 | " 49983 49983 1.236492\n",
370 | " 49984 49984 0.503881\n",
371 | " 49985 49985 -0.342733\n",
372 | " 49986 49986 -0.164370\n",
373 | " 49987 49987 0.217805\n",
374 | " 49988 49988 1.980753\n",
375 | " 49989 49989 1.574927\n",
376 | " 49990 49990 1.191852\n",
377 | " 49991 49991 -0.462079\n",
378 | " 49992 49992 1.255304\n",
379 | " 49993 49993 0.671050\n",
380 | " 49994 49994 1.563930\n",
381 | " 49995 49995 1.986160\n",
382 | " 49996 49996 -0.630699\n",
383 | " 49997 49997 0.455879\n",
384 | " 49998 49998 -2.018491\n",
385 | " 49999 49999 1.606584\n",
386 | " \n",
387 | " [50000 rows x 2 columns],\n",
388 | " encoding: EncodingWithFacet({\n",
389 | " x: X({\n",
390 | " shorthand: 'x'\n",
391 | " }),\n",
392 | " y: Y({\n",
393 | " shorthand: 'y'\n",
394 | " })\n",
395 | " }),\n",
396 | " mark: 'line'\n",
397 | "})"
398 | ]
399 | },
400 | "metadata": {},
401 | "output_type": "display_data"
402 | }
403 | ],
404 | "source": [
405 | "df = pd.DataFrame({\n",
406 | " 'x': np.arange(50000),\n",
407 | " 'y': np.random.randn(50000),\n",
408 | "})\n",
409 | "\n",
410 | "big_chart = alt.Chart(df).mark_line().encode(\n",
411 | " x='x',\n",
412 | " y='y'\n",
413 | ")\n",
414 | "\n",
415 | "big_chart.display()"
416 | ]
417 | },
418 | {
419 | "cell_type": "markdown",
420 | "metadata": {
421 | "colab_type": "text",
422 | "id": "XSjoiii9w-N2"
423 | },
424 | "source": [
425 | "We can print the size of the chart by temporarily disabling the maximum rows check:"
426 | ]
427 | },
428 | {
429 | "cell_type": "code",
430 | "execution_count": 6,
431 | "metadata": {
432 | "colab": {
433 | "base_uri": "https://localhost:8080/",
434 | "height": 34
435 | },
436 | "colab_type": "code",
437 | "id": "dyGpfv-ewWmD",
438 | "outputId": "6fbca078-5529-4405-e72c-754522b8e811"
439 | },
440 | "outputs": [
441 | {
442 | "name": "stdout",
443 | "output_type": "stream",
444 | "text": [
445 | "Size of chart spec: 3389.5 KB\n"
446 | ]
447 | }
448 | ],
449 | "source": [
450 | "with alt.data_transformers.enable(max_rows=None):\n",
451 | " print_size_of(big_chart)"
452 | ]
453 | },
454 | {
455 | "cell_type": "markdown",
456 | "metadata": {
457 | "colab_type": "text",
458 | "id": "QfiXTsM6n8hp"
459 | },
460 | "source": [
461 | "Had Altair displayed this, it would have added 3MB of JSON text to the notebook, and if we created multiple charts, it would be another 3MB each time. This can quickly add up in the context of interactive data exploration.\n",
462 | "\n",
463 | "The way to get around this is to put the data somewhere that is not in the notebook itself, but is visible to the renderer running in the notebook. Altair has some [existing approaches](https://altair-viz.github.io/user_guide/faq.html#maxrowserror-how-can-i-plot-large-datasets) that work by saving the data to disk, but this is not always desirable, and doesn't always work in cloud-based Jupyter frontends."
464 | ]
465 | },
466 | {
467 | "cell_type": "markdown",
468 | "metadata": {
469 | "colab_type": "text",
470 | "id": "lXB0KYBGoek9"
471 | },
472 | "source": [
473 | "## A Solution: Altair Data Server\n",
474 | "\n",
475 | "The [Altair data server](https://github.com/altair-viz/altair_data_server) plugin provides a nice solution to this. Rather than embedding the data in the notebook or saving the data to disk, when enabled it starts a background server, serves the data, and inserts the appropriate URL into the Altair chart:"
476 | ]
477 | },
478 | {
479 | "cell_type": "code",
480 | "execution_count": 7,
481 | "metadata": {
482 | "colab": {
483 | "base_uri": "https://localhost:8080/",
484 | "height": 348
485 | },
486 | "colab_type": "code",
487 | "id": "1t8btXFXoiBu",
488 | "outputId": "f72a561f-f2a1-4371-a517-7a6546a2a915"
489 | },
490 | "outputs": [
491 | {
492 | "name": "stdout",
493 | "output_type": "stream",
494 | "text": [
495 | "Size of chart spec: 0.4 KB\n"
496 | ]
497 | },
498 | {
499 | "data": {
500 | "application/vnd.vegalite.v2+json": {
501 | "$schema": "https://vega.github.io/schema/vega-lite/v2.6.0.json",
502 | "config": {
503 | "view": {
504 | "height": 300,
505 | "width": 400
506 | }
507 | },
508 | "data": {
509 | "format": {
510 | "type": "json"
511 | },
512 | "url": "http://localhost:21953/ce767407-e3f3-4d15-a806-7b05a362ddb5.json"
513 | },
514 | "encoding": {
515 | "x": {
516 | "field": "x",
517 | "type": "quantitative"
518 | },
519 | "y": {
520 | "field": "y",
521 | "type": "quantitative"
522 | }
523 | },
524 | "mark": "line"
525 | },
526 | "image/png": "",
527 | "text/plain": [
528 | "\n",
529 | "\n",
530 | "If you see this message, it means the renderer has not been properly enabled\n",
531 | "for the frontend that you are using. For more information, see\n",
532 | "https://altair-viz.github.io/user_guide/troubleshooting.html\n"
533 | ]
534 | },
535 | "metadata": {},
536 | "output_type": "display_data"
537 | }
538 | ],
539 | "source": [
540 | "alt.data_transformers.enable('data_server')\n",
541 | "print_size_of(big_chart)\n",
542 | "big_chart.display()"
543 | ]
544 | },
545 | {
546 | "cell_type": "markdown",
547 | "metadata": {},
548 | "source": [
549 | "*Note: If you are running on a cloud-based hosted notebook like MyBinder, you will have to modify the above slightly, and instead run*\n",
550 | "```python\n",
551 | "alt.data_transformers.enable('data_server_proxied')\n",
552 | "```"
553 | ]
554 | },
555 | {
556 | "cell_type": "markdown",
557 | "metadata": {
558 | "colab_type": "text",
559 | "id": "MnmkhYaHpzsn"
560 | },
561 | "source": [
562 | "The resulting spec is only 0.4KB, which is small enough that embedding it in the notebook doesn't cause problems. It's instructive to look at the spec directly:"
563 | ]
564 | },
565 | {
566 | "cell_type": "code",
567 | "execution_count": 8,
568 | "metadata": {
569 | "colab": {
570 | "base_uri": "https://localhost:8080/",
571 | "height": 408
572 | },
573 | "colab_type": "code",
574 | "id": "OQSCXfEzo_fk",
575 | "outputId": "4124afd2-8a44-4541-ce6a-894a2300d717"
576 | },
577 | "outputs": [
578 | {
579 | "name": "stdout",
580 | "output_type": "stream",
581 | "text": [
582 | "{\n",
583 | " \"$schema\": \"https://vega.github.io/schema/vega-lite/v2.6.0.json\",\n",
584 | " \"config\": {\n",
585 | " \"view\": {\n",
586 | " \"height\": 300,\n",
587 | " \"width\": 400\n",
588 | " }\n",
589 | " },\n",
590 | " \"data\": {\n",
591 | " \"url\": \"http://localhost:21953/ce767407-e3f3-4d15-a806-7b05a362ddb5.json\"\n",
592 | " },\n",
593 | " \"encoding\": {\n",
594 | " \"x\": {\n",
595 | " \"field\": \"x\",\n",
596 | " \"type\": \"quantitative\"\n",
597 | " },\n",
598 | " \"y\": {\n",
599 | " \"field\": \"y\",\n",
600 | " \"type\": \"quantitative\"\n",
601 | " }\n",
602 | " },\n",
603 | " \"mark\": \"line\"\n",
604 | "}\n"
605 | ]
606 | }
607 | ],
608 | "source": [
609 | "print(big_chart.to_json())"
610 | ]
611 | },
612 | {
613 | "cell_type": "markdown",
614 | "metadata": {
615 | "colab_type": "text",
616 | "id": "Cc2Z_xEmuw6L"
617 | },
618 | "source": [
619 | "What the data transformer has done is to replace the embedded data with a URL reference, and to make that data available at that URL. We can see this by accessing the URL directly:"
620 | ]
621 | },
622 | {
623 | "cell_type": "code",
624 | "execution_count": 9,
625 | "metadata": {
626 | "colab": {
627 | "base_uri": "https://localhost:8080/",
628 | "height": 34
629 | },
630 | "colab_type": "code",
631 | "id": "QgAYaF0Ju4Y-",
632 | "outputId": "b9b01893-fef8-4896-cda2-42475046660e"
633 | },
634 | "outputs": [
635 | {
636 | "name": "stdout",
637 | "output_type": "stream",
638 | "text": [
639 | "http://localhost:21953/ce767407-e3f3-4d15-a806-7b05a362ddb5.json\n"
640 | ]
641 | }
642 | ],
643 | "source": [
644 | "url = big_chart.to_dict()['data']['url']\n",
645 | "print(url)"
646 | ]
647 | },
648 | {
649 | "cell_type": "markdown",
650 | "metadata": {},
651 | "source": [
652 | "We can load the data from the backend using Pandas:"
653 | ]
654 | },
655 | {
656 | "cell_type": "code",
657 | "execution_count": 10,
658 | "metadata": {
659 | "colab": {
660 | "base_uri": "https://localhost:8080/",
661 | "height": 204
662 | },
663 | "colab_type": "code",
664 | "id": "glrsH04Gpmwa",
665 | "outputId": "e85cdf09-99db-4129-e360-762f44d3646e"
666 | },
667 | "outputs": [
668 | {
669 | "data": {
670 | "text/html": [
671 | "\n",
672 | "\n",
685 | "
\n",
686 | " \n",
687 | " \n",
688 | " | \n",
689 | " x | \n",
690 | " y | \n",
691 | "
\n",
692 | " \n",
693 | " \n",
694 | " \n",
695 | " | 0 | \n",
696 | " 0 | \n",
697 | " -0.187207 | \n",
698 | "
\n",
699 | " \n",
700 | " | 1 | \n",
701 | " 1 | \n",
702 | " 0.236761 | \n",
703 | "
\n",
704 | " \n",
705 | " | 2 | \n",
706 | " 2 | \n",
707 | " 0.204160 | \n",
708 | "
\n",
709 | " \n",
710 | " | 3 | \n",
711 | " 3 | \n",
712 | " 1.510038 | \n",
713 | "
\n",
714 | " \n",
715 | " | 4 | \n",
716 | " 4 | \n",
717 | " 0.855619 | \n",
718 | "
\n",
719 | " \n",
720 | "
\n",
721 | "
"
722 | ],
723 | "text/plain": [
724 | " x y\n",
725 | "0 0 -0.187207\n",
726 | "1 1 0.236761\n",
727 | "2 2 0.204160\n",
728 | "3 3 1.510038\n",
729 | "4 4 0.855619"
730 | ]
731 | },
732 | "execution_count": 10,
733 | "metadata": {},
734 | "output_type": "execute_result"
735 | }
736 | ],
737 | "source": [
738 | "if not url.startswith('http://localhost'):\n",
739 | " # Using proxied URL; reconstruct the host url\n",
740 | " *proxy, port, filename = url.split('/')\n",
741 | " url = f\"http://localhost:{port}/{filename}\"\n",
742 | " \n",
743 | "served_data = pd.read_json(url)\n",
744 | "served_data.head()"
745 | ]
746 | },
747 | {
748 | "cell_type": "markdown",
749 | "metadata": {
750 | "colab_type": "text",
751 | "id": "c1AY-0hxpyQ3"
752 | },
753 | "source": [
754 | "## When to use the data server\n",
755 | "\n",
756 | "There is one distinct disadvantage of using the data server approach: your charts will only render as long as your Python session is active.\n",
757 | "\n",
758 | "So the data server is a good option when you'll be **working interactively, generating multiple charts as part of an exploration of data**.\n",
759 | "\n",
760 | "But once you are finished with exploration and want to generate charts that will be fully embedded in the notebook, you can restore the default data transformer:\n",
761 | "```python\n",
762 | "alt.data_transformers.enable('default')\n",
763 | "```\n",
764 | "and carry on from there."
765 | ]
766 | }
767 | ],
768 | "metadata": {
769 | "colab": {
770 | "collapsed_sections": [],
771 | "name": "Altair Data Server.ipynb",
772 | "provenance": [],
773 | "version": "0.3.2"
774 | },
775 | "kernelspec": {
776 | "display_name": "Python 3",
777 | "language": "python",
778 | "name": "python3"
779 | },
780 | "language_info": {
781 | "codemirror_mode": {
782 | "name": "ipython",
783 | "version": 3
784 | },
785 | "file_extension": ".py",
786 | "mimetype": "text/x-python",
787 | "name": "python",
788 | "nbconvert_exporter": "python",
789 | "pygments_lexer": "ipython3",
790 | "version": "3.6.7"
791 | }
792 | },
793 | "nbformat": 4,
794 | "nbformat_minor": 4
795 | }
796 |
--------------------------------------------------------------------------------