├── requirements.txt ├── MANIFEST.in ├── .gitignore ├── starborn ├── __init__.py └── core.py ├── LICENSE ├── README.md └── setup.py /requirements.txt: -------------------------------------------------------------------------------- 1 | altair>=2.0 2 | pandas 3 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include LICENSE 3 | recursive-include licences * 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.sw* 3 | build/ 4 | .ipynb_checkpoints/ 5 | dist/ 6 | seaborn.egg-info/ 7 | .cache/ 8 | .coverage 9 | cover/ 10 | .idea 11 | -------------------------------------------------------------------------------- /starborn/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | starborn: statistical data visualization based on Vega and Altair 3 | 4 | Plot types available: 5 | - scatterplot 6 | - jointplot 7 | - heatmap 8 | - stripplot 9 | - pairplot 10 | - barplot 11 | - violinplot 12 | """ 13 | 14 | from .core import * 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2018, Python Charmers Pty Ltd, Australia 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * Neither the name of the project nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | starborn: statistical data visualization based on Vega and Altair 2 | ================================================================= 3 | 4 | Starborn is a Python visualization library based on Vega and Altair that aims 5 | to be API-compatible with [Seaborn](seaborn.pydata.org). Like Seaborn, it 6 | provides a high-level interface for drawing attractive statistical graphics. 7 | Thanks to the underlying libraries, it can also offer interactivity with 8 | in-browser panning and zooming. 9 | 10 | 11 | **Development status:** Alpha. 12 | 13 | 14 | Plot types supported 15 | -------------------- 16 | 17 | - scatterplot 18 | - jointplot 19 | - heatmap 20 | - stripplot 21 | - pairplot 22 | - barplot 23 | - violinplot 24 | 25 | 26 | Documentation 27 | ------------- 28 | 29 | In progress. 30 | 31 | Examples are in [this notebook](https://github.com/PythonCharmers/starborn/blob/master/doc/starborn_examples.ipynb). 32 | 33 | 34 | Dependencies 35 | ------------ 36 | 37 | Starborn supports Python 2.7 and 3.4+. 38 | 39 | Installation requires [altair](http://altair-viz.github.io). 40 | 41 | 42 | Installation 43 | ------------ 44 | 45 | In the future, you will be able to install Starborn from PyPI via: 46 | 47 | pip install starborn 48 | 49 | For now, get it from GitHub: 50 | 51 | pip install git+https://github.com/PythonCharmers/starborn 52 | 53 | 54 | Testing 55 | ------- 56 | 57 | In progress ... 58 | 59 | 60 | Development 61 | ----------- 62 | 63 | Starborn development takes place on Github: https://github.com/PythonCharmers/starborn 64 | 65 | Please submit any reproducible bugs you encounter to the [issue tracker](https://github.com/PythonCharmers/starborn/issues). 66 | 67 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # 3 | # Copyright (C) 2018 Python Charmers Pty Ltd, Australia 4 | 5 | import os 6 | 7 | DESCRIPTION = "starborn: statistical data visualization with Vega" 8 | LONG_DESCRIPTION = """\ 9 | Starborn is a library for making attractive and informative statistical 10 | graphics in Python. It aims to be API-compatible with Seaborn but built on top 11 | of Altair. 12 | """ 13 | DISTNAME = 'starborn' 14 | MAINTAINER = 'Ed Schofield' 15 | MAINTAINER_EMAIL = 'ed@pythoncharmers.com' 16 | URL = 'https://github.io/edschofield/starborn' 17 | LICENSE = 'BSD (3-clause)' 18 | DOWNLOAD_URL = 'https://github.com/edschofield/starborn/' 19 | VERSION = '0.1' 20 | 21 | INSTALL_REQUIRES = [ 22 | 'altair>=2.0', 23 | ] 24 | 25 | PACKAGES = [ 26 | 'starborn', 27 | ] 28 | 29 | CLASSIFIERS = [ 30 | 'Intended Audience :: Science/Research', 31 | 'Programming Language :: Python :: 2.7', 32 | 'Programming Language :: Python :: 3.4', 33 | 'Programming Language :: Python :: 3.5', 34 | 'Programming Language :: Python :: 3.6', 35 | 'License :: OSI Approved :: BSD License', 36 | 'Topic :: Scientific/Engineering :: Visualization', 37 | 'Topic :: Multimedia :: Graphics', 38 | 'Operating System :: POSIX', 39 | 'Operating System :: Unix', 40 | 'Operating System :: MacOS' 41 | 'Operating System :: Windows' 42 | ] 43 | 44 | try: 45 | from setuptools import setup 46 | _has_setuptools = True 47 | except ImportError: 48 | from distutils.core import setup 49 | 50 | if __name__ == "__main__": 51 | 52 | setup( 53 | name=DISTNAME, 54 | author=MAINTAINER, 55 | author_email=MAINTAINER_EMAIL, 56 | maintainer=MAINTAINER, 57 | maintainer_email=MAINTAINER_EMAIL, 58 | description=DESCRIPTION, 59 | long_description=LONG_DESCRIPTION, 60 | license=LICENSE, 61 | url=URL, 62 | version=VERSION, 63 | download_url=DOWNLOAD_URL, 64 | install_requires=INSTALL_REQUIRES, 65 | packages=PACKAGES, 66 | classifiers=CLASSIFIERS 67 | ) 68 | -------------------------------------------------------------------------------- /starborn/core.py: -------------------------------------------------------------------------------- 1 | import altair as alt 2 | import pandas as pd 3 | 4 | 5 | def get_limit_tuple(series): 6 | lim = (series.min(), series.max()) 7 | return lim 8 | 9 | 10 | def scatterplot(x, y, data, hue=None, xlim=None, ylim=None): 11 | # TODO: refactor so it uses category_chart_kwargs? 12 | if xlim is None: 13 | xlim = get_limit_tuple(data[x]) 14 | if ylim is None: 15 | ylim = get_limit_tuple(data[y]) 16 | xscale = alt.Scale(domain=xlim) 17 | yscale = alt.Scale(domain=ylim) 18 | 19 | other_args = {'color': '{hue}:N'.format(hue=hue)} if hue else {} 20 | points = alt.Chart(data).mark_circle().encode( 21 | alt.X(x, scale=xscale), 22 | alt.Y(y, scale=yscale), 23 | **other_args 24 | ) 25 | return points 26 | 27 | 28 | def jointplot(x, y, data, kind='scatter', hue=None, xlim=None, ylim=None): 29 | if xlim is None: 30 | xlim = get_limit_tuple(data[x]) 31 | if ylim is None: 32 | ylim = get_limit_tuple(data[y]) 33 | xscale = alt.Scale(domain=xlim) 34 | yscale = alt.Scale(domain=ylim) 35 | 36 | points = scatterplot(x, y, data, hue=hue, xlim=xlim, ylim=ylim) 37 | 38 | area_args = {'opacity': .3, 'interpolate': 'step'} 39 | 40 | blank_axis = alt.Axis(title='') 41 | 42 | top_hist = alt.Chart(data).mark_area(**area_args).encode( 43 | alt.X('{x}:Q'.format(x=x), 44 | # when using bins, the axis scale is set through 45 | # the bin extent, so we do not specify the scale here 46 | # (which would be ignored anyway) 47 | bin=alt.Bin(maxbins=20, extent=xscale.domain), 48 | stack=None, 49 | axis=blank_axis, 50 | ), 51 | alt.Y('count()', stack=None, axis=blank_axis), 52 | alt.Color('{hue}:N'.format(hue=hue)), 53 | ).properties(height=60) 54 | 55 | right_hist = alt.Chart(data).mark_area(**area_args).encode( 56 | alt.Y('{y}:Q'.format(y=y), 57 | bin=alt.Bin(maxbins=20, extent=yscale.domain), 58 | stack=None, 59 | axis=blank_axis, 60 | ), 61 | alt.X('count()', stack=None, axis=blank_axis), 62 | alt.Color('{hue}:N'.format(hue=hue)), 63 | ).properties(width=60) 64 | 65 | return top_hist & (points | right_hist) 66 | 67 | 68 | def heatmap(data, vmin=None, vmax=None, annot=None, fmt='.2g'): 69 | 70 | # We always want to have a DataFrame with semantic information 71 | if not isinstance(data, pd.DataFrame): 72 | matrix = np.asarray(data) 73 | data = pd.DataFrame(matrix) 74 | 75 | melted = data.stack().reset_index(name='Value') 76 | 77 | x = data.columns.name 78 | y = data.index.name 79 | 80 | heatmap = alt.Chart(melted).mark_rect().encode( 81 | alt.X('{x}:O'.format(x=x), scale=alt.Scale(paddingInner=0)), 82 | alt.Y('{y}:O'.format(y=y), scale=alt.Scale(paddingInner=0)), 83 | color='Value:Q' 84 | ) 85 | 86 | if not annot: 87 | return heatmap 88 | 89 | # Overlay text 90 | text = alt.Chart(melted).mark_text(baseline='middle').encode( 91 | x='{x}:O'.format(x=x), 92 | y='{y}:O'.format(y=y), 93 | text=alt.Text('Value', format=fmt), 94 | color=alt.condition(alt.expr.datum['Value'] > 70, 95 | alt.value('black'), 96 | alt.value('white')) 97 | ) 98 | return heatmap + text 99 | 100 | 101 | def stripplot(x=None, y=None, hue=None, data=None): 102 | # TODO: refactor so it uses category_chart_kwargs() 103 | if data is None: 104 | if y is None: 105 | data = x.to_frame() 106 | x = data.columns[0] 107 | elif x is None: 108 | data = y.to_frame() 109 | y = data.columns[0] 110 | else: 111 | raise RuntimeError('not supported yet ...') 112 | 113 | kwargs = {} 114 | if x is not None: 115 | kwargs['x'] = '{x}'.format(x=x) 116 | if y is not None: 117 | kwargs['y'] = '{y}'.format(y=y) 118 | if hue is not None: 119 | kwargs['color'] = hue 120 | 121 | chart = alt.Chart(data).mark_tick().encode(**kwargs) 122 | return chart 123 | 124 | 125 | def pairplot(data, hue=None, vars=None): 126 | if vars is None: 127 | vars = list(data.columns) 128 | 129 | chart = alt.Chart(data).mark_circle().encode( 130 | alt.X(alt.repeat("column"), type='quantitative'), 131 | alt.Y(alt.repeat("row"), type='quantitative'), 132 | color='{hue}:N'.format(hue=hue) 133 | ).properties( 134 | width=250, 135 | height=250 136 | ).repeat( 137 | row=vars, 138 | column=vars 139 | ) 140 | return chart 141 | 142 | 143 | def category_chart_kwargs(x=None, y=None, hue=None, data=None, order=None, orient=None, hue_order=None, estimator='mean'): 144 | """ 145 | Somewhat similar to the `establish_variables()` method in Seaborn's 146 | `_CategoricalPlotter` class. 147 | 148 | Return value 149 | ============ 150 | a dictionary of kwargs for encoding the Altair chart 151 | """ 152 | 153 | if data is None: 154 | if y is None: 155 | data = x.to_frame() 156 | x = data.columns[0] 157 | elif x is None: 158 | data = y.to_frame() 159 | y = data.columns[0] 160 | else: 161 | raise RuntimeError('not supported yet ...') 162 | 163 | kwargs = {} 164 | 165 | # TODO: infer the orientation automatically: 166 | if orient is None: 167 | orient = 'v' 168 | 169 | if orient == 'v': 170 | if x is not None: 171 | kwargs['x'] = '{x}'.format(x=x) 172 | if y is not None: 173 | kwargs['y'] = '{estimator}({y})'.format(estimator=estimator, y=y) 174 | else: 175 | if x is not None: 176 | kwargs['x'] = '{estimator}({x})'.format(estimator=estimator, x=x) 177 | if y is not None: 178 | kwargs['y'] = '{y}'.format(y=y) 179 | 180 | if hue is not None: 181 | if orient == 'h': 182 | kwargs['row'] = kwargs['y'] 183 | kwargs['y'] = hue 184 | elif orient == 'v': 185 | kwargs['column'] = kwargs['x'] 186 | kwargs['x'] = hue 187 | kwargs['color'] = hue 188 | 189 | empty_axis=alt.Axis(domain=False, labels=False, title='', ticks=False) #, offset=-12, zindex=1) 190 | 191 | axis_kwargs = {} 192 | if hue is None: 193 | axis_kwargs['sort'] = order 194 | else: 195 | if hue_order is not None: 196 | axis_kwargs['sort'] = hue_order 197 | axis_kwargs['axis'] = empty_axis 198 | 199 | if orient == 'v': 200 | x = alt.X(kwargs['x'], **axis_kwargs) 201 | kwargs['x'] = x 202 | else: 203 | y = alt.Y(kwargs['y'], **axis_kwargs) 204 | kwargs['y'] = y 205 | 206 | if hue is not None: 207 | if order is not None: 208 | raise ValueError('custom order is not implemented for grouped bar charts (when `hue` is not `None`). Vega-Lite current does not support sorting of facets ...') 209 | if orient == 'v': 210 | column = alt.Column(kwargs['column']) #, axis=alt.Axis(orient='bottom')) 211 | kwargs['column'] = column 212 | else: 213 | row = alt.Row(kwargs['row']) #, axis=alt.Axis(orient='left'), sort=order) 214 | kwargs['row'] = row 215 | 216 | return kwargs 217 | 218 | 219 | def barplot(x=None, y=None, hue=None, data=None, order=None, orient=None, hue_order=None): 220 | kwargs = category_chart_kwargs(x=x, y=y, hue=hue, data=data, order=order, orient=orient, hue_order=hue_order) 221 | 222 | chart = alt.Chart(data).mark_bar().encode(**kwargs) 223 | return chart 224 | 225 | 226 | def boxplot(x=None, y=None, hue=None, data=None, order=None, orient=None): 227 | # TODO: refactor so it uses category_chart_kwargs 228 | 229 | # TODO: infer the orientation automatically: 230 | if orient is None or orient == 'v': 231 | return boxplot_vertical(x=x, y=y, hue=hue, data=data, order=order) 232 | else: 233 | return boxplot_horizontal(x=x, y=y, hue=hue, data=data, order=order) 234 | 235 | 236 | def boxplot_vertical(x=None, y=None, hue=None, data=None, order=None): 237 | 238 | # orientation_mapper = {'v': {'x': 'x', 'y': 'y'}, 239 | # 'h': {'x': 'y', 'y': 'x'}} 240 | 241 | # Define aggregate fields 242 | lower_box = 'q1({value}):Q'.format(value=y) 243 | lower_whisker = 'min({value}):Q'.format(value=y) 244 | upper_box = 'q3({value}):Q'.format(value=y) 245 | upper_whisker = 'max({value}):Q'.format(value=y) 246 | 247 | kwargs = {'x': '{x}:O'.format(x=x)} 248 | 249 | if hue is not None: 250 | kwargs['color'] = '{hue}:N'.format(hue=hue) 251 | # Swap x for column 252 | column, kwargs['x'] = kwargs['x'], '{hue}:N'.format(hue=hue) 253 | 254 | base = alt.Chart().encode( 255 | **kwargs 256 | ) 257 | 258 | # Compose each layer individually 259 | lower_whisker = base.mark_rule().encode( 260 | y=alt.Y(lower_whisker, axis=alt.Axis(title=y)), 261 | y2=lower_box, 262 | ) 263 | 264 | middle_bar_kwargs = dict( 265 | y=lower_box, 266 | y2=upper_box, 267 | ) 268 | if hue is None: 269 | middle_bar_kwargs['color'] = 'year:O' 270 | 271 | middle_bar = base.mark_bar(size=10.0).encode(**middle_bar_kwargs) 272 | 273 | upper_whisker = base.mark_rule().encode( 274 | y=upper_whisker, 275 | y2=upper_box, 276 | ) 277 | 278 | middle_tick = base.mark_tick( 279 | color='white', 280 | size=10.0 281 | ).encode( 282 | y='median({value}):Q'.format(value=y), 283 | ) 284 | 285 | chart = (lower_whisker + upper_whisker + middle_bar + middle_tick) 286 | 287 | if hue is None: 288 | chart.data = data 289 | return chart 290 | else: 291 | return chart.facet(column=column, data=data) 292 | 293 | 294 | def violinplot(x=None, y=None, data=None, orient=None): 295 | # TODO: automatically infer orientation 296 | 297 | if orient is None or orient == 'v': 298 | kwargs = dict( 299 | x=alt.X('count(*):Q', 300 | axis=alt.Axis(grid=False, labels=False), 301 | stack='center', 302 | title=''), 303 | y=alt.Y('{y}:Q'.format(y=y), bin=alt.Bin(maxbins=100)), 304 | column='{x}:N'.format(x=x), 305 | color='{x}:N'.format(x=x) 306 | ) 307 | else: 308 | kwargs = dict( 309 | y=alt.Y('count(*):Q', 310 | axis=alt.Axis(grid=False, labels=False), 311 | stack='center', 312 | title=''), 313 | x=alt.X('{x}:Q'.format(x=x), bin=alt.Bin(maxbins=100)), 314 | row='{y}:N'.format(y=y), 315 | color='{y}:N'.format(y=y) 316 | ) 317 | chart = alt.Chart(data).mark_area().encode(**kwargs) 318 | return chart 319 | 320 | --------------------------------------------------------------------------------