├── .gitattributes ├── stitch ├── __main__.py ├── exc.py ├── __init__.py ├── cli.py ├── parser.py ├── options.py ├── _version.py ├── static │ └── default.css └── stitch.py ├── MANIFEST.in ├── .gitmodules ├── .gitignore ├── setup.cfg ├── tests ├── conftest.py ├── data │ └── small.md ├── test_options.py ├── test_parser.py └── test_stitcher.py ├── environment.yml ├── examples ├── small.md ├── knitpy_overview.pymd └── timeseries.md ├── .travis.yml ├── LICENSE ├── setup.py ├── README.rst └── versioneer.py /.gitattributes: -------------------------------------------------------------------------------- 1 | stitch/_version.py export-subst 2 | -------------------------------------------------------------------------------- /stitch/__main__.py: -------------------------------------------------------------------------------- 1 | from .stitch_app import main 2 | main() 3 | -------------------------------------------------------------------------------- /stitch/exc.py: -------------------------------------------------------------------------------- 1 | class StitchError(Exception): 2 | pass 3 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include stitch/static/* 2 | include versioneer.py 3 | include stitch/_version.py 4 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "pystitch.github.io"] 2 | path = pystitch.github.io 3 | url = https://github.com/pystitch/pystitch.github.io 4 | branch = src 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.egg-info 2 | *.whl 3 | build/ 4 | dist/ 5 | __pycache__/ 6 | htmlcov/ 7 | 8 | .coverage 9 | 10 | .RData 11 | .Rhistory 12 | 13 | *.html 14 | *.pdf 15 | _build/ 16 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [versioneer] 2 | VCS = git 3 | style = pep440 4 | versionfile_source = stitch/_version.py 5 | versionfile_build = stitch/_version.py 6 | tag_prefix = 7 | #parentdir_prefix = 8 | 9 | [flake8] 10 | ignore = E731 11 | exclude = stitch/_version.py,versioneer.py 12 | -------------------------------------------------------------------------------- /stitch/__init__.py: -------------------------------------------------------------------------------- 1 | from .stitch import ( # noqa 2 | convert, convert_file, kernel_factory, run_code, Stitch 3 | ) 4 | from .cli import cli # noqa 5 | 6 | 7 | from ._version import get_versions 8 | __version__ = get_versions()['version'] 9 | del get_versions 10 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | 4 | def pytest_addoption(parser): 5 | parser.addoption("--run-slow", action="store_true", 6 | help="run slow tests") 7 | 8 | 9 | def pytest_runtest_setup(item): 10 | if 'slow' in item.keywords and not item.config.getoption("--run-slow"): 11 | pytest.skip("need --run-slow option to run") 12 | -------------------------------------------------------------------------------- /environment.yml: 
-------------------------------------------------------------------------------- 1 | name: stitch 2 | channels: 3 | - conda-forge 4 | - pypi 5 | dependencies: 6 | - python=3.5.2 7 | - matplotlib 8 | - pandas 9 | - jupyter_core 10 | - traitlets 11 | - ipython 12 | - jupyter_client 13 | - nbconvert 14 | - pypandoc 15 | - click 16 | - pytest 17 | - pytest-cov 18 | - sphinx 19 | - pandas 20 | - sphinx_rtd_theme 21 | - pip: 22 | - pandocfilters 23 | - numpydoc 24 | -------------------------------------------------------------------------------- /examples/small.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: small 3 | author: test author 4 | --- 5 | 6 | # This is a small example 7 | 8 | ```{python} 9 | def f(x): 10 | return x * 2 11 | 12 | f(2) 13 | ``` 14 | 15 | With options 16 | 17 | ```{python, echo=False, eval=True} 18 | def f(x): 19 | return x ** 2 20 | 21 | f(2) 22 | ``` 23 | 24 | Don't evaluate. 25 | 26 | ```{python, eval=False} 27 | def f(x): 28 | return x ** 2 29 | 30 | f(2) 31 | ``` 32 | 33 | ## Plotting 34 | 35 | ```{python} 36 | %matplotlib inline 37 | import matplotlib.pyplot as plt 38 | plt.plot(range(4), range(4)) 39 | ``` 40 | 41 | ```{python} 42 | print("2 + 2 is") 43 | 2 + 2 44 | ``` 45 | 46 | Fin. 47 | 48 | -------------------------------------------------------------------------------- /tests/data/small.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: small 3 | author: test author 4 | --- 5 | 6 | # This is a small example 7 | 8 | ```{python} 9 | def f(x): 10 | print('x is equal to %s' % x) 11 | return x * 2 12 | 13 | f(2) 14 | ``` 15 | 16 | With options 17 | 18 | ```{.python echo=False eval=True} 19 | def f(x): 20 | return x ** 2 21 | 22 | f(2) 23 | ``` 24 | 25 | Don't evaluate. 26 | 27 | ```{.python eval=False} 28 | def f(x): 29 | return x ** 2 30 | 31 | f(2) 32 | ``` 33 | 34 | ```{python} 35 | raise TypeError('foo') 36 | ``` 37 | 38 | ## Plotting 39 | 40 | ```{.python} 41 | %matplotlib inline 42 | import matplotlib.pyplot as plt 43 | plt.plot(range(4), range(4)) 44 | ``` 45 | 46 | Fin. 
47 | 48 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - 3.5 4 | 5 | notifications: 6 | email: false 7 | 8 | git: 9 | submodules: false 10 | 11 | # Install packages 12 | install: 13 | - sudo apt-get update 14 | - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; 15 | - bash miniconda.sh -b -p $HOME/miniconda 16 | - export PATH="$HOME/miniconda/bin:$PATH" 17 | - hash -r 18 | - conda config --set always_yes yes --set changeps1 no 19 | - conda config --add channels conda-forge 20 | - conda update -q conda 21 | # Useful for debugging any issues with conda 22 | - conda info -a 23 | - conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION ipython pypandoc jupyter_core jupyter_client jupyter notebook matplotlib pandas doctr sphinx_rtd_theme bokeh holoviews altair numpydoc 24 | - source activate test-environment 25 | - jupyter kernelspec list 26 | - pip install -e .[dev] 27 | 28 | # Run test 29 | script: 30 | - "py.test tests --cov=stitch --cov-report html --run-slow tests/" 31 | 32 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Tom Augspurger 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 23 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | from os import path 3 | 4 | import versioneer 5 | 6 | 7 | here = path.abspath(path.dirname(__file__)) 8 | 9 | with open(path.join(here, 'README.rst'), encoding='utf-8') as f: 10 | long_description = f.read() 11 | 12 | setup( 13 | name='knotr', 14 | version=versioneer.get_version(), 15 | cmdclass=versioneer.get_cmdclass(), 16 | 17 | description='Reproducible report generation tool.', 18 | long_description=long_description, 19 | 20 | url='https://github.com/tomaugspurger/stitch', 21 | 22 | author='Tom Augspurger', 23 | author_email='tom.augspurger88@gmail.com', 24 | 25 | license='MIT', 26 | 27 | classifiers=[ 28 | 'Development Status :: 3 - Alpha', 29 | 'Intended Audience :: Developers', 30 | 'Topic :: Software Development :: Build Tools', 31 | 'License :: OSI Approved :: MIT License', 32 | 33 | 'Programming Language :: Python :: 3', 34 | 'Programming Language :: Python :: 3.4', 35 | 'Programming Language :: Python :: 3.5', 36 | 'Programming Language :: Python :: 3.6', 37 | ], 38 | 39 | # keywords='sample setuptools development', 40 | packages=find_packages(exclude=['docs', 'tests']), 41 | 42 | install_requires=['jupyter_core', 'traitlets', 'ipython', 'jupyter_client', 43 | 'nbconvert', 'pandocfilters', 'pypandoc', 'click'], 44 | 45 | extras_require={ 46 | 'dev': ['pytest', 'pytest-cov', 'sphinx', 'pandas', 'matplotlib'], 47 | }, 48 | include_package_data=True, 49 | package_data={ 50 | 'stitch': ['static/*'], 51 | }, 52 | entry_points={ 53 | 'console_scripts': [ 54 | 'stitch=stitch.cli:cli', 55 | ], 56 | }, 57 | ) 58 | -------------------------------------------------------------------------------- /tests/test_options.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from textwrap import dedent 3 | from stitch.stitch import Stitch 4 | 5 | 6 | @pytest.fixture 7 | def doc_meta(): 8 | data = {'date': '2016-01-01', 'title': 'My Title', 'author': 'Jack', 9 | 'self_contained': True, 'standalone': False, 10 | 'to': 'pdf'} 11 | doc = dedent('''\ 12 | --- 13 | to: {to} 14 | title: {title} 15 | author: {author} 16 | date: {date} 17 | self_contained: {self_contained} 18 | standalone: {standalone} 19 | --- 20 | 21 | # Hi 22 | ''') 23 | return doc.format(**data), data 24 | 25 | 26 | class TestOptions: 27 | 28 | def test_defaults(self): 29 | s = Stitch('') 30 | assert s.warning 31 | assert s.error == 'continue' 32 | assert s.to == 'html' 33 | assert s.standalone 34 | 35 | def test_override(self): 36 | doc = dedent('''\ 37 | --- 38 | title: My Title 39 | standalone: False 40 | warning: False 41 | error: raise 42 | abstract: | 43 | This is the abstract. 44 | 45 | It consists of two paragraphs. 
46 | --- 47 | 48 | # Hail and well met 49 | ''') 50 | s = Stitch('') 51 | s.stitch(doc) 52 | 53 | assert s.standalone is False 54 | assert s.warning is False 55 | assert s.error == 'raise' 56 | assert getattr(s, 'abstract', None) is None 57 | 58 | @pytest.mark.parametrize('key', [ 59 | 'title', 'author', 'date', 'self_contained', 'standalone', 'to' 60 | ]) 61 | def test_meta(self, key, doc_meta): 62 | doc, meta = doc_meta 63 | s = Stitch('') 64 | s.stitch(doc) 65 | result = getattr(s, key) 66 | expected = meta[key] 67 | assert result == expected 68 | 69 | 70 | @pytest.mark.slow 71 | class TestOptionsKernel: 72 | 73 | def test_fig_cap(self): 74 | code = dedent('''\ 75 | ```{python, fig.cap="This is a caption"} 76 | import matplotlib.pyplot as plt 77 | plt.plot(range(4), range(4)) 78 | ```''') 79 | s = Stitch('') 80 | result = s.stitch(code) 81 | blocks = result['blocks'] 82 | result = blocks[-1]['c'][0]['c'][1][0]['c'] 83 | assert result == 'This is a caption' 84 | -------------------------------------------------------------------------------- /stitch/cli.py: -------------------------------------------------------------------------------- 1 | import os 2 | import click 3 | 4 | from .stitch import convert 5 | 6 | HERE = os.path.dirname(__file__) 7 | CSS = os.path.join(HERE, 'static', 'default.css') 8 | 9 | 10 | def infer_format(output_file): 11 | if output_file is None: 12 | return 'html' 13 | else: 14 | return os.path.splitext(output_file)[1].lstrip('.') 15 | 16 | 17 | def has_css(extra_args): 18 | return '-c' in extra_args or '--css' in [a.split('=')[0] for a in 19 | extra_args] 20 | 21 | 22 | def has_booktabs(extra_args): 23 | return 'header-includes:\\usepackage{booktabs}' in [ 24 | '='.join(a.split('=')[1:]) 25 | for a in extra_args 26 | ] 27 | 28 | 29 | def enhance_args(to, no_standalone, no_self_contained, extra_args): 30 | extra_args = extra_args.copy() 31 | if not no_standalone and not ('-s' in extra_args or 32 | '--standalone' in extra_args): 33 | extra_args.append('--standalone') 34 | if not no_self_contained and '--self-contained' not in extra_args: 35 | extra_args.append('--self-contained') 36 | if to == 'html' and not has_css(extra_args): 37 | extra_args.append('--css=%s' % CSS) 38 | if to in ('latex', 'pdf') and not has_booktabs(extra_args): 39 | extra_args.append('--metadata=header-includes:\\usepackage{booktabs}') 40 | return extra_args 41 | 42 | 43 | @click.command( 44 | context_settings=dict(ignore_unknown_options=True, 45 | allow_extra_args=True) 46 | ) 47 | @click.pass_context 48 | @click.argument('input_file', type=click.File('rb')) 49 | @click.option('-o', '--output_file', type=str, default=None) 50 | @click.option('-t', '--to', default=None) 51 | @click.option('--no-standalone', is_flag=True, default=False, 52 | help='Produce a document fragment instead.') 53 | @click.option('--no-self-contained', is_flag=True, default=False, 54 | help='Use external files for resources like images.') 55 | def cli(ctx, input_file, output_file, to, no_standalone, no_self_contained): 56 | if to is None: 57 | to = infer_format(output_file) 58 | input_text = input_file.read().decode('utf-8') 59 | extra_args = enhance_args(to, no_standalone, no_self_contained, ctx.args) 60 | convert(input_text, to, output_file=output_file, extra_args=extra_args) 61 | 62 | if __name__ == '__main__': 63 | cli() 64 | -------------------------------------------------------------------------------- /stitch/parser.py: -------------------------------------------------------------------------------- 1 | """ 2 | 
Chunk options-line parser. 3 | 4 | See *Python Cookbook* 3E, recipe 2.18 5 | """ 6 | import re 7 | from collections import namedtuple 8 | 9 | Token = namedtuple("Token", ['kind', 'value']) 10 | 11 | 12 | def validate_options(options_line): 13 | xpr = re.compile(r'^```{\w+.*}') 14 | if not xpr.match(options_line): 15 | raise TypeError("Invalid chunk options %s" % options_line) 16 | 17 | 18 | def _transform(kind, text): 19 | if kind == 'ARG': 20 | result = '.' + text 21 | elif kind in ('DELIM', 'BLANK'): 22 | result = None 23 | elif kind in ('OPEN', 'CLOSE', 'KWARG'): 24 | return text 25 | else: 26 | raise TypeError('Unknown kind %s' % kind) 27 | return result 28 | 29 | 30 | def tokenize(options_line): 31 | """ 32 | Break an options line into a list of tokens. 33 | 34 | Parameters 35 | ---------- 36 | options_line : str 37 | 38 | Returns 39 | ------- 40 | tokens : list of tuples 41 | 42 | Notes 43 | ----- 44 | The valid tokens are 45 | 46 | * ``KWARG``: an expression like ``foo=bar`` 47 | * ``ARG``: a term like `python`; used for kernel & chunk names 48 | * ``OPEN``: The literal ``\`\`\`{`` 49 | * ``CLOSE``: The literal ``}`` 50 | * ``BLANK``: Whitespace 51 | """ 52 | KWARG = r'(?P<KWARG>([^,=]+ *)= *(".*"|\'.*\'|[^,=}]+))' 53 | ARG = r'(?P<ARG>\w+)' 54 | OPEN = r'(?P<OPEN>```{ *)' 55 | DELIM = r'(?P<DELIM> *, *)' 56 | CLOSE = r'(?P<CLOSE>})' 57 | BLANK = r'(?P<BLANK>\s+)' 58 | 59 | master_pat = re.compile('|'.join([KWARG, ARG, OPEN, DELIM, 60 | CLOSE, BLANK])) 61 | 62 | def generate_tokens(pat, text): 63 | scanner = pat.scanner(text) 64 | for m in iter(scanner.match, None): 65 | yield Token(m.lastgroup, m.group(m.lastgroup)) 66 | 67 | tok = list(generate_tokens(master_pat, options_line)) 68 | return tok 69 | 70 | 71 | def preprocess_options(options_line): 72 | """ 73 | Transform a code-chunk options line to allow 74 | ``{python, arg, kwarg=val}`` instead of pandoc-style 75 | ``{.python .arg kwarg=val}``.
76 | 77 | Parameters 78 | ---------- 79 | options_line: str 80 | 81 | Returns 82 | ------- 83 | transformed: str 84 | """ 85 | tok = tokenize(options_line) 86 | 87 | items = (_transform(kind, text) for kind, text in tok) 88 | items = filter(None, items) 89 | items = ' '.join(items) 90 | result = items.replace('{ ', '{').replace(' }', '}').replace(" {", "{") 91 | return result 92 | -------------------------------------------------------------------------------- /stitch/options.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Mapping 2 | from traitlets import TraitType, Enum 3 | 4 | KERNELS = 'kernels-map' 5 | STYLES = 'styles-map' 6 | 7 | 8 | class Bool(TraitType): 9 | 10 | default_value = False 11 | info_text = "True or False; unwraps pandoc's JSON AST" 12 | 13 | def validate(self, obj, value): 14 | if isinstance(value, Mapping): 15 | assert value['t'] == 'MetaBool' 16 | return value['c'] 17 | elif type(value) is bool: 18 | return value 19 | else: 20 | raise self.error(obj, value) 21 | 22 | 23 | class Choice(Enum): 24 | 25 | info_text = "Choice from a set; unwraps pandoc's JSON AST" 26 | 27 | def validate(self, obj, value): 28 | if isinstance(value, Mapping): 29 | value = ' '.join(x['c'] for x in value['c']) 30 | 31 | if value in self.values: 32 | return value 33 | self.error(obj, value) 34 | 35 | 36 | class Str(TraitType): 37 | 38 | default_value = '' 39 | info_text = "A string; unwraps pandoc's JSON AST" 40 | 41 | def validate(self, obj, value): 42 | if isinstance(value, Mapping): 43 | value = value.get('c') 44 | if value: 45 | strs = filter(None, (x.get('c') for x in value)) 46 | value = ' '.join(strs) 47 | if isinstance(value, str) or value is None: 48 | return value 49 | self.error(obj, value) 50 | 51 | 52 | class LangMapper: 53 | """ 54 | Reads metadata into a dict: 55 | --- 56 | kernels-map: 57 | r: ir 58 | py: python 59 | py2: python2 60 | styles-map: 61 | py2: py 62 | ...
63 | then maps user specified lang names (like 'r' or 'py2') 64 | to kernel names that Stitch understand and 65 | to css classes needed for highlighing 66 | """ 67 | def __init__(self, meta): 68 | self._kernels = self._read_dict(meta, KERNELS) 69 | self._styles = self._read_dict(meta, STYLES) 70 | 71 | @staticmethod 72 | def _read_dict(meta, dict_name): 73 | if dict_name in meta: 74 | try: 75 | ret = {key: val['c'][0]['c'] for key, val in meta[dict_name]['c'].items()} 76 | if all([isinstance(key, str) and isinstance(val, str) for key, val in ret.items()]): 77 | return ret 78 | except (KeyError, IndexError, AttributeError): 79 | pass 80 | raise TypeError('Invalid {0} metadata section.'.format(dict_name)) 81 | else: 82 | return {} 83 | 84 | def map_to_kernel(self, lang: str): 85 | return self._kernels.get(lang, lang) 86 | 87 | def map_to_style(self, lang: str): 88 | return self._styles.get(lang, lang) 89 | -------------------------------------------------------------------------------- /tests/test_parser.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import stitch.parser as P 4 | from stitch.parser import Token 5 | 6 | 7 | class TestParser: 8 | 9 | @pytest.mark.parametrize('options, expected', [ 10 | ('```{python}', '```{.python}'), 11 | ('```{r, name}', '```{.r .name}'), 12 | ('```{r, echo=True}', '```{.r echo=True}'), 13 | ('```{r, name, echo=True, eval=False}', 14 | '```{.r .name echo=True eval=False}'), 15 | ('```{r, fig.cap="Caption"}', '```{.r fig.cap="Caption"}'), 16 | ('```{r, fig.cap="Cap, 2", echo=True}', 17 | '```{.r fig.cap="Cap, 2" echo=True}'), 18 | ('```{r, echo=True, fig.cap="Cap, 2"}', 19 | '```{.r echo=True fig.cap="Cap, 2"}'), 20 | ('```{r, fig.cap="Caption, too"}', '```{.r fig.cap="Caption, too"}'), 21 | ]) 22 | def test_preprocess(self, options, expected): 23 | result = P.preprocess_options(options) 24 | assert result == expected 25 | 26 | def test_tokenize(self): 27 | line = '```{r, fig.width=bar}' 28 | result = P.tokenize(line) 29 | expected = [ 30 | Token('OPEN', '```{'), 31 | Token('ARG', 'r'), 32 | Token('DELIM', ', '), 33 | Token('KWARG', 'fig.width=bar'), 34 | Token('CLOSE', '}'), 35 | ] 36 | 37 | assert result == expected 38 | 39 | def test_tokenize_quote(self): 40 | line = '```{r, fig.cap="A, Caption", echo=True}' 41 | result = P.tokenize(line) 42 | expected = [ 43 | Token('OPEN', '```{'), 44 | Token('ARG', 'r'), 45 | Token('DELIM', ', '), 46 | Token('KWARG', 'fig.cap="A, Caption"'), 47 | Token('DELIM', ', '), 48 | Token('KWARG', 'echo=True'), 49 | Token('CLOSE', '}'), 50 | ] 51 | 52 | assert result == expected 53 | 54 | @pytest.mark.parametrize('kind,text,expected', [ 55 | ("ARG", "r", ".r"), 56 | ("DELIM", ",", None), 57 | ("BLANK", " ", None), 58 | ("OPEN", "```{", "```{"), 59 | ("CLOSE", "}", "}"), 60 | ("KWARG", "foo=bar", "foo=bar"), 61 | ]) 62 | def test_transfrom_args(self, kind, text, expected): 63 | result = P._transform(kind, text) 64 | assert result == expected 65 | 66 | @pytest.mark.parametrize('options', [ 67 | # '```{r name foo=bar}''', # bad comma 68 | # '```{r foo=bar}''' # no comma 69 | '```{r, foo=bar''' # no curly 70 | ]) 71 | def test_preprocess_raises(self, options): 72 | with pytest.raises(TypeError): 73 | P.validate_options(options) 74 | 75 | def test_transform_raises(self): 76 | with pytest.raises(TypeError): 77 | P._transform('fake', 'foo') 78 | -------------------------------------------------------------------------------- /README.rst: 
-------------------------------------------------------------------------------- 1 | Stitch 2 | ====== 3 | 4 | |Build Status| 5 | 6 | A `knitr <http://yihui.name/knitr/>`__- `RMarkdown <http://rmarkdown.rstudio.com/>`__-like library, in Python. 7 | 8 | *Note:* You might want to consider Jan Schulz's `knitpy <https://github.com/janschulz/knitpy>`__ 9 | instead. It's probably more mature at this point. However, I wanted 10 | to see if there was a simpler way of doing things. 11 | 12 | The high-level goal of this type of library (knitr/RMarkdown, knitpy, and stitch) is to make writing 13 | reproducible reports easier. 14 | 15 | Documentation is available `here <https://pystitch.github.io>`__. 16 | 17 | Examples 18 | ======== 19 | 20 | See the project's `examples page <https://pystitch.github.io>`__ for a 21 | side-by-side comparison of input markdown and stitched HTML. 22 | 23 | More complex examples are linked to from there as well. 24 | 25 | Install 26 | ======= 27 | 28 | ``stitch`` supports Python 3.5 and above. 29 | At the moment ``stitch`` can be installed from PyPI via 30 | 31 | .. code-block:: sh 32 | 33 | pip install knotr 34 | 35 | I know, it's confusing. 36 | I've filed a claim for ``stitch`` on PyPI, but I think the people working that support queue are overworked. 37 | Once that gets processed, I'll put it up on conda-forge as well. 38 | If you need a mnemonic, it's "I want knitr, but `not` the one written in `R`." 39 | Also I wanted to confuse R users. 40 | And knots are kind of like a buggy version of knits. 41 | 42 | ``stitch`` requires pandoc>=1.18. This can be installed using your 43 | system package manager, or `pypandoc <https://pypi.org/project/pypandoc/>`__. 44 | 45 | Design 46 | ====== 47 | 48 | The goal was to keep ``stitch`` itself extremely simple by reusing 49 | existing libraries. A high-level overview of our tasks is 50 | 51 | 1. Command-line Interface 52 | 2. Parse markdown file 53 | 3. Execute code chunks, capturing the output 54 | 4. Collate execution output into the document 55 | 5. Render to final output 56 | 57 | Fortunately the building blocks are all there. 58 | 59 | We reuse 60 | 61 | - `pandoc <http://pandoc.org/>`__ via 62 | `pypandoc <https://pypi.org/project/pypandoc/>`__ for parsing 63 | markdown and rendering the final output 64 | - `jupyter <https://jupyter.org>`__ for 65 | language kernels, executing code, and collecting the output 66 | - `pandocfilters <https://github.com/jgm/pandocfilters>`__ to 67 | collate the execution output into the document 68 | 69 | So all ``stitch`` has to do is provide a command-line interface, scan 70 | the document for code chunks, manage some kernels, hand the code to the 71 | kernels, and pass the output to an appropriate ``pandocfilter``. 72 | 73 | The biggest departure from ``knitpy`` is the use of pandoc's JSON AST. 74 | This is what you get from ``pandoc -t json input.md``. 75 | 76 | This saves us from having to do any kind of custom parsing of the markdown. 77 | The only drawback so far is the somewhat inscrutable Haskell exceptions if 78 | ``stitch`` happens to produce a bad document. 79 | 80 | Documentation 81 | ============= 82 | 83 | Stitch's documentation has an odd build process, so standard tools like 84 | readthedocs weren't flexible enough. 85 | To make the docs, install stitch and all the extra dependencies. 86 | Clone https://github.com/pystitch/pystitch.github.io 87 | 88 | Check out the `src` branch. 89 | 90 | Run `make html`. 91 | 92 | ..
|Build Status| image:: https://travis-ci.org/pystitch/stitch.svg?branch=master 93 | :target: https://travis-ci.org/pystitch/stitch 94 | -------------------------------------------------------------------------------- /examples/knitpy_overview.pymd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "knitpy: dynamic report generation with python" 3 | author: "Jan Schulz" 4 | date: "12.03.2015" 5 | output: 6 | pdf_document: default 7 | word_document: default 8 | html_document: 9 | keep_md: yes 10 | --- 11 | 12 | This is a port of knitr (http://yihui.name/knitr/) and rmarkdown 13 | (http://rmarkdown.rstudio.com/) to python. 14 | 15 | For a complete description of the code format see http://rmarkdown.rstudio.com/ and replace 16 | `{r...}` by `{python ...}` and of course use python code blocks... 17 | 18 | ## Examples 19 | 20 | Here are some examples: 21 | 22 | ```{python} 23 | print("Execute some code chunk and show the result") 24 | ``` 25 | 26 | Codechunks which contain lines without output (e.g. assign the result or comments) will 27 | be shown in the same code block: 28 | 29 | ```{python} 30 | # A comment 31 | text = "All code in the same code block until some output is produced..." 32 | more_text = "...and some more." 33 | print(text) 34 | print(more_text) 35 | ``` 36 | 37 | ### Code chunk arguments 38 | 39 | You can use different arguments in the codechunk declaration. Using `echo=False` will not show 40 | the code but only the result. 41 | 42 | ```{python, echo=False} 43 | print("Only the output will be visible as `echo=False`") 44 | ``` 45 | 46 | The next paragraphs explores the code chunk argument `results`. 47 | 48 | If 'hide', knitpy will not display the code's results in the final document. If 'hold', knitpy 49 | will delay displaying all output pieces until the end of the chunk. If 'asis', knitpy will pass 50 | through results without reformatting them (useful if results return raw HTML, etc.) 51 | 52 | `results='hold'` is not yet implemented. 53 | 54 | ```{python, results="hide"} 55 | print("Only the input is displayed, not the output") 56 | ``` 57 | 58 | ```{python, results="markup", echo=False} 59 | print("This is formatted as markdown:\n**This text** will be bold...") 60 | ``` 61 | 62 | ```{python, results="asis", echo=False} 63 | print("**This text** will be bold...") 64 | ``` 65 | 66 | **Note**: with python code it is recommended to use the IPython/Jupyter display system and an 67 | appropriate wrapper (see below) to display such output and not `results="asis"`. This makes it 68 | possible to convert such output if the output can't be included in the final format. 69 | 70 | You can also not show codeblocks at all, but they will be run (not included codeblock sets 71 | `have_run = True`): 72 | 73 | ```{python, include=False} 74 | have_run = True 75 | print("This will not be shown, as include is False") 76 | ``` 77 | 78 | ```{python, include=True} 79 | if have_run == True: 80 | print("'have_run==True': ran the codeblock before this one.") 81 | ``` 82 | 83 | Using `eval=False`, one can prevent the evaluation of the codechunk 84 | 85 | ```{python} 86 | x = 1 87 | ``` 88 | 89 | ```{python, eval=False} 90 | x += 1 # this is not executed as eval is False 91 | ``` 92 | 93 | ```{python} 94 | x # still 1 95 | ``` 96 | 97 | 98 | To remove/hide a codechunk completely, i.e. neither execute it nor show the code, you can use both `eval=False, include=False`: nothing will be 99 | shown between this text ... 
100 | 101 | ```{python, eval=False, include=False} 102 | x += 1 # this is not executed and not even shown 103 | ``` 104 | 105 | ... and this text here! 106 | 107 | The prefix in front of text output (per default `##`) can be changed via the `comment` chunk 108 | option to a different string or completely removed by setting it to a empty string `""`or None: 109 | 110 | ```{python, comment="# result:"} 111 | print("Text output") 112 | ``` 113 | 114 | ```{python, comment=""} 115 | print("Text output") 116 | ``` 117 | 118 | ### Inline code 119 | 120 | You can also include code inline: "m=`python 1+1`" (expected: "m=2") 121 | 122 | ### IPython / Jupyter display framework 123 | 124 | The display framework is also supported. 125 | 126 | Plots will be included as images and included in the document. The filename of the 127 | plot is derived from the chunk label ("sinus" in this case). The code is not 128 | shown in this case (`echo=False`). 129 | 130 | ```{python, sinus, echo=False} 131 | # As this all produces no output, it should go into the same input section... 132 | import numpy as np 133 | import matplotlib.pyplot as plt 134 | y = np.linspace(2, 10) 135 | line, = plt.plot(y, np.sin(y)) 136 | ``` 137 | 138 | If a html or similar thing is displayed via the IPython display framework, it will be 139 | included 'as is', meaning that apart from `text/plain`-only output, everything else 140 | will be included without marking it up as output. Knitpy automagically tries to include only 141 | formats which are understood by pandoc and the final output format (in some case converting the 142 | format to one which the final output can handle). 143 | 144 | ```{python, engine="python"} 145 | from IPython.core.display import display, HTML 146 | display(HTML("strong text")) 147 | ``` 148 | 149 | It even handles `pandas.DataFrames` (be aware that not all formatting can be converted into all 150 | output formats): 151 | 152 | ```{python} 153 | import pandas as pd 154 | pd.set_option("display.width", 200) 155 | s = """This is longer text""" 156 | df = pd.DataFrame({"a":[1,2,3,4,5],"b":[s,"b","c",s,"e"]}) 157 | df 158 | ``` 159 | 160 | `pandas.DataFrame` can be represented as `text/plain` or `text/html`, but will default to the html 161 | version. To force plain text, use either `print(df)` or set the right `pandas` option: 162 | 163 | ```{python} 164 | pd.set_option("display.notebook_repr_html", False) 165 | df 166 | # set back the display 167 | pd.set_option("display.notebook_repr_html", True) 168 | ``` 169 | 170 | You can also use package like [tabulate](https://bitbucket.org/astanin/python-tabulate) 171 | together with `results="asis"` or by wrapping it with the appropriate display class: 172 | 173 | ```{python, results="asis"} 174 | from tabulate import tabulate 175 | from IPython.core.display import Markdown 176 | # either print and use `results="asis"` 177 | print(tabulate(df, list(df.columns), tablefmt="simple")) 178 | # or use the IPython display framework to publish markdown 179 | Markdown(tabulate(df, list(df.columns), tablefmt="simple")) 180 | ``` 181 | 182 | -------------------------------------------------------------------------------- /stitch/_version.py: -------------------------------------------------------------------------------- 1 | 2 | # This file helps to compute a version number in source trees obtained from 3 | # git-archive tarball (such as those provided by githubs download-from-tag 4 | # feature). 
Distribution tarballs (built by setup.py sdist) and build 5 | # directories (produced by setup.py build) will contain a much shorter file 6 | # that just contains the computed version number. 7 | 8 | # This file is released into the public domain. Generated by 9 | # versioneer-0.16 (https://github.com/warner/python-versioneer) 10 | 11 | """Git implementation of _version.py.""" 12 | 13 | import errno 14 | import os 15 | import re 16 | import subprocess 17 | import sys 18 | 19 | 20 | def get_keywords(): 21 | """Get the keywords needed to look up the version information.""" 22 | # these strings will be replaced by git during git-archive. 23 | # setup.py/versioneer.py will grep for the variable names, so they must 24 | # each be defined on a line of their own. _version.py will just call 25 | # get_keywords(). 26 | git_refnames = " (HEAD -> master)" 27 | git_full = "09a16da2f2af2be6a960e2338de488c8de2c2271" 28 | keywords = {"refnames": git_refnames, "full": git_full} 29 | return keywords 30 | 31 | 32 | class VersioneerConfig: 33 | """Container for Versioneer configuration parameters.""" 34 | 35 | 36 | def get_config(): 37 | """Create, populate and return the VersioneerConfig() object.""" 38 | # these strings are filled in when 'setup.py versioneer' creates 39 | # _version.py 40 | cfg = VersioneerConfig() 41 | cfg.VCS = "git" 42 | cfg.style = "pep440" 43 | cfg.tag_prefix = "" 44 | cfg.parentdir_prefix = "None" 45 | cfg.versionfile_source = "stitch/_version.py" 46 | cfg.verbose = False 47 | return cfg 48 | 49 | 50 | class NotThisMethod(Exception): 51 | """Exception raised if a method is not valid for the current scenario.""" 52 | 53 | 54 | LONG_VERSION_PY = {} 55 | HANDLERS = {} 56 | 57 | 58 | def register_vcs_handler(vcs, method): # decorator 59 | """Decorator to mark a method as the handler for a particular VCS.""" 60 | def decorate(f): 61 | """Store f in HANDLERS[vcs][method].""" 62 | if vcs not in HANDLERS: 63 | HANDLERS[vcs] = {} 64 | HANDLERS[vcs][method] = f 65 | return f 66 | return decorate 67 | 68 | 69 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False): 70 | """Call the given command(s).""" 71 | assert isinstance(commands, list) 72 | p = None 73 | for c in commands: 74 | try: 75 | dispcmd = str([c] + args) 76 | # remember shell=False, so use git.cmd on windows, not just git 77 | p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE, 78 | stderr=(subprocess.PIPE if hide_stderr 79 | else None)) 80 | break 81 | except EnvironmentError: 82 | e = sys.exc_info()[1] 83 | if e.errno == errno.ENOENT: 84 | continue 85 | if verbose: 86 | print("unable to run %s" % dispcmd) 87 | print(e) 88 | return None 89 | else: 90 | if verbose: 91 | print("unable to find command, tried %s" % (commands,)) 92 | return None 93 | stdout = p.communicate()[0].strip() 94 | if sys.version_info[0] >= 3: 95 | stdout = stdout.decode() 96 | if p.returncode != 0: 97 | if verbose: 98 | print("unable to run %s (error)" % dispcmd) 99 | return None 100 | return stdout 101 | 102 | 103 | def versions_from_parentdir(parentdir_prefix, root, verbose): 104 | """Try to determine the version from the parent directory name. 105 | 106 | Source tarballs conventionally unpack into a directory that includes 107 | both the project name and a version string. 
108 | """ 109 | dirname = os.path.basename(root) 110 | if not dirname.startswith(parentdir_prefix): 111 | if verbose: 112 | print("guessing rootdir is '%s', but '%s' doesn't start with " 113 | "prefix '%s'" % (root, dirname, parentdir_prefix)) 114 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix") 115 | return {"version": dirname[len(parentdir_prefix):], 116 | "full-revisionid": None, 117 | "dirty": False, "error": None} 118 | 119 | 120 | @register_vcs_handler("git", "get_keywords") 121 | def git_get_keywords(versionfile_abs): 122 | """Extract version information from the given file.""" 123 | # the code embedded in _version.py can just fetch the value of these 124 | # keywords. When used from setup.py, we don't want to import _version.py, 125 | # so we do it with a regexp instead. This function is not used from 126 | # _version.py. 127 | keywords = {} 128 | try: 129 | f = open(versionfile_abs, "r") 130 | for line in f.readlines(): 131 | if line.strip().startswith("git_refnames ="): 132 | mo = re.search(r'=\s*"(.*)"', line) 133 | if mo: 134 | keywords["refnames"] = mo.group(1) 135 | if line.strip().startswith("git_full ="): 136 | mo = re.search(r'=\s*"(.*)"', line) 137 | if mo: 138 | keywords["full"] = mo.group(1) 139 | f.close() 140 | except EnvironmentError: 141 | pass 142 | return keywords 143 | 144 | 145 | @register_vcs_handler("git", "keywords") 146 | def git_versions_from_keywords(keywords, tag_prefix, verbose): 147 | """Get version information from git keywords.""" 148 | if not keywords: 149 | raise NotThisMethod("no keywords at all, weird") 150 | refnames = keywords["refnames"].strip() 151 | if refnames.startswith("$Format"): 152 | if verbose: 153 | print("keywords are unexpanded, not using") 154 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball") 155 | refs = set([r.strip() for r in refnames.strip("()").split(",")]) 156 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of 157 | # just "foo-1.0". If we see a "tag: " prefix, prefer those. 158 | TAG = "tag: " 159 | tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) 160 | if not tags: 161 | # Either we're using git < 1.8.3, or there really are no tags. We use 162 | # a heuristic: assume all version tags have a digit. The old git %d 163 | # expansion behaves like git log --decorate=short and strips out the 164 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish 165 | # between branches and tags. By ignoring refnames without digits, we 166 | # filter out many common branch names like "release" and 167 | # "stabilization", as well as "HEAD" and "master". 168 | tags = set([r for r in refs if re.search(r'\d', r)]) 169 | if verbose: 170 | print("discarding '%s', no digits" % ",".join(refs-tags)) 171 | if verbose: 172 | print("likely tags: %s" % ",".join(sorted(tags))) 173 | for ref in sorted(tags): 174 | # sorting will prefer e.g. 
"2.0" over "2.0rc1" 175 | if ref.startswith(tag_prefix): 176 | r = ref[len(tag_prefix):] 177 | if verbose: 178 | print("picking %s" % r) 179 | return {"version": r, 180 | "full-revisionid": keywords["full"].strip(), 181 | "dirty": False, "error": None 182 | } 183 | # no suitable tags, so version is "0+unknown", but full hex is still there 184 | if verbose: 185 | print("no suitable tags, using unknown + full revision id") 186 | return {"version": "0+unknown", 187 | "full-revisionid": keywords["full"].strip(), 188 | "dirty": False, "error": "no suitable tags"} 189 | 190 | 191 | @register_vcs_handler("git", "pieces_from_vcs") 192 | def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): 193 | """Get version from 'git describe' in the root of the source tree. 194 | 195 | This only gets called if the git-archive 'subst' keywords were *not* 196 | expanded, and _version.py hasn't already been rewritten with a short 197 | version string, meaning we're inside a checked out source tree. 198 | """ 199 | if not os.path.exists(os.path.join(root, ".git")): 200 | if verbose: 201 | print("no .git in %s" % root) 202 | raise NotThisMethod("no .git directory") 203 | 204 | GITS = ["git"] 205 | if sys.platform == "win32": 206 | GITS = ["git.cmd", "git.exe"] 207 | # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] 208 | # if there isn't one, this yields HEX[-dirty] (no NUM) 209 | describe_out = run_command(GITS, ["describe", "--tags", "--dirty", 210 | "--always", "--long", 211 | "--match", "%s*" % tag_prefix], 212 | cwd=root) 213 | # --long was added in git-1.5.5 214 | if describe_out is None: 215 | raise NotThisMethod("'git describe' failed") 216 | describe_out = describe_out.strip() 217 | full_out = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) 218 | if full_out is None: 219 | raise NotThisMethod("'git rev-parse' failed") 220 | full_out = full_out.strip() 221 | 222 | pieces = {} 223 | pieces["long"] = full_out 224 | pieces["short"] = full_out[:7] # maybe improved later 225 | pieces["error"] = None 226 | 227 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] 228 | # TAG might have hyphens. 229 | git_describe = describe_out 230 | 231 | # look for -dirty suffix 232 | dirty = git_describe.endswith("-dirty") 233 | pieces["dirty"] = dirty 234 | if dirty: 235 | git_describe = git_describe[:git_describe.rindex("-dirty")] 236 | 237 | # now we have TAG-NUM-gHEX or HEX 238 | 239 | if "-" in git_describe: 240 | # TAG-NUM-gHEX 241 | mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) 242 | if not mo: 243 | # unparseable. Maybe git-describe is misbehaving? 
244 | pieces["error"] = ("unable to parse git-describe output: '%s'" 245 | % describe_out) 246 | return pieces 247 | 248 | # tag 249 | full_tag = mo.group(1) 250 | if not full_tag.startswith(tag_prefix): 251 | if verbose: 252 | fmt = "tag '%s' doesn't start with prefix '%s'" 253 | print(fmt % (full_tag, tag_prefix)) 254 | pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" 255 | % (full_tag, tag_prefix)) 256 | return pieces 257 | pieces["closest-tag"] = full_tag[len(tag_prefix):] 258 | 259 | # distance: number of commits since tag 260 | pieces["distance"] = int(mo.group(2)) 261 | 262 | # commit: short hex revision ID 263 | pieces["short"] = mo.group(3) 264 | 265 | else: 266 | # HEX: no tags 267 | pieces["closest-tag"] = None 268 | count_out = run_command(GITS, ["rev-list", "HEAD", "--count"], 269 | cwd=root) 270 | pieces["distance"] = int(count_out) # total number of commits 271 | 272 | return pieces 273 | 274 | 275 | def plus_or_dot(pieces): 276 | """Return a + if we don't already have one, else return a .""" 277 | if "+" in pieces.get("closest-tag", ""): 278 | return "." 279 | return "+" 280 | 281 | 282 | def render_pep440(pieces): 283 | """Build up version string, with post-release "local version identifier". 284 | 285 | Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you 286 | get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty 287 | 288 | Exceptions: 289 | 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] 290 | """ 291 | if pieces["closest-tag"]: 292 | rendered = pieces["closest-tag"] 293 | if pieces["distance"] or pieces["dirty"]: 294 | rendered += plus_or_dot(pieces) 295 | rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) 296 | if pieces["dirty"]: 297 | rendered += ".dirty" 298 | else: 299 | # exception #1 300 | rendered = "0+untagged.%d.g%s" % (pieces["distance"], 301 | pieces["short"]) 302 | if pieces["dirty"]: 303 | rendered += ".dirty" 304 | return rendered 305 | 306 | 307 | def render_pep440_pre(pieces): 308 | """TAG[.post.devDISTANCE] -- No -dirty. 309 | 310 | Exceptions: 311 | 1: no tags. 0.post.devDISTANCE 312 | """ 313 | if pieces["closest-tag"]: 314 | rendered = pieces["closest-tag"] 315 | if pieces["distance"]: 316 | rendered += ".post.dev%d" % pieces["distance"] 317 | else: 318 | # exception #1 319 | rendered = "0.post.dev%d" % pieces["distance"] 320 | return rendered 321 | 322 | 323 | def render_pep440_post(pieces): 324 | """TAG[.postDISTANCE[.dev0]+gHEX] . 325 | 326 | The ".dev0" means dirty. Note that .dev0 sorts backwards 327 | (a dirty tree will appear "older" than the corresponding clean one), 328 | but you shouldn't be releasing software with -dirty anyways. 329 | 330 | Exceptions: 331 | 1: no tags. 0.postDISTANCE[.dev0] 332 | """ 333 | if pieces["closest-tag"]: 334 | rendered = pieces["closest-tag"] 335 | if pieces["distance"] or pieces["dirty"]: 336 | rendered += ".post%d" % pieces["distance"] 337 | if pieces["dirty"]: 338 | rendered += ".dev0" 339 | rendered += plus_or_dot(pieces) 340 | rendered += "g%s" % pieces["short"] 341 | else: 342 | # exception #1 343 | rendered = "0.post%d" % pieces["distance"] 344 | if pieces["dirty"]: 345 | rendered += ".dev0" 346 | rendered += "+g%s" % pieces["short"] 347 | return rendered 348 | 349 | 350 | def render_pep440_old(pieces): 351 | """TAG[.postDISTANCE[.dev0]] . 352 | 353 | The ".dev0" means dirty. 354 | 355 | Eexceptions: 356 | 1: no tags. 
0.postDISTANCE[.dev0] 357 | """ 358 | if pieces["closest-tag"]: 359 | rendered = pieces["closest-tag"] 360 | if pieces["distance"] or pieces["dirty"]: 361 | rendered += ".post%d" % pieces["distance"] 362 | if pieces["dirty"]: 363 | rendered += ".dev0" 364 | else: 365 | # exception #1 366 | rendered = "0.post%d" % pieces["distance"] 367 | if pieces["dirty"]: 368 | rendered += ".dev0" 369 | return rendered 370 | 371 | 372 | def render_git_describe(pieces): 373 | """TAG[-DISTANCE-gHEX][-dirty]. 374 | 375 | Like 'git describe --tags --dirty --always'. 376 | 377 | Exceptions: 378 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 379 | """ 380 | if pieces["closest-tag"]: 381 | rendered = pieces["closest-tag"] 382 | if pieces["distance"]: 383 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 384 | else: 385 | # exception #1 386 | rendered = pieces["short"] 387 | if pieces["dirty"]: 388 | rendered += "-dirty" 389 | return rendered 390 | 391 | 392 | def render_git_describe_long(pieces): 393 | """TAG-DISTANCE-gHEX[-dirty]. 394 | 395 | Like 'git describe --tags --dirty --always -long'. 396 | The distance/hash is unconditional. 397 | 398 | Exceptions: 399 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 400 | """ 401 | if pieces["closest-tag"]: 402 | rendered = pieces["closest-tag"] 403 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 404 | else: 405 | # exception #1 406 | rendered = pieces["short"] 407 | if pieces["dirty"]: 408 | rendered += "-dirty" 409 | return rendered 410 | 411 | 412 | def render(pieces, style): 413 | """Render the given version pieces into the requested style.""" 414 | if pieces["error"]: 415 | return {"version": "unknown", 416 | "full-revisionid": pieces.get("long"), 417 | "dirty": None, 418 | "error": pieces["error"]} 419 | 420 | if not style or style == "default": 421 | style = "pep440" # the default 422 | 423 | if style == "pep440": 424 | rendered = render_pep440(pieces) 425 | elif style == "pep440-pre": 426 | rendered = render_pep440_pre(pieces) 427 | elif style == "pep440-post": 428 | rendered = render_pep440_post(pieces) 429 | elif style == "pep440-old": 430 | rendered = render_pep440_old(pieces) 431 | elif style == "git-describe": 432 | rendered = render_git_describe(pieces) 433 | elif style == "git-describe-long": 434 | rendered = render_git_describe_long(pieces) 435 | else: 436 | raise ValueError("unknown style '%s'" % style) 437 | 438 | return {"version": rendered, "full-revisionid": pieces["long"], 439 | "dirty": pieces["dirty"], "error": None} 440 | 441 | 442 | def get_versions(): 443 | """Get version information or return default if unable to do so.""" 444 | # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have 445 | # __file__, we can work backwards from there to the root. Some 446 | # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which 447 | # case we can only use expanded keywords. 448 | 449 | cfg = get_config() 450 | verbose = cfg.verbose 451 | 452 | try: 453 | return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, 454 | verbose) 455 | except NotThisMethod: 456 | pass 457 | 458 | try: 459 | root = os.path.realpath(__file__) 460 | # versionfile_source is the relative path from the top of the source 461 | # tree (where the .git directory might live) to this file. Invert 462 | # this to find the root from __file__. 
463 | for i in cfg.versionfile_source.split('/'): 464 | root = os.path.dirname(root) 465 | except NameError: 466 | return {"version": "0+unknown", "full-revisionid": None, 467 | "dirty": None, 468 | "error": "unable to find root of source tree"} 469 | 470 | try: 471 | pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) 472 | return render(pieces, cfg.style) 473 | except NotThisMethod: 474 | pass 475 | 476 | try: 477 | if cfg.parentdir_prefix: 478 | return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) 479 | except NotThisMethod: 480 | pass 481 | 482 | return {"version": "0+unknown", "full-revisionid": None, 483 | "dirty": None, 484 | "error": "unable to compute version"} 485 | -------------------------------------------------------------------------------- /tests/test_stitcher.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import uuid 4 | import shutil 5 | import datetime 6 | from textwrap import dedent 7 | 8 | import pytest 9 | import pypandoc 10 | from traitlets import TraitError 11 | 12 | import stitch.stitch as R 13 | from stitch.cli import enhance_args, CSS 14 | 15 | 16 | HERE = os.path.dirname(__file__) 17 | 18 | 19 | @pytest.fixture(scope='module') 20 | def global_python_kernel(): 21 | """ 22 | A python kernel anyone can use. 23 | """ 24 | return R.kernel_factory('python') 25 | 26 | 27 | @pytest.fixture(scope='function') 28 | def clean_python_kernel(global_python_kernel): 29 | """ 30 | Takes ``global_python_kernel`` and resets all variables, 31 | returning the clean kernel. 32 | """ 33 | R.run_code('%reset -f', global_python_kernel) 34 | return global_python_kernel 35 | 36 | 37 | @pytest.fixture 38 | def clean_name(): 39 | name = str(uuid.uuid1()) 40 | yield name 41 | shutil.rmtree(name + '_files') 42 | 43 | 44 | @pytest.fixture 45 | def clean_stdout(): 46 | yield 47 | shutil.rmtree('std_out_files') 48 | 49 | 50 | @pytest.fixture 51 | def document_path(): 52 | "Path to a markdown document" 53 | return os.path.join(HERE, 'data', 'small.md') 54 | 55 | 56 | @pytest.fixture 57 | def document(): 58 | "In-memory markdown document" 59 | with open(os.path.join(HERE, 'data', 'small.md')) as f: 60 | doc = f.read() 61 | return doc 62 | 63 | 64 | @pytest.fixture 65 | def as_json(document): 66 | "JSON representation of the markdown document" 67 | return json.loads(pypandoc.convert_text(document, 'json', 68 | format='markdown')) 69 | 70 | 71 | @pytest.fixture(params=['python', 'R'], ids=['python', 'R']) 72 | def code_block(request): 73 | if request.param == 'python': 74 | code = 'def f(x):\n return x * 2\n\nf(2)' 75 | elif request.param == 'R': 76 | code = 'f <- function(x){\n return(x * 2)\n}\n\nf(2)' 77 | block = {'t': 'CodeBlock', 78 | 'c': [['', ['{}'.format(request.param)], []], 79 | code]} 80 | return block 81 | 82 | 83 | @pytest.fixture 84 | def python_kp(): 85 | return R.kernel_factory('python') 86 | 87 | 88 | class TestTesters: 89 | 90 | @pytest.mark.parametrize('block, expected', [ 91 | ({'t': 'CodeBlock', 92 | 'c': [['', ['{python}'], []], 93 | 'def f(x):\n return x * 2\n\nf(2)']}, True), 94 | ({'c': [{'c': 'With', 't': 'Str'}, 95 | {'c': [], 't': 'Space'}, 96 | {'c': 'options', 't': 'Str'}], 't': 'Para'}, False), 97 | ]) 98 | def test_is_code_block(self, block, expected): 99 | result = R.is_code_block(block) 100 | assert result == expected 101 | 102 | @pytest.mark.parametrize('output, attrs, expected', [ 103 | ([], {}, False), 104 | ([None], {}, False), 105 | ([{'text/plain': '4'}], {}, 
True), 106 | ([{'text/plain': '4'}], {'results': 'hide'}, False), 107 | ]) 108 | def test_is_stitchable(self, output, attrs, expected): 109 | result = R.is_stitchable(output, attrs) 110 | assert result == expected 111 | 112 | @pytest.mark.parametrize('block, lang, attrs, expected', [ 113 | ({'t': 'CodeBlock', 114 | 'c': [['', ['{python}'], []], 115 | 'def f(x):\n return x * 2\n\nf(2)']}, 'python', {}, True), 116 | 117 | ({'c': [{'c': 'With', 't': 'Str'}, 118 | {'c': [], 't': 'Space'}, 119 | {'c': 'options', 't': 'Str'}], 't': 'Para'}, '', {}, False), 120 | 121 | ({'t': 'CodeBlock', 122 | 'c': [['', ['{r}'], []], 123 | '2+2']}, 'r', {'eval': False}, False), 124 | ]) 125 | def test_is_executable(self, block, lang, attrs, expected): 126 | result = R.is_executable(block, lang, attrs) 127 | assert result is expected 128 | 129 | @pytest.mark.parametrize('message, expected', [ 130 | ({'content': {'name': 'stdout'}}, True), 131 | ({'content': {'name': 'stderr'}}, False), 132 | ({'content': {}}, False), 133 | ]) 134 | def test_is_stdout(self, message, expected): 135 | result = R.is_stdout(message) 136 | assert result == expected 137 | 138 | @pytest.mark.parametrize('message, expected', [ 139 | ({'content': {'name': 'stdout'}}, False), 140 | ({'content': {'name': 'stderr'}}, True), 141 | ({'content': {}}, False), 142 | ]) 143 | def test_is_stderr(selr, message, expected): 144 | result = R.is_stderr(message) 145 | assert result == expected 146 | 147 | @pytest.mark.parametrize('message, expected', [ 148 | ({'msg_type': 'execute_input'}, True), 149 | ({'msg_type': 'idle'}, False), 150 | ]) 151 | def test_is_execute_input(selr, message, expected): 152 | result = R.is_execute_input(message) 153 | assert result == expected 154 | 155 | 156 | class TestKernelArgs: 157 | 158 | @pytest.mark.parametrize('block, expected', [ 159 | ({'t': 'CodeBlock', 'c': [['', ['python'], []], 'foo']}, 'python'), 160 | ({'t': 'CodeBlock', 'c': [['', ['ir'], ['foo']], 'foo']}, 'ir'), 161 | ({'t': 'CodeBlock', 'c': [['', ['ir'], [['foo', 'bar']]], 'foo']}, 162 | 'ir'), 163 | ]) 164 | def test_extract_kernel_name(self, block, expected): 165 | result = R.extract_kernel_name(block) 166 | assert result == expected 167 | 168 | @pytest.mark.parametrize('block, lang, attrs, expected', [ 169 | ({'t': 'CodeBlock', 170 | 'c': [['', ['{python}'], []], 171 | 'def f(x):\n return x * 2\n\nf(2)']}, 'python', {}, True), 172 | 173 | ({'c': [{'c': 'With', 't': 'Str'}, 174 | {'c': [], 't': 'Space'}, 175 | {'c': 'options', 't': 'Str'}], 't': 'Para'}, '', {}, False), 176 | 177 | ({'t': 'CodeBlock', 178 | 'c': [['', ['{r}'], []], 179 | '2+2']}, 'r', {'eval': False}, False), 180 | ]) 181 | def test_is_executable(self, block, lang, attrs, expected): 182 | result = R.is_executable(block, lang, attrs) 183 | assert result is expected 184 | 185 | @pytest.mark.parametrize('code_block, expected', [ 186 | ({'c': [['', ['python'], []], '3'], 't': 'CodeBlock'}, 187 | (('python', None), {})), 188 | ({'c': [['', ['python', 'name'], []], '3'], 't': 'CodeBlock'}, 189 | (('python', 'name'), {})), 190 | ({'c': [['', ['r', 'n'], [['foo', 'bar']]], '3'], 't': 'CodeBlock'}, 191 | (('r', 'n'), {'foo': 'bar'})), 192 | ({'c': [['', [], [['foo', 'bar']]], '4'], 't': 'CodeBlock'}, 193 | ((None, None), {'foo': 'bar'})), 194 | ]) 195 | def test_parse_kernel_arguments(self, code_block, expected): 196 | result = R.parse_kernel_arguments(code_block) 197 | assert result == expected 198 | 199 | 200 | class TestFormatters: 201 | 202 | def test_format_input(self): 203 | code = '2 + 
2' 204 | expected = '>>> 2 + 2' 205 | result = R.format_input_prompt('>>> ', code, None) 206 | assert result == expected 207 | 208 | def test_format_input_multi(self): 209 | code = dedent('''\ 210 | def f(x): 211 | return x''') 212 | expected = dedent('''\ 213 | >>> def f(x): 214 | >>> return x''') 215 | result = R.format_input_prompt('>>> ', code, None) 216 | assert result == expected 217 | 218 | def test_format_ipython_input(self): 219 | code = '2 + 2' 220 | expected = 'In [1]: 2 + 2' 221 | result = R.format_ipython_prompt(code, 1) 222 | assert result == expected 223 | 224 | def test_format_input_none(self): 225 | code = 'abcde' 226 | result = R.format_ipython_prompt(code, None) 227 | assert result == code 228 | 229 | def test_format_ipython_input_multi(self): 230 | code = dedent('''\ 231 | def f(x): 232 | return x + 2 233 | 234 | f(2) 235 | ''').strip() 236 | expected = dedent('''\ 237 | In [10]: def f(x): 238 | ...: return x + 2 239 | ...: 240 | ...: f(2) 241 | ''').strip() 242 | result = R.format_ipython_prompt(code, 10) 243 | assert result == expected 244 | 245 | def test_wrap_input__code(self): 246 | block = {'t': 'code', 'c': ['a', ['b'], 'c']} 247 | result = R.wrap_input_code(block, True, None, None) 248 | assert block is not result 249 | 250 | @pytest.mark.parametrize('messages,expected', [ 251 | ([{'content': {'data': {}, 252 | 'execution_count': 4}, 253 | 'header': {'msg_type': 'execute_result'}}], 254 | 4), 255 | 256 | ([{'content': {'execution_count': 2}, 257 | 'header': {'msg_type': 'execute_input'}}], 258 | 2), 259 | 260 | ([{'content': {'data': {'text/plain': 'foo'}}}, 261 | {'content': {'execution_count': 2}}], 262 | 2), 263 | ]) 264 | def test_extract_execution_count(self, messages, expected): 265 | assert R.extract_execution_count(messages) == expected 266 | 267 | @pytest.mark.parametrize('output, message, expected', [ 268 | ([{'text/plain': '2'}], 269 | {'content': {'execution_count': '1'}}, 270 | {'t': 'Div', 'c': (['', ['output'], []], 271 | [{'t': 'Para', 272 | 'c': [{'t': 'Str', 273 | 'c': 'Out[1]: 2'}]}])}), 274 | ]) 275 | @pytest.mark.xfail 276 | def test_wrap_output(self, output, message, expected): 277 | result = R.wrap_output(output, message) 278 | assert result == expected 279 | 280 | 281 | @pytest.mark.slow 282 | class TestIntegration: 283 | 284 | def test_from_file(self, document_path, clean_stdout): 285 | R.convert_file(document_path, 'html') 286 | 287 | def test_from_source(self, document, clean_stdout): 288 | R.convert(document, 'html') 289 | 290 | @pytest.mark.parametrize("to, value", [ 291 | ("html", "data:image/png;base64,"), 292 | ("pdf", 'unnamed_chunk_0'), # TODO: chunk name 293 | ]) 294 | def test_image(self, to, value, global_python_kernel): 295 | code = dedent('''\ 296 | ```{python} 297 | %matplotlib inline 298 | import matplotlib.pyplot as plt 299 | plt.plot(range(4), range(4)) 300 | plt.title('Foo — Bar'); # That's an em dash 301 | ``` 302 | ''') 303 | result = R.Stitch('foo', to=to).stitch(code) 304 | blocks = result['blocks'] 305 | assert blocks[1]['c'][0]['t'] == 'Image' 306 | 307 | def test_image_chunkname(self): 308 | code = dedent('''\ 309 | ```{python, chunk} 310 | %matplotlib inline 311 | import matplotlib.pyplot as plt 312 | plt.plot(range(4), range(4)); 313 | ``` 314 | ''') 315 | result = R.Stitch('foo', to='pdf', standalone=False).stitch(code) 316 | blocks = result['blocks'] 317 | assert 'chunk' in blocks[1]['c'][0]['c'][0][0] 318 | 319 | def test_image_attrs(self): 320 | code = dedent('''\ 321 | ```{python, chunk, fig.width=10, 
fig.height=10px} 322 | %matplotlib inline 323 | import matplotlib.pyplot as plt 324 | plt.plot(range(4), range(4)); 325 | ``` 326 | ''') 327 | result = R.Stitch('foo', to='html', standalone=False).stitch(code) 328 | blocks = result['blocks'] 329 | attrs = blocks[1]['c'][0]['c'][0][2] 330 | assert ('width', '10') in attrs 331 | assert ('height', '10px') in attrs 332 | 333 | def test_image_no_self_contained(self, clean_python_kernel, clean_name): 334 | code = dedent('''\ 335 | ```{python} 336 | %matplotlib inline 337 | import matplotlib.pyplot as plt 338 | plt.plot(range(4)) 339 | ``` 340 | ''') 341 | s = R.Stitch(clean_name, self_contained=False) 342 | s._kernel_pairs['python'] = clean_python_kernel 343 | result = s.stitch(code) 344 | blocks = result['blocks'] 345 | expected = '{}_files/unnamed_chunk_0.png'.format(clean_name) 346 | result = blocks[-1]['c'][0]['c'][2][0] 347 | assert result == expected 348 | 349 | @pytest.mark.parametrize('fmt', ['png', 'svg', 'pdf']) 350 | def test_image_no_self_contained_formats(self, clean_python_kernel, 351 | clean_name, fmt): 352 | code = dedent('''\ 353 | ```{{python}} 354 | %matplotlib inline 355 | from IPython.display import set_matplotlib_formats 356 | import numpy as np 357 | import matplotlib.pyplot as plt 358 | set_matplotlib_formats('{fmt}') 359 | 360 | x = np.linspace(-np.pi / 2, np.pi / 2) 361 | plt.plot(x, np.sin(x)) 362 | plt.plot(x, np.cos(x)) 363 | ``` 364 | ''').format(fmt=fmt) 365 | s = R.Stitch(clean_name, self_contained=False) 366 | s._kernel_pairs['python'] = clean_python_kernel 367 | s.stitch(code) 368 | expected = os.path.join(clean_name + '_files', 369 | 'unnamed_chunk_0.' + fmt) 370 | assert os.path.exists(expected) 371 | 372 | @pytest.mark.parametrize('warning, length', [ 373 | (True, 3), 374 | (False, 2), 375 | ]) 376 | def test_warning(self, clean_python_kernel, warning, length): 377 | code = dedent('''\ 378 | ```{python} 379 | import warnings 380 | warnings.warn("Hi") 381 | 2 382 | ``` 383 | ''') 384 | r = R.Stitch('foo', to='html', warning=warning) 385 | r._kernel_pairs['python'] = clean_python_kernel 386 | result = r.stitch(code) 387 | assert len(result['blocks']) == length 388 | 389 | @pytest.mark.parametrize('to', ['latex', 'beamer']) 390 | def test_rich_output(self, to, clean_python_kernel): 391 | code = dedent('''\ 392 | ```{python} 393 | import pandas as pd 394 | pd.options.display.latex.repr = True 395 | pd.DataFrame({'a': [1, 2]}) 396 | ``` 397 | ''') 398 | stitch = R.Stitch('foo', to, ) 399 | stitch._kernel_pairs['python'] = clean_python_kernel 400 | blocks = stitch.stitch(code)['blocks'] 401 | result = blocks[1]['c'][1] 402 | assert '\\begin{tabular}' in result 403 | 404 | def test_error_raises(self): 405 | s = R.Stitch('', error='raise') 406 | code = dedent('''\ 407 | ```{python} 408 | 1 / 0 409 | ``` 410 | ''') 411 | with pytest.raises(R.StitchError): 412 | s.stitch(code) 413 | 414 | s.error = 'continue' 415 | s.stitch(code) 416 | 417 | @pytest.mark.parametrize('to', [ 418 | 'html', 'pdf', 'latex', 'docx', 419 | ]) 420 | def test_ipython_display(self, clean_python_kernel, to): 421 | s = R.Stitch('', to=to) 422 | code = dedent('''\ 423 | from IPython import display 424 | import math 425 | display.Markdown("$\\alpha^{pi:1.3f}$".format(pi=math.pi)) 426 | ''') 427 | messages = R.run_code(code, clean_python_kernel) 428 | wrapped = s.wrap_output('', messages, None)[0] 429 | assert wrapped['t'] == 'Para' 430 | assert wrapped['c'][0]['c'][0]['t'] == 'InlineMath' 431 | 432 | 433 | class TestCLI: 434 | 435 | 
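    # Per the parametrized cases below, ``enhance_args`` is expected to inject
    # ``--standalone`` and ``--self-contained`` unless the corresponding
    # ``no_*`` flag is set, and to append the bundled default CSS for HTML
    # output when the caller has not passed ``--css``/``-c`` themselves.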
@pytest.mark.parametrize('expected, no_standalone, extra_args', [ 436 | (True, False, []), 437 | (True, False, ['--standalone']), 438 | (True, False, ['-s']), 439 | (False, True, []), 440 | ]) 441 | def test_standalone(self, expected, no_standalone, extra_args): 442 | args = enhance_args('', no_standalone, False, extra_args) 443 | result = '--standalone' in args or '-s' in args 444 | assert result is expected 445 | 446 | @pytest.mark.parametrize('expected, no_self_contained, extra_args', [ 447 | (True, False, []), 448 | (True, False, ['--self-contained']), 449 | (False, True, []), 450 | ]) 451 | def test_self_contained(self, expected, no_self_contained, extra_args): 452 | args = enhance_args('', False, no_self_contained, extra_args) 453 | result = '--self-contained' in args 454 | assert result is expected 455 | 456 | @pytest.mark.parametrize('expected, to, extra_args', [ 457 | (['--css=%s' % CSS], 'html', []), 458 | (['-s', '--css=%s' % CSS], 'html', ['-s']), 459 | (['--css=foo.css'], 'html', ['--css=foo.css']), 460 | (['-c', 'foo.css'], 'html', ['-c', 'foo.css']), 461 | ]) 462 | def test_css(self, expected, to, extra_args): 463 | result = enhance_args(to, True, True, extra_args) 464 | assert result == expected 465 | 466 | 467 | @pytest.mark.slow 468 | class TestKernel: 469 | 470 | def test_init_python_pre(self): 471 | kp = R.kernel_factory('python') 472 | result = R.run_code( 473 | 'import pandas; assert pandas.options.display.latex.repr is False', 474 | kp) 475 | assert len(result) == 1 476 | 477 | def test_init_python_latex(self, clean_python_kernel): 478 | R.initialize_kernel('python', clean_python_kernel) 479 | result = R.run_code( 480 | 'assert pandas.options.display.latex.repr is False', 481 | clean_python_kernel 482 | ) 483 | assert len(result) == 2 484 | 485 | 486 | class TestStitcher: 487 | 488 | def test_error(self): 489 | s = R.Stitch('') 490 | assert s.error == 'continue' 491 | s.error = 'raise' 492 | assert s.error == 'raise' 493 | 494 | with pytest.raises(TraitError): 495 | s.error = 'foo' 496 | 497 | def test_getattr(self): 498 | s = R.Stitch('') 499 | assert getattr(s, 'fig.width') is None 500 | assert s.fig.width is None 501 | with pytest.raises(AttributeError): 502 | assert getattr(s, 'foo.bar') 503 | 504 | with pytest.raises(AttributeError): 505 | assert getattr(s, 'foo') 506 | 507 | def test_has_trait(self): 508 | s = R.Stitch('') 509 | assert s.has_trait('fig.width') 510 | assert not s.has_trait('fake.width') 511 | assert not s.has_trait('fig.fake') 512 | 513 | def test_empty_message(): 514 | # GH 52 515 | messages = [ 516 | {'parent_header': { 517 | 'username': 't', 'session': 'a', 518 | 'msg_type': 'execute_request', 'msg_id': '3', 519 | 'date': datetime.datetime(2016, 9, 27, 7, 20, 13, 790481), 520 | 'version': '5.0' 521 | }, 'metadata': {}, 'buffers': [], 'msg_type': 'display_data', 522 | 'header': {'username': 't', 'session': 'a', 523 | 'msg_type': 'display_data', 'version': '5.0', 524 | 'date': '2016-09-27T07:20:17.461893', 525 | 'msg_id': '6'}, 526 | 'content': {'metadata': {}, 'data': {}}, 'msg_id': '6'} 527 | ] 528 | s = R.Stitch('foo') 529 | result = s.wrap_output('bar', messages, {}) 530 | assert result == [] 531 | -------------------------------------------------------------------------------- /stitch/static/default.css: -------------------------------------------------------------------------------- 1 | /*! 
normalize.css v2.1.3 | MIT License | git.io/normalize */ 2 | 3 | /* ========================================================================== 4 | HTML5 display definitions 5 | ========================================================================== */ 6 | 7 | /** 8 | * Correct `block` display not defined in IE 8/9. 9 | */ 10 | 11 | article, 12 | aside, 13 | details, 14 | figcaption, 15 | figure, 16 | footer, 17 | header, 18 | hgroup, 19 | main, 20 | nav, 21 | section, 22 | summary { 23 | display: block; 24 | } 25 | 26 | /** 27 | * Correct `inline-block` display not defined in IE 8/9. 28 | */ 29 | 30 | audio, 31 | canvas, 32 | video { 33 | display: inline-block; 34 | } 35 | 36 | /** 37 | * Prevent modern browsers from displaying `audio` without controls. 38 | * Remove excess height in iOS 5 devices. 39 | */ 40 | 41 | audio:not([controls]) { 42 | display: none; 43 | height: 0; 44 | } 45 | 46 | /** 47 | * Address `[hidden]` styling not present in IE 8/9. 48 | * Hide the `template` element in IE, Safari, and Firefox < 22. 49 | */ 50 | 51 | [hidden], 52 | template { 53 | display: none; 54 | } 55 | 56 | /* ========================================================================== 57 | Base 58 | ========================================================================== */ 59 | 60 | /** 61 | * 1. Set default font family to sans-serif. 62 | * 2. Prevent iOS text size adjust after orientation change, without disabling 63 | * user zoom. 64 | */ 65 | 66 | html { 67 | font-family: sans-serif; /* 1 */ 68 | -ms-text-size-adjust: 100%; /* 2 */ 69 | -webkit-text-size-adjust: 100%; /* 2 */ 70 | } 71 | 72 | /** 73 | * Remove default margin. 74 | */ 75 | 76 | body { 77 | margin: 0; 78 | } 79 | 80 | /* ========================================================================== 81 | Links 82 | ========================================================================== */ 83 | 84 | /** 85 | * Remove the gray background color from active links in IE 10. 86 | */ 87 | 88 | a { 89 | background: transparent; 90 | } 91 | 92 | /** 93 | * Address `outline` inconsistency between Chrome and other browsers. 94 | */ 95 | 96 | a:focus { 97 | outline: thin dotted; 98 | } 99 | 100 | /** 101 | * Improve readability when focused and also mouse hovered in all browsers. 102 | */ 103 | 104 | a:active, 105 | a:hover { 106 | outline: 0; 107 | } 108 | 109 | /* ========================================================================== 110 | Typography 111 | ========================================================================== */ 112 | 113 | /** 114 | * Address variable `h1` font-size and margin within `section` and `article` 115 | * contexts in Firefox 4+, Safari 5, and Chrome. 116 | */ 117 | 118 | h1 { 119 | font-size: 2em; 120 | margin: 0.67em 0; 121 | } 122 | 123 | /** 124 | * Address styling not present in IE 8/9, Safari 5, and Chrome. 125 | */ 126 | 127 | abbr[title] { 128 | border-bottom: 1px dotted; 129 | } 130 | 131 | /** 132 | * Address style set to `bolder` in Firefox 4+, Safari 5, and Chrome. 133 | */ 134 | 135 | b, 136 | strong { 137 | font-weight: bold; 138 | } 139 | 140 | /** 141 | * Address styling not present in Safari 5 and Chrome. 142 | */ 143 | 144 | dfn { 145 | font-style: italic; 146 | } 147 | 148 | /** 149 | * Address differences between Firefox and other browsers. 150 | */ 151 | 152 | hr { 153 | -moz-box-sizing: content-box; 154 | box-sizing: content-box; 155 | height: 0; 156 | } 157 | 158 | /** 159 | * Address styling not present in IE 8/9. 
160 | */ 161 | 162 | mark { 163 | background: #ff0; 164 | color: #000; 165 | } 166 | 167 | /** 168 | * Correct font family set oddly in Safari 5 and Chrome. 169 | */ 170 | 171 | code, 172 | kbd, 173 | pre, 174 | samp { 175 | font-family: monospace, serif; 176 | font-size: 1em; 177 | } 178 | 179 | /** 180 | * Improve readability of pre-formatted text in all browsers. 181 | */ 182 | 183 | pre { 184 | white-space: pre-wrap; 185 | } 186 | 187 | /** 188 | * Set consistent quote types. 189 | */ 190 | 191 | q { 192 | quotes: "\201C" "\201D" "\2018" "\2019"; 193 | } 194 | 195 | /** 196 | * Address inconsistent and variable font size in all browsers. 197 | */ 198 | 199 | small { 200 | font-size: 80%; 201 | } 202 | 203 | /** 204 | * Prevent `sub` and `sup` affecting `line-height` in all browsers. 205 | */ 206 | 207 | sub, 208 | sup { 209 | font-size: 75%; 210 | line-height: 0; 211 | position: relative; 212 | vertical-align: baseline; 213 | } 214 | 215 | sup { 216 | top: -0.5em; 217 | } 218 | 219 | sub { 220 | bottom: -0.25em; 221 | } 222 | 223 | /* ========================================================================== 224 | Embedded content 225 | ========================================================================== */ 226 | 227 | /** 228 | * Remove border when inside `a` element in IE 8/9. 229 | */ 230 | 231 | img { 232 | border: 0; 233 | } 234 | 235 | /** 236 | * Correct overflow displayed oddly in IE 9. 237 | */ 238 | 239 | svg:not(:root) { 240 | overflow: hidden; 241 | } 242 | 243 | /* ========================================================================== 244 | Figures 245 | ========================================================================== */ 246 | 247 | /** 248 | * Address margin not present in IE 8/9 and Safari 5. 249 | */ 250 | 251 | figure { 252 | margin: 0; 253 | } 254 | 255 | /* ========================================================================== 256 | Forms 257 | ========================================================================== */ 258 | 259 | /** 260 | * Define consistent border, margin, and padding. 261 | */ 262 | 263 | fieldset { 264 | border: 1px solid #c0c0c0; 265 | margin: 0 2px; 266 | padding: 0.35em 0.625em 0.75em; 267 | } 268 | 269 | /** 270 | * 1. Correct `color` not being inherited in IE 8/9. 271 | * 2. Remove padding so people aren't caught out if they zero out fieldsets. 272 | */ 273 | 274 | legend { 275 | border: 0; /* 1 */ 276 | padding: 0; /* 2 */ 277 | } 278 | 279 | /** 280 | * 1. Correct font family not being inherited in all browsers. 281 | * 2. Correct font size not being inherited in all browsers. 282 | * 3. Address margins set differently in Firefox 4+, Safari 5, and Chrome. 283 | */ 284 | 285 | button, 286 | input, 287 | select, 288 | textarea { 289 | font-family: inherit; /* 1 */ 290 | font-size: 100%; /* 2 */ 291 | margin: 0; /* 3 */ 292 | } 293 | 294 | /** 295 | * Address Firefox 4+ setting `line-height` on `input` using `!important` in 296 | * the UA stylesheet. 297 | */ 298 | 299 | button, 300 | input { 301 | line-height: normal; 302 | } 303 | 304 | /** 305 | * Address inconsistent `text-transform` inheritance for `button` and `select`. 306 | * All other form control elements do not inherit `text-transform` values. 307 | * Correct `button` style inheritance in Chrome, Safari 5+, and IE 8+. 308 | * Correct `select` style inheritance in Firefox 4+ and Opera. 309 | */ 310 | 311 | button, 312 | select { 313 | text-transform: none; 314 | } 315 | 316 | /** 317 | * 1. 
Avoid the WebKit bug in Android 4.0.* where (2) destroys native `audio` 318 | * and `video` controls. 319 | * 2. Correct inability to style clickable `input` types in iOS. 320 | * 3. Improve usability and consistency of cursor style between image-type 321 | * `input` and others. 322 | */ 323 | 324 | button, 325 | html input[type="button"], /* 1 */ 326 | input[type="reset"], 327 | input[type="submit"] { 328 | -webkit-appearance: button; /* 2 */ 329 | cursor: pointer; /* 3 */ 330 | } 331 | 332 | /** 333 | * Re-set default cursor for disabled elements. 334 | */ 335 | 336 | button[disabled], 337 | html input[disabled] { 338 | cursor: default; 339 | } 340 | 341 | /** 342 | * 1. Address box sizing set to `content-box` in IE 8/9/10. 343 | * 2. Remove excess padding in IE 8/9/10. 344 | */ 345 | 346 | input[type="checkbox"], 347 | input[type="radio"] { 348 | box-sizing: border-box; /* 1 */ 349 | padding: 0; /* 2 */ 350 | } 351 | 352 | /** 353 | * 1. Address `appearance` set to `searchfield` in Safari 5 and Chrome. 354 | * 2. Address `box-sizing` set to `border-box` in Safari 5 and Chrome 355 | * (include `-moz` to future-proof). 356 | */ 357 | 358 | input[type="search"] { 359 | -webkit-appearance: textfield; /* 1 */ 360 | -moz-box-sizing: content-box; 361 | -webkit-box-sizing: content-box; /* 2 */ 362 | box-sizing: content-box; 363 | } 364 | 365 | /** 366 | * Remove inner padding and search cancel button in Safari 5 and Chrome 367 | * on OS X. 368 | */ 369 | 370 | input[type="search"]::-webkit-search-cancel-button, 371 | input[type="search"]::-webkit-search-decoration { 372 | -webkit-appearance: none; 373 | } 374 | 375 | /** 376 | * Remove inner padding and border in Firefox 4+. 377 | */ 378 | 379 | button::-moz-focus-inner, 380 | input::-moz-focus-inner { 381 | border: 0; 382 | padding: 0; 383 | } 384 | 385 | /** 386 | * 1. Remove default vertical scrollbar in IE 8/9. 387 | * 2. Improve readability and alignment in all browsers. 388 | */ 389 | 390 | textarea { 391 | overflow: auto; /* 1 */ 392 | vertical-align: top; /* 2 */ 393 | } 394 | 395 | /* ========================================================================== 396 | Tables 397 | ========================================================================== */ 398 | 399 | /** 400 | * Remove most spacing between table cells. 
401 | */ 402 | 403 | table { 404 | border-collapse: collapse; 405 | border-spacing: 0; 406 | } 407 | 408 | .go-top { 409 | position: fixed; 410 | bottom: 2em; 411 | right: 2em; 412 | text-decoration: none; 413 | background-color: #E0E0E0; 414 | font-size: 12px; 415 | padding: 1em; 416 | display: inline; 417 | } 418 | 419 | /* Github css */ 420 | 421 | html,body{ margin: auto; 422 | padding-right: 1em; 423 | padding-left: 1em; 424 | max-width: 44em; color:black;}*:not('#mkdbuttons'){margin:0;padding:0}body{font:13.34px helvetica,arial,freesans,clean,sans-serif;-webkit-font-smoothing:subpixel-antialiased;line-height:1.4;padding:3px;background:#fff;border-radius:3px;-moz-border-radius:3px;-webkit-border-radius:3px}p{margin:1em 0}a{color:#4183c4;text-decoration:none}body{background-color:#fff;padding:30px;margin:15px;font-size:14px;line-height:1.6}body>*:first-child{margin-top:0!important}body>*:last-child{margin-bottom:0!important}@media screen{body{box-shadow:0 0 0 1px #cacaca,0 0 0 4px #eee}}h1,h2,h3,h4,h5,h6{margin:20px 0 10px;padding:0;font-weight:bold;-webkit-font-smoothing:subpixel-antialiased;cursor:text}h1{font-size:28px;color:#000}h2{font-size:24px;border-bottom:1px solid #ccc;color:#000}h3{font-size:18px;color:#333}h4{font-size:16px;color:#333}h5{font-size:14px;color:#333}h6{color:#777;font-size:14px}p,blockquote,table,pre{margin:15px 0}ul{padding-left:30px}ol{padding-left:30px}ol li ul:first-of-type{margin-top:0}hr{background:transparent url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAYAAAAECAYAAACtBE5DAAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJbWFnZVJlYWR5ccllPAAAAyJpVFh0WE1MOmNvbS5hZG9iZS54bXAAAAAAADw/eHBhY2tldCBiZWdpbj0i77u/IiBpZD0iVzVNME1wQ2VoaUh6cmVTek5UY3prYzlkIj8+IDx4OnhtcG1ldGEgeG1sbnM6eD0iYWRvYmU6bnM6bWV0YS8iIHg6eG1wdGs9IkFkb2JlIFhNUCBDb3JlIDUuMC1jMDYwIDYxLjEzNDc3NywgMjAxMC8wMi8xMi0xNzozMjowMCAgICAgICAgIj4gPHJkZjpSREYgeG1sbnM6cmRmPSJodHRwOi8vd3d3LnczLm9yZy8xOTk5LzAyLzIyLXJkZi1zeW50YXgtbnMjIj4gPHJkZjpEZXNjcmlwdGlvbiByZGY6YWJvdXQ9IiIgeG1sbnM6eG1wPSJodHRwOi8vbnMuYWRvYmUuY29tL3hhcC8xLjAvIiB4bWxuczp4bXBNTT0iaHR0cDovL25zLmFkb2JlLmNvbS94YXAvMS4wL21tLyIgeG1sbnM6c3RSZWY9Imh0dHA6Ly9ucy5hZG9iZS5jb20veGFwLzEuMC9zVHlwZS9SZXNvdXJjZVJlZiMiIHhtcDpDcmVhdG9yVG9vbD0iQWRvYmUgUGhvdG9zaG9wIENTNSBNYWNpbnRvc2giIHhtcE1NOkluc3RhbmNlSUQ9InhtcC5paWQ6OENDRjNBN0E2NTZBMTFFMEI3QjRBODM4NzJDMjlGNDgiIHhtcE1NOkRvY3VtZW50SUQ9InhtcC5kaWQ6OENDRjNBN0I2NTZBMTFFMEI3QjRBODM4NzJDMjlGNDgiPiA8eG1wTU06RGVyaXZlZEZyb20gc3RSZWY6aW5zdGFuY2VJRD0ieG1wLmlpZDo4Q0NGM0E3ODY1NkExMUUwQjdCNEE4Mzg3MkMyOUY0OCIgc3RSZWY6ZG9jdW1lbnRJRD0ieG1wLmRpZDo4Q0NGM0E3OTY1NkExMUUwQjdCNEE4Mzg3MkMyOUY0OCIvPiA8L3JkZjpEZXNjcmlwdGlvbj4gPC9yZGY6UkRGPiA8L3g6eG1wbWV0YT4gPD94cGFja2V0IGVuZD0iciI/PqqezsUAAAAfSURBVHjaYmRABcYwBiM2QSA4y4hNEKYDQxAEAAIMAHNGAzhkPOlYAAAAAElFTkSuQmCC) repeat-x 0 0;border:0 none;color:#ccc;height:4px;padding:0}body>h2:first-child{margin-top:0;padding-top:0}body>h1:first-child{margin-top:0;padding-top:0}body>h1:first-child+h2{margin-top:0;padding-top:0}body>h3:first-child,body>h4:first-child,body>h5:first-child,body>h6:first-child{margin-top:0;padding-top:0}a:first-child h1,a:first-child h2,a:first-child h3,a:first-child h4,a:first-child h5,a:first-child h6{margin-top:0;padding-top:0}h1+p,h2+p,h3+p,h4+p,h5+p,h6+p,ul li>:first-child,ol li>:first-child{margin-top:0}dl{padding:0}dl dt{font-size:14px;font-weight:bold;font-style:italic;padding:0;margin:15px 0 5px}dl dt:first-child{padding:0}dl dt>:first-child{margin-top:0}dl dt>:last-child{margin-bottom:0}dl dd{margin:0 0 15px;padding:0 15px}dl dd>:first-child{margin-top:0}dl 
dd>:last-child{margin-bottom:0}blockquote{border-left:4px solid #DDD;padding:0 15px;color:#777}blockquote>:first-child{margin-top:0}blockquote>:last-child{margin-bottom:0}table{border-collapse:collapse;border-spacing:0;font-size:100%;font:inherit}table th{font-weight:bold;border:1px solid #ccc;padding:6px 13px}table td{border:1px solid #ccc;padding:6px 13px}table tr{border-top:1px solid #ccc;background-color:#fff}table tr:nth-child(2n){background-color:#f8f8f8}img{max-width:100%}code,tt{margin:0 2px;padding:0 5px;white-space:nowrap;border:1px solid #eaeaea;background-color:#f8f8f8;border-radius:3px;font-family:Consolas,'Liberation Mono',Courier,monospace;font-size:12px;color:#333}pre>code{margin:0;padding:0;white-space:pre;border:0;background:transparent}.highlight pre{background-color:#f8f8f8;border:1px solid #ccc;font-size:13px;line-height:19px;overflow:auto;padding:6px 10px;border-radius:3px}pre{background-color:#f8f8f8;border:1px solid #ccc;font-size:13px;line-height:19px;overflow:auto;padding:6px 10px;border-radius:3px}pre code,pre tt{background-color:transparent;border:0}.poetry pre{font-family:Georgia,Garamond,serif!important;font-style:italic;font-size:110%!important;line-height:1.6em;display:block;margin-left:1em}.poetry pre code{font-family:Georgia,Garamond,serif!important;word-break:break-all;word-break:break-word;-webkit-hyphens:auto;-moz-hyphens:auto;hyphens:auto;white-space:pre-wrap}sup,sub,a.footnote{font-size:1.4ex;height:0;line-height:1;vertical-align:super;position:relative}sub{vertical-align:sub;top:-1px}@media print{body{background:#fff}img,pre,blockquote,table,figure{page-break-inside:avoid}body{background:#fff;border:0}code{background-color:#fff;color:#333!important;padding:0 .2em;border:1px solid #dedede}pre{background:#fff}pre code{background-color:white!important;overflow:visible}}@media screen{body.inverted{color:#eee!important;border-color:#555;box-shadow:none}.inverted body,.inverted hr .inverted p,.inverted td,.inverted li,.inverted h1,.inverted h2,.inverted h3,.inverted h4,.inverted h5,.inverted h6,.inverted th,.inverted .math,.inverted caption,.inverted dd,.inverted dt,.inverted blockquote{color:#eee!important;border-color:#555;box-shadow:none}.inverted td,.inverted th{background:#333}.inverted h2{border-color:#555}.inverted hr{border-color:#777;border-width:1px!important}::selection{background:rgba(157,193,200,0.5)}h1::selection{background-color:rgba(45,156,208,0.3)}h2::selection{background-color:rgba(90,182,224,0.3)}h3::selection,h4::selection,h5::selection,h6::selection,li::selection,ol::selection{background-color:rgba(133,201,232,0.3)}code::selection{background-color:rgba(0,0,0,0.7);color:#eee}code span::selection{background-color:rgba(0,0,0,0.7)!important;color:#eee!important}a::selection{background-color:rgba(255,230,102,0.2)}.inverted a::selection{background-color:rgba(255,230,102,0.6)}td::selection,th::selection,caption::selection{background-color:rgba(180,237,95,0.5)}.inverted{background:#0b2531;background:#252a2a}.inverted body{background:#252a2a}.inverted a{color:#acd1d5}}.highlight .c{color:#998;font-style:italic}.highlight .err{color:#a61717;background-color:#e3d2d2}.highlight .k,.highlight .o{font-weight:bold}.highlight .cm{color:#998;font-style:italic}.highlight .cp{color:#999;font-weight:bold}.highlight .c1{color:#998;font-style:italic}.highlight .cs{color:#999;font-weight:bold;font-style:italic}.highlight .gd{color:#000;background-color:#fdd}.highlight .gd .x{color:#000;background-color:#faa}.highlight .ge{font-style:italic}.highlight 
.gr{color:#a00}.highlight .gh{color:#999}.highlight .gi{color:#000;background-color:#dfd}.highlight .gi .x{color:#000;background-color:#afa}.highlight .go{color:#888}.highlight .gp{color:#555}.highlight .gs{font-weight:bold}.highlight .gu{color:#800080;font-weight:bold}.highlight .gt{color:#a00}.highlight .kc,.highlight .kd,.highlight .kn,.highlight .kp,.highlight .kr{font-weight:bold}.highlight .kt{color:#458;font-weight:bold}.highlight .m{color:#099}.highlight .s{color:#d14}.highlight .na{color:#008080}.highlight .nb{color:#0086b3}.highlight .nc{color:#458;font-weight:bold}.highlight .no{color:#008080}.highlight .ni{color:#800080}.highlight .ne,.highlight .nf{color:#900;font-weight:bold}.highlight .nn{color:#555}.highlight .nt{color:#000080}.highlight .nv{color:#008080}.highlight .ow{font-weight:bold}.highlight .w{color:#bbb}.highlight .mf,.highlight .mh,.highlight .mi,.highlight .mo{color:#099}.highlight .sb,.highlight .sc,.highlight .sd,.highlight .s2,.highlight .se,.highlight .sh,.highlight .si,.highlight .sx{color:#d14}.highlight .sr{color:#009926}.highlight .s1{color:#d14}.highlight .ss{color:#990073}.highlight .bp{color:#999}.highlight .vc,.highlight .vg,.highlight .vi{color:#008080}.highlight .il{color:#099}.highlight .gc{color:#999;background-color:#eaf2f5}.type-csharp .highlight .k,.type-csharp .highlight .kt{color:#00F}.type-csharp .highlight .nf{color:#000;font-weight:normal}.type-csharp .highlight .nc{color:#2b91af}.type-csharp .highlight .nn{color:#000}.type-csharp .highlight .s,.type-csharp .highlight .sc{color:#a31515} 425 | div.sourceCode { overflow-x: auto; } 426 | table.sourceCode, tr.sourceCode, td.lineNumbers, td.sourceCode { 427 | margin: 0; padding: 0; vertical-align: baseline; border: none; } 428 | table.sourceCode { width: 100%; line-height: 100%; } 429 | td.lineNumbers { text-align: right; padding-right: 4px; padding-left: 4px; color: #aaaaaa; border-right: 1px solid #aaaaaa; } 430 | td.sourceCode { padding-left: 5px; } 431 | 432 | /* https://github.com/richleland/pygments-css/blob/master/ */ 433 | 434 | .hll { background-color: #ffffcc } 435 | .c { color: #999988; font-style: italic } /* Comment */ 436 | .err { color: #a61717; background-color: #e3d2d2 } /* Error */ 437 | .k { color: #000000; font-weight: bold } /* Keyword */ 438 | .o { color: #000000; font-weight: bold } /* Operator */ 439 | .cm { color: #999988; font-style: italic } /* Comment.Multiline */ 440 | .cp { color: #999999; font-weight: bold; font-style: italic } /* Comment.Preproc */ 441 | .c1 { color: #999988; font-style: italic } /* Comment.Single */ 442 | .cs { color: #999999; font-weight: bold; font-style: italic } /* Comment.Special */ 443 | .gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */ 444 | .ge { color: #000000; font-style: italic } /* Generic.Emph */ 445 | .gr { color: #aa0000 } /* Generic.Error */ 446 | .gh { color: #999999 } /* Generic.Heading */ 447 | .gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */ 448 | .go { color: #888888 } /* Generic.Output */ 449 | .gp { color: #555555 } /* Generic.Prompt */ 450 | .gs { font-weight: bold } /* Generic.Strong */ 451 | .gu { color: #aaaaaa } /* Generic.Subheading */ 452 | .gt { color: #aa0000 } /* Generic.Traceback */ 453 | .kc { color: #000000; font-weight: bold } /* Keyword.Constant */ 454 | .kd { color: #000000; font-weight: bold } /* Keyword.Declaration */ 455 | .kn { color: #000000; font-weight: bold } /* Keyword.Namespace */ 456 | .kp { color: #000000; font-weight: bold } /* Keyword.Pseudo */ 457 
| .kr { color: #000000; font-weight: bold } /* Keyword.Reserved */ 458 | .kt { color: #445588; font-weight: bold } /* Keyword.Type */ 459 | .m { color: #009999 } /* Literal.Number */ 460 | .s { color: #d01040 } /* Literal.String */ 461 | .na { color: #008080 } /* Name.Attribute */ 462 | .nb { color: #0086B3 } /* Name.Builtin */ 463 | .nc { color: #445588; font-weight: bold } /* Name.Class */ 464 | .no { color: #008080 } /* Name.Constant */ 465 | .nd { color: #3c5d5d; font-weight: bold } /* Name.Decorator */ 466 | .ni { color: #800080 } /* Name.Entity */ 467 | .ne { color: #990000; font-weight: bold } /* Name.Exception */ 468 | .nf { color: #990000; font-weight: bold } /* Name.Function */ 469 | .nl { color: #990000; font-weight: bold } /* Name.Label */ 470 | .nn { color: #555555 } /* Name.Namespace */ 471 | .nt { color: #000080 } /* Name.Tag */ 472 | .nv { color: #008080 } /* Name.Variable */ 473 | .ow { color: #000000; font-weight: bold } /* Operator.Word */ 474 | .w { color: #bbbbbb } /* Text.Whitespace */ 475 | .mf { color: #009999 } /* Literal.Number.Float */ 476 | .mh { color: #009999 } /* Literal.Number.Hex */ 477 | .mi { color: #009999 } /* Literal.Number.Integer */ 478 | .mo { color: #009999 } /* Literal.Number.Oct */ 479 | .sb { color: #d01040 } /* Literal.String.Backtick */ 480 | .sc { color: #d01040 } /* Literal.String.Char */ 481 | .sd { color: #d01040 } /* Literal.String.Doc */ 482 | .s2 { color: #d01040 } /* Literal.String.Double */ 483 | .se { color: #d01040 } /* Literal.String.Escape */ 484 | .sh { color: #d01040 } /* Literal.String.Heredoc */ 485 | .si { color: #d01040 } /* Literal.String.Interpol */ 486 | .sx { color: #d01040 } /* Literal.String.Other */ 487 | .sr { color: #009926 } /* Literal.String.Regex */ 488 | .s1 { color: #d01040 } /* Literal.String.Single */ 489 | .ss { color: #990073 } /* Literal.String.Symbol */ 490 | .bp { color: #999999 } /* Name.Builtin.Pseudo */ 491 | .vc { color: #008080 } /* Name.Variable.Class */ 492 | .vg { color: #008080 } /* Name.Variable.Global */ 493 | .vi { color: #008080 } /* Name.Variable.Instance */ 494 | .il { color: #009999 } /* Literal.Number.Integer.Long */ 495 | 496 | .bk-canvas { position: relative !important;} 497 | -------------------------------------------------------------------------------- /stitch/stitch.py: -------------------------------------------------------------------------------- 1 | """ 2 | Convert markdown files, executing code chunks and stitching 3 | in the output. 4 | """ 5 | # Adapted from knitpy and nbcovert: 6 | # Copyright (c) Jan Schulz 7 | # Copyright (c) IPython Development Team. 8 | # Distributed under the terms of the Modified BSD License. 9 | import os 10 | import re 11 | import copy 12 | import json 13 | import base64 14 | import mimetypes 15 | from collections import namedtuple 16 | from queue import Empty 17 | 18 | from traitlets import HasTraits 19 | from jupyter_client.manager import start_new_kernel 20 | from nbconvert.utils.base import NbConvertBase 21 | from pandocfilters import RawBlock, Div, CodeBlock, Image, Str, Para 22 | import pypandoc 23 | 24 | from .exc import StitchError 25 | from . 
import options as opt 26 | from .parser import preprocess_options 27 | 28 | DISPLAY_PRIORITY = NbConvertBase().display_data_priority 29 | CODE = 'code' 30 | CODEBLOCK = 'CodeBlock' 31 | OUTPUT_FORMATS = ['html', 'latex'] 32 | HERE = os.path.dirname(__file__) 33 | 34 | KernelPair = namedtuple("KernelPair", "km kc") 35 | CODE_CHUNK_XPR = re.compile(r'^```{\w+.*}|^```\w+') 36 | 37 | 38 | class _Fig(HasTraits): 39 | """ 40 | Sub-traitlet for fig-related options. 41 | Traitlets all the way down. 42 | """ 43 | 44 | width = opt.Str(None) 45 | height = opt.Str(None) 46 | cap = opt.Str(None) 47 | 48 | # -------- 49 | # User API 50 | # -------- 51 | 52 | 53 | class Stitch(HasTraits): 54 | """ 55 | Helper class for managing the execution of a document. 56 | Stores configuration variables. 57 | 58 | Attributes 59 | ---------- 60 | to : str 61 | The output file format. Optionally inferred from the output 62 | file extension. 63 | title : str 64 | The name of the output document. 65 | date : str 66 | author : str 67 | self_contained : bool, default True 68 | Whether to publish a self-contained document, where 69 | things like images or CSS stylesheets are inlined as ``data`` 70 | attributes. 71 | standalone : bool 72 | Whether to publish a standalone document (True) or fragment (False). 73 | Standalone documents include items like ``<head>`` elements, whereas 74 | non-standalone documents are just the ``<body>`` element. 75 | warning : bool, default True 76 | Whether to include text printed to stderr in the output. 77 | error : str, default 'continue' 78 | How to handle exceptions in the executed code-chunks. 79 | prompt : str, optional 80 | String to put before each line of the input code. Defaults to 81 | IPython-style counters. If you specify the ``prompt`` option for a code 82 | chunk then it will have a prompt even if ``use_prompt`` is ``False``. 83 | echo : bool, default True 84 | Whether to include the input code-chunk in the output document. 85 | eval : bool, default True 86 | Whether to execute the code-chunk. 87 | 88 | fig.width : str 89 | fig.height : str 90 | 91 | use_prompt : bool, default False 92 | Whether to use prompts. 93 | results : str, default 'default' 94 | * 'default': default behaviour 95 | * 'pandoc': same as 'default', but some Jupyter output is parsed 96 | as markdown: if the output is a stdout message that is 97 | not a warning/error, or if it has a 'text/plain' key. 98 | * 'hide': evaluate the chunk but hide the results 99 | 100 | eval_default : bool, default True 101 | default 'eval' attribute for every cell 102 | 103 | 104 | Notes 105 | ----- 106 | Attributes can be set via the command-line, document YAML metadata, 107 | or (where appropriate) the chunk-options line. 108 | """ 109 | 110 | # Document-traits 111 | to = opt.Str('html') 112 | title = opt.Str(None) 113 | date = opt.Str(None) 114 | author = opt.Str(None) # TODO: Multiple authors...
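    # Each of these document-level traits can also be set from the document's
    # YAML metadata or, where it makes sense, overridden on a single chunk's
    # options line, e.g. an (illustrative) chunk header like
    # ```{python, chunk, echo=False, fig.width=10}.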
115 | self_contained = opt.Bool(True) 116 | standalone = opt.Bool(True) 117 | use_prompt = opt.Bool(False) 118 | eval_default = opt.Bool(True) 119 | 120 | # Document or Cell 121 | warning = opt.Bool(True) 122 | error = opt.Choice({"continue", "raise"}, default_value="continue") 123 | prompt = opt.Str(None) 124 | echo = opt.Bool(True) 125 | eval = opt.Bool(True) 126 | fig = _Fig() 127 | results = opt.Choice({"pandoc", "hide", "default"}, default_value="default") 128 | 129 | def __init__(self, name: str, to: str='html', 130 | standalone: bool=True, 131 | self_contained: bool=True, 132 | warning: bool=True, 133 | error: str='continue', 134 | prompt: str=None, 135 | use_prompt: bool=False, 136 | pandoc_extra_args: list=None): 137 | """ 138 | Parameters 139 | ---------- 140 | name : str 141 | controls the directory for supporting files 142 | to : str, default ``'html'`` 143 | output format 144 | standalone : bool, default True 145 | whether to make a standalone document 146 | self_contained: bool, default True 147 | warning : bool, default True 148 | whether to include warnings (stderr) in the ouput. 149 | error : ``{"continue", "raise"}`` 150 | how to handle errors in the code being executed. 151 | prompt : str, default None 152 | use_prompt : bool, default False 153 | Whether to use prompt prefixes in code chunks 154 | pandoc_extra_args : list of str, default None 155 | Pandoc extra args for converting text from markdown 156 | to JSON AST. 157 | """ 158 | super().__init__(to=to, standalone=standalone, 159 | self_contained=self_contained, warning=warning, 160 | error=error, prompt=prompt, use_prompt=use_prompt) 161 | self._kernel_pairs = {} 162 | self.name = name 163 | self.resource_dir = self.name_resource_dir(name) 164 | self.pandoc_extra_args = pandoc_extra_args 165 | 166 | def __getattr__(self, attr): 167 | if '.' in attr: 168 | thing, attr = attr.split('.', 1) 169 | return getattr(getattr(self, thing), attr) 170 | else: 171 | return getattr(super(), attr) 172 | 173 | def has_trait(self, name): 174 | # intercepted `.`ed names for ease of use 175 | if '.' in name: 176 | ns, name = name.split('.', 1) 177 | try: 178 | accessor = getattr(self, ns) 179 | except AttributeError: 180 | return False 181 | return accessor.has_trait(name) 182 | else: 183 | return super().has_trait(name) 184 | 185 | def set_trait(self, name, value): 186 | # intercepted `.`ed names for ease of use 187 | if '.' in name: 188 | ns, name = name.split('.', 1) 189 | accessor = getattr(self, ns) 190 | return accessor.set_trait(name, value) 191 | else: 192 | return super().set_trait(name, value) 193 | 194 | @staticmethod 195 | def name_resource_dir(name): 196 | """ 197 | Give the directory name for supporting resources 198 | """ 199 | return '{}_files'.format(name) 200 | 201 | @property 202 | def kernel_managers(self): 203 | """ 204 | dict of KernelManager, KernelClient pairs, keyed by 205 | kernel name. 206 | """ 207 | return self._kernel_pairs 208 | 209 | def get_kernel(self, kernel_name): 210 | """ 211 | Get a kernel from ``kernel_managers`` by ``kernel_name``, 212 | creating it if needed. 
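        Kernels are cached in ``kernel_managers``, so at most one kernel is
        started per kernel name over the lifetime of this ``Stitch`` instance.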
213 | 214 | Parameters 215 | ---------- 216 | kernel_name : str 217 | 218 | Returns 219 | ------- 220 | kp : KernelPair 221 | """ 222 | kp = self.kernel_managers.get(kernel_name) 223 | if not kp: 224 | kp = kernel_factory(kernel_name) 225 | initialize_kernel(kernel_name, kp) 226 | self.kernel_managers[kernel_name] = kp 227 | return kp 228 | 229 | def get_option(self, option, attrs=None): 230 | if attrs is None: 231 | attrs = {} 232 | return attrs.get(option, getattr(self, option)) 233 | 234 | def parse_document_options(self, meta): 235 | """ 236 | Modifies self to update options, depending on the document. 237 | """ 238 | for attr, val in meta.items(): 239 | if self.has_trait(attr): 240 | self.set_trait(attr, val) 241 | 242 | def stitch(self, source: str) -> dict: 243 | """ 244 | Wrapper around ``stitch_ast`` method that preprocesses 245 | source code to allow Stitch-style code blocks and 246 | then convert to loaded Pandoc JSON AST. 247 | 248 | Parameters 249 | ---------- 250 | source : str 251 | the actual text to be converted 252 | 253 | Returns 254 | ------- 255 | doc : dict 256 | """ 257 | source = preprocess(source) 258 | ast = tokenize(source) 259 | return self.stitch_ast(ast) 260 | 261 | def stitch_ast(self, ast: dict) -> dict: 262 | """ 263 | Main method for converting a document. 264 | 265 | Parameters 266 | ---------- 267 | ast : dict 268 | Loaded Pandoc JSON AST 269 | 270 | Returns 271 | ------- 272 | doc : dict 273 | These should be compatible with pando's JSON AST 274 | It's a dict with keys 275 | - pandoc-api-version 276 | - meta 277 | - blocks 278 | """ 279 | version = ast['pandoc-api-version'] 280 | meta = ast['meta'] 281 | blocks = ast['blocks'] 282 | 283 | self.parse_document_options(meta) 284 | lm = opt.LangMapper(meta) 285 | new_blocks = [] 286 | 287 | for i, block in enumerate(blocks): 288 | if not is_code_block(block): 289 | new_blocks.append(block) 290 | continue 291 | # We should only have code blocks now... 292 | # Execute first, to get prompt numbers 293 | (lang, name), attrs = parse_kernel_arguments(block) 294 | if attrs.get('eval') is None: 295 | attrs['eval'] = self.eval_default 296 | kernel_name = lm.map_to_kernel(lang) 297 | if name is None: 298 | name = "unnamed_chunk_{}".format(i) 299 | if is_executable(block, kernel_name, attrs): 300 | # still need to check, since kernel_factory(lang) is executaed 301 | # even if the key is present, only want one kernel / lang 302 | kernel = self.get_kernel(kernel_name) 303 | messages = execute_block(block, kernel) 304 | execution_count = extract_execution_count(messages) 305 | else: 306 | execution_count = None 307 | messages = [] 308 | 309 | # ... now handle input formatting... 310 | if self.get_option('echo', attrs): 311 | prompt = self.get_option('prompt', attrs) 312 | new_blocks.append(wrap_input_code(block, self.use_prompt, prompt, 313 | execution_count, lm.map_to_style(lang))) 314 | 315 | # ... and output formatting 316 | if is_stitchable(messages, attrs): 317 | result = self.wrap_output( 318 | name, messages, attrs, 319 | ) 320 | new_blocks.extend(result) 321 | result = {'pandoc-api-version': version, 322 | 'meta': meta, 323 | 'blocks': new_blocks} 324 | return result 325 | 326 | def wrap_output(self, chunk_name, messages, attrs): 327 | """ 328 | Wrap the messages of a code-block. 329 | 330 | Parameters 331 | ---------- 332 | chunk_name : str 333 | messages : list of dicts 334 | attrs : dict 335 | options from the source options-line. 
336 | 337 | Returns 338 | ------- 339 | output_blocks : list 340 | 341 | Notes 342 | ----- 343 | Messages printed to stdout are wrapped in a CodeBlock. 344 | Messages publishing mimetypes (e.g. matplotlib figures) 345 | reuse Jupyter's display priority. See 346 | ``NbConvertBase.display_data_priority``. 347 | 348 | The result should be pandoc JSON AST compatible. 349 | """ 350 | pandoc = True if (self.get_option('results', attrs) == 'pandoc') else False 351 | 352 | # message_pairs can come from stdout or the io stream (maybe others?) 353 | output_messages = [x for x in messages if not is_execute_input(x)] 354 | display_messages = [x for x in output_messages if not is_stdout(x) and 355 | not is_stderr(x)] 356 | 357 | output_blocks = [] 358 | 359 | # Handle all stdout first... 360 | for message in output_messages: 361 | warning = self.get_option('warning', attrs) 362 | if is_stdout(message) or (is_stderr(message) and warning): 363 | text = message['content']['text'] 364 | output_blocks += plain_output( 365 | text, 366 | self.pandoc_extra_args, 367 | not (is_stderr(message) and warning) and pandoc 368 | ) 369 | 370 | priority = list(enumerate(NbConvertBase().display_data_priority)) 371 | priority.append((len(priority), 'application/javascript')) 372 | order = dict( 373 | (x[1], x[0]) for x in priority 374 | ) 375 | 376 | for message in display_messages: 377 | if message['header']['msg_type'] == 'error': 378 | error = self.get_option('error', attrs) 379 | if error == 'raise': 380 | exc = StitchError(message['content']['traceback']) 381 | raise exc 382 | blocks = plain_output( 383 | '\n'.join(message['content']['traceback']) 384 | ) 385 | else: 386 | all_data = message['content']['data'] 387 | if not all_data: # some R output 388 | continue 389 | key = min(all_data.keys(), key=lambda k: order[k]) 390 | data = all_data[key] 391 | 392 | if self.to in ('latex', 'pdf', 'beamer'): 393 | if 'text/latex' in all_data.keys(): 394 | key = 'text/latex' 395 | data = all_data[key] 396 | 397 | if key == 'text/plain': 398 | # ident, classes, kvs 399 | blocks = plain_output(data, self.pandoc_extra_args, pandoc) 400 | elif key == 'text/latex': 401 | blocks = [RawBlock('latex', data)] 402 | elif key == 'text/html': 403 | blocks = [RawBlock('html', data)] 404 | elif key == 'application/javascript': 405 | script = '<script type=text/javascript>{}</script>'.format( 406 | data) 407 | blocks = [RawBlock('html', script)] 408 | elif key.startswith('image') or key == 'application/pdf': 409 | blocks = [self.wrap_image_output(chunk_name, data, key, 410 | attrs)] 411 | else: 412 | blocks = tokenize_block(data, self.pandoc_extra_args) 413 | 414 | output_blocks += blocks 415 | return output_blocks 416 | 417 | def wrap_image_output(self, chunk_name, data, key, attrs): 418 | """ 419 | Extra handling for images 420 | 421 | Parameters 422 | ---------- 423 | chunk_name, data, key : str 424 | attrs : dict 425 | 426 | Returns 427 | ------- 428 | Para[Image] 429 | """ 430 | # TODO: interaction of output type and standalone. 431 | # TODO: this can be simplified, do the file-writing in one step 432 | def b64_encode(data): 433 | return base64.encodebytes(data.encode('utf-8')).decode('ascii') 434 | 435 | # TODO: dict of attrs on Stitcher.
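        # Chunk options prefixed with ``fig.`` drive the generated image:
        # ``fig.cap`` becomes the caption, and ``fig.width``/``fig.height``
        # are renamed below to plain ``width``/``height`` pandoc attributes.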
436 | image_keys = {'width', 'height'} 437 | caption = attrs.get('fig.cap', '') 438 | 439 | def transform_key(k): 440 | # fig.width -> width, fig.height -> height; 441 | return k.split('fig.', 1)[-1] 442 | 443 | attrs = [(transform_key(k), v) 444 | for k, v in attrs.items() 445 | if transform_key(k) in image_keys] 446 | 447 | if self.self_contained: 448 | if 'png' in key: 449 | data = 'data:image/png;base64,{}'.format(data) 450 | elif 'svg' in key: 451 | data = 'data:image/svg+xml;base64,{}'.format(b64_encode(data)) 452 | if 'png' in key or 'svg' in key: 453 | block = Para([Image([chunk_name, [], attrs], 454 | [Str(caption)], 455 | [data, ""])]) 456 | else: 457 | raise TypeError("Unknown mimetype %s" % key) 458 | else: 459 | # we are saving to filesystem 460 | ext = mimetypes.guess_extension(key) 461 | filepath = os.path.join(self.resource_dir, 462 | "{}{}".format(chunk_name, ext)) 463 | os.makedirs(self.resource_dir, exist_ok=True) 464 | if ext == '.svg': 465 | with open(filepath, 'wt') as f: 466 | f.write(data) 467 | else: 468 | with open(filepath, 'wb') as f: 469 | f.write(base64.decodebytes(data.encode('utf-8'))) 470 | # Image :: alt text (list of inlines), target 471 | # Image :: Attr [Inline] Target 472 | # Target :: (string, string) of (URL, title) 473 | block = Para([Image([chunk_name, [], []], 474 | [Str(caption)], 475 | [filepath, "fig: {}".format(chunk_name)])]) 476 | 477 | return block 478 | 479 | 480 | def convert_file(input_file: str, 481 | to: str, 482 | extra_args=(), 483 | output_file=None) -> None: 484 | """ 485 | Convert a markdown ``input_file`` to ``to``. 486 | 487 | Parameters 488 | ---------- 489 | input_file : str 490 | to : str 491 | extra_args : iterable 492 | output_file : str 493 | 494 | See Also 495 | -------- 496 | convert 497 | """ 498 | with open(input_file) as f: 499 | source = f.read() 500 | convert(source, to, extra_args=extra_args, output_file=output_file) 501 | 502 | 503 | def convert(source: str, to: str, extra_args=(), 504 | output_file: str=None) -> None: 505 | """ 506 | Convert a source document to an output file. 507 | 508 | Parameters 509 | ---------- 510 | source : str 511 | to : str 512 | extra_args : iterable 513 | output_file : str 514 | 515 | Notes 516 | ----- 517 | Either writes to ``output_file`` or prints to stdout. 518 | """ 519 | output_name = ( 520 | os.path.splitext(os.path.basename(output_file))[0] 521 | if output_file is not None 522 | else 'std_out' 523 | ) 524 | 525 | standalone = '--standalone' in extra_args 526 | self_contained = '--self-contained' in extra_args 527 | use_prompt = '--use-prompt' in extra_args 528 | extra_args = [item for item in extra_args if item != '--use-prompt'] 529 | stitcher = Stitch(name=output_name, to=to, standalone=standalone, 530 | self_contained=self_contained, use_prompt=use_prompt) 531 | result = stitcher.stitch(source) 532 | result = json.dumps(result) 533 | newdoc = pypandoc.convert_text(result, to, format='json', 534 | extra_args=extra_args, 535 | outputfile=output_file) 536 | 537 | if output_file is None: 538 | print(newdoc) 539 | 540 | 541 | def kernel_factory(kernel_name: str) -> KernelPair: 542 | """ 543 | Start a new kernel. 
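    This wraps ``jupyter_client.manager.start_new_kernel``, bundling the
    resulting kernel manager and (already started) kernel client into a
    ``KernelPair``.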
544 | 545 | Parameters 546 | ---------- 547 | kernel_name : str 548 | 549 | Returns 550 | ------- 551 | KernalPair: namedtuple 552 | - km (KernelManager) 553 | - kc (KernelClient) 554 | """ 555 | return KernelPair(*start_new_kernel(kernel_name=kernel_name)) 556 | 557 | 558 | # ----------- 559 | # Input Tests 560 | # ----------- 561 | 562 | def is_code_block(block): 563 | is_code = block['t'] == CODEBLOCK 564 | return is_code 565 | 566 | 567 | def is_executable(block, lang, attrs): 568 | """ 569 | Return whether a block should be executed. 570 | Must be a code_block, and must not have ``eval=False`` in the block 571 | arguments, and ``lang`` (kernel_name) must not be None. 572 | """ 573 | return (is_code_block(block) and attrs.get('eval') is not False and 574 | lang is not None) 575 | 576 | 577 | # ------------ 578 | # Output Tests 579 | # ------------ 580 | 581 | def is_stitchable(result, attrs): 582 | """ 583 | Return whether an output ``result`` should be included in the output. 584 | ``result`` should not be empty or None, and ``attrs`` should not 585 | include ``{'results': 'hide'}``. 586 | """ 587 | return (bool(result) and 588 | result[0] is not None and 589 | attrs.get('results') != 'hide') 590 | 591 | 592 | # ---------- 593 | # Formatting 594 | # ---------- 595 | def format_input_prompt(prompt, code, number): 596 | """ 597 | Format the actual input code-text. 598 | """ 599 | if prompt is None: 600 | return format_ipython_prompt(code, number) 601 | lines = code.split('\n') 602 | formatted = '\n'.join([prompt + line for line in lines]) 603 | return formatted 604 | 605 | 606 | def format_ipython_prompt(code, number): 607 | """ 608 | Wrap the input code in IPython style ``In [X]:`` markers. 609 | """ 610 | if number is None: 611 | return code 612 | 613 | start = 'In [{}]: '.format(number) 614 | split = code.split('\n') 615 | 616 | def trailing_space(x): 617 | # all blank lines shouldn't have a trailing space after ...: 618 | return '' if x == '' else ' ' 619 | 620 | rest = ['{}...:{}'.format(' ' * (len(start) - 5), 621 | trailing_space(x)) 622 | for x in split[1:]] 623 | formatted = '\n'.join(l + r for l, r in zip([start] + rest, split)) 624 | return formatted 625 | 626 | 627 | def wrap_input_code(block, use_prompt, prompt, execution_count, code_style=None): 628 | new = copy.deepcopy(block) 629 | code = block['c'][1] 630 | if use_prompt or prompt is not None: 631 | new['c'][1] = format_input_prompt(prompt, code, execution_count) 632 | if isinstance(code_style, str) and code_style != '': 633 | try: 634 | new['c'][0][1][0] = code_style 635 | except (KeyError, IndexError): 636 | pass 637 | return new 638 | 639 | 640 | def format_output_prompt(output, number): 641 | # TODO 642 | pass 643 | 644 | 645 | # ---------------- 646 | # Input Processing 647 | # ---------------- 648 | 649 | def tokenize(source: str) -> dict: 650 | """ 651 | Convert a document to pandoc's JSON AST. 652 | """ 653 | return json.loads(pypandoc.convert_text(source, 'json', 'markdown')) 654 | 655 | 656 | def tokenize_block(source: str, pandoc_extra_args: list=None) -> list: 657 | """ 658 | Convert a Jupyter output to Pandoc's JSON AST. 659 | """ 660 | if pandoc_extra_args is None: 661 | pandoc_extra_args = [] 662 | json_doc = pypandoc.convert_text(source, to='json', format='markdown', extra_args=pandoc_extra_args) 663 | return json.loads(json_doc)['blocks'] 664 | 665 | 666 | def preprocess(source: str) -> str: 667 | """ 668 | Process a source file prior to tokenezation. 
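    Only lines that look like the opening fence of a code chunk (those
    matching ``CODE_CHUNK_XPR``) are rewritten; every other line is passed
    through unchanged.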
669 | 670 | Parameters 671 | ---------- 672 | source : str 673 | 674 | Returns 675 | ------- 676 | processed : str 677 | 678 | Notes 679 | ----- 680 | Currently applies the following transformations 681 | 682 | - preprocess_options: transform code chunk arguments 683 | to allow ``{python, arg, kwarg=val}`` instead of pandoc-style 684 | ``{.python .arg kwarg=val}`` 685 | 686 | See Also 687 | -------- 688 | prerpocess_options 689 | """ 690 | doc = [] 691 | for line in source.split('\n'): 692 | if CODE_CHUNK_XPR.match(line): 693 | doc.append(preprocess_options(line)) 694 | else: 695 | doc.append(line) 696 | return '\n'.join(doc) 697 | 698 | 699 | def parse_kernel_arguments(block): 700 | """ 701 | Parse the kernel arguments of a code block, 702 | returning a tuple of (args, kwargs) 703 | 704 | Parameters 705 | ---------- 706 | block 707 | 708 | Returns 709 | ------- 710 | tuple 711 | 712 | Notes 713 | ----- 714 | The allowed positional arguments are 715 | 716 | - kernel_name 717 | - chunk_name 718 | 719 | Other positional arguments are ignored by Stitch. 720 | All other arguments must be like ``keyword=value``. 721 | """ 722 | options = block['c'][0][1] 723 | kernel_name = chunk_name = None 724 | if len(options) == 0: 725 | pass 726 | elif len(options) == 1: 727 | kernel_name = options[0] 728 | elif len(options) >= 2: 729 | kernel_name, chunk_name = options[0:2] 730 | kwargs = dict(block['c'][0][2]) 731 | kwargs = {k: v == 'True' if v in ('True', 'False') else v 732 | for k, v in kwargs.items()} 733 | 734 | return (kernel_name, chunk_name), kwargs 735 | 736 | 737 | def extract_kernel_name(block): 738 | options = block['c'][0][1] 739 | if len(options) >= 1: 740 | return options[0].strip('{}').strip() 741 | else: 742 | return None 743 | 744 | 745 | # ----------------- 746 | # Output Processing 747 | # ----------------- 748 | 749 | def plain_output(text: str, pandoc_extra_args: list=None, pandoc: bool=False) -> list: 750 | if pandoc: 751 | return tokenize_block(text, pandoc_extra_args) 752 | else: 753 | return [Div(['', ['output'], []], [CodeBlock(['', [], []], text)])] 754 | 755 | 756 | def is_stdout(message): 757 | return message['content'].get('name') == 'stdout' 758 | 759 | 760 | def is_stderr(message): 761 | return message['content'].get('name') == 'stderr' 762 | 763 | 764 | def is_execute_input(message): 765 | return message['msg_type'] == 'execute_input' 766 | 767 | 768 | # -------------- 769 | # Code Execution 770 | # -------------- 771 | def execute_block(block, kp, timeout=None): 772 | # see nbconvert.run_cell 773 | code = block['c'][1] 774 | messages = run_code(code, kp, timeout=timeout) 775 | return messages 776 | 777 | 778 | def run_code(code: str, kp: KernelPair, timeout=None): 779 | """ 780 | Execute a code chunk, capturing the output. 
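    The code is submitted on the kernel's shell channel; iopub messages that
    belong to this execution are then collected until the kernel reports an
    ``idle`` status.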
781 | 782 | Parameters 783 | ---------- 784 | code : str 785 | kp : KernelPair 786 | timeout : int 787 | 788 | Returns 789 | ------- 790 | outputs : List 791 | 792 | Notes 793 | ----- 794 | See https://github.com/jupyter/nbconvert/blob/master/nbconvert 795 | /preprocessors/execute.py 796 | """ 797 | msg_id = kp.kc.execute(code) 798 | while True: 799 | try: 800 | msg = kp.kc.shell_channel.get_msg(timeout=timeout) 801 | except Empty: 802 | # TODO: Log error 803 | raise 804 | 805 | if msg['parent_header'].get('msg_id') == msg_id: 806 | break 807 | else: 808 | # not our reply 809 | continue 810 | 811 | messages = [] 812 | 813 | while True: # until idle message 814 | try: 815 | # We've already waited for execute_reply, so all output 816 | # should already be waiting. However, on slow networks, like 817 | # in certain CI systems, waiting < 1 second might miss messages. 818 | # So long as the kernel sends a status:idle message when it 819 | # finishes, we won't actually have to wait this long, anyway. 820 | msg = kp.kc.iopub_channel.get_msg(timeout=4) 821 | except Empty: 822 | pass 823 | # TODO: Log error 824 | if msg['parent_header'].get('msg_id') != msg_id: 825 | # not an output from our execution 826 | continue 827 | 828 | msg_type = msg['msg_type'] 829 | content = msg['content'] 830 | 831 | if msg_type == 'status': 832 | if content['execution_state'] == 'idle': 833 | break 834 | else: 835 | continue 836 | elif msg_type in ('execute_input', 'execute_result', 'display_data', 837 | 'stream', 'error'): 838 | # Keep `execute_input` just for execution_count if there's 839 | # no result 840 | messages.append(msg) 841 | elif msg_type == 'clear_output': 842 | messages = [] 843 | continue 844 | elif msg_type.startswith('comm'): 845 | continue 846 | return messages 847 | 848 | 849 | def extract_execution_count(messages): 850 | """ 851 | """ 852 | for message in messages: 853 | count = message['content'].get('execution_count') 854 | if count is not None: 855 | return count 856 | 857 | 858 | def initialize_kernel(name, kp): 859 | # TODO: set_matplotlib_formats takes *args 860 | # TODO: do as needed? Push on user? 861 | # valid_formats = ["png", "jpg", "jpeg", "pdf", "svg"] 862 | if name == 'python': 863 | code = """\ 864 | %colors NoColor 865 | try: 866 | %matplotlib inline 867 | except: 868 | pass 869 | try: 870 | import pandas as pd 871 | pd.options.display.latex.repr = True 872 | except: 873 | pass 874 | """ 875 | kp.kc.execute(code, store_history=False) 876 | # fmt_code = '\n'.join("set_matplotlib_formats('{}')".format(fmt) 877 | # for fmt in valid_formats) 878 | # code = dedent(code) + fmt_code 879 | # kp.kc.execute(code, store_history=False) 880 | else: 881 | # raise ValueError(name) 882 | pass 883 | -------------------------------------------------------------------------------- /examples/timeseries.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Timeseries 3 | author: Tom Augspurger 4 | --- 5 | 6 | Pandas started out in the financial world, so naturally it has strong timeseries support. 7 | 8 | The first half of this post will look at pandas' capabilities for manipulating time series data. 9 | The second half will discuss modelling time series data with statsmodels. 
10 | 11 | 12 | ```{python} 13 | %matplotlib inline 14 | 15 | import os 16 | import numpy as np 17 | import pandas as pd 18 | import pandas_datareader.data as web 19 | import seaborn as sns 20 | import matplotlib.pyplot as plt 21 | sns.set(style='ticks', context='talk') 22 | plt.rcParams['savefig.transparent'] = True 23 | plt.rcParams['nbagg.transparent'] = True 24 | 25 | pd.options.display.precision = 2 26 | ``` 27 | 28 | Let's grab some stock data for Goldman Sachs using the [`pandas-datareader`](http://pandas-datareader.readthedocs.io/en/latest/) package, which spun off of pandas: 29 | 30 | 31 | ```{python} 32 | gs = web.DataReader("GS", data_source='yahoo', start='2006-01-01', 33 | end='2010-01-01') 34 | gs.head().round(2) 35 | ``` 36 | 37 | There isn't a special data-container just for time series in pandas, they're just `Series` or `DataFrame`s with a `DatetimeIndex`. 38 | 39 | ## Special Slicing 40 | 41 | Looking at the elements of `gs.index`, we see that `DatetimeIndex`es are made up of `pandas.Timestamp`s: 42 | 43 | 44 | ```{python} 45 | gs.index[0] 46 | ``` 47 | 48 | A `Timestamp` is mostly compatible with the `datetime.datetime` class, but much amenable to storage in arrays. 49 | 50 | Working with `Timestamp`s can be awkward, so Series and DataFrames with `DatetimeIndexes` have some special slicing rules. 51 | The first special case is *partial-string indexing*. Say we wanted to select all the days in 2006. Even with `Timestamp`'s convenient constructors, it's a pain 52 | 53 | 54 | ```{python} 55 | gs.loc[pd.Timestamp('2006-01-01'):pd.Timestamp('2006-12-31')].head() 56 | ``` 57 | 58 | Thanks to partial-string indexing, it's as simple as 59 | 60 | ```{python} 61 | gs.loc['2006'].head() 62 | ``` 63 | 64 | 65 | Since label slicing is inclusive, this slice selects any observation where the year is 2006. 66 | 67 | The second "convenience" is `__getitem__` (square-bracket) fall-back indexing. I'm only going to mention it here, with the caveat that you should never use it. 68 | DataFrame `__getitem__` typically looks in the column: `gs['2006']` would search `gs.columns` for `'2006'`, not find it, and raise a `KeyError`. But DataFrames with a `DatetimeIndex` catch that `KeyError` and try to slice the index. 69 | If it succeeds in slicing the index, the result like `gs.loc['2006']` is returned. 70 | If it fails, the `KeyError` is re-raised. 71 | This is confusing because in pretty much every other case `DataFrame.__getitem__` works on columns, and it's fragile because if you happened to have a column `'2006'` you *would* get just that column, and no fall-back indexing would occur. Just use `gs.loc['2006']` when slicing DataFrame indexes. 72 | 73 | 74 | ## Special Methods 75 | 76 | ### Resampling 77 | 78 | Resampling is similar to a `groupby`: you split the time series into groups (5-day buckets below), apply a function to each group (`mean`), and combine the result (one row per group). 79 | 80 | 81 | ```{python} 82 | gs.resample("5d").mean().head() 83 | ``` 84 | 85 | 86 | ```{python} 87 | gs.resample("W").agg(['mean', 'sum']).head() 88 | ``` 89 | 90 | 91 | You can up-sample to convert to a higher frequency. 92 | The new points are filled with NaNs. 93 | 94 | 95 | ```{python} 96 | gs.resample("6H").mean().head() 97 | ``` 98 | 99 | ### Rolling / Expanding / EW 100 | 101 | These methods aren't unique to `DatetimeIndex`es, but they often make sense with time series, so I'll show them here. 

```{python}
gs.Close.plot(label='Raw')
gs.Close.rolling(28).mean().plot(label='28D MA')
gs.Close.expanding().mean().plot(label='Expanding Average')
gs.Close.ewm(alpha=0.03).mean().plot(label='EWMA($\\alpha=.03$)')

plt.legend(bbox_to_anchor=(1.25, .5))
plt.tight_layout()
plt.ylabel("Close ($)")
sns.despine()
```

Each of `.rolling`, `.expanding`, and `.ewm` returns a deferred object, similar to a GroupBy.

```{python}
roll = gs.Close.rolling(30, center=True)
roll
```

```{python}
m = roll.agg(['mean', 'std'])
ax = m['mean'].plot()
ax.fill_between(m.index, m['mean'] - m['std'], m['mean'] + m['std'],
                alpha=.25)
plt.tight_layout()
plt.ylabel("Close ($)")
sns.despine()
```

## Grab Bag

### Offsets

These are similar to `dateutil.relativedelta`, but work with arrays.

```{python}
gs.index + pd.DateOffset(months=3, days=-2)
```

### Holiday Calendars

There are a whole bunch of special calendars, useful for traders probably.

```{python}
from pandas.tseries.holiday import USColumbusDay
```

```{python}
USColumbusDay.dates('2015-01-01', '2020-01-01')
```

### Timezones

Pandas works with `pytz` for nice timezone-aware datetimes.
The typical workflow is

1. localize timezone-naive timestamps to some timezone
2. convert to the desired timezone

If you already have timezone-aware Timestamps, there's no need for step one.

```{python}
# tz-naive -> tz-aware, then convert to UTC
gs.tz_localize('US/Eastern').tz_convert('UTC').head()
```

## Modeling Time Series

The rest of this post will focus on time series in the econometric sense.
Who my intended reader is for this section isn't all that clear to me, so I apologize upfront for any sudden shifts in complexity.
I'm roughly targeting material that could be presented in a first or second semester applied statistics course.
What follows certainly isn't a replacement for that.
Any formality will be restricted to footnotes for the curious.
I've put a whole bunch of resources at the end for people eager to learn more.
186 | 187 | ```{python, echo=False} 188 | import os 189 | import io 190 | import glob 191 | import zipfile 192 | 193 | import requests 194 | import statsmodels.api as sm 195 | 196 | 197 | def download_one(date): 198 | ''' 199 | Download a single month's flights 200 | ''' 201 | month = date.month 202 | year = date.year 203 | month_name = date.strftime('%B') 204 | headers = { 205 | 'Pragma': 'no-cache', 206 | 'Origin': 'http://www.transtats.bts.gov', 207 | 'Accept-Encoding': 'gzip, deflate', 208 | 'Accept-Language': 'en-US,en;q=0.8', 209 | 'Upgrade-Insecure-Requests': '1', 210 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.87 Safari/537.36', 211 | 'Content-Type': 'application/x-www-form-urlencoded', 212 | 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 213 | 'Cache-Control': 'no-cache', 214 | 'Referer': 'http://www.transtats.bts.gov/DL_SelectFields.asp?Table_ID=236&DB_Short_Name=On-Time', 215 | 'Connection': 'keep-alive', 216 | 'DNT': '1', 217 | } 218 | os.makedirs('data/timeseries', exist_ok=True) 219 | with open('url_7.txt') as f: 220 | data = f.read().strip() 221 | 222 | 223 | r = requests.post('http://www.transtats.bts.gov/DownLoad_Table.asp?Table_ID=236&Has_Group=3&Is_Zipped=0', 224 | headers=headers, data=data.format(year=year, month=month, month_name=month_name), 225 | stream=True) 226 | fp = os.path.join('data/timeseries', '{}-{}.zip'.format(year, month)) 227 | 228 | with open(fp, 'wb') as f: 229 | for chunk in r.iter_content(chunk_size=1024): 230 | if chunk: 231 | f.write(chunk) 232 | return fp 233 | 234 | def download_many(start, end): 235 | months = pd.date_range(start, end=end, freq='M') 236 | # We could easily parallelize this loop. 237 | for i, month in enumerate(months): 238 | download_one(month) 239 | 240 | def unzip_one(fp): 241 | zf = zipfile.ZipFile(fp) 242 | csv = zf.extract(zf.filelist[0], path='data/timeseries') 243 | return csv 244 | 245 | def time_to_datetime(df, columns): 246 | ''' 247 | Combine all time items into datetimes. 

    2014-01-01,1149.0 -> 2014-01-01T11:49:00
    '''
    def converter(col):
        timepart = (col.astype(str)
                    .str.replace(r'\.0$', '')  # NaNs force float dtype
                    .str.pad(4, fillchar='0'))
        return pd.to_datetime(df['fl_date'] + ' ' +
                              timepart.str.slice(0, 2) + ':' +
                              timepart.str.slice(2, 4),
                              errors='coerce')
    df[columns] = df[columns].apply(converter)
    return df


def read_one(fp):
    df = (pd.read_csv(fp, encoding='latin1')
          .rename(columns=str.lower)
          .drop('unnamed: 21', axis=1)
          .pipe(time_to_datetime, ['dep_time', 'arr_time', 'crs_arr_time',
                                   'crs_dep_time'])
          .assign(fl_date=lambda x: pd.to_datetime(x['fl_date'])))
    return df
```

```{python}
store = 'data/ts.hdf5'

if not os.path.exists(store):
    if not os.path.exists('data/timeseries'):
        download_many('2000-01-01', '2016-01-01')

    zips = glob.glob(os.path.join('data/timeseries', '*.zip'))
    csvs = [unzip_one(fp) for fp in zips]
    dfs = [read_one(fp) for fp in csvs]
    df = pd.concat(dfs, ignore_index=True)

    cat_cols = ['unique_carrier', 'carrier', 'tail_num', 'origin', 'dest']
    df[cat_cols] = df[cat_cols].apply(pd.Categorical)
    df.to_hdf(store, 'ts', format='table')
else:
    df = pd.read_hdf(store, 'ts')
```

```{python}
with pd.option_context('display.max_rows', 100):
    print(df.dtypes)
```

We can calculate the series we'll model, the monthly average of daily flight counts, with a `value_counts` and a resample.

```{python}
daily = df.fl_date.value_counts().sort_index()
y = daily.resample('MS').mean()
y.head()
```

Note that I use the `"MS"` frequency code there.
Pandas defaults to end of month (or end of year).
Append an `'S'` to get the start.

```{python}
ax = y.plot()
ax.set(ylabel='Average Monthly Flights')
sns.despine()
```

```{python}
import statsmodels.formula.api as smf
import statsmodels.tsa.api as smt
import statsmodels.api as sm
```

One note of warning: I'm using the development version of statsmodels (commit `de15ec8` to be precise).
Not all of the items I've shown here are available in the currently-released version.

Think back to a typical regression problem, ignoring anything to do with time series for now.
The usual task is to predict some value $y$ using a linear combination of features in $X$.

$$y = \beta_0 + \beta_1 X_1 + \ldots + \beta_p X_p + \epsilon$$

When working with time series, some of the most important (and sometimes *only*) features are the previous, or *lagged*, values of $y$.

Let's start by trying just that "manually": running a regression of `y` on lagged values of itself.
We'll see that this regression suffers from a few problems: multicollinearity, autocorrelation, non-stationarity, and seasonality.
I'll explain what each of those is in turn and why they're problems.
Afterwards, we'll use a second model, seasonal ARIMA, which handles those problems for us.

First, let's create a dataframe with our lagged values of `y` using the `.shift` method, which shifts the values down by `i` periods so that the observation from period $t - i$ lines up with period $t$.
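
As a quick illustration of that alignment (a minimal sketch added here, not part of the original analysis), we can line up the first few values of `y` with a one-period shift; at each row, the shifted column holds the previous month's value.

```python
# Illustration only: `y` alongside its one-period lag.
# Assumes `y` (average daily flights, by month) from the cells above.
pd.concat([y, y.shift(1)], axis=1, keys=['y', 'lag_1']).head()
```

With that picture in mind, here's the full dataframe of lags: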

```{python}
X = (pd.concat([y.shift(i) for i in range(6)], axis=1,
               keys=['y'] + ['L%s' % i for i in range(1, 6)])
       .dropna())
X.head()
```

We can fit the lagged model using statsmodels (which uses [patsy](http://patsy.readthedocs.org) to translate the formula string to a design matrix).

```{python}
mod_lagged = smf.ols('y ~ trend + L1 + L2 + L3 + L4 + L5',
                     data=X.assign(trend=np.arange(len(X))))
res_lagged = mod_lagged.fit()
res_lagged.summary()
```

There are a few problems with this approach though.
Since our lagged values are highly correlated with each other, our regression suffers from [multicollinearity](https://en.wikipedia.org/wiki/Multicollinearity).
That ruins our estimates of the slopes.

```{python}
sns.heatmap(X.corr());
```

Second, we'd intuitively expect the $\beta_i$s to gradually decline to zero.
The immediately preceding period *should* be most important ($\beta_1$ is the largest coefficient in absolute value), followed by $\beta_2$, then $\beta_3$, and so on.
Looking at the regression summary and the bar graph below, this isn't the case (the cause is related to multicollinearity).

```{python}
ax = res_lagged.params.drop(['Intercept', 'trend']).plot.bar(rot=0)
plt.ylabel('Coefficient')
sns.despine()
```

Finally, our degrees of freedom drop since we lose two for each variable (one for estimating the coefficient, one for the lost observation as a result of the `shift`).
At least in (macro)econometrics, each observation is precious and we're loath to throw them away, though sometimes that's unavoidable.

### Autocorrelation

Another problem our lagged model suffered from is [autocorrelation](https://en.wikipedia.org/wiki/Autocorrelation) (also known as serial correlation).
Roughly speaking, autocorrelation is when there's a clear pattern in the residuals of your regression (the observed minus the predicted).
Let's fit a simple model of $y = \beta_0 + \beta_1 T + \epsilon$, where `T` is the time trend (`np.arange(len(y))`).

```{python}
# `Results.resid` is a Series of residuals: y - ŷ
mod_trend = sm.OLS.from_formula(
    'y ~ trend', data=y.to_frame(name='y')
                       .assign(trend=np.arange(len(y))))
res_trend = mod_trend.fit()
```

Residuals (the observed minus the expected, or $\hat{e_t} = y_t - \hat{y_t}$) are supposed to be [white noise](https://en.wikipedia.org/wiki/White_noise).
That's [one of the assumptions](https://en.wikipedia.org/wiki/Gauss–Markov_theorem) many of the properties of linear regression are founded upon.
In this case there's a correlation between one residual and the next: if the residual at time $t$ was above expectation, then the residual at time $t + 1$ is *much* more likely to be above average as well ($e_t > 0 \implies E_t[e_{t+1}] > 0$).
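
To put a rough number on that claim, here's a quick check (added for illustration; it assumes `res_trend` from the cell above and uses pandas' `Series.autocorr`): the lag-one autocorrelation of the residuals comes out well above zero.

```python
# Illustration only: first-order serial correlation of the trend-model residuals.
res_trend.resid.autocorr(lag=1)
```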

We'll define a helper function to plot the residuals time series, and some diagnostics about them.

```{python}
def tsplot(y, lags=None, figsize=(10, 8)):
    fig = plt.figure(figsize=figsize)
    layout = (2, 2)
    ts_ax = plt.subplot2grid(layout, (0, 0), colspan=2)
    acf_ax = plt.subplot2grid(layout, (1, 0))
    pacf_ax = plt.subplot2grid(layout, (1, 1))

    y.plot(ax=ts_ax)
    smt.graphics.plot_acf(y, lags=lags, ax=acf_ax)
    smt.graphics.plot_pacf(y, lags=lags, ax=pacf_ax)
    [ax.set_xlim(1.5) for ax in [acf_ax, pacf_ax]]
    sns.despine()
    plt.tight_layout()
    return ts_ax, acf_ax, pacf_ax
```

Calling it on the residuals from the linear trend:

```{python}
tsplot(res_trend.resid, lags=36);
```

The top subplot shows the time series of our residuals $e_t$, which should be white noise (but it isn't).
The bottom-left shows the [autocorrelation](https://www.otexts.org/fpp/2/2#autocorrelation) of the residuals as a correlogram.
It measures the correlation between a value and its lagged self, e.g. $corr(e_t, e_{t-1}), corr(e_t, e_{t-2}), \ldots$.
The partial autocorrelation plot in the bottom-right shows a similar concept.
It's partial in the sense that the value for $corr(e_t, e_{t-k})$ is the correlation between those two periods, after controlling for the values at all shorter lags.

Autocorrelation is a problem in regular regressions like above, but we'll use it to our advantage when we set up an ARIMA model below.
The basic idea is pretty sensible: if your regression residuals have a clear pattern, then there's clearly some structure in the data that you aren't taking advantage of.
If a positive residual today means you'll likely have a positive residual tomorrow, why not incorporate that information into your forecast, and adjust your forecasted value for tomorrow accordingly?
That's pretty much what ARIMA does.

It's important that your dataset be stationary; otherwise you run the risk of finding [spurious correlations](http://www.tylervigen.com/spurious-correlations).
A common example is the relationship between the number of TVs per person and life expectancy.
It's not likely that there's an actual causal relationship there.
Rather, there could be a third variable that's driving both (wealth, say).
[Granger and Newbold (1974)](http://wolfweb.unr.edu/homepage/zal/STAT758/Granger_Newbold_1974.pdf) had some stern words for the econometrics literature on this.

> We find it very curious that whereas virtually every textbook on econometric methodology contains explicit warnings of the dangers of autocorrelated errors, this phenomenon crops up so frequently in well-respected applied work.

(:fire:), but in that academic passive-aggressive way.

The typical way to handle non-stationarity is to difference the non-stationary variable until it is stationary.

```{python}
y.to_frame(name='y').assign(Δy=lambda x: x.y.diff()).plot(subplots=True)
sns.despine()
```

Our original series actually doesn't look *that* bad.
It doesn't look like nominal GDP, say, where there's a clearly rising trend.
But we have more rigorous methods for detecting whether a series is non-stationary than simply plotting and squinting at it.
One popular method is the Augmented Dickey-Fuller test.
It's a statistical hypothesis test that roughly says:

$H_0$ (null hypothesis): $y$ is non-stationary, needs to be differenced

$H_A$ (alternative hypothesis): $y$ is stationary, doesn't need to be differenced

I don't want to get into the weeds on exactly what the test statistic is, and what the distribution looks like.
This is implemented in statsmodels as [`smt.adfuller`](http://www.statsmodels.org/dev/generated/statsmodels.tsa.stattools.adfuller.html).
The return type is a bit busy for me, so we'll wrap it in a `namedtuple`.

```{python}
from collections import namedtuple

ADF = namedtuple("ADF", "adf pvalue usedlag nobs critical icbest")
```

```{python}
ADF(*smt.adfuller(y))._asdict()
```

So we failed to reject the null hypothesis that the original series was non-stationary.
Let's difference it.

```{python}
ADF(*smt.adfuller(y.diff().dropna()))._asdict()
```

This looks better.
It's not statistically significant at the 5% level, but who cares what statisticians say anyway.

We'll fit another OLS model, $\Delta y_t = \beta_0 + \beta_1 \Delta y_{t-1} + e_t$, where the lagged difference is `LΔy` in the code.

```{python}
data = (y.to_frame(name='y')
         .assign(Δy=lambda df: df.y.diff())
         .assign(LΔy=lambda df: df.Δy.shift()))
mod_stationary = smf.ols('Δy ~ LΔy', data=data.dropna())
res_stationary = mod_stationary.fit()
```

```{python}
tsplot(res_stationary.resid, lags=24);
```

So we've taken care of multicollinearity, autocorrelation, and non-stationarity, but we still aren't done.

## Seasonality

We have strong monthly seasonality:

```{python}
smt.seasonal_decompose(y).plot();
```

There are a few ways to handle seasonality.
We'll just rely on the `SARIMAX` model to do it for us.
For now, recognize that it's a problem to be solved.

## ARIMA

So, we've sketched the problems with regular old regression: multicollinearity, autocorrelation, non-stationarity, and seasonality.
Our tool of choice, `smt.SARIMAX`, which stands for Seasonal ARIMA with eXogenous regressors, can handle all these.
We'll walk through the components in pieces.

ARIMA stands for AutoRegressive Integrated Moving Average.
It's a relatively simple yet flexible way of modeling univariate time series.
It's made up of three components, and is typically written as $\mathrm{ARIMA}(p, d, q)$.

### [AutoRegressive](https://www.otexts.org/fpp/8/3)

The idea is to predict a variable by a linear combination of its lagged values (*auto*-regressive as in regressing a value on its past *self*).
An AR($p$) model, where $p$ represents the number of lagged values used, is written as

$$y_t = c + \phi_1 y_{t-1} + \phi_2 y_{t-2} + \ldots + \phi_p y_{t-p} + e_t$$

$c$ is a constant and $e_t$ is white noise.
This looks a lot like a linear regression model with multiple predictors, but the predictors happen to be lagged values of $y$ (though they are estimated differently).
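
To make the AR idea concrete, here's a small simulation (added for illustration; `y_sim` and `sim` are made-up names, and it only uses tools already imported above). We generate $y_t = 0.6\, y_{t-1} + e_t$ and check that an OLS regression on the first lag roughly recovers the coefficient.

```python
# Illustration only: simulate an AR(1) process with phi_1 = 0.6 and
# recover the coefficient by regressing the series on its first lag.
np.random.seed(42)
e = np.random.randn(500)
y_sim = np.zeros(500)
for t in range(1, 500):
    y_sim[t] = 0.6 * y_sim[t - 1] + e[t]

sim = pd.DataFrame({'y': y_sim}).assign(L1=lambda df: df.y.shift())
smf.ols('y ~ L1', data=sim.dropna()).fit().params
```

The estimated `L1` coefficient should land near 0.6, which is exactly the kind of structure the AR term exploits.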

### Integrated

Integrated is like the opposite of differencing, and is the part that deals with stationarity.
If you have to difference your dataset 1 time to get it stationary, then $d=1$.
We'll introduce one bit of notation for differencing: $\Delta y_t = y_t - y_{t-1}$ for $d=1$.

### [Moving Average](https://www.otexts.org/fpp/8/4)

The MA component looks somewhat similar to the AR component, but it involves different terms.

$$y_t = c + e_t + \theta_1 e_{t-1} + \theta_2 e_{t-2} + \ldots + \theta_q e_{t-q}$$

$c$ again is a constant and $e_t$ again is white noise.
But now the lagged terms on the right-hand side are the *errors* (residuals) from previous predictions, rather than lagged values of $y$ itself.

### Combining

Putting that together, an ARIMA(1, 1, 1) process is written as

$$\Delta y_t = c + \phi_1 \Delta y_{t-1} + \theta_1 e_{t-1} + e_t$$

Using *lag notation*, where $L y_t = y_{t-1}$, i.e. `y.shift()` in pandas, we can rewrite that as

$$(1 - \phi_1 L) (1 - L)y_t = c + (1 + \theta_1 L)e_t$$

(Expanding the left-hand side, $(1 - \phi_1 L)\Delta y_t = \Delta y_t - \phi_1 \Delta y_{t-1}$, gets you back to the first form.)

That was for our specific $\mathrm{ARIMA}(1, 1, 1)$ model. For the general $\mathrm{ARIMA}(p, d, q)$, that becomes

$$(1 - \phi_1 L - \ldots - \phi_p L^p) (1 - L)^d y_t = c + (1 + \theta_1 L + \ldots + \theta_q L^q)e_t$$

We went through that *extremely* quickly, so don't feel bad if things aren't clear.
Fortunately, the model is pretty easy to use with statsmodels (using it *correctly*, in a statistical sense, is another matter).

```{python}
mod = smt.SARIMAX(y, trend='c', order=(1, 1, 1))
res = mod.fit()
tsplot(res.resid[2:], lags=24);
```

```{python}
res.summary()
```

There's a bunch of output there with various tests, estimated parameters, and information criteria.
Let's just say that things are looking better, but we still haven't accounted for seasonality.

A seasonal ARIMA model is written as $\mathrm{ARIMA}(p,d,q)×(P,D,Q)_s$.
Lowercase letters are for the non-seasonal component, just like before. Upper-case letters are a similar specification for the seasonal component, where $s$ is the periodicity (4 for quarterly, 12 for monthly).

It's like we have two processes, one for the non-seasonal components and one for the seasonal components, and we multiply them together with regular algebra rules.

The general form of that looks like (quoting the [statsmodels docs](http://www.statsmodels.org/dev/examples/notebooks/generated/statespace_sarimax_stata.html) here)

$$
\phi_p (L) \tilde \phi_P (L^s) \Delta^d \Delta_s^D y_t = A(t) + \theta_q (L) \tilde \theta_Q (L^s) \epsilon_t
$$

where

- $\phi_p(L)$ is the non-seasonal autoregressive lag polynomial
- $\tilde{\phi}_P(L^s)$ is the seasonal autoregressive lag polynomial
- $\Delta^d\Delta_s^D y_t$ is the time series, differenced $d$ times and seasonally differenced $D$ times
- $A(t)$ is the trend polynomial (including the intercept)
- $\theta_q(L)$ is the non-seasonal moving average lag polynomial
- $\tilde{\theta}_Q(L^s)$ is the seasonal moving average lag polynomial

I don't find that to be very clear, but maybe an example will help.
We'll fit a seasonal ARIMA$(1,1,2)×(0, 1, 2)_{12}$.

So the nonseasonal component is

- $p=1$: use one autoregressive lag, $y_{t-1}$
- $d=1$: one first-differencing of the data (one month)
- $q=2$: use the previous two non-seasonal residuals, $e_{t-1}$ and $e_{t-2}$, to forecast

And the seasonal component is

- $P=0$: Don't use any previous seasonal values
- $D=1$: Difference the series 12 periods back: `y.diff(12)`
- $Q=2$: Use the two previous seasonal residuals

```{python}
mod_seasonal = smt.SARIMAX(y, trend='c',
                           order=(1, 1, 2), seasonal_order=(0, 1, 2, 12),
                           simple_differencing=False)
res_seasonal = mod_seasonal.fit()
```

```{python}
res_seasonal.summary()
```

```{python}
tsplot(res_seasonal.resid[12:], lags=24);
```

Things look much better now.

One thing I didn't really talk about is order selection: how to choose $p, d, q, P, D$, and $Q$.
R's forecast package does have a handy `auto.arima` function that does this for you.
Python / statsmodels doesn't have that at the moment.
The alternative seems to be experience (boo), intuition (boo), and good old grid search.
You can fit a bunch of models for a bunch of combinations of the parameters and use the [AIC](https://en.wikipedia.org/wiki/Akaike_information_criterion) or [BIC](https://en.wikipedia.org/wiki/Bayesian_information_criterion) to choose the best.
[Here](https://www.otexts.org/fpp/8/7) is a useful reference, and [this](http://stackoverflow.com/a/22770973) StackOverflow answer recommends a few options.

## Forecasting

Now that we've fit that model, let's put it to use.
First, we'll make a bunch of one-step-ahead forecasts.
At each point (month), we take the history up to that point and make a forecast for the next month.
So the forecast for January 2014 has available all the data up through December 2013.

```{python}
pred = res_seasonal.get_prediction(start='2001-03-01')
pred_ci = pred.conf_int()
```

```{python}
ax = y.plot(label='observed')
pred.predicted_mean.plot(ax=ax, label='Forecast', alpha=.7)
ax.fill_between(pred_ci.index,
                pred_ci.iloc[:, 0],
                pred_ci.iloc[:, 1], color='k', alpha=.2)
ax.set_ylabel("Monthly Flights")
plt.legend()
sns.despine()
```

There are a few places where the observed series slips outside the 95% confidence interval.
The series seems especially unstable before 2005.

Alternatively, we can make *dynamic* forecasts as of some month (January 2013 in the example below).
That means the forecast from that point forward only uses information available as of January 2013.
The predictions are generated in a similar way: a bunch of one-step forecasts.
Only instead of plugging in the *actual* values beyond January 2013, we plug in the *forecast* values.
701 | 702 | 703 | ```{python} 704 | pred_dy = res_seasonal.get_prediction(start='2002-03-01', dynamic='2013-01-01') 705 | pred_dy_ci = pred_dy.conf_int() 706 | ``` 707 | 708 | 709 | ```{python} 710 | ax = y.plot(label='observed') 711 | pred_dy.predicted_mean.plot(ax=ax, label='Forecast') 712 | ax.fill_between(pred_dy_ci.index, 713 | pred_dy_ci.iloc[:, 0], 714 | pred_dy_ci.iloc[:, 1], color='k', alpha=.25) 715 | ax.set_ylabel("Monthly Flights") 716 | 717 | # Highlight the forecast area 718 | ax.fill_betweenx(ax.get_ylim(), pd.Timestamp('2013-01-01'), y.index[-1], 719 | alpha=.1, zorder=-1) 720 | ax.annotate('Dynamic $\\longrightarrow$', (pd.Timestamp('2013-02-01'), 550)) 721 | 722 | plt.legend() 723 | sns.despine() 724 | ``` 725 | 726 | ## Resources 727 | 728 | This is a collection of links for those interested. 729 | 730 | ### Time series modeling in Python 731 | 732 | + [Statsmodels Statespace Notebooks](http://www.statsmodels.org/dev/examples/index.html#statespace) 733 | + [Statsmodels VAR tutorial](http://www.statsmodels.org/dev/vector_ar.html#var) 734 | - [ARCH Library by Kevin Sheppard](https://github.com/bashtage/arch) 735 | 736 | ### General Textbooks 737 | 738 | - [Forecasting: Principles and Practice](https://www.otexts.org/fpp/): A great introduction 739 | - [Stock and Watson](http://wps.aw.com/aw_stock_ie_3/178/45691/11696965.cw/): Readable undergraduate resource, has a few chapters on time series 740 | - [Greene's Econometric Analysis](http://pages.stern.nyu.edu/~wgreene/Text/econometricanalysis.htm): My favorite PhD level textbook 741 | - [Hamilton's Time Series Analysis](http://www.amazon.com/Time-Analysis-James-Douglas-Hamilton/dp/0691042896): A classic 742 | - [Lutkehpohl's New Introduction to Multiple Time Series Analysis](http://www.amazon.com/New-Introduction-Multiple-Time-Analysis/dp/3540262393): Extremely dry, but useful if you're implementing this stuff 743 | 744 | ## Conclusion 745 | 746 | Congratulations if you made it this far, this piece just kept growing (and I still had to cut stuff). 747 | The main thing cut was talking about how `SARIMAX` is implemented on top of using statsmodels' statespace framework. 748 | The statespace framework, developed mostly by Chad Fulton over the past couple years, is really nice. 749 | You can pretty easily [extend it](http://www.statsmodels.org/dev/examples/notebooks/generated/statespace_local_linear_trend.html) with custom models, but still get all the benefits of the framework's estimation and results facilities. 750 | I'd recommend reading the [notebooks](http://www.statsmodels.org/dev/examples/index.html#statespace). 751 | We also didn't get to talk at all about Skipper Seabold's work on VARs, but maybe some other time. 752 | 753 | As always, [feedback is welcome](https://twitter.com/tomaugspurger). 754 | 755 | -------------------------------------------------------------------------------- /versioneer.py: -------------------------------------------------------------------------------- 1 | 2 | # Version: 0.16 3 | 4 | """The Versioneer - like a rocketeer, but for versions. 5 | 6 | The Versioneer 7 | ============== 8 | 9 | * like a rocketeer, but for versions! 
10 | * https://github.com/warner/python-versioneer 11 | * Brian Warner 12 | * License: Public Domain 13 | * Compatible With: python2.6, 2.7, 3.3, 3.4, 3.5, and pypy 14 | * [![Latest Version] 15 | (https://pypip.in/version/versioneer/badge.svg?style=flat) 16 | ](https://pypi.python.org/pypi/versioneer/) 17 | * [![Build Status] 18 | (https://travis-ci.org/warner/python-versioneer.png?branch=master) 19 | ](https://travis-ci.org/warner/python-versioneer) 20 | 21 | This is a tool for managing a recorded version number in distutils-based 22 | python projects. The goal is to remove the tedious and error-prone "update 23 | the embedded version string" step from your release process. Making a new 24 | release should be as easy as recording a new tag in your version-control 25 | system, and maybe making new tarballs. 26 | 27 | 28 | ## Quick Install 29 | 30 | * `pip install versioneer` to somewhere to your $PATH 31 | * add a `[versioneer]` section to your setup.cfg (see below) 32 | * run `versioneer install` in your source tree, commit the results 33 | 34 | ## Version Identifiers 35 | 36 | Source trees come from a variety of places: 37 | 38 | * a version-control system checkout (mostly used by developers) 39 | * a nightly tarball, produced by build automation 40 | * a snapshot tarball, produced by a web-based VCS browser, like github's 41 | "tarball from tag" feature 42 | * a release tarball, produced by "setup.py sdist", distributed through PyPI 43 | 44 | Within each source tree, the version identifier (either a string or a number, 45 | this tool is format-agnostic) can come from a variety of places: 46 | 47 | * ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows 48 | about recent "tags" and an absolute revision-id 49 | * the name of the directory into which the tarball was unpacked 50 | * an expanded VCS keyword ($Id$, etc) 51 | * a `_version.py` created by some earlier build step 52 | 53 | For released software, the version identifier is closely related to a VCS 54 | tag. Some projects use tag names that include more than just the version 55 | string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool 56 | needs to strip the tag prefix to extract the version identifier. For 57 | unreleased software (between tags), the version identifier should provide 58 | enough information to help developers recreate the same tree, while also 59 | giving them an idea of roughly how old the tree is (after version 1.2, before 60 | version 1.3). Many VCS systems can report a description that captures this, 61 | for example `git describe --tags --dirty --always` reports things like 62 | "0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the 63 | 0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has 64 | uncommitted changes. 65 | 66 | The version identifier is used for multiple purposes: 67 | 68 | * to allow the module to self-identify its version: `myproject.__version__` 69 | * to choose a name and prefix for a 'setup.py sdist' tarball 70 | 71 | ## Theory of Operation 72 | 73 | Versioneer works by adding a special `_version.py` file into your source 74 | tree, where your `__init__.py` can import it. This `_version.py` knows how to 75 | dynamically ask the VCS tool for version information at import time. 76 | 77 | `_version.py` also contains `$Revision$` markers, and the installation 78 | process marks `_version.py` to have this marker rewritten with a tag name 79 | during the `git archive` command. 
As a result, generated tarballs will 80 | contain enough information to get the proper version. 81 | 82 | To allow `setup.py` to compute a version too, a `versioneer.py` is added to 83 | the top level of your source tree, next to `setup.py` and the `setup.cfg` 84 | that configures it. This overrides several distutils/setuptools commands to 85 | compute the version when invoked, and changes `setup.py build` and `setup.py 86 | sdist` to replace `_version.py` with a small static file that contains just 87 | the generated version data. 88 | 89 | ## Installation 90 | 91 | First, decide on values for the following configuration variables: 92 | 93 | * `VCS`: the version control system you use. Currently accepts "git". 94 | 95 | * `style`: the style of version string to be produced. See "Styles" below for 96 | details. Defaults to "pep440", which looks like 97 | `TAG[+DISTANCE.gSHORTHASH[.dirty]]`. 98 | 99 | * `versionfile_source`: 100 | 101 | A project-relative pathname into which the generated version strings should 102 | be written. This is usually a `_version.py` next to your project's main 103 | `__init__.py` file, so it can be imported at runtime. If your project uses 104 | `src/myproject/__init__.py`, this should be `src/myproject/_version.py`. 105 | This file should be checked in to your VCS as usual: the copy created below 106 | by `setup.py setup_versioneer` will include code that parses expanded VCS 107 | keywords in generated tarballs. The 'build' and 'sdist' commands will 108 | replace it with a copy that has just the calculated version string. 109 | 110 | This must be set even if your project does not have any modules (and will 111 | therefore never import `_version.py`), since "setup.py sdist" -based trees 112 | still need somewhere to record the pre-calculated version strings. Anywhere 113 | in the source tree should do. If there is a `__init__.py` next to your 114 | `_version.py`, the `setup.py setup_versioneer` command (described below) 115 | will append some `__version__`-setting assignments, if they aren't already 116 | present. 117 | 118 | * `versionfile_build`: 119 | 120 | Like `versionfile_source`, but relative to the build directory instead of 121 | the source directory. These will differ when your setup.py uses 122 | 'package_dir='. If you have `package_dir={'myproject': 'src/myproject'}`, 123 | then you will probably have `versionfile_build='myproject/_version.py'` and 124 | `versionfile_source='src/myproject/_version.py'`. 125 | 126 | If this is set to None, then `setup.py build` will not attempt to rewrite 127 | any `_version.py` in the built tree. If your project does not have any 128 | libraries (e.g. if it only builds a script), then you should use 129 | `versionfile_build = None`. To actually use the computed version string, 130 | your `setup.py` will need to override `distutils.command.build_scripts` 131 | with a subclass that explicitly inserts a copy of 132 | `versioneer.get_version()` into your script file. See 133 | `test/demoapp-script-only/setup.py` for an example. 134 | 135 | * `tag_prefix`: 136 | 137 | a string, like 'PROJECTNAME-', which appears at the start of all VCS tags. 138 | If your tags look like 'myproject-1.2.0', then you should use 139 | tag_prefix='myproject-'. If you use unprefixed tags like '1.2.0', this 140 | should be an empty string, using either `tag_prefix=` or `tag_prefix=''`. 
141 | 142 | * `parentdir_prefix`: 143 | 144 | a optional string, frequently the same as tag_prefix, which appears at the 145 | start of all unpacked tarball filenames. If your tarball unpacks into 146 | 'myproject-1.2.0', this should be 'myproject-'. To disable this feature, 147 | just omit the field from your `setup.cfg`. 148 | 149 | This tool provides one script, named `versioneer`. That script has one mode, 150 | "install", which writes a copy of `versioneer.py` into the current directory 151 | and runs `versioneer.py setup` to finish the installation. 152 | 153 | To versioneer-enable your project: 154 | 155 | * 1: Modify your `setup.cfg`, adding a section named `[versioneer]` and 156 | populating it with the configuration values you decided earlier (note that 157 | the option names are not case-sensitive): 158 | 159 | ```` 160 | [versioneer] 161 | VCS = git 162 | style = pep440 163 | versionfile_source = src/myproject/_version.py 164 | versionfile_build = myproject/_version.py 165 | tag_prefix = 166 | parentdir_prefix = myproject- 167 | ```` 168 | 169 | * 2: Run `versioneer install`. This will do the following: 170 | 171 | * copy `versioneer.py` into the top of your source tree 172 | * create `_version.py` in the right place (`versionfile_source`) 173 | * modify your `__init__.py` (if one exists next to `_version.py`) to define 174 | `__version__` (by calling a function from `_version.py`) 175 | * modify your `MANIFEST.in` to include both `versioneer.py` and the 176 | generated `_version.py` in sdist tarballs 177 | 178 | `versioneer install` will complain about any problems it finds with your 179 | `setup.py` or `setup.cfg`. Run it multiple times until you have fixed all 180 | the problems. 181 | 182 | * 3: add a `import versioneer` to your setup.py, and add the following 183 | arguments to the setup() call: 184 | 185 | version=versioneer.get_version(), 186 | cmdclass=versioneer.get_cmdclass(), 187 | 188 | * 4: commit these changes to your VCS. To make sure you won't forget, 189 | `versioneer install` will mark everything it touched for addition using 190 | `git add`. Don't forget to add `setup.py` and `setup.cfg` too. 191 | 192 | ## Post-Installation Usage 193 | 194 | Once established, all uses of your tree from a VCS checkout should get the 195 | current version string. All generated tarballs should include an embedded 196 | version string (so users who unpack them will not need a VCS tool installed). 197 | 198 | If you distribute your project through PyPI, then the release process should 199 | boil down to two steps: 200 | 201 | * 1: git tag 1.0 202 | * 2: python setup.py register sdist upload 203 | 204 | If you distribute it through github (i.e. users use github to generate 205 | tarballs with `git archive`), the process is: 206 | 207 | * 1: git tag 1.0 208 | * 2: git push; git push --tags 209 | 210 | Versioneer will report "0+untagged.NUMCOMMITS.gHASH" until your tree has at 211 | least one tag in its history. 212 | 213 | ## Version-String Flavors 214 | 215 | Code which uses Versioneer can learn about its version string at runtime by 216 | importing `_version` from your main `__init__.py` file and running the 217 | `get_versions()` function. From the "outside" (e.g. in `setup.py`), you can 218 | import the top-level `versioneer.py` and run `get_versions()`. 219 | 220 | Both functions return a dictionary with different flavors of version 221 | information: 222 | 223 | * `['version']`: A condensed version string, rendered using the selected 224 | style. 
This is the most commonly used value for the project's version 225 | string. The default "pep440" style yields strings like `0.11`, 226 | `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section 227 | below for alternative styles. 228 | 229 | * `['full-revisionid']`: detailed revision identifier. For Git, this is the 230 | full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". 231 | 232 | * `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that 233 | this is only accurate if run in a VCS checkout, otherwise it is likely to 234 | be False or None 235 | 236 | * `['error']`: if the version string could not be computed, this will be set 237 | to a string describing the problem, otherwise it will be None. It may be 238 | useful to throw an exception in setup.py if this is set, to avoid e.g. 239 | creating tarballs with a version string of "unknown". 240 | 241 | Some variants are more useful than others. Including `full-revisionid` in a 242 | bug report should allow developers to reconstruct the exact code being tested 243 | (or indicate the presence of local changes that should be shared with the 244 | developers). `version` is suitable for display in an "about" box or a CLI 245 | `--version` output: it can be easily compared against release notes and lists 246 | of bugs fixed in various releases. 247 | 248 | The installer adds the following text to your `__init__.py` to place a basic 249 | version in `YOURPROJECT.__version__`: 250 | 251 | from ._version import get_versions 252 | __version__ = get_versions()['version'] 253 | del get_versions 254 | 255 | ## Styles 256 | 257 | The setup.cfg `style=` configuration controls how the VCS information is 258 | rendered into a version string. 259 | 260 | The default style, "pep440", produces a PEP440-compliant string, equal to the 261 | un-prefixed tag name for actual releases, and containing an additional "local 262 | version" section with more detail for in-between builds. For Git, this is 263 | TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags 264 | --dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the 265 | tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and 266 | that this commit is two revisions ("+2") beyond the "0.11" tag. For released 267 | software (exactly equal to a known tag), the identifier will only contain the 268 | stripped tag, e.g. "0.11". 269 | 270 | Other styles are available. See details.md in the Versioneer source tree for 271 | descriptions. 272 | 273 | ## Debugging 274 | 275 | Versioneer tries to avoid fatal errors: if something goes wrong, it will tend 276 | to return a version of "0+unknown". To investigate the problem, run `setup.py 277 | version`, which will run the version-lookup code in a verbose mode, and will 278 | display the full contents of `get_versions()` (including the `error` string, 279 | which may help identify what went wrong). 280 | 281 | ## Updating Versioneer 282 | 283 | To upgrade your project to a new release of Versioneer, do the following: 284 | 285 | * install the new Versioneer (`pip install -U versioneer` or equivalent) 286 | * edit `setup.cfg`, if necessary, to include any new configuration settings 287 | indicated by the release notes 288 | * re-run `versioneer install` in your source tree, to replace 289 | `SRC/_version.py` 290 | * commit any changed files 291 | 292 | ### Upgrading to 0.16 293 | 294 | Nothing special. 
295 | 296 | ### Upgrading to 0.15 297 | 298 | Starting with this version, Versioneer is configured with a `[versioneer]` 299 | section in your `setup.cfg` file. Earlier versions required the `setup.py` to 300 | set attributes on the `versioneer` module immediately after import. The new 301 | version will refuse to run (raising an exception during import) until you 302 | have provided the necessary `setup.cfg` section. 303 | 304 | In addition, the Versioneer package provides an executable named 305 | `versioneer`, and the installation process is driven by running `versioneer 306 | install`. In 0.14 and earlier, the executable was named 307 | `versioneer-installer` and was run without an argument. 308 | 309 | ### Upgrading to 0.14 310 | 311 | 0.14 changes the format of the version string. 0.13 and earlier used 312 | hyphen-separated strings like "0.11-2-g1076c97-dirty". 0.14 and beyond use a 313 | plus-separated "local version" section strings, with dot-separated 314 | components, like "0.11+2.g1076c97". PEP440-strict tools did not like the old 315 | format, but should be ok with the new one. 316 | 317 | ### Upgrading from 0.11 to 0.12 318 | 319 | Nothing special. 320 | 321 | ### Upgrading from 0.10 to 0.11 322 | 323 | You must add a `versioneer.VCS = "git"` to your `setup.py` before re-running 324 | `setup.py setup_versioneer`. This will enable the use of additional 325 | version-control systems (SVN, etc) in the future. 326 | 327 | ## Future Directions 328 | 329 | This tool is designed to make it easily extended to other version-control 330 | systems: all VCS-specific components are in separate directories like 331 | src/git/ . The top-level `versioneer.py` script is assembled from these 332 | components by running make-versioneer.py . In the future, make-versioneer.py 333 | will take a VCS name as an argument, and will construct a version of 334 | `versioneer.py` that is specific to the given VCS. It might also take the 335 | configuration arguments that are currently provided manually during 336 | installation by editing setup.py . Alternatively, it might go the other 337 | direction and include code from all supported VCS systems, reducing the 338 | number of intermediate scripts. 339 | 340 | 341 | ## License 342 | 343 | To make Versioneer easier to embed, all its code is dedicated to the public 344 | domain. The `_version.py` that it creates is also in the public domain. 345 | Specifically, both are released under the Creative Commons "Public Domain 346 | Dedication" license (CC0-1.0), as described in 347 | https://creativecommons.org/publicdomain/zero/1.0/ . 348 | 349 | """ 350 | 351 | from __future__ import print_function 352 | try: 353 | import configparser 354 | except ImportError: 355 | import ConfigParser as configparser 356 | import errno 357 | import json 358 | import os 359 | import re 360 | import subprocess 361 | import sys 362 | 363 | 364 | class VersioneerConfig: 365 | """Container for Versioneer configuration parameters.""" 366 | 367 | 368 | def get_root(): 369 | """Get the project root directory. 370 | 371 | We require that all commands are run from the project root, i.e. the 372 | directory that contains setup.py, setup.cfg, and versioneer.py . 
373 | """ 374 | root = os.path.realpath(os.path.abspath(os.getcwd())) 375 | setup_py = os.path.join(root, "setup.py") 376 | versioneer_py = os.path.join(root, "versioneer.py") 377 | if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): 378 | # allow 'python path/to/setup.py COMMAND' 379 | root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0]))) 380 | setup_py = os.path.join(root, "setup.py") 381 | versioneer_py = os.path.join(root, "versioneer.py") 382 | if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): 383 | err = ("Versioneer was unable to run the project root directory. " 384 | "Versioneer requires setup.py to be executed from " 385 | "its immediate directory (like 'python setup.py COMMAND'), " 386 | "or in a way that lets it use sys.argv[0] to find the root " 387 | "(like 'python path/to/setup.py COMMAND').") 388 | raise VersioneerBadRootError(err) 389 | try: 390 | # Certain runtime workflows (setup.py install/develop in a setuptools 391 | # tree) execute all dependencies in a single python process, so 392 | # "versioneer" may be imported multiple times, and python's shared 393 | # module-import table will cache the first one. So we can't use 394 | # os.path.dirname(__file__), as that will find whichever 395 | # versioneer.py was first imported, even in later projects. 396 | me = os.path.realpath(os.path.abspath(__file__)) 397 | if os.path.splitext(me)[0] != os.path.splitext(versioneer_py)[0]: 398 | print("Warning: build in %s is using versioneer.py from %s" 399 | % (os.path.dirname(me), versioneer_py)) 400 | except NameError: 401 | pass 402 | return root 403 | 404 | 405 | def get_config_from_root(root): 406 | """Read the project setup.cfg file to determine Versioneer config.""" 407 | # This might raise EnvironmentError (if setup.cfg is missing), or 408 | # configparser.NoSectionError (if it lacks a [versioneer] section), or 409 | # configparser.NoOptionError (if it lacks "VCS="). See the docstring at 410 | # the top of versioneer.py for instructions on writing your setup.cfg . 
411 | setup_cfg = os.path.join(root, "setup.cfg") 412 | parser = configparser.SafeConfigParser() 413 | with open(setup_cfg, "r") as f: 414 | parser.readfp(f) 415 | VCS = parser.get("versioneer", "VCS") # mandatory 416 | 417 | def get(parser, name): 418 | if parser.has_option("versioneer", name): 419 | return parser.get("versioneer", name) 420 | return None 421 | cfg = VersioneerConfig() 422 | cfg.VCS = VCS 423 | cfg.style = get(parser, "style") or "" 424 | cfg.versionfile_source = get(parser, "versionfile_source") 425 | cfg.versionfile_build = get(parser, "versionfile_build") 426 | cfg.tag_prefix = get(parser, "tag_prefix") 427 | if cfg.tag_prefix in ("''", '""'): 428 | cfg.tag_prefix = "" 429 | cfg.parentdir_prefix = get(parser, "parentdir_prefix") 430 | cfg.verbose = get(parser, "verbose") 431 | return cfg 432 | 433 | 434 | class NotThisMethod(Exception): 435 | """Exception raised if a method is not valid for the current scenario.""" 436 | 437 | # these dictionaries contain VCS-specific tools 438 | LONG_VERSION_PY = {} 439 | HANDLERS = {} 440 | 441 | 442 | def register_vcs_handler(vcs, method): # decorator 443 | """Decorator to mark a method as the handler for a particular VCS.""" 444 | def decorate(f): 445 | """Store f in HANDLERS[vcs][method].""" 446 | if vcs not in HANDLERS: 447 | HANDLERS[vcs] = {} 448 | HANDLERS[vcs][method] = f 449 | return f 450 | return decorate 451 | 452 | 453 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False): 454 | """Call the given command(s).""" 455 | assert isinstance(commands, list) 456 | p = None 457 | for c in commands: 458 | try: 459 | dispcmd = str([c] + args) 460 | # remember shell=False, so use git.cmd on windows, not just git 461 | p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE, 462 | stderr=(subprocess.PIPE if hide_stderr 463 | else None)) 464 | break 465 | except EnvironmentError: 466 | e = sys.exc_info()[1] 467 | if e.errno == errno.ENOENT: 468 | continue 469 | if verbose: 470 | print("unable to run %s" % dispcmd) 471 | print(e) 472 | return None 473 | else: 474 | if verbose: 475 | print("unable to find command, tried %s" % (commands,)) 476 | return None 477 | stdout = p.communicate()[0].strip() 478 | if sys.version_info[0] >= 3: 479 | stdout = stdout.decode() 480 | if p.returncode != 0: 481 | if verbose: 482 | print("unable to run %s (error)" % dispcmd) 483 | return None 484 | return stdout 485 | LONG_VERSION_PY['git'] = ''' 486 | # This file helps to compute a version number in source trees obtained from 487 | # git-archive tarball (such as those provided by githubs download-from-tag 488 | # feature). Distribution tarballs (built by setup.py sdist) and build 489 | # directories (produced by setup.py build) will contain a much shorter file 490 | # that just contains the computed version number. 491 | 492 | # This file is released into the public domain. Generated by 493 | # versioneer-0.16 (https://github.com/warner/python-versioneer) 494 | 495 | """Git implementation of _version.py.""" 496 | 497 | import errno 498 | import os 499 | import re 500 | import subprocess 501 | import sys 502 | 503 | 504 | def get_keywords(): 505 | """Get the keywords needed to look up the version information.""" 506 | # these strings will be replaced by git during git-archive. 507 | # setup.py/versioneer.py will grep for the variable names, so they must 508 | # each be defined on a line of their own. _version.py will just call 509 | # get_keywords(). 
510 | git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" 511 | git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" 512 | keywords = {"refnames": git_refnames, "full": git_full} 513 | return keywords 514 | 515 | 516 | class VersioneerConfig: 517 | """Container for Versioneer configuration parameters.""" 518 | 519 | 520 | def get_config(): 521 | """Create, populate and return the VersioneerConfig() object.""" 522 | # these strings are filled in when 'setup.py versioneer' creates 523 | # _version.py 524 | cfg = VersioneerConfig() 525 | cfg.VCS = "git" 526 | cfg.style = "%(STYLE)s" 527 | cfg.tag_prefix = "%(TAG_PREFIX)s" 528 | cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" 529 | cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" 530 | cfg.verbose = False 531 | return cfg 532 | 533 | 534 | class NotThisMethod(Exception): 535 | """Exception raised if a method is not valid for the current scenario.""" 536 | 537 | 538 | LONG_VERSION_PY = {} 539 | HANDLERS = {} 540 | 541 | 542 | def register_vcs_handler(vcs, method): # decorator 543 | """Decorator to mark a method as the handler for a particular VCS.""" 544 | def decorate(f): 545 | """Store f in HANDLERS[vcs][method].""" 546 | if vcs not in HANDLERS: 547 | HANDLERS[vcs] = {} 548 | HANDLERS[vcs][method] = f 549 | return f 550 | return decorate 551 | 552 | 553 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False): 554 | """Call the given command(s).""" 555 | assert isinstance(commands, list) 556 | p = None 557 | for c in commands: 558 | try: 559 | dispcmd = str([c] + args) 560 | # remember shell=False, so use git.cmd on windows, not just git 561 | p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE, 562 | stderr=(subprocess.PIPE if hide_stderr 563 | else None)) 564 | break 565 | except EnvironmentError: 566 | e = sys.exc_info()[1] 567 | if e.errno == errno.ENOENT: 568 | continue 569 | if verbose: 570 | print("unable to run %%s" %% dispcmd) 571 | print(e) 572 | return None 573 | else: 574 | if verbose: 575 | print("unable to find command, tried %%s" %% (commands,)) 576 | return None 577 | stdout = p.communicate()[0].strip() 578 | if sys.version_info[0] >= 3: 579 | stdout = stdout.decode() 580 | if p.returncode != 0: 581 | if verbose: 582 | print("unable to run %%s (error)" %% dispcmd) 583 | return None 584 | return stdout 585 | 586 | 587 | def versions_from_parentdir(parentdir_prefix, root, verbose): 588 | """Try to determine the version from the parent directory name. 589 | 590 | Source tarballs conventionally unpack into a directory that includes 591 | both the project name and a version string. 592 | """ 593 | dirname = os.path.basename(root) 594 | if not dirname.startswith(parentdir_prefix): 595 | if verbose: 596 | print("guessing rootdir is '%%s', but '%%s' doesn't start with " 597 | "prefix '%%s'" %% (root, dirname, parentdir_prefix)) 598 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix") 599 | return {"version": dirname[len(parentdir_prefix):], 600 | "full-revisionid": None, 601 | "dirty": False, "error": None} 602 | 603 | 604 | @register_vcs_handler("git", "get_keywords") 605 | def git_get_keywords(versionfile_abs): 606 | """Extract version information from the given file.""" 607 | # the code embedded in _version.py can just fetch the value of these 608 | # keywords. When used from setup.py, we don't want to import _version.py, 609 | # so we do it with a regexp instead. This function is not used from 610 | # _version.py. 
611 | keywords = {} 612 | try: 613 | f = open(versionfile_abs, "r") 614 | for line in f.readlines(): 615 | if line.strip().startswith("git_refnames ="): 616 | mo = re.search(r'=\s*"(.*)"', line) 617 | if mo: 618 | keywords["refnames"] = mo.group(1) 619 | if line.strip().startswith("git_full ="): 620 | mo = re.search(r'=\s*"(.*)"', line) 621 | if mo: 622 | keywords["full"] = mo.group(1) 623 | f.close() 624 | except EnvironmentError: 625 | pass 626 | return keywords 627 | 628 | 629 | @register_vcs_handler("git", "keywords") 630 | def git_versions_from_keywords(keywords, tag_prefix, verbose): 631 | """Get version information from git keywords.""" 632 | if not keywords: 633 | raise NotThisMethod("no keywords at all, weird") 634 | refnames = keywords["refnames"].strip() 635 | if refnames.startswith("$Format"): 636 | if verbose: 637 | print("keywords are unexpanded, not using") 638 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball") 639 | refs = set([r.strip() for r in refnames.strip("()").split(",")]) 640 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of 641 | # just "foo-1.0". If we see a "tag: " prefix, prefer those. 642 | TAG = "tag: " 643 | tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) 644 | if not tags: 645 | # Either we're using git < 1.8.3, or there really are no tags. We use 646 | # a heuristic: assume all version tags have a digit. The old git %%d 647 | # expansion behaves like git log --decorate=short and strips out the 648 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish 649 | # between branches and tags. By ignoring refnames without digits, we 650 | # filter out many common branch names like "release" and 651 | # "stabilization", as well as "HEAD" and "master". 652 | tags = set([r for r in refs if re.search(r'\d', r)]) 653 | if verbose: 654 | print("discarding '%%s', no digits" %% ",".join(refs-tags)) 655 | if verbose: 656 | print("likely tags: %%s" %% ",".join(sorted(tags))) 657 | for ref in sorted(tags): 658 | # sorting will prefer e.g. "2.0" over "2.0rc1" 659 | if ref.startswith(tag_prefix): 660 | r = ref[len(tag_prefix):] 661 | if verbose: 662 | print("picking %%s" %% r) 663 | return {"version": r, 664 | "full-revisionid": keywords["full"].strip(), 665 | "dirty": False, "error": None 666 | } 667 | # no suitable tags, so version is "0+unknown", but full hex is still there 668 | if verbose: 669 | print("no suitable tags, using unknown + full revision id") 670 | return {"version": "0+unknown", 671 | "full-revisionid": keywords["full"].strip(), 672 | "dirty": False, "error": "no suitable tags"} 673 | 674 | 675 | @register_vcs_handler("git", "pieces_from_vcs") 676 | def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): 677 | """Get version from 'git describe' in the root of the source tree. 678 | 679 | This only gets called if the git-archive 'subst' keywords were *not* 680 | expanded, and _version.py hasn't already been rewritten with a short 681 | version string, meaning we're inside a checked out source tree. 
682 | """ 683 | if not os.path.exists(os.path.join(root, ".git")): 684 | if verbose: 685 | print("no .git in %%s" %% root) 686 | raise NotThisMethod("no .git directory") 687 | 688 | GITS = ["git"] 689 | if sys.platform == "win32": 690 | GITS = ["git.cmd", "git.exe"] 691 | # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] 692 | # if there isn't one, this yields HEX[-dirty] (no NUM) 693 | describe_out = run_command(GITS, ["describe", "--tags", "--dirty", 694 | "--always", "--long", 695 | "--match", "%%s*" %% tag_prefix], 696 | cwd=root) 697 | # --long was added in git-1.5.5 698 | if describe_out is None: 699 | raise NotThisMethod("'git describe' failed") 700 | describe_out = describe_out.strip() 701 | full_out = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) 702 | if full_out is None: 703 | raise NotThisMethod("'git rev-parse' failed") 704 | full_out = full_out.strip() 705 | 706 | pieces = {} 707 | pieces["long"] = full_out 708 | pieces["short"] = full_out[:7] # maybe improved later 709 | pieces["error"] = None 710 | 711 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] 712 | # TAG might have hyphens. 713 | git_describe = describe_out 714 | 715 | # look for -dirty suffix 716 | dirty = git_describe.endswith("-dirty") 717 | pieces["dirty"] = dirty 718 | if dirty: 719 | git_describe = git_describe[:git_describe.rindex("-dirty")] 720 | 721 | # now we have TAG-NUM-gHEX or HEX 722 | 723 | if "-" in git_describe: 724 | # TAG-NUM-gHEX 725 | mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) 726 | if not mo: 727 | # unparseable. Maybe git-describe is misbehaving? 728 | pieces["error"] = ("unable to parse git-describe output: '%%s'" 729 | %% describe_out) 730 | return pieces 731 | 732 | # tag 733 | full_tag = mo.group(1) 734 | if not full_tag.startswith(tag_prefix): 735 | if verbose: 736 | fmt = "tag '%%s' doesn't start with prefix '%%s'" 737 | print(fmt %% (full_tag, tag_prefix)) 738 | pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" 739 | %% (full_tag, tag_prefix)) 740 | return pieces 741 | pieces["closest-tag"] = full_tag[len(tag_prefix):] 742 | 743 | # distance: number of commits since tag 744 | pieces["distance"] = int(mo.group(2)) 745 | 746 | # commit: short hex revision ID 747 | pieces["short"] = mo.group(3) 748 | 749 | else: 750 | # HEX: no tags 751 | pieces["closest-tag"] = None 752 | count_out = run_command(GITS, ["rev-list", "HEAD", "--count"], 753 | cwd=root) 754 | pieces["distance"] = int(count_out) # total number of commits 755 | 756 | return pieces 757 | 758 | 759 | def plus_or_dot(pieces): 760 | """Return a + if we don't already have one, else return a .""" 761 | if "+" in pieces.get("closest-tag", ""): 762 | return "." 763 | return "+" 764 | 765 | 766 | def render_pep440(pieces): 767 | """Build up version string, with post-release "local version identifier". 768 | 769 | Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you 770 | get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty 771 | 772 | Exceptions: 773 | 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] 774 | """ 775 | if pieces["closest-tag"]: 776 | rendered = pieces["closest-tag"] 777 | if pieces["distance"] or pieces["dirty"]: 778 | rendered += plus_or_dot(pieces) 779 | rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) 780 | if pieces["dirty"]: 781 | rendered += ".dirty" 782 | else: 783 | # exception #1 784 | rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], 785 | pieces["short"]) 786 | if pieces["dirty"]: 787 | rendered += ".dirty" 788 | return rendered 789 | 790 | 791 | def render_pep440_pre(pieces): 792 | """TAG[.post.devDISTANCE] -- No -dirty. 793 | 794 | Exceptions: 795 | 1: no tags. 0.post.devDISTANCE 796 | """ 797 | if pieces["closest-tag"]: 798 | rendered = pieces["closest-tag"] 799 | if pieces["distance"]: 800 | rendered += ".post.dev%%d" %% pieces["distance"] 801 | else: 802 | # exception #1 803 | rendered = "0.post.dev%%d" %% pieces["distance"] 804 | return rendered 805 | 806 | 807 | def render_pep440_post(pieces): 808 | """TAG[.postDISTANCE[.dev0]+gHEX] . 809 | 810 | The ".dev0" means dirty. Note that .dev0 sorts backwards 811 | (a dirty tree will appear "older" than the corresponding clean one), 812 | but you shouldn't be releasing software with -dirty anyways. 813 | 814 | Exceptions: 815 | 1: no tags. 0.postDISTANCE[.dev0] 816 | """ 817 | if pieces["closest-tag"]: 818 | rendered = pieces["closest-tag"] 819 | if pieces["distance"] or pieces["dirty"]: 820 | rendered += ".post%%d" %% pieces["distance"] 821 | if pieces["dirty"]: 822 | rendered += ".dev0" 823 | rendered += plus_or_dot(pieces) 824 | rendered += "g%%s" %% pieces["short"] 825 | else: 826 | # exception #1 827 | rendered = "0.post%%d" %% pieces["distance"] 828 | if pieces["dirty"]: 829 | rendered += ".dev0" 830 | rendered += "+g%%s" %% pieces["short"] 831 | return rendered 832 | 833 | 834 | def render_pep440_old(pieces): 835 | """TAG[.postDISTANCE[.dev0]] . 836 | 837 | The ".dev0" means dirty. 838 | 839 | Eexceptions: 840 | 1: no tags. 0.postDISTANCE[.dev0] 841 | """ 842 | if pieces["closest-tag"]: 843 | rendered = pieces["closest-tag"] 844 | if pieces["distance"] or pieces["dirty"]: 845 | rendered += ".post%%d" %% pieces["distance"] 846 | if pieces["dirty"]: 847 | rendered += ".dev0" 848 | else: 849 | # exception #1 850 | rendered = "0.post%%d" %% pieces["distance"] 851 | if pieces["dirty"]: 852 | rendered += ".dev0" 853 | return rendered 854 | 855 | 856 | def render_git_describe(pieces): 857 | """TAG[-DISTANCE-gHEX][-dirty]. 858 | 859 | Like 'git describe --tags --dirty --always'. 860 | 861 | Exceptions: 862 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 863 | """ 864 | if pieces["closest-tag"]: 865 | rendered = pieces["closest-tag"] 866 | if pieces["distance"]: 867 | rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) 868 | else: 869 | # exception #1 870 | rendered = pieces["short"] 871 | if pieces["dirty"]: 872 | rendered += "-dirty" 873 | return rendered 874 | 875 | 876 | def render_git_describe_long(pieces): 877 | """TAG-DISTANCE-gHEX[-dirty]. 878 | 879 | Like 'git describe --tags --dirty --always -long'. 880 | The distance/hash is unconditional. 881 | 882 | Exceptions: 883 | 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) 884 | """ 885 | if pieces["closest-tag"]: 886 | rendered = pieces["closest-tag"] 887 | rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) 888 | else: 889 | # exception #1 890 | rendered = pieces["short"] 891 | if pieces["dirty"]: 892 | rendered += "-dirty" 893 | return rendered 894 | 895 | 896 | def render(pieces, style): 897 | """Render the given version pieces into the requested style.""" 898 | if pieces["error"]: 899 | return {"version": "unknown", 900 | "full-revisionid": pieces.get("long"), 901 | "dirty": None, 902 | "error": pieces["error"]} 903 | 904 | if not style or style == "default": 905 | style = "pep440" # the default 906 | 907 | if style == "pep440": 908 | rendered = render_pep440(pieces) 909 | elif style == "pep440-pre": 910 | rendered = render_pep440_pre(pieces) 911 | elif style == "pep440-post": 912 | rendered = render_pep440_post(pieces) 913 | elif style == "pep440-old": 914 | rendered = render_pep440_old(pieces) 915 | elif style == "git-describe": 916 | rendered = render_git_describe(pieces) 917 | elif style == "git-describe-long": 918 | rendered = render_git_describe_long(pieces) 919 | else: 920 | raise ValueError("unknown style '%%s'" %% style) 921 | 922 | return {"version": rendered, "full-revisionid": pieces["long"], 923 | "dirty": pieces["dirty"], "error": None} 924 | 925 | 926 | def get_versions(): 927 | """Get version information or return default if unable to do so.""" 928 | # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have 929 | # __file__, we can work backwards from there to the root. Some 930 | # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which 931 | # case we can only use expanded keywords. 932 | 933 | cfg = get_config() 934 | verbose = cfg.verbose 935 | 936 | try: 937 | return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, 938 | verbose) 939 | except NotThisMethod: 940 | pass 941 | 942 | try: 943 | root = os.path.realpath(__file__) 944 | # versionfile_source is the relative path from the top of the source 945 | # tree (where the .git directory might live) to this file. Invert 946 | # this to find the root from __file__. 947 | for i in cfg.versionfile_source.split('/'): 948 | root = os.path.dirname(root) 949 | except NameError: 950 | return {"version": "0+unknown", "full-revisionid": None, 951 | "dirty": None, 952 | "error": "unable to find root of source tree"} 953 | 954 | try: 955 | pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) 956 | return render(pieces, cfg.style) 957 | except NotThisMethod: 958 | pass 959 | 960 | try: 961 | if cfg.parentdir_prefix: 962 | return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) 963 | except NotThisMethod: 964 | pass 965 | 966 | return {"version": "0+unknown", "full-revisionid": None, 967 | "dirty": None, 968 | "error": "unable to compute version"} 969 | ''' 970 | 971 | 972 | @register_vcs_handler("git", "get_keywords") 973 | def git_get_keywords(versionfile_abs): 974 | """Extract version information from the given file.""" 975 | # the code embedded in _version.py can just fetch the value of these 976 | # keywords. When used from setup.py, we don't want to import _version.py, 977 | # so we do it with a regexp instead. This function is not used from 978 | # _version.py. 
979 | keywords = {} 980 | try: 981 | f = open(versionfile_abs, "r") 982 | for line in f.readlines(): 983 | if line.strip().startswith("git_refnames ="): 984 | mo = re.search(r'=\s*"(.*)"', line) 985 | if mo: 986 | keywords["refnames"] = mo.group(1) 987 | if line.strip().startswith("git_full ="): 988 | mo = re.search(r'=\s*"(.*)"', line) 989 | if mo: 990 | keywords["full"] = mo.group(1) 991 | f.close() 992 | except EnvironmentError: 993 | pass 994 | return keywords 995 | 996 | 997 | @register_vcs_handler("git", "keywords") 998 | def git_versions_from_keywords(keywords, tag_prefix, verbose): 999 | """Get version information from git keywords.""" 1000 | if not keywords: 1001 | raise NotThisMethod("no keywords at all, weird") 1002 | refnames = keywords["refnames"].strip() 1003 | if refnames.startswith("$Format"): 1004 | if verbose: 1005 | print("keywords are unexpanded, not using") 1006 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball") 1007 | refs = set([r.strip() for r in refnames.strip("()").split(",")]) 1008 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of 1009 | # just "foo-1.0". If we see a "tag: " prefix, prefer those. 1010 | TAG = "tag: " 1011 | tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) 1012 | if not tags: 1013 | # Either we're using git < 1.8.3, or there really are no tags. We use 1014 | # a heuristic: assume all version tags have a digit. The old git %d 1015 | # expansion behaves like git log --decorate=short and strips out the 1016 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish 1017 | # between branches and tags. By ignoring refnames without digits, we 1018 | # filter out many common branch names like "release" and 1019 | # "stabilization", as well as "HEAD" and "master". 1020 | tags = set([r for r in refs if re.search(r'\d', r)]) 1021 | if verbose: 1022 | print("discarding '%s', no digits" % ",".join(refs-tags)) 1023 | if verbose: 1024 | print("likely tags: %s" % ",".join(sorted(tags))) 1025 | for ref in sorted(tags): 1026 | # sorting will prefer e.g. "2.0" over "2.0rc1" 1027 | if ref.startswith(tag_prefix): 1028 | r = ref[len(tag_prefix):] 1029 | if verbose: 1030 | print("picking %s" % r) 1031 | return {"version": r, 1032 | "full-revisionid": keywords["full"].strip(), 1033 | "dirty": False, "error": None 1034 | } 1035 | # no suitable tags, so version is "0+unknown", but full hex is still there 1036 | if verbose: 1037 | print("no suitable tags, using unknown + full revision id") 1038 | return {"version": "0+unknown", 1039 | "full-revisionid": keywords["full"].strip(), 1040 | "dirty": False, "error": "no suitable tags"} 1041 | 1042 | 1043 | @register_vcs_handler("git", "pieces_from_vcs") 1044 | def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): 1045 | """Get version from 'git describe' in the root of the source tree. 1046 | 1047 | This only gets called if the git-archive 'subst' keywords were *not* 1048 | expanded, and _version.py hasn't already been rewritten with a short 1049 | version string, meaning we're inside a checked out source tree. 
1050 | """ 1051 | if not os.path.exists(os.path.join(root, ".git")): 1052 | if verbose: 1053 | print("no .git in %s" % root) 1054 | raise NotThisMethod("no .git directory") 1055 | 1056 | GITS = ["git"] 1057 | if sys.platform == "win32": 1058 | GITS = ["git.cmd", "git.exe"] 1059 | # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] 1060 | # if there isn't one, this yields HEX[-dirty] (no NUM) 1061 | describe_out = run_command(GITS, ["describe", "--tags", "--dirty", 1062 | "--always", "--long", 1063 | "--match", "%s*" % tag_prefix], 1064 | cwd=root) 1065 | # --long was added in git-1.5.5 1066 | if describe_out is None: 1067 | raise NotThisMethod("'git describe' failed") 1068 | describe_out = describe_out.strip() 1069 | full_out = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) 1070 | if full_out is None: 1071 | raise NotThisMethod("'git rev-parse' failed") 1072 | full_out = full_out.strip() 1073 | 1074 | pieces = {} 1075 | pieces["long"] = full_out 1076 | pieces["short"] = full_out[:7] # maybe improved later 1077 | pieces["error"] = None 1078 | 1079 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] 1080 | # TAG might have hyphens. 1081 | git_describe = describe_out 1082 | 1083 | # look for -dirty suffix 1084 | dirty = git_describe.endswith("-dirty") 1085 | pieces["dirty"] = dirty 1086 | if dirty: 1087 | git_describe = git_describe[:git_describe.rindex("-dirty")] 1088 | 1089 | # now we have TAG-NUM-gHEX or HEX 1090 | 1091 | if "-" in git_describe: 1092 | # TAG-NUM-gHEX 1093 | mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) 1094 | if not mo: 1095 | # unparseable. Maybe git-describe is misbehaving? 1096 | pieces["error"] = ("unable to parse git-describe output: '%s'" 1097 | % describe_out) 1098 | return pieces 1099 | 1100 | # tag 1101 | full_tag = mo.group(1) 1102 | if not full_tag.startswith(tag_prefix): 1103 | if verbose: 1104 | fmt = "tag '%s' doesn't start with prefix '%s'" 1105 | print(fmt % (full_tag, tag_prefix)) 1106 | pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" 1107 | % (full_tag, tag_prefix)) 1108 | return pieces 1109 | pieces["closest-tag"] = full_tag[len(tag_prefix):] 1110 | 1111 | # distance: number of commits since tag 1112 | pieces["distance"] = int(mo.group(2)) 1113 | 1114 | # commit: short hex revision ID 1115 | pieces["short"] = mo.group(3) 1116 | 1117 | else: 1118 | # HEX: no tags 1119 | pieces["closest-tag"] = None 1120 | count_out = run_command(GITS, ["rev-list", "HEAD", "--count"], 1121 | cwd=root) 1122 | pieces["distance"] = int(count_out) # total number of commits 1123 | 1124 | return pieces 1125 | 1126 | 1127 | def do_vcs_install(manifest_in, versionfile_source, ipy): 1128 | """Git-specific installation logic for Versioneer. 1129 | 1130 | For Git, this means creating/changing .gitattributes to mark _version.py 1131 | for export-time keyword substitution. 
1132 | """ 1133 | GITS = ["git"] 1134 | if sys.platform == "win32": 1135 | GITS = ["git.cmd", "git.exe"] 1136 | files = [manifest_in, versionfile_source] 1137 | if ipy: 1138 | files.append(ipy) 1139 | try: 1140 | me = __file__ 1141 | if me.endswith(".pyc") or me.endswith(".pyo"): 1142 | me = os.path.splitext(me)[0] + ".py" 1143 | versioneer_file = os.path.relpath(me) 1144 | except NameError: 1145 | versioneer_file = "versioneer.py" 1146 | files.append(versioneer_file) 1147 | present = False 1148 | try: 1149 | f = open(".gitattributes", "r") 1150 | for line in f.readlines(): 1151 | if line.strip().startswith(versionfile_source): 1152 | if "export-subst" in line.strip().split()[1:]: 1153 | present = True 1154 | f.close() 1155 | except EnvironmentError: 1156 | pass 1157 | if not present: 1158 | f = open(".gitattributes", "a+") 1159 | f.write("%s export-subst\n" % versionfile_source) 1160 | f.close() 1161 | files.append(".gitattributes") 1162 | run_command(GITS, ["add", "--"] + files) 1163 | 1164 | 1165 | def versions_from_parentdir(parentdir_prefix, root, verbose): 1166 | """Try to determine the version from the parent directory name. 1167 | 1168 | Source tarballs conventionally unpack into a directory that includes 1169 | both the project name and a version string. 1170 | """ 1171 | dirname = os.path.basename(root) 1172 | if not dirname.startswith(parentdir_prefix): 1173 | if verbose: 1174 | print("guessing rootdir is '%s', but '%s' doesn't start with " 1175 | "prefix '%s'" % (root, dirname, parentdir_prefix)) 1176 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix") 1177 | return {"version": dirname[len(parentdir_prefix):], 1178 | "full-revisionid": None, 1179 | "dirty": False, "error": None} 1180 | 1181 | SHORT_VERSION_PY = """ 1182 | # This file was generated by 'versioneer.py' (0.16) from 1183 | # revision-control system data, or from the parent directory name of an 1184 | # unpacked source archive. Distribution tarballs contain a pre-generated copy 1185 | # of this file. 1186 | 1187 | import json 1188 | import sys 1189 | 1190 | version_json = ''' 1191 | %s 1192 | ''' # END VERSION_JSON 1193 | 1194 | 1195 | def get_versions(): 1196 | return json.loads(version_json) 1197 | """ 1198 | 1199 | 1200 | def versions_from_file(filename): 1201 | """Try to determine the version from _version.py if present.""" 1202 | try: 1203 | with open(filename) as f: 1204 | contents = f.read() 1205 | except EnvironmentError: 1206 | raise NotThisMethod("unable to read _version.py") 1207 | mo = re.search(r"version_json = '''\n(.*)''' # END VERSION_JSON", 1208 | contents, re.M | re.S) 1209 | if not mo: 1210 | raise NotThisMethod("no version_json in _version.py") 1211 | return json.loads(mo.group(1)) 1212 | 1213 | 1214 | def write_to_version_file(filename, versions): 1215 | """Write the given version number to the given _version.py file.""" 1216 | os.unlink(filename) 1217 | contents = json.dumps(versions, sort_keys=True, 1218 | indent=1, separators=(",", ": ")) 1219 | with open(filename, "w") as f: 1220 | f.write(SHORT_VERSION_PY % contents) 1221 | 1222 | print("set %s to '%s'" % (filename, versions["version"])) 1223 | 1224 | 1225 | def plus_or_dot(pieces): 1226 | """Return a + if we don't already have one, else return a .""" 1227 | if "+" in pieces.get("closest-tag", ""): 1228 | return "." 1229 | return "+" 1230 | 1231 | 1232 | def render_pep440(pieces): 1233 | """Build up version string, with post-release "local version identifier". 1234 | 1235 | Our goal: TAG[+DISTANCE.gHEX[.dirty]] . 
Note that if you 1236 | get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty 1237 | 1238 | Exceptions: 1239 | 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] 1240 | """ 1241 | if pieces["closest-tag"]: 1242 | rendered = pieces["closest-tag"] 1243 | if pieces["distance"] or pieces["dirty"]: 1244 | rendered += plus_or_dot(pieces) 1245 | rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) 1246 | if pieces["dirty"]: 1247 | rendered += ".dirty" 1248 | else: 1249 | # exception #1 1250 | rendered = "0+untagged.%d.g%s" % (pieces["distance"], 1251 | pieces["short"]) 1252 | if pieces["dirty"]: 1253 | rendered += ".dirty" 1254 | return rendered 1255 | 1256 | 1257 | def render_pep440_pre(pieces): 1258 | """TAG[.post.devDISTANCE] -- No -dirty. 1259 | 1260 | Exceptions: 1261 | 1: no tags. 0.post.devDISTANCE 1262 | """ 1263 | if pieces["closest-tag"]: 1264 | rendered = pieces["closest-tag"] 1265 | if pieces["distance"]: 1266 | rendered += ".post.dev%d" % pieces["distance"] 1267 | else: 1268 | # exception #1 1269 | rendered = "0.post.dev%d" % pieces["distance"] 1270 | return rendered 1271 | 1272 | 1273 | def render_pep440_post(pieces): 1274 | """TAG[.postDISTANCE[.dev0]+gHEX] . 1275 | 1276 | The ".dev0" means dirty. Note that .dev0 sorts backwards 1277 | (a dirty tree will appear "older" than the corresponding clean one), 1278 | but you shouldn't be releasing software with -dirty anyways. 1279 | 1280 | Exceptions: 1281 | 1: no tags. 0.postDISTANCE[.dev0] 1282 | """ 1283 | if pieces["closest-tag"]: 1284 | rendered = pieces["closest-tag"] 1285 | if pieces["distance"] or pieces["dirty"]: 1286 | rendered += ".post%d" % pieces["distance"] 1287 | if pieces["dirty"]: 1288 | rendered += ".dev0" 1289 | rendered += plus_or_dot(pieces) 1290 | rendered += "g%s" % pieces["short"] 1291 | else: 1292 | # exception #1 1293 | rendered = "0.post%d" % pieces["distance"] 1294 | if pieces["dirty"]: 1295 | rendered += ".dev0" 1296 | rendered += "+g%s" % pieces["short"] 1297 | return rendered 1298 | 1299 | 1300 | def render_pep440_old(pieces): 1301 | """TAG[.postDISTANCE[.dev0]] . 1302 | 1303 | The ".dev0" means dirty. 1304 | 1305 | Eexceptions: 1306 | 1: no tags. 0.postDISTANCE[.dev0] 1307 | """ 1308 | if pieces["closest-tag"]: 1309 | rendered = pieces["closest-tag"] 1310 | if pieces["distance"] or pieces["dirty"]: 1311 | rendered += ".post%d" % pieces["distance"] 1312 | if pieces["dirty"]: 1313 | rendered += ".dev0" 1314 | else: 1315 | # exception #1 1316 | rendered = "0.post%d" % pieces["distance"] 1317 | if pieces["dirty"]: 1318 | rendered += ".dev0" 1319 | return rendered 1320 | 1321 | 1322 | def render_git_describe(pieces): 1323 | """TAG[-DISTANCE-gHEX][-dirty]. 1324 | 1325 | Like 'git describe --tags --dirty --always'. 1326 | 1327 | Exceptions: 1328 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 1329 | """ 1330 | if pieces["closest-tag"]: 1331 | rendered = pieces["closest-tag"] 1332 | if pieces["distance"]: 1333 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 1334 | else: 1335 | # exception #1 1336 | rendered = pieces["short"] 1337 | if pieces["dirty"]: 1338 | rendered += "-dirty" 1339 | return rendered 1340 | 1341 | 1342 | def render_git_describe_long(pieces): 1343 | """TAG-DISTANCE-gHEX[-dirty]. 1344 | 1345 | Like 'git describe --tags --dirty --always -long'. 1346 | The distance/hash is unconditional. 1347 | 1348 | Exceptions: 1349 | 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) 1350 | """ 1351 | if pieces["closest-tag"]: 1352 | rendered = pieces["closest-tag"] 1353 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 1354 | else: 1355 | # exception #1 1356 | rendered = pieces["short"] 1357 | if pieces["dirty"]: 1358 | rendered += "-dirty" 1359 | return rendered 1360 | 1361 | 1362 | def render(pieces, style): 1363 | """Render the given version pieces into the requested style.""" 1364 | if pieces["error"]: 1365 | return {"version": "unknown", 1366 | "full-revisionid": pieces.get("long"), 1367 | "dirty": None, 1368 | "error": pieces["error"]} 1369 | 1370 | if not style or style == "default": 1371 | style = "pep440" # the default 1372 | 1373 | if style == "pep440": 1374 | rendered = render_pep440(pieces) 1375 | elif style == "pep440-pre": 1376 | rendered = render_pep440_pre(pieces) 1377 | elif style == "pep440-post": 1378 | rendered = render_pep440_post(pieces) 1379 | elif style == "pep440-old": 1380 | rendered = render_pep440_old(pieces) 1381 | elif style == "git-describe": 1382 | rendered = render_git_describe(pieces) 1383 | elif style == "git-describe-long": 1384 | rendered = render_git_describe_long(pieces) 1385 | else: 1386 | raise ValueError("unknown style '%s'" % style) 1387 | 1388 | return {"version": rendered, "full-revisionid": pieces["long"], 1389 | "dirty": pieces["dirty"], "error": None} 1390 | 1391 | 1392 | class VersioneerBadRootError(Exception): 1393 | """The project root directory is unknown or missing key files.""" 1394 | 1395 | 1396 | def get_versions(verbose=False): 1397 | """Get the project version from whatever source is available. 1398 | 1399 | Returns dict with two keys: 'version' and 'full'. 1400 | """ 1401 | if "versioneer" in sys.modules: 1402 | # see the discussion in cmdclass.py:get_cmdclass() 1403 | del sys.modules["versioneer"] 1404 | 1405 | root = get_root() 1406 | cfg = get_config_from_root(root) 1407 | 1408 | assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" 1409 | handlers = HANDLERS.get(cfg.VCS) 1410 | assert handlers, "unrecognized VCS '%s'" % cfg.VCS 1411 | verbose = verbose or cfg.verbose 1412 | assert cfg.versionfile_source is not None, \ 1413 | "please set versioneer.versionfile_source" 1414 | assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" 1415 | 1416 | versionfile_abs = os.path.join(root, cfg.versionfile_source) 1417 | 1418 | # extract version from first of: _version.py, VCS command (e.g. 'git 1419 | # describe'), parentdir. This is meant to work for developers using a 1420 | # source checkout, for users of a tarball created by 'setup.py sdist', 1421 | # and for users of a tarball/zipball created by 'git archive' or github's 1422 | # download-from-tag feature or the equivalent in other VCSes. 
1423 | 1424 | get_keywords_f = handlers.get("get_keywords") 1425 | from_keywords_f = handlers.get("keywords") 1426 | if get_keywords_f and from_keywords_f: 1427 | try: 1428 | keywords = get_keywords_f(versionfile_abs) 1429 | ver = from_keywords_f(keywords, cfg.tag_prefix, verbose) 1430 | if verbose: 1431 | print("got version from expanded keyword %s" % ver) 1432 | return ver 1433 | except NotThisMethod: 1434 | pass 1435 | 1436 | try: 1437 | ver = versions_from_file(versionfile_abs) 1438 | if verbose: 1439 | print("got version from file %s %s" % (versionfile_abs, ver)) 1440 | return ver 1441 | except NotThisMethod: 1442 | pass 1443 | 1444 | from_vcs_f = handlers.get("pieces_from_vcs") 1445 | if from_vcs_f: 1446 | try: 1447 | pieces = from_vcs_f(cfg.tag_prefix, root, verbose) 1448 | ver = render(pieces, cfg.style) 1449 | if verbose: 1450 | print("got version from VCS %s" % ver) 1451 | return ver 1452 | except NotThisMethod: 1453 | pass 1454 | 1455 | try: 1456 | if cfg.parentdir_prefix: 1457 | ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose) 1458 | if verbose: 1459 | print("got version from parentdir %s" % ver) 1460 | return ver 1461 | except NotThisMethod: 1462 | pass 1463 | 1464 | if verbose: 1465 | print("unable to compute version") 1466 | 1467 | return {"version": "0+unknown", "full-revisionid": None, 1468 | "dirty": None, "error": "unable to compute version"} 1469 | 1470 | 1471 | def get_version(): 1472 | """Get the short version string for this project.""" 1473 | return get_versions()["version"] 1474 | 1475 | 1476 | def get_cmdclass(): 1477 | """Get the custom setuptools/distutils subclasses used by Versioneer.""" 1478 | if "versioneer" in sys.modules: 1479 | del sys.modules["versioneer"] 1480 | # this fixes the "python setup.py develop" case (also 'install' and 1481 | # 'easy_install .'), in which subdependencies of the main project are 1482 | # built (using setup.py bdist_egg) in the same python process. Assume 1483 | # a main project A and a dependency B, which use different versions 1484 | # of Versioneer. A's setup.py imports A's Versioneer, leaving it in 1485 | # sys.modules by the time B's setup.py is executed, causing B to run 1486 | # with the wrong versioneer. Setuptools wraps the sub-dep builds in a 1487 | # sandbox that restores sys.modules to it's pre-build state, so the 1488 | # parent is protected against the child's "import versioneer". By 1489 | # removing ourselves from sys.modules here, before the child build 1490 | # happens, we protect the child from the parent's versioneer too. 
1491 | # Also see https://github.com/warner/python-versioneer/issues/52 1492 | 1493 | cmds = {} 1494 | 1495 | # we add "version" to both distutils and setuptools 1496 | from distutils.core import Command 1497 | 1498 | class cmd_version(Command): 1499 | description = "report generated version string" 1500 | user_options = [] 1501 | boolean_options = [] 1502 | 1503 | def initialize_options(self): 1504 | pass 1505 | 1506 | def finalize_options(self): 1507 | pass 1508 | 1509 | def run(self): 1510 | vers = get_versions(verbose=True) 1511 | print("Version: %s" % vers["version"]) 1512 | print(" full-revisionid: %s" % vers.get("full-revisionid")) 1513 | print(" dirty: %s" % vers.get("dirty")) 1514 | if vers["error"]: 1515 | print(" error: %s" % vers["error"]) 1516 | cmds["version"] = cmd_version 1517 | 1518 | # we override "build_py" in both distutils and setuptools 1519 | # 1520 | # most invocation pathways end up running build_py: 1521 | # distutils/build -> build_py 1522 | # distutils/install -> distutils/build ->.. 1523 | # setuptools/bdist_wheel -> distutils/install ->.. 1524 | # setuptools/bdist_egg -> distutils/install_lib -> build_py 1525 | # setuptools/install -> bdist_egg ->.. 1526 | # setuptools/develop -> ? 1527 | 1528 | # we override different "build_py" commands for both environments 1529 | if "setuptools" in sys.modules: 1530 | from setuptools.command.build_py import build_py as _build_py 1531 | else: 1532 | from distutils.command.build_py import build_py as _build_py 1533 | 1534 | class cmd_build_py(_build_py): 1535 | def run(self): 1536 | root = get_root() 1537 | cfg = get_config_from_root(root) 1538 | versions = get_versions() 1539 | _build_py.run(self) 1540 | # now locate _version.py in the new build/ directory and replace 1541 | # it with an updated value 1542 | if cfg.versionfile_build: 1543 | target_versionfile = os.path.join(self.build_lib, 1544 | cfg.versionfile_build) 1545 | print("UPDATING %s" % target_versionfile) 1546 | write_to_version_file(target_versionfile, versions) 1547 | cmds["build_py"] = cmd_build_py 1548 | 1549 | if "cx_Freeze" in sys.modules: # cx_freeze enabled? 
1550 | from cx_Freeze.dist import build_exe as _build_exe 1551 | 1552 | class cmd_build_exe(_build_exe): 1553 | def run(self): 1554 | root = get_root() 1555 | cfg = get_config_from_root(root) 1556 | versions = get_versions() 1557 | target_versionfile = cfg.versionfile_source 1558 | print("UPDATING %s" % target_versionfile) 1559 | write_to_version_file(target_versionfile, versions) 1560 | 1561 | _build_exe.run(self) 1562 | os.unlink(target_versionfile) 1563 | with open(cfg.versionfile_source, "w") as f: 1564 | LONG = LONG_VERSION_PY[cfg.VCS] 1565 | f.write(LONG % 1566 | {"DOLLAR": "$", 1567 | "STYLE": cfg.style, 1568 | "TAG_PREFIX": cfg.tag_prefix, 1569 | "PARENTDIR_PREFIX": cfg.parentdir_prefix, 1570 | "VERSIONFILE_SOURCE": cfg.versionfile_source, 1571 | }) 1572 | cmds["build_exe"] = cmd_build_exe 1573 | del cmds["build_py"] 1574 | 1575 | # we override different "sdist" commands for both environments 1576 | if "setuptools" in sys.modules: 1577 | from setuptools.command.sdist import sdist as _sdist 1578 | else: 1579 | from distutils.command.sdist import sdist as _sdist 1580 | 1581 | class cmd_sdist(_sdist): 1582 | def run(self): 1583 | versions = get_versions() 1584 | self._versioneer_generated_versions = versions 1585 | # unless we update this, the command will keep using the old 1586 | # version 1587 | self.distribution.metadata.version = versions["version"] 1588 | return _sdist.run(self) 1589 | 1590 | def make_release_tree(self, base_dir, files): 1591 | root = get_root() 1592 | cfg = get_config_from_root(root) 1593 | _sdist.make_release_tree(self, base_dir, files) 1594 | # now locate _version.py in the new base_dir directory 1595 | # (remembering that it may be a hardlink) and replace it with an 1596 | # updated value 1597 | target_versionfile = os.path.join(base_dir, cfg.versionfile_source) 1598 | print("UPDATING %s" % target_versionfile) 1599 | write_to_version_file(target_versionfile, 1600 | self._versioneer_generated_versions) 1601 | cmds["sdist"] = cmd_sdist 1602 | 1603 | return cmds 1604 | 1605 | 1606 | CONFIG_ERROR = """ 1607 | setup.cfg is missing the necessary Versioneer configuration. You need 1608 | a section like: 1609 | 1610 | [versioneer] 1611 | VCS = git 1612 | style = pep440 1613 | versionfile_source = src/myproject/_version.py 1614 | versionfile_build = myproject/_version.py 1615 | tag_prefix = 1616 | parentdir_prefix = myproject- 1617 | 1618 | You will also need to edit your setup.py to use the results: 1619 | 1620 | import versioneer 1621 | setup(version=versioneer.get_version(), 1622 | cmdclass=versioneer.get_cmdclass(), ...) 1623 | 1624 | Please read the docstring in ./versioneer.py for configuration instructions, 1625 | edit setup.cfg, and re-run the installer or 'python versioneer.py setup'. 1626 | """ 1627 | 1628 | SAMPLE_CONFIG = """ 1629 | # See the docstring in versioneer.py for instructions. Note that you must 1630 | # re-run 'versioneer.py setup' after changing this section, and commit the 1631 | # resulting files. 
1632 | 1633 | [versioneer] 1634 | #VCS = git 1635 | #style = pep440 1636 | #versionfile_source = 1637 | #versionfile_build = 1638 | #tag_prefix = 1639 | #parentdir_prefix = 1640 | 1641 | """ 1642 | 1643 | INIT_PY_SNIPPET = """ 1644 | from ._version import get_versions 1645 | __version__ = get_versions()['version'] 1646 | del get_versions 1647 | """ 1648 | 1649 | 1650 | def do_setup(): 1651 | """Main VCS-independent setup function for installing Versioneer.""" 1652 | root = get_root() 1653 | try: 1654 | cfg = get_config_from_root(root) 1655 | except (EnvironmentError, configparser.NoSectionError, 1656 | configparser.NoOptionError) as e: 1657 | if isinstance(e, (EnvironmentError, configparser.NoSectionError)): 1658 | print("Adding sample versioneer config to setup.cfg", 1659 | file=sys.stderr) 1660 | with open(os.path.join(root, "setup.cfg"), "a") as f: 1661 | f.write(SAMPLE_CONFIG) 1662 | print(CONFIG_ERROR, file=sys.stderr) 1663 | return 1 1664 | 1665 | print(" creating %s" % cfg.versionfile_source) 1666 | with open(cfg.versionfile_source, "w") as f: 1667 | LONG = LONG_VERSION_PY[cfg.VCS] 1668 | f.write(LONG % {"DOLLAR": "$", 1669 | "STYLE": cfg.style, 1670 | "TAG_PREFIX": cfg.tag_prefix, 1671 | "PARENTDIR_PREFIX": cfg.parentdir_prefix, 1672 | "VERSIONFILE_SOURCE": cfg.versionfile_source, 1673 | }) 1674 | 1675 | ipy = os.path.join(os.path.dirname(cfg.versionfile_source), 1676 | "__init__.py") 1677 | if os.path.exists(ipy): 1678 | try: 1679 | with open(ipy, "r") as f: 1680 | old = f.read() 1681 | except EnvironmentError: 1682 | old = "" 1683 | if INIT_PY_SNIPPET not in old: 1684 | print(" appending to %s" % ipy) 1685 | with open(ipy, "a") as f: 1686 | f.write(INIT_PY_SNIPPET) 1687 | else: 1688 | print(" %s unmodified" % ipy) 1689 | else: 1690 | print(" %s doesn't exist, ok" % ipy) 1691 | ipy = None 1692 | 1693 | # Make sure both the top-level "versioneer.py" and versionfile_source 1694 | # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so 1695 | # they'll be copied into source distributions. Pip won't be able to 1696 | # install the package without this. 1697 | manifest_in = os.path.join(root, "MANIFEST.in") 1698 | simple_includes = set() 1699 | try: 1700 | with open(manifest_in, "r") as f: 1701 | for line in f: 1702 | if line.startswith("include "): 1703 | for include in line.split()[1:]: 1704 | simple_includes.add(include) 1705 | except EnvironmentError: 1706 | pass 1707 | # That doesn't cover everything MANIFEST.in can do 1708 | # (http://docs.python.org/2/distutils/sourcedist.html#commands), so 1709 | # it might give some false negatives. Appending redundant 'include' 1710 | # lines is safe, though. 1711 | if "versioneer.py" not in simple_includes: 1712 | print(" appending 'versioneer.py' to MANIFEST.in") 1713 | with open(manifest_in, "a") as f: 1714 | f.write("include versioneer.py\n") 1715 | else: 1716 | print(" 'versioneer.py' already in MANIFEST.in") 1717 | if cfg.versionfile_source not in simple_includes: 1718 | print(" appending versionfile_source ('%s') to MANIFEST.in" % 1719 | cfg.versionfile_source) 1720 | with open(manifest_in, "a") as f: 1721 | f.write("include %s\n" % cfg.versionfile_source) 1722 | else: 1723 | print(" versionfile_source already in MANIFEST.in") 1724 | 1725 | # Make VCS-specific changes. For git, this means creating/changing 1726 | # .gitattributes to mark _version.py for export-time keyword 1727 | # substitution. 
1728 | do_vcs_install(manifest_in, cfg.versionfile_source, ipy) 1729 | return 0 1730 | 1731 | 1732 | def scan_setup_py(): 1733 | """Validate the contents of setup.py against Versioneer's expectations.""" 1734 | found = set() 1735 | setters = False 1736 | errors = 0 1737 | with open("setup.py", "r") as f: 1738 | for line in f.readlines(): 1739 | if "import versioneer" in line: 1740 | found.add("import") 1741 | if "versioneer.get_cmdclass()" in line: 1742 | found.add("cmdclass") 1743 | if "versioneer.get_version()" in line: 1744 | found.add("get_version") 1745 | if "versioneer.VCS" in line: 1746 | setters = True 1747 | if "versioneer.versionfile_source" in line: 1748 | setters = True 1749 | if len(found) != 3: 1750 | print("") 1751 | print("Your setup.py appears to be missing some important items") 1752 | print("(but I might be wrong). Please make sure it has something") 1753 | print("roughly like the following:") 1754 | print("") 1755 | print(" import versioneer") 1756 | print(" setup( version=versioneer.get_version(),") 1757 | print(" cmdclass=versioneer.get_cmdclass(), ...)") 1758 | print("") 1759 | errors += 1 1760 | if setters: 1761 | print("You should remove lines like 'versioneer.VCS = ' and") 1762 | print("'versioneer.versionfile_source = ' . This configuration") 1763 | print("now lives in setup.cfg, and should be removed from setup.py") 1764 | print("") 1765 | errors += 1 1766 | return errors 1767 | 1768 | if __name__ == "__main__": 1769 | cmd = sys.argv[1] 1770 | if cmd == "setup": 1771 | errors = do_setup() 1772 | errors += scan_setup_py() 1773 | if errors: 1774 | sys.exit(1) 1775 | --------------------------------------------------------------------------------
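Editor's note: the render_* helpers in versioneer.py above all consume the same "pieces" dict produced by git_pieces_from_vcs. The following is a minimal sketch (not part of the repository) of how the documented styles differ, assuming versioneer.py is importable from the repository root; the pieces values are hypothetical.

import versioneer

# Hypothetical output of git_pieces_from_vcs: two commits past tag 0.3.1,
# clean working tree; the revision ids are made up for illustration.
pieces = {
    "closest-tag": "0.3.1",
    "distance": 2,
    "short": "abc1234",
    "long": "abc1234" + "0" * 33,
    "dirty": False,
    "error": None,
}

for style in ["pep440", "pep440-pre", "pep440-post", "pep440-old",
              "git-describe", "git-describe-long"]:
    print(style, versioneer.render(pieces, style)["version"])

# Expected output, per the docstrings above:
#   pep440             0.3.1+2.gabc1234
#   pep440-pre         0.3.1.post.dev2
#   pep440-post        0.3.1.post2+gabc1234
#   pep440-old         0.3.1.post2
#   git-describe       0.3.1-2-gabc1234
#   git-describe-long  0.3.1-2-gabc1234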
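Likewise, the keyword path (git_get_keywords / git_versions_from_keywords) only succeeds once 'git archive' has expanded the $Format$ placeholders written by the export-subst attribute. A sketch with hypothetical expanded values, again assuming versioneer.py is importable; the empty tag_prefix matches this repository's setup.cfg.

import versioneer

# Hypothetical keywords as they would look after 'git archive' expansion;
# an unexpanded "$Format:%d$" refnames value would raise NotThisMethod instead.
keywords = {
    "refnames": " (HEAD -> master, tag: 0.3.1)",
    "full": "abc1234" + "0" * 33,
}

info = versioneer.git_versions_from_keywords(keywords, tag_prefix="", verbose=True)
print(info)
# Expected: {'version': '0.3.1', 'full-revisionid': 'abc1234000...',
#            'dirty': False, 'error': None}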