├── nbencdec ├── __init__.py ├── cli │ ├── __init__.py │ └── commands.py └── exporters │ ├── __init__.py │ ├── templates │ └── encoded_python.tpl │ ├── encoded_python.py │ └── filters.py ├── requirements.txt ├── MANIFEST.in ├── setup.py ├── LICENSE └── README.md /nbencdec/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | ipython 2 | nbconvert 3 | pytest 4 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include requirements.txt 2 | include nbencdec/exporters/templates/* 3 | -------------------------------------------------------------------------------- /nbencdec/cli/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from .commands import main 4 | -------------------------------------------------------------------------------- /nbencdec/exporters/__init__.py: -------------------------------------------------------------------------------- 1 | from .encoded_python import EncodedPythonExporter 2 | -------------------------------------------------------------------------------- /nbencdec/exporters/templates/encoded_python.tpl: -------------------------------------------------------------------------------- 1 | {%- extends 'null.j2' -%} 2 | 3 | {%- block header -%} 4 | #!/usr/bin/env python 5 | # coding: utf-8 6 | # 7 | # EPY: stripped_notebook: {{ resources.strip_cells(nb) }} 8 | {% endblock header %} 9 | 10 | {% block input %} 11 | # EPY: START code 12 | {{ cell.source | ipython2encodedpython }} 13 | # EPY: END code 14 | {% endblock input %} 15 | 16 | {% block markdowncell scoped %} 17 | # EPY: START markdown 18 | {{ cell.source | comment_lines_with_escaping }} 19 | # EPY: END markdown 20 | {% endblock markdowncell %} 21 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import setuptools 5 | 6 | install_requires = [line.rstrip() for line in open(os.path.join(os.path.dirname(__file__), "requirements.txt"))] 7 | 8 | setuptools.setup( 9 | name="nbencdec", 10 | version="0.0.10", 11 | description="Encode/decode Python Notebook files to .py files.", 12 | author="Tony Tung", 13 | author_email="ttung@chanzuckerberg.com", 14 | license="MIT", 15 | packages=setuptools.find_packages(), 16 | package_data={'nbencdec': ['exporters/templates/*.tpl']}, 17 | include_package_data=True, 18 | install_requires=install_requires, 19 | entry_points={ 20 | 'console_scripts': "nbencdec=nbencdec.cli:main", 21 | 'nbconvert.exporters': [ 22 | 'encoded_python = nbencdec.exporters:EncodedPythonExporter', 23 | ], 24 | } 25 | ) 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Chan Zuckerberg Initiative 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # NoteBook ENCode DECode 2 | 3 | ## Rationale 4 | Storing `.ipynb` files in code repositories has two significant drawbacks: 5 | 1. Most, if not all existing IDEs will not index `.ipynb` files. Therefore, if one wishes to refactor code that impacts `.ipynb`, it often has to be done manually. 6 | 2. It is challenging to code review `.ipynb` files well. 7 | 8 | At the same time, ipython notebooks have significant usability advantages over plain python source files. `nbencdec` allows users to convert .ipynb into a .py file with enough hints embedded in comments such that it is possible to reconstruct the original structure of the `.ipynb` file. 9 | 10 | ## Usage 11 | * Install the nbencdec package. 12 | * Option 1: Clone the repo, and run `pip install -e .` 13 | * Option 2: run `pip install nbencdec`. 14 | * Run `nbencdec encode ` to encode an `.ipynb` file as a `.py` file. 15 | * Run `nbencdec decode ` to decode a properly encoded `.py` file. 16 | 17 | ## Hints 18 | There are four types of hints embedded in the comments: 19 | 1. `# EPY: stripped_notebook: `: This represents the json structure of the original `.ipynb` file, except for the `cells` field. See the [ipython file format specification](https://nbformat.readthedocs.io/en/latest/) for more details. 20 | 2. `# EPY: START code` and `# EPY: END code`. Lines in between these two markers should be incorporated into a single [code cell](https://nbformat.readthedocs.io/en/latest/format_description.html#code-cells) in the `.ipynb` file. 21 | 3. `# EPY: START markdown` and `# EPY: END markdown`. Lines in between these two markers should be incorporated into a single [code cell](https://nbformat.readthedocs.io/en/latest/format_description.html#markdown-cells) in the `.ipynb` file. 22 | 4. `# EPY: ESCAPE`. The remaining text on that line should be written verbatim to the `.ipynb` file. This is used for [ipython magic commands](http://ipython.readthedocs.io/en/stable/interactive/magics.html), which would not be valid python. 23 | -------------------------------------------------------------------------------- /nbencdec/exporters/encoded_python.py: -------------------------------------------------------------------------------- 1 | """Python script Exporter class""" 2 | 3 | # Copyright (c) Jupyter Development Team. 4 | # Distributed under the terms of the Modified BSD License. 5 | 6 | import copy 7 | import json 8 | import os 9 | 10 | from jupyter_core.paths import jupyter_path 11 | from traitlets import default, validate 12 | 13 | from nbconvert.exporters.templateexporter import TemplateExporter 14 | from .filters import comment_lines_with_escaping, ipython2encodedpython 15 | 16 | 17 | def strip_cells(notebook): 18 | clone = copy.deepcopy(notebook) 19 | del clone['cells'] 20 | return json.dumps(clone, sort_keys=True) 21 | 22 | 23 | class EncodedPythonExporter(TemplateExporter): 24 | """ 25 | Exports a Python code file. 26 | """ 27 | @validate('template_data_paths') 28 | def _add_template_path(self, proposal): 29 | """ 30 | We want to inherit from HTML template, and have template under 31 | `./templates/` so append it to the search path. (see next section) 32 | """ 33 | our_path = os.path.join(os.path.dirname(__file__), "templates") 34 | base_paths = jupyter_path("nbconvert", "templates", "base") 35 | if our_path not in proposal['value']: 36 | proposal['value'].append(our_path) 37 | for base_path in base_paths: 38 | if base_path not in proposal['value']: 39 | proposal['value'].append(base_path) 40 | return proposal['value'] 41 | 42 | def default_filters(self): 43 | filters = list(super().default_filters()) 44 | filters.append(("comment_lines_with_escaping", comment_lines_with_escaping)) 45 | filters.append(("ipython2encodedpython", ipython2encodedpython)) 46 | return filters 47 | 48 | @default('file_extension') 49 | def _file_extension_default(self): 50 | return '.py' 51 | 52 | @default('template_file') 53 | def _template_file_default(self): 54 | return 'encoded_python.tpl' 55 | 56 | def _init_resources(self, resources=None): 57 | resources = super(EncodedPythonExporter, self)._init_resources(resources) 58 | resources['strip_cells'] = strip_cells 59 | return resources 60 | 61 | output_mimetype = 'text/x-python' 62 | -------------------------------------------------------------------------------- /nbencdec/exporters/filters.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | 4 | def comment_lines_with_escaping( 5 | text, 6 | prefix="#", 7 | escape_prefix="# EPY", 8 | escape_format_string="# EPY: ESCAPE {}"): 9 | """ 10 | Build a Python comment line from input text. 11 | 12 | Parameters 13 | ---------- 14 | text : str 15 | Text to comment out. 16 | prefix : str 17 | Character to append to the start of each line. 18 | """ 19 | def escape_if_necessary(line): 20 | if line.startswith(escape_prefix): 21 | return escape_format_string.format(line) 22 | else: 23 | return "{}{}".format(prefix, line) 24 | 25 | splitlines = [ 26 | escape_if_necessary(line) 27 | for line in text.splitlines()] 28 | return "\n".join(splitlines) 29 | 30 | 31 | def ipython2encodedpython(code): 32 | def tweak_transform(orig_transform): 33 | """ 34 | Takes the transform and modifies it such that we compare each line to its transformation. If they are 35 | different, that means the line is a special ipython command. We strip that from the output, but record the 36 | special command in a comment so we can restore it. 37 | """ 38 | def new_push_builder(push_func): 39 | def new_push(line): 40 | result = push_func(line) 41 | 42 | if line != result: 43 | return "# EPY: ESCAPE {}".format(line) 44 | 45 | return result 46 | return new_push 47 | 48 | orig_transform.push = functools.update_wrapper(new_push_builder(orig_transform.push), orig_transform.push) 49 | 50 | return orig_transform 51 | 52 | from IPython.core.inputtransformer import StatelessInputTransformer 53 | @StatelessInputTransformer.wrap 54 | def escaped_epy_lines(line): 55 | """Transform lines that happen to look like EPY comments.""" 56 | if line.startswith("# EPY"): 57 | return "# EPY: ESCAPE {}".format(line) 58 | return line 59 | 60 | """Transform IPython syntax to an encoded Python syntax 61 | 62 | Parameters 63 | ---------- 64 | 65 | code : str 66 | IPython code, to be transformed to Python encoded in a way to facilitate transformation back into IPython. 67 | """ 68 | from IPython.core.inputsplitter import IPythonInputSplitter 69 | 70 | # get a list of default line transforms. then capture 71 | fake_isp = IPythonInputSplitter(line_input_checker=False) 72 | logical_line_transforms = [escaped_epy_lines()] 73 | logical_line_transforms.extend([tweak_transform(transform) for transform in fake_isp.logical_line_transforms]) 74 | 75 | isp = IPythonInputSplitter(line_input_checker=False, logical_line_transforms=logical_line_transforms) 76 | result = isp.transform_cell(code) 77 | if result.endswith("\n") and not code.endswith("\n"): 78 | # transform_cell always slaps a trailing NL. If the input did _not_ 79 | # have a trailing NL, then we remove it. 80 | result = result[:-1] 81 | return result 82 | -------------------------------------------------------------------------------- /nbencdec/cli/commands.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | import contextlib 5 | import json 6 | import sys 7 | 8 | from nbencdec.exporters import EncodedPythonExporter 9 | 10 | 11 | STRIPPED_NOTEBOOK_MARKER = "# EPY: stripped_notebook: " 12 | EPY_START_MARKER = "# EPY: START " 13 | EPY_TRANSFORM_MARKER = "# EPY: ESCAPE " 14 | EPY_END_MARKER = "# EPY: END " 15 | 16 | 17 | @contextlib.contextmanager 18 | def stdio_wrapper(stdio_fh): 19 | yield stdio_fh 20 | 21 | 22 | def open_possibly_stdio(stdio_handle, filename, mode): 23 | """ 24 | Given a filename, either return a contextmanager wrapping `stdio_handle` if 25 | `filename` is "-", or a handle to the file specified in `filename`. 26 | """ 27 | if filename == "-": 28 | return stdio_wrapper(stdio_handle) 29 | else: 30 | return open(filename, mode) 31 | 32 | def decode(args): 33 | notebook = { 34 | 'cells': [] 35 | } 36 | 37 | with open_possibly_stdio(sys.stdin, args.source, "r") as input_fh: 38 | iterator = iter(input_fh.readlines()) 39 | 40 | # read until we get the stripped notebook marker. 41 | for line in iterator: 42 | if line.startswith(STRIPPED_NOTEBOOK_MARKER): 43 | remainder = line[len(STRIPPED_NOTEBOOK_MARKER):] 44 | notebook.update(json.loads(remainder)) 45 | break 46 | 47 | while True: 48 | cell = { 49 | 'metadata': {}, 50 | 'source': [], 51 | } 52 | # read until the first start marker 53 | for line in iterator: 54 | if line.startswith(EPY_START_MARKER): 55 | remainder = line[len(EPY_START_MARKER):].rstrip() 56 | cell['cell_type'] = remainder 57 | 58 | expected_transform = EPY_TRANSFORM_MARKER 59 | expected_end_marker = EPY_END_MARKER 60 | break 61 | else: 62 | break 63 | 64 | for line in iterator: 65 | if line.startswith(expected_end_marker): 66 | break 67 | if line.startswith(expected_transform): 68 | remainder = line[len(expected_transform):] 69 | cell['source'].append(remainder) 70 | continue 71 | 72 | if cell['cell_type'] == 'markdown': 73 | line = line[1:] 74 | cell['source'].append(line) 75 | 76 | if cell['cell_type'] == 'code': 77 | cell['execution_count'] = None 78 | cell['outputs'] = [] 79 | 80 | # strip the final "\n" from the last line. it's weird. 81 | if len(cell['source']) > 0 and cell['source'][-1].endswith("\n"): 82 | cell['source'][-1] = cell['source'][-1][:-1] 83 | 84 | notebook['cells'].append(cell) 85 | 86 | with open_possibly_stdio(sys.stdout, args.output, "w") as output_fh: 87 | json.dump( 88 | notebook, output_fh, 89 | sort_keys=True, indent=1, separators=(',', ': ')) 90 | 91 | 92 | def encode(args): 93 | encoder = EncodedPythonExporter() 94 | 95 | with open_possibly_stdio(sys.stdin, args.notebook, "r") as input_fh, \ 96 | open_possibly_stdio(sys.stdout, args.output, "w") as output_fh: 97 | encoded, _ = encoder.from_file(input_fh) 98 | output_fh.write(encoded) 99 | 100 | 101 | def parse_args(): 102 | parser = argparse.ArgumentParser() 103 | subparsers = parser.add_subparsers(dest="command") 104 | 105 | decode_group = subparsers.add_parser("decode") 106 | decode_group.set_defaults(command=decode) 107 | decode_group.add_argument("source", help="The python with extra state embedded to reconstitute an .ipynb file") 108 | decode_group.add_argument("output", help="The reconstituted .ipynb file") 109 | 110 | encode_group = subparsers.add_parser("encode") 111 | encode_group.set_defaults(command=encode) 112 | encode_group.add_argument("notebook", help="The .ipynb file to encode to a .py file") 113 | encode_group.add_argument("output", help="The encoded .py file") 114 | 115 | return parser, parser.parse_args() 116 | 117 | 118 | def main(): 119 | parser, args = parse_args() 120 | 121 | if args.command is None: 122 | parser.print_help() 123 | parser.exit(status=2) 124 | args.command(args) 125 | --------------------------------------------------------------------------------