├── d2lbook
    ├── _version.py
    ├── __init__.py
    ├── clear.py
    ├── main.py
    ├── upload_github.sh
    ├── rst_test.py
    ├── sagemaker.py
    ├── upload_doc_s3.sh
    ├── resource_test.py
    ├── tab.py
    ├── slides_test.py
    ├── activate.py
    ├── common.py
    ├── markdown_test.py
    ├── library_test.py
    ├── sphinx.py
    ├── deploy.py
    ├── config.py
    ├── notebook_test.py
    ├── utils.py
    ├── config_default.ini
    ├── markdown.py
    ├── colab.py
    ├── translate.py
    ├── slides.py
    ├── resource.py
    ├── rst.py
    ├── notebook.py
    └── library.py
├── docs
    ├── user
    │   ├── colab.md
    │   ├── index.md
    │   ├── code.md
    │   ├── code_tabs.md
    │   ├── edit.md
    │   ├── slides.md
    │   ├── create.md
    │   ├── build.md
    │   ├── deploy.md
    │   └── markdown.md
    ├── img
    │   ├── d2l.png
    │   ├── catdog.jpg
    │   ├── d2l-book.png
    │   ├── favicon.png
    │   ├── jupyter.png
    │   ├── koebel.jpg
    │   ├── s3-acl.png
    │   ├── build.graffle
    │   ├── record-set.png
    │   ├── github_pages.png
    │   ├── s3-web-hosting.png
    │   └── multi-lang.svg
    ├── develop
    │   ├── index.md
    │   └── pipeline.md
    ├── install.md
    ├── demo.md
    ├── refs.bib
    ├── config.ini
    └── index.md
├── .gitignore
├── README.md
├── Jenkinsfile
├── setup.py
├── scripts
    └── install_fonts.sh
└── LICENSE


/d2lbook/_version.py:
--------------------------------------------------------------------------------
1 | __version__ = '1.0.3'
2 | 


--------------------------------------------------------------------------------
/docs/user/colab.md:
--------------------------------------------------------------------------------
1 | # Colab
2 | :label:`sec_colab`
3 | 


--------------------------------------------------------------------------------
/d2lbook/__init__.py:
--------------------------------------------------------------------------------
1 | from ._version import __version__
2 | 


--------------------------------------------------------------------------------
/docs/img/d2l.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/d2l-ai/d2l-book/HEAD/docs/img/d2l.png


--------------------------------------------------------------------------------
/docs/img/catdog.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/d2l-ai/d2l-book/HEAD/docs/img/catdog.jpg


--------------------------------------------------------------------------------
/docs/img/d2l-book.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/d2l-ai/d2l-book/HEAD/docs/img/d2l-book.png


--------------------------------------------------------------------------------
/docs/img/favicon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/d2l-ai/d2l-book/HEAD/docs/img/favicon.png


--------------------------------------------------------------------------------
/docs/img/jupyter.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/d2l-ai/d2l-book/HEAD/docs/img/jupyter.png


--------------------------------------------------------------------------------
/docs/img/koebel.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/d2l-ai/d2l-book/HEAD/docs/img/koebel.jpg


--------------------------------------------------------------------------------
/docs/img/s3-acl.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/d2l-ai/d2l-book/HEAD/docs/img/s3-acl.png


--------------------------------------------------------------------------------
/docs/img/build.graffle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/d2l-ai/d2l-book/HEAD/docs/img/build.graffle


--------------------------------------------------------------------------------
/docs/img/record-set.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/d2l-ai/d2l-book/HEAD/docs/img/record-set.png


--------------------------------------------------------------------------------
/docs/img/github_pages.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/d2l-ai/d2l-book/HEAD/docs/img/github_pages.png


--------------------------------------------------------------------------------
/docs/img/s3-web-hosting.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/d2l-ai/d2l-book/HEAD/docs/img/s3-web-hosting.png


--------------------------------------------------------------------------------
/docs/develop/index.md:
--------------------------------------------------------------------------------
1 | # Development Guide
2 | 
3 | Explain how it works.
4 | 
5 | 
6 | ```toc
7 | pipeline
8 | ```
9 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | /d2lbook.egg-info/
 2 | /dist/
 3 | *.pyc
 4 | **/.ipynb_checkpoints/*
 5 | /docs/_build/
 6 | .DS_Store
 7 | build/
 8 | .mypy_cache
 9 | .eggs
10 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # D2L-Book: A Toolkit for Hands-on Books
2 | 
3 | This package helps you build and publish **a book with Python code**, or
4 | **Python package documents with tutorials**. The document site is available at
5 | https://book.d2l.ai
6 | 


--------------------------------------------------------------------------------
/d2lbook/clear.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import shutil
 3 | from d2lbook.config import Config
 4 | 
 5 | __all__  = ['clear']
 6 | 
 7 | def clear():
 8 |     config = Config()
 9 |     build_dir = config.tgt_dir
10 |     logging.info('Delete %s', build_dir)
11 |     shutil.rmtree(build_dir, ignore_errors=True)
12 | 


--------------------------------------------------------------------------------
/docs/user/index.md:
--------------------------------------------------------------------------------
 1 | # User Guide
 2 | 
 3 | In this part, we will cover
 4 | 
 5 | 1. How to create a project
 6 | 1. How to build and deploy the documents
 7 | 1. Various configuration options
 8 | 1. Markdown and code examples
 9 | 
10 | ```toc
11 | :maxdepth: 2
12 | 
13 | create
14 | edit
15 | build
16 | deploy
17 | markdown
18 | code
19 | code_tabs
20 | slides
21 | colab
22 | ```
23 | 


--------------------------------------------------------------------------------
/Jenkinsfile:
--------------------------------------------------------------------------------
 1 | stage("Build and Publish") {
 2 |   node('d2l-worker') {
 3 |     ws('workspace/d2l-book') {
 4 |       checkout scm
 5 |       sh '''set -ex
 6 |       conda remove -n d2l-book-build --all -y
 7 |       conda create -n d2l-book-build python=3.9 pip -y
 8 |       conda activate d2l-book-build
 9 |       pip install .
10 |       python -m unittest d2lbook/*_test.py
11 |       # pip install mypy
12 |       # mypy --ignore-missing-imports d2lbook/*_test.py
13 |       cd docs
14 |       rm -rf _build
15 |       pip install matplotlib numpy mypy
16 |       d2lbook build eval
17 |       d2lbook build eval --tab numpy
18 |       d2lbook build eval --tab cpython
19 |       d2lbook build pdf
20 |       d2lbook build html --tab all
21 |       '''
22 | 
23 |       if (env.BRANCH_NAME == 'master') {
24 |         sh '''set -ex
25 |         conda activate d2l-book-build
26 |         cd docs
27 |         d2lbook deploy html pdf
28 |         d2lbook clear
29 |       '''
30 |       }
31 |     }
32 |   }
33 | }
34 | 


--------------------------------------------------------------------------------
/d2lbook/main.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import sys
 3 | from d2lbook.build import build
 4 | from d2lbook.deploy import deploy
 5 | from d2lbook.clear import clear
 6 | from d2lbook.activate import activate
 7 | from d2lbook.translate import translate
 8 | from d2lbook.slides import slides
 9 | import logging
10 | 
11 | logging.basicConfig(format='[d2lbook:%(filename)s:L%(lineno)d] %(levelname)-6s %(message)s')
12 | logging.getLogger().setLevel(logging.INFO)
13 | 
14 | 
15 | def main():
16 |     commands = {'build': build, 'deploy':deploy, 'clear':clear,
17 |                 'activate':activate, 'translate':translate, 'slides':slides}
18 |     parser = argparse.ArgumentParser(description='''
19 | D2L Book: Publish a book based on Jupyter notebooks.
20 | 
21 | Run d2lbook command -h to get the help message for each command.
22 | ''')
23 |     parser.add_argument('command', nargs=1, choices=commands.keys())
24 |     args = parser.parse_args(sys.argv[1:2])
25 |     commands[args.command[0]]()
26 | 
27 | if __name__ == "__main__":
28 |     main()
29 | 


--------------------------------------------------------------------------------
/docs/install.md:
--------------------------------------------------------------------------------
 1 | # Installation
 2 | 
 3 | The `d2lbook` package is tested under macOS and Linux. (You are welcome to
 4 | contribute a Windows release).
 5 | 
 6 | First make sure you have [pip](https://pip.pypa.io/en/stable/) available. In
 7 | option, we recommend [conda](https://docs.conda.io/en/latest/miniconda.html) for
 8 | libraries that `pip` doesn't support.
 9 | 
10 | Now install the command-line interface.
11 | 
12 | ```sh
13 | pip install git+https://github.com/d2l-ai/d2l-book
14 | ```
15 | 
16 | This is a [d2lbook pip package](https://pypi.org/project/d2lbook/), but we
17 | recommend you to install latest version at Github directly since it's under fast
18 | developing.
19 | 
20 | To build HTML results, we need [pandoc](https://pandoc.org/). You can install it
21 | through `conda install pandoc`.
22 | 
23 | Building the PDF version requires
24 | [LibRsvg](https://wiki.gnome.org/Projects/LibRsvg) to convert your SVG images
25 | (our recommend format), e.g. `conda install librsvg`, and of course, you need to
26 | have a LaTeX distribution, e.g. [Tex Live](https://www.tug.org/texlive/), available,
27 | 


--------------------------------------------------------------------------------
/docs/demo.md:
--------------------------------------------------------------------------------
 1 | # Building this Website
 2 | 
 3 | You may find building this website is a good starting point for your
 4 | project. The source codes of this site is available under
 5 | [demo/](https://github.com/d2l-ai/d2l-book/tree/master/demo).
 6 | 
 7 | Please make sure you have `git` (e.g. `conda install git`),  `numpy` and
 8 | `matplotlib` (e.g. `pip install numpy matplotlib`) installed.
 9 | The following command will download the source codes, evaluate all notebooks and generate outputs in
10 | `ipynb`, `html` and `pdf` format.
11 | 
12 | ```sh
13 | git clone https://github.com/d2l-ai/d2l-book
14 | cd d2l-book/demo
15 | d2lbook build all
16 | ```
17 | 
18 | Once finished, you can check the results in the `_build` folder. For example, this page is in `_build/html/index.html`, the PDF version is at `_build/pdf/d2l-book.pdf`, all evaluated notebooks are under `_build/eval/`.
19 | 
20 | You can build a particular format:
21 | 
22 | ```sh
23 | d2lbook build eval  # evaluate noteboks and save in .ipynb formats
24 | d2lbook build html  # build the HTML version
25 | d2lbook build pdf   # build the PDF version
26 | ```
27 | 


--------------------------------------------------------------------------------
/d2lbook/upload_github.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Upload files into a github repo.
 3 | set -e
 4 | 
 5 | if [ $# -ne 3 ]; then
 6 |     echo "ERROR: needs two arguments. "
 7 |     echo "Sample usage:"
 8 |     echo "   $0 notebooks d2l-ai/notebooks version"
 9 |     exit -1
10 | fi
11 | 
12 | SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
13 | IN_DIR="$( cd $1 && pwd )"
14 | REPO=$2
15 | REPO_DIR=${IN_DIR}-git
16 | 
17 | # clone the repo, make sure GIT_USERNAME and GIT_PASSWORD have already set
18 | rm -rf ${REPO_DIR}
19 | git clone git@github.com:${REPO}.git ${REPO_DIR}
20 | 
21 | # remove all except for README.md and .git.
22 | tmp=$(mktemp -d)
23 | 
24 | if [[ -f "${REPO_DIR}/README.md" ]]; then
25 |     mv ${REPO_DIR}/README.md $tmp/
26 | fi
27 | mv ${REPO_DIR}/.git $tmp/
28 | rm -rf ${REPO_DIR}/*
29 | if [[ -f "$tmp/README.md" ]]; then
30 |     mv $tmp/README.md ${REPO_DIR}/
31 | fi
32 | mv $tmp/.git ${REPO_DIR}/.git
33 | 
34 | cp -r ${IN_DIR}/* ${REPO_DIR}/
35 | 
36 | if [ -f ${REPO_DIR}/index.html ]; then
37 |     touch ${REPO_DIR}/.nojekyll
38 | fi
39 | 
40 | cd ${REPO_DIR}
41 | git config --global push.default simple
42 | git add -f --all .
43 | git diff-index --quiet HEAD || git commit -am "Version $3"
44 | git push origin
45 | 


--------------------------------------------------------------------------------
/docs/user/code.md:
--------------------------------------------------------------------------------
 1 | # Code Cells
 2 | :label:`sec_code`
 3 | 
 4 | ## Maximum Line Length
 5 | 
 6 | We recommend you to set the maximum line length to be 78 to avoid automatic line break in PDF. You can enable the Ruler extension in [nbextensions](https://github.com/ipython-contrib/jupyter_contrib_nbextensions) to add visual vertical line in Jupyter when writing codes. 
 7 | 
 8 | ```{.python .input}
 9 | '-' * 78
10 | ```
11 | 
12 | ## Hide Source and Outputs
13 | 
14 | We can hide the source of a code cell by adding a comment line `# Hide
15 | code` in the cell. We can also hide the code cell outputs using `# Hide outputs`
16 | 
17 | For example, here is the normal code cell:
18 | 
19 | ```{.python .input}
20 | 1+2+3
21 | ```
22 | 
23 | Let's hide the source codes
24 | 
25 | ```{.python .input}
26 | # Hide code
27 | 1+2+3
28 | ```
29 | 
30 | Also try hiding the outputs
31 | 
32 | ```{.python .input}
33 | # Hide outputs
34 | 1+2+3
35 | ```
36 | 
37 | ## Plotting
38 | 
39 | We recommend you to use the `svg` format to plot a figure. For example, the following code configures `matplotlib`
40 | 
41 | ```{.python .input  n=3}
42 | %matplotlib inline
43 | from IPython import display
44 | from matplotlib import pyplot as plt
45 | import numpy as np
46 | 
47 | display.set_matplotlib_formats('svg')
48 | 
49 | x = np.arange(0, 10, 0.1)
50 | plt.plot(x, np.sin(x));
51 | ```
52 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup, find_packages
 2 | 
 3 | with open('d2lbook/_version.py') as ver_file:
 4 |     exec(ver_file.read())
 5 | 
 6 | requirements = [
 7 |     'jupyter',
 8 |     'regex',
 9 |     'sphinx==5.3.0', # >=5.1.1 to enable pre_border-radius in code cells, 6.1.3 shows blank webpages
10 |     'recommonmark',
11 |     'sphinxcontrib-bibtex==2.4.2', # >=2.2 to enable citet and citep
12 |     'pybtex-apa-style',
13 |     'd2l-notedown',
14 |     'mxtheme>=0.3.17',
15 |     'sphinxcontrib-svg2pdfconverter',
16 |     'numpydoc',
17 |     'awscli',
18 |     'gitpython',
19 |     'sphinx_autodoc_typehints',
20 |     'astor',
21 |     'yapf',
22 |     'fasteners',
23 |     'isort'
24 | ]
25 | 
26 | setup(
27 |     name='d2lbook',
28 |     version=__version__,
29 |     install_requires=requirements,
30 |     python_requires='>=3.8',
31 |     author='D2L Developers',
32 |     author_email='d2l.devs@gmail.com',
33 |     url='https://book.d2l.ai',
34 |     description="Create an online book with Jupyter Notebooks and Sphinx",
35 |     license='Apache-2.0',
36 |     packages=find_packages(),
37 |     include_package_data=True,
38 |     package_data={'d2lbook':['config_default.ini', 'upload_doc_s3.sh', 'upload_github.sh']},
39 |     entry_points={
40 |         'console_scripts': [
41 |             'd2lbook = d2lbook.main:main',
42 |         ]
43 |     },
44 | )
45 | 


--------------------------------------------------------------------------------
/d2lbook/rst_test.py:
--------------------------------------------------------------------------------
 1 | from d2lbook import notebook
 2 | from d2lbook import rst
 3 | import unittest
 4 | import nbconvert
 5 | 
 6 | _markdown_src = r'''
 7 | # Test
 8 | :label:`test`
 9 | 
10 | first para
11 | 
12 | python is good
13 | 
14 | another para
15 | 
16 | This is :eqref:`sec_1`
17 | 
18 | ```python2
19 | 1+2+3
20 | ```
21 | 
22 | python3 is better
23 | 
24 | - here
25 | - haha
26 | 
27 | 
28 | ```{.input .python}
29 | 1+2+3
30 | ```
31 | 
32 | ```{.input .python}
33 | #@tab python2
34 | 1+2+3
35 | ```
36 | 
37 | ```bash
38 | ````
39 | aa
40 | ````
41 | ```
42 | 
43 | ## Section 2
44 | :label:`sec_2`
45 | 
46 | ```eval_rst
47 | .. only:: html
48 | 
49 |    Table of Contents
50 |    -----------------
51 | ```
52 | 
53 | ```toc
54 | :numbered:
55 | :maxdepth: 2
56 | 
57 | install
58 | user/index
59 | develop/index
60 | ```
61 | 
62 | ![Estimating the length of a foot](../img/koebel.jpg)
63 | :width:`400px`
64 | 
65 | $x=1$, :numref:`sec_2`
66 | '''
67 | 
68 | class TestRst(unittest.TestCase):
69 | 
70 |     # TODO(mli) add some asserts
71 |     def test_convert_notebook(self):
72 |         nb = notebook.read_markdown(_markdown_src)
73 |         body, _ = rst.convert_notebook(nb, {})
74 |         lines = body.split('\n')
75 | 
76 |         for l in lines:
77 |             if l.startswith(':math:`x=1`'):
78 |                 self.assertEqual(l, ':math:`x=1`, :numref:`sec_2`')
79 | 
80 | 


--------------------------------------------------------------------------------
/d2lbook/sagemaker.py:
--------------------------------------------------------------------------------
 1 | """Integration with Sagemaker"""
 2 | import nbformat
 3 | from d2lbook import utils
 4 | from d2lbook import colab
 5 | from d2lbook import notebook
 6 | 
 7 | class Sagemaker(colab.Colab):
 8 |     def __init__(self, config):
 9 |         self._valid = config.sagemaker and config.sagemaker['github_repo']
10 |         self.config = config.sagemaker
11 |         self._repo, self._libs = colab.parse_repo_lib(
12 |             self.config['github_repo'], self.config['libs'], config.library["version"])
13 |         kernel_str = self.config['kernel']
14 |         if ',' not in kernel_str:
15 |             self._kernel = {None: kernel_str}
16 |         else:
17 |             kernel = utils.split_config_str(kernel_str, 2)
18 |             self._kernel = {k[0]:k[1] for k in kernel}
19 | 
20 |     def generate_notebooks(self, eval_dir, sagemaker_dir, tab):
21 |         if not self._valid:
22 |             return
23 |         utils.run_cmd(['rm -rf', sagemaker_dir])
24 |         utils.run_cmd(['cp -r', eval_dir, sagemaker_dir])
25 |         notebooks = utils.find_files('**/*.ipynb', sagemaker_dir)
26 |         for fn in notebooks:
27 |             nb = notebook.read(fn)
28 |             if not nb:
29 |                 continue
30 |             colab.update_notebook_kernel(nb, self._kernel[tab])
31 |             colab.insert_additional_installation(nb, self._libs[tab], self.config['libs_header'])
32 |             with open(fn, 'w') as f:
33 |                 f.write(nbformat.writes(nb))
34 | 


--------------------------------------------------------------------------------
/d2lbook/upload_doc_s3.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Upload a doc folder into a S3 bucket, with text context compressed
 4 | #
 5 | # Sample Usage:
 6 | #
 7 | # ./upload_doc_s3.sh build/_build/html/ s3://en.d2l.ai
 8 | #
 9 | # Requres awscli is installed
10 | 
11 | set -ex
12 | 
13 | if [ $# -ne 2 ]; then
14 |     echo "ERROR: needs two arguments. "
15 |     echo "Usage:"
16 |     echo "  $0 doc_dir s3_bucket"
17 |     exit -1
18 | fi
19 | 
20 | DIR="$( cd $1 && pwd )"
21 | BUCKET=$2
22 | echo "Upload $DIR to $BUCKET"
23 | 
24 | # use a temp workspace, because we need to modify (compress) some files later.
25 | rm -rf ${DIR}_tmp
26 | cp -r ${DIR} ${DIR}_tmp
27 | DIR=${DIR}_tmp
28 | 
29 | 
30 | find $DIR \( -iname '*.css' -o -iname '*.js' \) -exec gzip -9 -n {} \; -exec mv {}.gz {} \;
31 | 
32 | aws s3 sync --exclude '*.*' --include '*.css' \
33 |      --content-type 'text/css' \
34 |      --content-encoding 'gzip' \
35 |      --acl 'public-read' --quiet \
36 |      $DIR $BUCKET
37 | 
38 | aws s3 sync --exclude '*.*' --include '*.js' \
39 |      --content-type 'application/javascript' \
40 |      --content-encoding 'gzip' \
41 |      --acl 'public-read' --quiet \
42 |      $DIR $BUCKET
43 | 
44 | # use a large expire time for fonts
45 | aws s3 sync --exclude '*.*' --include '*.woff' --include '*.woff2' \
46 |      --expires "$(date -d '+24 months' --utc +'%Y-%m-%dT%H:%M:%SZ')" \
47 |      --acl 'public-read' --quiet \
48 |      $DIR $BUCKET
49 | 
50 | aws s3 sync --delete $DIR $BUCKET --acl 'public-read' --quiet
51 | 


--------------------------------------------------------------------------------
/d2lbook/resource_test.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Run multiple instances from terminal to test
 3 | 
 4 | python d2lbook/resource_test.py
 5 | """
 6 | from d2lbook import resource
 7 | import unittest
 8 | import time
 9 | import logging
10 | import os
11 | 
12 | def _incorrect_code():
13 |     for i in a:
14 |         print(i)
15 | 
16 | def _runtime_error():
17 |     return 1 / 0
18 | 
19 | class TestResource(unittest.TestCase):
20 |     def test_get_available_gpus(self):
21 |         self.assertGreaterEqual(len(resource.get_available_gpus()), 0)
22 | 
23 |     def test_gpus(self):
24 |         def _job():
25 |             self.assertEqual(len(os.environ['CUDA_VISIBLE_DEVICES']), 1)
26 |             time.sleep(1)
27 | 
28 |         scheduler = resource.Scheduler(num_cpu_workers=2, num_gpu_workers=2)
29 |         scheduler.add(1, 1, _job, ())
30 |         scheduler.add(1, 1, _job, ())
31 |         scheduler.run()
32 | 
33 |     def test_scheduler(self):
34 |         scheduler = resource.Scheduler(num_cpu_workers=2, num_gpu_workers=2)
35 |         for _ in range(3):
36 |             scheduler.add(1, 0, time.sleep, (2,))
37 |         scheduler.add(1, 1, _incorrect_code, ())
38 |         scheduler.add(1, 2, _runtime_error, ())
39 |         scheduler.run()
40 |         self.assertEqual(len(scheduler.failed_tasks), 2)
41 |         logging.info(scheduler.error_message)
42 | 
43 | if __name__ == '__main__':
44 |     logging.basicConfig(
45 |         format='[d2lbook:%(filename)s:L%(lineno)d] %(levelname)-6s %(message)s'
46 |     )
47 |     logging.getLogger().setLevel(logging.INFO)
48 |     unittest.main()
49 | 


--------------------------------------------------------------------------------
/d2lbook/tab.py:
--------------------------------------------------------------------------------
 1 | from IPython.core.magic import Magics, magics_class, cell_magic
 2 | import os
 3 | import sys
 4 | 
 5 | _TAB = None
 6 | _LOG = sys.stderr.write
 7 | 
 8 | # the tab selected last time
 9 | _LAST_TAB = None
10 | _LAST_TAB_FILE = '/tmp/d2lbook_last_selected_tab'
11 | if os.path.exists(_LAST_TAB_FILE):
12 |     with open(_LAST_TAB_FILE) as f:
13 |         _LAST_TAB = f.read().strip()
14 | 
15 | def select_tab(tab=_LAST_TAB):
16 |     _LOG(f'Selected tab "{tab}", all other code cells not marked as "{tab}" will be ignored in execution.\n')
17 |     _LOG(f'This code block will be deleted during build.')
18 |     sys.modules[__name__]._TAB = tab
19 |     if tab:
20 |         with open(_LAST_TAB_FILE, 'w') as f:
21 |             f.write(tab+'\n')
22 | 
23 | def interact_select(*tabs):
24 |     if len(tabs) == 1 and isinstance(tabs[0], (list, tuple)):
25 |         tabs = tabs[0]
26 |     from ipywidgets import interact
27 |     interact(select_tab, tab=list(tabs))
28 | 
29 | def selected(*tabs):
30 |     if len(tabs) == 1 and isinstance(tabs[0], (list, tuple)):
31 |         tabs = tabs[0]
32 |     return _TAB in tabs
33 | 
34 | @magics_class
35 | class Tab(Magics):
36 |     @cell_magic
37 |     def tab(self, line, cell):
38 |         tabs = [tab.strip() for tab in line.strip().split(',')]
39 |         if _TAB in tabs or 'all' in tabs:
40 |             self.shell.run_cell(cell)
41 |         else:
42 |             _LOG(f'Ignored to run as it is not marked as a "{_TAB}" cell.')
43 | 
44 | 
45 | def load_ipython_extension(ipython):
46 |     ipython.run_cell('from d2lbook import tab')
47 |     ipython.register_magics(Tab)
48 | 


--------------------------------------------------------------------------------
/docs/develop/pipeline.md:
--------------------------------------------------------------------------------
 1 | # Build pipeline
 2 | 
 3 | 
 4 | ![](../img/build.svg)
 5 | 
 6 | The source files are markdown files. They are either purely markdown files or
 7 | juypyter notebooks saved in the markdown format with output removed. For the
 8 | latter, we may use Jupyter to edit them directly with the `notedown` plugin and
 9 | then run "Kernel -> Restart & Clear Output" before committing.
10 | 
11 | Then our building pipeline runs the following steps to publish the artifacts.
12 | 
13 | 1. Convert .md files into .ipynb files and evaluate each of them. The reason that
14 |    we use .md file as source format is because it's easy to review the source
15 |    changes. We evaluate every time to guarantee every notebook is
16 |    executable. This evaluation step may be time consuming, we can
17 | - Assume every notebook can be executed in 10 minutes, we may use multiple
18 |      GPUs to accelerate the execution
19 |    - If the source .md file hasn't change since last evaluation, we can reuse
20 |      the cached .ipynb file to avoid execution again.
21 |    - We use multiple processes to run notebooks in parallel.
22 | 1. The .ipynb files with outputs can be uploaded to Github directly so users can
23 |    clone it to run them locally or on the cloud. Also we zip all files so users
24 |    can download it easily
25 | 1. These .ipynb files are then converted to .rst files with format compatible to
26 |    Sphinx. Additional preprocessing steps are used for image/table/citation
27 |    references.
28 | 1. Use Sphinx to build .html and .pdf files
29 | 1. Publish all .html/.pdf/.zip files online, such as into an AWS S3 bucket.
30 | 
31 | ## Multiple Implementations
32 | 
33 | ![](../img/multi-lang.svg)


--------------------------------------------------------------------------------
/docs/refs.bib:
--------------------------------------------------------------------------------
 1 | @inproceedings{krizhevsky2012imagenet,
 2 |   title={Imagenet classification with deep convolutional neural networks},
 3 |   author={Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey E},
 4 |   booktitle={Advances in neural information processing systems},
 5 |   pages={1097--1105},
 6 |   year={2012}
 7 | }
 8 | 
 9 | @inproceedings{he2016deep,
10 |   title={Deep residual learning for image recognition},
11 |   author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
12 |   booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
13 |   pages={770--778},
14 |   year={2016}
15 | }
16 | 
17 | @article{devlin2018bert,
18 |   title={Bert: Pre-training of deep bidirectional transformers for language understanding},
19 |   author={Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina},
20 |   journal={arXiv preprint arXiv:1810.04805},
21 |   year={2018}
22 | }
23 | 
24 | @TechReport{mitchell80,
25 |   author = 	 "T. M. Mitchell",
26 |   title = 	 "The Need for Biases in Learning Generalizations",
27 |   institution =  "Computer Science Department, Rutgers University",
28 |   year = 	 "1980",
29 |   address =	 "New Brunswick, MA",
30 | }
31 | 
32 | @InCollection{Newell81,
33 |   author =       "A. Newell and P. S. Rosenbloom",
34 |   title =        "Mechanisms of Skill Acquisition and the Law of
35 |                   Practice",
36 |   booktitle =    "Cognitive Skills and Their Acquisition",
37 |   pages =        "1--51",
38 |   publisher =    "Lawrence Erlbaum Associates, Inc.",
39 |   year =         "1981",
40 |   editor =       "J. R. Anderson",
41 |   chapter =      "1",
42 |   address =      "Hillsdale, NJ",
43 |   eprint = {arXiv:1510.01797},
44 | }
45 | 


--------------------------------------------------------------------------------
/d2lbook/slides_test.py:
--------------------------------------------------------------------------------
 1 | from d2lbook import slides, notebook, common
 2 | import unittest
 3 | import time
 4 | import logging
 5 | import os
 6 | 
 7 | # from docs/user/slides.md
 8 | 
 9 | _md = '''# Data Manipulation
10 | 
11 | ## Getting Started
12 | 
13 | To start, we can use `arange` to create a row vector `x`
14 | containing the first 12 integers starting with 0,
15 | though they are created as floats by default.
16 | 
17 | (**A tensor represents a (possibly multi-dimensional) array of numerical values. We can access a tensor's *shape*.**)
18 | 
19 | 
20 | ```{.python .input}
21 | import numpy as np
22 | 
23 | x = np.arange(12)
24 | x
25 | ```
26 | 
27 | [**Many**] more (**operations can be applied elementwise,**)
28 | including unary operators like exponentiation.
29 | (~~e.g. `exp`~~)
30 | 
31 | ```{.python .input}
32 | np.exp(x)
33 | ```
34 | 
35 | (**Even when shapes differ, we can still perform elementwise operations**)
36 | by invoking the *broadcasting mechanism*.
37 | 
38 | 
39 | ```{.python .input}
40 | a = np.arange(3).reshape(3, 1)
41 | b = np.arange(2).reshape(1, 2)
42 | a, b
43 | ```
44 | '''
45 | 
46 | class TestSlides(unittest.TestCase):
47 |     def test_match_pairs(self):
48 |         matched = slides._match_slide_marks(_md)
49 |         common.print_list(matched)
50 |         self.assertEqual(len(matched), 5)
51 | 
52 |     def test_generate_slides(self):
53 |         nb = notebook.read_markdown(_md)
54 |         nb = slides._generate_slides(nb)
55 |         common.print_list(nb.cells)
56 |         self.assertEqual(len(nb.cells), 6)
57 | 
58 |     def test_remove_slide_marks(self):
59 |         nb = notebook.read_markdown(_md)
60 |         nb = slides.remove_slide_marks(nb)
61 |         common.print_list(nb.cells)
62 | 
63 | if __name__ == '__main__':
64 |     unittest.main()
65 | 


--------------------------------------------------------------------------------
/d2lbook/activate.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | from d2lbook import markdown, common, config
 3 | import glob
 4 | import re
 5 | import sys
 6 | 
 7 | __all__  = ['activate']
 8 | 
 9 | commands = ['tab']
10 | 
11 | def activate():
12 |     parser = argparse.ArgumentParser(description='Activate tabs')
13 |     parser.add_argument('tab', default='all', help='the tab to activate')
14 |     parser.add_argument('filename', nargs='+', help='the markdown files to activate')
15 |     args = parser.parse_args(sys.argv[2:])
16 | 
17 |     cf = config.Config()
18 |     for fn in args.filename:
19 |         for f in glob.glob(fn):
20 |             _activate_tab(f, args.tab, cf.default_tab)
21 | 
22 | _tab_re = re.compile('# *@tab +([\w]+)')
23 | 
24 | def _get_cell_tab(cell, default_tab):
25 |     if cell['type'] != 'code':
26 |         return []
27 |     if not '.input' in cell['class'] and not 'python' in cell['class']:
28 |         return []
29 |     match = common.source_tab_pattern.search(cell['source'])
30 |     if match:
31 |         return [tab.strip() for tab in match[1].split(',')]
32 |     return [default_tab]
33 | 
34 | def _activate_tab(filename, tab, default_tab):
35 |     if tab == 'default':
36 |         tab = default_tab
37 |     with open(filename, 'r') as f:
38 |         src = f.read()
39 |     cells = markdown.split_markdown(src)
40 |     for cell in cells:
41 |         cell_tab = _get_cell_tab(cell, default_tab)
42 |         if not cell_tab:
43 |             continue
44 |         if tab == 'all' or cell_tab == ['all'] or tab in cell_tab:
45 |             # activate
46 |             cell['class'] = '{.python .input}'
47 |         else: # disactivate
48 |             cell['class'] = 'python'
49 |     src = markdown.join_markdown_cells(cells)
50 |     with open(filename, 'w') as f:
51 |         f.write(src)
52 | 


--------------------------------------------------------------------------------
/docs/user/code_tabs.md:
--------------------------------------------------------------------------------
 1 | # Group Code Blocks into Tabs
 2 | 
 3 | Here is an example showing grouping code blocks into three tabs.
 4 | 
 5 | ## Example
 6 | 
 7 | Let's implement $a+b$. We first show instructions, then demonstrate the codes.
 8 | 
 9 | :begin_tab:`python`
10 | You need to have python installed
11 | 
12 | :end_tab:
13 | 
14 | :begin_tab:`numpy`
15 | You can install numpy by
16 | ```bash
17 | pip install numpy
18 | ```
19 | :end_tab:
20 | 
21 | :begin_tab:`cpython`
22 | Please install cpython
23 | :end_tab:
24 | 
25 | 
26 | ```{.python .input}
27 | a = [1,1,1]
28 | b = [2,2,2]
29 | [ia+ib for ia, ib in zip(a,b)]
30 | ```
31 | 
32 | ```{.python .input}
33 | #@tab numpy
34 | import numpy as np
35 | a = np.ones(3)
36 | b = np.ones(3)*2
37 | a + b
38 | ```
39 | 
40 | ```{.python .input}
41 | #@tab cpython
42 | # Just a place holder
43 | print(1+2)
44 | ```
45 | 
46 | Next let's implement $a - b$
47 | 
48 | ```{.python .input}
49 | a = [1,1,1]
50 | b = [2,2,2]
51 | [ia-ib for ia, ib in zip(a,b)]
52 | ```
53 | 
54 | ```{.python .input}
55 | #@tab numpy
56 | a = np.ones(3)
57 | b = np.ones(3)*2
58 | a - b
59 | ```
60 | 
61 | ## Usages
62 | 
63 | To enable multi-tabs, first configure the `tabs` entry in the `config.ini` file. For example, here we use `tabs = python, numpy, cpython`. `python` is the default tab. To specify a code block that doesn't belong to the default tab, add `#@tab`, followed by the tab name (case insensitive), in the first line of the code block.
64 | 
65 | Sometimes these codes blocks conflict with each others. We can activate one tab at a time, so only code blocks belong to this tab can be evaluated in Jupyter. For example
66 | 
67 | ```bash
68 | d2lbook activate default user/code_tabs.md  # activate the default tab
69 | d2lbook activate numpy user/code_tabs.md    # activate the numpy tab
70 | d2lbook activate all user/code_tabs.md      # activate all tabs
71 | ```
72 | 


--------------------------------------------------------------------------------
/docs/user/edit.md:
--------------------------------------------------------------------------------
 1 | # Editing Source Files
 2 | 
 3 | No matter whether it is a pure text file or a Jupyter notebook, we recommend that you save it as a markdown file. If it is a notebook, you can clear output before saving to make code review and version control easier. 
 4 | 
 5 | You can use your favorite markdown editors, e.g. [Typora](https://www.typora.io/), to edit markdown files directly. We enhanced markdown to support additional feature such as image/table captions and references, please refer to :numref:`sec_markdown` for more details. For a notebook, a Jupyter source code block is placed in a markdown code block with a `{.python .input}` tag, for example,
 6 | 
 7 | ````
 8 | ```{.python .input}
 9 | print('this is a Jupyter code cell')
10 | ```
11 | ````
12 | 
13 | Another way we recommend is using Jupyter to edit markdown files directly, especially when they contain source code blocks. Jupyter's default file format is `ipynb`. We can use the `notedown` plugin to have Jupyter open and save markdown files. 
14 | 
15 | You can install this extension by 
16 | 
17 | ```bash
18 | pip install mu-notedown
19 | ```
20 | 
21 | (`mu-notedown` is a fork of [notedown](https://github.com/aaren/notedown) with several modifications. You may need to uninstall the original `notedown` first.)
22 | 
23 | To turn on the `notedown` plugin by default whenever you run Jupyter Notebook do the following: First, generate a Jupyter Notebook configuration file (if it has already been generated, you can skip this step).
24 | 
25 | ```bash
26 | jupyter notebook --generate-config
27 | ```
28 | 
29 | 
30 | Then, add the following line to the end of the Jupyter Notebook configuration file (for Linux/macOS, usually in the path `~/.jupyter/jupyter_notebook_config.py`):
31 | 
32 | ```bash
33 | c.NotebookApp.contents_manager_class = 'notedown.NotedownContentsManager'
34 | ```
35 | 
36 | 
37 | Next restart your Jupyter, you should be able to open these markdowns in Jupyter as notebooks now.
38 | 
39 | ![Use Jupyter to edit :numref:`sec_create`](../img/jupyter.png)
40 | :width:`500px`
41 | 


--------------------------------------------------------------------------------
/docs/config.ini:
--------------------------------------------------------------------------------
 1 | # A demo for d2l-book.
 2 | [project]
 3 | 
 4 | # The project name, used as the filename of the package and the PDF file. For
 5 | # example, if set to d2l-book, then will build d2l-book.zip and d2l-book.pdf
 6 | name = d2l-book
 7 | 
 8 | # All author names
 9 | author = Mu Li
10 | 
11 | release = 0.1.17
12 | 
13 | [html]
14 | 
15 | # A list of links that is displayed on the navbar. A link consists of three
16 | # items: name, URL, and a fontawesome icon
17 | # (https://fontawesome.com/icons?d=gallery). Items are separated by commas.
18 | header_links = PDF, https://book.d2l.ai/d2l-book.pdf, fas fa-file-pdf,
19 |                Github, https://github.com/d2l-ai/d2l-book, fab fa-github
20 | 
21 | # The filename of the favicon
22 | favicon = img/favicon.png
23 | 
24 | html_logo = img/d2l-book.png
25 | 
26 | [pdf]
27 | 
28 | latex_logo = img/d2l.png
29 | 
30 | bibfile = refs.bib
31 | 
32 | [build]
33 | 
34 | # A list of wildcards to indicate the markdown files that need to be evaluated as
35 | # Jupyter notebooks.
36 | notebooks = *.md */*.md
37 | 
38 | 
39 | # A list of files, if anyone is modified after the last build, will re-build all
40 | # documents.
41 | dependences = config.ini
42 | 
43 | # A list of files that will be copied to the build folder.
44 | resources = img/ refs.bib
45 | 
46 | # Specify the tabs, separated by ",". The first one will be the default tab.
47 | tabs = python, numpy, cpython
48 | 
49 | [deploy]
50 | 
51 | 
52 | # Tracking ID for the HTML pages
53 | google_analytics_tracking_id = UA-96378503-15
54 | 
55 | # The S3 bucket that all files will copy to
56 | s3_bucket = s3://book.d2l.ai
57 | 
58 | # [colab]
59 | 
60 | # # The github repo to host the notebooks for colab, such as d2l-ai/d2l-book-colab
61 | # # Also make sure that the machine's ssh key is added to github before running
62 | # # "deploy" so that it can commit into d2l-ai/d2l-book-colab
63 | # github_repo = d2l-ai/d2l-book-colab
64 | 
65 | # # Colab cannot display SVG files with a relative fname or a github URL. You can
66 | # # replace it with your website URL. For example: img, http://book.d2l.ai/_images
67 | # # will "img/test.svg" to "http://book.d2l.ai/_images/test.svg"
68 | # replace_svg_url = img, http://book.d2l.ai/_images
69 | 
70 | 
71 | # [sagemaker]
72 | # github_repo = d2l-ai/d2l-book-sagemaker
73 | 


--------------------------------------------------------------------------------
/d2lbook/common.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | from typing import Optional, List, Any, Callable, Tuple
 3 | 
 4 | # Our special mark in markdown, e.g. :label:`chapter_intro`
 5 | md_mark_pattern = re.compile(':([-\/\\._\w]+):(`[\ \*-\/\\\._\w]+`)?')
 6 | # Same for md_mark_pattern, but for rst files
 7 | rst_mark_pattern = re.compile(':([-\/\\._\w]+):(``[\ \*-\/\\\._\w]+``)?')
 8 | # The source code tab mark
 9 | source_tab_pattern = re.compile('# *@tab +([\w\,\ ]+)')
10 | source_tab_pattern_2 = re.compile('%%tab +([\w\,\ ]+)')
11 | # Pattern to match notebook specific required libraries
12 | source_libs_required_pattern = re.compile(r"# *required_libs\((.*?)\)")
13 | 
14 | # Markdown code fence
15 | md_code_fence = re.compile('(```+) *(.*)')
16 | 
17 | def split_list(list_obj: List[Any], split_fn: Callable[[Any], Any]) -> List[List[Any]]:
18 |     """Cut a list into multiple parts when fn returns True"""
19 |     prev_pos = 0
20 |     ret = []
21 |     for i, item in enumerate(list_obj):
22 |         if split_fn(item):
23 |             ret.append(list_obj[prev_pos:i])
24 |             prev_pos = i
25 |     ret.append(list_obj[prev_pos:])
26 |     return ret
27 | 
28 | def group_list(
29 |         list_obj: List[Any],
30 |         status_fn: Callable[[Any, Any], Any]) -> List[Tuple[Any, List[Any]]]:
31 |     """Cut a list into multiple parts when based on the value returned by status_fn"""
32 |     prev_status = None
33 |     prev_pos = 0
34 |     ret = []
35 |     for i, item in enumerate(list_obj):
36 |         cur_status = status_fn(item, prev_status)
37 |         if prev_status is not None and cur_status != prev_status:
38 |             ret.append((prev_status, list_obj[prev_pos:i]))
39 |             prev_pos = i
40 |         prev_status = cur_status
41 |     ret.append((cur_status, list_obj[prev_pos:]))
42 |     return ret
43 | 
44 | def head_spaces(line: str):
45 |     """"Return the head spaces."""
46 |     return line[: len(line)-len(line.lstrip())]
47 | 
48 | def flatten(x):
49 |     """flatten a list of lists into a list."""
50 |     return [item for sublist in x for item in sublist]
51 | 
52 | def print_list(x):
53 |     print(f'len: {len(x)}')
54 |     for i, y in enumerate(x):
55 |         print(f'{i}\t{y}')
56 | 
57 | def print_dict(x):
58 |     print(f'len: {len(x)}')
59 |     for k in x:
60 |         print(f'{k}\t{x[k]}')
61 | 


--------------------------------------------------------------------------------
/d2lbook/markdown_test.py:
--------------------------------------------------------------------------------
 1 | from d2lbook import markdown, common
 2 | import unittest
 3 | 
 4 | _markdown_src = r'''# Test
 5 | 
 6 | first para
 7 | 
 8 | :begin_tab:`python2`
 9 | python is good
10 | :end_tab:
11 | 
12 | another para
13 | 
14 | :eqref:`sec_1`
15 | 
16 | :begin_tab:`python 3`
17 | python3 is better
18 | 
19 | ```python 3
20 | print(3)
21 | ```
22 | :end_tab:
23 | 
24 | ````bash
25 | ```bash
26 | $ ls
27 | ```
28 | ````
29 | '''
30 | 
31 | _markdown_text_src = r'''# Test
32 | :label:`sec`
33 | 
34 | THis is good. A paragraph.
35 | 
36 | ![Image](../a.png)
37 | :label:`a.png`
38 | 
39 | Assume A
40 | 
41 | $$
42 | X^{(N)} = \sum_{i=1}^N X_i.
43 | $$
44 | :label:`adsf`
45 | 
46 | and
47 | 
48 | $$\|\boldsymbol{x}\|_2 = \sqrt{\sum_{i=1}^n x_i^2}.$$
49 | 
50 | Here is a list
51 | - sadf
52 |   wer
53 |   - asdf sadf
54 |     sd sdf
55 | - asdf
56 | 
57 | 1. wer asdf
58 |   asdf asdf
59 | 
60 | 1. Run the code in this section. Change the conditional statement `x == y` in this section to `x < y` or `x > y`, and then see what kind of tensor you can get.
61 | 1. Replace the two tensors that operate by element in the broadcasting mechanism with other shapes, e.g., 3-dimensional tensors. Is the result the same as expected?
62 | '''
63 | 
64 | class TestMarkdown(unittest.TestCase):
65 | 
66 |     def test_split(self):
67 |         cells = markdown.split_markdown(_markdown_src)
68 |         self.assertEqual(len(cells), 5)
69 |         self.assertEqual(cells[0]['type'], 'markdown')
70 |         self.assertEqual(cells[1]['type'], 'markdown')
71 |         self.assertEqual(cells[1]['class'], '`python2`')
72 |         self.assertEqual(cells[3]['class'], '`python 3`')
73 |         self.assertEqual(cells[4]['class'], 'bash')
74 | 
75 |     def test_merge(self):
76 |         cells = markdown.split_markdown(_markdown_src)
77 |         src = markdown.join_markdown_cells(cells)
78 |         self.assertEqual(_markdown_src, src)
79 | 
80 |     def test_split_text(self):
81 |         cells = markdown.split_text(_markdown_text_src)
82 |         common.print_list(cells)
83 | 
84 |     def test_join_text(self):
85 |         cells = markdown.split_text(_markdown_text_src)
86 |         src = markdown.join_text(cells)
87 |         self.assertEqual(_markdown_text_src, src)
88 | 
89 | 
90 | if __name__ == '__main__':
91 |     unittest.main()
92 | 


--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
 1 | # D2L-Book: A Toolkit for Hands-on Books
 2 | 
 3 | The D2L Book (`d2lbook`) package helps you build and publish **a book
 4 | with Python code blocks**, or **Python package documents with tutorials**. You can
 5 | check [Dive into Deep Learning](https://d2l.ai/) for a book
 6 | example and [AutoGluon](https://autogluon.mxnet.io/) for a package document site
 7 | example.
 8 | 
 9 | `d2lbook` is designed to meet the following two requirements:
10 | 
11 | - Your book may contain **a large amount of Python code** and you
12 |   expect your readers to run them. Or your package documents have **multiple
13 |   tutorials** to walk readers through your package usage through examples.
14 |   The code should be runnable and maintainable.
15 | 
16 | - You would like to publish **both a HTML website and a printable PDF
17 |   version**. You expect the website should be modern, searchable and mobile
18 |   friendly, and the PDF version should be at the same quality as written using LaTeX.
19 | 
20 | 
21 | To achieve the above goals, `d2lbook` combines
22 | [Jupyter Notebook](https://jupyter.org/), the widely used interactive
23 | environment in Python, and [Sphinx](http://www.sphinx-doc.org/en/master/), the
24 | de facto document building system for Python packages. In particular, its main
25 | features include:
26 | 
27 | - Using [markdown](https://daringfireball.net/projects/markdown/) for your contents.
28 | - A minimal configuration file to customize the building so you can focus on the
29 |   contents.
30 | - Evaluating all code blocks to obtain their output before publishing to validate the
31 |   correctness. By default, `d2lbook` only evaluates the updated code blocks to save cost.
32 | - Being able to reference sections, figure, tables, equations, function, and
33 |   class.
34 | - Pipelines to publish your website through Github or AWS.
35 | 
36 | If `d2lbook` does not fit your requirements, you may check the following tools:
37 | 
38 | - [Jupyter Book](https://jupyterbook.org): A similar tool for building books 
39 |   from computational material with Jupyter Notebooks and MyST Markdown.
40 | - [gitbook](https://www.gitbook.com/): very convenient to push a book written
41 |   with markdown if you don't need to run them as Jupyter notebooks.
42 | - [sphinx-gallery](https://sphinx-gallery.github.io/stable/index.html), a Sphinx
43 |   plugin to evaluate and publish your tutorials. It requires you to know how
44 |   to use Sphinx and write your tutorials in `.py` format with the `rst` style.
45 | 
46 | ```eval_rst
47 | .. only:: html
48 | 
49 |    Table of Contents
50 |    -----------------
51 | ```
52 | 
53 | 
54 | ```toc
55 | :numbered:
56 | :maxdepth: 2
57 | 
58 | install
59 | user/index
60 | develop/index
61 | ```
62 | 
63 | 


--------------------------------------------------------------------------------
/scripts/install_fonts.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Install fonts to build PDF
 3 | 
 4 | 
 5 | ###########################################################
 6 | # Make sure system font dir exists already, if not, run:
 7 | # sudo mkdir /usr/share/fonts/opentype/
 8 | 
 9 | # Make sure unzip is installed already, if not, run:
10 | # sudo apt-get install unzip
11 | 
12 | # fc-cache is required, if not already installed, run:
13 | # sudo apt install fontconfig
14 | ###########################################################
15 | 
16 | 
17 | # En
18 | 
19 | wget -O source-serif-pro.zip https://www.fontsquirrel.com/fonts/download/source-serif-pro
20 | unzip source-serif-pro -d source-serif-pro
21 | sudo mv source-serif-pro /usr/share/fonts/opentype/
22 | 
23 | wget -O source-sans-pro.zip https://www.fontsquirrel.com/fonts/download/source-sans-pro
24 | unzip source-sans-pro -d source-sans-pro
25 | sudo mv source-sans-pro /usr/share/fonts/opentype/
26 | 
27 | wget -O source-code-pro.zip https://www.fontsquirrel.com/fonts/download/source-code-pro
28 | unzip source-code-pro -d source-code-pro
29 | sudo mv source-code-pro /usr/share/fonts/opentype/
30 | 
31 | wget -O Inconsolata.zip https://www.fontsquirrel.com/fonts/download/Inconsolata
32 | unzip Inconsolata -d Inconsolata
33 | sudo mv Inconsolata /usr/share/fonts/opentype/
34 | 
35 | sudo fc-cache -f -v
36 | 
37 | # Zh
38 | wget https://github.com/adobe-fonts/source-han-sans/releases/download/2.004R/SourceHanSansSC.zip
39 | wget -O SourceHanSerifSC.zip https://github.com/adobe-fonts/source-han-serif/releases/download/2.001R/09_SourceHanSerifSC.zip
40 | 
41 | unzip SourceHanSansSC.zip -d SourceHanSansSC
42 | unzip SourceHanSerifSC.zip -d SourceHanSerifSC
43 | 
44 | sudo mv SourceHanSansSC SourceHanSerifSC /usr/share/fonts/opentype/
45 | sudo fc-cache -f -v
46 | 
47 | # KO
48 | 
49 | wget https://github.com/adobe-fonts/source-han-sans/releases/download/2.004R/SourceHanSansK.zip
50 | wget -O SourceHanSerifK.zip https://github.com/adobe-fonts/source-han-serif/releases/download/2.001R/08_SourceHanSerifK.zip
51 | 
52 | unzip SourceHanSansK.zip -d SourceHanSansK
53 | unzip SourceHanSerifK.zip -d SourceHanSerifK
54 | 
55 | sudo mv SourceHanSansK SourceHanSerifK /usr/share/fonts/opentype/
56 | sudo fc-cache -f -v
57 | 
58 | # JA
59 | 
60 | wget https://github.com/adobe-fonts/source-han-sans/releases/download/2.004R/SourceHanSansJ.zip
61 | wget -O SourceHanSerifJ.zip https://github.com/adobe-fonts/source-han-serif/releases/download/2.001R/07_SourceHanSerifJ.zip
62 | 
63 | unzip SourceHanSansJ.zip -d SourceHanSansJ
64 | unzip SourceHanSerifJ.zip -d SourceHanSerifJ
65 | 
66 | sudo mv SourceHanSansJ SourceHanSerifJ /usr/share/fonts/opentype/
67 | sudo fc-cache -f -v
68 | 
69 | 
70 | # Remove all zip files
71 | rm Source*.zip source*.zip Inconsolata.zip
72 | 


--------------------------------------------------------------------------------
/docs/user/slides.md:
--------------------------------------------------------------------------------
 1 | # Creating Slides
 2 | 
 3 | We can mark a notebook and then create slides from that notebook. For example, here is the generate [slides](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-pytorch-slides/blob/main/chapter_preliminaries/ndarray.ipynb#/) from the markdown [source file](https://github.com/d2l-ai/d2l-en/blob/master/chapter_preliminaries/ndarray.md).
 4 | Let explain how to do it by the following example. It's a markdown file with marks to
 5 | generate slides.
 6 | 
 7 | ````md
 8 | # Data Manipulation
 9 | 
10 | ## Getting Started
11 | 
12 | To start, we can use `arange` to create a row vector `x`
13 | containing the first 12 integers starting with 0,
14 | though they are created as floats by default.
15 | 
16 | (**A tensor represents a (possibly multi-dimensional) array of numerical values. We can access a tensor's *shape*.**)
17 | 
18 | 
19 | ```{.python .input}
20 | import numpy as np
21 | 
22 | x = np.arange(12)
23 | x
24 | ```
25 | 
26 | [**Many**] more (**operations can be applied elementwise,**)
27 | including unary operators like exponentiation.
28 | (~~e.g. `exp`~~)
29 | 
30 | ```{.python .input}
31 | np.exp(x)
32 | ```
33 | 
34 | (**Even when shapes differ, we can still perform elementwise operations**)
35 | by invoking the *broadcasting mechanism*.
36 | 
37 | 
38 | ```{.python .input}
39 | a = np.arange(3).reshape(3, 1)
40 | b = np.arange(2).reshape(1, 2)
41 | a, b
42 | ```
43 | 
44 | ````
45 | 
46 | The above code block will generate 2 slides. The first slide contains the following contents:
47 | 
48 | ````md
49 | # Data Manipulation
50 | 
51 | A tensor represents a (possibly multi-dimensional) array of numerical values. We can access a tensor's *shape*.
52 | 
53 | ```{.python .input}
54 | import numpy as np
55 | 
56 | x = np.arange(12)
57 | x
58 | ```
59 | ````
60 | 
61 | You can see that we automatically copied the level-1 heading and the code block.
62 | In addition, we copied the text between `(**` and `**)`, while dropped all others.
63 | 
64 | The second slide contains the following:
65 | 
66 | ````md
67 | Many operations can be applied elementwise,
68 | e.g. `exp`
69 | 
70 | ```{.python .input}
71 | np.exp(x)
72 | ```
73 | 
74 | Even when shapes differ, we can still perform elementwise operations
75 | 
76 | ```{.python .input}
77 | a = np.arange(3).reshape(3, 1)
78 | b = np.arange(2).reshape(1, 2)
79 | a, b
80 | ```
81 | ````
82 | 
83 | First you can see is that all text between these three paris
84 | (`[**`, `**]`),
85 | (`(**`, `**)`), and
86 | (`(~~`, `~~)`) are kept.
87 | Here `[` means starting a new slide, while `(` means continuing the current slide.
88 | (Level-1 heading will start a new slide, so we used `(` in the previous block).
89 | In addition, `~~` means the text will only appear in slides,
90 | why not in the normal notebooks, htmls or pdfs.
91 | 
92 | Second, we didn't start a new slide before the last code block, i.e. there is no
93 | level-1 heading and no (`[**`, `**]`) pair, so the last two code blocks are merged
94 | into the same slide.
95 | 


--------------------------------------------------------------------------------
/docs/user/create.md:
--------------------------------------------------------------------------------
 1 | # Creating Your Project
 2 | :label:`sec_create`
 3 | 
 4 | Let's start with a simple project from scratch.
 5 | 
 6 | ## Project From Scratch
 7 | 
 8 | First make a folder for our project.
 9 | 
10 | ```{.python .input  n=1}
11 | !mkdir -p mybook
12 | ```
13 | 
14 | Then create two pages. The `index.md` is the index page which contains the
15 | table of contents (TOC), which includes the other page `get_started.md`. Note
16 | that the TOC is defined in a code block with tag `toc`. If you are familiar with
17 | Sphinx, you can find it's similar to the TOC definition in Sphinx. Please refer
18 | to :numref:`sec_markdown` for more extensions `d2lbook` added to markdown. Also note we used the build-in magic `writefile` to save a code block into file provided by [Jupyter](https://ipython.readthedocs.io/en/stable/interactive/magics.html).
19 | 
20 | ```{.python .input  n=2}
21 | %%writefile mybook/index.md
22 | # My Book
23 | 
24 | The starting page of my book with `d2lbook`.
25 | 
26 | ````toc
27 | get_started
28 | ````
29 | ```
30 | 
31 | ```{.python .input  n=3}
32 | %%writefile mybook/get_started.md
33 | # Getting Started
34 | 
35 | Please first install my favorite package `numpy`.
36 | ```
37 | 
38 | Now let's build the HTML version.
39 | 
40 | ```{.python .input  n=4}
41 | !cd mybook && d2lbook build html
42 | ```
43 | 
44 | The HTML index page is then available at `mybook/_build/html/index.html`.
45 | 
46 | ## Configuration
47 | 
48 | You can customize how results are built and published through `config.ini` on the root folder.
49 | 
50 | ```{.python .input  n=5}
51 | %%writefile mybook/config.ini
52 | 
53 | [project]
54 | # Specify the PDF filename to mybook.pdf
55 | name = mybook  
56 | # Specify the authors names in PDF
57 | author = Adam Smith, Alex Li  
58 | 
59 | [html]
60 | # Add two links on the navbar. A link consists of three
61 | # items: name, URL, and a fontawesome icon. Items are separated by commas.
62 | header_links = PDF, https://book.d2l.ai/d2l-book.pdf, fas fa-file-pdf,
63 |                Github, https://github.com/d2l-ai/d2l-book, fab fa-github
64 | ```
65 | 
66 | Let's clear and build again.
67 | 
68 | ```{.python .input}
69 | !cd mybook && rm -rf _build && d2lbook build html
70 | ```
71 | 
72 | If you open `index.html` again, you will see the two links on the navigation bar. 
73 | 
74 | Let build the PDF output, you will find `Output written on mybook.pdf (7 pages).` in the output logs. 
75 | 
76 | ```{.python .input}
77 | !cd mybook && d2lbook build pdf
78 | ```
79 | 
80 | We will cover more configuration options in the following sections. You can check [default_config.ini](https://github.com/d2l-ai/d2l-book/blob/master/d2lbook/config_default.ini) for all configuration options and their default values. Also check these examples `config.ini` in
81 | 
82 | - [This website](https://github.com/d2l-ai/d2l-book/blob/master/docs/config.ini)
83 | - [Dive into Deep Learning](https://github.com/d2l-ai/d2l-en/blob/master/config.ini)
84 | 
85 | Last, let's clear our workspace.
86 | 
87 | ```{.python .input}
88 | !rm -rf mybook
89 | ```
90 | 


--------------------------------------------------------------------------------
/d2lbook/library_test.py:
--------------------------------------------------------------------------------
 1 | from d2lbook import library
 2 | import unittest
 3 | 
 4 | from collections import namedtuple
 5 | 
 6 | class Cell:
 7 |     def __init__(self, cell_type, source):
 8 |         self.cell_type = cell_type
 9 |         self.source = source
10 | 
11 | class Nb:
12 |     def __init__(self, cells):
13 |         self.cells = cells
14 | 
15 | class TestLibrary(unittest.TestCase):
16 |     def setUp(self):
17 |         self.nb = Nb([Cell('code', '')])
18 |         self.tab_lib = {
19 |             'lib_name': 'torch',
20 |             'simple_alias':
21 |             'ones, zeros, tensor, arange, meshgrid, sin, sinh, cos, cosh, tanh, linspace, exp, log, normal, rand(, matmul, int32, float32, concat -> cat, stack, abs, eye',
22 |             'fluent_alias':
23 |             'numpy -> detach().numpy, reshape, size -> numel, to, reduce_sum -> sum, argmax, astype -> type, transpose -> t',
24 |             'alias': '',
25 |             'reverse_alias': '',
26 |             'args_alias': 'randn(size, device=None) -> np.random.randn(size=size, ctx=device)'
27 |             }
28 | 
29 |     def test_replace_alias(self):
30 |         # Test https://github.com/d2l-ai/d2l-book/issues/14
31 |         pairs = [  # before, after
32 |             ('X = d2l.reshape(d2l.arange(10,20),(2,3))',
33 |              'X = torch.arange(10, 20).reshape((2, 3))'),
34 |             ('d2l.numpy(a)', 'a.detach().numpy()'),
35 |             ('d2l.transpose(a)', 'a.t()'),
36 |             ('metric.add(l * d2l.size(y), d2l.size(y))',
37 |              'metric.add(l * y.numel(), y.numel())'),
38 |             ('float(d2l.reduce_sum(cmp.astype(y.dtype)))',
39 |              'float(cmp.astype(y.dtype).sum())'),
40 |             ('d2l.numpy(nn.LeakyReLU(alpha)(x))',
41 |              'nn.LeakyReLU(alpha)(x).detach().numpy()'),
42 |             ('d2l.reshape(X_tile(1 - d2l.eye(n_train)).astype(\'bool\'), (1,2))',
43 |              'X_tile(1 - torch.eye(n_train)).astype(\'bool\').reshape((1, 2))'
44 |              ),
45 |             ('float(d2l.reduce_sum(d2l.astype(cmp, y.dtype)))',
46 |              'float(cmp.type(y.dtype).sum())'),
47 |             ('\nenc_attention_weights = d2l.reshape(\n    d2l.concat(net.encoder.attention_weights, 0),\n    (num_layers, num_heads, -1, num_steps))\nenc_attention_weights.shape = 2\n',
48 |              'enc_attention_weights = torch.cat(net.encoder.attention_weights, 0).reshape(\n    (num_layers, num_heads, -1, num_steps))\nenc_attention_weights.shape = 2'
49 |              ),
50 |             ('float(d2l.reduce_sum(d2l.abs(Y1 - Y2))) < 1e-6',
51 |              'float(torch.abs(Y1 - Y2).sum()) < 1e-6'),
52 |             ('d2l.plt.scatter(d2l.numpy(features[:, a + b]), d2l.numpy(labels), 1);',
53 |              'd2l.plt.scatter(features[:, a + b].detach().numpy(),labels.detach().numpy(), 1);'
54 |              ),
55 |             ('d2l.reshape(multistep_preds[i - tau: i], (1, -1))',
56 |              'multistep_preds[i - tau:i].reshape((1, -1))'),
57 |             ('X = d2l.reshape(d2l.arange(16, dtype=d2l.float32), (1, 1, 4, 4))',
58 |              'X = torch.arange(16, dtype=torch.float32).reshape((1, 1, 4, 4))'
59 |              ),
60 |             ('# comments\nX = d2l.reshape(a)', '# comments\nX = a.reshape()'),
61 |             ('X = d2l.reshape(a)  # comments', 'X = a.reshape()  # comments'),
62 |             ('Y[i, j] = d2l.reduce_sum((X[i: i + h, j: j + w] * K))',
63 |              'Y[i, j] = (X[i:i + h, j:j + w] * K).sum()'),
64 |             ('d2l.randn(size=(1,2)) * 0.01',
65 |              'np.random.randn(size=(1,2)) * 0.01'),
66 |              ('d2l.randn(size=(1,2), device=d2l.try_gpu()) * 0.01',
67 |              'np.random.randn(size=(1,2), ctx=d2l.try_gpu()) * 0.01'
68 |              ),
69 |              
70 |              ]
71 | 
72 |         for a, b in pairs:
73 |             self.nb.cells[0].source = a
74 |             nb = library.replace_alias(self.nb, self.tab_lib)
75 |             compact = lambda x: x.replace('\n', '').replace(' ', '')
76 |             self.assertEqual(compact(nb.cells[0].source), compact(b))
77 | 


--------------------------------------------------------------------------------
/docs/user/build.md:
--------------------------------------------------------------------------------
 1 | # Building 
 2 | 
 3 | This section we will explain various options to build your projects. This options can be grouped into four categories:
 4 | 
 5 | 1. Sanity check
 6 |    - `d2lbook build linkcheck` will check if all internal and external links are accessible.  
 7 |    - `d2lbook build outputcheck` will check if no notebook will contain code outputs
 8 | 1. Building results
 9 |    - `d2lbook build html`: build the HTML version into `_build/html`
10 |    - `d2lbook build pdf`: build the PDF version into `_build/pdf`
11 |    - `d2lbook build pkg`: build a zip file contains all `.ipynb` notebooks
12 | 1. Additional features   
13 |    - `d2lbook build colab`: convert all notebooks can be run on Google Colab into `_build/colab`. See more in :numref:`sec_colab`
14 |    - `d2lbook build lib`: build a Python package so we can reuse codes in other notebooks. See more in XXX.
15 | 1. Internal stages, which often are triggered automatically.  
16 |    - `d2lbook build eval`: evaluate all notebooks and save them as `.ipynb` notebooks into `_build/eval`
17 |    - `d2lbook build rst`: convert all notebooks into `rst` files and create a Sphinx project in `_build/rst`
18 |    
19 | 
20 | ## Building Cache
21 | 
22 | We encourage you to evaluate your notebooks to obtain code cell results, instead of keeping these results in the source files for two reasons:
23 | 1. These results make code review difficult, especially when they have randomness either due to numerical precision or random number generators. 
24 | 1. A notebook hasn't evaluated for a while may be broken due to package upgrading. 
25 | 
26 | But the evaluation costs additional overhead during building. We recommend to limit the runtime for each notebook within a few minutes. And `d2lbook` will reuse the previous built and only evaluate the modified notebooks.
27 | 
28 | For example, the average runtime of a notebook (section) in [Dive into Deep Learning](https://d2l.ai) is about 2 minutes on a GPU machine, due to training neural networks. It contains more than 100 notebooks, which make the total runtime cost 2-3 hours. In reality, each code change will only modify a few notebooks and therefore the [build time](http://ci.d2l.ai/blue/organizations/jenkins/d2l-en/activity) is often less than 10 minutes. 
29 | 
30 | Let's see how it works. First create a project as we did in :numref:`sec_create`. 
31 | 
32 | ```{.python .input}
33 | !mkdir -p cache
34 | ```
35 | 
36 | ```{.python .input}
37 | %%writefile cache/index.md
38 | # My Book
39 | 
40 | The starting page of my book with `d2lbook`.
41 | 
42 | ````toc
43 | get_started
44 | ````
45 | ```
46 | 
47 | ```{.python .input}
48 | %%writefile cache/get_started.md
49 | # Getting Started
50 | 
51 | Please first install my favorite package `numpy`.
52 | ```
53 | 
54 | ```{.python .input}
55 | !cd cache; d2lbook build html
56 | ```
57 | 
58 | You can see `index.md` is evaluated. (Though it doesn't contain codes, it's fine to evaluate it as a Jupyter notebook.)
59 | 
60 | If building again, we will see no notebook will be evaluated.
61 | 
62 | ```{.python .input}
63 | !cd cache; d2lbook build html
64 | ```
65 | 
66 | Now let's modify `get_started.md`, you will see it will be re-evaluated, but not `index.md`.  
67 | 
68 | ```{.python .input}
69 | %%writefile cache/get_started.md
70 | # Getting Started
71 | 
72 | Please first install my favorite package `numpy>=1.18`.
73 | ```
74 | 
75 | ```{.python .input}
76 | !cd cache; d2lbook build html
77 | ```
78 | 
79 | One way to trigger the whole built is removing the saved notebooks in `_build/eval`, or simply deleting `_build`. Another way is specifying some dependencies. For example, in the following cell we add `config.ini` into the dependencies. Every time `config.ini` is modified, it will invalid the cache of all notebooks and trigger a build from scratch. 
80 | 
81 | 
82 | ```{.python .input}
83 | %%writefile cache/config.ini
84 | 
85 | [build]
86 | dependencies = config.ini
87 | ```
88 | 
89 | ```{.python .input}
90 | !cd cache; d2lbook build html
91 | ```
92 | 
93 | Last, let's clean our workspace. 
94 | 
95 | ```{.python .input}
96 | !rm -rf cache
97 | ```
98 | 


--------------------------------------------------------------------------------
/d2lbook/sphinx.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import logging
 3 | from d2lbook import sphinx_template as template
 4 | from d2lbook import utils
 5 | 
 6 | __all__ = ['prepare_sphinx_env']
 7 | 
 8 | def prepare_sphinx_env(config):
 9 |     env = SphinxEnv(config)
10 |     env.prepare_env()
11 | 
12 | class SphinxEnv(object):
13 |     def __init__(self, config):
14 |         self.config = config
15 |         if self.config.pdf['style'] == 'cambridge':
16 |             self.pyconf = template.sphinx_conf_cambridge
17 |         else:
18 |             self.pyconf = template.sphinx_conf
19 | 
20 |     def prepare_env(self):
21 |         self._copy_static_files()
22 |         self._update_header_links()
23 |         self._write_js()
24 |         self._write_css()
25 |         for key in self.config.project:
26 |             self._update_pyconf(key, self.config.project[key])
27 |         self._update_pyconf('index', self.config.build['index'])
28 |         self._update_pyconf('sphinx_configs', self.config.build['sphinx_configs'])
29 | 
30 |         extensions = ['recommonmark', 'sphinxcontrib.bibtex',
31 |                       'sphinxcontrib.rsvgconverter', 'sphinx.ext.autodoc',
32 |                       'sphinx.ext.viewcode']
33 |         extensions.extend(self.config.build['sphinx_extensions'].split())
34 |         self._update_pyconf('extensions', ','.join('"'+ext+'"' for ext in extensions))
35 |         self._update_pyconf('bibfile', self.config.pdf['bibfile'])
36 |         for font in ['main_font', 'sans_font', 'mono_font']:
37 |             font_value = ''
38 |             if self.config.pdf[font]:
39 |                 font_value = '\set%s{%s}' % (font.replace('_', ''), self.config.pdf[font])
40 |             self._update_pyconf(font, font_value)
41 | 
42 |         fname = os.path.join(self.config.rst_dir, 'conf.py')
43 |         with open(fname, 'w') as f:
44 |             f.write(self.pyconf)
45 | 
46 |     def _update_pyconf(self, key, value):
47 |         self.pyconf = self.pyconf.replace(key.upper(), value)
48 | 
49 |     def _copy_static_files(self):
50 |         static_keys = [('html', 'favicon'), ('html', 'html_logo'), ('pdf', 'latex_logo')]
51 |         for attribute, key in static_keys:
52 |             if attribute == 'html':
53 |                 fname = self.config.html[key]
54 |             elif attribute == 'pdf':
55 |                 fname = self.config.pdf[key]
56 |             if not fname:
57 |                 self._update_pyconf(key, '')
58 |                 continue
59 |             sphinx_fname = os.path.join(self.config.rst_dir, '_static',
60 |                                         os.path.basename(fname))
61 |             utils.copy(fname, sphinx_fname)
62 |             self._update_pyconf(key, os.path.join(
63 |                 '_static', os.path.basename(fname)))
64 | 
65 |     def _update_header_links(self):
66 |         items = utils.split_config_str(self.config.html['header_links'], 3)
67 |         sphinx_links = ''
68 |         for tk in items:
69 |             if tk:
70 |                 sphinx_links += "('%s', '%s', True, '%s')," % (tk[0], tk[1], tk[2])
71 |         self._update_pyconf('header_links', sphinx_links)
72 | 
73 |     def _write_js(self):
74 |         d2l_js = (template.shorten_sec_num + template.replace_qr
75 |                   + template.copybutton_js + template.discourse_js + template.tabbar_js)
76 |         g_id = 'google_analytics_tracking_id'
77 |         if g_id in self.config.deploy:
78 |             d2l_js += template.google_tracker.replace(
79 |                 g_id.upper(), self.config.deploy[g_id])
80 | 
81 |         os.makedirs(os.path.join(self.config.rst_dir, '_static'), exist_ok=True)
82 |         fname = os.path.join(self.config.rst_dir, '_static', 'd2l.js')
83 |         with open(fname, 'w') as f:
84 |             f.write(d2l_js)
85 |             for fname in utils.find_files(self.config.html['include_js'], self.config.src_dir):
86 |                 with open (fname, 'r') as fin:
87 |                     f.write(fin.read())
88 | 
89 |     def _write_css(self):
90 |         fname = os.path.join(self.config.rst_dir, '_static', 'd2l.css')
91 |         d2l_css = template.hide_bibkey_css + template.copybutton_css + \
92 |             template.limit_output_length_css + template.tabbar_css
93 |         with open(fname, 'w') as f:
94 |             f.write(d2l_css)
95 |             for fname in utils.find_files(self.config.html['include_css'], self.config.src_dir):
96 |                 with open (fname, 'r') as fin:
97 |                     f.write(fin.read())
98 | 


--------------------------------------------------------------------------------
/d2lbook/deploy.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import pathlib
  3 | import sys
  4 | import logging
  5 | import argparse
  6 | import shutil
  7 | from d2lbook.utils import *
  8 | from d2lbook.config import Config
  9 | from d2lbook import colab
 10 | from d2lbook import sagemaker
 11 | from d2lbook import slides
 12 | import glob
 13 | 
 14 | __all__  = ['deploy']
 15 | 
 16 | commands = ['html', 'pdf', 'pkg', 'colab', 'sagemaker', 'all', 'slides']
 17 | 
 18 | def deploy():
 19 |     parser = argparse.ArgumentParser(description='Deploy documents')
 20 |     parser.add_argument('commands', nargs='+', choices=commands)
 21 |     parser.add_argument('--s3', help='s3 bucket')
 22 |     args = parser.parse_args(sys.argv[2:])
 23 |     config = Config()
 24 |     if args.s3:
 25 |         config.deploy['s3_bucket'] = args.s3
 26 |     if config.deploy['s3_bucket']:
 27 |         deployer = S3Deployer(config)
 28 |     elif config.deploy['github_repo']:
 29 |         deployer = GithubDeployer(config)
 30 |     else:
 31 |         deployer = Deployer(config)
 32 |     for cmd in args.commands:
 33 |         getattr(deployer, cmd)()
 34 | 
 35 | class Deployer(object):
 36 |     def __init__(self, config):
 37 |         self.config = config
 38 | 
 39 |     def colab(self):
 40 |         _colab = colab.Colab(self.config)
 41 |         if not _colab.valid():
 42 |             return
 43 |         def _run():
 44 |             repo = _colab.git_repo(self.config.tab)
 45 |             bash_fname = os.path.join(os.path.dirname(__file__), 'upload_github.sh')
 46 |             run_cmd(['bash', bash_fname, self.config.colab_dir, repo,
 47 |                      self.config.project['release']])
 48 |         tab = self.config.tab
 49 |         self.config.set_tab('all')
 50 |         self.config.iter_tab(_run)
 51 |         self.config.set_tab(tab)
 52 | 
 53 |     def sagemaker(self):
 54 |         _sagemaker = sagemaker.Sagemaker(self.config)
 55 |         if not _sagemaker.valid():
 56 |             return
 57 |         def _run():
 58 |             repo = _sagemaker.git_repo(self.config.tab)
 59 |             bash_fname = os.path.join(os.path.dirname(__file__), 'upload_github.sh')
 60 |             run_cmd(['bash', bash_fname, self.config.sagemaker_dir, repo,
 61 |                      self.config.project['release']])
 62 |         tab = self.config.tab
 63 |         self.config.set_tab('all')
 64 |         self.config.iter_tab(_run)
 65 |         self.config.set_tab(tab)
 66 | 
 67 |     def slides(self):
 68 |         tab = self.config.tab
 69 |         self.config.set_tab('all')
 70 |         self.config.iter_tab(lambda: slides.Slides(self.config).deploy())
 71 |         self.config.set_tab(tab)
 72 | 
 73 |     def _get_pdfs(self):        
 74 |         # get all generated pdfs
 75 |         pdfs = list(glob.glob(self.config.tgt_dir+'/pdf*/'+self.config.project['name']+'*.pdf'))
 76 |         rets = []
 77 |         for p in pdfs:
 78 |             p = pathlib.Path(p)
 79 |             tks = p.parent.name.split('_')
 80 |             if len(tks) > 1:
 81 |                 tab = tks[1]
 82 |                 if p.with_suffix('').name.split('-')[-1] != tab:
 83 |                     continue
 84 |             rets.append(str(p))
 85 |         return rets
 86 |         
 87 | class GithubDeployer(Deployer):
 88 |     def __init__(self, config):
 89 |         super(GithubDeployer, self).__init__(config)
 90 |         self.git_dir = os.path.join(self.config.tgt_dir, 'github_deploy')
 91 |         shutil.rmtree(self.git_dir, ignore_errors=True)
 92 |         mkdir(self.git_dir)
 93 | 
 94 |     def html(self):
 95 |         run_cmd(['cp -r', os.path.join(self.config.html_dir, '*'), self.git_dir])
 96 | 
 97 |     def pdf(self):
 98 |         for pdf in self._get_pdfs():
 99 |             shutil.copy(pdf, self.git_dir)
100 | 
101 |     def pkg(self):
102 |         shutil.copy(self.config.pkg_fname, self.git_dir)
103 | 
104 |     def __del__(self):
105 |         bash_fname = os.path.join(os.path.dirname(__file__), 'upload_github.sh')
106 |         run_cmd(['bash', bash_fname, self.git_dir, self.config.deploy['github_repo'], self.config.project['release']])
107 | 
108 | class S3Deployer(Deployer):
109 |     def __init__(self, config):
110 |         super(S3Deployer, self).__init__(config)
111 | 
112 |     def html(self):
113 |         bash_fname = os.path.join(os.path.dirname(__file__), 'upload_doc_s3.sh')
114 |         run_cmd(['bash', bash_fname, self.config.html_dir, self.config.deploy['s3_bucket']])
115 | 
116 |     def pdf(self):
117 |         url = self.config.deploy['s3_bucket']
118 |         if not url.endswith('/'):
119 |             url += '/'
120 |         for pdf in self._get_pdfs():
121 |             logging.info('cp %s to %s', pdf, url)
122 |             run_cmd(['aws s3 cp', pdf, url, "--acl 'public-read' --quiet"])
123 | 
124 |     def _deploy_other_files(self, tgt_url):
125 |         other_urls = self.config.deploy['other_file_s3urls'].split()
126 |         for other_url in other_urls:
127 |             logging.info('cp %s to %s', other_url, tgt_url)
128 |             run_cmd(['aws s3 cp', other_url, tgt_url, "--acl 'public-read' --quiet"])
129 | 
130 |     def pkg(self):
131 |         url = self.config.deploy['s3_bucket']
132 |         if not url.endswith('/'):
133 |             url += '/'
134 |         logging.info('cp %s to %s', self.config.pkg_fname, url)
135 |         run_cmd(['aws s3 cp', self.config.pkg_fname, url, "--acl 'public-read' --quiet"])
136 |         self._deploy_other_files(url)
137 | 
138 |     def all(self):
139 |         self.html()
140 |         self.pdf()
141 |         self.pkg()
142 | 


--------------------------------------------------------------------------------
/docs/user/deploy.md:
--------------------------------------------------------------------------------
  1 | # Deploying
  2 | 
  3 | You can copy the built result to any of your favorite places that can serve content online. Otherwise `d2lbook` provides two ways to deploy your build results: deploying through [Github](http://github.com/) or through [AWS](https://aws.amazon.com/).
  4 | 
  5 | ## Deploying Through Github
  6 | 
  7 | [Github Pages](https://pages.github.com/) allow us to host a website through a Github repo. To do so, we first need to create a github repo, for example we created [d2l-ai/d2l-book-deploy-demo](https://github.com/d2l-ai/d2l-book-deploy-demo) for this example. Then enable serving from the master branch in `Settings -> GitHub Pages`. You will get a URL to access it. It is [d2l-ai.github.io/d2l-book-deploy-demo](https://d2l-ai.github.io/d2l-book-deploy-demo/) for this example. You can add anything to `README.md`, which will not show on the website.
  8 | 
  9 | ![Enable serving from master branch at Github](../img/github_pages.png)
 10 | :width:`400px`
 11 | 
 12 | Now let's create a project with `[deploy] github_repo` specified and build both HTML and PDF. You will see a large amount of logging information thanks to LaTeX,  more exactly, `xelatex`.
 13 | 
 14 | ```{.python .input}
 15 | !mkdir -p deploy
 16 | ```
 17 | 
 18 | ```{.python .input}
 19 | %%writefile deploy/index.md
 20 | # Deploying Demo for d2lbook
 21 | 
 22 | This is a demo to deploy on Github.
 23 | 
 24 | ````toc
 25 | get_started
 26 | ````
 27 | ```
 28 | 
 29 | ```{.python .input}
 30 | %%writefile deploy/get_started.md
 31 | # Getting Started
 32 | 
 33 | Please first install my favorite package `numpy`.
 34 | ```
 35 | 
 36 | ```{.python .input}
 37 | %%writefile deploy/config.ini
 38 | [project]
 39 | name = deply-demo
 40 | 
 41 | [html]
 42 | header_links = PDF, https://https://d2l-ai.github.io/d2l-book-deploy-demo/deply-demo.pdf, fas fa-file-pdf
 43 | 
 44 | [deploy]
 45 | github_repo = d2l-ai/d2l-book-deploy-demo
 46 | ```
 47 | 
 48 | ```{.python .input}
 49 | !cd deploy; d2lbook build html pdf
 50 | ```
 51 | 
 52 | To deploy to Github, you need to have your machine's [SSH key imported to Github](https://github.com/settings/keys). Otherwise, you may need to type in your account and password. When it is done, you can assess the results online in one or two minutes. For this example, the URL is [/d2l-ai.github.io/d2l-book-deploy-demo/](https://d2l-ai.github.io/d2l-book-deploy-demo/).
 53 | 
 54 | ```{.python .input}
 55 | !cd deploy; d2lbook deploy html pdf
 56 | ```
 57 | 
 58 | Lastly, let's clean our workspace.
 59 | 
 60 | ```{.python .input}
 61 | !rm -rf deploy
 62 | ```
 63 | 
 64 | ## Deploying Through AWS
 65 | 
 66 | Another supported option is deploying through AWS. This option provides more flexibility but requires you to know the basic usage of AWS.
 67 | 
 68 | We recommend the following procedure for the deployment:
 69 | 
 70 | 1. Copying results into [S3](https://aws.amazon.com/s3/).
 71 | 2. Serving with a [CDN](https://en.wikipedia.org/wiki/Content_delivery_network) by using [CloudFront](https://aws.amazon.com/cloudfront/) to reduce the latency.
 72 | 3. Buying and adding a customized domain at [Route 53](https://aws.amazon.com/route53/)
 73 | 
 74 | Now let's walk through these three steps one-by-one. Before getting started, you need to have a valid AWS account and type in your [AWS access key](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html) by running `aws configure`.
 75 | 
 76 | First, create a S3 bucket. You can use the targeted domain name as your bucket name. For example, this site is deployed at book.d2l.ai, then we created a bucket with the name `book.d2l.ai`. In addition, we need to disable blocking public access through ACLs when creating this bucket, see :numref:`fig_s3-acl` and enable static website hosting, see :numref:`fig_s3-web-hosting`. Then you will get a URL to access this bucket. In our example, it's http://book.d2l.ai.s3-website-us-west-2.amazonaws.com/.
 77 | 
 78 | ![Disable blocking public access through ACLs](../img/s3-acl.png)
 79 | 
 80 | :label:`fig_s3-acl`
 81 | :width:`500px`
 82 | 
 83 | ![Enable static web hosting](../img/s3-web-hosting.png)
 84 | 
 85 | :label:`fig_s3-web-hosting`
 86 | :width:`500px`
 87 | 
 88 | Second, create a new CloudFront distribution, by specifying the following options:
 89 | - Origin Domain Name: the previous S3 URL without `http://`, e.g. `book.d2l.ai.s3-website-us-west-2.amazonaws.com`
 90 | - Default Root Object: `index.html`
 91 | - [optional] Alternate Domain Names (CNAMEs): your target domain name, e.g. `book.d2l.ai`
 92 | - [optional] SSL Certificate: you can create one in [Certificate Manager](https://aws.amazon.com/certificate-manager/) and then select it.
 93 | 
 94 | After a few minute, we will obtain the domain name such as `d1y0wi2ibwmzrt.cloudfront.net`.
 95 | 
 96 | Third, you can either buy a domain at Route 53 or add Route 53's DNS into your existing domain. After that, create a "Hosted Zone" and then a Type-A Record Set with the CoundFront domain name as the alias target, see :numref:`fig_record-set` for an example.
 97 | 
 98 | ![Create a Record Set](../img/record-set.png)
 99 | 
100 | :label:`fig_record-set`
101 | :width:`350px`
102 | 
103 | Once you are done, specify your S3 bucket name in the following `config.ini` entry.
104 | 
105 | ```bash
106 | [deploy]
107 | s3_bucket = s3://book.d2l.ai
108 | ```
109 | 
110 | Each time you run `d2lbook deploy`, all results will be synchronized to this bucket and deployed automatically. Note that since we enabled a CDN, any new change may take a while to be shown in your URL (e.g. http://book.d2l.ai). But you can check the S3 bucket URL (e.g. http://book.d2l.ai.s3-website-us-west-2.amazonaws.com) to review the changes immediately.
111 | 


--------------------------------------------------------------------------------
/d2lbook/config.py:
--------------------------------------------------------------------------------
  1 | import configparser
  2 | import os
  3 | import logging
  4 | 
  5 | class Config():
  6 |     def __init__(self, tab=None, config_fname='config.ini'):
  7 |         config = configparser.ConfigParser()
  8 |         default_config_name = os.path.join(
  9 |             os.path.dirname(__file__), 'config_default.ini')
 10 |         config.read(default_config_name, encoding='UTF-8')
 11 |         if os.path.exists(config_fname):
 12 |             logging.info('Load configure from %s', config_fname)
 13 |             config.read(config_fname, encoding='UTF-8')
 14 |         tabs = config['build']['tabs']
 15 |         self.tabs = [tab.strip() for tab in tabs.lower().split(',')] if tabs else []
 16 |         self.default_tab = self.tabs[0] if self.tabs else None
 17 |         self.tab = tab.lower() if tab else self.default_tab
 18 |         if self.tab:
 19 |             assert self.tabs, 'No tabs is specified'
 20 |             if self.tab != 'all':
 21 |                 assert self.tab in tabs, self.tab + ' is not found in tabs, which are ' + tabs
 22 |         self.translation = config['translation']
 23 |         self.build = config['build']
 24 |         self.deploy = config['deploy']
 25 |         self.project = config['project']
 26 |         self.html = config['html']
 27 |         self.pdf = config['pdf']
 28 |         self.slides = config['slides']
 29 |         self.library = dict(config['library'].items())
 30 |         for tab in self.tabs:
 31 |             if f'library-{tab}' in config:
 32 |                 self.library[tab] = dict(config[f'library-{tab}'].items())
 33 |         self.colab = config['colab']
 34 |         self.sagemaker = config['sagemaker']
 35 | 
 36 |         # A bunch of directories
 37 |         self.src_dir = self.build['source_dir']
 38 |         self.tgt_dir = self.build['output_dir']
 39 |         self.eval_dir = os.path.join(self.tgt_dir, 'eval')
 40 |         self.ipynb_dir = os.path.join(self.tgt_dir, 'ipynb')
 41 |         self.rst_dir = os.path.join(self.tgt_dir, 'rst')
 42 |         try:
 43 |             self.html_dir = self.build['html_dir']
 44 |         except KeyError:
 45 |             self.html_dir = os.path.join(self.tgt_dir, 'html')
 46 |         # MM20200104 changed to allow separate html_dir to be specified in config.ini, e.g. put 'html_dir = docs' in the [build] section
 47 |         self.pdf_dir = os.path.join(self.tgt_dir, 'pdf')
 48 |         self.colab_dir = os.path.join(self.tgt_dir, 'colab')
 49 |         self.sagemaker_dir = os.path.join(self.tgt_dir, 'sagemaker')
 50 |         self.linkcheck_dir = os.path.join(self.tgt_dir, 'linkcheck')
 51 |         self.slides_dir = os.path.join(self.tgt_dir, 'slides')
 52 | 
 53 |         self._set_target()
 54 | 
 55 |         # The project must have an index page
 56 |         index_fname, ext = os.path.splitext(self.build['index'])
 57 |         if ext and ext != '.md':
 58 |             logging.info('Ignore the file extesion, %s, specified by index in %s',
 59 |                          ext, config_fname)
 60 |         index_fname = os.path.join(self.src_dir, index_fname+'.md')
 61 |         if not os.path.exists(index_fname):
 62 |             logging.fatal('Failed to find the index file: %s', index_fname)
 63 |             exit(-1)
 64 | 
 65 |         if not self.project['title']:
 66 |             # Infer the book title from the index page
 67 |             with open(index_fname, 'r') as f:
 68 |                 for line in f:
 69 |                     line = line.strip()
 70 |                     if line:
 71 |                         if line.startswith('#'):
 72 |                             line = line[1:]
 73 |                         self.project['title'] = line.strip()
 74 |                         break
 75 | 
 76 |         self.set_tab(self.tab)
 77 |         # Sanity checks.
 78 |         self.sanity_check()
 79 | 
 80 |     def _set_target(self):
 81 |         # Some targets names.
 82 |         self.tex_fname = os.path.join(self.pdf_dir, self.project['name']+'.tex')
 83 |         self.pkg_fname = os.path.join(self.tgt_dir, self.project['name']+'.zip')
 84 | 
 85 |     def sanity_check(self):
 86 |         notebook_patterns = self.build['notebooks'].split()
 87 |         for p in notebook_patterns:
 88 |             assert p.endswith('md'), '`notebooks` patterns must end with `md`' \
 89 |                    ' in `config.init`. Examples: `notebooks = *.md */*.md`.'
 90 | 
 91 |         rst_patterns = self.build['rsts'].split()
 92 |         for p in rst_patterns:
 93 |             assert p.endswith('rst'), '`rsts` patterns must end with `rst`' \
 94 |                     ' in `config.init`. Examples: `rsts = index.rst' \
 95 |                     ' api/**/*.rst`.'
 96 | 
 97 | 
 98 |     def iter_tab(self, fn):
 99 |         if self.tab == 'all':
100 |             for tab in self.tabs:
101 |                 self.set_tab(tab)
102 |                 fn()
103 |             self.set_tab('all')
104 |         else:
105 |             fn()
106 | 
107 |     def set_tab(self, tab):
108 |         if tab:
109 |             assert tab in self.tabs + ['all'], f"{tab} doesn't exist in {self.tabs}"
110 |         self.tab = tab
111 |         self.eval_dir = self._set_tab_dir(self.eval_dir, tab)
112 |         self.ipynb_dir = self._set_tab_dir(self.ipynb_dir, tab)
113 |         self.rst_dir = self._set_tab_dir(self.rst_dir, tab)
114 |         self.pdf_dir = self._set_tab_dir(self.pdf_dir, tab)
115 |         self.colab_dir = self._set_tab_dir(self.colab_dir, tab)
116 |         self.sagemaker_dir = self._set_tab_dir(self.sagemaker_dir, tab)
117 |         self.slides_dir = self._set_tab_dir(self.slides_dir, tab)
118 |         self._set_target()
119 | 
120 | 
121 |     def _default_tab_dir(self, dirname):
122 |         tokens = dirname.split('/')
123 |         if self.tabs and '_' in tokens[-1]:
124 |             tokens[-1] =  '_'.join(tokens[-1].split('_')[:-1])
125 |             return '/'.join(tokens)
126 |         return dirname
127 | 
128 |     def _set_tab_dir(self, dirname, tab):
129 |         dirname = self._default_tab_dir(dirname)
130 |         if tab == self.default_tab:
131 |             return dirname
132 |         return dirname + '_' + tab
133 | 


--------------------------------------------------------------------------------
/d2lbook/notebook_test.py:
--------------------------------------------------------------------------------
  1 | from d2lbook import notebook
  2 | from d2lbook import build
  3 | from d2lbook import common
  4 | import unittest
  5 | import nbconvert
  6 | 
  7 | # 8 blocks:
  8 | # 0: markdown
  9 | # 1: markdown python2
 10 | # 2: markdown
 11 | # 3: markdown python2
 12 | # 4: markdown python3
 13 | # 5: code default
 14 | # 6: code python2
 15 | # 7: markdown 
 16 | _markdown_src = r'''# Test
 17 | 
 18 | first para
 19 | 
 20 | :begin_tab:`python2`
 21 | python is good
 22 | :end_tab:
 23 | 
 24 | another para
 25 | 
 26 | This is :eqref:`sec_1`
 27 | 
 28 | :begin_tab:`python2`
 29 | ```python2
 30 | 1+2+3
 31 | ```
 32 | :end_tab:
 33 | 
 34 | :begin_tab:`python3`
 35 | python3 is better
 36 | 
 37 | - here
 38 | - haha
 39 | 
 40 | :end_tab:
 41 | 
 42 | ```{.input .python}
 43 | 1+2+3
 44 | ```
 45 | 
 46 | ```{.input .python}
 47 | #@tab python2
 48 | 1+2+3
 49 | ```
 50 | 
 51 | ```bash
 52 | ````
 53 | $ ls
 54 | ````
 55 | ```
 56 | '''
 57 | 
 58 | _multi_tab_cell = r'''# Test
 59 | 
 60 | ```{.input .python}
 61 | #@tab python2, python3
 62 | 1+2
 63 | ```
 64 | 
 65 | The end
 66 | '''
 67 | 
 68 | _all_tab_cell = r'''# Test
 69 | 
 70 | ```{.input .python}
 71 | #@tab all
 72 | 1+2
 73 | ```
 74 | 
 75 | Split
 76 | 
 77 | ```{.input .python}
 78 | #@tab python2,python4
 79 | 1122
 80 | ```
 81 | 
 82 | :begin_tab:`python2,python3`
 83 | Here
 84 | :end_tab:
 85 | '''
 86 | 
 87 | class TestNotebook(unittest.TestCase):
 88 | 
 89 |     def test_split_markdown_cell(self):
 90 |         nb = notebook.read_markdown(_markdown_src)
 91 |         new_nb = notebook.split_markdown_cell(nb)
 92 |         cells = new_nb.cells
 93 |         self.assertEqual(len(cells), 8)
 94 |         self.assertEqual(cells[0].cell_type, 'markdown')
 95 |         self.assertEqual(cells[1].cell_type, 'markdown')
 96 |         self.assertEqual(cells[1].metadata['tab'], ['python2'])
 97 |         self.assertEqual(cells[2].cell_type, 'markdown')
 98 |         self.assertEqual('tab' in cells[2].metadata, False)
 99 |         self.assertEqual(cells[3].metadata['tab'], ['python2'])
100 |         self.assertEqual(cells[4].metadata['tab'], ['python3'])
101 |         self.assertEqual(cells[5].cell_type, 'code')
102 |         self.assertEqual(cells[6].cell_type, 'code')
103 | 
104 |     def test_get_tab_notebook(self):
105 |         nb = notebook.split_markdown_cell(notebook.read_markdown(_markdown_src))
106 |         new_nb = notebook.get_tab_notebook(nb, tab='python2', default_tab='python3')
107 |         cells = new_nb.cells
108 |         self.assertEqual(cells[0].cell_type, 'markdown')
109 |         self.assertEqual(cells[1].cell_type, 'markdown')
110 |         self.assertEqual(cells[1].metadata['tab'], ['python2'])
111 |         self.assertEqual(cells[2].cell_type, 'markdown')
112 |         self.assertEqual('tab' in cells[2].metadata, False)
113 |         self.assertEqual(cells[3].metadata['tab'], ['python2'])
114 |         self.assertEqual(cells[4].cell_type, 'code')
115 |         self.assertEqual(cells[4].metadata['tab'], ['python2'])
116 |         self.assertEqual(len(cells), 6)
117 | 
118 |         new_nb = notebook.get_tab_notebook(nb, tab='python3', default_tab='python3')
119 |         cells = new_nb.cells
120 |         self.assertEqual(cells[3].metadata['tab'], ['python3'])
121 |         self.assertEqual(len(cells), 5)
122 | 
123 |         nb = notebook.read_markdown(_multi_tab_cell)
124 |         cells = notebook.get_tab_notebook(nb, tab='python2', default_tab='python3').cells
125 |         self.assertEqual(len(cells), 3)
126 |         self.assertEqual(cells[1].metadata['tab'], ['python2'])
127 | 
128 |         cells = notebook.get_tab_notebook(nb, tab='python3', default_tab='python3').cells
129 |         self.assertEqual(len(cells), 3)
130 |         self.assertEqual(cells[1].metadata['tab'], ['python3'])
131 | 
132 |     def _split_and_merge(self, nb, tabs):        
133 |         split_nb = [notebook.get_tab_notebook(nb, tab, tabs[0]) for tab in tabs]
134 |         merged_nb = notebook.merge_tab_notebooks(split_nb)
135 |         return split_nb, merged_nb
136 | 
137 |     def test_merge_tab_notebooks(self):
138 |         nb = notebook.split_markdown_cell(notebook.read_markdown(_markdown_src))
139 |         _, new_nb = self._split_and_merge(nb, ['python3', 'python2'])
140 |         self.assertEqual(len(nb.cells), len(new_nb.cells))
141 |         for cell, new_cell in zip(nb.cells, new_nb.cells):
142 |             if new_cell.source != cell.source:
143 |                 self.assertTrue(new_cell.source in cell.source)
144 | 
145 |     def test_add_html_tab(self):
146 |         nb = notebook.split_markdown_cell(notebook.read_markdown(_markdown_src))
147 |         _, new_nb = self._split_and_merge(nb, ['python3', 'python2'])
148 |         new_nb = notebook.add_html_tab(new_nb, tabs=['python3', 'python2'])
149 |         cells = new_nb.cells
150 |         self.assertEqual(len(cells), 18)
151 |         self.assertRegex(cells[1].source, 'mdl-js-tabs')
152 |         self.assertRegex(cells[2].source, 'mdl-tabs__panel.*python2')
153 |         self.assertRegex(cells[4].source, '</div>')
154 |         self.assertRegex(cells[5].source, '</div>')
155 |         self.assertRegex(cells[8].source, 'mdl-tabs__panel.*python3')
156 |         self.assertRegex(cells[12].source, 'mdl-tabs__panel.*python2')
157 | 
158 |         nb = notebook.split_markdown_cell(notebook.read_markdown(_all_tab_cell))        
159 |         _, new_nb = self._split_and_merge(nb, ['python3', 'python2', 'python4']) 
160 |         cells = new_nb.cells
161 |         self.assertEqual(len(cells), 5)
162 |         self.assertEqual(cells[4].metadata['tab'], ['python3', 'python2'])
163 | 
164 |         new_nb = notebook.add_html_tab(new_nb, tabs=['python3', 'python2', 'python4'])
165 |         cells = new_nb.cells
166 |         self.assertEqual(len(cells), 15)
167 |         self.assertRegex(cells[3].source, 'mdl-js-tabs')
168 |         self.assertRegex(cells[4].source, 'mdl-tabs__panel.*python3')
169 |         self.assertRegex(cells[7].source, 'mdl-tabs__panel.*python2')
170 |         self.assertRegex(cells[11].source, 'mdl-tabs__panel.*python4')
171 |         
172 | 
173 | if __name__ == '__main__':
174 |     unittest.main()
175 | 


--------------------------------------------------------------------------------
/d2lbook/utils.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import glob
  3 | import shutil
  4 | import logging
  5 | 
  6 | def rm_ext(filename):
  7 |     return os.path.splitext(filename)[0]
  8 | 
  9 | def find_files(pattern, root=None, excluded_pattern=None):
 10 |     fnames = []
 11 |     patterns = pattern.split()
 12 |     for p in patterns:
 13 |         if root is not None:
 14 |             p = os.path.join(root, p)
 15 |         if os.path.isdir(p):
 16 |             p = os.path.join(p, '**')
 17 |         for fn in glob.glob(p, recursive=True):
 18 |             if os.path.isfile(fn):
 19 |                 fnames.append(fn)
 20 |     if not excluded_pattern:
 21 |         return fnames
 22 |     excluded_fnames = find_files(excluded_pattern, root)
 23 |     return [fn for fn in fnames if fn not in excluded_fnames]
 24 | 
 25 | def get_mtimes(fnames):
 26 |     if isinstance(fnames, str):
 27 |         return os.path.getmtime(fnames)
 28 |     return [os.path.getmtime(fn) for fn in fnames]
 29 | 
 30 | def split_fname(fname, base_dir, ext=None):
 31 |     fname = os.path.relpath(fname, base_dir)
 32 |     base, fext = os.path.splitext(fname)
 33 |     if fext.startswith('.'):
 34 |         fext = fext[1:]
 35 |     if ext and ext != fext:
 36 |         logging.warn("%s doesn't have extension %s", fname, ext)
 37 |     return base, fext
 38 | 
 39 | def get_tgt_fname(src_dir, src_fname, tgt_dir, src_ext, tgt_ext):
 40 |     fname, ext = split_fname(src_fname, src_dir, src_ext)
 41 |     if tgt_ext:
 42 |         ext = tgt_ext
 43 |     return os.path.join(tgt_dir, fname+'.'+ext)
 44 | 
 45 | def get_updated_files(src_fnames, src_dir, tgt_dir,
 46 |                       src_ext=None, tgt_ext=None, deps_mtime=0):
 47 |     updated_fnames = []
 48 |     for src_fn in src_fnames:
 49 |         tgt_fn = get_tgt_fname(src_dir, src_fn, tgt_dir, src_ext, tgt_ext)
 50 |         if (not os.path.exists(tgt_fn) # new
 51 |             or get_mtimes(src_fn) > get_mtimes(tgt_fn) # target is old
 52 |             or get_mtimes(tgt_fn) < deps_mtime): # deps is updated
 53 |             updated_fnames.append((src_fn, tgt_fn))
 54 |     return list(set(updated_fnames))
 55 | 
 56 | 
 57 | def get_tgt_files_from_src_pattern(pattern, tgt_dir, src_ext, tgt_ext):
 58 |     """Get files with tgt_ext in tgt_dir according to pattern with src_ext"""
 59 |     patterns = pattern.split()
 60 |     for i, p in enumerate(patterns):
 61 |         f, ext = os.path.splitext(p)
 62 |         if src_ext and ext == '.' + src_ext and tgt_ext:
 63 |             patterns[i] = f + '.' + tgt_ext
 64 |     return find_files(' '.join(patterns), tgt_dir)
 65 | 
 66 | 
 67 | def get_files_to_rm(pattern, src_dir, tgt_dir, src_ext=None, tgt_ext=None):
 68 |     """Return files under tgt_dir whose corresponding src file is removed under src_dir."""
 69 |     tgt_files = get_tgt_files_from_src_pattern(pattern, tgt_dir, src_ext, tgt_ext)
 70 |     to_removes = []
 71 |     for tgt_fn in tgt_files:
 72 |         # If tgt_ext is provided, only files with tgt_ext in tgt_dir are
 73 |         # considered being removed. Note that ipynb to rst may generate svg
 74 |         # files, which should not be removed though these svg files do not have
 75 |         # corresponding files in src_dir
 76 |         if tgt_ext:
 77 |             fext = os.path.splitext(tgt_fn)[1]
 78 |             if fext.startswith('.'):
 79 |                 fext = fext[1:]
 80 |             if tgt_ext != fext:
 81 |                 continue
 82 |         # By switching args, it actually get_src_fname.
 83 |         src_fn = get_tgt_fname(tgt_dir, tgt_fn, src_dir, tgt_ext, src_ext)
 84 |         if not os.path.exists(src_fn):
 85 |             to_removes.append(tgt_fn)
 86 |     return to_removes
 87 | 
 88 | 
 89 | def rm_empty_dir(path, rmed_empty_dirs):
 90 |     """Recursively remove empty directories under and including path."""
 91 |     if not os.path.isdir(path):
 92 |         return
 93 | 
 94 |     fnames = os.listdir(path)
 95 |     if len(fnames) > 0:
 96 |         for fn in fnames:
 97 |             fpath = os.path.join(path, fn)
 98 |             if os.path.isdir(fpath):
 99 |                 rm_empty_dir(fpath, rmed_empty_dirs)
100 | 
101 |     if len(os.listdir(path)) == 0:
102 |         rmed_empty_dirs.append(str(path))
103 |         os.rmdir(path)
104 | 
105 | def hide_individual_data_files(fns):
106 |     """To display concisely: _build/eval/data/A/B/C/D -> _build/eval/data/A."""
107 |     concise_fns = set()
108 |     for fn in fns:
109 |         concise_fn = []
110 |         fn_components = fn.split('/')
111 |         i = 0
112 |         seen_data = False
113 |         while i < len(fn_components) and not seen_data:
114 |             component = fn_components[i]
115 |             concise_fn.append(component)
116 |             if component == 'data':
117 |                 seen_data = True
118 |             i += 1
119 |         if i < len(fn_components) - 1:
120 |             next_component = fn_components[i + 1]
121 |             if next_component.isdigit():
122 |                 concise_fn.append('<some digit>')
123 |             else:
124 |                 concise_fn.append(next_component)
125 |             if i < len(fn_components) - 2:
126 |                 concise_fn.append('')  # For indicating dir instead of file
127 |         concise_fns.add('/'.join(concise_fn))
128 |     return concise_fns
129 | 
130 | def mkdir(dirname):
131 |     os.makedirs(dirname, exist_ok=True)
132 | 
133 | 
134 | def copy(src, tgt):
135 |     mkdir(os.path.dirname(tgt))
136 |     shutil.copy(src, tgt)
137 | 
138 | 
139 | def get_time_diff(tik, tok):
140 |     h, remainder = divmod((tok - tik).seconds, 3600)
141 |     m, s = divmod(remainder, 60)
142 |     return "%02d:%02d:%02d" % (h, m, s)
143 | 
144 | def run_cmd(cmd, verbose=False):
145 |     if isinstance(cmd, str):
146 |         cmd = [cmd]
147 |     cmd = ' '.join(cmd)
148 |     if verbose:
149 |         logging.info('Run "%s"', cmd)
150 |     ret = os.system(cmd)
151 |     if ret != 0:
152 |         exit(-1)
153 | 
154 | def split_config_str(config_str, num_items_per_line=None):
155 |     items = []
156 |     if not config_str:
157 |         return items
158 |     lines = config_str.split('\n')
159 |     for i, line in enumerate(lines):
160 |         items.append([tk.strip() for tk in line.split(',') if tk.strip()])
161 |         if num_items_per_line and len(items[-1]) != num_items_per_line:
162 |             logging.fatal("The items in %d-th line (%d) doesn't"
163 |                           " match the required (%d)"%(i, len(items[-1]), num_items_per_line))
164 |             logging.fatal("The raw string is:\n"+config_str)
165 |     return items
166 | 


--------------------------------------------------------------------------------
/docs/user/markdown.md:
--------------------------------------------------------------------------------
  1 | # Markdown Cells
  2 | :label:`sec_markdown`
  3 | 
  4 | 
  5 | The `d2lbook` provide additional features beyond the normal markdown supports in
  6 | Jupyter.
  7 | 
  8 | ## Table of Contents
  9 | 
 10 | You can use a `toc` code block to specify the table of contents.
 11 | Here `:maxdepth: 2` means display two levels of files, and `:numbered:` means
 12 | adding number to each section (default is not enabled). Also note that you don't
 13 | need to specify the file extension.
 14 | 
 15 | `````
 16 | ```toc
 17 | :maxdepth: 2
 18 | :numbered:
 19 | 
 20 | guide/index
 21 | ```
 22 | `````
 23 | 
 24 | 
 25 | ## Images
 26 | 
 27 | 
 28 | We can put the image caption in `[]`. In addition, we can use
 29 | `:width:` followed by its value in an inline block to specify the image width,
 30 | similarly use `:height:`for height.
 31 | 
 32 | ```
 33 | ![Estimating the length of a foot](../img/koebel.jpg)
 34 | :width:`400px`
 35 | ```
 36 | 
 37 | 
 38 | ![Estimating the length of a foot](../img/koebel.jpg)
 39 | :width:`400px`
 40 | 
 41 | 
 42 | ### SVG Images
 43 | 
 44 | We recommend you to use SVG images as much as you can. It is sharp and its size
 45 | is small. But since Latex doesn't support SVG images, if you want to build a PDF
 46 | output, you need to install `rsvg-convert`. On Macos, you can simply
 47 | `brew install librsvg` or `sudo apt-get install librsvg2-bin` for Ubuntu.
 48 | 
 49 | ![A LSTM cell in SVG](../img/lstm.svg)
 50 | 
 51 | ## Tables
 52 | 
 53 | You can insert table caption before the table by starting it with a `:`. Note
 54 | that you need to leave an empty line between the caption and the table itself.
 55 | 
 56 | ```
 57 | : The number is computed by $z_{ij} = \sum_{k}x_{ik}y_{kj}$.
 58 | 
 59 | | Year | Number | Comment |
 60 | | ---  | --- | --- |
 61 | | 2018 | 100 | Good year |
 62 | | 2019 | 200 | Even better, add something to make this column wider |
 63 | ```
 64 | 
 65 | 
 66 | : The number is computed by $z_{ij} = \sum_{k}x_{ik}y_{kj}$.
 67 | 
 68 | | Year | Number | Comment |
 69 | | ---  | --- | --- |
 70 | | 2018 | 100 | Good year |
 71 | | 2019 | 200 | Even better, add something to make this column wider  |
 72 | 
 73 | If the Table caption number doesn't show properly, you may need to update
 74 | `pandoc` to the latest version.
 75 | 
 76 | ## Cross References
 77 | 
 78 | We often want to reference sections, figures, tables and equations in a book.
 79 | 
 80 | ### Referencing Sections
 81 | :label:`my_sec3`
 82 | 
 83 | We can put a label immediately after the section title to allow this section to
 84 | be referenced by its label. The label format is
 85 | `:label:` followed by its label name in an inline code block.
 86 | 
 87 | ```
 88 | ### Referencing Sections
 89 | :label:`my_sec3`
 90 | ```
 91 | 
 92 | 
 93 | Then we can reference this section through `:ref:` followed by label name in an
 94 | inline code block
 95 | 
 96 | ```
 97 | :ref:`my_sec3` demonstrates how to reference a section.
 98 | ```
 99 | 
100 | 
101 | :ref:`my_sec3` demonstrates how to reference a section.
102 | 
103 | 
104 | Note that it displays the referenced section title with a clickable link. We can
105 | also use a numbered version by changing `:num:` to `:numref:`, e.g. :numref:`my_sec3`.
106 | 
107 | If the label is incorrect, say we put `my_sec2` here, the build log will
108 | contains a warning such as
109 | 
110 | ```
111 | WARNING: undefined label: my_sec2
112 | ```
113 | 
114 | 
115 | You can turn it into error by setting `warning_is_error = True` in
116 | `config.ini`.
117 | 
118 | Besides, we can cross
119 | reference label from other files as well, e.g. :numref:`sec_code`. This applies
120 | to figures, tables and equations as well.
121 | 
122 | 
123 | ### Referencing Images
124 | 
125 | Similarly we can label an image and reference it later.
126 | 
127 | ```
128 | ![A nice image with a cat and a dog.](../img/catdog.jpg)
129 | :width:`300px`
130 | :label:`img_catdog`
131 | 
132 | As can be seen from :numref:`img_catdog`,
133 | ```
134 | 
135 | 
136 | ![A nice image with a cat and a dog.](../img/catdog.jpg)
137 | :width:`300px`
138 | :label:`img_catdog`
139 | 
140 | As can be seen from :numref:`img_catdog`, there is a cat and a dog.
141 | 
142 | ### Referencing Tables
143 | 
144 | ```
145 | :This a is very long table caption. It will breaks into several lines. And
146 | contains a math equation as well. $z_{ij} = \sum_{k}x_{ik}y_{kj}$.
147 | 
148 | | Year | Number | Comment |
149 | | ---  | --- | --- |
150 | | 2018 | 100 | Good year |
151 | :label:`table`
152 | 
153 | Refer to :numref:`table`
154 | 
155 | ```
156 | 
157 | 
158 | :This a is very long table caption. It will breaks into several lines. And
159 | contains a math equation as well. $z_{ij} = \sum_{k}x_{ik}y_{kj}$.
160 | 
161 | | Year | Number | Comment |
162 | | ---  | --- | --- |
163 | | 2018 | 100 | Good year |
164 | :label:`table`
165 | 
166 | Refer to :numref:`table`
167 | 
168 | ### Referencing Equations
169 | 
170 | The difference here is that we need to use `eqlabel` instead of `label`. For
171 | example
172 | 
173 | ```
174 | $$\hat{\mathbf{y}}=\mathbf X \mathbf{w}+b$$
175 | :eqlabel:`linear`
176 | 
177 | 
178 | In :eqref:`linear`, we define the linear model.
179 | ```
180 | 
181 | 
182 | $$\hat{\mathbf{y}}=\mathbf X \mathbf{w}+b$$
183 | :eqlabel:`linear`
184 | 
185 | In :eqref:`linear`, we define the linear model.
186 | 
187 | 
188 | ## Citations
189 | 
190 | First put your bib file at somewhere. All references will be displayed on the
191 | place where it inserted in HTML. But in PDF, all references will be moved to end of
192 | the document. Then we can cite a paper through `:cite:`. Multipel papers can be
193 | separated by commans (note there should be no space)
194 | 
195 | ```
196 | 
197 | The breakthrough of deep learning origins from :cite:`krizhevsky2012imagenet` for...
198 | 
199 | Two keys together :cite:`he2016deep,devlin2018bert`...
200 | 
201 | :bibliography:`../refs.bib`
202 | ```
203 | 
204 | 
205 | The breakthrough of deep learning origins from :cite:`krizhevsky2012imagenet` for
206 | computer vision, there is a rich of following up works, such as
207 | :cite:`he2016deep`. NLP is catching up as well, the recent work
208 | :cite:`devlin2018bert` shows significant improvements.
209 | 
210 | Two keys together :cite:`he2016deep,devlin2018bert`. Single author
211 | :cite:`mitchell80`, two authors :cite:`Newell81`
212 | 
213 | 
214 | Note that :cite: is the same as `\citep` in LaTeX. To use `\citet` in LaTeX, just use :citet:.
215 | For example:
216 | 
217 | ```
218 | Two keys together :citet:`he2016deep,devlin2018bert`. Single author
219 | :citet:`mitchell80`, two authors :citet:`Newell81`
220 | ```
221 | 
222 | Two keys together :citet:`he2016deep,devlin2018bert`. Single author
223 | :citet:`mitchell80`, two authors :citet:`Newell81`
224 | 
225 | 
226 | ## References
227 | 
228 | :bibliography:`../refs.bib`
229 | 


--------------------------------------------------------------------------------
/d2lbook/config_default.ini:
--------------------------------------------------------------------------------
  1 | # A default configuration to create a book with d2lbook.
  2 | 
  3 | [project]
  4 | 
  5 | # The project name, used as the filename of the package and the PDF file. For
  6 | # example, if set to d2l-book, then will build d2l-book.zip and d2l-book.pdf
  7 | name = d2lbook
  8 | 
  9 | # Book title. It will be displayed on the top-right of the HTML page and the
 10 | # front page of the PDF file
 11 | title =
 12 | 
 13 | # All author names
 14 | author = The contributors
 15 | 
 16 | # Current release version
 17 | release = 0.0.1
 18 | 
 19 | # The copyright
 20 | copyright =
 21 | 
 22 | # The langunage, such as en, zh, ja, ...
 23 | lang =
 24 | 
 25 | [translation]
 26 | 
 27 | origin_repo =
 28 | origin_lang =
 29 | 
 30 | # aws or gcp
 31 | translator =
 32 | terminology =
 33 | 
 34 | 
 35 | [build]
 36 | 
 37 | # The root page
 38 | index = index
 39 | 
 40 | # A list of wildcards to indicate the markdown files that need to be evaluated as
 41 | # Jupyter notebooks.
 42 | notebooks = **/*.md
 43 | 
 44 | # A list of wildcards to indicate the rsts files that need to be included
 45 | rsts =
 46 | 
 47 | # A list of markdown files that will be included but not evaluated as Jupyter
 48 | # notebooks. They are included in the eval outputs as markdown files (e.g.,
 49 | # _build/eval) but not in the rst, pdf, html outputs (e.g., _build/rst etc.)
 50 | non-notebooks =
 51 | 
 52 | # Files that will be skipped.
 53 | exclusions =
 54 | 
 55 | # A list of files, if anyone is modified after the last build, will re-build all
 56 | # documents.
 57 | dependencies =
 58 | 
 59 | # A list of files that will be copied to the build folder.
 60 | resources =
 61 | 
 62 | # If True (default), then will evaluate the notebook to obtain outputs.
 63 | eval_notebook = True
 64 | 
 65 | 
 66 | # Source directory
 67 | source_dir = .
 68 | 
 69 | # Output directory
 70 | output_dir = _build
 71 | 
 72 | # If True, the mark the build as failed for any warning. Default is False.
 73 | warning_is_error = False
 74 | 
 75 | # Additional Sphinx extensions
 76 | sphinx_extensions =
 77 | 
 78 | # Additional Sphinx configuration options
 79 | sphinx_configs =
 80 | 
 81 | # Specify the tabs, seperated by ",". The first one will be the default tab.
 82 | tabs =
 83 | 
 84 | # The original Github repository, such as d2l-ai/d2l-en, if this project is a
 85 | # langunage translation from the source repository.
 86 | origin_repo =
 87 | 
 88 | [html]
 89 | 
 90 | # A list of links that is displayed on the navbar. Each line contains a link, a
 91 | # link consists of three items: name, URL, and a fontawesome icon
 92 | # (https://fontawesome.com/icons?d=gallery). Items are seperated by ,
 93 | header_links =
 94 | 
 95 | # The filename of the favicon
 96 | favicon =
 97 | 
 98 | # The filename of the html logo
 99 | html_logo =
100 | 
101 | # A list of CSS files to be included
102 | include_css =
103 | 
104 | # A list of JS files to be included
105 | include_js =
106 | 
107 | [pdf]
108 | 
109 | 
110 | # The main font. In default it's FreeSerif. If you want a bolder font, you can
111 | # download and install
112 | # https://www.fontsquirrel.com/fonts/download/source-serif-pro
113 | # Then specify Source Serif Pro
114 | main_font =
115 | 
116 | # The sans font. In default it's FreeSans.  If you want a bolder font, you can
117 | # download and install
118 | # https://www.fontsquirrel.com/fonts/download/source-sans-pro
119 | # Then specify Source Sans Pro
120 | sans_font =
121 | 
122 | # The mono font. In default it's FreeMono.  If you want a bolder font, you can
123 | # download and install
124 | # https://www.fontsquirrel.com/fonts/download/source-code-pro
125 | # Then specify Source Code Pro
126 | # If you find Source Code Pro is too wide, you can use the narrower Inconsolata
127 | # https://www.fontsquirrel.com/fonts/download/Inconsolata
128 | mono_font =
129 | 
130 | # The file used to post-process the generated tex file.
131 | post_latex =
132 | 
133 | # The filename of the latex logo
134 | latex_logo =
135 | 
136 | # Bibtext bibfile
137 | bibfile =
138 | 
139 | # Specify the url of external latex resources.
140 | latex_url = 
141 | 
142 | # Specify the latex style. We now support "cambridge" and the defaut sphinx style.
143 | style = 
144 | 
145 | [library]
146 | 
147 | # [DEPRECATED] Where code blocks will save to
148 | save_filename =
149 | 
150 | # [DEPRECATED] The parttern to mark this block will be saved.
151 | save_mark =
152 | 
153 | # [DEPRECATED] A list of filename and pattern pairs.
154 | save_patterns =
155 | 
156 | # [DEPRECATED, use release instead] The library version
157 | version =
158 | 
159 | # The file to save the library version
160 | version_file =
161 | 
162 | # If set, then save a/b.md into root_dir/a/b.md
163 | root_dir =
164 | 
165 | [deploy]
166 | 
167 | # Tracking ID for the HTML pages
168 | google_analytics_tracking_id =
169 | 
170 | # The github repo that all files will copy to
171 | github_repo =
172 | 
173 | # The S3 bucket that all files will copy to
174 | s3_bucket =
175 | 
176 | # S3 URLs of the other files that will also be deployed
177 | other_file_s3urls =
178 | 
179 | 
180 | [colab]
181 | 
182 | # The github repo to host the notebooks for colab, such as d2l-ai/d2l-book-colab
183 | # Also make sure that the machine's ssh key is added to github before running
184 | # "deploy" so that it can commit into d2l-ai/d2l-book-colab
185 | github_repo =
186 | 
187 | # The html pages that will be exclueded for adding the colab button
188 | exclusions = *.html **/index.html
189 | 
190 | # The additional libraries to be installed. Each line specifies one library, it
191 | # sould contains two items seperated by a comma. The first item is the lib name,
192 | # the second one is the pip package. For example: mxnet, mxnet-cu100. If a
193 | # notebook contains "import mxnet", then "!pip install mxnet-cu100" will
194 | # inserted into the notebook.
195 | libs =
196 | 
197 | # The text description of additional libraries are required.
198 | libs_header = The following additional libraries are needed to run this
199 |             notebook. Note that running on Colab is experimental, please report a Github
200 |             issue if you have any problem.
201 | 
202 | # If any code cell contains the pattern, default value is gpu, then choose the
203 | # hardware accelerator to GPU in Colab. The default accelerator is None.
204 | gpu_pattern = gpu
205 | 
206 | # Colab cannot display SVG files with a relative fname or a github URL. You can
207 | # replace it with your website URL. For example: img, http://book.d2l.ai/_images
208 | # will "img/test.svg" to "http://book.d2l.ai/_images/test.svg"
209 | replace_svg_url =
210 | 
211 | 
212 | [sagemaker]
213 | 
214 | # The github repo to host the notebooks for , such as d2l-ai/d2l-book-sagemaker
215 | # Also make sure that the machine's ssh key is added to github before running
216 | # "deploy" so that it can commit into d2l-ai/d2l-book-sagemaker
217 | github_repo =
218 | 
219 | kernel = conda_python3
220 | 
221 | libs =
222 | 
223 | libs_header = Installing (updating) the following libraries for your Sagemaker
224 |             instance.
225 | 
226 | [slides]
227 | 
228 | # The HTML element to put on the top left corner in each slide
229 | top_left =
230 | 
231 | # The HTML element to put on the top right corner in each slide
232 | top_right =
233 | 
234 | # The github repo to host the slides
235 | github_repo =
236 | 


--------------------------------------------------------------------------------
/d2lbook/markdown.py:
--------------------------------------------------------------------------------
  1 | """utilities to handle markdown
  2 | """
  3 | import re
  4 | from d2lbook import common
  5 | from typing import List, Dict
  6 | import logging
  7 | 
  8 | def split_markdown(source: str) -> List[Dict[str, str]]:
  9 |     """Split markdown into a list of text and code cells.
 10 | 
 11 |     A cell has three fields:
 12 |     1. type: either code or markdown
 13 |     2. class: code class or tab class
 14 |     3. source: single string for the source
 15 |     """
 16 |     cells: List[Dict] = []
 17 |     in_code = False
 18 |     in_tab = False
 19 |     cur_code_mark = None
 20 |     cur_tag = None
 21 |     cur_src = []
 22 | 
 23 |     def _add_cell(cur_src: List[str], cells: List[Dict]):
 24 |         if cur_src:
 25 |             src = '\n'.join(cur_src).strip()
 26 |             if in_code:
 27 |                 cells.append({
 28 |                     'type': 'code',
 29 |                     'fence': cur_code_mark,
 30 |                     'class': cur_tag,
 31 |                     'source': src})
 32 |             else:
 33 |                 if not src and not cur_tag:
 34 |                     return
 35 |                 cells.append({'type': 'markdown', 'source': src})
 36 |                 if cur_tag:
 37 |                     cells[-1]['class'] = cur_tag
 38 | 
 39 |     for l in source.splitlines():
 40 |         code = common.md_code_fence.match(l)
 41 |         tab = common.md_mark_pattern.match(l)
 42 |         if code:
 43 |             # code can be nested
 44 |             if in_tab or (in_code and code.groups()[0] != cur_code_mark):
 45 |                 cur_src.append(l)
 46 |             else:
 47 |                 _add_cell(cur_src, cells)
 48 |                 cur_src = []
 49 |                 cur_code_mark, cur_tag = code.groups()
 50 |                 in_code ^= True
 51 |         elif tab:
 52 |             begin = tab.groups()[0] == 'begin_tab'
 53 |             end = tab.groups()[0] == 'end_tab'
 54 |             if in_code or (not begin and not end):
 55 |                 cur_src.append(l)
 56 |             else:
 57 |                 _add_cell(cur_src, cells)
 58 |                 cur_src = []
 59 |                 if begin:
 60 |                     cur_tag = tab.groups()[1]
 61 |                 else:
 62 |                     cur_tag = None
 63 |                 in_tab = begin
 64 |         else:
 65 |             cur_src.append(l)
 66 |     _add_cell(cur_src, cells)
 67 |     return cells
 68 | 
 69 | def join_markdown_cells(cells: List[Dict]) -> str:
 70 |     """Join a list of cells into a markdown string"""
 71 |     src = []
 72 |     for c in cells:
 73 |         cell_src = []
 74 |         if c['type'] == 'markdown':
 75 |             if 'class' in c:
 76 |                 cell_src.append(f':begin_tab:{c["class"]}')
 77 |             cell_src.append(c['source'])
 78 |             if 'class' in c:
 79 |                 if cell_src[-1].endswith('\n'):
 80 |                     cell_src[-1] = cell_src[-1][:-1]
 81 |                 cell_src.append(':end_tab:')
 82 |         else:
 83 |             cell_src += [c['fence'] + c['class'], c['source'], c['fence']]
 84 |         src.append('\n'.join(cell_src).strip())
 85 |     return '\n\n'.join(src) + '\n'
 86 | 
 87 | basic_token = r'[\ \*-\/\\\._\w\d\:/]+'
 88 | token = r'[\|\'\:\;\<\>\^\(\)\{\}\[\]\ \*-\/\\\.,_=\w\d]+'
 89 | 
 90 | def _is_mark(lines):
 91 |     if isinstance(lines, str):
 92 |         lines = [lines]
 93 |     for l in lines:
 94 |         l = l.strip()
 95 |         if l:
 96 |             m = re.match(rf':{token}:(`{token}`)?', l)
 97 |             if m is None or m.span() != (0, len(l)):
 98 |                 return False
 99 |     return True
100 | 
101 | def _list(line, prev_prefix):
102 |     m = re.match(r' *[-\*\+] *', line) or re.match(r' *[\d]+\. *', line)
103 |     if m:
104 |         if prev_prefix is not None and len(prev_prefix.split('__')) == 2:
105 |             p = int(prev_prefix.split('__')[1]) + 1
106 |         else:
107 |             p = 0
108 |         return m[0] + '__' + str(p)
109 |     if prev_prefix == '':
110 |         return ''
111 |     if prev_prefix is not None and len(re.match(r' *', line)[0]) > len(
112 |             re.match(r' *', prev_prefix)[0]):
113 |         return prev_prefix
114 |     return ''
115 | 
116 | def split_text(text: str) -> List[Dict[str, str]]:
117 |     """Split text into a list of paragraphs
118 | 
119 |     1. type: text, list, image, title, equation, table
120 |     1. source:
121 |     1. prefix:
122 |     1. mark:
123 |     """
124 |     # split into paragraphs
125 |     lines = text.splitlines()
126 |     groups = common.group_list(lines, lambda a, _: a.strip() == '')
127 |     paras = ['\n'.join(item) for empty_line, item in groups if not empty_line]
128 | 
129 |     def _fallback(p, type):
130 |         logging.warn(f'Wrong {type} format:\n' + p)
131 |         cells.append({'type': 'text', 'source': p})
132 | 
133 |     cells = []
134 |     for p in paras:
135 |         lines = p.splitlines() + ['']
136 |         p += '\n'
137 |         if p.startswith('#'):
138 |             # parse title
139 |             if not _is_mark(lines[1:]):
140 |                 _fallback(p, 'title')
141 |             else:
142 |                 m = re.match(r'#+ *', lines[0])
143 |                 cells.append({
144 |                     'type': 'title',
145 |                     'prefix': m[0],
146 |                     'source': lines[0][m.span()[1]:],
147 |                     'mark': '\n'.join(lines[1:])})
148 |         elif p.startswith('$$'):
149 |             # parse equations
150 |             m = re.findall(r'\$\$', p)
151 |             if len(m) != 2:
152 |                 _fallback(p, 'equation')
153 |             else:
154 |                 cells.append({'type': 'equation', 'source': p})
155 |         elif p.startswith('!['):
156 |             # parse images
157 |             if not lines[0].strip().endswith(')') or not _is_mark(lines[1:]):
158 |                 _fallback(p, 'image')
159 |             else:
160 |                 cells.append({'type': 'image', 'source': p})
161 |         elif p.startswith('|'):
162 |             # parse table
163 |             for i, l in enumerate(lines):
164 |                 if not l.startswith('|'):
165 |                     break
166 |             if not _is_mark(lines[i:]):
167 |                 _fallback(p, 'equation')
168 |             else:
169 |                 cells.append({'type': 'table', 'source': p})
170 |         else:
171 |             groups = common.group_list(lines, _list)
172 |             for prefix, item in groups:
173 |                 if len(prefix.split('__')) == 2:
174 |                     prefix = prefix.split('__')[0]
175 |                 source = '\n'.join(item)[len(prefix):]
176 |                 if prefix == '':
177 |                     cells.append({'type': 'text', 'source': source})
178 |                 else:
179 |                     cells.append({
180 |                         'type': 'list',
181 |                         'prefix': prefix,
182 |                         'source': source})
183 |     return cells
184 | 
185 | def join_text(cells) -> str:
186 |     paras = []
187 |     for cell in cells:
188 |         l = cell['source']
189 |         if 'prefix' in cell:
190 |             l = cell['prefix'] + l
191 |         if 'mark' in cell:
192 |             l += '\n' + cell['mark']
193 |         paras.append(l)
194 |     return '\n'.join(paras)


--------------------------------------------------------------------------------
/d2lbook/colab.py:
--------------------------------------------------------------------------------
  1 | """Integration with Colab notebooks"""
  2 | import os
  3 | import re
  4 | import nbformat
  5 | import logging
  6 | from d2lbook import notebook
  7 | from d2lbook import utils
  8 | 
  9 | def parse_repo_lib(repo_str, lib_str, version):
 10 |     repo = utils.split_config_str(repo_str)
 11 |     if len(repo) == 1 and len(repo[0]) == 1:
 12 |         repos = {None:repo[0]}
 13 |         libs = {None:utils.split_config_str(lib_str, 2)}
 14 |     else:
 15 |         repo = utils.split_config_str(repo_str, 2)
 16 |         repos = {r[0]:r[1] for r in repo}
 17 |         libs_list = utils.split_config_str(lib_str, 3)
 18 |         libs = {}
 19 |         for tab, pkg, install in libs_list:
 20 |             if tab in libs:
 21 |                 libs[tab].append([pkg, install])
 22 |             else:
 23 |                 libs[tab] = [[pkg, install]]
 24 |     for tab in libs:
 25 |         for i, l in enumerate(libs[tab]):
 26 |             if '==RELEASE' in l[1]:
 27 |                 libs[tab][i][1] = l[1].replace('==RELEASE', f'=={version}')
 28 |     return repos, libs
 29 | 
 30 | 
 31 | class Colab():
 32 |     def __init__(self, config):
 33 |         self._valid = config.colab and config.colab['github_repo']
 34 |         if not self._valid:
 35 |             return
 36 |         self.tabs = config.tabs
 37 |         self.config = config.colab
 38 |         self._repo, self._libs = parse_repo_lib(
 39 |             self.config['github_repo'], self.config['libs'], config.project["release"])
 40 | 
 41 |     def valid(self):
 42 |         return self._valid
 43 | 
 44 |     def git_repo(self, tab):
 45 |         return self._repo[tab]
 46 | 
 47 |     def generate_notebooks(self, eval_dir, colab_dir, tab):
 48 |         if not self._valid:
 49 |             return
 50 |         # copy notebook fron eval_dir to colab_dir
 51 |         utils.run_cmd(['rm -rf', colab_dir])
 52 |         utils.run_cmd(['cp -r', eval_dir, colab_dir])
 53 |         notebooks = utils.find_files('**/*.ipynb', colab_dir)
 54 |         for fn in notebooks:
 55 |             nb = notebook.read(fn)
 56 |             if not nb:
 57 |                 continue
 58 |             # Use Python3 as the kernel
 59 |             update_notebook_kernel(nb, "python3", "Python 3")
 60 |             # Check if GPU is needed
 61 |             use_gpu = False
 62 |             for cell in nb.cells:
 63 |                 if cell.cell_type == 'code':
 64 |                     if self.config['gpu_pattern'] in cell.source:
 65 |                         use_gpu = True
 66 |                         break
 67 |             if use_gpu:
 68 |                 nb['metadata'].update({"accelerator": "GPU"})
 69 |                 logging.info('Use GPU for '+fn)
 70 |             # Update SVG image URLs
 71 |             if self.config['replace_svg_url']:
 72 |                 _update_svg_urls(nb, self.config['replace_svg_url'], fn, colab_dir)
 73 |             insert_additional_installation(nb, self._libs[tab], self.config['libs_header'])
 74 |             with open(fn, 'w') as f:
 75 |                 f.write(nbformat.writes(nb))
 76 | 
 77 |     def add_button(self, html_dir):
 78 |         """Add an open colab button in HTML"""
 79 |         if not self._valid:
 80 |             return
 81 |         files = utils.find_files('**/*.html', html_dir, self.config['exclusions'])
 82 |         for fn in files:
 83 |             with open(fn, 'r') as f:
 84 |                 html = f.read()
 85 |             if 'id="Colab' in html:
 86 |                 continue
 87 |             url = os.path.relpath(fn, html_dir).replace('.html', '.ipynb')
 88 |             if self.tabs:
 89 |                 colab_html = ''
 90 |                 for tab in self.tabs:
 91 |                     colab_tab = _get_colab_html(self._repo[tab], url, f'Colab [{tab}]')
 92 |                     colab_html += f'<div class="d2l-tabs__tab">{colab_tab}</div>'
 93 |                 colab_html = f'<div class="d2l-tabs" style="float:right">{colab_html}</div>'
 94 |             else:
 95 |                 colab_html = _get_colab_html(self._repo[None], url, f'Colab')
 96 |             html = html.replace('</h1>', colab_html+'</h1>')
 97 |             with open(fn, 'w') as f:
 98 |                 f.write(html)
 99 | 
100 | def _get_colab_html(repo, url, text):
101 |     id = text.replace(" ", "_")
102 |     colab_link = f'https://colab.research.google.com/github/{repo}/blob/master/{url}'
103 |     colab_html = f'<a href="{colab_link}" onclick="captureOutboundLink(\'{colab_link}\'); return false;"> <button style="float:right", id="{id}" class="mdl-button mdl-js-button mdl-button--primary mdl-js-ripple-effect"> <i class=" fas fa-external-link-alt"></i> {text} </button></a><div class="mdl-tooltip" data-mdl-for="{id}"> Open the notebook in Colab</div>'
104 |     return colab_html
105 | 
106 | def insert_additional_installation(notebook, lib, lib_header):
107 |     if lib:
108 |         cell = _get_installation_cell(notebook, lib)
109 |         if cell:
110 |             notebook.cells.insert(0, cell)
111 |             if lib_header:
112 |                 notebook.cells.insert(
113 |                     0, nbformat.v4.new_markdown_cell(source=lib_header))
114 | 
115 | def update_notebook_kernel(notebook, name, display_name=None):
116 |     if not display_name:
117 |         display_name = name
118 |     notebook['metadata'].update({"kernelspec": {
119 |         "name": name,
120 |         "display_name": display_name
121 |     }})
122 | 
123 | 
124 | def _update_svg_urls(notebook, pattern, filename, root_dir):
125 |     orgin_url, new_url = utils.split_config_str(pattern, 2)[0]
126 |     svg_re = re.compile('!\[.*\]\(([\.-_\w\d]+\.svg)\)')
127 |     for cell in notebook.cells:
128 |         if cell.cell_type == 'markdown':
129 |             lines = cell.source.split('\n')
130 |             for i, l in enumerate(lines):
131 |                 m = svg_re.search(l)
132 |                 if not m:
133 |                     continue
134 |                 path = os.path.relpath(os.path.realpath(os.path.join(
135 |                     root_dir, os.path.basename(filename), m[1])), root_dir)
136 |                 if not path.startswith(orgin_url):
137 |                     logging.warning("%s in %s does not start with %s"
138 |                                     "specified by replace_svg_url"%(
139 |                                         path, filename, orgin_url))
140 |                 else:
141 |                     url = new_url + path[len(orgin_url):]
142 |                     lines[i] = l.replace(m[1], url)
143 |             cell.source = '\n'.join(lines)
144 | 
145 | def _get_installation_cell(notebook, libs):
146 |     """Return a cell for installing the additional libs"""
147 |     lib_dict = dict(libs)
148 |     lib1_re = re.compile('from ([_\w\d]+) import')
149 |     lib2_re = re.compile('import ([_\w\d]+)')
150 |     find_libs = []
151 |     for cell in notebook.cells:
152 |         if cell.cell_type == 'code':
153 |             lines = cell.source.split('\n')
154 |             for l in lines:
155 |                 if l.strip().startswith('#'): # it's a comment
156 |                     continue
157 |                 m = lib1_re.search(l)
158 |                 if not m:
159 |                     m = lib2_re.search(l)
160 |                 if m and m[1] in lib_dict:
161 |                     find_libs.append(m[1])
162 |     if not find_libs and not notebook.metadata['required_libs']:
163 |         return None
164 |     install_str = ''
165 |     for lib in set(find_libs):
166 |         install_str += '!pip install ' + lib_dict[lib] + '\n'
167 |     for lib in notebook.metadata['required_libs']:
168 |         install_str += '!pip install ' + lib + '\n'
169 |     return nbformat.v4.new_code_cell(source=install_str)
170 | 


--------------------------------------------------------------------------------
/d2lbook/translate.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import glob
  3 | import sys
  4 | import os
  5 | from d2lbook import config, markdown, utils, common
  6 | import logging
  7 | import re
  8 | import glob
  9 | 
 10 | def translate():
 11 |     parser = argparse.ArgumentParser(description='Translate to another language')
 12 |     # Example usage: d2lbook translate --commit 35a64ab chapter_optimization chapter_computer-vision/anchor.md
 13 |     parser.add_argument('source', nargs='+', help='chapter directories or markdown files to activate')
 14 |     parser.add_argument('--commit', default='latest', help='the commit of the base repo')
 15 |     args = parser.parse_args(sys.argv[2:])
 16 | 
 17 |     cf = config.Config()
 18 |     trans = Translate(cf, args.commit)
 19 |     for source in args.source:
 20 |         # Check if source is a file or a chapter dir
 21 |         if not source.endswith(".md"):
 22 |             chap_dir = os.path.join(trans.repo_dir, source)
 23 |             if os.path.isdir(chap_dir):
 24 |                 logging.info(f'Translating all sections of {source}')
 25 |                 all_chap_secs = os.listdir(chap_dir)
 26 |                 for sec_name in all_chap_secs:
 27 |                     if sec_name.endswith(".md"):
 28 |                         trans.translate(os.path.join(source, sec_name))
 29 |             else:
 30 |                 logging.error(f'Invalid directory {source}: Please provide'
 31 |                               'a valid chapter name for translation')
 32 |         else:
 33 |             trans.translate(source)
 34 | 
 35 | class Translate(object):
 36 |     def __init__(self, cf: config.Config, commit: str):
 37 |         # init the original repo
 38 |         import git
 39 |         self.config = cf
 40 |         self.repo_dir = os.path.join(cf.tgt_dir, 'origin_repo')
 41 |         assert cf.translation['origin_repo'], 'much provide the origin repo'
 42 |         self.url = 'https://github.com/' + cf.translation['origin_repo']
 43 |         if os.path.exists(self.repo_dir):
 44 |             self.repo = git.Repo(self.repo_dir)
 45 |             logging.info(f'Pulling from {self.url} into {self.repo_dir}')
 46 |             # Reset to origin/master before pulling updates
 47 |             self.repo.git.reset('--hard', self.repo.remotes.origin.name + '/' + self.repo.active_branch.name)
 48 |             self.repo.remotes.origin.pull()
 49 |         else:
 50 |             logging.info(f'Clone {self.url} into {self.repo_dir}')
 51 |             self.repo = git.Repo.clone_from(self.url, self.repo_dir)
 52 |         if commit == 'latest':
 53 |             self.commit = str(self.repo.commit())[:7]
 54 |         else:
 55 |             self.repo.git.reset(commit, '--hard')
 56 |             self.commit = commit[:7]
 57 |         # init the translator
 58 |         self.translator = None
 59 |         if cf.translation['translator']:
 60 |             if cf.translation['translator'] == 'aws':
 61 |                 assert cf.project['lang']
 62 |                 assert cf.translation['origin_lang']
 63 |                 self.translator = AWS(cf.translation['origin_lang'], cf.project['lang'], cf.translation['terminology'])
 64 |             else:
 65 |                 logging.error(f'Unknown translator: {cf.translation["translator"]}')
 66 | 
 67 |     def translate(self, filename: str):
 68 |         src_fn = os.path.join(self.repo_dir, filename)
 69 |         fns = glob.glob(src_fn)
 70 |         if not len(fns):
 71 |             logging.warn('Not found '+src_fn)
 72 |             return
 73 |         if len(fns) > 1:
 74 |             for fn in fns:
 75 |                 self.translate(os.path.relpath(fn, self.repo_dir))
 76 |             return
 77 |         src_fn = fns[0]
 78 |         filename = os.path.relpath(src_fn, self.repo_dir)
 79 |         basename, ext = os.path.splitext(filename)
 80 |         origin_tgt_fn = os.path.join(self.config.src_dir,
 81 |                               basename+'_origin'+ext)
 82 |         tgt_fn = os.path.join(self.config.src_dir, filename)
 83 |         if os.path.exists(tgt_fn):
 84 |             logging.warn(f'File {tgt_fn} already exists, skip translation.')
 85 |             return
 86 |         logging.info(f'Write original text into {origin_tgt_fn}')
 87 |         utils.mkdir(os.path.dirname(origin_tgt_fn))
 88 |         with open(origin_tgt_fn, 'w') as f:
 89 |             with open(src_fn, 'r') as f2:
 90 |                 f.write(f2.read())
 91 | 
 92 |         if self.translator and ext == '.md':
 93 |             self.translator.translate_markdown(src_fn, tgt_fn)
 94 |             logging.info(f'Write translated results into {tgt_fn}')
 95 |         else:
 96 |             if not os.path.exists(tgt_fn):
 97 |                 with open(tgt_fn, 'w') as f:
 98 |                     logging.info(f'Create an empty file {tgt_fn}')
 99 | 
100 | 
101 | class MarkdownText(object):
102 |     def __init__(self):
103 |         self.mapping = []
104 | 
105 |     def _encode_pattern(self, pattern, text):
106 |         matched = set(re.findall(pattern, text))
107 |         for m in matched:
108 |             # another solution is use some special tokens and put them in
109 |             # the terminology. unfortuanly it doesn't work for amazon transcribe.
110 |             # So use a number instead, hope it will not be translated.
111 |             token = str(732293614+len(self.mapping))
112 |             text = text.replace(m, token)
113 |             self.mapping.append((m, token))
114 |         return text
115 | 
116 |     def encode(self, text:str) -> str:
117 |         patterns = [rf'(:{markdown.token}:`{markdown.token}`)', # mark
118 |                     rf'(`{markdown.token}`)',  # code
119 |                     rf'(\${markdown.token}\$)', # inline match
120 |                     rf'(\[{markdown.basic_token}\]\({markdown.basic_token}\))', # link
121 |                     ]
122 |         for p in patterns:
123 |             text = self._encode_pattern(p, text)
124 |         return text
125 | 
126 |     def decode(self, text:str) -> str:
127 |         for key, value in self.mapping:
128 |             text = text.replace(value, key)
129 |         text = text.replace('] (', '](')
130 |         return text
131 | 
132 | class Translator(object):
133 |     def translate(self, text: str):
134 |         raise NotImplemented()
135 | 
136 |     def _translate_markdown(self, text):
137 |         cells = markdown.split_markdown(text)
138 |         for cell in cells:
139 |             if cell['type'] == 'markdown':
140 |                 if 'class' in cell and cell['class']:
141 |                     # it may have nested code blocks
142 |                     cell['source'] = self._translate_markdown(cell['source'])
143 |                 else:
144 |                     text_cells = markdown.split_text(cell['source'])
145 |                     for t_cell in text_cells:
146 |                         if t_cell['source'] and (
147 |                             t_cell['type'] in ['text', 'list', 'title']):
148 |                             text = t_cell['source']
149 |                             markdown_text = MarkdownText()
150 |                             t_cell['source'] = markdown_text.decode(self.translate(
151 |                                 markdown_text.encode(text)))
152 |                             if text.endswith('\n'):
153 |                                 t_cell['source'] += '\n'
154 |                     cell['source'] = markdown.join_text(text_cells)
155 |         return markdown.join_markdown_cells(cells)
156 | 
157 |     def translate_markdown(self, src_fn: str, tgt_fn: str):
158 |         with open(src_fn, 'r') as r:
159 |             with open(tgt_fn, 'w') as w:
160 |                 w.write(self._translate_markdown(r.read()))
161 | 
162 | class AWS(Translator):
163 |     """Use Amazon Translate"""
164 |     def __init__(self, src_lang, target_lang, terminology=None):
165 |         import boto3
166 |         self.client = boto3.client('translate')
167 |         self.terminology = [terminology] if terminology else []
168 |         self.src_lang = src_lang
169 |         self.tgt_lang = target_lang
170 |         logging.info(f'Amazon Translate {src_lang} -> {target_lang}, terminology {self.terminology}')
171 | 
172 |     def translate(self, text: str):
173 |         text = text.replace('\n', ' ')
174 |         print(text)
175 |         resp = self.client.translate_text(
176 |             Text=text, TerminologyNames=self.terminology,
177 |             SourceLanguageCode=self.src_lang, TargetLanguageCode=self.tgt_lang)
178 |         return resp['TranslatedText']
179 | 
180 | if __name__ == "__main__":
181 |     logging.basicConfig(format='[d2lbook:%(filename)s:L%(lineno)d] %(levelname)-6s %(message)s')
182 |     logging.getLogger().setLevel(logging.INFO)
183 | 
184 |     if len(sys.argv) == 5:
185 |         _, src_fn, src_lang, tgt_fn, tgt_lang = sys.argv
186 |         terminology = None
187 |     elif len(sys.argv) == 6:
188 |         _, src_fn, src_lang, tgt_fn, tgt_lang, terminology = sys.argv
189 |     else:
190 |         exit(-1)
191 |     translator = AWS(src_lang, tgt_lang, terminology)
192 |     translator.translate_markdown(src_fn, tgt_fn)
193 | 
194 | 
195 | 


--------------------------------------------------------------------------------
/d2lbook/slides.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import glob
  3 | import sys
  4 | from d2lbook import config, notebook, common, utils
  5 | import logging
  6 | import glob
  7 | import nbformat
  8 | import pathlib
  9 | import os
 10 | from nbformat import notebooknode
 11 | from typing import Optional
 12 | import re
 13 | 
 14 | def slides():
 15 |     parser = argparse.ArgumentParser(
 16 |         description='Generate slides from markdown files.')
 17 |     parser.add_argument('filename', nargs='+',
 18 |                         help='the source markdown files')
 19 |     parser.add_argument('--tab', default=None, help='the tab')
 20 |     args = parser.parse_args(sys.argv[2:])
 21 |     tab = args.tab
 22 |     cf = config.Config()
 23 |     sd = Slides(cf)
 24 |     for fn in args.filename:
 25 |         fns = glob.glob(fn)
 26 |         if not len(fns):
 27 |             logging.warning('Not found ' + fn)
 28 |             return
 29 |         for md in fns:
 30 |             with open(md, 'r') as f:
 31 |                 nb = notebook.read_markdown(f.read())
 32 |             if tab:
 33 |                 nb = notebook.split_markdown_cell(nb)
 34 |                 nb = notebook.get_tab_notebook(nb, tab, cf.default_tab)
 35 |             output_fn = str(pathlib.Path(md).with_suffix('')) + (
 36 |                 '_' + tab if tab else '_') + '_slides.ipynb'
 37 |             sd.generate(nb, output_fn)
 38 | 
 39 | class Slides():
 40 |     def __init__(self, config):
 41 |         self._valid = config.slides and config.slides['github_repo']
 42 |         if not self._valid:
 43 |             return
 44 |         self.config = config
 45 |         repo = utils.split_config_str(self.config.slides['github_repo'], 2)
 46 |         self._repo = {r[0]: r[1] for r in repo}
 47 | 
 48 |     def deploy(self):
 49 |         if not self._valid:
 50 |             return
 51 |         repo = self._repo.get(self.config.tab, '')
 52 |         if not repo:
 53 |             return
 54 |         bash_fname = os.path.join(os.path.dirname(__file__),
 55 |                                   'upload_github.sh')
 56 |         utils.run_cmd([
 57 |             'bash', bash_fname, self.config.slides_dir, repo,
 58 |             self.config.project['release']])
 59 | 
 60 |     def generate_readme(self):
 61 |         repo = self._repo.get(self.config.tab, '')
 62 |         if not self._valid or not repo: return
 63 | 
 64 |         root = os.path.join(self.config.src_dir,
 65 |                             self.config.build['index'] + '.md')
 66 |         notebooks = notebook.get_toc(root)
 67 |         items = []
 68 |         for nb in notebooks:
 69 |             p = (self.config.slides_dir /
 70 |                  pathlib.Path(nb)).with_suffix('.ipynb')
 71 |             if p.exists():
 72 |                 p = str(p.relative_to(self.config.slides_dir))
 73 |                 base = 'https://nbviewer.jupyter.org/format/slides/github'
 74 |                 items.append(f' - [{p}]({base}/{repo}/blob/main/{p})')
 75 | 
 76 |         with open(os.path.join(self.config.slides_dir, 'README.md'), 'w') as f:
 77 |             f.write(f'# {repo}\n')
 78 |             f.write('''
 79 | This repo contains generated notebook slides. To open it locally, we suggest you to install the [rise](https://rise.readthedocs.io/en/stable/) extension.
 80 | 
 81 | You can also preview them in nbviwer:
 82 | ''')
 83 |             f.write('\n'.join(items))
 84 | 
 85 |     def generate(self, nb: notebooknode.NotebookNode, output_fn: str):
 86 |         """Get all slide blocks and write to file."""
 87 |         nb = _generate_slides(nb)
 88 |         if not nb: return
 89 | 
 90 |         nb['metadata'].update({
 91 |             'language_info': {
 92 |                 'name': 'python'},
 93 |             'celltoolbar': 'Slideshow',
 94 |             'rise': {
 95 |                 "autolaunch":
 96 |                 True,
 97 |                 "enable_chalkboard":
 98 |                 True,
 99 |                 "overlay":
100 |                 f"<div class='my-top-right'>{self.config.slides['top_right']}</div><div class='my-top-left'>{self.config.slides['top_left']}</div>",
101 |                 "scroll": 
102 |                 True
103 |             }})
104 |         dirname = os.path.dirname(output_fn)
105 |         utils.mkdir(dirname)
106 |         with open(output_fn, 'w') as f:
107 |             f.write(nbformat.writes(nb))
108 |         logging.info('Write slides into ' + output_fn)
109 | 
110 |         with open(dirname + '/rise.css', 'w') as f:
111 |             f.write('''
112 | div.text_cell_render.rendered_html {
113 |     padding: 0.35em 0.1em;
114 | }
115 | 
116 | div.code_cell {
117 |     font-size: 120%;
118 | }
119 | 
120 | div.my-top-right {
121 |     position: absolute;
122 |     right: 5%;
123 |     top: 1em;
124 |     font-size: 2em;
125 | }
126 | 
127 | div.my-top-left {
128 |     position: absolute;
129 |     left: 5%;
130 |     top: 1em;
131 |     font-size: 2em;
132 | }
133 | ''')
134 | 
135 | def remove_slide_marks(
136 |         nb: notebooknode.NotebookNode) -> notebooknode.NotebookNode:
137 |     """Remove all slide blocks and return."""
138 |     new_cells = []
139 |     for cell in nb.cells:
140 |         if cell.cell_type != 'markdown':
141 |             new_cells.append(cell)
142 |         else:
143 |             src = cell.source
144 |             matches = _match_slide_marks(cell.source)
145 |             for pair, text in matches:
146 |                 old = pair[0] + text + pair[1]
147 |                 new = '' if pair[0].endswith('~~') else text
148 |                 src = src.replace(old, new)
149 |             new_cells.append(nbformat.v4.new_markdown_cell(src))
150 |     return notebook.create_new_notebook(nb, new_cells)
151 | 
152 | def _generate_slides(
153 |         nb: notebooknode.NotebookNode) -> Optional[notebooknode.NotebookNode]:
154 |     new_cells = []
155 |     has_slides = False
156 |     for cell in nb.cells:
157 |         if cell.cell_type != 'markdown':
158 |             # remove comments
159 |             lines = cell.source.splitlines()
160 |             new_lines = []
161 |             for l in lines:
162 |                 new_l = re.sub(r'\#\ .*', '', l)
163 |                 if new_l != l and not new_l.rstrip(): 
164 |                     continue
165 |                 new_lines.append(new_l.rstrip())
166 |             cell.source = '\n'.join(new_lines)
167 |             new_cells.append(cell)
168 |         else:
169 |             slide_type = '-'
170 |             src = []
171 |             matches = _match_slide_marks(cell.source)
172 |             if matches:
173 |                 has_slides = True
174 |             for pair, text in matches:
175 |                 if pair[0].startswith('['):
176 |                     slide_type = 'slide'
177 |                 src.append(text)
178 |             src = '\n'.join(src)
179 |             if src:
180 |                 # cannot simply use . as it could be in code such as `a.text()`
181 |                 for m in ('.\n', '. '):
182 |                     sentences = [s.strip() for s in src.split(m)]
183 |                     src = m.join([s[0].upper() + s[1:] for s in sentences])
184 |                 src = src.replace('.$$', '$$').replace(',$$', '$$')
185 |                 src = src.rstrip(',. \n:，。：')
186 |             # find level-1 head
187 |             for l in cell.source.splitlines():
188 |                 if l.strip().startswith('# '):
189 |                     src = l + '\n\n' + src
190 |                     break
191 |             if not src: continue
192 |             new_cells.append(
193 |                 nbformat.v4.new_markdown_cell(
194 |                     src, metadata={"slideshow": {
195 |                         "slide_type": slide_type}}))
196 |     if not has_slides:
197 |         return None
198 | 
199 |     # merge code cell in the same slide if they don't have output
200 |     md_code_group = common.group_list(new_cells,
201 |                                       lambda cell, _: cell.cell_type == 'code')
202 |     merged_code_cell = []
203 |     for is_code, group in md_code_group:
204 |         if not is_code:
205 |             merged_code_cell.extend(group)
206 |         else:
207 |             src = []
208 |             for i, cell in enumerate(group):
209 |                 src.append(cell.source)
210 |                 if i == len(group) - 1 or 'outputs' in cell and len(
211 |                         cell['outputs']):
212 |                     cell.source = '\n\n'.join(src)
213 |                     src = []
214 |                     merged_code_cell.append(cell)
215 |     # clean #@save
216 |     for cell in merged_code_cell:
217 |         if cell.cell_type == 'code':
218 |             cell.source = cell.source.replace( \
219 |                 '\n#@save\n', '\n').replace('#@save', '').strip()
220 |     return notebook.create_new_notebook(nb, merged_code_cell)
221 | 
222 | def _match_slide_marks(text: str):
223 |     """return the texts in a pair. cannot be recursive"""
224 |     # the pair marks to generate slides
225 |     pairs = (('[**', '**]'), ('(**', '**)'), ('[~~', '~~]'), ('(~~', '~~)'))
226 |     matches = []
227 |     for p in pairs:
228 |         assert len(p) == 2, f'not a valid pair: {p}'
229 |         start = [i for i in range(len(text)) if text.startswith(p[0], i)]
230 |         end = [i for i in range(len(text)) if text.startswith(p[1], i)]
231 |         assert len(start) == len(end), f'some {p} are not enclosed in {text}'
232 |         for i, (s, e) in enumerate(zip(start, end)):
233 |             s += len(p[0])
234 |             assert s <= e, f'some {p} are overlapped'
235 |             if i < len(start) - 1:
236 |                 assert e < start[i + 1], f'some {p} are overlapped'
237 |             # handle if it's a markdown link such as [**a**](https://xx)
238 |             if p[1].endswith(']') and text.startswith(p[1] + '(', e):
239 |                 continue
240 |             matches.append((p, s, e))
241 |     matches.sort(key=lambda x: x[1])
242 |     for i in range(len(matches) - 1):
243 |         assert matches[i][1] < matches[i+1][1], \
244 |                 f'some {matches[i][0]} and {matches[i+1][0]} are overlapped'
245 |     return [(p, text[s:e]) for p, s, e in matches]
246 | 


--------------------------------------------------------------------------------
/d2lbook/resource.py:
--------------------------------------------------------------------------------
  1 | """Manage compute resources
  2 | """
  3 | import dataclasses
  4 | import datetime
  5 | import logging
  6 | import multiprocessing as mp
  7 | import os
  8 | import random
  9 | import subprocess
 10 | import threading
 11 | import time
 12 | import traceback
 13 | from typing import Any, Optional, Sequence
 14 | import getpass
 15 | 
 16 | import fasteners
 17 | 
 18 | from d2lbook import utils
 19 | 
 20 | def get_available_gpus():
 21 |     """Return a list of available GPUs with their names"""
 22 |     cmd = 'nvidia-smi --query-gpu=name --format=csv,noheader'
 23 |     process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE,
 24 |                                stderr=subprocess.PIPE)
 25 |     stdout, _ = process.communicate()
 26 |     if process.returncode == 0:
 27 |         return stdout.decode().splitlines()
 28 |     return []
 29 | 
 30 | def get_notebook_gpus(notebook, max_gpus):
 31 |     """Return the # of GPUs needed for a notebook."""
 32 |     # several heuristics, not necessary accurate
 33 |     # TODO(mli) support a special mark in notebook to hint the #gpus
 34 |     single_gpu_patterns = ('gpu()', 'gpu(0)', 'device(\'cuda\')',
 35 |                            'device(\'/GPU:0\')', 'try_gpu()', 'try_gpu(0)', 'gpus=1')
 36 |     all_gpus_patterns = ('gpu(1)', 'device(\'cuda:1\')', 'device(\'/GPU:1\')',
 37 |                          'try_all_gpus', 'try_gpu(1)', 'gpus=2',
 38 |                          'gpus=3', 'gpus=4')
 39 |     n_gpus = 0
 40 |     for cell in notebook.cells:
 41 |         if cell.cell_type == 'code':
 42 |             if any([p in cell.source for p in single_gpu_patterns]):
 43 |                 n_gpus = max(n_gpus, 1)
 44 |             if any([p in cell.source for p in all_gpus_patterns]):
 45 |                 n_gpus = max(n_gpus, max_gpus)
 46 |     return n_gpus
 47 | 
 48 | @dataclasses.dataclass
 49 | class _Task():
 50 |     num_cpus: int
 51 |     num_gpus: int
 52 |     target: Any
 53 |     args: Sequence[Any]
 54 |     description: str
 55 |     process: Optional[Any] = None
 56 |     locks: Sequence[int] = dataclasses.field(default_factory=list)
 57 |     done: bool = False
 58 |     start_time: datetime.datetime = datetime.datetime.now()
 59 |     end_time: Optional[datetime.datetime] = None
 60 | 
 61 | class Process(mp.Process):
 62 |     def __init__(self, *args, **kwargs):
 63 |         mp.Process.__init__(self, *args, **kwargs)
 64 |         self._pconn, self._cconn = mp.Pipe()
 65 |         self._exception = None
 66 | 
 67 |     def run(self):
 68 |         try:
 69 |             mp.Process.run(self)
 70 |             self._cconn.send(None)
 71 |         except Exception as e:
 72 |             tb = traceback.format_exc()
 73 |             self._cconn.send((e, tb))
 74 | 
 75 |     @property
 76 |     def exception(self):
 77 |         if self._pconn.poll():
 78 |             self._exception = self._pconn.recv()
 79 |         return self._exception
 80 | 
 81 | class Scheduler():
 82 |     """A schedule run multiple jobs in parallel under the resource constraint."""
 83 |     def __init__(self, num_cpu_workers, num_gpu_workers):
 84 |         self._num_cpus = num_cpu_workers
 85 |         self._num_gpus = num_gpu_workers
 86 |         self._locks = [False] * (self._num_cpus + self._num_gpus)
 87 |         user = getpass.getuser()
 88 |         self._inter_locks = [
 89 |             fasteners.InterProcessLock(f'/tmp/d2lbook_{user}_cpu_{i}')
 90 |             for i in range(self._num_cpus)] + [
 91 |                 fasteners.InterProcessLock(f'/tmp/d2lbook_{user}_gpu_{i}')
 92 |                 for i in range(self._num_gpus)]
 93 |         self._tasks = []
 94 |         self._failed_tasks = []
 95 |         self._start_job_lock = fasteners.InterProcessLock(
 96 |             f'/tmp/d2lbook_{user}_start_job')
 97 | 
 98 |     def add(self, num_cpus, num_gpus, target, args, description=''):
 99 |         """Add tasks into the queue."""
100 |         assert not (num_cpus == 0 and num_gpus == 0), \
101 |                 'Need at least one CPU or GPU'
102 |         assert num_cpus <= self._num_cpus and num_gpus <= self._num_gpus, \
103 |             f'Not enough resources (CPU {self._num_cpus}, GPU {self._num_gpus} ) to run the task (CPU {num_cpus}, GPU {num_gpus})'
104 | 
105 |         if not description:
106 |             description = f'Target {target} with args {args}'
107 |         self._tasks.append(_Task(num_cpus, num_gpus, target, args,
108 |                                  description))
109 | 
110 |     @property
111 |     def failed_tasks(self):
112 |         return [(task.description, err, trace)
113 |                 for task, err, trace in self._failed_tasks]
114 | 
115 |     @property
116 |     def error_message(self):
117 |         if not self.failed_tasks:
118 |             return ''
119 |         errors = [
120 |             f'{len(self.failed_tasks)} notebooks are failed to evaluate:']
121 |         for task, err, trace in self.failed_tasks:
122 |             errors += [f'Task {task} exited with error: {err}', trace]
123 |         return '\n\n'.join(errors)
124 | 
125 |     def run(self):
126 |         """Run the tasks and block until they are done."""
127 |         def _device_info(task):
128 |             cpus = task.locks[:task.num_cpus]
129 |             gpus = [i - self._num_cpus for i in task.locks[task.num_cpus:]]
130 |             info = []
131 |             if cpus: info.append(f'CPU {cpus}')
132 |             if gpus: info.append(f'GPU {gpus}')
133 |             return ', '.join(info)
134 | 
135 |         def _runtime(task):
136 |             end_time = task.end_time if task.end_time else datetime.datetime.now(
137 |             )
138 |             return utils.get_time_diff(task.start_time, end_time)
139 | 
140 |         def _summary_heavy_tasks():
141 |             if self._tasks:
142 |                 logging.info(
143 |                     f'All {len(self._tasks)} tasks are done, sorting by runtime:'
144 |                 )
145 |                 self._tasks.sort(
146 |                     key=lambda task: (task.end_time - task.start_time).seconds)
147 |                 for task in self._tasks:
148 |                     logging.info(
149 |                         f'  - {_runtime(task)} on {_device_info(task)} for {task.description}'
150 |                     )
151 | 
152 |         def _status():
153 |             num_done, num_not_started, num_running = 0, 0, 0
154 |             for task in self._tasks:
155 |                 if task.done: num_done += 1
156 |                 if task.process: num_running += 1
157 |                 if not task.process and not task.done: num_not_started += 1
158 | 
159 |             logging.info(
160 |                 f'  Status: {num_running} running tasks, {num_done} done, {num_not_started} not started'
161 |             )
162 |             for task in self._tasks:
163 |                 if task.process:
164 |                     logging.info(
165 |                         f'    - Task "{task.description}" on {_device_info(task)} is running for {_runtime(task)}'
166 |                     )
167 | 
168 |         # try large gpu workloads first
169 |         self._tasks.sort(reverse=True, key=lambda task:
170 |                          (task.num_gpus, task.num_cpus))
171 | 
172 |         last_status_t = 0
173 |         for t in range(24 * 60 * 60):  # run at most 24 hours
174 |             if all([task.done for task in self._tasks]):
175 |                 break
176 | 
177 |             if t > last_status_t + 60:
178 |                 last_status_t = t
179 |                 _status()
180 | 
181 |             for task in self._tasks:
182 |                 if task.process or task.done:
183 |                     continue
184 |                 locks = self._lock(0, self._num_cpus, task.num_cpus) + \
185 |                         self._lock(self._num_cpus, self._num_cpus+self._num_gpus, task.num_gpus)
186 |                 if len(locks) < task.num_cpus + task.num_gpus:
187 |                     self._unlock(locks)
188 |                     continue
189 |                 task.locks = locks
190 |                 # a brutal fix to https://github.com/jupyter/nbconvert/issues/1066
191 |                 # if two ci jobs start to eval notebook at the same time, it may
192 |                 # cause the port bind conflict. here i require the ci job to acquire
193 |                 # a global lock for 1 sec.
194 |                 self._start_job_lock.acquire()
195 |                 message = f'Starting task "{task.description}" on {_device_info(task)}'
196 |                 logging.info(message)
197 |                 task.start_time = datetime.datetime.now()
198 |                 gpus = [i - self._num_cpus for i in locks[task.num_cpus:]]
199 |                 task.process = Process(target=_target,
200 |                                        args=(gpus, task.target, *task.args))
201 |                 task.process.start()
202 |                 _status()
203 |                 last_status_t = t
204 |                 time.sleep(1)
205 |                 self._start_job_lock.release()
206 |                 break
207 | 
208 |             # check if any one is finished
209 |             for task in self._tasks:
210 |                 if task.done or not task.process: continue
211 |                 if not task.process.is_alive():
212 |                     for lock in task.locks:
213 |                         self._locks[lock] = False
214 |                         self._inter_locks[lock].release()
215 |                     task.end_time = datetime.datetime.now()
216 |                     if task.process.exception:
217 |                         error, traceback = task.process.exception
218 |                         self._failed_tasks.append((task, error, traceback))
219 |                         logging.error(
220 |                             f'Task "{task.description}" on {_device_info(task)} exited with error: {error}\n{traceback}'
221 |                         )
222 |                     else:
223 |                         logging.info(
224 |                             f'Task "{task.description}" on {_device_info(task)} is finished in {_runtime(task)}'
225 |                         )
226 |                     task.process = None
227 |                     task.done = True
228 | 
229 |             time.sleep(1)
230 | 
231 |         _summary_heavy_tasks()
232 | 
233 |     def _lock(self, start, end, n):
234 |         ids = list(range(start, end))
235 |         random.shuffle(ids)
236 |         locks = []
237 |         for i in ids:
238 |             if len(locks) >= n:
239 |                 break
240 |             if self._inter_locks[i].acquire(
241 |                     blocking=False) and not self._locks[i]:
242 |                 self._locks[i] = True
243 |                 locks.append(i)
244 |         return locks
245 | 
246 |     def _unlock(self, locks):
247 |         for i in locks:
248 |             self._inter_locks[i].release()
249 |             self._locks[i] = False
250 | 
251 | def _target(gpus, target, *args):
252 |     if not gpus:
253 |         # it will triggler an runtime error if target actually uses a gpu
254 |         gpus = [""]
255 |     os.environ['CUDA_VISIBLE_DEVICES'] = ','.join([str(g) for g in gpus])
256 |     return target(*args)
257 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/docs/img/multi-lang.svg:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="UTF-8" standalone="no"?>
  2 | <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
  3 | <svg xmlns:xl="http://www.w3.org/1999/xlink" xmlns="http://www.w3.org/2000/svg" xmlns:dc="http://purl.org/dc/elements/1.1/" version="1.1" viewBox="240 219 376 230" width="376" height="230">
  4 |   <defs>
  5 |     <font-face font-family="Arial" font-size="11" panose-1="2 11 6 4 2 2 2 2 2 4" units-per-em="1000" underline-position="-105.95703" underline-thickness="73.24219" slope="0" x-height="518.5547" cap-height="716.3086" ascent="905.2734" descent="-211.91406" font-weight="400">
  6 |       <font-face-src>
  7 |         <font-face-name name="ArialMT"/>
  8 |       </font-face-src>
  9 |     </font-face>
 10 |     <marker orient="auto" overflow="visible" markerUnits="strokeWidth" id="StickArrow_Marker" stroke-linejoin="miter" stroke-miterlimit="10" viewBox="-1 -3 6 6" markerWidth="6" markerHeight="6" color="black">
 11 |       <g>
 12 |         <path d="M 4 0 L 0 0 M 0 -1.5 L 4 0 L 0 1.5" fill="none" stroke="currentColor" stroke-width="1"/>
 13 |       </g>
 14 |     </marker>
 15 |   </defs>
 16 |   <metadata> Produced by OmniGraffle 7.15.2 
 17 |     <dc:date>2020-05-12 19:46:54 +0000</dc:date>
 18 |   </metadata>
 19 |   <g id="multi-lang" fill-opacity="1" fill="none" stroke-opacity="1" stroke-dasharray="none" stroke="none">
 20 |     <title>multi-lang</title>
 21 |     <g id="multi-lang: Layer 1">
 22 |       <title>Layer 1</title>
 23 |       <g id="Graphic_25">
 24 |         <rect x="394" y="220.4" width="71.4" height="23.700004" fill="#66bfff"/>
 25 |         <rect x="394" y="220.4" width="71.4" height="23.700004" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
 26 |         <text transform="translate(399 226.07007)" fill="black">
 27 |           <tspan font-family="Arial" font-size="11" font-weight="400" fill="black" x="10.222706" y="10">.md files</tspan>
 28 |         </text>
 29 |       </g>
 30 |       <g id="Graphic_24">
 31 |         <rect x="338.4" y="265.4" width="71.4" height="23.700004" fill="#b2d9ff"/>
 32 |         <rect x="338.4" y="265.4" width="71.4" height="23.700004" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
 33 |         <text transform="translate(343.4 271.07007)" fill="black">
 34 |           <tspan font-family="Arial" font-size="11" font-weight="400" fill="black" x="4.714649" y="10">.ipynb files</tspan>
 35 |         </text>
 36 |       </g>
 37 |       <g id="Graphic_23">
 38 |         <rect x="301.8" y="342.25" width="71.4" height="23.700004" fill="#b2d9ff"/>
 39 |         <rect x="301.8" y="342.25" width="71.4" height="23.700004" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
 40 |         <text transform="translate(306.8 347.92008)" fill="black">
 41 |           <tspan font-family="Arial" font-size="11" font-weight="400" fill="black" x="11.753468" y="10">.rst files</tspan>
 42 |         </text>
 43 |       </g>
 44 |       <g id="Graphic_22">
 45 |         <rect x="301.8" y="383.2" width="71.4" height="23.700004" fill="#b2d9ff"/>
 46 |         <rect x="301.8" y="383.2" width="71.4" height="23.700004" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
 47 |         <text transform="translate(306.8 388.87008)" fill="black">
 48 |           <tspan font-family="Arial" font-size="11" font-weight="400" fill="black" x="12.967335" y="10">.pdf file</tspan>
 49 |         </text>
 50 |       </g>
 51 |       <g id="Graphic_21">
 52 |         <rect x="475.7" y="383.2" width="71.4" height="23.700004" fill="#b2d9ff"/>
 53 |         <rect x="475.7" y="383.2" width="71.4" height="23.700004" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
 54 |         <text transform="translate(480.7 388.87008)" fill="black">
 55 |           <tspan font-family="Arial" font-size="11" font-weight="400" fill="black" x="12.967335" y="10">.pdf file</tspan>
 56 |         </text>
 57 |       </g>
 58 |       <g id="Graphic_19">
 59 |         <rect x="240.6" y="265.4" width="71.4" height="23.700004" fill="white"/>
 60 |         <rect x="240.6" y="265.4" width="71.4" height="23.700004" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
 61 |         <text transform="translate(245.6 271.07007)" fill="black">
 62 |           <tspan font-family="Arial" font-size="11" font-weight="400" fill="black" x="13.270802" y="10">publish</tspan>
 63 |         </text>
 64 |       </g>
 65 |       <g id="Graphic_18">
 66 |         <rect x="475.7" y="424.15" width="71.4" height="23.700004" fill="white"/>
 67 |         <rect x="475.7" y="424.15" width="71.4" height="23.700004" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
 68 |         <text transform="translate(480.7 429.8201)" fill="black">
 69 |           <tspan font-family="Arial" font-size="11" font-weight="400" fill="black" x="13.270802" y="10">publish</tspan>
 70 |         </text>
 71 |       </g>
 72 |       <g id="Graphic_16">
 73 |         <rect x="301.8" y="424.15" width="71.4" height="23.700004" fill="white"/>
 74 |         <rect x="301.8" y="424.15" width="71.4" height="23.700004" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
 75 |         <text transform="translate(306.8 429.8201)" fill="black">
 76 |           <tspan font-family="Arial" font-size="11" font-weight="400" fill="black" x="13.270802" y="10">publish</tspan>
 77 |         </text>
 78 |       </g>
 79 |       <g id="Line_15">
 80 |         <line x1="415.05867" y1="244.1" x2="393.32747" y2="261.6882" marker-end="url(#StickArrow_Marker)" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
 81 |       </g>
 82 |       <g id="Line_14">
 83 |         <line x1="368.4564" y1="289.1" x2="345.68047" y2="336.92326" marker-end="url(#StickArrow_Marker)" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
 84 |       </g>
 85 |       <g id="Line_13">
 86 |         <line x1="338.4" y1="277.25" x2="317.9" y2="277.25" marker-end="url(#StickArrow_Marker)" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
 87 |       </g>
 88 |       <g id="Line_10">
 89 |         <line x1="337.5" y1="365.95" x2="337.5" y2="377.3" marker-end="url(#StickArrow_Marker)" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
 90 |       </g>
 91 |       <g id="Line_9">
 92 |         <line x1="511.4" y1="365.95" x2="511.4" y2="377.3" marker-end="url(#StickArrow_Marker)" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
 93 |       </g>
 94 |       <g id="Line_8">
 95 |         <line x1="337.5" y1="406.9" x2="337.5" y2="418.25" marker-end="url(#StickArrow_Marker)" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
 96 |       </g>
 97 |       <g id="Line_7">
 98 |         <line x1="511.4" y1="406.9" x2="511.4" y2="418.25" marker-end="url(#StickArrow_Marker)" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
 99 |       </g>
100 |       <g id="Graphic_6">
101 |         <text transform="translate(354.1416 247.4)" fill="black">
102 |           <tspan font-family="Arial" font-size="11" font-weight="400" fill="black" x="0" y="10">Step 1:</tspan>
103 |         </text>
104 |       </g>
105 |       <g id="Graphic_5">
106 |         <text transform="translate(513.8 248.2)" fill="black">
107 |           <tspan font-family="Arial" font-size="11" font-weight="400" fill="black" x="0" y="10">Step 2:</tspan>
108 |         </text>
109 |       </g>
110 |       <g id="Graphic_4">
111 |         <text transform="translate(301 307)" fill="black">
112 |           <tspan font-family="Arial" font-size="11" font-weight="400" fill="black" x="0" y="10">Step 3:</tspan>
113 |         </text>
114 |       </g>
115 |       <g id="Graphic_3">
116 |         <text transform="translate(257.7 369)" fill="black">
117 |           <tspan font-family="Arial" font-size="11" font-weight="400" fill="black" x="0" y="10">Step 4:</tspan>
118 |         </text>
119 |       </g>
120 |       <g id="Graphic_2">
121 |         <text transform="translate(257.7 412.2)" fill="black">
122 |           <tspan font-family="Arial" font-size="11" font-weight="400" fill="black" x="0" y="10">Step 5:</tspan>
123 |         </text>
124 |       </g>
125 |       <g id="Graphic_26">
126 |         <rect x="443.3" y="266.2" width="71.4" height="23.700004" fill="#b2d9ff"/>
127 |         <rect x="443.3" y="266.2" width="71.4" height="23.700004" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
128 |         <text transform="translate(448.3 271.87007)" fill="black">
129 |           <tspan font-family="Arial" font-size="11" font-weight="400" fill="black" x="4.714649" y="10">.ipynb files</tspan>
130 |         </text>
131 |       </g>
132 |       <g id="Line_27">
133 |         <line x1="442.4556" y1="244.1" x2="461.9219" y2="262.18433" marker-end="url(#StickArrow_Marker)" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
134 |       </g>
135 |       <g id="Graphic_28">
136 |         <rect x="543.9" y="266.2" width="71.4" height="23.700004" fill="white"/>
137 |         <rect x="543.9" y="266.2" width="71.4" height="23.700004" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
138 |         <text transform="translate(548.9 271.87007)" fill="black">
139 |           <tspan font-family="Arial" font-size="11" font-weight="400" fill="black" x="13.270802" y="10">publish</tspan>
140 |         </text>
141 |       </g>
142 |       <g id="Line_29">
143 |         <line x1="514.7" y1="278.05" x2="538" y2="278.05" marker-end="url(#StickArrow_Marker)" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
144 |       </g>
145 |       <g id="Graphic_30">
146 |         <rect x="475.7" y="342.25" width="71.4" height="23.700004" fill="#b2d9ff"/>
147 |         <rect x="475.7" y="342.25" width="71.4" height="23.700004" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
148 |         <text transform="translate(480.7 347.92008)" fill="black">
149 |           <tspan font-family="Arial" font-size="11" font-weight="400" fill="black" x="11.753468" y="10">.rst files</tspan>
150 |         </text>
151 |       </g>
152 |       <g id="Line_31">
153 |         <line x1="484.0485" y1="289.9" x2="504.039" y2="336.82208" marker-end="url(#StickArrow_Marker)" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
154 |       </g>
155 |       <g id="Graphic_32">
156 |         <rect x="389" y="383.2" width="71.4" height="23.700004" fill="#b2d9ff"/>
157 |         <rect x="389" y="383.2" width="71.4" height="23.700004" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
158 |         <text transform="translate(394 388.87008)" fill="black">
159 |           <tspan font-family="Arial" font-size="11" font-weight="400" fill="black" x="7.472706" y="10">.html files</tspan>
160 |         </text>
161 |       </g>
162 |       <g id="Graphic_33">
163 |         <rect x="389" y="424.15" width="71.4" height="23.700004" fill="white"/>
164 |         <rect x="389" y="424.15" width="71.4" height="23.700004" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
165 |         <text transform="translate(394 429.8201)" fill="black">
166 |           <tspan font-family="Arial" font-size="11" font-weight="400" fill="black" x="13.270802" y="10">publish</tspan>
167 |         </text>
168 |       </g>
169 |       <g id="Line_34">
170 |         <line x1="424.7" y1="406.9" x2="424.7" y2="418.25" marker-end="url(#StickArrow_Marker)" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
171 |       </g>
172 |       <g id="Graphic_37">
173 |         <rect x="389" y="306.75" width="71.4" height="23.700004" fill="#b2d9ff"/>
174 |         <rect x="389" y="306.75" width="71.4" height="23.700004" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
175 |         <text transform="translate(394 312.42007)" fill="black">
176 |           <tspan font-family="Arial" font-size="11" font-weight="400" fill="black" x="4.714649" y="10">.ipynb files</tspan>
177 |         </text>
178 |       </g>
179 |       <g id="Line_39">
180 |         <line x1="463.1318" y1="289.9" x2="445.2955" y2="303.21976" marker-end="url(#StickArrow_Marker)" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
181 |       </g>
182 |       <g id="Line_40">
183 |         <line x1="388.60085" y1="289.1" x2="405.6306" y2="303.0166" marker-end="url(#StickArrow_Marker)" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
184 |       </g>
185 |       <g id="Line_41">
186 |         <line x1="424.7" y1="365.95" x2="424.7" y2="377.3" marker-end="url(#StickArrow_Marker)" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
187 |       </g>
188 |       <g id="Graphic_42">
189 |         <rect x="389" y="342.25" width="71.4" height="23.700004" fill="#b2d9ff"/>
190 |         <rect x="389" y="342.25" width="71.4" height="23.700004" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
191 |         <text transform="translate(394 347.92008)" fill="black">
192 |           <tspan font-family="Arial" font-size="11" font-weight="400" fill="black" x="11.753468" y="10">.rst files</tspan>
193 |         </text>
194 |       </g>
195 |       <g id="Line_43">
196 |         <line x1="424.7" y1="330.45" x2="424.7" y2="336.35" marker-end="url(#StickArrow_Marker)" stroke="black" stroke-linecap="round" stroke-linejoin="round" stroke-width="1"/>
197 |       </g>
198 |       <g id="Graphic_44">
199 |         <text transform="translate(410.5208 288.75)" fill="black">
200 |           <tspan font-family="Arial" font-size="11" font-weight="400" fill="black" x="0" y="10">Step 4:</tspan>
201 |         </text>
202 |       </g>
203 |     </g>
204 |   </g>
205 | </svg>
206 | 


--------------------------------------------------------------------------------
/d2lbook/rst.py:
--------------------------------------------------------------------------------
  1 | """utilities to handle rst files"""
  2 | import re
  3 | import logging
  4 | from typing import Dict
  5 | import nbconvert
  6 | import nbformat
  7 | from nbformat import notebooknode
  8 | from d2lbook import notebook
  9 | from d2lbook import common
 10 | from d2lbook import markdown
 11 | 
 12 | def convert_notebook(nb: notebooknode.NotebookNode, resources: Dict[str, str]):
 13 |     nb = _process_nb(nb)
 14 |     writer = nbconvert.RSTExporter()
 15 |     body, resources = writer.from_notebook_node(nb, resources)
 16 |     body = _process_rst(body)
 17 |     return body, resources
 18 | 
 19 | def _process_nb(nb):
 20 |     # add empty lines before and after a mark/fence
 21 |     new_cells = []
 22 |     for cell in nb.cells:
 23 |         if cell.cell_type == 'markdown':
 24 |             md_cells = markdown.split_markdown(cell.source)
 25 |             for i, md_cell in enumerate(md_cells):
 26 |                 if i < len(md_cells) - 1 and md_cells[i+1]['type'] == 'code':
 27 |                     md_cells[i]['source'] += '\n'
 28 |                 if md_cell['type'] == 'markdown':
 29 |                     lines = md_cells[i]['source'].split('\n')
 30 |                     for j, line in enumerate(lines):
 31 |                         m = common.md_mark_pattern.match(line)
 32 |                         if (m is not None
 33 |                             and m[1] not in ('ref', 'numref', 'eqref')
 34 |                             and m.end() == len(line)):
 35 |                             lines[j] = '\n'+line+'\n'
 36 |                     md_cells[i]['source'] = '\n'.join(lines)
 37 |             new_cells.append(nbformat.v4.new_markdown_cell(
 38 |                 markdown.join_markdown_cells(md_cells)))
 39 |         else:
 40 |             new_cells.append(cell)
 41 |     # hide/show
 42 |     hide_all = False
 43 |     for cell in new_cells:
 44 |         if cell.cell_type == 'code':
 45 |             src = cell.source.lower()
 46 |             if '#@hide_all' in src:
 47 |                 hide_all = True
 48 |             if hide_all or '# hide outputs' in src or ('#@hide' in src and '#@hide_code' not in src) or '#@hide_output' in src:
 49 |                 cell.outputs = []
 50 |             if hide_all or '# hide code' in src or ('#@hide' in src  and '#@hide_output' not in src) or '#@hide_code' in src:
 51 |                 cell.source = ''
 52 |     return notebook.create_new_notebook(nb, new_cells)
 53 | 
 54 | def _process_rst(body):
 55 | 
 56 |     def delete_lines(lines, deletes):
 57 |         return [line for i, line in enumerate(lines) if i not in deletes]
 58 |     def indented(line):
 59 |         return line.startswith('   ')
 60 | 
 61 |     def blank(line):
 62 |         return len(line.strip()) == 0
 63 | 
 64 |     def look_behind(i, cond, lines):
 65 |         indices = []
 66 |         while i < len(lines) and cond(lines[i]):
 67 |             indices.append(i)
 68 |             i = i + 1
 69 |         return indices
 70 | 
 71 |     lines = body.split('\n')
 72 | 
 73 |     # Preprocess table directives (later processed by "move .. _label: before a image, a section, or a table" below)
 74 |     #
 75 |     # E.g., 
 76 |     #
 77 |     # :Dataset vs. computer memory and computational power
 78 |     # 
 79 |     # +----------+----------------------------------------+
 80 |     # | Decade   | Dataset                                |
 81 |     # +==========+========================================+
 82 |     # | 1970     | 100 (Iris)                             |
 83 |     # +----------+----------------------------------------+
 84 |     #
 85 |     # Table: label:\ ``tab_intro_decade``
 86 |     #
 87 |     # ->
 88 |     #
 89 |     # .. table:: Dataset vs. computer memory and computational power
 90 |     # 
 91 |     #    +----------+----------------------------------------+
 92 |     #    | Decade   | Dataset                                |
 93 |     #    +==========+========================================+
 94 |     #    | 1970     | 100 (Iris)                             |
 95 |     #    +----------+----------------------------------------+
 96 |     # 
 97 |     # .. _tab_intro_decade:    
 98 |     i, deletes = 0, []
 99 |     while i < len(lines):
100 |         line = lines[i]
101 |         if line.startswith('Table: label:'):
102 |             line_i = 0
103 |             while line_i < len(line) and line[line_i] != '`':
104 |                 line_i += 1
105 |             assert line_i < len(line), "Original table label in rst file is assumed to be like Table: label:\ ``tab_intro_decade``"
106 |             lines[i] = ".. _" + line[line_i+2:-2]  + ":"
107 |             j = i
108 |             directives_in_table_caption = [":cite", ":numref", "eqref"]
109 |             while j > 0 and (not lines[j].startswith(":") or lines[j].startswith(tuple(directives_in_table_caption))):
110 |                 # Add indent for each line that is part of the table
111 |                 if lines[j].startswith("+") or lines[j].startswith("|"):
112 |                     lines[j] = "   " + lines[j]
113 |                 j -= 1
114 |             assert lines[j].startswith(":"), "Original table label in rst file is assumed to be like :Dataset vs. computer memory and computational power"
115 |             lines[j] = ".. table:: " + lines[j][1:]
116 |         i += 1
117 | 
118 |     # deletes: indices of lines to be deleted
119 |     i, deletes = 0, []
120 |     while i < len(lines):
121 |         line = lines[i]
122 |         # '.. code:: toc' -> '.. toctree::', then remove consecutive empty lines
123 |         # after the current line
124 |         if line.startswith('.. code:: toc'):
125 |             # convert into rst's toc block
126 |             lines[i] = '.. toctree::'
127 |             blanks = look_behind(i+1, blank, lines)
128 |             deletes.extend(blanks)
129 |             i += len(blanks)
130 |         # .. code:: eval_rst
131 |         #
132 |         #
133 |         #    .. only:: html
134 |         #
135 |         #       References
136 |         #       ==========
137 |         # ->
138 |         #
139 |         #
140 |         #
141 |         # .. only:: html
142 |         #
143 |         #    References
144 |         #    ==========
145 |         elif line.startswith('.. code:: eval_rst'):
146 |             # make it a rst block
147 |             deletes.append(i)
148 |             j = i + 1
149 |             while j < len(lines):
150 |                 line_j = lines[j]
151 |                 if indented(line_j):
152 |                     lines[j] = line_j[3:]
153 |                     if lines[j].strip().startswith('.. '):
154 |                         lines[j] = '\n'+lines[j].strip()
155 |                 elif not blank(line_j):
156 |                     break
157 |                 j += 1
158 |             i = j
159 |         elif line.startswith('.. code::'):
160 |             # reset LaTeX code-block rendering parameters
161 |             lines[i] = '.. raw:: latex\n\n   \\diilbookstyleinputcell\n\n' + lines[i]
162 |         elif line.startswith('.. parsed-literal::'):
163 |             # reset LaTeX code-block rendering parameters
164 |             lines[i] = '.. raw:: latex\n\n   \\diilbookstyleoutputcell\n\n' + lines[i]
165 |             # add a output class so we can add customized css
166 |             lines[i] += '\n    :class: output'
167 |             i += 1
168 |         # .. figure:: ../img/jupyter.png
169 |         #    :alt: Output after running Jupyter Notebook. The last row is the URL
170 |         #    for port 8888.
171 |         #
172 |         #    Output after running Jupyter Notebook. The last row is the URL for
173 |         #    port 8888.
174 |         #
175 |         # :width:``700px``
176 |         #
177 |         # :label:``fig_jupyter``
178 |         #->
179 |         # .. _fig_jupyter:
180 |         #
181 |         # .. figure:: ../img/jupyter.png
182 |         #    :width: 700px
183 |         #
184 |         #    Output after running Jupyter Notebook. The last row is the URL for
185 |         #    port 8888.
186 |         elif indented(line) and ':alt:' in line:
187 |             # Image caption, remove :alt: block, it cause trouble for long captions
188 |             caps = look_behind(i, lambda l: indented(l) and not blank(l), lines)
189 |             deletes.extend(caps)
190 |             i += len(caps)
191 |         # .. table:: Dataset versus computer memory and computational power
192 |         #    +-...
193 |         #    |
194 |         #    +-...
195 |         #
196 |         # :label:``tab_intro_decade``
197 |         # ->
198 |         # .. _tab_intro_decade:
199 |         #
200 |         # .. table:: Dataset versus computer memory and computational power
201 |         #
202 |         #    +-...
203 |         #    |
204 |         #    +-...
205 |         #
206 |         elif line.startswith('.. table::'):
207 |             # Add indent to table caption for long captions
208 |             caps = look_behind(i+1, lambda l: not indented(l) and not blank(l),
209 |                                lines)
210 |             for j in caps:
211 |                 lines[j] = '   ' + lines[j]
212 |             i += len(caps) + 1
213 |         else:
214 |             i += 1
215 | 
216 |     # change :label:my_label: into rst format
217 |     lines = delete_lines(lines, deletes)
218 |     deletes = []
219 | 
220 |     for i, line in enumerate(lines):
221 |         pos, new_line = 0, ''
222 |         while True:
223 |             match = common.rst_mark_pattern.search(line, pos)
224 |             if match is None:
225 |                 new_line += line[pos:]
226 |                 break
227 |             # e.g., case :math:`x`, :numref:`y`, match[0] = ':math:'
228 |             elif match[2] is None:
229 |                 end = match.end()
230 |                 new_line += line[pos:end]
231 |                 pos = end
232 |                 continue
233 | 
234 |             start, end = match.start(), match.end()
235 |             # e.g., origin=':label:``fig_jupyter``', key='label', value='fig_jupyter'
236 |             origin, key, value = match[0], match[1], match[2]
237 |             assert value.startswith('``') and value.endswith('``'), value
238 |             value = value[2:-2]
239 |             new_line += line[pos:start]
240 |             pos = end
241 |             # assert key in ['label', 'eqlabel', 'ref', 'numref', 'eqref', 'width', 'height', 'citet', 'citep'], 'unknown key: ' + key
242 |             if key == 'label':
243 |                 new_line += '.. _' + value + ':'
244 |             elif key in ['ref', 'numref', 'cite']:
245 |                 new_line += ':'+key+':`'+value+'`'
246 |             elif key == 'citet':
247 |                 new_line += ':cite:t:`'+value+'`'
248 |             elif key == 'citep':
249 |                 new_line += ':cite:p:`'+value+'`'
250 |             elif key == 'eqref':
251 |                 new_line += ':eq:`'+value+'`'
252 |             elif key in ['class', 'func', 'mod']:
253 |                 new_line += ':py:'+key+':`'+value+'`'
254 |             # .. math:: f
255 |             #
256 |             # :eqlabel:``gd-taylor``
257 |             # ->
258 |             # .. math:: f
259 |             #    :label: gd-taylor
260 |             elif key == 'eqlabel':
261 |                 new_line += '   :label: '+value
262 |                 if blank(lines[i-1]):
263 |                     deletes.append(i-1)
264 |             elif key in ['width', 'height']:
265 |                 new_line += '   :'+key+': '+value
266 |             elif key == 'bibliography':
267 |                 # a hard coded plain bibtex style...
268 |                 new_line += ('.. bibliography:: ' + value +
269 |                              '\n   :style: apa')
270 |                              # '\n   :style: apa\n   :all:') MM 20200104 removed ':all:' so only the cited references get printed
271 |             else:
272 |                 logging.fatal(f'unknown key {key}')
273 | 
274 |         lines[i] = new_line
275 |     lines = delete_lines(lines, deletes)
276 | 
277 |     def move(i, j): # move line i to line j
278 |         lines.insert(j, lines[i])
279 |         if i > j:
280 |             del lines[i+1]
281 |         else:
282 |             del lines[i]
283 | 
284 |     # move :width: or :width: just below .. figure::
285 |     for i, line in enumerate(lines):
286 |         if line.startswith('.. figure::'):
287 |             for j in range(i+1, len(lines)):
288 |                 line_j = lines[j]
289 |                 if not indented(line_j) and not blank(line_j):
290 |                     break
291 |                 if line_j.startswith('   :width:') or line_j.startswith('   :height:'):
292 |                     move(j, i+1)
293 | 
294 |     # move .. _label: before a image, a section, or a table
295 |     lines.insert(0, '')
296 |     i = 0
297 |     while i < len(lines):
298 |         line = lines[i]
299 |         if line.startswith('.. _'):
300 |             for j in range(i-1, -1, -1):
301 |                 line_j = lines[j]
302 |                 if (line_j.startswith('.. table:')
303 |                     or line_j.startswith('.. figure:')):
304 |                     move(i, j-1)
305 |                     lines.insert(j-1, '')
306 |                     i += 1  # Due to insertion of a blank line
307 |                     break
308 |                 if (len(set(line_j)) == 1
309 |                     and line_j[0] in ['=','~','_', '-']):
310 |                     k = max(j-2, 0)
311 |                     move(i, k)
312 |                     lines.insert(k, '')
313 |                     i += 1  # Due to insertion of a blank line
314 |                     break
315 |         i += 1
316 | 
317 |     # change .. image:: to .. figure:: to they will be center aligned
318 |     for i, line in enumerate(lines):
319 |         if '.. image::' in line:
320 |             lines[i] = line.replace('.. image::', '.. figure::')
321 | 
322 |     # sometimes the code results contains vt100 codes, widely used for
323 |     # coloring, while it is not supported by latex.
324 |     for i, l in enumerate(lines):
325 |         lines[i] = re.sub(r'\x1b\[[\d;]*K', '',
326 |                           re.sub(r'\x1b\[[\d;]*m', '', l))
327 | 
328 |     return '\n'.join(lines)
329 | 


--------------------------------------------------------------------------------
/d2lbook/notebook.py:
--------------------------------------------------------------------------------
  1 | """utilities to handle notebooks"""
  2 | 
  3 | from typing import Union, List, Optional
  4 | 
  5 | import os
  6 | import copy
  7 | import notedown
  8 | import nbformat
  9 | import nbconvert
 10 | from nbformat import notebooknode
 11 | from d2lbook import markdown
 12 | from d2lbook import common
 13 | from d2lbook import config
 14 | 
 15 | def create_new_notebook(
 16 |         nb: notebooknode.NotebookNode,
 17 |         cells: List[notebooknode.NotebookNode]) -> notebooknode.NotebookNode:
 18 |     """create an empty notebook by copying metadata from nb"""
 19 |     new_nb = copy.deepcopy(nb)
 20 |     new_nb.cells = cells
 21 |     return new_nb
 22 | 
 23 | def read(fname: str):
 24 |     if not os.path.exists(fname) or os.stat(fname).st_size == 0:
 25 |         return None
 26 |     with open(fname, 'r') as f:
 27 |         return nbformat.read(f, as_version=4)
 28 | 
 29 | def read_markdown(source: Union[str, List[str]]) -> notebooknode.NotebookNode:
 30 |     """Returns a notebook from markdown source"""
 31 |     if not isinstance(source, str):
 32 |         source = '\n'.join(source)
 33 |     reader = notedown.MarkdownReader(match='strict')
 34 |     return reader.reads(source)
 35 | 
 36 | def split_markdown_cell(
 37 |         nb: notebooknode.NotebookNode) -> notebooknode.NotebookNode:
 38 |     """split a markdown cell if it contains tab block.
 39 | 
 40 |     a new property `class` is added to the metadata for a tab cell.
 41 |     """
 42 |     # merge continous markdown cells
 43 |     grouped_cells = common.group_list(
 44 |         nb.cells, lambda cell, _: cell.cell_type == 'markdown')
 45 |     new_cells = []
 46 |     for is_md, group in grouped_cells:
 47 |         if not is_md:
 48 |             new_cells.extend(group)
 49 |         else:
 50 |             src = '\n\n'.join(cell.source for cell in group)
 51 |             md_cells = markdown.split_markdown(src)
 52 |             is_tab_cell = lambda cell, _: cell['class'] if (cell[
 53 |                 'type'] == 'markdown' and 'class' in cell) else 'not_tab_cell'
 54 |             grouped_md_cells = common.group_list(md_cells, is_tab_cell)
 55 |             for tab, md_group in grouped_md_cells:
 56 |                 new_cell = nbformat.v4.new_markdown_cell(
 57 |                     markdown.join_markdown_cells(md_group))
 58 |                 if tab != 'not_tab_cell':
 59 |                     assert tab.startswith('`') and tab.endswith('`'), tab
 60 |                     new_cell.metadata['tab'] = [
 61 |                         t.strip() for t in tab[1:-1].split(',')]
 62 |                 new_cells.append(new_cell)
 63 |     new_cells = [cell for cell in new_cells if cell.source]
 64 |     return create_new_notebook(nb, new_cells)
 65 | 
 66 | def get_cell_tab(cell: notebooknode.NotebookNode,
 67 |                  default_tab: str = '') -> List[str]:
 68 |     """Get the cell tab"""
 69 |     if 'tab' in cell.metadata:
 70 |         tab = cell.metadata['tab']
 71 |         return [tab] if type(tab) == str else tab
 72 |     if cell.cell_type != 'code':
 73 |         return []
 74 |     match = (common.source_tab_pattern.search(cell.source) or
 75 |              common.source_tab_pattern_2.search(cell.source))
 76 |     if match:
 77 |         return [tab.strip() for tab in match[1].split(',')]
 78 |     return [default_tab,]
 79 | 
 80 | def get_tab_notebook(nb: notebooknode.NotebookNode, tab: str,
 81 |                      default_tab: str) -> notebooknode.NotebookNode:
 82 |     """Returns a notebook with code/markdown cells that doesn't match tab
 83 |     removed.
 84 | 
 85 |     Return None if no cell matched the tab and nb contains code blocks.
 86 | 
 87 |     A `origin_pos` property is added to the metadata for each cell, which
 88 |     records its position in the original notebook `nb`.
 89 |     """
 90 |     if tab != default_tab:
 91 |         has_tab = False
 92 |         for cell in nb.cells:
 93 |             if tab in get_cell_tab(cell):
 94 |                 has_tab = True
 95 |                 break
 96 |         if not has_tab and any(
 97 |             [cell.cell_type == 'code' for cell in nb.cells]):
 98 |             return None
 99 | 
100 |     matched_tab = False
101 |     required_libs = []
102 |     new_cells = []
103 |     for i, cell in enumerate(nb.cells):
104 |         new_cell = copy.deepcopy(cell)
105 |         new_cell.metadata['origin_pos'] = i
106 |         cell_tab = get_cell_tab(new_cell, default_tab)
107 |         if not cell_tab:
108 |             new_cells.append(new_cell)
109 |         else:
110 |             if cell_tab == ['all'] or tab in cell_tab:
111 |                 # drop the cell contains `%load_ext d2lbook.tab`
112 |                 if '%load_ext d2lbook.tab' in new_cell.source:
113 |                     # Check if chapter specific libraries are required
114 |                     match = common.source_libs_required_pattern.search(new_cell.source)
115 |                     if match:
116 |                         # Extract the content within the square brackets
117 |                         libs_content = match.group(1)
118 |                         # Split the content into individual libraries
119 |                         required_libs = [lib.strip("'") for lib in libs_content.split(", ")]
120 |                     continue
121 |                 new_cell.metadata['tab'] = [tab]
122 |                 matched_tab = True
123 |                 # remove the tab from source
124 |                 lines = new_cell.source.split('\n')
125 |                 for j, line in enumerate(lines):
126 |                     src_tab = (common.source_tab_pattern.search(line) or
127 |                                common.source_tab_pattern_2.search(line))
128 |                     text_tab = common.md_mark_pattern.search(line)
129 |                     if src_tab or (text_tab and (text_tab[1] == 'begin_tab' or
130 |                                                  text_tab[1] == 'end_tab')):
131 |                         del lines[j]
132 | 
133 |                     # TODO, also remove the tailing #@save
134 |                     lines = _clean_if_branches(lines, tab)
135 |                     new_cell.source = '\n'.join(lines)
136 |                 new_cells.append(new_cell)
137 | 
138 |     nb.metadata['required_libs'] = required_libs
139 |     if not matched_tab and any([cell.cell_type == 'code'
140 |                                 for cell in nb.cells]):
141 |         return None
142 |     return create_new_notebook(nb, new_cells)
143 | 
144 | def _clean_if_branches(lines, tab):
145 |     """Handle special if branchs
146 |     """
147 |     #TODO make it more general purpose
148 |     mark = 'tab.selected'
149 |     if not any([mark in l for l in lines]):
150 |         return _clean_if_branches_old(lines, tab)
151 |     # 1 means in a matched if branch,
152 |     # 2 means in a not matched if branch
153 |     # 0 means others
154 |     mode = 0
155 |     indent = 0
156 |     _leading_spaces = lambda l: len(l) - len(l.lstrip())
157 |     new_lines = []
158 |     for i, l in enumerate(lines):
159 |         assert '\t' not in l, 'please use space in ' + l
160 |         if 'if' in l and mark in l:
161 |             mode = 1 if (f'"{tab}"' in l or f"'{tab}'" in l) else 2
162 |             indent = _leading_spaces(l)
163 |             continue
164 |         if mode != 0 and l.strip() != '' and _leading_spaces(l) <= indent:
165 |             # out of the if branch
166 |             mode = 0
167 |         if mode == 0:
168 |             new_lines.append(l)
169 |         if mode == 1:
170 |             new_lines.append(l[4:])
171 |     return new_lines
172 | 
173 | def _clean_if_branches_old(lines, tab):
174 |     """Handle speciall if branchs
175 |     """
176 |     #TODO make it more general purpose
177 |     mark = 'd2l.USE_'
178 |     matched = False
179 |     for l in lines:
180 |         if mark in l:
181 |             matched = True
182 |             break
183 |     if not matched:
184 |         return lines
185 |     # 1 means in a matched if branch,
186 |     # 2 means in a not matched if branch
187 |     # 0 means others
188 |     mode = 0
189 |     indent = 0
190 |     _leading_spaces = lambda l: len(l) - len(l.lstrip())
191 |     new_lines = []
192 |     for i, l in enumerate(lines):
193 |         assert '\t' not in l, 'please use space in ' + l
194 |         if 'if' in l and mark in l:
195 |             mode = 1 if mark+tab.upper() in l else 2
196 |             indent = _leading_spaces(l)
197 |             continue
198 |         if mode != 0 and l.strip() != '' and _leading_spaces(l) <= indent:
199 |             # out of the if branch
200 |             mode = 0
201 |         if mode == 0:
202 |             new_lines.append(l)
203 |         if mode == 1:
204 |             new_lines.append(l[4:])
205 |     return new_lines
206 | 
207 | def _has_output(cell):
208 |     """Return if a cell has output"""
209 |     return 'outputs' in cell and len(cell['outputs'])
210 | 
211 | def merge_tab_notebooks(
212 |     src_notebooks: List[notebooknode.NotebookNode]
213 | ) -> notebooknode.NotebookNode:
214 |     """Merge the tab notebooks into a single one.
215 | 
216 |     The reserved function of get_tab_notebook.
217 |     """
218 |     n = max([
219 |         max([cell.metadata['origin_pos'] for cell in nb.cells])
220 |         for nb in src_notebooks])
221 |     new_cells = [[] for _ in range(n + 1)]  # type: ignore
222 | 
223 |     # for compatability
224 |     tab_list = lambda tab: [tab] if type(tab) == str else tab
225 |     for nb in src_notebooks:
226 |         for cell in nb.cells:
227 |             cell = copy.deepcopy(cell)
228 |             p = cell.metadata['origin_pos']
229 |             if len(new_cells[p]):
230 |                 if _has_output(new_cells[p][-1]) or _has_output(
231 |                         cell) or new_cells[p][-1].source != cell.source:
232 |                     new_cells[p].append(cell)
233 |                 else:
234 |                     if 'tab' in cell.metadata:
235 |                         tab = tab_list(new_cells[p][-1].metadata['tab'])
236 |                         tab.extend(tab_list(cell.metadata['tab']))
237 |                         new_cells[p][-1].metadata['tab'] = tab
238 |             else:
239 |                 new_cells[p].append(cell)
240 |     expanded_cells = []
241 |     for cell in new_cells:
242 |         expanded_cells.extend(cell)
243 |     return create_new_notebook(src_notebooks[0], expanded_cells)
244 | 
245 | def _get_tab_bar(tabs, tab_id, default_tab, div_class=''):
246 |     code = f"```eval_rst\n\n.. raw:: html\n\n    <div class=\"mdl-tabs mdl-js-tabs mdl-js-ripple-effect\"><div class=\"mdl-tabs__tab-bar {div_class}\">"
247 |     for i, tab in enumerate(tabs):
248 |         active = 'is-active' if tab == default_tab else ''
249 |         code += f'<a href="#{tab}-{tab_id}-{i}" onclick="tagClick(\'{tab}\'); return false;" class="mdl-tabs__tab {active}">{tab}</a>'
250 |     code += "</div>\n```"
251 |     return nbformat.v4.new_markdown_cell(code)
252 | 
253 | def _get_tab_panel(cells, tab, tab_id, default_tab):
254 |     active = 'is-active' if tab == default_tab else ''
255 |     tab_panel_begin = nbformat.v4.new_markdown_cell(
256 |         f"```eval_rst\n.. raw:: html\n\n    <div class=\"mdl-tabs__panel {active}\" id=\"{tab}-{tab_id}\">\n```"
257 |     )
258 |     tab_panel_end = nbformat.v4.new_markdown_cell(
259 |         "```eval_rst\n.. raw:: html\n\n    </div>\n```")
260 |     return [tab_panel_begin, *cells, tab_panel_end]
261 | 
262 | def _merge_tabs(nb: notebooknode.NotebookNode, tabs: List[str]):
263 |     """merge side-by-side tabs into a single one.
264 | 
265 |     Returns a list of item, an item can be (False, a list of not-in-tab-cell) or
266 |     (True, a list of (tab_name, a list of cell-in-this-tab))
267 |     """
268 |     tab_status = lambda cell, _: 1 if get_cell_tab(cell) else 0
269 |     cell_groups = common.group_list(nb.cells, tab_status)
270 |     new_groups = []
271 |     for in_tab, cells in cell_groups:
272 |         if not in_tab:
273 |             new_groups.append((False, cells))
274 |             continue
275 |         # a special case that we can merge into non-tab cells
276 |         mergable = True
277 |         for cell in cells:
278 |             if set(cell.metadata['tab']) != set(tabs):
279 |                 mergable = False
280 |                 break
281 |         if mergable:
282 |             new_groups.append((False, cells))
283 |             continue
284 |         # the general case
285 |         group_dict = {tab: [] for tab in tabs}  # type: ignore
286 |         for cell in cells:
287 |             for tab in cell.metadata['tab']:
288 |                 group_dict[tab].append(cell)
289 |         group = [
290 |             (tab, group_dict[tab]) for tab in tabs if len(group_dict[tab])]
291 |         new_groups.append((True, group))
292 |     # merge two consecutive code blocks. The first
293 |     # code should not contain output
294 |     for is_tab, group in new_groups:
295 |         if not is_tab:
296 |             continue
297 |         for i, (tab, tab_cell) in enumerate(group):
298 |             new_tab_cell = []
299 |             for cell in tab_cell:
300 |                 if (len(new_tab_cell) > 0 and
301 |                         new_tab_cell[-1].cell_type == 'code' and
302 |                         cell.cell_type == 'code' and
303 |                         not _has_output(new_tab_cell[-1])):
304 |                     cell = copy.deepcopy(cell)
305 |                     cell.source = new_tab_cell[-1].source + '\n\n' + cell.source
306 |                     new_tab_cell[-1] = cell
307 |                 else:
308 |                     new_tab_cell.append(cell)
309 |             group[i] = (tab, new_tab_cell)
310 |     return new_groups
311 | 
312 | def add_html_tab(nb: notebooknode.NotebookNode,
313 |                  tabs: List[str]) -> notebooknode.NotebookNode:
314 |     """Add html codes for the tabs"""
315 |     cell_groups = _merge_tabs(nb, tabs)
316 |     all_tabs = common.flatten([[tab for tab, _ in group]
317 |                                for in_tab, group in cell_groups if in_tab])
318 |     # If there is only one tab, assume it's the default tab.
319 |     if len(set(all_tabs)) <= 1:
320 |         return nb
321 |     new_cells = []
322 |     for i, (in_tab, group) in enumerate(cell_groups):
323 |         if not in_tab:
324 |             new_cells.extend(group)
325 |         else:
326 |             cur_tabs = [tab for tab, _ in group]
327 |             div_class = "code"
328 |             for _, cells in group:
329 |                 if cells[0].cell_type != "code":
330 |                     div_class = "text"
331 |             new_cells.append(_get_tab_bar(cur_tabs, i, tabs[0], div_class))
332 |             for j, (tab, cells) in enumerate(group):
333 |                 new_cells.extend(
334 |                     _get_tab_panel(cells, tab, f'{i}-{j}', tabs[0]))
335 |             new_cells.append(
336 |                 nbformat.v4.new_markdown_cell(
337 |                     "```eval_rst\n.. raw:: html\n\n    </div>\n```"))
338 |     return create_new_notebook(nb, new_cells)
339 | 
340 | def get_toc(root, flat=True):
341 |     """return a list of files in the order defined by TOC"""
342 |     subpages = _get_subpages(root)
343 |     res = [root]
344 |     for fn in subpages:
345 |         if flat:
346 |             res.extend(get_toc(fn))
347 |         else:
348 |             res.append(get_toc(fn))
349 |     return res
350 | 
351 | def _get_subpages(input_fn):
352 |     """read toc in input_fn, returns what it contains"""
353 |     subpages = []
354 |     reader = notedown.MarkdownReader()
355 |     with open(input_fn, 'r', encoding='UTF-8') as f:
356 |         nb = reader.read(f)
357 |     for cell in nb.cells:
358 |         if (cell.cell_type == 'code' and 'attributes' in cell.metadata and
359 |                 'toc' in cell.metadata.attributes['classes']):
360 |             for l in cell.source.split('\n'):
361 |                 l = l.strip()
362 |                 if not l.startswith(':'):
363 |                     fn = os.path.join(os.path.dirname(input_fn), l + '.md')
364 |                     if os.path.exists(fn):
365 |                         subpages.append(fn)
366 |     return subpages
367 | 


--------------------------------------------------------------------------------
/d2lbook/library.py:
--------------------------------------------------------------------------------
  1 | """Save codes into library"""
  2 | from typing import List
  3 | from d2lbook import notebook
  4 | from d2lbook import common
  5 | import logging
  6 | import os
  7 | import copy
  8 | import re
  9 | import pathlib
 10 | import ast
 11 | import astor
 12 | from yapf.yapflib.yapf_api import FormatCode
 13 | import isort
 14 | 
 15 | HEADER = '#################   WARNING   ################\n'
 16 | def _write_header(f):
 17 |     f.write(HEADER)
 18 |     f.write('# The below part is generated automatically through:\n')
 19 |     f.write('#    d2lbook build lib\n')
 20 |     f.write('# Don\'t edit it directly\n\n')
 21 | 
 22 | def save_tab(notebooks: List[str], lib_fname: str, tab: str, default_tab: str):
 23 |     logging.info(
 24 |         f'Matching with the pattern: "#@save", searching for tab {tab}')
 25 |     custom_header = []
 26 |     if os.path.exists(lib_fname):
 27 |         with open(lib_fname, 'r') as f:
 28 |             lines = f.readlines()
 29 |         for i, l in enumerate(lines):
 30 |             if l.strip() == HEADER.strip():
 31 |                 custom_header = lines[:i]
 32 |                 break
 33 | 
 34 |     with open(lib_fname, 'w') as f:
 35 |         if custom_header:
 36 |             f.write(''.join(custom_header))
 37 |         _write_header(f)
 38 |         saved = []
 39 |         for nb in notebooks:
 40 |             saved.extend(_save_code(nb, tab=tab, default_tab=default_tab))
 41 |         f.write(_refactor_blocks(saved))
 42 |         logging.info('Saved %d blocks into %s', len(saved), lib_fname)
 43 | 
 44 | def save_version(version: str, version_fn: str):
 45 |     if version and version_fn:
 46 |         with open(version_fn, 'r', encoding='UTF-8') as f:
 47 |             lines = f.read().split('\n')
 48 |         for i, l in enumerate(lines):
 49 |             if '__version__' in l:
 50 |                 lines[i] = f'__version__ = "{version}"'
 51 |                 logging.info(f'save {lines[i]} into {version_fn}')
 52 |         with open(version_fn, 'w') as f:
 53 |             f.write('\n'.join(lines))
 54 | 
 55 | def _save_block(source: str, save_mark: str):
 56 |     if not save_mark: return ''
 57 |     lines = source.splitlines()
 58 |     block = []
 59 |     for i, l in enumerate(lines):
 60 |         m = re.search(f'# *{save_mark}', l)
 61 |         if m:
 62 |             l = l[:m.span()[0]].rstrip()
 63 |             if l: block.append(l)
 64 |             for j in range(i + 1, len(lines)):
 65 |                 l = lines[j]
 66 |                 if not l.startswith(' ') and len(l):
 67 |                     block.append(lines[j])
 68 |                 else:
 69 |                     for k in range(j, len(lines)):
 70 |                         if lines[k].startswith(' ') or not len(lines[k]):
 71 |                             block.append(lines[k])
 72 |                         else:
 73 |                             break
 74 |                     break
 75 |     return format_code('\n'.join(block))
 76 | 
 77 | def _save_code(input_fn, save_mark='@save', tab=None,
 78 |                default_tab=None):
 79 |     """get the code blocks (import, class, def) that will be saved"""
 80 |     with open(input_fn, 'r', encoding='UTF-8') as f:
 81 |         nb = notebook.read_markdown(f.read())
 82 |     if tab:
 83 |         nb = notebook.get_tab_notebook(nb, tab, default_tab)
 84 |         if not nb:
 85 |             return []
 86 |     saved = []
 87 |     for i, cell in enumerate(nb.cells):
 88 |         if cell.cell_type == 'code':
 89 |             block = _save_block(cell.source, save_mark)
 90 |             if block:
 91 |                 label = _find_latest_label(nb.cells[:i-1])
 92 |                 saved.append([block, label, input_fn])
 93 |     return saved
 94 | 
 95 | def _find_latest_label(cells):
 96 |     for cell in reversed(cells):
 97 |         if cell.cell_type == 'markdown':
 98 |             matches = re.findall(common.md_mark_pattern, cell.source)
 99 |             for m in reversed(matches):
100 |                 if m[0] == 'label' and 'sec_' in m[1]:
101 |                     return m[1]
102 |     return ''
103 | 
104 | def _refactor_blocks(saved_blocks):
105 |     # add label into docstring
106 |     for i, (block, label, _) in enumerate(saved_blocks):
107 |         if not label: continue        
108 |         modules = common.split_list(block.split('\n'), lambda l: l.startswith('def') or l.startswith('class'))
109 |         new_block = []
110 |         if modules[0]: new_block.append('\n'.join(modules[0]))
111 |         for m in modules[1:]:
112 |             parts = common.split_list(m, lambda l: '):' in l)
113 |             # find the docstring 
114 |             if len(parts) > 1:
115 |                 docstr = parts[1][1] if len(parts[1]) > 1 else common.head_spaces(m[0]) + '    '
116 |                 loc = f'Defined in :numref:{label}"""'
117 |                 if docstr.lstrip().startswith('"""') and docstr.endswith('"""'):
118 |                     parts[1][1] = docstr[:-3] + f'\n\n{common.head_spaces(docstr)}{loc}'
119 |                 else:
120 |                     parts[1].insert(1, f'{common.head_spaces(docstr)}"""{loc}')
121 |             new_block.append('\n'.join(common.flatten(parts)))                
122 |         saved_blocks[i][0] = '\n'.join(new_block)
123 | 
124 |     # merge @d2l.save_to_class
125 |     new_blocks = []
126 |     class_blocks = {}
127 |     for i, (block, _, _) in enumerate(saved_blocks):
128 |         lines = block.split('\n')
129 |         if lines[0].startswith('class'):
130 |             new_blocks.append(block)
131 |             m = re.search('class +([\w\_]+)', lines[0])
132 |             if m:                 
133 |                 class_blocks[m.groups()[0]] = len(new_blocks) - 1
134 |             continue
135 |         register = '@d2l.add_to_class'
136 |         if register in block:
137 |             parts = common.split_list(lines, lambda x: x.startswith(register))
138 |             if parts[0]:
139 |                 new_blocks.append(parts[0])
140 |             if len(parts) > 1:
141 |                 for p in parts[1:]:
142 |                     m = re.search('\@d2l\.add_to_class\(([\.\w\_]+)\)', p[0])
143 |                     if m:
144 |                         cls = m.groups()[0].split('.')[-1]
145 |                         new_blocks[class_blocks[cls]] += '\n\n' + '\n'.join(['    '+l for l in p[1:]])
146 |                 continue            
147 |         new_blocks.append(block)
148 | 
149 |     return '\n\n'.join(new_blocks)
150 | 
151 | 
152 | def _parse_mapping_config(config: str, split_line=True):
153 |     """Parse config such as: numpy -> asnumpy, reshape, ...
154 |     Return a list of string pairs
155 |     """
156 |     terms = []
157 |     for line in config.splitlines():
158 |         if split_line:
159 |             terms.extend(line.split(','))
160 |         else:
161 |             terms.append(line)
162 |     mapping = []
163 |     for term in terms:
164 |         term = term.strip()
165 |         if not term:
166 |             continue
167 |         if len(term.split('->')) == 2:
168 |             a, b = term.split('->')
169 |             mapping.append((a.strip(), b.strip()))
170 |         else:
171 |             mapping.append((term, term))
172 |     return mapping
173 | 
174 | def node_to_source(node):
175 |     if isinstance(node, ast.Constant):
176 |         return str(node.value)
177 |     return astor.to_source(node).rstrip()
178 | 
179 | def save_alias(tab_lib):
180 |     """Save alias into the library file"""
181 |     alias = ''
182 |     if 'alias' in tab_lib:
183 |         alias += tab_lib['alias'].strip() + '\n'
184 |     if 'lib_name' in tab_lib:
185 |         lib_name = tab_lib["lib_name"]
186 |         if 'simple_alias' in tab_lib:
187 |             mapping = _parse_mapping_config(tab_lib['simple_alias'])
188 |             for a, b in mapping:
189 |                 if a.endswith('('): a = a[:-1]
190 |                 if b.endswith('('): b = b[:-1]
191 |                 alias += f'\n{a} = {lib_name}.{b}'
192 |         if 'fluent_alias' in tab_lib:
193 |             mapping = _parse_mapping_config(tab_lib['fluent_alias'])
194 |             alias += '\n' + '\n'.join([
195 |                 f'{a} = lambda x, *args, **kwargs: x.{b}(*args, **kwargs)'
196 |                 for a, b in mapping])
197 |         if 'args_alias' in tab_lib:
198 |             mapping = _parse_mapping_config(tab_lib['args_alias'], split_line=False)
199 |             for a, b in mapping:
200 |                 alias += f'\ndef {a}:\n    return {b}'
201 |     if alias:
202 |         lib_file = tab_lib['lib_file']
203 |         with open(lib_file, 'a') as f:
204 |             logging.info(
205 |                 f'Wrote {len(alias.splitlines())} alias into {lib_file}')
206 |             f.write('\n\n\n# Alias defined in config.ini\n')
207 |             f.write(alias + '\n\n')
208 | 
209 | def replace_call(source: str, mapping, replace_fn):
210 | 
211 |     matched = False
212 |     for a in mapping:
213 |         if 'd2l.'+a in source:
214 |             matched = True
215 |     if not matched:
216 |         return source
217 |     lines = source.splitlines()
218 |     if lines[0].startswith('%'):
219 |         source = '\n'.join(lines[1:])
220 |     for _ in range(100):  # 100 is a (random) big enough number
221 |         replaced = False
222 |         tree = ast.parse(source)
223 |         for node in ast.walk(tree):
224 |             if (isinstance(node, ast.Call) and
225 |                 isinstance(node.func, ast.Attribute) and
226 |                 isinstance(node.func.value, ast.Name) and
227 |                 node.func.value.id == 'd2l' and
228 |                 node.func.attr in mapping):
229 |                 new_node = replace_fn(node, mapping[node.func.attr])
230 |                 if new_node:
231 |                     source = source.replace(
232 |                         ast.get_source_segment(source, node),
233 |                         new_node if isinstance(new_node, str) else node_to_source(new_node))
234 |                     replaced = True
235 |                     break
236 |         if not replaced:
237 |             break
238 |     if lines[0].startswith('%'):
239 |         source = lines[0] + '\n' + source
240 |     return source
241 | 
242 | 
243 | def replace_fluent_alias(source, fluent_mapping):
244 |     def _replace(node, b):
245 |         return ast.Call(
246 |             ast.Attribute(value=node.args[0], attr=b),
247 |             node.args[1:], node.keywords)
248 |     return replace_call(source, fluent_mapping, _replace)
249 | 
250 | def replace_args_alias(source, args_mapping):
251 |     def _replace(node, b):
252 |         a_args, b = b
253 |         a_kwargs = {a: b for a, b in a_args if not a.startswith('a_')}
254 |         a_args = [a for a, _  in a_args if a.startswith('a_')]
255 |         if len(node.args) != len(a_args):
256 |             return None
257 |         key_value = {a : node_to_source(arg) for arg, a in zip(node.args, a_args)}
258 |         for kw in node.keywords:
259 |             assert kw.arg in a_kwargs, (kw.arg, a_kwargs)
260 |             key_value['='+kw.arg] = '='+node_to_source(kw.value)
261 |         # remove not appeared keywords
262 |         b_call = ast.parse(b).body[0].value
263 |         if isinstance(b_call, ast.Call):
264 |             new_keywords = [kw for kw in b_call.keywords if '='+kw.value.id in key_value]
265 |             b_call.keywords = new_keywords
266 |             b = node_to_source(b_call)
267 |         for k, v in key_value.items():
268 |             b = b.replace(k, v)
269 |         return b
270 |     return replace_call(source, dict(args_mapping), _replace)
271 | 
272 | def call_args(call_str):
273 |     call = ast.parse(call_str).body[0].value
274 |     assert isinstance(call, ast.Call), call_str
275 |     name = call.func.id
276 |     args = [(a.id,None) for a in call.args] + [(k.arg, k.value) for k in call.keywords]
277 |     return name, args
278 | 
279 | def replace_alias(nb, tab_lib):
280 |     nb = copy.deepcopy(nb)
281 |     patterns = []
282 |     fluent_mapping = {}
283 |     args_mapping = {}
284 |     if 'reverse_alias' in tab_lib:
285 |         patterns += _parse_mapping_config(tab_lib['reverse_alias'], split_line=False)
286 |     if 'lib_name' in tab_lib:
287 |         lib_name = tab_lib["lib_name"]
288 |         if 'simple_alias' in tab_lib:
289 |             mapping = _parse_mapping_config(tab_lib['simple_alias'])
290 |             patterns += [(f'd2l.{a}', f'{lib_name}.{b}') for a, b in mapping]
291 |         if 'fluent_alias' in tab_lib:
292 |             fluent_mapping = dict(_parse_mapping_config(tab_lib['fluent_alias']))
293 |         if 'args_alias' in tab_lib:
294 |             for a, b in _parse_mapping_config(tab_lib['args_alias'], split_line=False):
295 |                 name, args = call_args(a)
296 |                 args_mapping[name] = (args, b)
297 | 
298 |     for cell in nb.cells:
299 |         if cell.cell_type == 'code':
300 |             for p, r in patterns:
301 |                 cell.source = cell.source.replace(p, r)
302 |             if fluent_mapping:
303 |                 cell.source = replace_fluent_alias(cell.source, fluent_mapping)
304 |             if args_mapping:
305 |                 cell.source = replace_args_alias(cell.source, args_mapping)
306 |     return nb
307 | 
308 | def format_code(source: str):
309 |     if 'import ' in source:
310 |         config = isort.settings.Config(no_lines_before=[
311 |             isort.settings.FUTURE, isort.settings.STDLIB, isort.settings.
312 |             THIRDPARTY, isort.settings.FIRSTPARTY, isort.settings.LOCALFOLDER])
313 | 
314 |         source = isort.code(source, config=config)
315 | 
316 |     # remove tailing spaces
317 |     source = '\n'.join([l.rstrip() for l in source.split('\n')]).strip()
318 | 
319 |     # Disable yapf, as it doesn't work well for long sentences
320 |     return source
321 | 
322 |     # fix the bug that yapf cannot handle jupyter magic
323 |     for l in source.splitlines():
324 |         if l.startswith('%') or l.startswith('!'):
325 |             return source
326 | 
327 |     # fix the bug that yapf remove the tailling ;
328 |     has_tailling_semicolon = source.rstrip().endswith(';')
329 | 
330 |     style = {
331 |         'DISABLE_ENDING_COMMA_HEURISTIC': True,
332 |         'SPACE_BETWEEN_ENDING_COMMA_AND_CLOSING_BRACKET': False,
333 |         'SPLIT_BEFORE_CLOSING_BRACKET': False,
334 |         'SPLIT_BEFORE_DICT_SET_GENERATOR': False,
335 |         'SPLIT_BEFORE_LOGICAL_OPERATOR': False,
336 |         'SPLIT_BEFORE_NAMED_ASSIGNS': False,
337 |         'COLUMN_LIMIT': 78,
338 |         'BLANK_LINES_AROUND_TOP_LEVEL_DEFINITION': 1,}
339 |     source = FormatCode(source, style_config=style)[0].strip()
340 |     if has_tailling_semicolon: source += ';'
341 |     return source
342 | 
343 | def format_code_nb(nb):
344 |     for cell in nb.cells:
345 |         if cell.cell_type == 'code':
346 |             cell.source = format_code(cell.source)
347 |     return nb
348 | 
349 | 
350 | # DEPRECATED
351 | # def save_file(root_dir: str, nbfile: str):
352 | #     nbfile = pathlib.Path(nbfile)
353 | #     pyfile = root_dir / nbfile.with_suffix('.py')
354 | 
355 | #     with nbfile.open('r') as f:
356 | #         nb = notebook.read_markdown(f.read())
357 | 
358 | #     saved = []
359 | #     save_all = False
360 | #     for cell in nb.cells:
361 | #         if cell.cell_type == 'code':
362 | #             src = cell.source.lstrip()
363 | #             if re.search('# *@save_all', src):
364 | #                 save_all = True
365 | #             if save_all or re.search('# *@save_cell', src):
366 | #                 saved.append(src)
367 | #             else:
368 | #                 blk = _save_block(src, '@save')
369 | #                 if blk:
370 | #                     saved.append(blk)
371 | #     if saved:
372 | #         with pyfile.open('w') as f:
373 | #             f.write(
374 | #                 f'# This file is generated from {str(nbfile)} automatically through:\n'
375 | #             )
376 | #             f.write('#    d2lbook build lib\n')
377 | #             f.write('# Don\'t edit it directly\n\n')
378 | #             for blk in saved:
379 | #                 f.write(blk + '\n\n')
380 | #             logging.info(f'Found {len(saved)} blocks in {str(nbfile)}')
381 | 
382 | # DEPRECATED
383 | # def save_mark(notebooks: List[str], lib_fname: str, save_mark: str):
384 | #     logging.info('Matching with the pattern: "%s"', save_mark)
385 | #     with open(lib_fname, 'w') as f:
386 | #         _write_header(f)
387 | #         lib_name = os.path.dirname(lib_fname)
388 | #         lib_name = lib_name.split('/')[-1]
389 | #         f.write('import sys\n' + lib_name + ' = sys.modules[__name__]\n\n')
390 | 
391 | #         for nb in notebooks:
392 | #             _save_code(nb, f, save_mark=save_mark)
393 | #         logging.info('Saved into %s', lib_fname)
394 | 


--------------------------------------------------------------------------------