├── d2lbook ├── _version.py ├── __init__.py ├── clear.py ├── main.py ├── upload_github.sh ├── rst_test.py ├── sagemaker.py ├── upload_doc_s3.sh ├── resource_test.py ├── tab.py ├── slides_test.py ├── activate.py ├── common.py ├── markdown_test.py ├── library_test.py ├── sphinx.py ├── deploy.py ├── config.py ├── notebook_test.py ├── utils.py ├── config_default.ini ├── markdown.py ├── colab.py ├── translate.py ├── slides.py ├── resource.py ├── rst.py ├── notebook.py └── library.py ├── docs ├── user │ ├── colab.md │ ├── index.md │ ├── code.md │ ├── code_tabs.md │ ├── edit.md │ ├── slides.md │ ├── create.md │ ├── build.md │ ├── deploy.md │ └── markdown.md ├── img │ ├── d2l.png │ ├── catdog.jpg │ ├── d2l-book.png │ ├── favicon.png │ ├── jupyter.png │ ├── koebel.jpg │ ├── s3-acl.png │ ├── build.graffle │ ├── record-set.png │ ├── github_pages.png │ ├── s3-web-hosting.png │ └── multi-lang.svg ├── develop │ ├── index.md │ └── pipeline.md ├── install.md ├── demo.md ├── refs.bib ├── config.ini └── index.md ├── .gitignore ├── README.md ├── Jenkinsfile ├── setup.py ├── scripts └── install_fonts.sh └── LICENSE /d2lbook/_version.py: -------------------------------------------------------------------------------- 1 | __version__ = '1.0.3' 2 | -------------------------------------------------------------------------------- /docs/user/colab.md: -------------------------------------------------------------------------------- 1 | # Colab 2 | :label:`sec_colab` 3 | -------------------------------------------------------------------------------- /d2lbook/__init__.py: -------------------------------------------------------------------------------- 1 | from ._version import __version__ 2 | -------------------------------------------------------------------------------- /docs/img/d2l.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/d2l-book/HEAD/docs/img/d2l.png -------------------------------------------------------------------------------- /docs/img/catdog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/d2l-book/HEAD/docs/img/catdog.jpg -------------------------------------------------------------------------------- /docs/img/d2l-book.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/d2l-book/HEAD/docs/img/d2l-book.png -------------------------------------------------------------------------------- /docs/img/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/d2l-book/HEAD/docs/img/favicon.png -------------------------------------------------------------------------------- /docs/img/jupyter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/d2l-book/HEAD/docs/img/jupyter.png -------------------------------------------------------------------------------- /docs/img/koebel.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/d2l-book/HEAD/docs/img/koebel.jpg -------------------------------------------------------------------------------- /docs/img/s3-acl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/d2l-book/HEAD/docs/img/s3-acl.png 
-------------------------------------------------------------------------------- /docs/img/build.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/d2l-book/HEAD/docs/img/build.graffle -------------------------------------------------------------------------------- /docs/img/record-set.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/d2l-book/HEAD/docs/img/record-set.png -------------------------------------------------------------------------------- /docs/img/github_pages.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/d2l-book/HEAD/docs/img/github_pages.png -------------------------------------------------------------------------------- /docs/img/s3-web-hosting.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/d2l-book/HEAD/docs/img/s3-web-hosting.png -------------------------------------------------------------------------------- /docs/develop/index.md: -------------------------------------------------------------------------------- 1 | # Development Guide 2 | 3 | Explain how it works. 4 | 5 | 6 | ```toc 7 | pipeline 8 | ``` 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /d2lbook.egg-info/ 2 | /dist/ 3 | *.pyc 4 | **/.ipynb_checkpoints/* 5 | /docs/_build/ 6 | .DS_Store 7 | build/ 8 | .mypy_cache 9 | .eggs 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # D2L-Book: A Toolkit for Hands-on Books 2 | 3 | This package helps you build and publish **a book with Python code**, or 4 | **Python package documents with tutorials**. The document site is available at 5 | https://book.d2l.ai 6 | -------------------------------------------------------------------------------- /d2lbook/clear.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import shutil 3 | from d2lbook.config import Config 4 | 5 | __all__ = ['clear'] 6 | 7 | def clear(): 8 | config = Config() 9 | build_dir = config.tgt_dir 10 | logging.info('Delete %s', build_dir) 11 | shutil.rmtree(build_dir, ignore_errors=True) 12 | -------------------------------------------------------------------------------- /docs/user/index.md: -------------------------------------------------------------------------------- 1 | # User Guide 2 | 3 | In this part, we will cover 4 | 5 | 1. How to create a project 6 | 1. How to build and deploy the documents 7 | 1. Various configuration options 8 | 1. Markdown and code examples 9 | 10 | ```toc 11 | :maxdepth: 2 12 | 13 | create 14 | edit 15 | build 16 | deploy 17 | markdown 18 | code 19 | code_tabs 20 | slides 21 | colab 22 | ``` 23 | -------------------------------------------------------------------------------- /Jenkinsfile: -------------------------------------------------------------------------------- 1 | stage("Build and Publish") { 2 | node('d2l-worker') { 3 | ws('workspace/d2l-book') { 4 | checkout scm 5 | sh '''set -ex 6 | conda remove -n d2l-book-build --all -y 7 | conda create -n d2l-book-build python=3.9 pip -y 8 | conda activate d2l-book-build 9 | pip install . 
10 |       python -m unittest d2lbook/*_test.py
11 |       # pip install mypy
12 |       # mypy --ignore-missing-imports d2lbook/*_test.py
13 |       cd docs
14 |       rm -rf _build
15 |       pip install matplotlib numpy mypy
16 |       d2lbook build eval
17 |       d2lbook build eval --tab numpy
18 |       d2lbook build eval --tab cpython
19 |       d2lbook build pdf
20 |       d2lbook build html --tab all
21 |       '''
22 | 
23 |       if (env.BRANCH_NAME == 'master') {
24 |         sh '''set -ex
25 |         conda activate d2l-book-build
26 |         cd docs
27 |         d2lbook deploy html pdf
28 |         d2lbook clear
29 |         '''
30 |       }
31 |     }
32 |   }
33 | }
34 | 
--------------------------------------------------------------------------------
/d2lbook/main.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import sys
3 | from d2lbook.build import build
4 | from d2lbook.deploy import deploy
5 | from d2lbook.clear import clear
6 | from d2lbook.activate import activate
7 | from d2lbook.translate import translate
8 | from d2lbook.slides import slides
9 | import logging
10 | 
11 | logging.basicConfig(format='[d2lbook:%(filename)s:L%(lineno)d] %(levelname)-6s %(message)s')
12 | logging.getLogger().setLevel(logging.INFO)
13 | 
14 | 
15 | def main():
16 |     commands = {'build': build, 'deploy':deploy, 'clear':clear,
17 |                 'activate':activate, 'translate':translate, 'slides':slides}
18 |     parser = argparse.ArgumentParser(description='''
19 |     D2L Book: Publish a book based on Jupyter notebooks.
20 | 
21 |     Run d2lbook command -h to get the help message for each command.
22 |     ''')
23 |     parser.add_argument('command', nargs=1, choices=commands.keys())
24 |     args = parser.parse_args(sys.argv[1:2])
25 |     commands[args.command[0]]()
26 | 
27 | if __name__ == "__main__":
28 |     main()
--------------------------------------------------------------------------------
/docs/install.md:
--------------------------------------------------------------------------------
1 | # Installation
2 | 
3 | The `d2lbook` package is tested under macOS and Linux. (You are welcome to
4 | contribute a Windows release.)
5 | 
6 | First make sure you have [pip](https://pip.pypa.io/en/stable/) available. In
7 | addition, we recommend [conda](https://docs.conda.io/en/latest/miniconda.html) for
8 | libraries that `pip` doesn't support.
9 | 
10 | Now install the command-line interface.
11 | 
12 | ```sh
13 | pip install git+https://github.com/d2l-ai/d2l-book
14 | ```
15 | 
16 | There is a [d2lbook pip package](https://pypi.org/project/d2lbook/), but we
17 | recommend installing the latest version from GitHub directly since it is under
18 | rapid development.
19 | 
20 | To build HTML results, we need [pandoc](https://pandoc.org/). You can install it
21 | through `conda install pandoc`.
22 | 
23 | Building the PDF version requires
24 | [LibRsvg](https://wiki.gnome.org/Projects/LibRsvg) to convert your SVG images
25 | (our recommended format), e.g. `conda install librsvg`, and of course, you need to
26 | have a LaTeX distribution, e.g. [Tex Live](https://www.tug.org/texlive/), available.
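27 | 
28 | To quickly verify the setup (a minimal sanity check; the `-h` flag comes from
29 | the `argparse`-based CLI in `d2lbook/main.py`, and `pandoc --version` simply
30 | confirms pandoc is on the PATH):
31 | 
32 | ```sh
33 | d2lbook -h          # lists the available commands: build, deploy, clear, ...
34 | pandoc --version    # pandoc is needed for the HTML build
35 | ```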
--------------------------------------------------------------------------------
/docs/demo.md:
--------------------------------------------------------------------------------
1 | # Building this Website
2 | 
3 | You may find that building this website is a good starting point for your
4 | project. The source code of this site is available under
5 | [demo/](https://github.com/d2l-ai/d2l-book/tree/master/demo).
6 | 
7 | Please make sure you have `git` (e.g. `conda install git`), `numpy` and
8 | `matplotlib` (e.g. `pip install numpy matplotlib`) installed.
9 | The following command will download the source code, evaluate all notebooks and generate outputs in
10 | `ipynb`, `html` and `pdf` format.
11 | 
12 | ```sh
13 | git clone https://github.com/d2l-ai/d2l-book
14 | cd d2l-book/demo
15 | d2lbook build all
16 | ```
17 | 
18 | Once finished, you can check the results in the `_build` folder. For example, this page is at `_build/html/index.html`, the PDF version is at `_build/pdf/d2l-book.pdf`, and all evaluated notebooks are under `_build/eval/`.
19 | 
20 | You can also build a particular format:
21 | 
22 | ```sh
23 | d2lbook build eval # evaluate notebooks and save them in the .ipynb format
24 | d2lbook build html # build the HTML version
25 | d2lbook build pdf # build the PDF version
26 | ```
--------------------------------------------------------------------------------
/d2lbook/upload_github.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Upload files into a github repo.
3 | set -e
4 | 
5 | if [ $# -ne 3 ]; then
6 |     echo "ERROR: needs three arguments. "
7 |     echo "Sample usage:"
8 |     echo "  $0 notebooks d2l-ai/notebooks version"
9 |     exit -1
10 | fi
11 | 
12 | SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
13 | IN_DIR="$( cd $1 && pwd )"
14 | REPO=$2
15 | REPO_DIR=${IN_DIR}-git
16 | 
17 | # clone the repo, make sure GIT_USERNAME and GIT_PASSWORD have already been set
18 | rm -rf ${REPO_DIR}
19 | git clone git@github.com:${REPO}.git ${REPO_DIR}
20 | 
21 | # remove all except for README.md and .git.
22 | tmp=$(mktemp -d)
23 | 
24 | if [[ -f "${REPO_DIR}/README.md" ]]; then
25 |     mv ${REPO_DIR}/README.md $tmp/
26 | fi
27 | mv ${REPO_DIR}/.git $tmp/
28 | rm -rf ${REPO_DIR}/*
29 | if [[ -f "$tmp/README.md" ]]; then
30 |     mv $tmp/README.md ${REPO_DIR}/
31 | fi
32 | mv $tmp/.git ${REPO_DIR}/.git
33 | 
34 | cp -r ${IN_DIR}/* ${REPO_DIR}/
35 | 
36 | if [ -f ${REPO_DIR}/index.html ]; then
37 |     touch ${REPO_DIR}/.nojekyll
38 | fi
39 | 
40 | cd ${REPO_DIR}
41 | git config --global push.default simple
42 | git add -f --all .
43 | git diff-index --quiet HEAD || git commit -am "Version $3"
44 | git push origin
--------------------------------------------------------------------------------
/docs/user/code.md:
--------------------------------------------------------------------------------
1 | # Code Cells
2 | :label:`sec_code`
3 | 
4 | ## Maximum Line Length
5 | 
6 | We recommend setting the maximum line length to 78 to avoid automatic line breaks in the PDF. You can enable the Ruler extension in [nbextensions](https://github.com/ipython-contrib/jupyter_contrib_nbextensions) to add a visual vertical line in Jupyter when writing code.
7 | 
8 | ```{.python .input}
9 | '-' * 78
10 | ```
11 | 
12 | ## Hide Source and Outputs
13 | 
14 | We can hide the source of a code cell by adding a comment line `# Hide
15 | code` in the cell. We can also hide the code cell outputs using `# Hide outputs`.
16 | 
17 | For example, here is a normal code cell:
18 | 
19 | ```{.python .input}
20 | 1+2+3
21 | ```
22 | 
23 | Let's hide the source code:
24 | 
25 | ```{.python .input}
26 | # Hide code
27 | 1+2+3
28 | ```
29 | 
30 | Also try hiding the outputs:
31 | 
32 | ```{.python .input}
33 | # Hide outputs
34 | 1+2+3
35 | ```
36 | 
37 | ## Plotting
38 | 
39 | We recommend using the `svg` format to plot figures.
For example, the following code configures `matplotlib` 40 | 41 | ```{.python .input n=3} 42 | %matplotlib inline 43 | from IPython import display 44 | from matplotlib import pyplot as plt 45 | import numpy as np 46 | 47 | display.set_matplotlib_formats('svg') 48 | 49 | x = np.arange(0, 10, 0.1) 50 | plt.plot(x, np.sin(x)); 51 | ``` 52 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | with open('d2lbook/_version.py') as ver_file: 4 | exec(ver_file.read()) 5 | 6 | requirements = [ 7 | 'jupyter', 8 | 'regex', 9 | 'sphinx==5.3.0', # >=5.1.1 to enable pre_border-radius in code cells, 6.1.3 shows blank webpages 10 | 'recommonmark', 11 | 'sphinxcontrib-bibtex==2.4.2', # >=2.2 to enable citet and citep 12 | 'pybtex-apa-style', 13 | 'd2l-notedown', 14 | 'mxtheme>=0.3.17', 15 | 'sphinxcontrib-svg2pdfconverter', 16 | 'numpydoc', 17 | 'awscli', 18 | 'gitpython', 19 | 'sphinx_autodoc_typehints', 20 | 'astor', 21 | 'yapf', 22 | 'fasteners', 23 | 'isort' 24 | ] 25 | 26 | setup( 27 | name='d2lbook', 28 | version=__version__, 29 | install_requires=requirements, 30 | python_requires='>=3.8', 31 | author='D2L Developers', 32 | author_email='d2l.devs@gmail.com', 33 | url='https://book.d2l.ai', 34 | description="Create an online book with Jupyter Notebooks and Sphinx", 35 | license='Apache-2.0', 36 | packages=find_packages(), 37 | include_package_data=True, 38 | package_data={'d2lbook':['config_default.ini', 'upload_doc_s3.sh', 'upload_github.sh']}, 39 | entry_points={ 40 | 'console_scripts': [ 41 | 'd2lbook = d2lbook.main:main', 42 | ] 43 | }, 44 | ) 45 | -------------------------------------------------------------------------------- /d2lbook/rst_test.py: -------------------------------------------------------------------------------- 1 | from d2lbook import notebook 2 | from d2lbook import rst 3 | import unittest 4 | import nbconvert 5 | 6 | _markdown_src = r''' 7 | # Test 8 | :label:`test` 9 | 10 | first para 11 | 12 | python is good 13 | 14 | another para 15 | 16 | This is :eqref:`sec_1` 17 | 18 | ```python2 19 | 1+2+3 20 | ``` 21 | 22 | python3 is better 23 | 24 | - here 25 | - haha 26 | 27 | 28 | ```{.input .python} 29 | 1+2+3 30 | ``` 31 | 32 | ```{.input .python} 33 | #@tab python2 34 | 1+2+3 35 | ``` 36 | 37 | ```bash 38 | ```` 39 | aa 40 | ```` 41 | ``` 42 | 43 | ## Section 2 44 | :label:`sec_2` 45 | 46 | ```eval_rst 47 | .. 
only:: html 48 | 49 | Table of Contents 50 | ----------------- 51 | ``` 52 | 53 | ```toc 54 | :numbered: 55 | :maxdepth: 2 56 | 57 | install 58 | user/index 59 | develop/index 60 | ``` 61 | 62 | ![Estimating the length of a foot](../img/koebel.jpg) 63 | :width:`400px` 64 | 65 | $x=1$, :numref:`sec_2` 66 | ''' 67 | 68 | class TestRst(unittest.TestCase): 69 | 70 | # TODO(mli) add some asserts 71 | def test_convert_notebook(self): 72 | nb = notebook.read_markdown(_markdown_src) 73 | body, _ = rst.convert_notebook(nb, {}) 74 | lines = body.split('\n') 75 | 76 | for l in lines: 77 | if l.startswith(':math:`x=1`'): 78 | self.assertEqual(l, ':math:`x=1`, :numref:`sec_2`') 79 | 80 | -------------------------------------------------------------------------------- /d2lbook/sagemaker.py: -------------------------------------------------------------------------------- 1 | """Integration with Sagemaker""" 2 | import nbformat 3 | from d2lbook import utils 4 | from d2lbook import colab 5 | from d2lbook import notebook 6 | 7 | class Sagemaker(colab.Colab): 8 | def __init__(self, config): 9 | self._valid = config.sagemaker and config.sagemaker['github_repo'] 10 | self.config = config.sagemaker 11 | self._repo, self._libs = colab.parse_repo_lib( 12 | self.config['github_repo'], self.config['libs'], config.library["version"]) 13 | kernel_str = self.config['kernel'] 14 | if ',' not in kernel_str: 15 | self._kernel = {None: kernel_str} 16 | else: 17 | kernel = utils.split_config_str(kernel_str, 2) 18 | self._kernel = {k[0]:k[1] for k in kernel} 19 | 20 | def generate_notebooks(self, eval_dir, sagemaker_dir, tab): 21 | if not self._valid: 22 | return 23 | utils.run_cmd(['rm -rf', sagemaker_dir]) 24 | utils.run_cmd(['cp -r', eval_dir, sagemaker_dir]) 25 | notebooks = utils.find_files('**/*.ipynb', sagemaker_dir) 26 | for fn in notebooks: 27 | nb = notebook.read(fn) 28 | if not nb: 29 | continue 30 | colab.update_notebook_kernel(nb, self._kernel[tab]) 31 | colab.insert_additional_installation(nb, self._libs[tab], self.config['libs_header']) 32 | with open(fn, 'w') as f: 33 | f.write(nbformat.writes(nb)) 34 | -------------------------------------------------------------------------------- /d2lbook/upload_doc_s3.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Upload a doc folder into a S3 bucket, with text context compressed 4 | # 5 | # Sample Usage: 6 | # 7 | # ./upload_doc_s3.sh build/_build/html/ s3://en.d2l.ai 8 | # 9 | # Requres awscli is installed 10 | 11 | set -ex 12 | 13 | if [ $# -ne 2 ]; then 14 | echo "ERROR: needs two arguments. " 15 | echo "Usage:" 16 | echo " $0 doc_dir s3_bucket" 17 | exit -1 18 | fi 19 | 20 | DIR="$( cd $1 && pwd )" 21 | BUCKET=$2 22 | echo "Upload $DIR to $BUCKET" 23 | 24 | # use a temp workspace, because we need to modify (compress) some files later. 
25 | rm -rf ${DIR}_tmp 26 | cp -r ${DIR} ${DIR}_tmp 27 | DIR=${DIR}_tmp 28 | 29 | 30 | find $DIR \( -iname '*.css' -o -iname '*.js' \) -exec gzip -9 -n {} \; -exec mv {}.gz {} \; 31 | 32 | aws s3 sync --exclude '*.*' --include '*.css' \ 33 | --content-type 'text/css' \ 34 | --content-encoding 'gzip' \ 35 | --acl 'public-read' --quiet \ 36 | $DIR $BUCKET 37 | 38 | aws s3 sync --exclude '*.*' --include '*.js' \ 39 | --content-type 'application/javascript' \ 40 | --content-encoding 'gzip' \ 41 | --acl 'public-read' --quiet \ 42 | $DIR $BUCKET 43 | 44 | # use a large expire time for fonts 45 | aws s3 sync --exclude '*.*' --include '*.woff' --include '*.woff2' \ 46 | --expires "$(date -d '+24 months' --utc +'%Y-%m-%dT%H:%M:%SZ')" \ 47 | --acl 'public-read' --quiet \ 48 | $DIR $BUCKET 49 | 50 | aws s3 sync --delete $DIR $BUCKET --acl 'public-read' --quiet 51 | -------------------------------------------------------------------------------- /d2lbook/resource_test.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run multiple instances from terminal to test 3 | 4 | python d2lbook/resource_test.py 5 | """ 6 | from d2lbook import resource 7 | import unittest 8 | import time 9 | import logging 10 | import os 11 | 12 | def _incorrect_code(): 13 | for i in a: 14 | print(i) 15 | 16 | def _runtime_error(): 17 | return 1 / 0 18 | 19 | class TestResource(unittest.TestCase): 20 | def test_get_available_gpus(self): 21 | self.assertGreaterEqual(len(resource.get_available_gpus()), 0) 22 | 23 | def test_gpus(self): 24 | def _job(): 25 | self.assertEqual(len(os.environ['CUDA_VISIBLE_DEVICES']), 1) 26 | time.sleep(1) 27 | 28 | scheduler = resource.Scheduler(num_cpu_workers=2, num_gpu_workers=2) 29 | scheduler.add(1, 1, _job, ()) 30 | scheduler.add(1, 1, _job, ()) 31 | scheduler.run() 32 | 33 | def test_scheduler(self): 34 | scheduler = resource.Scheduler(num_cpu_workers=2, num_gpu_workers=2) 35 | for _ in range(3): 36 | scheduler.add(1, 0, time.sleep, (2,)) 37 | scheduler.add(1, 1, _incorrect_code, ()) 38 | scheduler.add(1, 2, _runtime_error, ()) 39 | scheduler.run() 40 | self.assertEqual(len(scheduler.failed_tasks), 2) 41 | logging.info(scheduler.error_message) 42 | 43 | if __name__ == '__main__': 44 | logging.basicConfig( 45 | format='[d2lbook:%(filename)s:L%(lineno)d] %(levelname)-6s %(message)s' 46 | ) 47 | logging.getLogger().setLevel(logging.INFO) 48 | unittest.main() 49 | -------------------------------------------------------------------------------- /d2lbook/tab.py: -------------------------------------------------------------------------------- 1 | from IPython.core.magic import Magics, magics_class, cell_magic 2 | import os 3 | import sys 4 | 5 | _TAB = None 6 | _LOG = sys.stderr.write 7 | 8 | # the tab selected last time 9 | _LAST_TAB = None 10 | _LAST_TAB_FILE = '/tmp/d2lbook_last_selected_tab' 11 | if os.path.exists(_LAST_TAB_FILE): 12 | with open(_LAST_TAB_FILE) as f: 13 | _LAST_TAB = f.read().strip() 14 | 15 | def select_tab(tab=_LAST_TAB): 16 | _LOG(f'Selected tab "{tab}", all other code cells not marked as "{tab}" will be ignored in execution.\n') 17 | _LOG(f'This code block will be deleted during build.') 18 | sys.modules[__name__]._TAB = tab 19 | if tab: 20 | with open(_LAST_TAB_FILE, 'w') as f: 21 | f.write(tab+'\n') 22 | 23 | def interact_select(*tabs): 24 | if len(tabs) == 1 and isinstance(tabs[0], (list, tuple)): 25 | tabs = tabs[0] 26 | from ipywidgets import interact 27 | interact(select_tab, tab=list(tabs)) 28 | 29 | def selected(*tabs): 30 
|     if len(tabs) == 1 and isinstance(tabs[0], (list, tuple)):
31 |         tabs = tabs[0]
32 |     return _TAB in tabs
33 | 
34 | @magics_class
35 | class Tab(Magics):
36 |     @cell_magic
37 |     def tab(self, line, cell):
38 |         tabs = [tab.strip() for tab in line.strip().split(',')]
39 |         if _TAB in tabs or 'all' in tabs:
40 |             self.shell.run_cell(cell)
41 |         else:
42 |             _LOG(f'Ignored: this cell is not marked as a "{_TAB}" cell.\n')
43 | 
44 | 
45 | def load_ipython_extension(ipython):
46 |     ipython.run_cell('from d2lbook import tab')
47 |     ipython.register_magics(Tab)
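48 | 
49 | # A sketch of interactive usage (assumes IPython/Jupyter, and ipywidgets for
50 | # interact_select); the %%tab magic only runs a cell when its tab is active:
51 | #
52 | #   %load_ext d2lbook.tab                    # registers the %%tab cell magic
53 | #   from d2lbook import tab
54 | #   tab.interact_select('python', 'numpy')   # pick the active tab
55 | #
56 | #   %%tab numpy
57 | #   import numpy as np                       # runs only if "numpy" is active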
Mitchell", 26 | title = "The Need for Biases in Learning Generalizations", 27 | institution = "Computer Science Department, Rutgers University", 28 | year = "1980", 29 | address = "New Brunswick, MA", 30 | } 31 | 32 | @InCollection{Newell81, 33 | author = "A. Newell and P. S. Rosenbloom", 34 | title = "Mechanisms of Skill Acquisition and the Law of 35 | Practice", 36 | booktitle = "Cognitive Skills and Their Acquisition", 37 | pages = "1--51", 38 | publisher = "Lawrence Erlbaum Associates, Inc.", 39 | year = "1981", 40 | editor = "J. R. Anderson", 41 | chapter = "1", 42 | address = "Hillsdale, NJ", 43 | eprint = {arXiv:1510.01797}, 44 | } 45 | -------------------------------------------------------------------------------- /d2lbook/slides_test.py: -------------------------------------------------------------------------------- 1 | from d2lbook import slides, notebook, common 2 | import unittest 3 | import time 4 | import logging 5 | import os 6 | 7 | # from docs/user/slides.md 8 | 9 | _md = '''# Data Manipulation 10 | 11 | ## Getting Started 12 | 13 | To start, we can use `arange` to create a row vector `x` 14 | containing the first 12 integers starting with 0, 15 | though they are created as floats by default. 16 | 17 | (**A tensor represents a (possibly multi-dimensional) array of numerical values. We can access a tensor's *shape*.**) 18 | 19 | 20 | ```{.python .input} 21 | import numpy as np 22 | 23 | x = np.arange(12) 24 | x 25 | ``` 26 | 27 | [**Many**] more (**operations can be applied elementwise,**) 28 | including unary operators like exponentiation. 29 | (~~e.g. `exp`~~) 30 | 31 | ```{.python .input} 32 | np.exp(x) 33 | ``` 34 | 35 | (**Even when shapes differ, we can still perform elementwise operations**) 36 | by invoking the *broadcasting mechanism*. 
37 | 38 | 39 | ```{.python .input} 40 | a = np.arange(3).reshape(3, 1) 41 | b = np.arange(2).reshape(1, 2) 42 | a, b 43 | ``` 44 | ''' 45 | 46 | class TestSlides(unittest.TestCase): 47 | def test_match_pairs(self): 48 | matched = slides._match_slide_marks(_md) 49 | common.print_list(matched) 50 | self.assertEqual(len(matched), 5) 51 | 52 | def test_generate_slides(self): 53 | nb = notebook.read_markdown(_md) 54 | nb = slides._generate_slides(nb) 55 | common.print_list(nb.cells) 56 | self.assertEqual(len(nb.cells), 6) 57 | 58 | def test_remove_slide_marks(self): 59 | nb = notebook.read_markdown(_md) 60 | nb = slides.remove_slide_marks(nb) 61 | common.print_list(nb.cells) 62 | 63 | if __name__ == '__main__': 64 | unittest.main() 65 | -------------------------------------------------------------------------------- /d2lbook/activate.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from d2lbook import markdown, common, config 3 | import glob 4 | import re 5 | import sys 6 | 7 | __all__ = ['activate'] 8 | 9 | commands = ['tab'] 10 | 11 | def activate(): 12 | parser = argparse.ArgumentParser(description='Activate tabs') 13 | parser.add_argument('tab', default='all', help='the tab to activate') 14 | parser.add_argument('filename', nargs='+', help='the markdown files to activate') 15 | args = parser.parse_args(sys.argv[2:]) 16 | 17 | cf = config.Config() 18 | for fn in args.filename: 19 | for f in glob.glob(fn): 20 | _activate_tab(f, args.tab, cf.default_tab) 21 | 22 | _tab_re = re.compile('# *@tab +([\w]+)') 23 | 24 | def _get_cell_tab(cell, default_tab): 25 | if cell['type'] != 'code': 26 | return [] 27 | if not '.input' in cell['class'] and not 'python' in cell['class']: 28 | return [] 29 | match = common.source_tab_pattern.search(cell['source']) 30 | if match: 31 | return [tab.strip() for tab in match[1].split(',')] 32 | return [default_tab] 33 | 34 | def _activate_tab(filename, tab, default_tab): 35 | if tab == 'default': 36 | tab = default_tab 37 | with open(filename, 'r') as f: 38 | src = f.read() 39 | cells = markdown.split_markdown(src) 40 | for cell in cells: 41 | cell_tab = _get_cell_tab(cell, default_tab) 42 | if not cell_tab: 43 | continue 44 | if tab == 'all' or cell_tab == ['all'] or tab in cell_tab: 45 | # activate 46 | cell['class'] = '{.python .input}' 47 | else: # disactivate 48 | cell['class'] = 'python' 49 | src = markdown.join_markdown_cells(cells) 50 | with open(filename, 'w') as f: 51 | f.write(src) 52 | -------------------------------------------------------------------------------- /docs/user/code_tabs.md: -------------------------------------------------------------------------------- 1 | # Group Code Blocks into Tabs 2 | 3 | Here is an example showing grouping code blocks into three tabs. 4 | 5 | ## Example 6 | 7 | Let's implement $a+b$. We first show instructions, then demonstrate the codes. 
8 | 
9 | :begin_tab:`python`
10 | You need to have Python installed
11 | 
12 | :end_tab:
13 | 
14 | :begin_tab:`numpy`
15 | You can install numpy by
16 | ```bash
17 | pip install numpy
18 | ```
19 | :end_tab:
20 | 
21 | :begin_tab:`cpython`
22 | Please install cpython
23 | :end_tab:
24 | 
25 | 
26 | ```{.python .input}
27 | a = [1,1,1]
28 | b = [2,2,2]
29 | [ia+ib for ia, ib in zip(a,b)]
30 | ```
31 | 
32 | ```{.python .input}
33 | #@tab numpy
34 | import numpy as np
35 | a = np.ones(3)
36 | b = np.ones(3)*2
37 | a + b
38 | ```
39 | 
40 | ```{.python .input}
41 | #@tab cpython
42 | # Just a place holder
43 | print(1+2)
44 | ```
45 | 
46 | Next, let's implement $a - b$.
47 | 
48 | ```{.python .input}
49 | a = [1,1,1]
50 | b = [2,2,2]
51 | [ia-ib for ia, ib in zip(a,b)]
52 | ```
53 | 
54 | ```{.python .input}
55 | #@tab numpy
56 | a = np.ones(3)
57 | b = np.ones(3)*2
58 | a - b
59 | ```
60 | 
61 | ## Usage
62 | 
63 | To enable multiple tabs, first configure the `tabs` entry in the `config.ini` file. For example, here we use `tabs = python, numpy, cpython`. `python` is the default tab. To specify a code block that doesn't belong to the default tab, add `#@tab`, followed by the tab name (case-insensitive), in the first line of the code block.
64 | 
65 | Sometimes these code blocks conflict with each other. We can activate one tab at a time, so that only code blocks belonging to this tab are evaluated in Jupyter. For example:
66 | 
67 | ```bash
68 | d2lbook activate default user/code_tabs.md # activate the default tab
69 | d2lbook activate numpy user/code_tabs.md # activate the numpy tab
70 | d2lbook activate all user/code_tabs.md # activate all tabs
71 | ```
--------------------------------------------------------------------------------
/docs/user/edit.md:
--------------------------------------------------------------------------------
1 | # Editing Source Files
2 | 
3 | No matter whether it is a pure text file or a Jupyter notebook, we recommend that you save it as a markdown file. If it is a notebook, you can clear output before saving to make code review and version control easier.
4 | 
5 | You can use your favorite markdown editors, e.g. [Typora](https://www.typora.io/), to edit markdown files directly. We enhanced markdown to support additional features such as image/table captions and references; please refer to :numref:`sec_markdown` for more details. For a notebook, a Jupyter source code block is placed in a markdown code block with a `{.python .input}` tag, for example,
6 | 
7 | ````
8 | ```{.python .input}
9 | print('this is a Jupyter code cell')
10 | ```
11 | ````
12 | 
13 | Another way we recommend is using Jupyter to edit markdown files directly, especially when they contain source code blocks. Jupyter's default file format is `ipynb`. We can use the `notedown` plugin to have Jupyter open and save markdown files.
14 | 
15 | You can install this extension by
16 | 
17 | ```bash
18 | pip install mu-notedown
19 | ```
20 | 
21 | (`mu-notedown` is a fork of [notedown](https://github.com/aaren/notedown) with several modifications. You may need to uninstall the original `notedown` first.)
22 | 
23 | To turn on the `notedown` plugin by default whenever you run Jupyter Notebook, do the following: First, generate a Jupyter Notebook configuration file (if it has already been generated, you can skip this step).
24 | 
25 | ```bash
26 | jupyter notebook --generate-config
27 | ```
28 | 
29 | 
30 | Then, add the following line to the end of the Jupyter Notebook configuration file (for Linux/macOS, usually in the path `~/.jupyter/jupyter_notebook_config.py`):
31 | 
32 | ```python
33 | c.NotebookApp.contents_manager_class = 'notedown.NotedownContentsManager'
34 | ```
35 | 
36 | 
37 | Next, restart Jupyter. You should now be able to open these markdown files in Jupyter as notebooks.
38 | 
39 | ![Use Jupyter to edit :numref:`sec_create`](../img/jupyter.png)
40 | :width:`500px`
--------------------------------------------------------------------------------
/docs/config.ini:
--------------------------------------------------------------------------------
1 | # A demo for d2l-book.
2 | [project]
3 | 
4 | # The project name, used as the filename of the package and the PDF file. For
5 | # example, if set to d2l-book, then it will build d2l-book.zip and d2l-book.pdf
6 | name = d2l-book
7 | 
8 | # All author names
9 | author = Mu Li
10 | 
11 | release = 0.1.17
12 | 
13 | [html]
14 | 
15 | # A list of links that is displayed on the navbar. A link consists of three
16 | # items: name, URL, and a fontawesome icon
17 | # (https://fontawesome.com/icons?d=gallery). Items are separated by commas.
18 | header_links = PDF, https://book.d2l.ai/d2l-book.pdf, fas fa-file-pdf,
19 |                Github, https://github.com/d2l-ai/d2l-book, fab fa-github
20 | 
21 | # The filename of the favicon
22 | favicon = img/favicon.png
23 | 
24 | html_logo = img/d2l-book.png
25 | 
26 | [pdf]
27 | 
28 | latex_logo = img/d2l.png
29 | 
30 | bibfile = refs.bib
31 | 
32 | [build]
33 | 
34 | # A list of wildcards to indicate the markdown files that need to be evaluated as
35 | # Jupyter notebooks.
36 | notebooks = *.md */*.md
37 | 
38 | 
39 | # A list of files; if any of them is modified after the last build, all
40 | # documents will be rebuilt.
41 | dependences = config.ini
42 | 
43 | # A list of files that will be copied to the build folder.
44 | resources = img/ refs.bib
45 | 
46 | # Specify the tabs, separated by ",". The first one will be the default tab.
47 | tabs = python, numpy, cpython
48 | 
49 | [deploy]
50 | 
51 | 
52 | # Tracking ID for the HTML pages
53 | google_analytics_tracking_id = UA-96378503-15
54 | 
55 | # The S3 bucket that all files will copy to
56 | s3_bucket = s3://book.d2l.ai
57 | 
58 | # [colab]
59 | 
60 | # # The github repo to host the notebooks for colab, such as d2l-ai/d2l-book-colab
61 | # # Also make sure that the machine's ssh key is added to github before running
62 | # # "deploy" so that it can commit into d2l-ai/d2l-book-colab
63 | # github_repo = d2l-ai/d2l-book-colab
64 | 
65 | # # Colab cannot display SVG files with a relative fname or a github URL. You can
66 | # # replace it with your website URL. For example: img, http://book.d2l.ai/_images
67 | # # will replace "img/test.svg" with "http://book.d2l.ai/_images/test.svg"
68 | # replace_svg_url = img, http://book.d2l.ai/_images
69 | 
70 | 
71 | # [sagemaker]
72 | # github_repo = d2l-ai/d2l-book-sagemaker
--------------------------------------------------------------------------------
/d2lbook/common.py:
--------------------------------------------------------------------------------
1 | import re
2 | from typing import Optional, List, Any, Callable, Tuple
3 | 
4 | # Our special mark in markdown, e.g.
:label:`chapter_intro` 5 | md_mark_pattern = re.compile(':([-\/\\._\w]+):(`[\ \*-\/\\\._\w]+`)?') 6 | # Same for md_mark_pattern, but for rst files 7 | rst_mark_pattern = re.compile(':([-\/\\._\w]+):(``[\ \*-\/\\\._\w]+``)?') 8 | # The source code tab mark 9 | source_tab_pattern = re.compile('# *@tab +([\w\,\ ]+)') 10 | source_tab_pattern_2 = re.compile('%%tab +([\w\,\ ]+)') 11 | # Pattern to match notebook specific required libraries 12 | source_libs_required_pattern = re.compile(r"# *required_libs\((.*?)\)") 13 | 14 | # Markdown code fence 15 | md_code_fence = re.compile('(```+) *(.*)') 16 | 17 | def split_list(list_obj: List[Any], split_fn: Callable[[Any], Any]) -> List[List[Any]]: 18 | """Cut a list into multiple parts when fn returns True""" 19 | prev_pos = 0 20 | ret = [] 21 | for i, item in enumerate(list_obj): 22 | if split_fn(item): 23 | ret.append(list_obj[prev_pos:i]) 24 | prev_pos = i 25 | ret.append(list_obj[prev_pos:]) 26 | return ret 27 | 28 | def group_list( 29 | list_obj: List[Any], 30 | status_fn: Callable[[Any, Any], Any]) -> List[Tuple[Any, List[Any]]]: 31 | """Cut a list into multiple parts when based on the value returned by status_fn""" 32 | prev_status = None 33 | prev_pos = 0 34 | ret = [] 35 | for i, item in enumerate(list_obj): 36 | cur_status = status_fn(item, prev_status) 37 | if prev_status is not None and cur_status != prev_status: 38 | ret.append((prev_status, list_obj[prev_pos:i])) 39 | prev_pos = i 40 | prev_status = cur_status 41 | ret.append((cur_status, list_obj[prev_pos:])) 42 | return ret 43 | 44 | def head_spaces(line: str): 45 | """"Return the head spaces.""" 46 | return line[: len(line)-len(line.lstrip())] 47 | 48 | def flatten(x): 49 | """flatten a list of lists into a list.""" 50 | return [item for sublist in x for item in sublist] 51 | 52 | def print_list(x): 53 | print(f'len: {len(x)}') 54 | for i, y in enumerate(x): 55 | print(f'{i}\t{y}') 56 | 57 | def print_dict(x): 58 | print(f'len: {len(x)}') 59 | for k in x: 60 | print(f'{k}\t{x[k]}') 61 | -------------------------------------------------------------------------------- /d2lbook/markdown_test.py: -------------------------------------------------------------------------------- 1 | from d2lbook import markdown, common 2 | import unittest 3 | 4 | _markdown_src = r'''# Test 5 | 6 | first para 7 | 8 | :begin_tab:`python2` 9 | python is good 10 | :end_tab: 11 | 12 | another para 13 | 14 | :eqref:`sec_1` 15 | 16 | :begin_tab:`python 3` 17 | python3 is better 18 | 19 | ```python 3 20 | print(3) 21 | ``` 22 | :end_tab: 23 | 24 | ````bash 25 | ```bash 26 | $ ls 27 | ``` 28 | ```` 29 | ''' 30 | 31 | _markdown_text_src = r'''# Test 32 | :label:`sec` 33 | 34 | THis is good. A paragraph. 35 | 36 | ![Image](../a.png) 37 | :label:`a.png` 38 | 39 | Assume A 40 | 41 | $$ 42 | X^{(N)} = \sum_{i=1}^N X_i. 43 | $$ 44 | :label:`adsf` 45 | 46 | and 47 | 48 | $$\|\boldsymbol{x}\|_2 = \sqrt{\sum_{i=1}^n x_i^2}.$$ 49 | 50 | Here is a list 51 | - sadf 52 | wer 53 | - asdf sadf 54 | sd sdf 55 | - asdf 56 | 57 | 1. wer asdf 58 | asdf asdf 59 | 60 | 1. Run the code in this section. Change the conditional statement `x == y` in this section to `x < y` or `x > y`, and then see what kind of tensor you can get. 61 | 1. Replace the two tensors that operate by element in the broadcasting mechanism with other shapes, e.g., 3-dimensional tensors. Is the result the same as expected? 
62 | '''
63 | 
64 | class TestMarkdown(unittest.TestCase):
65 | 
66 |     def test_split(self):
67 |         cells = markdown.split_markdown(_markdown_src)
68 |         self.assertEqual(len(cells), 5)
69 |         self.assertEqual(cells[0]['type'], 'markdown')
70 |         self.assertEqual(cells[1]['type'], 'markdown')
71 |         self.assertEqual(cells[1]['class'], '`python2`')
72 |         self.assertEqual(cells[3]['class'], '`python 3`')
73 |         self.assertEqual(cells[4]['class'], 'bash')
74 | 
75 |     def test_merge(self):
76 |         cells = markdown.split_markdown(_markdown_src)
77 |         src = markdown.join_markdown_cells(cells)
78 |         self.assertEqual(_markdown_src, src)
79 | 
80 |     def test_split_text(self):
81 |         cells = markdown.split_text(_markdown_text_src)
82 |         common.print_list(cells)
83 | 
84 |     def test_join_text(self):
85 |         cells = markdown.split_text(_markdown_text_src)
86 |         src = markdown.join_text(cells)
87 |         self.assertEqual(_markdown_text_src, src)
88 | 
89 | 
90 | if __name__ == '__main__':
91 |     unittest.main()
--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
1 | # D2L-Book: A Toolkit for Hands-on Books
2 | 
3 | The D2L Book (`d2lbook`) package helps you build and publish **a book
4 | with Python code blocks**, or **Python package documents with tutorials**. You can
5 | check [Dive into Deep Learning](https://d2l.ai/) for a book
6 | example and [AutoGluon](https://autogluon.mxnet.io/) for a package document site
7 | example.
8 | 
9 | `d2lbook` is designed to meet the following two requirements:
10 | 
11 | - Your book may contain **a large amount of Python code** and you
12 |   expect your readers to run it. Or your package documents have **multiple
13 |   tutorials** to walk readers through your package usage through examples.
14 |   The code should be runnable and maintainable.
15 | 
16 | - You would like to publish **both an HTML website and a printable PDF
17 |   version**. You expect the website to be modern, searchable, and mobile-friendly,
18 |   and the PDF version to be of the same quality as if it were written in LaTeX.
19 | 
20 | 
21 | To achieve the above goals, `d2lbook` combines
22 | [Jupyter Notebook](https://jupyter.org/), the widely used interactive
23 | environment in Python, and [Sphinx](http://www.sphinx-doc.org/en/master/), the
24 | de facto document building system for Python packages. In particular, its main
25 | features include:
26 | 
27 | - Using [markdown](https://daringfireball.net/projects/markdown/) for your contents.
28 | - A minimal configuration file to customize the building so you can focus on the
29 |   contents.
30 | - Evaluating all code blocks to obtain their outputs before publishing to validate their
31 |   correctness. By default, `d2lbook` only evaluates the updated code blocks to save cost.
32 | - Being able to reference sections, figures, tables, equations, functions, and
33 |   classes.
34 | - Pipelines to publish your website through GitHub or AWS.
35 | 
36 | If `d2lbook` does not fit your requirements, you may check the following tools:
37 | 
38 | - [Jupyter Book](https://jupyterbook.org): A similar tool for building books
39 |   from computational material with Jupyter Notebooks and MyST Markdown.
40 | - [gitbook](https://www.gitbook.com/): very convenient for publishing a book written
41 |   in markdown if you don't need to run it as Jupyter notebooks.
42 | - [sphinx-gallery](https://sphinx-gallery.github.io/stable/index.html), a Sphinx
43 |   plugin to evaluate and publish your tutorials.
It requires you to know how 44 | to use Sphinx and write your tutorials in `.py` format with the `rst` style. 45 | 46 | ```eval_rst 47 | .. only:: html 48 | 49 | Table of Contents 50 | ----------------- 51 | ``` 52 | 53 | 54 | ```toc 55 | :numbered: 56 | :maxdepth: 2 57 | 58 | install 59 | user/index 60 | develop/index 61 | ``` 62 | 63 | -------------------------------------------------------------------------------- /scripts/install_fonts.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Install fonts to build PDF 3 | 4 | 5 | ########################################################### 6 | # Make sure system font dir exists already, if not, run: 7 | # sudo mkdir /usr/share/fonts/opentype/ 8 | 9 | # Make sure unzip is installed already, if not, run: 10 | # sudo apt-get install unzip 11 | 12 | # fc-cache is required, if not already installed, run: 13 | # sudo apt install fontconfig 14 | ########################################################### 15 | 16 | 17 | # En 18 | 19 | wget -O source-serif-pro.zip https://www.fontsquirrel.com/fonts/download/source-serif-pro 20 | unzip source-serif-pro -d source-serif-pro 21 | sudo mv source-serif-pro /usr/share/fonts/opentype/ 22 | 23 | wget -O source-sans-pro.zip https://www.fontsquirrel.com/fonts/download/source-sans-pro 24 | unzip source-sans-pro -d source-sans-pro 25 | sudo mv source-sans-pro /usr/share/fonts/opentype/ 26 | 27 | wget -O source-code-pro.zip https://www.fontsquirrel.com/fonts/download/source-code-pro 28 | unzip source-code-pro -d source-code-pro 29 | sudo mv source-code-pro /usr/share/fonts/opentype/ 30 | 31 | wget -O Inconsolata.zip https://www.fontsquirrel.com/fonts/download/Inconsolata 32 | unzip Inconsolata -d Inconsolata 33 | sudo mv Inconsolata /usr/share/fonts/opentype/ 34 | 35 | sudo fc-cache -f -v 36 | 37 | # Zh 38 | wget https://github.com/adobe-fonts/source-han-sans/releases/download/2.004R/SourceHanSansSC.zip 39 | wget -O SourceHanSerifSC.zip https://github.com/adobe-fonts/source-han-serif/releases/download/2.001R/09_SourceHanSerifSC.zip 40 | 41 | unzip SourceHanSansSC.zip -d SourceHanSansSC 42 | unzip SourceHanSerifSC.zip -d SourceHanSerifSC 43 | 44 | sudo mv SourceHanSansSC SourceHanSerifSC /usr/share/fonts/opentype/ 45 | sudo fc-cache -f -v 46 | 47 | # KO 48 | 49 | wget https://github.com/adobe-fonts/source-han-sans/releases/download/2.004R/SourceHanSansK.zip 50 | wget -O SourceHanSerifK.zip https://github.com/adobe-fonts/source-han-serif/releases/download/2.001R/08_SourceHanSerifK.zip 51 | 52 | unzip SourceHanSansK.zip -d SourceHanSansK 53 | unzip SourceHanSerifK.zip -d SourceHanSerifK 54 | 55 | sudo mv SourceHanSansK SourceHanSerifK /usr/share/fonts/opentype/ 56 | sudo fc-cache -f -v 57 | 58 | # JA 59 | 60 | wget https://github.com/adobe-fonts/source-han-sans/releases/download/2.004R/SourceHanSansJ.zip 61 | wget -O SourceHanSerifJ.zip https://github.com/adobe-fonts/source-han-serif/releases/download/2.001R/07_SourceHanSerifJ.zip 62 | 63 | unzip SourceHanSansJ.zip -d SourceHanSansJ 64 | unzip SourceHanSerifJ.zip -d SourceHanSerifJ 65 | 66 | sudo mv SourceHanSansJ SourceHanSerifJ /usr/share/fonts/opentype/ 67 | sudo fc-cache -f -v 68 | 69 | 70 | # Remove all zip files 71 | rm Source*.zip source*.zip Inconsolata.zip 72 | -------------------------------------------------------------------------------- /docs/user/slides.md: -------------------------------------------------------------------------------- 1 | # Creating Slides 2 | 3 | We can mark a notebook and then 
create slides from that notebook. For example, here are the generated [slides](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-pytorch-slides/blob/main/chapter_preliminaries/ndarray.ipynb#/) built from the markdown [source file](https://github.com/d2l-ai/d2l-en/blob/master/chapter_preliminaries/ndarray.md).
4 | Let's explain how it works with the following example: a markdown file with marks to
5 | generate slides.
6 | 
7 | ````md
8 | # Data Manipulation
9 | 
10 | ## Getting Started
11 | 
12 | To start, we can use `arange` to create a row vector `x`
13 | containing the first 12 integers starting with 0,
14 | though they are created as floats by default.
15 | 
16 | (**A tensor represents a (possibly multi-dimensional) array of numerical values. We can access a tensor's *shape*.**)
17 | 
18 | 
19 | ```{.python .input}
20 | import numpy as np
21 | 
22 | x = np.arange(12)
23 | x
24 | ```
25 | 
26 | [**Many**] more (**operations can be applied elementwise,**)
27 | including unary operators like exponentiation.
28 | (~~e.g. `exp`~~)
29 | 
30 | ```{.python .input}
31 | np.exp(x)
32 | ```
33 | 
34 | (**Even when shapes differ, we can still perform elementwise operations**)
35 | by invoking the *broadcasting mechanism*.
36 | 
37 | 
38 | ```{.python .input}
39 | a = np.arange(3).reshape(3, 1)
40 | b = np.arange(2).reshape(1, 2)
41 | a, b
42 | ```
43 | 
44 | ````
45 | 
46 | The above code block will generate 2 slides. The first slide contains the following contents:
47 | 
48 | ````md
49 | # Data Manipulation
50 | 
51 | A tensor represents a (possibly multi-dimensional) array of numerical values. We can access a tensor's *shape*.
52 | 
53 | ```{.python .input}
54 | import numpy as np
55 | 
56 | x = np.arange(12)
57 | x
58 | ```
59 | ````
60 | 
61 | You can see that we automatically copied the level-1 heading and the code block.
62 | In addition, we copied the text between `(**` and `**)`, while dropping all the rest.
63 | 
64 | The second slide contains the following:
65 | 
66 | ````md
67 | Many operations can be applied elementwise,
68 | e.g. `exp`
69 | 
70 | ```{.python .input}
71 | np.exp(x)
72 | ```
73 | 
74 | Even when shapes differ, we can still perform elementwise operations
75 | 
76 | ```{.python .input}
77 | a = np.arange(3).reshape(3, 1)
78 | b = np.arange(2).reshape(1, 2)
79 | a, b
80 | ```
81 | ````
82 | 
83 | First, you can see that all text between these three pairs
84 | (`[**`, `**]`),
85 | (`(**`, `**)`), and
86 | (`(~~`, `~~)`) is kept.
87 | Here `[` means starting a new slide, while `(` means continuing the current slide.
88 | (A level-1 heading will start a new slide, so we used `(` in the previous block.)
89 | In addition, `~~` means the text will only appear in slides,
90 | but not in the normal notebooks, HTML, or PDF versions.
91 | 
92 | Second, we didn't start a new slide before the last code block, i.e. there is no
93 | level-1 heading and no (`[**`, `**]`) pair, so the last two code blocks are merged
94 | into the same slide.
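95 | 
96 | To generate the decks from a marked notebook, use the `slides` command that is
97 | registered in `d2lbook/main.py` (shown here only through its help flag, since
98 | the exact options may vary across versions):
99 | 
100 | ```sh
101 | d2lbook slides -h   # list the options of the slides command
102 | ```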
--------------------------------------------------------------------------------
/docs/user/create.md:
--------------------------------------------------------------------------------
1 | # Creating Your Project
2 | :label:`sec_create`
3 | 
4 | Let's start with a simple project from scratch.
5 | 
6 | ## Project From Scratch
7 | 
8 | First make a folder for our project.
9 | 
10 | ```{.python .input n=1}
11 | !mkdir -p mybook
12 | ```
13 | 
14 | Then create two pages. The `index.md` is the index page which contains the
15 | table of contents (TOC), which includes the other page `get_started.md`. Note
16 | that the TOC is defined in a code block with the tag `toc`. If you are familiar with
17 | Sphinx, you will find it similar to the TOC definition in Sphinx. Please refer
18 | to :numref:`sec_markdown` for more extensions that `d2lbook` adds to markdown. Also note that we used the built-in `writefile` magic provided by [Jupyter](https://ipython.readthedocs.io/en/stable/interactive/magics.html) to save a code block into a file.
19 | 
20 | ```{.python .input n=2}
21 | %%writefile mybook/index.md
22 | # My Book
23 | 
24 | The starting page of my book with `d2lbook`.
25 | 
26 | ````toc
27 | get_started
28 | ````
29 | ```
30 | 
31 | ```{.python .input n=3}
32 | %%writefile mybook/get_started.md
33 | # Getting Started
34 | 
35 | Please first install my favorite package `numpy`.
36 | ```
37 | 
38 | Now let's build the HTML version.
39 | 
40 | ```{.python .input n=4}
41 | !cd mybook && d2lbook build html
42 | ```
43 | 
44 | The HTML index page is then available at `mybook/_build/html/index.html`.
45 | 
46 | ## Configuration
47 | 
48 | You can customize how results are built and published through `config.ini` in the root folder.
49 | 
50 | ```{.python .input n=5}
51 | %%writefile mybook/config.ini
52 | 
53 | [project]
54 | # Specify the PDF filename to mybook.pdf
55 | name = mybook
56 | # Specify the author names in the PDF
57 | author = Adam Smith, Alex Li
58 | 
59 | [html]
60 | # Add two links on the navbar. A link consists of three
61 | # items: name, URL, and a fontawesome icon. Items are separated by commas.
62 | header_links = PDF, https://book.d2l.ai/d2l-book.pdf, fas fa-file-pdf,
63 |                Github, https://github.com/d2l-ai/d2l-book, fab fa-github
64 | ```
65 | 
66 | Let's clear and build again.
67 | 
68 | ```{.python .input}
69 | !cd mybook && rm -rf _build && d2lbook build html
70 | ```
71 | 
72 | If you open `index.html` again, you will see the two links on the navigation bar.
73 | 
74 | Let's build the PDF output; you will find `Output written on mybook.pdf (7 pages).` in the output logs.
75 | 
76 | ```{.python .input}
77 | !cd mybook && d2lbook build pdf
78 | ```
79 | 
80 | We will cover more configuration options in the following sections. You can check [default_config.ini](https://github.com/d2l-ai/d2l-book/blob/master/d2lbook/config_default.ini) for all configuration options and their default values. Also check these example `config.ini` files in
81 | 
82 | - [This website](https://github.com/d2l-ai/d2l-book/blob/master/docs/config.ini)
83 | - [Dive into Deep Learning](https://github.com/d2l-ai/d2l-en/blob/master/config.ini)
84 | 
85 | Last, let's clear our workspace.
86 | 87 | ```{.python .input} 88 | !rm -rf mybook 89 | ``` 90 | -------------------------------------------------------------------------------- /d2lbook/library_test.py: -------------------------------------------------------------------------------- 1 | from d2lbook import library 2 | import unittest 3 | 4 | from collections import namedtuple 5 | 6 | class Cell: 7 | def __init__(self, cell_type, source): 8 | self.cell_type = cell_type 9 | self.source = source 10 | 11 | class Nb: 12 | def __init__(self, cells): 13 | self.cells = cells 14 | 15 | class TestLibrary(unittest.TestCase): 16 | def setUp(self): 17 | self.nb = Nb([Cell('code', '')]) 18 | self.tab_lib = { 19 | 'lib_name': 'torch', 20 | 'simple_alias': 21 | 'ones, zeros, tensor, arange, meshgrid, sin, sinh, cos, cosh, tanh, linspace, exp, log, normal, rand(, matmul, int32, float32, concat -> cat, stack, abs, eye', 22 | 'fluent_alias': 23 | 'numpy -> detach().numpy, reshape, size -> numel, to, reduce_sum -> sum, argmax, astype -> type, transpose -> t', 24 | 'alias': '', 25 | 'reverse_alias': '', 26 | 'args_alias': 'randn(size, device=None) -> np.random.randn(size=size, ctx=device)' 27 | } 28 | 29 | def test_replace_alias(self): 30 | # Test https://github.com/d2l-ai/d2l-book/issues/14 31 | pairs = [ # before, after 32 | ('X = d2l.reshape(d2l.arange(10,20),(2,3))', 33 | 'X = torch.arange(10, 20).reshape((2, 3))'), 34 | ('d2l.numpy(a)', 'a.detach().numpy()'), 35 | ('d2l.transpose(a)', 'a.t()'), 36 | ('metric.add(l * d2l.size(y), d2l.size(y))', 37 | 'metric.add(l * y.numel(), y.numel())'), 38 | ('float(d2l.reduce_sum(cmp.astype(y.dtype)))', 39 | 'float(cmp.astype(y.dtype).sum())'), 40 | ('d2l.numpy(nn.LeakyReLU(alpha)(x))', 41 | 'nn.LeakyReLU(alpha)(x).detach().numpy()'), 42 | ('d2l.reshape(X_tile(1 - d2l.eye(n_train)).astype(\'bool\'), (1,2))', 43 | 'X_tile(1 - torch.eye(n_train)).astype(\'bool\').reshape((1, 2))' 44 | ), 45 | ('float(d2l.reduce_sum(d2l.astype(cmp, y.dtype)))', 46 | 'float(cmp.type(y.dtype).sum())'), 47 | ('\nenc_attention_weights = d2l.reshape(\n d2l.concat(net.encoder.attention_weights, 0),\n (num_layers, num_heads, -1, num_steps))\nenc_attention_weights.shape = 2\n', 48 | 'enc_attention_weights = torch.cat(net.encoder.attention_weights, 0).reshape(\n (num_layers, num_heads, -1, num_steps))\nenc_attention_weights.shape = 2' 49 | ), 50 | ('float(d2l.reduce_sum(d2l.abs(Y1 - Y2))) < 1e-6', 51 | 'float(torch.abs(Y1 - Y2).sum()) < 1e-6'), 52 | ('d2l.plt.scatter(d2l.numpy(features[:, a + b]), d2l.numpy(labels), 1);', 53 | 'd2l.plt.scatter(features[:, a + b].detach().numpy(),labels.detach().numpy(), 1);' 54 | ), 55 | ('d2l.reshape(multistep_preds[i - tau: i], (1, -1))', 56 | 'multistep_preds[i - tau:i].reshape((1, -1))'), 57 | ('X = d2l.reshape(d2l.arange(16, dtype=d2l.float32), (1, 1, 4, 4))', 58 | 'X = torch.arange(16, dtype=torch.float32).reshape((1, 1, 4, 4))' 59 | ), 60 | ('# comments\nX = d2l.reshape(a)', '# comments\nX = a.reshape()'), 61 | ('X = d2l.reshape(a) # comments', 'X = a.reshape() # comments'), 62 | ('Y[i, j] = d2l.reduce_sum((X[i: i + h, j: j + w] * K))', 63 | 'Y[i, j] = (X[i:i + h, j:j + w] * K).sum()'), 64 | ('d2l.randn(size=(1,2)) * 0.01', 65 | 'np.random.randn(size=(1,2)) * 0.01'), 66 | ('d2l.randn(size=(1,2), device=d2l.try_gpu()) * 0.01', 67 | 'np.random.randn(size=(1,2), ctx=d2l.try_gpu()) * 0.01' 68 | ), 69 | 70 | ] 71 | 72 | for a, b in pairs: 73 | self.nb.cells[0].source = a 74 | nb = library.replace_alias(self.nb, self.tab_lib) 75 | compact = lambda x: x.replace('\n', '').replace(' ', '') 76 | 
            self.assertEqual(compact(nb.cells[0].source), compact(b))
77 |
--------------------------------------------------------------------------------
/docs/user/build.md:
--------------------------------------------------------------------------------
1 | # Building
2 |
3 | In this section we will explain various options to build your projects. These options can be grouped into four categories:
4 |
5 | 1. Sanity check
6 |    - `d2lbook build linkcheck` will check if all internal and external links are accessible.
7 |    - `d2lbook build outputcheck` will check that no notebook contains code outputs.
8 | 1. Building results
9 |    - `d2lbook build html`: build the HTML version into `_build/html`
10 |    - `d2lbook build pdf`: build the PDF version into `_build/pdf`
11 |    - `d2lbook build pkg`: build a zip file containing all `.ipynb` notebooks
12 | 1. Additional features
13 |    - `d2lbook build colab`: convert all notebooks so that they can be run on Google Colab and save them into `_build/colab`. See more in :numref:`sec_colab`
14 |    - `d2lbook build lib`: build a Python package so we can reuse code in other notebooks. See more in XXX.
15 | 1. Internal stages, which are often triggered automatically.
16 |    - `d2lbook build eval`: evaluate all notebooks and save them as `.ipynb` notebooks into `_build/eval`
17 |    - `d2lbook build rst`: convert all notebooks into `rst` files and create a Sphinx project in `_build/rst`
18 |
19 |
20 | ## Building Cache
21 |
22 | We encourage you to evaluate your notebooks to obtain code cell results, instead of keeping these results in the source files, for two reasons:
23 | 1. These results make code review difficult, especially when they have randomness, either due to numerical precision or random number generators.
24 | 1. A notebook that hasn't been evaluated for a while may be broken due to package upgrades.
25 |
26 | But the evaluation adds overhead to the build. We recommend limiting the runtime of each notebook to a few minutes, and `d2lbook` will reuse the previous build and only evaluate the modified notebooks.
27 |
28 | For example, the average runtime of a notebook (section) in [Dive into Deep Learning](https://d2l.ai) is about 2 minutes on a GPU machine, due to training neural networks. The book contains more than 100 notebooks, which makes the total runtime 2-3 hours. In reality, each code change only modifies a few notebooks, and therefore the [build time](http://ci.d2l.ai/blue/organizations/jenkins/d2l-en/activity) is often less than 10 minutes.
29 |
30 | Let's see how it works. First create a project as we did in :numref:`sec_create`.
31 |
32 | ```{.python .input}
33 | !mkdir -p cache
34 | ```
35 |
36 | ```{.python .input}
37 | %%writefile cache/index.md
38 | # My Book
39 |
40 | The starting page of my book with `d2lbook`.
41 |
42 | ````toc
43 | get_started
44 | ````
45 | ```
46 |
47 | ```{.python .input}
48 | %%writefile cache/get_started.md
49 | # Getting Started
50 |
51 | Please first install my favorite package `numpy`.
52 | ```
53 |
54 | ```{.python .input}
55 | !cd cache; d2lbook build html
56 | ```
57 |
58 | You can see that `index.md` is evaluated. (Though it doesn't contain code, it's fine to evaluate it as a Jupyter notebook.)
59 |
60 | If we build again, we will see that no notebook is evaluated.
61 |
62 | ```{.python .input}
63 | !cd cache; d2lbook build html
64 | ```
65 |
66 | Now let's modify `get_started.md`; you will see that it is re-evaluated, but not `index.md`.
67 |
68 | ```{.python .input}
69 | %%writefile cache/get_started.md
70 | # Getting Started
71 |
72 | Please first install my favorite package `numpy>=1.18`.
73 | ```
74 |
75 | ```{.python .input}
76 | !cd cache; d2lbook build html
77 | ```
78 |
79 | One way to trigger a full rebuild is removing the saved notebooks in `_build/eval`, or simply deleting `_build`. Another way is specifying some dependencies. For example, in the following cell we add `config.ini` to the dependencies. Every time `config.ini` is modified, it will invalidate the cache of all notebooks and trigger a build from scratch.
80 |
81 |
82 | ```{.python .input}
83 | %%writefile cache/config.ini
84 |
85 | [build]
86 | dependencies = config.ini
87 | ```
88 |
89 | ```{.python .input}
90 | !cd cache; d2lbook build html
91 | ```
92 |
93 | Last, let's clean our workspace.
94 |
95 | ```{.python .input}
96 | !rm -rf cache
97 | ```
98 |
--------------------------------------------------------------------------------
/d2lbook/sphinx.py:
--------------------------------------------------------------------------------
1 | import os
2 | import logging
3 | from d2lbook import sphinx_template as template
4 | from d2lbook import utils
5 |
6 | __all__ = ['prepare_sphinx_env']
7 |
8 | def prepare_sphinx_env(config):
9 |     env = SphinxEnv(config)
10 |     env.prepare_env()
11 |
12 | class SphinxEnv(object):
13 |     def __init__(self, config):
14 |         self.config = config
15 |         if self.config.pdf['style'] == 'cambridge':
16 |             self.pyconf = template.sphinx_conf_cambridge
17 |         else:
18 |             self.pyconf = template.sphinx_conf
19 |
20 |     def prepare_env(self):
21 |         self._copy_static_files()
22 |         self._update_header_links()
23 |         self._write_js()
24 |         self._write_css()
25 |         for key in self.config.project:
26 |             self._update_pyconf(key, self.config.project[key])
27 |         self._update_pyconf('index', self.config.build['index'])
28 |         self._update_pyconf('sphinx_configs', self.config.build['sphinx_configs'])
29 |
30 |         extensions = ['recommonmark', 'sphinxcontrib.bibtex',
31 |                       'sphinxcontrib.rsvgconverter', 'sphinx.ext.autodoc',
32 |                       'sphinx.ext.viewcode']
33 |         extensions.extend(self.config.build['sphinx_extensions'].split())
34 |         self._update_pyconf('extensions', ','.join('"'+ext+'"' for ext in extensions))
35 |         self._update_pyconf('bibfile', self.config.pdf['bibfile'])
36 |         for font in ['main_font', 'sans_font', 'mono_font']:
37 |             font_value = ''
38 |             if self.config.pdf[font]:
39 |                 font_value = '\set%s{%s}' % (font.replace('_', ''), self.config.pdf[font])
40 |             self._update_pyconf(font, font_value)
41 |
42 |         fname = os.path.join(self.config.rst_dir, 'conf.py')
43 |         with open(fname, 'w') as f:
44 |             f.write(self.pyconf)
45 |
46 |     def _update_pyconf(self, key, value):
47 |         self.pyconf = self.pyconf.replace(key.upper(), value)
48 |
49 |     def _copy_static_files(self):
50 |         static_keys = [('html', 'favicon'), ('html', 'html_logo'), ('pdf', 'latex_logo')]
51 |         for attribute, key in static_keys:
52 |             if attribute == 'html':
53 |                 fname = self.config.html[key]
54 |             elif attribute == 'pdf':
55 |                 fname = self.config.pdf[key]
56 |             if not fname:
57 |                 self._update_pyconf(key, '')
58 |                 continue
59 |             sphinx_fname = os.path.join(self.config.rst_dir, '_static',
60 |                                         os.path.basename(fname))
61 |             utils.copy(fname, sphinx_fname)
62 |             self._update_pyconf(key, os.path.join(
63 |                 '_static', os.path.basename(fname)))
64 |
65 |     def _update_header_links(self):
66 |         items = utils.split_config_str(self.config.html['header_links'], 3)
67 |         sphinx_links = ''
68 |         for tk in items:
69 |             if tk:
70 | 
sphinx_links += "('%s', '%s', True, '%s')," % (tk[0], tk[1], tk[2]) 71 | self._update_pyconf('header_links', sphinx_links) 72 | 73 | def _write_js(self): 74 | d2l_js = (template.shorten_sec_num + template.replace_qr 75 | + template.copybutton_js + template.discourse_js + template.tabbar_js) 76 | g_id = 'google_analytics_tracking_id' 77 | if g_id in self.config.deploy: 78 | d2l_js += template.google_tracker.replace( 79 | g_id.upper(), self.config.deploy[g_id]) 80 | 81 | os.makedirs(os.path.join(self.config.rst_dir, '_static'), exist_ok=True) 82 | fname = os.path.join(self.config.rst_dir, '_static', 'd2l.js') 83 | with open(fname, 'w') as f: 84 | f.write(d2l_js) 85 | for fname in utils.find_files(self.config.html['include_js'], self.config.src_dir): 86 | with open (fname, 'r') as fin: 87 | f.write(fin.read()) 88 | 89 | def _write_css(self): 90 | fname = os.path.join(self.config.rst_dir, '_static', 'd2l.css') 91 | d2l_css = template.hide_bibkey_css + template.copybutton_css + \ 92 | template.limit_output_length_css + template.tabbar_css 93 | with open(fname, 'w') as f: 94 | f.write(d2l_css) 95 | for fname in utils.find_files(self.config.html['include_css'], self.config.src_dir): 96 | with open (fname, 'r') as fin: 97 | f.write(fin.read()) 98 | -------------------------------------------------------------------------------- /d2lbook/deploy.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pathlib 3 | import sys 4 | import logging 5 | import argparse 6 | import shutil 7 | from d2lbook.utils import * 8 | from d2lbook.config import Config 9 | from d2lbook import colab 10 | from d2lbook import sagemaker 11 | from d2lbook import slides 12 | import glob 13 | 14 | __all__ = ['deploy'] 15 | 16 | commands = ['html', 'pdf', 'pkg', 'colab', 'sagemaker', 'all', 'slides'] 17 | 18 | def deploy(): 19 | parser = argparse.ArgumentParser(description='Deploy documents') 20 | parser.add_argument('commands', nargs='+', choices=commands) 21 | parser.add_argument('--s3', help='s3 bucket') 22 | args = parser.parse_args(sys.argv[2:]) 23 | config = Config() 24 | if args.s3: 25 | config.deploy['s3_bucket'] = args.s3 26 | if config.deploy['s3_bucket']: 27 | deployer = S3Deployer(config) 28 | elif config.deploy['github_repo']: 29 | deployer = GithubDeployer(config) 30 | else: 31 | deployer = Deployer(config) 32 | for cmd in args.commands: 33 | getattr(deployer, cmd)() 34 | 35 | class Deployer(object): 36 | def __init__(self, config): 37 | self.config = config 38 | 39 | def colab(self): 40 | _colab = colab.Colab(self.config) 41 | if not _colab.valid(): 42 | return 43 | def _run(): 44 | repo = _colab.git_repo(self.config.tab) 45 | bash_fname = os.path.join(os.path.dirname(__file__), 'upload_github.sh') 46 | run_cmd(['bash', bash_fname, self.config.colab_dir, repo, 47 | self.config.project['release']]) 48 | tab = self.config.tab 49 | self.config.set_tab('all') 50 | self.config.iter_tab(_run) 51 | self.config.set_tab(tab) 52 | 53 | def sagemaker(self): 54 | _sagemaker = sagemaker.Sagemaker(self.config) 55 | if not _sagemaker.valid(): 56 | return 57 | def _run(): 58 | repo = _sagemaker.git_repo(self.config.tab) 59 | bash_fname = os.path.join(os.path.dirname(__file__), 'upload_github.sh') 60 | run_cmd(['bash', bash_fname, self.config.sagemaker_dir, repo, 61 | self.config.project['release']]) 62 | tab = self.config.tab 63 | self.config.set_tab('all') 64 | self.config.iter_tab(_run) 65 | self.config.set_tab(tab) 66 | 67 | def slides(self): 68 | tab = self.config.tab 69 | 
self.config.set_tab('all') 70 | self.config.iter_tab(lambda: slides.Slides(self.config).deploy()) 71 | self.config.set_tab(tab) 72 | 73 | def _get_pdfs(self): 74 | # get all generated pdfs 75 | pdfs = list(glob.glob(self.config.tgt_dir+'/pdf*/'+self.config.project['name']+'*.pdf')) 76 | rets = [] 77 | for p in pdfs: 78 | p = pathlib.Path(p) 79 | tks = p.parent.name.split('_') 80 | if len(tks) > 1: 81 | tab = tks[1] 82 | if p.with_suffix('').name.split('-')[-1] != tab: 83 | continue 84 | rets.append(str(p)) 85 | return rets 86 | 87 | class GithubDeployer(Deployer): 88 | def __init__(self, config): 89 | super(GithubDeployer, self).__init__(config) 90 | self.git_dir = os.path.join(self.config.tgt_dir, 'github_deploy') 91 | shutil.rmtree(self.git_dir, ignore_errors=True) 92 | mkdir(self.git_dir) 93 | 94 | def html(self): 95 | run_cmd(['cp -r', os.path.join(self.config.html_dir, '*'), self.git_dir]) 96 | 97 | def pdf(self): 98 | for pdf in self._get_pdfs(): 99 | shutil.copy(pdf, self.git_dir) 100 | 101 | def pkg(self): 102 | shutil.copy(self.config.pkg_fname, self.git_dir) 103 | 104 | def __del__(self): 105 | bash_fname = os.path.join(os.path.dirname(__file__), 'upload_github.sh') 106 | run_cmd(['bash', bash_fname, self.git_dir, self.config.deploy['github_repo'], self.config.project['release']]) 107 | 108 | class S3Deployer(Deployer): 109 | def __init__(self, config): 110 | super(S3Deployer, self).__init__(config) 111 | 112 | def html(self): 113 | bash_fname = os.path.join(os.path.dirname(__file__), 'upload_doc_s3.sh') 114 | run_cmd(['bash', bash_fname, self.config.html_dir, self.config.deploy['s3_bucket']]) 115 | 116 | def pdf(self): 117 | url = self.config.deploy['s3_bucket'] 118 | if not url.endswith('/'): 119 | url += '/' 120 | for pdf in self._get_pdfs(): 121 | logging.info('cp %s to %s', pdf, url) 122 | run_cmd(['aws s3 cp', pdf, url, "--acl 'public-read' --quiet"]) 123 | 124 | def _deploy_other_files(self, tgt_url): 125 | other_urls = self.config.deploy['other_file_s3urls'].split() 126 | for other_url in other_urls: 127 | logging.info('cp %s to %s', other_url, tgt_url) 128 | run_cmd(['aws s3 cp', other_url, tgt_url, "--acl 'public-read' --quiet"]) 129 | 130 | def pkg(self): 131 | url = self.config.deploy['s3_bucket'] 132 | if not url.endswith('/'): 133 | url += '/' 134 | logging.info('cp %s to %s', self.config.pkg_fname, url) 135 | run_cmd(['aws s3 cp', self.config.pkg_fname, url, "--acl 'public-read' --quiet"]) 136 | self._deploy_other_files(url) 137 | 138 | def all(self): 139 | self.html() 140 | self.pdf() 141 | self.pkg() 142 | -------------------------------------------------------------------------------- /docs/user/deploy.md: -------------------------------------------------------------------------------- 1 | # Deploying 2 | 3 | You can copy the built result to any of your favorite places that can serve content online. Otherwise `d2lbook` provides two ways to deploy your build results: deploying through [Github](http://github.com/) or through [AWS](https://aws.amazon.com/). 4 | 5 | ## Deploying Through Github 6 | 7 | [Github Pages](https://pages.github.com/) allow us to host a website through a Github repo. To do so, we first need to create a github repo, for example we created [d2l-ai/d2l-book-deploy-demo](https://github.com/d2l-ai/d2l-book-deploy-demo) for this example. Then enable serving from the master branch in `Settings -> GitHub Pages`. You will get a URL to access it. 
It is [d2l-ai.github.io/d2l-book-deploy-demo](https://d2l-ai.github.io/d2l-book-deploy-demo/) for this example. You can add anything to `README.md`, which will not be shown on the website.
8 |
9 | ![Enable serving from master branch at Github](../img/github_pages.png)
10 | :width:`400px`
11 |
12 | Now let's create a project with `[deploy] github_repo` specified and build both HTML and PDF. You will see a large amount of logging information, thanks to LaTeX, or more precisely, `xelatex`.
13 |
14 | ```{.python .input}
15 | !mkdir -p deploy
16 | ```
17 |
18 | ```{.python .input}
19 | %%writefile deploy/index.md
20 | # Deploying Demo for d2lbook
21 |
22 | This is a demo to deploy on Github.
23 |
24 | ````toc
25 | get_started
26 | ````
27 | ```
28 |
29 | ```{.python .input}
30 | %%writefile deploy/get_started.md
31 | # Getting Started
32 |
33 | Please first install my favorite package `numpy`.
34 | ```
35 |
36 | ```{.python .input}
37 | %%writefile deploy/config.ini
38 | [project]
39 | name = deply-demo
40 |
41 | [html]
42 | header_links = PDF, https://d2l-ai.github.io/d2l-book-deploy-demo/deply-demo.pdf, fas fa-file-pdf
43 |
44 | [deploy]
45 | github_repo = d2l-ai/d2l-book-deploy-demo
46 | ```
47 |
48 | ```{.python .input}
49 | !cd deploy; d2lbook build html pdf
50 | ```
51 |
52 | To deploy to Github, you need to have your machine's [SSH key imported to Github](https://github.com/settings/keys). Otherwise, you may need to type in your account and password. When it is done, you can access the results online in one or two minutes. For this example, the URL is [d2l-ai.github.io/d2l-book-deploy-demo/](https://d2l-ai.github.io/d2l-book-deploy-demo/).
53 |
54 | ```{.python .input}
55 | !cd deploy; d2lbook deploy html pdf
56 | ```
57 |
58 | Lastly, let's clean our workspace.
59 |
60 | ```{.python .input}
61 | !rm -rf deploy
62 | ```
63 |
64 | ## Deploying Through AWS
65 |
66 | Another supported option is deploying through AWS. This option provides more flexibility but requires you to know the basic usage of AWS.
67 |
68 | We recommend the following procedure for the deployment:
69 |
70 | 1. Copying results into [S3](https://aws.amazon.com/s3/).
71 | 2. Serving with a [CDN](https://en.wikipedia.org/wiki/Content_delivery_network) by using [CloudFront](https://aws.amazon.com/cloudfront/) to reduce the latency.
72 | 3. Buying and adding a custom domain at [Route 53](https://aws.amazon.com/route53/)
73 |
74 | Now let's walk through these three steps one-by-one. Before getting started, you need to have a valid AWS account and type in your [AWS access key](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html) by running `aws configure`.
75 |
76 | First, create an S3 bucket. You can use the target domain name as your bucket name. For example, this site is deployed at book.d2l.ai, so we created a bucket named `book.d2l.ai`. In addition, we need to disable blocking public access through ACLs when creating this bucket, see :numref:`fig_s3-acl`, and enable static website hosting, see :numref:`fig_s3-web-hosting`. Then you will get a URL to access this bucket. In our example, it's http://book.d2l.ai.s3-website-us-west-2.amazonaws.com/.
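
If you prefer scripting this setup over clicking through the console, the sketch below performs the same three bucket steps with `boto3`. It is only a minimal sketch, assuming your credentials are already configured via `aws configure`; the bucket name and region are placeholders for your own values, and the block is not evaluated during the build.

```python
import boto3

bucket = 'book.d2l.ai'  # placeholder: use your own domain name
s3 = boto3.client('s3', region_name='us-west-2')

# Create the bucket in the chosen region
s3.create_bucket(Bucket=bucket, CreateBucketConfiguration={
    'LocationConstraint': 'us-west-2'})
# Allow public ACLs, as in :numref:`fig_s3-acl`
s3.put_public_access_block(Bucket=bucket, PublicAccessBlockConfiguration={
    'BlockPublicAcls': False, 'IgnorePublicAcls': False,
    'BlockPublicPolicy': False, 'RestrictPublicBuckets': False})
# Enable static website hosting, as in :numref:`fig_s3-web-hosting`
s3.put_bucket_website(Bucket=bucket, WebsiteConfiguration={
    'IndexDocument': {'Suffix': 'index.html'}})
```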
77 |
78 | ![Disable blocking public access through ACLs](../img/s3-acl.png)
79 |
80 | :label:`fig_s3-acl`
81 | :width:`500px`
82 |
83 | ![Enable static web hosting](../img/s3-web-hosting.png)
84 |
85 | :label:`fig_s3-web-hosting`
86 | :width:`500px`
87 |
88 | Second, create a new CloudFront distribution by specifying the following options:
89 | - Origin Domain Name: the previous S3 URL without `http://`, e.g. `book.d2l.ai.s3-website-us-west-2.amazonaws.com`
90 | - Default Root Object: `index.html`
91 | - [optional] Alternate Domain Names (CNAMEs): your target domain name, e.g. `book.d2l.ai`
92 | - [optional] SSL Certificate: you can create one in [Certificate Manager](https://aws.amazon.com/certificate-manager/) and then select it.
93 |
94 | After a few minutes, we will obtain a domain name such as `d1y0wi2ibwmzrt.cloudfront.net`.
95 |
96 | Third, you can either buy a domain at Route 53 or add Route 53's DNS into your existing domain. After that, create a "Hosted Zone" and then a Type-A Record Set with the CloudFront domain name as the alias target, see :numref:`fig_record-set` for an example.
97 |
98 | ![Create a Record Set](../img/record-set.png)
99 |
100 | :label:`fig_record-set`
101 | :width:`350px`
102 |
103 | Once you are done, specify your S3 bucket name in the following `config.ini` entry.
104 |
105 | ```bash
106 | [deploy]
107 | s3_bucket = s3://book.d2l.ai
108 | ```
109 |
110 | Each time you run `d2lbook deploy`, all results will be synchronized to this bucket and deployed automatically. Note that since we enabled a CDN, any new change may take a while to show up at your URL (e.g. http://book.d2l.ai). But you can check the S3 bucket URL (e.g. http://book.d2l.ai.s3-website-us-west-2.amazonaws.com) to review the changes immediately.
111 |
--------------------------------------------------------------------------------
/d2lbook/config.py:
--------------------------------------------------------------------------------
1 | import configparser
2 | import os
3 | import logging
4 |
5 | class Config():
6 |     def __init__(self, tab=None, config_fname='config.ini'):
7 |         config = configparser.ConfigParser()
8 |         default_config_name = os.path.join(
9 |             os.path.dirname(__file__), 'config_default.ini')
10 |         config.read(default_config_name, encoding='UTF-8')
11 |         if os.path.exists(config_fname):
12 |             logging.info('Load configuration from %s', config_fname)
13 |             config.read(config_fname, encoding='UTF-8')
14 |         tabs = config['build']['tabs']
15 |         self.tabs = [tab.strip() for tab in tabs.lower().split(',')] if tabs else []
16 |         self.default_tab = self.tabs[0] if self.tabs else None
17 |         self.tab = tab.lower() if tab else self.default_tab
18 |         if self.tab:
19 |             assert self.tabs, 'No tabs are specified'
20 |             if self.tab != 'all':
21 |                 assert self.tab in self.tabs, self.tab + ' is not found in tabs, which are ' + tabs
22 |         self.translation = config['translation']
23 |         self.build = config['build']
24 |         self.deploy = config['deploy']
25 |         self.project = config['project']
26 |         self.html = config['html']
27 |         self.pdf = config['pdf']
28 |         self.slides = config['slides']
29 |         self.library = dict(config['library'].items())
30 |         for tab in self.tabs:
31 |             if f'library-{tab}' in config:
32 |                 self.library[tab] = dict(config[f'library-{tab}'].items())
33 |         self.colab = config['colab']
34 |         self.sagemaker = config['sagemaker']
35 |
36 |         # A bunch of directories
37 |         self.src_dir = self.build['source_dir']
38 |         self.tgt_dir = self.build['output_dir']
39 |         self.eval_dir = os.path.join(self.tgt_dir, 'eval')
40 |         self.ipynb_dir = os.path.join(self.tgt_dir, 'ipynb')
41 |         self.rst_dir = os.path.join(self.tgt_dir, 'rst')
42 |         try:
43 |             self.html_dir = self.build['html_dir']
44 |         except KeyError:
45 |             self.html_dir = os.path.join(self.tgt_dir, 'html')
46 |         # MM20200104 changed to allow a separate html_dir to be specified in config.ini, e.g. put 'html_dir = docs' in the [build] section
47 |         self.pdf_dir = os.path.join(self.tgt_dir, 'pdf')
48 |         self.colab_dir = os.path.join(self.tgt_dir, 'colab')
49 |         self.sagemaker_dir = os.path.join(self.tgt_dir, 'sagemaker')
50 |         self.linkcheck_dir = os.path.join(self.tgt_dir, 'linkcheck')
51 |         self.slides_dir = os.path.join(self.tgt_dir, 'slides')
52 |
53 |         self._set_target()
54 |
55 |         # The project must have an index page
56 |         index_fname, ext = os.path.splitext(self.build['index'])
57 |         if ext and ext != '.md':
58 |             logging.info('Ignore the file extension, %s, specified by index in %s',
59 |                          ext, config_fname)
60 |         index_fname = os.path.join(self.src_dir, index_fname+'.md')
61 |         if not os.path.exists(index_fname):
62 |             logging.fatal('Failed to find the index file: %s', index_fname)
63 |             exit(-1)
64 |
65 |         if not self.project['title']:
66 |             # Infer the book title from the index page
67 |             with open(index_fname, 'r') as f:
68 |                 for line in f:
69 |                     line = line.strip()
70 |                     if line:
71 |                         if line.startswith('#'):
72 |                             line = line[1:]
73 |                         self.project['title'] = line.strip()
74 |                         break
75 |
76 |         self.set_tab(self.tab)
77 |         # Sanity checks.
78 |         self.sanity_check()
79 |
80 |     def _set_target(self):
81 |         # Some target names.
82 |         self.tex_fname = os.path.join(self.pdf_dir, self.project['name']+'.tex')
83 |         self.pkg_fname = os.path.join(self.tgt_dir, self.project['name']+'.zip')
84 |
85 |     def sanity_check(self):
86 |         notebook_patterns = self.build['notebooks'].split()
87 |         for p in notebook_patterns:
88 |             assert p.endswith('md'), '`notebooks` patterns must end with `md`' \
89 |                 ' in `config.ini`. Examples: `notebooks = *.md */*.md`.'
90 |
91 |         rst_patterns = self.build['rsts'].split()
92 |         for p in rst_patterns:
93 |             assert p.endswith('rst'), '`rsts` patterns must end with `rst`' \
94 |                 ' in `config.ini`. Examples: `rsts = index.rst' \
95 |                 ' api/**/*.rst`.'
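    # How tabs map to output directories: with, e.g., tabs = [mxnet, pytorch]
    # and mxnet as the default tab, _set_tab_dir() keeps the default tab's
    # directories bare (_build/eval) and suffixes the others
    # (_build/eval_pytorch). iter_tab() runs `fn` once per tab when the
    # current tab is 'all', switching directories via set_tab() each time.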
96 | 97 | 98 | def iter_tab(self, fn): 99 | if self.tab == 'all': 100 | for tab in self.tabs: 101 | self.set_tab(tab) 102 | fn() 103 | self.set_tab('all') 104 | else: 105 | fn() 106 | 107 | def set_tab(self, tab): 108 | if tab: 109 | assert tab in self.tabs + ['all'], f"{tab} doesn't exist in {self.tabs}" 110 | self.tab = tab 111 | self.eval_dir = self._set_tab_dir(self.eval_dir, tab) 112 | self.ipynb_dir = self._set_tab_dir(self.ipynb_dir, tab) 113 | self.rst_dir = self._set_tab_dir(self.rst_dir, tab) 114 | self.pdf_dir = self._set_tab_dir(self.pdf_dir, tab) 115 | self.colab_dir = self._set_tab_dir(self.colab_dir, tab) 116 | self.sagemaker_dir = self._set_tab_dir(self.sagemaker_dir, tab) 117 | self.slides_dir = self._set_tab_dir(self.slides_dir, tab) 118 | self._set_target() 119 | 120 | 121 | def _default_tab_dir(self, dirname): 122 | tokens = dirname.split('/') 123 | if self.tabs and '_' in tokens[-1]: 124 | tokens[-1] = '_'.join(tokens[-1].split('_')[:-1]) 125 | return '/'.join(tokens) 126 | return dirname 127 | 128 | def _set_tab_dir(self, dirname, tab): 129 | dirname = self._default_tab_dir(dirname) 130 | if tab == self.default_tab: 131 | return dirname 132 | return dirname + '_' + tab 133 | -------------------------------------------------------------------------------- /d2lbook/notebook_test.py: -------------------------------------------------------------------------------- 1 | from d2lbook import notebook 2 | from d2lbook import build 3 | from d2lbook import common 4 | import unittest 5 | import nbconvert 6 | 7 | # 8 blocks: 8 | # 0: markdown 9 | # 1: markdown python2 10 | # 2: markdown 11 | # 3: markdown python2 12 | # 4: markdown python3 13 | # 5: code default 14 | # 6: code python2 15 | # 7: markdown 16 | _markdown_src = r'''# Test 17 | 18 | first para 19 | 20 | :begin_tab:`python2` 21 | python is good 22 | :end_tab: 23 | 24 | another para 25 | 26 | This is :eqref:`sec_1` 27 | 28 | :begin_tab:`python2` 29 | ```python2 30 | 1+2+3 31 | ``` 32 | :end_tab: 33 | 34 | :begin_tab:`python3` 35 | python3 is better 36 | 37 | - here 38 | - haha 39 | 40 | :end_tab: 41 | 42 | ```{.input .python} 43 | 1+2+3 44 | ``` 45 | 46 | ```{.input .python} 47 | #@tab python2 48 | 1+2+3 49 | ``` 50 | 51 | ```bash 52 | ```` 53 | $ ls 54 | ```` 55 | ``` 56 | ''' 57 | 58 | _multi_tab_cell = r'''# Test 59 | 60 | ```{.input .python} 61 | #@tab python2, python3 62 | 1+2 63 | ``` 64 | 65 | The end 66 | ''' 67 | 68 | _all_tab_cell = r'''# Test 69 | 70 | ```{.input .python} 71 | #@tab all 72 | 1+2 73 | ``` 74 | 75 | Split 76 | 77 | ```{.input .python} 78 | #@tab python2,python4 79 | 1122 80 | ``` 81 | 82 | :begin_tab:`python2,python3` 83 | Here 84 | :end_tab: 85 | ''' 86 | 87 | class TestNotebook(unittest.TestCase): 88 | 89 | def test_split_markdown_cell(self): 90 | nb = notebook.read_markdown(_markdown_src) 91 | new_nb = notebook.split_markdown_cell(nb) 92 | cells = new_nb.cells 93 | self.assertEqual(len(cells), 8) 94 | self.assertEqual(cells[0].cell_type, 'markdown') 95 | self.assertEqual(cells[1].cell_type, 'markdown') 96 | self.assertEqual(cells[1].metadata['tab'], ['python2']) 97 | self.assertEqual(cells[2].cell_type, 'markdown') 98 | self.assertEqual('tab' in cells[2].metadata, False) 99 | self.assertEqual(cells[3].metadata['tab'], ['python2']) 100 | self.assertEqual(cells[4].metadata['tab'], ['python3']) 101 | self.assertEqual(cells[5].cell_type, 'code') 102 | self.assertEqual(cells[6].cell_type, 'code') 103 | 104 | def test_get_tab_notebook(self): 105 | nb = 
notebook.split_markdown_cell(notebook.read_markdown(_markdown_src)) 106 | new_nb = notebook.get_tab_notebook(nb, tab='python2', default_tab='python3') 107 | cells = new_nb.cells 108 | self.assertEqual(cells[0].cell_type, 'markdown') 109 | self.assertEqual(cells[1].cell_type, 'markdown') 110 | self.assertEqual(cells[1].metadata['tab'], ['python2']) 111 | self.assertEqual(cells[2].cell_type, 'markdown') 112 | self.assertEqual('tab' in cells[2].metadata, False) 113 | self.assertEqual(cells[3].metadata['tab'], ['python2']) 114 | self.assertEqual(cells[4].cell_type, 'code') 115 | self.assertEqual(cells[4].metadata['tab'], ['python2']) 116 | self.assertEqual(len(cells), 6) 117 | 118 | new_nb = notebook.get_tab_notebook(nb, tab='python3', default_tab='python3') 119 | cells = new_nb.cells 120 | self.assertEqual(cells[3].metadata['tab'], ['python3']) 121 | self.assertEqual(len(cells), 5) 122 | 123 | nb = notebook.read_markdown(_multi_tab_cell) 124 | cells = notebook.get_tab_notebook(nb, tab='python2', default_tab='python3').cells 125 | self.assertEqual(len(cells), 3) 126 | self.assertEqual(cells[1].metadata['tab'], ['python2']) 127 | 128 | cells = notebook.get_tab_notebook(nb, tab='python3', default_tab='python3').cells 129 | self.assertEqual(len(cells), 3) 130 | self.assertEqual(cells[1].metadata['tab'], ['python3']) 131 | 132 | def _split_and_merge(self, nb, tabs): 133 | split_nb = [notebook.get_tab_notebook(nb, tab, tabs[0]) for tab in tabs] 134 | merged_nb = notebook.merge_tab_notebooks(split_nb) 135 | return split_nb, merged_nb 136 | 137 | def test_merge_tab_notebooks(self): 138 | nb = notebook.split_markdown_cell(notebook.read_markdown(_markdown_src)) 139 | _, new_nb = self._split_and_merge(nb, ['python3', 'python2']) 140 | self.assertEqual(len(nb.cells), len(new_nb.cells)) 141 | for cell, new_cell in zip(nb.cells, new_nb.cells): 142 | if new_cell.source != cell.source: 143 | self.assertTrue(new_cell.source in cell.source) 144 | 145 | def test_add_html_tab(self): 146 | nb = notebook.split_markdown_cell(notebook.read_markdown(_markdown_src)) 147 | _, new_nb = self._split_and_merge(nb, ['python3', 'python2']) 148 | new_nb = notebook.add_html_tab(new_nb, tabs=['python3', 'python2']) 149 | cells = new_nb.cells 150 | self.assertEqual(len(cells), 18) 151 | self.assertRegex(cells[1].source, 'mdl-js-tabs') 152 | self.assertRegex(cells[2].source, 'mdl-tabs__panel.*python2') 153 | self.assertRegex(cells[4].source, '') 154 | self.assertRegex(cells[5].source, '') 155 | self.assertRegex(cells[8].source, 'mdl-tabs__panel.*python3') 156 | self.assertRegex(cells[12].source, 'mdl-tabs__panel.*python2') 157 | 158 | nb = notebook.split_markdown_cell(notebook.read_markdown(_all_tab_cell)) 159 | _, new_nb = self._split_and_merge(nb, ['python3', 'python2', 'python4']) 160 | cells = new_nb.cells 161 | self.assertEqual(len(cells), 5) 162 | self.assertEqual(cells[4].metadata['tab'], ['python3', 'python2']) 163 | 164 | new_nb = notebook.add_html_tab(new_nb, tabs=['python3', 'python2', 'python4']) 165 | cells = new_nb.cells 166 | self.assertEqual(len(cells), 15) 167 | self.assertRegex(cells[3].source, 'mdl-js-tabs') 168 | self.assertRegex(cells[4].source, 'mdl-tabs__panel.*python3') 169 | self.assertRegex(cells[7].source, 'mdl-tabs__panel.*python2') 170 | self.assertRegex(cells[11].source, 'mdl-tabs__panel.*python4') 171 | 172 | 173 | if __name__ == '__main__': 174 | unittest.main() 175 | -------------------------------------------------------------------------------- /d2lbook/utils.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import shutil 4 | import logging 5 | 6 | def rm_ext(filename): 7 | return os.path.splitext(filename)[0] 8 | 9 | def find_files(pattern, root=None, excluded_pattern=None): 10 | fnames = [] 11 | patterns = pattern.split() 12 | for p in patterns: 13 | if root is not None: 14 | p = os.path.join(root, p) 15 | if os.path.isdir(p): 16 | p = os.path.join(p, '**') 17 | for fn in glob.glob(p, recursive=True): 18 | if os.path.isfile(fn): 19 | fnames.append(fn) 20 | if not excluded_pattern: 21 | return fnames 22 | excluded_fnames = find_files(excluded_pattern, root) 23 | return [fn for fn in fnames if fn not in excluded_fnames] 24 | 25 | def get_mtimes(fnames): 26 | if isinstance(fnames, str): 27 | return os.path.getmtime(fnames) 28 | return [os.path.getmtime(fn) for fn in fnames] 29 | 30 | def split_fname(fname, base_dir, ext=None): 31 | fname = os.path.relpath(fname, base_dir) 32 | base, fext = os.path.splitext(fname) 33 | if fext.startswith('.'): 34 | fext = fext[1:] 35 | if ext and ext != fext: 36 | logging.warn("%s doesn't have extension %s", fname, ext) 37 | return base, fext 38 | 39 | def get_tgt_fname(src_dir, src_fname, tgt_dir, src_ext, tgt_ext): 40 | fname, ext = split_fname(src_fname, src_dir, src_ext) 41 | if tgt_ext: 42 | ext = tgt_ext 43 | return os.path.join(tgt_dir, fname+'.'+ext) 44 | 45 | def get_updated_files(src_fnames, src_dir, tgt_dir, 46 | src_ext=None, tgt_ext=None, deps_mtime=0): 47 | updated_fnames = [] 48 | for src_fn in src_fnames: 49 | tgt_fn = get_tgt_fname(src_dir, src_fn, tgt_dir, src_ext, tgt_ext) 50 | if (not os.path.exists(tgt_fn) # new 51 | or get_mtimes(src_fn) > get_mtimes(tgt_fn) # target is old 52 | or get_mtimes(tgt_fn) < deps_mtime): # deps is updated 53 | updated_fnames.append((src_fn, tgt_fn)) 54 | return list(set(updated_fnames)) 55 | 56 | 57 | def get_tgt_files_from_src_pattern(pattern, tgt_dir, src_ext, tgt_ext): 58 | """Get files with tgt_ext in tgt_dir according to pattern with src_ext""" 59 | patterns = pattern.split() 60 | for i, p in enumerate(patterns): 61 | f, ext = os.path.splitext(p) 62 | if src_ext and ext == '.' + src_ext and tgt_ext: 63 | patterns[i] = f + '.' + tgt_ext 64 | return find_files(' '.join(patterns), tgt_dir) 65 | 66 | 67 | def get_files_to_rm(pattern, src_dir, tgt_dir, src_ext=None, tgt_ext=None): 68 | """Return files under tgt_dir whose corresponding src file is removed under src_dir.""" 69 | tgt_files = get_tgt_files_from_src_pattern(pattern, tgt_dir, src_ext, tgt_ext) 70 | to_removes = [] 71 | for tgt_fn in tgt_files: 72 | # If tgt_ext is provided, only files with tgt_ext in tgt_dir are 73 | # considered being removed. Note that ipynb to rst may generate svg 74 | # files, which should not be removed though these svg files do not have 75 | # corresponding files in src_dir 76 | if tgt_ext: 77 | fext = os.path.splitext(tgt_fn)[1] 78 | if fext.startswith('.'): 79 | fext = fext[1:] 80 | if tgt_ext != fext: 81 | continue 82 | # By switching args, it actually get_src_fname. 
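        # (That is, it maps a target file back to the source file it was
        # generated from.)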
83 |         src_fn = get_tgt_fname(tgt_dir, tgt_fn, src_dir, tgt_ext, src_ext)
84 |         if not os.path.exists(src_fn):
85 |             to_removes.append(tgt_fn)
86 |     return to_removes
87 |
88 |
89 | def rm_empty_dir(path, rmed_empty_dirs):
90 |     """Recursively remove empty directories under and including path."""
91 |     if not os.path.isdir(path):
92 |         return
93 |
94 |     fnames = os.listdir(path)
95 |     if len(fnames) > 0:
96 |         for fn in fnames:
97 |             fpath = os.path.join(path, fn)
98 |             if os.path.isdir(fpath):
99 |                 rm_empty_dir(fpath, rmed_empty_dirs)
100 |
101 |     if len(os.listdir(path)) == 0:
102 |         rmed_empty_dirs.append(str(path))
103 |         os.rmdir(path)
104 |
105 | def hide_individual_data_files(fns):
106 |     """To display concisely: _build/eval/data/A/B/C/D -> _build/eval/data/A."""
107 |     concise_fns = set()
108 |     for fn in fns:
109 |         concise_fn = []
110 |         fn_components = fn.split('/')
111 |         i = 0
112 |         seen_data = False
113 |         while i < len(fn_components) and not seen_data:
114 |             component = fn_components[i]
115 |             concise_fn.append(component)
116 |             if component == 'data':
117 |                 seen_data = True
118 |             i += 1
119 |         if i < len(fn_components) - 1:
120 |             next_component = fn_components[i + 1]
121 |             if next_component.isdigit():
122 |                 concise_fn.append('')
123 |             else:
124 |                 concise_fn.append(next_component)
125 |             if i < len(fn_components) - 2:
126 |                 concise_fn.append('')  # For indicating dir instead of file
127 |         concise_fns.add('/'.join(concise_fn))
128 |     return concise_fns
129 |
130 | def mkdir(dirname):
131 |     os.makedirs(dirname, exist_ok=True)
132 |
133 |
134 | def copy(src, tgt):
135 |     mkdir(os.path.dirname(tgt))
136 |     shutil.copy(src, tgt)
137 |
138 |
139 | def get_time_diff(tik, tok):
140 |     h, remainder = divmod((tok - tik).seconds, 3600)
141 |     m, s = divmod(remainder, 60)
142 |     return "%02d:%02d:%02d" % (h, m, s)
143 |
144 | def run_cmd(cmd, verbose=False):
145 |     if isinstance(cmd, str):
146 |         cmd = [cmd]
147 |     cmd = ' '.join(cmd)
148 |     if verbose:
149 |         logging.info('Run "%s"', cmd)
150 |     ret = os.system(cmd)
151 |     if ret != 0:
152 |         exit(-1)
153 |
154 | def split_config_str(config_str, num_items_per_line=None):
155 |     items = []
156 |     if not config_str:
157 |         return items
158 |     lines = config_str.split('\n')
159 |     for i, line in enumerate(lines):
160 |         items.append([tk.strip() for tk in line.split(',') if tk.strip()])
161 |         if num_items_per_line and len(items[-1]) != num_items_per_line:
162 |             logging.fatal("The number of items in the %d-th line (%d) doesn't"
163 |                           " match the required number (%d)"%(i, len(items[-1]), num_items_per_line))
164 |             logging.fatal("The raw string is:\n"+config_str)
165 |     return items
--------------------------------------------------------------------------------
/docs/user/markdown.md:
--------------------------------------------------------------------------------
1 | # Markdown Cells
2 | :label:`sec_markdown`
3 |
4 |
5 | The `d2lbook` package provides additional features beyond the normal markdown support in
6 | Jupyter.
7 |
8 | ## Table of Contents
9 |
10 | You can use a `toc` code block to specify the table of contents.
11 | Here `:maxdepth: 2` means displaying two levels of files, and `:numbered:` means
12 | adding a number to each section (not enabled by default). Also note that you don't
13 | need to specify the file extension.
14 |
15 | `````
16 | ```toc
17 | :maxdepth: 2
18 | :numbered:
19 |
20 | guide/index
21 | ```
22 | `````
23 |
24 |
25 | ## Images
26 |
27 |
28 | We can put the image caption in `[]`. In addition, we can use
29 | `:width:` followed by its value in an inline block to specify the image width;
30 | similarly, use `:height:` for the height.
31 |
32 | ```
33 | ![Estimating the length of a foot](../img/koebel.jpg)
34 | :width:`400px`
35 | ```
36 |
37 |
38 | ![Estimating the length of a foot](../img/koebel.jpg)
39 | :width:`400px`
40 |
41 |
42 | ### SVG Images
43 |
44 | We recommend using SVG images as much as you can: they are sharp and small in
45 | size. But since LaTeX doesn't support SVG images, if you want to build a PDF
46 | output, you need to install `rsvg-convert`. On macOS, you can simply run
47 | `brew install librsvg`; on Ubuntu, `sudo apt-get install librsvg2-bin`.
48 |
49 | ![An LSTM cell in SVG](../img/lstm.svg)
50 |
51 | ## Tables
52 |
53 | You can insert a table caption before the table by starting it with a `:`. Note
54 | that you need to leave an empty line between the caption and the table itself.
55 |
56 | ```
57 | : The number is computed by $z_{ij} = \sum_{k}x_{ik}y_{kj}$.
58 |
59 | | Year | Number | Comment |
60 | | --- | --- | --- |
61 | | 2018 | 100 | Good year |
62 | | 2019 | 200 | Even better, add something to make this column wider |
63 | ```
64 |
65 |
66 | : The number is computed by $z_{ij} = \sum_{k}x_{ik}y_{kj}$.
67 |
68 | | Year | Number | Comment |
69 | | --- | --- | --- |
70 | | 2018 | 100 | Good year |
71 | | 2019 | 200 | Even better, add something to make this column wider |
72 |
73 | If the table caption number doesn't show properly, you may need to update
74 | `pandoc` to the latest version.
75 |
76 | ## Cross References
77 |
78 | We often want to reference sections, figures, tables and equations in a book.
79 |
80 | ### Referencing Sections
81 | :label:`my_sec3`
82 |
83 | We can put a label immediately after the section title to allow this section to
84 | be referenced by its label. The label format is
85 | `:label:` followed by its label name in an inline code block.
86 |
87 | ```
88 | ### Referencing Sections
89 | :label:`my_sec3`
90 | ```
91 |
92 |
93 | Then we can reference this section through `:ref:` followed by the label name in an
94 | inline code block
95 |
96 | ```
97 | :ref:`my_sec3` demonstrates how to reference a section.
98 | ```
99 |
100 |
101 | :ref:`my_sec3` demonstrates how to reference a section.
102 |
103 |
104 | Note that it displays the referenced section title with a clickable link. We can
105 | also use a numbered version by changing `:ref:` to `:numref:`, e.g. :numref:`my_sec3`.
106 |
107 | If the label is incorrect, say we put `my_sec2` here, the build log will
108 | contain a warning such as
109 |
110 | ```
111 | WARNING: undefined label: my_sec2
112 | ```
113 |
114 |
115 | You can turn it into an error by setting `warning_is_error = True` in
116 | `config.ini`.
117 |
118 | Besides, we can also cross-reference
119 | labels from other files, e.g. :numref:`sec_code`. This applies
120 | to figures, tables, and equations as well.
121 |
122 |
123 | ### Referencing Images
124 |
125 | Similarly, we can label an image and reference it later.
126 |
127 | ```
128 | ![A nice image with a cat and a dog.](../img/catdog.jpg)
129 | :width:`300px`
130 | :label:`img_catdog`
131 |
132 | As can be seen from :numref:`img_catdog`,
133 | ```
134 |
135 |
136 | ![A nice image with a cat and a dog.](../img/catdog.jpg)
137 | :width:`300px`
138 | :label:`img_catdog`
139 |
140 | As can be seen from :numref:`img_catdog`, there is a cat and a dog.
141 |
142 | ### Referencing Tables
143 |
144 | ```
145 | :This is a very long table caption. It will break into several lines, and
146 | contains a math equation as well. $z_{ij} = \sum_{k}x_{ik}y_{kj}$.
147 |
148 | | Year | Number | Comment |
149 | | --- | --- | --- |
150 | | 2018 | 100 | Good year |
151 | :label:`table`
152 |
153 | Refer to :numref:`table`
154 |
155 | ```
156 |
157 |
158 | :This is a very long table caption. It will break into several lines, and
159 | contains a math equation as well. $z_{ij} = \sum_{k}x_{ik}y_{kj}$.
160 |
161 | | Year | Number | Comment |
162 | | --- | --- | --- |
163 | | 2018 | 100 | Good year |
164 | :label:`table`
165 |
166 | Refer to :numref:`table`
167 |
168 | ### Referencing Equations
169 |
170 | The difference here is that we need to use `eqlabel` instead of `label`. For
171 | example
172 |
173 | ```
174 | $$\hat{\mathbf{y}}=\mathbf X \mathbf{w}+b$$
175 | :eqlabel:`linear`
176 |
177 |
178 | In :eqref:`linear`, we define the linear model.
179 | ```
180 |
181 |
182 | $$\hat{\mathbf{y}}=\mathbf X \mathbf{w}+b$$
183 | :eqlabel:`linear`
184 |
185 | In :eqref:`linear`, we define the linear model.
186 |
187 |
188 | ## Citations
189 |
190 | First put your bib file somewhere. In HTML, all references will be displayed at the
191 | place where the bibliography is inserted. But in the PDF, all references will be moved to the end of
192 | the document. Then we can cite a paper through `:cite:`. Multiple papers can be
193 | separated by commas (note that there should be no space)
194 |
195 | ```
196 |
197 | The breakthrough of deep learning originates from :cite:`krizhevsky2012imagenet` for...
198 |
199 | Two keys together :cite:`he2016deep,devlin2018bert`...
200 |
201 | :bibliography:`../refs.bib`
202 | ```
203 |
204 |
205 | The breakthrough of deep learning originates from :cite:`krizhevsky2012imagenet` for
206 | computer vision; there are rich follow-up works, such as
207 | :cite:`he2016deep`. NLP is catching up as well; the recent work
208 | :cite:`devlin2018bert` shows significant improvements.
209 |
210 | Two keys together :cite:`he2016deep,devlin2018bert`. Single author
211 | :cite:`mitchell80`, two authors :cite:`Newell81`
212 |
213 |
214 | Note that `:cite:` is the same as `\citep` in LaTeX. To use `\citet` in LaTeX, just use `:citet:`.
215 | For example:
216 |
217 | ```
218 | Two keys together :citet:`he2016deep,devlin2018bert`. Single author
219 | :citet:`mitchell80`, two authors :citet:`Newell81`
220 | ```
221 |
222 | Two keys together :citet:`he2016deep,devlin2018bert`. Single author
223 | :citet:`mitchell80`, two authors :citet:`Newell81`
224 |
225 |
226 | ## References
227 |
228 | :bibliography:`../refs.bib`
229 |
--------------------------------------------------------------------------------
/d2lbook/config_default.ini:
--------------------------------------------------------------------------------
1 | # A default configuration to create a book with d2lbook.
2 |
3 | [project]
4 |
5 | # The project name, used as the filename of the package and the PDF file. For
6 | # example, if set to d2l-book, then d2l-book.zip and d2l-book.pdf will be built
7 | name = d2lbook
8 |
9 | # Book title. It will be displayed on the top-right of the HTML page and the
10 | # front page of the PDF file
11 | title =
12 |
13 | # All author names
14 | author = The contributors
15 |
16 | # Current release version
17 | release = 0.0.1
18 |
19 | # The copyright
20 | copyright =
21 |
22 | # The language, such as en, zh, ja, ...
23 | lang =
24 |
25 | [translation]
26 |
27 | origin_repo =
28 | origin_lang =
29 |
30 | # aws or gcp
31 | translator =
32 | terminology =
33 |
34 |
35 | [build]
36 |
37 | # The root page
38 | index = index
39 |
40 | # A list of wildcards to indicate the markdown files that need to be evaluated as
41 | # Jupyter notebooks.
42 | notebooks = **/*.md
43 |
44 | # A list of wildcards to indicate the rst files that need to be included
45 | rsts =
46 |
47 | # A list of markdown files that will be included but not evaluated as Jupyter
48 | # notebooks. They are included in the eval outputs as markdown files (e.g.,
49 | # _build/eval) but not in the rst, pdf, html outputs (e.g., _build/rst etc.)
50 | non-notebooks =
51 |
52 | # Files that will be skipped.
53 | exclusions =
54 |
55 | # A list of files; if any one of them is modified after the last build, all
56 | # documents will be rebuilt.
57 | dependencies =
58 |
59 | # A list of files that will be copied to the build folder.
60 | resources =
61 |
62 | # If True (default), then evaluate the notebooks to obtain outputs.
63 | eval_notebook = True
64 |
65 |
66 | # Source directory
67 | source_dir = .
68 |
69 | # Output directory
70 | output_dir = _build
71 |
72 | # If True, mark the build as failed on any warning. Default is False.
73 | warning_is_error = False
74 |
75 | # Additional Sphinx extensions
76 | sphinx_extensions =
77 |
78 | # Additional Sphinx configuration options
79 | sphinx_configs =
80 |
81 | # Specify the tabs, separated by ",". The first one will be the default tab.
82 | tabs =
83 |
84 | # The original Github repository, such as d2l-ai/d2l-en, if this project is a
85 | # language translation from the source repository.
86 | origin_repo =
87 |
88 | [html]
89 |
90 | # A list of links that are displayed on the navbar. Each line contains one link; a
91 | # link consists of three items: name, URL, and a fontawesome icon
92 | # (https://fontawesome.com/icons?d=gallery). Items are separated by commas.
93 | header_links =
94 |
95 | # The filename of the favicon
96 | favicon =
97 |
98 | # The filename of the html logo
99 | html_logo =
100 |
101 | # A list of CSS files to be included
102 | include_css =
103 |
104 | # A list of JS files to be included
105 | include_js =
106 |
107 | [pdf]
108 |
109 |
110 | # The main font. By default it's FreeSerif. If you want a bolder font, you can
111 | # download and install
112 | # https://www.fontsquirrel.com/fonts/download/source-serif-pro
113 | # Then specify Source Serif Pro
114 | main_font =
115 |
116 | # The sans font. By default it's FreeSans. If you want a bolder font, you can
117 | # download and install
118 | # https://www.fontsquirrel.com/fonts/download/source-sans-pro
119 | # Then specify Source Sans Pro
120 | sans_font =
121 |
122 | # The mono font. By default it's FreeMono. If you want a bolder font, you can
123 | # download and install
124 | # https://www.fontsquirrel.com/fonts/download/source-code-pro
125 | # Then specify Source Code Pro
126 | # If you find Source Code Pro too wide, you can use the narrower Inconsolata
127 | # https://www.fontsquirrel.com/fonts/download/Inconsolata
128 | mono_font =
129 |
130 | # The file used to post-process the generated tex file.
131 | post_latex =
132 |
133 | # The filename of the latex logo
134 | latex_logo =
135 |
136 | # BibTeX bib file
137 | bibfile =
138 |
139 | # Specify the url of external latex resources.
140 | latex_url =
141 |
142 | # Specify the latex style. We now support "cambridge" and the default sphinx style.
143 | style =
144 |
145 | [library]
146 |
147 | # [DEPRECATED] Where code blocks will be saved to
148 | save_filename =
149 |
150 | # [DEPRECATED] The pattern marking that a block will be saved.
151 | save_mark =
152 |
153 | # [DEPRECATED] A list of filename and pattern pairs.
154 | save_patterns =
155 |
156 | # [DEPRECATED, use release instead] The library version
157 | version =
158 |
159 | # The file to save the library version
160 | version_file =
161 |
162 | # If set, then save a/b.md into root_dir/a/b.md
163 | root_dir =
164 |
165 | [deploy]
166 |
167 | # Tracking ID for the HTML pages
168 | google_analytics_tracking_id =
169 |
170 | # The github repo that all files will be copied to
171 | github_repo =
172 |
173 | # The S3 bucket that all files will be copied to
174 | s3_bucket =
175 |
176 | # S3 URLs of the other files that will also be deployed
177 | other_file_s3urls =
178 |
179 |
180 | [colab]
181 |
182 | # The github repo to host the notebooks for colab, such as d2l-ai/d2l-book-colab
183 | # Also make sure that the machine's ssh key is added to github before running
184 | # "deploy" so that it can commit into d2l-ai/d2l-book-colab
185 | github_repo =
186 |
187 | # The html pages that will be excluded when adding the colab button
188 | exclusions = *.html **/index.html
189 |
190 | # The additional libraries to be installed. Each line specifies one library; it
191 | # should contain two items separated by a comma. The first item is the lib name,
192 | # the second one is the pip package. For example: mxnet, mxnet-cu100. If a
193 | # notebook contains "import mxnet", then "!pip install mxnet-cu100" will be
194 | # inserted into the notebook.
195 | libs =
196 |
197 | # The text description shown when additional libraries are required.
198 | libs_header = The following additional libraries are needed to run this
199 | notebook. Note that running on Colab is experimental; please report a Github
200 | issue if you have any problem.
201 |
202 | # If any code cell contains the pattern (default: gpu), then set the
203 | # hardware accelerator to GPU in Colab. The default accelerator is None.
204 | gpu_pattern = gpu
205 |
206 | # Colab cannot display SVG files with a relative fname or a github URL. You can
207 | # replace it with your website URL. For example: img, http://book.d2l.ai/_images
208 | # will change "img/test.svg" to "http://book.d2l.ai/_images/test.svg"
209 | replace_svg_url =
210 |
211 |
212 | [sagemaker]
213 |
214 | # The github repo to host the notebooks for SageMaker, such as d2l-ai/d2l-book-sagemaker
215 | # Also make sure that the machine's ssh key is added to github before running
216 | # "deploy" so that it can commit into d2l-ai/d2l-book-sagemaker
217 | github_repo =
218 |
219 | kernel = conda_python3
220 |
221 | libs =
222 |
223 | libs_header = Installing (updating) the following libraries for your Sagemaker
224 | instance.
225 | 226 | [slides] 227 | 228 | # The HTML element to put on the top left corner in each slide 229 | top_left = 230 | 231 | # The HTML element to put on the top right corner in each slide 232 | top_right = 233 | 234 | # The github repo to host the slides 235 | github_repo = 236 | -------------------------------------------------------------------------------- /d2lbook/markdown.py: -------------------------------------------------------------------------------- 1 | """utilities to handle markdown 2 | """ 3 | import re 4 | from d2lbook import common 5 | from typing import List, Dict 6 | import logging 7 | 8 | def split_markdown(source: str) -> List[Dict[str, str]]: 9 | """Split markdown into a list of text and code cells. 10 | 11 | A cell has three fields: 12 | 1. type: either code or markdown 13 | 2. class: code class or tab class 14 | 3. source: single string for the source 15 | """ 16 | cells: List[Dict] = [] 17 | in_code = False 18 | in_tab = False 19 | cur_code_mark = None 20 | cur_tag = None 21 | cur_src = [] 22 | 23 | def _add_cell(cur_src: List[str], cells: List[Dict]): 24 | if cur_src: 25 | src = '\n'.join(cur_src).strip() 26 | if in_code: 27 | cells.append({ 28 | 'type': 'code', 29 | 'fence': cur_code_mark, 30 | 'class': cur_tag, 31 | 'source': src}) 32 | else: 33 | if not src and not cur_tag: 34 | return 35 | cells.append({'type': 'markdown', 'source': src}) 36 | if cur_tag: 37 | cells[-1]['class'] = cur_tag 38 | 39 | for l in source.splitlines(): 40 | code = common.md_code_fence.match(l) 41 | tab = common.md_mark_pattern.match(l) 42 | if code: 43 | # code can be nested 44 | if in_tab or (in_code and code.groups()[0] != cur_code_mark): 45 | cur_src.append(l) 46 | else: 47 | _add_cell(cur_src, cells) 48 | cur_src = [] 49 | cur_code_mark, cur_tag = code.groups() 50 | in_code ^= True 51 | elif tab: 52 | begin = tab.groups()[0] == 'begin_tab' 53 | end = tab.groups()[0] == 'end_tab' 54 | if in_code or (not begin and not end): 55 | cur_src.append(l) 56 | else: 57 | _add_cell(cur_src, cells) 58 | cur_src = [] 59 | if begin: 60 | cur_tag = tab.groups()[1] 61 | else: 62 | cur_tag = None 63 | in_tab = begin 64 | else: 65 | cur_src.append(l) 66 | _add_cell(cur_src, cells) 67 | return cells 68 | 69 | def join_markdown_cells(cells: List[Dict]) -> str: 70 | """Join a list of cells into a markdown string""" 71 | src = [] 72 | for c in cells: 73 | cell_src = [] 74 | if c['type'] == 'markdown': 75 | if 'class' in c: 76 | cell_src.append(f':begin_tab:{c["class"]}') 77 | cell_src.append(c['source']) 78 | if 'class' in c: 79 | if cell_src[-1].endswith('\n'): 80 | cell_src[-1] = cell_src[-1][:-1] 81 | cell_src.append(':end_tab:') 82 | else: 83 | cell_src += [c['fence'] + c['class'], c['source'], c['fence']] 84 | src.append('\n'.join(cell_src).strip()) 85 | return '\n\n'.join(src) + '\n' 86 | 87 | basic_token = r'[\ \*-\/\\\._\w\d\:/]+' 88 | token = r'[\|\'\:\;\<\>\^\(\)\{\}\[\]\ \*-\/\\\.,_=\w\d]+' 89 | 90 | def _is_mark(lines): 91 | if isinstance(lines, str): 92 | lines = [lines] 93 | for l in lines: 94 | l = l.strip() 95 | if l: 96 | m = re.match(rf':{token}:(`{token}`)?', l) 97 | if m is None or m.span() != (0, len(l)): 98 | return False 99 | return True 100 | 101 | def _list(line, prev_prefix): 102 | m = re.match(r' *[-\*\+] *', line) or re.match(r' *[\d]+\. 
*', line) 103 | if m: 104 | if prev_prefix is not None and len(prev_prefix.split('__')) == 2: 105 | p = int(prev_prefix.split('__')[1]) + 1 106 | else: 107 | p = 0 108 | return m[0] + '__' + str(p) 109 | if prev_prefix == '': 110 | return '' 111 | if prev_prefix is not None and len(re.match(r' *', line)[0]) > len( 112 | re.match(r' *', prev_prefix)[0]): 113 | return prev_prefix 114 | return '' 115 | 116 | def split_text(text: str) -> List[Dict[str, str]]: 117 | """Split text into a list of paragraphs 118 | 119 | 1. type: text, list, image, title, equation, table 120 | 1. source: 121 | 1. prefix: 122 | 1. mark: 123 | """ 124 | # split into paragraphs 125 | lines = text.splitlines() 126 | groups = common.group_list(lines, lambda a, _: a.strip() == '') 127 | paras = ['\n'.join(item) for empty_line, item in groups if not empty_line] 128 | 129 | def _fallback(p, type): 130 | logging.warn(f'Wrong {type} format:\n' + p) 131 | cells.append({'type': 'text', 'source': p}) 132 | 133 | cells = [] 134 | for p in paras: 135 | lines = p.splitlines() + [''] 136 | p += '\n' 137 | if p.startswith('#'): 138 | # parse title 139 | if not _is_mark(lines[1:]): 140 | _fallback(p, 'title') 141 | else: 142 | m = re.match(r'#+ *', lines[0]) 143 | cells.append({ 144 | 'type': 'title', 145 | 'prefix': m[0], 146 | 'source': lines[0][m.span()[1]:], 147 | 'mark': '\n'.join(lines[1:])}) 148 | elif p.startswith('$$'): 149 | # parse equations 150 | m = re.findall(r'\$\$', p) 151 | if len(m) != 2: 152 | _fallback(p, 'equation') 153 | else: 154 | cells.append({'type': 'equation', 'source': p}) 155 | elif p.startswith('!['): 156 | # parse images 157 | if not lines[0].strip().endswith(')') or not _is_mark(lines[1:]): 158 | _fallback(p, 'image') 159 | else: 160 | cells.append({'type': 'image', 'source': p}) 161 | elif p.startswith('|'): 162 | # parse table 163 | for i, l in enumerate(lines): 164 | if not l.startswith('|'): 165 | break 166 | if not _is_mark(lines[i:]): 167 | _fallback(p, 'equation') 168 | else: 169 | cells.append({'type': 'table', 'source': p}) 170 | else: 171 | groups = common.group_list(lines, _list) 172 | for prefix, item in groups: 173 | if len(prefix.split('__')) == 2: 174 | prefix = prefix.split('__')[0] 175 | source = '\n'.join(item)[len(prefix):] 176 | if prefix == '': 177 | cells.append({'type': 'text', 'source': source}) 178 | else: 179 | cells.append({ 180 | 'type': 'list', 181 | 'prefix': prefix, 182 | 'source': source}) 183 | return cells 184 | 185 | def join_text(cells) -> str: 186 | paras = [] 187 | for cell in cells: 188 | l = cell['source'] 189 | if 'prefix' in cell: 190 | l = cell['prefix'] + l 191 | if 'mark' in cell: 192 | l += '\n' + cell['mark'] 193 | paras.append(l) 194 | return '\n'.join(paras) -------------------------------------------------------------------------------- /d2lbook/colab.py: -------------------------------------------------------------------------------- 1 | """Integration with Colab notebooks""" 2 | import os 3 | import re 4 | import nbformat 5 | import logging 6 | from d2lbook import notebook 7 | from d2lbook import utils 8 | 9 | def parse_repo_lib(repo_str, lib_str, version): 10 | repo = utils.split_config_str(repo_str) 11 | if len(repo) == 1 and len(repo[0]) == 1: 12 | repos = {None:repo[0]} 13 | libs = {None:utils.split_config_str(lib_str, 2)} 14 | else: 15 | repo = utils.split_config_str(repo_str, 2) 16 | repos = {r[0]:r[1] for r in repo} 17 | libs_list = utils.split_config_str(lib_str, 3) 18 | libs = {} 19 | for tab, pkg, install in libs_list: 20 | if tab in libs: 
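                # The tab already has entries; extend its [package, pip-spec] list.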
21 | libs[tab].append([pkg, install]) 22 | else: 23 | libs[tab] = [[pkg, install]] 24 | for tab in libs: 25 | for i, l in enumerate(libs[tab]): 26 | if '==RELEASE' in l[1]: 27 | libs[tab][i][1] = l[1].replace('==RELEASE', f'=={version}') 28 | return repos, libs 29 | 30 | 31 | class Colab(): 32 | def __init__(self, config): 33 | self._valid = config.colab and config.colab['github_repo'] 34 | if not self._valid: 35 | return 36 | self.tabs = config.tabs 37 | self.config = config.colab 38 | self._repo, self._libs = parse_repo_lib( 39 | self.config['github_repo'], self.config['libs'], config.project["release"]) 40 | 41 | def valid(self): 42 | return self._valid 43 | 44 | def git_repo(self, tab): 45 | return self._repo[tab] 46 | 47 | def generate_notebooks(self, eval_dir, colab_dir, tab): 48 | if not self._valid: 49 | return 50 | # copy notebook fron eval_dir to colab_dir 51 | utils.run_cmd(['rm -rf', colab_dir]) 52 | utils.run_cmd(['cp -r', eval_dir, colab_dir]) 53 | notebooks = utils.find_files('**/*.ipynb', colab_dir) 54 | for fn in notebooks: 55 | nb = notebook.read(fn) 56 | if not nb: 57 | continue 58 | # Use Python3 as the kernel 59 | update_notebook_kernel(nb, "python3", "Python 3") 60 | # Check if GPU is needed 61 | use_gpu = False 62 | for cell in nb.cells: 63 | if cell.cell_type == 'code': 64 | if self.config['gpu_pattern'] in cell.source: 65 | use_gpu = True 66 | break 67 | if use_gpu: 68 | nb['metadata'].update({"accelerator": "GPU"}) 69 | logging.info('Use GPU for '+fn) 70 | # Update SVG image URLs 71 | if self.config['replace_svg_url']: 72 | _update_svg_urls(nb, self.config['replace_svg_url'], fn, colab_dir) 73 | insert_additional_installation(nb, self._libs[tab], self.config['libs_header']) 74 | with open(fn, 'w') as f: 75 | f.write(nbformat.writes(nb)) 76 | 77 | def add_button(self, html_dir): 78 | """Add an open colab button in HTML""" 79 | if not self._valid: 80 | return 81 | files = utils.find_files('**/*.html', html_dir, self.config['exclusions']) 82 | for fn in files: 83 | with open(fn, 'r') as f: 84 | html = f.read() 85 | if 'id="Colab' in html: 86 | continue 87 | url = os.path.relpath(fn, html_dir).replace('.html', '.ipynb') 88 | if self.tabs: 89 | colab_html = '' 90 | for tab in self.tabs: 91 | colab_tab = _get_colab_html(self._repo[tab], url, f'Colab [{tab}]') 92 | colab_html += f'
<div>{colab_tab}</div>
' 93 | colab_html = f'
<div>{colab_html}</div>
' 94 | else: 95 | colab_html = _get_colab_html(self._repo[None], url, 'Colab') 96 | html = html.replace('</h1>', colab_html + '</h1>') 97 | with open(fn, 'w') as f: 98 | f.write(html) 99 | 100 | def _get_colab_html(repo, url, text): 101 | id = text.replace(" ", "_") 102 | colab_link = f'https://colab.research.google.com/github/{repo}/blob/master/{url}' 103 | colab_html = f'
<a href="{colab_link}"><button id="{id}" title="Open the notebook in Colab">{text}</button></a>
' 104 | return colab_html 105 | 106 | def insert_additional_installation(notebook, lib, lib_header): 107 | if lib: 108 | cell = _get_installation_cell(notebook, lib) 109 | if cell: 110 | notebook.cells.insert(0, cell) 111 | if lib_header: 112 | notebook.cells.insert( 113 | 0, nbformat.v4.new_markdown_cell(source=lib_header)) 114 | 115 | def update_notebook_kernel(notebook, name, display_name=None): 116 | if not display_name: 117 | display_name = name 118 | notebook['metadata'].update({"kernelspec": { 119 | "name": name, 120 | "display_name": display_name 121 | }}) 122 | 123 | 124 | def _update_svg_urls(notebook, pattern, filename, root_dir): 125 | origin_url, new_url = utils.split_config_str(pattern, 2)[0] 126 | svg_re = re.compile('!\[.*\]\(([\.-_\w\d]+\.svg)\)') 127 | for cell in notebook.cells: 128 | if cell.cell_type == 'markdown': 129 | lines = cell.source.split('\n') 130 | for i, l in enumerate(lines): 131 | m = svg_re.search(l) 132 | if not m: 133 | continue 134 | path = os.path.relpath(os.path.realpath(os.path.join( 135 | root_dir, os.path.basename(filename), m[1])), root_dir) 136 | if not path.startswith(origin_url): 137 | logging.warning("%s in %s does not start with %s " 138 | "specified by replace_svg_url"%( 139 | path, filename, origin_url)) 140 | else: 141 | url = new_url + path[len(origin_url):] 142 | lines[i] = l.replace(m[1], url) 143 | cell.source = '\n'.join(lines) 144 | 145 | def _get_installation_cell(notebook, libs): 146 | """Return a cell for installing the additional libs""" 147 | lib_dict = dict(libs) 148 | lib1_re = re.compile('from ([_\w\d]+) import') 149 | lib2_re = re.compile('import ([_\w\d]+)') 150 | find_libs = [] 151 | for cell in notebook.cells: 152 | if cell.cell_type == 'code': 153 | lines = cell.source.split('\n') 154 | for l in lines: 155 | if l.strip().startswith('#'): # it's a comment 156 | continue 157 | m = lib1_re.search(l) 158 | if not m: 159 | m = lib2_re.search(l) 160 | if m and m[1] in lib_dict: 161 | find_libs.append(m[1]) 162 | if not find_libs and not notebook.metadata['required_libs']: 163 | return None 164 | install_str = '' 165 | for lib in set(find_libs): 166 | install_str += '!pip install ' + lib_dict[lib] + '\n' 167 | for lib in notebook.metadata['required_libs']: 168 | install_str += '!pip install ' + lib + '\n' 169 | return nbformat.v4.new_code_cell(source=install_str) 170 | -------------------------------------------------------------------------------- /d2lbook/translate.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import glob 3 | import sys 4 | import os 5 | from d2lbook import config, markdown, utils, common 6 | import logging 7 | import re 8 | import glob 9 | 10 | def translate(): 11 | parser = argparse.ArgumentParser(description='Translate to another language') 12 | # Example usage: d2lbook translate --commit 35a64ab chapter_optimization chapter_computer-vision/anchor.md 13 | parser.add_argument('source', nargs='+', help='chapter directories or markdown files to translate') 14 | parser.add_argument('--commit', default='latest', help='the commit of the base repo') 15 | args = parser.parse_args(sys.argv[2:]) 16 | 17 | cf = config.Config() 18 | trans = Translate(cf, args.commit) 19 | for source in args.source: 20 | # Check if source is a file or a chapter dir 21 | if not source.endswith(".md"): 22 | chap_dir = os.path.join(trans.repo_dir, source) 23 | if os.path.isdir(chap_dir): 24 | logging.info(f'Translating all sections of {source}') 25 | all_chap_secs = 
os.listdir(chap_dir) 26 | for sec_name in all_chap_secs: 27 | if sec_name.endswith(".md"): 28 | trans.translate(os.path.join(source, sec_name)) 29 | else: 30 | logging.error(f'Invalid directory {source}: Please provide ' 31 | 'a valid chapter name for translation') 32 | else: 33 | trans.translate(source) 34 | 35 | class Translate(object): 36 | def __init__(self, cf: config.Config, commit: str): 37 | # init the original repo 38 | import git 39 | self.config = cf 40 | self.repo_dir = os.path.join(cf.tgt_dir, 'origin_repo') 41 | assert cf.translation['origin_repo'], 'must provide the origin repo' 42 | self.url = 'https://github.com/' + cf.translation['origin_repo'] 43 | if os.path.exists(self.repo_dir): 44 | self.repo = git.Repo(self.repo_dir) 45 | logging.info(f'Pulling from {self.url} into {self.repo_dir}') 46 | # Reset to origin/master before pulling updates 47 | self.repo.git.reset('--hard', self.repo.remotes.origin.name + '/' + self.repo.active_branch.name) 48 | self.repo.remotes.origin.pull() 49 | else: 50 | logging.info(f'Clone {self.url} into {self.repo_dir}') 51 | self.repo = git.Repo.clone_from(self.url, self.repo_dir) 52 | if commit == 'latest': 53 | self.commit = str(self.repo.commit())[:7] 54 | else: 55 | self.repo.git.reset(commit, '--hard') 56 | self.commit = commit[:7] 57 | # init the translator 58 | self.translator = None 59 | if cf.translation['translator']: 60 | if cf.translation['translator'] == 'aws': 61 | assert cf.project['lang'] 62 | assert cf.translation['origin_lang'] 63 | self.translator = AWS(cf.translation['origin_lang'], cf.project['lang'], cf.translation['terminology']) 64 | else: 65 | logging.error(f'Unknown translator: {cf.translation["translator"]}') 66 | 67 | def translate(self, filename: str): 68 | src_fn = os.path.join(self.repo_dir, filename) 69 | fns = glob.glob(src_fn) 70 | if not len(fns): 71 | logging.warning('Not found '+src_fn) 72 | return 73 | if len(fns) > 1: 74 | for fn in fns: 75 | self.translate(os.path.relpath(fn, self.repo_dir)) 76 | return 77 | src_fn = fns[0] 78 | filename = os.path.relpath(src_fn, self.repo_dir) 79 | basename, ext = os.path.splitext(filename) 80 | origin_tgt_fn = os.path.join(self.config.src_dir, 81 | basename+'_origin'+ext) 82 | tgt_fn = os.path.join(self.config.src_dir, filename) 83 | if os.path.exists(tgt_fn): 84 | logging.warning(f'File {tgt_fn} already exists, skipping translation.') 85 | return 86 | logging.info(f'Write original text into {origin_tgt_fn}') 87 | utils.mkdir(os.path.dirname(origin_tgt_fn)) 88 | with open(origin_tgt_fn, 'w') as f: 89 | with open(src_fn, 'r') as f2: 90 | f.write(f2.read()) 91 | 92 | if self.translator and ext == '.md': 93 | self.translator.translate_markdown(src_fn, tgt_fn) 94 | logging.info(f'Write translated results into {tgt_fn}') 95 | else: 96 | if not os.path.exists(tgt_fn): 97 | with open(tgt_fn, 'w') as f: 98 | logging.info(f'Create an empty file {tgt_fn}') 99 | 100 | 101 | class MarkdownText(object): 102 | def __init__(self): 103 | self.mapping = [] 104 | 105 | def _encode_pattern(self, pattern, text): 106 | matched = set(re.findall(pattern, text)) 107 | for m in matched: 108 | # another solution is to use some special tokens and put them in 109 | # the terminology. Unfortunately that doesn't work for Amazon Translate, 110 | # so use a number instead and hope it will not be translated.
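# A sketch of the intended round trip (token value illustrative):
#   mt = MarkdownText()
#   masked = mt.encode('see :numref:`sec_colab`')   # -> 'see 732293614'
#   restored = mt.decode(translated_masked_text)    # token -> original mark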
111 | token = str(732293614+len(self.mapping)) 112 | text = text.replace(m, token) 113 | self.mapping.append((m, token)) 114 | return text 115 | 116 | def encode(self, text:str) -> str: 117 | patterns = [rf'(:{markdown.token}:`{markdown.token}`)', # mark 118 | rf'(`{markdown.token}`)', # code 119 | rf'(\${markdown.token}\$)', # inline math 120 | rf'(\[{markdown.basic_token}\]\({markdown.basic_token}\))', # link 121 | ] 122 | for p in patterns: 123 | text = self._encode_pattern(p, text) 124 | return text 125 | 126 | def decode(self, text:str) -> str: 127 | for key, value in self.mapping: 128 | text = text.replace(value, key) 129 | text = text.replace('] (', '](') 130 | return text 131 | 132 | class Translator(object): 133 | def translate(self, text: str): 134 | raise NotImplementedError() 135 | 136 | def _translate_markdown(self, text): 137 | cells = markdown.split_markdown(text) 138 | for cell in cells: 139 | if cell['type'] == 'markdown': 140 | if 'class' in cell and cell['class']: 141 | # it may have nested code blocks 142 | cell['source'] = self._translate_markdown(cell['source']) 143 | else: 144 | text_cells = markdown.split_text(cell['source']) 145 | for t_cell in text_cells: 146 | if t_cell['source'] and ( 147 | t_cell['type'] in ['text', 'list', 'title']): 148 | text = t_cell['source'] 149 | markdown_text = MarkdownText() 150 | t_cell['source'] = markdown_text.decode(self.translate( 151 | markdown_text.encode(text))) 152 | if text.endswith('\n'): 153 | t_cell['source'] += '\n' 154 | cell['source'] = markdown.join_text(text_cells) 155 | return markdown.join_markdown_cells(cells) 156 | 157 | def translate_markdown(self, src_fn: str, tgt_fn: str): 158 | with open(src_fn, 'r') as r: 159 | with open(tgt_fn, 'w') as w: 160 | w.write(self._translate_markdown(r.read())) 161 | 162 | class AWS(Translator): 163 | """Use Amazon Translate""" 164 | def __init__(self, src_lang, target_lang, terminology=None): 165 | import boto3 166 | self.client = boto3.client('translate') 167 | self.terminology = [terminology] if terminology else [] 168 | self.src_lang = src_lang 169 | self.tgt_lang = target_lang 170 | logging.info(f'Amazon Translate {src_lang} -> {target_lang}, terminology {self.terminology}') 171 | 172 | def translate(self, text: str): 173 | text = text.replace('\n', ' ') 174 | print(text) 175 | resp = self.client.translate_text( 176 | Text=text, TerminologyNames=self.terminology, 177 | SourceLanguageCode=self.src_lang, TargetLanguageCode=self.tgt_lang) 178 | return resp['TranslatedText'] 179 | 180 | if __name__ == "__main__": 181 | logging.basicConfig(format='[d2lbook:%(filename)s:L%(lineno)d] %(levelname)-6s %(message)s') 182 | logging.getLogger().setLevel(logging.INFO) 183 | 184 | if len(sys.argv) == 5: 185 | _, src_fn, src_lang, tgt_fn, tgt_lang = sys.argv 186 | terminology = None 187 | elif len(sys.argv) == 6: 188 | _, src_fn, src_lang, tgt_fn, tgt_lang, terminology = sys.argv 189 | else: 190 | exit(-1) 191 | translator = AWS(src_lang, tgt_lang, terminology) 192 | translator.translate_markdown(src_fn, tgt_fn) 193 | 194 | 195 | -------------------------------------------------------------------------------- /d2lbook/slides.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import glob 3 | import sys 4 | from d2lbook import config, notebook, common, utils 5 | import logging 6 | import glob 7 | import nbformat 8 | import pathlib 9 | import os 10 | from nbformat import notebooknode 11 | from typing import Optional 12 | import re 13 | 14 | 
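# Typical invocation (a sketch; the flags are defined by the argparse setup in slides() below):
#   d2lbook slides chapter_intro/index.md --tab pytorch
# which writes chapter_intro/index_pytorch_slides.ipynb next to the source file.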
def slides(): 15 | parser = argparse.ArgumentParser( 16 | description='Generate slides from markdown files.') 17 | parser.add_argument('filename', nargs='+', 18 | help='the source markdown files') 19 | parser.add_argument('--tab', default=None, help='the tab') 20 | args = parser.parse_args(sys.argv[2:]) 21 | tab = args.tab 22 | cf = config.Config() 23 | sd = Slides(cf) 24 | for fn in args.filename: 25 | fns = glob.glob(fn) 26 | if not len(fns): 27 | logging.warning('Not found ' + fn) 28 | return 29 | for md in fns: 30 | with open(md, 'r') as f: 31 | nb = notebook.read_markdown(f.read()) 32 | if tab: 33 | nb = notebook.split_markdown_cell(nb) 34 | nb = notebook.get_tab_notebook(nb, tab, cf.default_tab) 35 | output_fn = str(pathlib.Path(md).with_suffix('')) + ( 36 | '_' + tab if tab else '_') + '_slides.ipynb' 37 | sd.generate(nb, output_fn) 38 | 39 | class Slides(): 40 | def __init__(self, config): 41 | self._valid = config.slides and config.slides['github_repo'] 42 | if not self._valid: 43 | return 44 | self.config = config 45 | repo = utils.split_config_str(self.config.slides['github_repo'], 2) 46 | self._repo = {r[0]: r[1] for r in repo} 47 | 48 | def deploy(self): 49 | if not self._valid: 50 | return 51 | repo = self._repo.get(self.config.tab, '') 52 | if not repo: 53 | return 54 | bash_fname = os.path.join(os.path.dirname(__file__), 55 | 'upload_github.sh') 56 | utils.run_cmd([ 57 | 'bash', bash_fname, self.config.slides_dir, repo, 58 | self.config.project['release']]) 59 | 60 | def generate_readme(self): 61 | repo = self._repo.get(self.config.tab, '') 62 | if not self._valid or not repo: return 63 | 64 | root = os.path.join(self.config.src_dir, 65 | self.config.build['index'] + '.md') 66 | notebooks = notebook.get_toc(root) 67 | items = [] 68 | for nb in notebooks: 69 | p = (self.config.slides_dir / 70 | pathlib.Path(nb)).with_suffix('.ipynb') 71 | if p.exists(): 72 | p = str(p.relative_to(self.config.slides_dir)) 73 | base = 'https://nbviewer.jupyter.org/format/slides/github' 74 | items.append(f' - [{p}]({base}/{repo}/blob/main/{p})') 75 | 76 | with open(os.path.join(self.config.slides_dir, 'README.md'), 'w') as f: 77 | f.write(f'# {repo}\n') 78 | f.write(''' 79 | This repo contains generated notebook slides. To open them locally, we suggest you install the [rise](https://rise.readthedocs.io/en/stable/) extension. 80 | 81 | You can also preview them in nbviewer: 82 | ''') 83 | f.write('\n'.join(items)) 84 | 85 | def generate(self, nb: notebooknode.NotebookNode, output_fn: str): 86 | """Get all slide blocks and write to file.""" 87 | nb = _generate_slides(nb) 88 | if not nb: return 89 | 90 | nb['metadata'].update({ 91 | 'language_info': { 92 | 'name': 'python'}, 93 | 'celltoolbar': 'Slideshow', 94 | 'rise': { 95 | "autolaunch": 96 | True, 97 | "enable_chalkboard": 98 | True, 99 | "overlay": 100 | f"
<div class='my-top-right'>{self.config.slides['top_right']}</div>
<div class='my-top-left'>{self.config.slides['top_left']}</div>
", 101 | "scroll": 102 | True 103 | }}) 104 | dirname = os.path.dirname(output_fn) 105 | utils.mkdir(dirname) 106 | with open(output_fn, 'w') as f: 107 | f.write(nbformat.writes(nb)) 108 | logging.info('Write slides into ' + output_fn) 109 | 110 | with open(dirname + '/rise.css', 'w') as f: 111 | f.write(''' 112 | div.text_cell_render.rendered_html { 113 | padding: 0.35em 0.1em; 114 | } 115 | 116 | div.code_cell { 117 | font-size: 120%; 118 | } 119 | 120 | div.my-top-right { 121 | position: absolute; 122 | right: 5%; 123 | top: 1em; 124 | font-size: 2em; 125 | } 126 | 127 | div.my-top-left { 128 | position: absolute; 129 | left: 5%; 130 | top: 1em; 131 | font-size: 2em; 132 | } 133 | ''') 134 | 135 | def remove_slide_marks( 136 | nb: notebooknode.NotebookNode) -> notebooknode.NotebookNode: 137 | """Remove all slide blocks and return.""" 138 | new_cells = [] 139 | for cell in nb.cells: 140 | if cell.cell_type != 'markdown': 141 | new_cells.append(cell) 142 | else: 143 | src = cell.source 144 | matches = _match_slide_marks(cell.source) 145 | for pair, text in matches: 146 | old = pair[0] + text + pair[1] 147 | new = '' if pair[0].endswith('~~') else text 148 | src = src.replace(old, new) 149 | new_cells.append(nbformat.v4.new_markdown_cell(src)) 150 | return notebook.create_new_notebook(nb, new_cells) 151 | 152 | def _generate_slides( 153 | nb: notebooknode.NotebookNode) -> Optional[notebooknode.NotebookNode]: 154 | new_cells = [] 155 | has_slides = False 156 | for cell in nb.cells: 157 | if cell.cell_type != 'markdown': 158 | # remove comments 159 | lines = cell.source.splitlines() 160 | new_lines = [] 161 | for l in lines: 162 | new_l = re.sub(r'\#\ .*', '', l) 163 | if new_l != l and not new_l.rstrip(): 164 | continue 165 | new_lines.append(new_l.rstrip()) 166 | cell.source = '\n'.join(new_lines) 167 | new_cells.append(cell) 168 | else: 169 | slide_type = '-' 170 | src = [] 171 | matches = _match_slide_marks(cell.source) 172 | if matches: 173 | has_slides = True 174 | for pair, text in matches: 175 | if pair[0].startswith('['): 176 | slide_type = 'slide' 177 | src.append(text) 178 | src = '\n'.join(src) 179 | if src: 180 | # cannot simply use . as it could be in code such as `a.text()` 181 | for m in ('.\n', '. '): 182 | sentences = [s.strip() for s in src.split(m)] 183 | src = m.join([s[0].upper() + s[1:] for s in sentences]) 184 | src = src.replace('.$$', '$$').replace(',$$', '$$') 185 | src = src.rstrip(',. 
\n:,。:') 186 | # find level-1 head 187 | for l in cell.source.splitlines(): 188 | if l.strip().startswith('# '): 189 | src = l + '\n\n' + src 190 | break 191 | if not src: continue 192 | new_cells.append( 193 | nbformat.v4.new_markdown_cell( 194 | src, metadata={"slideshow": { 195 | "slide_type": slide_type}})) 196 | if not has_slides: 197 | return None 198 | 199 | # merge code cells in the same slide if they don't have output 200 | md_code_group = common.group_list(new_cells, 201 | lambda cell, _: cell.cell_type == 'code') 202 | merged_code_cell = [] 203 | for is_code, group in md_code_group: 204 | if not is_code: 205 | merged_code_cell.extend(group) 206 | else: 207 | src = [] 208 | for i, cell in enumerate(group): 209 | src.append(cell.source) 210 | if i == len(group) - 1 or 'outputs' in cell and len( 211 | cell['outputs']): 212 | cell.source = '\n\n'.join(src) 213 | src = [] 214 | merged_code_cell.append(cell) 215 | # clean #@save 216 | for cell in merged_code_cell: 217 | if cell.cell_type == 'code': 218 | cell.source = cell.source.replace( \ 219 | '\n#@save\n', '\n').replace('#@save', '').strip() 220 | return notebook.create_new_notebook(nb, merged_code_cell) 221 | 222 | def _match_slide_marks(text: str): 223 | """Return the texts inside each pair of marks; cannot be recursive.""" 224 | # the pair marks to generate slides 225 | pairs = (('[**', '**]'), ('(**', '**)'), ('[~~', '~~]'), ('(~~', '~~)')) 226 | matches = [] 227 | for p in pairs: 228 | assert len(p) == 2, f'not a valid pair: {p}' 229 | start = [i for i in range(len(text)) if text.startswith(p[0], i)] 230 | end = [i for i in range(len(text)) if text.startswith(p[1], i)] 231 | assert len(start) == len(end), f'some {p} are not enclosed in {text}' 232 | for i, (s, e) in enumerate(zip(start, end)): 233 | s += len(p[0]) 234 | assert s <= e, f'some {p} are overlapped' 235 | if i < len(start) - 1: 236 | assert e < start[i + 1], f'some {p} are overlapped' 237 | # handle if it's a markdown link such as [**a**](https://xx) 238 | if p[1].endswith(']') and text.startswith(p[1] + '(', e): 239 | continue 240 | matches.append((p, s, e)) 241 | matches.sort(key=lambda x: x[1]) 242 | for i in range(len(matches) - 1): 243 | assert matches[i][1] < matches[i+1][1], \ 244 | f'some {matches[i][0]} and {matches[i+1][0]} are overlapped' 245 | return [(p, text[s:e]) for p, s, e in matches] 246 | -------------------------------------------------------------------------------- /d2lbook/resource.py: -------------------------------------------------------------------------------- 1 | """Manage compute resources 2 | """ 3 | import dataclasses 4 | import datetime 5 | import logging 6 | import multiprocessing as mp 7 | import os 8 | import random 9 | import subprocess 10 | import threading 11 | import time 12 | import traceback 13 | from typing import Any, Optional, Sequence 14 | import getpass 15 | 16 | import fasteners 17 | 18 | from d2lbook import utils 19 | 20 | def get_available_gpus(): 21 | """Return a list of available GPUs with their names""" 22 | cmd = 'nvidia-smi --query-gpu=name --format=csv,noheader' 23 | process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, 24 | stderr=subprocess.PIPE) 25 | stdout, _ = process.communicate() 26 | if process.returncode == 0: 27 | return stdout.decode().splitlines() 28 | return [] 29 | 30 | def get_notebook_gpus(notebook, max_gpus): 31 | """Return the # of GPUs needed for a notebook.""" 32 | # several heuristics, not necessarily accurate 33 | # TODO(mli) support a special mark in notebook to hint the #gpus 34 | 
single_gpu_patterns = ('gpu()', 'gpu(0)', 'device(\'cuda\')', 35 | 'device(\'/GPU:0\')', 'try_gpu()', 'try_gpu(0)', 'gpus=1') 36 | all_gpus_patterns = ('gpu(1)', 'device(\'cuda:1\')', 'device(\'/GPU:1\')', 37 | 'try_all_gpus', 'try_gpu(1)', 'gpus=2', 38 | 'gpus=3', 'gpus=4') 39 | n_gpus = 0 40 | for cell in notebook.cells: 41 | if cell.cell_type == 'code': 42 | if any([p in cell.source for p in single_gpu_patterns]): 43 | n_gpus = max(n_gpus, 1) 44 | if any([p in cell.source for p in all_gpus_patterns]): 45 | n_gpus = max(n_gpus, max_gpus) 46 | return n_gpus 47 | 48 | @dataclasses.dataclass 49 | class _Task(): 50 | num_cpus: int 51 | num_gpus: int 52 | target: Any 53 | args: Sequence[Any] 54 | description: str 55 | process: Optional[Any] = None 56 | locks: Sequence[int] = dataclasses.field(default_factory=list) 57 | done: bool = False 58 | start_time: datetime.datetime = datetime.datetime.now() 59 | end_time: Optional[datetime.datetime] = None 60 | 61 | class Process(mp.Process): 62 | def __init__(self, *args, **kwargs): 63 | mp.Process.__init__(self, *args, **kwargs) 64 | self._pconn, self._cconn = mp.Pipe() 65 | self._exception = None 66 | 67 | def run(self): 68 | try: 69 | mp.Process.run(self) 70 | self._cconn.send(None) 71 | except Exception as e: 72 | tb = traceback.format_exc() 73 | self._cconn.send((e, tb)) 74 | 75 | @property 76 | def exception(self): 77 | if self._pconn.poll(): 78 | self._exception = self._pconn.recv() 79 | return self._exception 80 | 81 | class Scheduler(): 82 | """A scheduler that runs multiple jobs in parallel under resource constraints.""" 83 | def __init__(self, num_cpu_workers, num_gpu_workers): 84 | self._num_cpus = num_cpu_workers 85 | self._num_gpus = num_gpu_workers 86 | self._locks = [False] * (self._num_cpus + self._num_gpus) 87 | user = getpass.getuser() 88 | self._inter_locks = [ 89 | fasteners.InterProcessLock(f'/tmp/d2lbook_{user}_cpu_{i}') 90 | for i in range(self._num_cpus)] + [ 91 | fasteners.InterProcessLock(f'/tmp/d2lbook_{user}_gpu_{i}') 92 | for i in range(self._num_gpus)] 93 | self._tasks = [] 94 | self._failed_tasks = [] 95 | self._start_job_lock = fasteners.InterProcessLock( 96 | f'/tmp/d2lbook_{user}_start_job') 97 | 98 | def add(self, num_cpus, num_gpus, target, args, description=''): 99 | """Add a task into the queue.""" 100 | assert not (num_cpus == 0 and num_gpus == 0), \ 101 | 'Need at least one CPU or GPU' 102 | assert num_cpus <= self._num_cpus and num_gpus <= self._num_gpus, \ 103 | f'Not enough resources (CPU {self._num_cpus}, GPU {self._num_gpus}) to run the task (CPU {num_cpus}, GPU {num_gpus})' 104 | 105 | if not description: 106 | description = f'Target {target} with args {args}' 107 | self._tasks.append(_Task(num_cpus, num_gpus, target, args, 108 | description)) 109 | 110 | @property 111 | def failed_tasks(self): 112 | return [(task.description, err, trace) 113 | for task, err, trace in self._failed_tasks] 114 | 115 | @property 116 | def error_message(self): 117 | if not self.failed_tasks: 118 | return '' 119 | errors = [ 120 | f'{len(self.failed_tasks)} notebooks failed to evaluate:'] 121 | for task, err, trace in self.failed_tasks: 122 | errors += [f'Task {task} exited with error: {err}', trace] 123 | return '\n\n'.join(errors) 124 | 125 | def run(self): 126 | """Run the tasks and block until they are done.""" 127 | def _device_info(task): 128 | cpus = task.locks[:task.num_cpus] 129 | gpus = [i - self._num_cpus for i in task.locks[task.num_cpus:]] 130 | info = [] 131 | if cpus: info.append(f'CPU {cpus}') 132 | if gpus: 
info.append(f'GPU {gpus}') 133 | return ', '.join(info) 134 | 135 | def _runtime(task): 136 | end_time = task.end_time if task.end_time else datetime.datetime.now( 137 | ) 138 | return utils.get_time_diff(task.start_time, end_time) 139 | 140 | def _summary_heavy_tasks(): 141 | if self._tasks: 142 | logging.info( 143 | f'All {len(self._tasks)} tasks are done, sorting by runtime:' 144 | ) 145 | self._tasks.sort( 146 | key=lambda task: (task.end_time - task.start_time).seconds) 147 | for task in self._tasks: 148 | logging.info( 149 | f' - {_runtime(task)} on {_device_info(task)} for {task.description}' 150 | ) 151 | 152 | def _status(): 153 | num_done, num_not_started, num_running = 0, 0, 0 154 | for task in self._tasks: 155 | if task.done: num_done += 1 156 | if task.process: num_running += 1 157 | if not task.process and not task.done: num_not_started += 1 158 | 159 | logging.info( 160 | f' Status: {num_running} running tasks, {num_done} done, {num_not_started} not started' 161 | ) 162 | for task in self._tasks: 163 | if task.process: 164 | logging.info( 165 | f' - Task "{task.description}" on {_device_info(task)} is running for {_runtime(task)}' 166 | ) 167 | 168 | # try large GPU workloads first 169 | self._tasks.sort(reverse=True, key=lambda task: 170 | (task.num_gpus, task.num_cpus)) 171 | 172 | last_status_t = 0 173 | for t in range(24 * 60 * 60): # run at most 24 hours 174 | if all([task.done for task in self._tasks]): 175 | break 176 | 177 | if t > last_status_t + 60: 178 | last_status_t = t 179 | _status() 180 | 181 | for task in self._tasks: 182 | if task.process or task.done: 183 | continue 184 | locks = self._lock(0, self._num_cpus, task.num_cpus) + \ 185 | self._lock(self._num_cpus, self._num_cpus+self._num_gpus, task.num_gpus) 186 | if len(locks) < task.num_cpus + task.num_gpus: 187 | self._unlock(locks) 188 | continue 189 | task.locks = locks 190 | # a brutal fix to https://github.com/jupyter/nbconvert/issues/1066 191 | # if two CI jobs start to eval notebooks at the same time, it may 192 | # cause a port bind conflict. here I require the CI job to acquire 193 | # a global lock for 1 sec. 
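# (illustrative failure mode: two builds that start evaluating notebooks in
# the same second can both pick the same free port before either binds it;
# holding this lock for ~1 sec while the child process starts avoids the race)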
194 | self._start_job_lock.acquire() 195 | message = f'Starting task "{task.description}" on {_device_info(task)}' 196 | logging.info(message) 197 | task.start_time = datetime.datetime.now() 198 | gpus = [i - self._num_cpus for i in locks[task.num_cpus:]] 199 | task.process = Process(target=_target, 200 | args=(gpus, task.target, *task.args)) 201 | task.process.start() 202 | _status() 203 | last_status_t = t 204 | time.sleep(1) 205 | self._start_job_lock.release() 206 | break 207 | 208 | # check if any task is finished 209 | for task in self._tasks: 210 | if task.done or not task.process: continue 211 | if not task.process.is_alive(): 212 | for lock in task.locks: 213 | self._locks[lock] = False 214 | self._inter_locks[lock].release() 215 | task.end_time = datetime.datetime.now() 216 | if task.process.exception: 217 | error, traceback = task.process.exception 218 | self._failed_tasks.append((task, error, traceback)) 219 | logging.error( 220 | f'Task "{task.description}" on {_device_info(task)} exited with error: {error}\n{traceback}' 221 | ) 222 | else: 223 | logging.info( 224 | f'Task "{task.description}" on {_device_info(task)} is finished in {_runtime(task)}' 225 | ) 226 | task.process = None 227 | task.done = True 228 | 229 | time.sleep(1) 230 | 231 | _summary_heavy_tasks() 232 | 233 | def _lock(self, start, end, n): 234 | ids = list(range(start, end)) 235 | random.shuffle(ids) 236 | locks = [] 237 | for i in ids: 238 | if len(locks) >= n: 239 | break 240 | if self._inter_locks[i].acquire( 241 | blocking=False) and not self._locks[i]: 242 | self._locks[i] = True 243 | locks.append(i) 244 | return locks 245 | 246 | def _unlock(self, locks): 247 | for i in locks: 248 | self._inter_locks[i].release() 249 | self._locks[i] = False 250 | 251 | def _target(gpus, target, *args): 252 | if not gpus: 253 | # it will trigger a runtime error if target actually uses a GPU 254 | gpus = [""] 255 | os.environ['CUDA_VISIBLE_DEVICES'] = ','.join([str(g) for g in gpus]) 256 | return target(*args) 257 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /docs/img/multi-lang.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d2l-ai/d2l-book/HEAD/docs/img/multi-lang.svg -------------------------------------------------------------------------------- /d2lbook/rst.py: -------------------------------------------------------------------------------- 1 | """utilities to handle rst files""" 2 | import re 3 | import logging 4 | from typing import Dict 5 | import nbconvert 6 | import nbformat 7 | from nbformat import notebooknode 8 | from d2lbook import notebook 9 | from d2lbook import common 10 | from d2lbook import markdown 11 | 12 | def convert_notebook(nb: notebooknode.NotebookNode, resources: Dict[str, str]): 13 | nb = _process_nb(nb) 14 | writer = nbconvert.RSTExporter() 15 | body, resources = writer.from_notebook_node(nb, resources) 16 | body = _process_rst(body) 17 | return body, resources 18 | 19 | def _process_nb(nb): 20 | # add empty lines before and after a mark/fence 21 | new_cells = [] 22 | for cell in nb.cells: 23 | if cell.cell_type == 'markdown': 24 | md_cells = markdown.split_markdown(cell.source) 25 | for i, md_cell in enumerate(md_cells): 26 | if i < len(md_cells) - 1 and md_cells[i+1]['type'] == 'code': 27 | md_cells[i]['source'] += '\n' 28 | if md_cell['type'] == 'markdown': 29 | lines = md_cells[i]['source'].split('\n') 30 | for j, line in enumerate(lines): 31 | m = common.md_mark_pattern.match(line) 32 | if (m is not None 33 | and m[1] not in ('ref', 'numref', 'eqref') 34 | and m.end() == len(line)): 35 | lines[j] = '\n'+line+'\n' 36 | md_cells[i]['source'] = '\n'.join(lines) 37 | new_cells.append(nbformat.v4.new_markdown_cell( 38 | markdown.join_markdown_cells(md_cells))) 39 | else: 40 | new_cells.append(cell) 41 | # hide/show 42 | hide_all = False 43 | for cell in new_cells: 44 | if cell.cell_type == 'code': 45 | src = cell.source.lower() 46 | if '#@hide_all' in src: 47 | hide_all = True 48 | if hide_all or '# hide outputs' in src or ('#@hide' in src and '#@hide_code' not in src) or '#@hide_output' in src: 49 | cell.outputs = [] 50 | if hide_all or '# hide code' in src 
or ('#@hide' in src and '#@hide_output' not in src) or '#@hide_code' in src: 51 | cell.source = '' 52 | return notebook.create_new_notebook(nb, new_cells) 53 | 54 | def _process_rst(body): 55 | 56 | def delete_lines(lines, deletes): 57 | return [line for i, line in enumerate(lines) if i not in deletes] 58 | def indented(line): 59 | return line.startswith(' ') 60 | 61 | def blank(line): 62 | return len(line.strip()) == 0 63 | 64 | def look_behind(i, cond, lines): 65 | indices = [] 66 | while i < len(lines) and cond(lines[i]): 67 | indices.append(i) 68 | i = i + 1 69 | return indices 70 | 71 | lines = body.split('\n') 72 | 73 | # Preprocess table directives (later processed by "move .. _label: before an image, a section, or a table" below) 74 | # 75 | # E.g., 76 | # 77 | # :Dataset vs. computer memory and computational power 78 | # 79 | # +----------+----------------------------------------+ 80 | # | Decade | Dataset | 81 | # +==========+========================================+ 82 | # | 1970 | 100 (Iris) | 83 | # +----------+----------------------------------------+ 84 | # 85 | # Table: label:\ ``tab_intro_decade`` 86 | # 87 | # -> 88 | # 89 | # .. table:: Dataset vs. computer memory and computational power 90 | # 91 | # +----------+----------------------------------------+ 92 | # | Decade | Dataset | 93 | # +==========+========================================+ 94 | # | 1970 | 100 (Iris) | 95 | # +----------+----------------------------------------+ 96 | # 97 | # .. _tab_intro_decade: 98 | i, deletes = 0, [] 99 | while i < len(lines): 100 | line = lines[i] 101 | if line.startswith('Table: label:'): 102 | line_i = 0 103 | while line_i < len(line) and line[line_i] != '`': 104 | line_i += 1 105 | assert line_i < len(line), "Original table label in rst file is assumed to be like Table: label:\ ``tab_intro_decade``" 106 | lines[i] = ".. _" + line[line_i+2:-2] + ":" 107 | j = i 108 | directives_in_table_caption = [":cite", ":numref", "eqref"] 109 | while j > 0 and (not lines[j].startswith(":") or lines[j].startswith(tuple(directives_in_table_caption))): 110 | # Add indent for each line that is part of the table 111 | if lines[j].startswith("+") or lines[j].startswith("|"): 112 | lines[j] = " " + lines[j] 113 | j -= 1 114 | assert lines[j].startswith(":"), "Original table label in rst file is assumed to be like :Dataset vs. computer memory and computational power" 115 | lines[j] = ".. table:: " + lines[j][1:] 116 | i += 1 117 | 118 | # deletes: indices of lines to be deleted 119 | i, deletes = 0, [] 120 | while i < len(lines): 121 | line = lines[i] 122 | # '.. code:: toc' -> '.. toctree::', then remove consecutive empty lines 123 | # after the current line 124 | if line.startswith('.. code:: toc'): 125 | # convert into rst's toc block 126 | lines[i] = '.. toctree::' 127 | blanks = look_behind(i+1, blank, lines) 128 | deletes.extend(blanks) 129 | i += len(blanks) 130 | # .. code:: eval_rst 131 | # 132 | # 133 | # .. only:: html 134 | # 135 | # References 136 | # ========== 137 | # -> 138 | # 139 | # 140 | # 141 | # .. only:: html 142 | # 143 | # References 144 | # ========== 145 | elif line.startswith('.. code:: eval_rst'): 146 | # make it a rst block 147 | deletes.append(i) 148 | j = i + 1 149 | while j < len(lines): 150 | line_j = lines[j] 151 | if indented(line_j): 152 | lines[j] = line_j[3:] 153 | if lines[j].strip().startswith('.. '): 154 | lines[j] = '\n'+lines[j].strip() 155 | elif not blank(line_j): 156 | break 157 | j += 1 158 | i = j 159 | elif line.startswith('.. 
code::'): 160 | # reset LaTeX code-block rendering parameters 161 | lines[i] = '.. raw:: latex\n\n \\diilbookstyleinputcell\n\n' + lines[i] 162 | elif line.startswith('.. parsed-literal::'): 163 | # reset LaTeX code-block rendering parameters 164 | lines[i] = '.. raw:: latex\n\n \\diilbookstyleoutputcell\n\n' + lines[i] 165 | # add an output class so we can add customized CSS 166 | lines[i] += '\n :class: output' 167 | i += 1 168 | # .. figure:: ../img/jupyter.png 169 | # :alt: Output after running Jupyter Notebook. The last row is the URL 170 | # for port 8888. 171 | # 172 | # Output after running Jupyter Notebook. The last row is the URL for 173 | # port 8888. 174 | # 175 | # :width:``700px`` 176 | # 177 | # :label:``fig_jupyter`` 178 | #-> 179 | # .. _fig_jupyter: 180 | # 181 | # .. figure:: ../img/jupyter.png 182 | # :width: 700px 183 | # 184 | # Output after running Jupyter Notebook. The last row is the URL for 185 | # port 8888. 186 | elif indented(line) and ':alt:' in line: 187 | # Image caption, remove :alt: block, it causes trouble for long captions 188 | caps = look_behind(i, lambda l: indented(l) and not blank(l), lines) 189 | deletes.extend(caps) 190 | i += len(caps) 191 | # .. table:: Dataset versus computer memory and computational power 192 | # +-... 193 | # | 194 | # +-... 195 | # 196 | # :label:``tab_intro_decade`` 197 | # -> 198 | # .. _tab_intro_decade: 199 | # 200 | # .. table:: Dataset versus computer memory and computational power 201 | # 202 | # +-... 203 | # | 204 | # +-... 205 | # 206 | elif line.startswith('.. table::'): 207 | # Add indent to table caption for long captions 208 | caps = look_behind(i+1, lambda l: not indented(l) and not blank(l), 209 | lines) 210 | for j in caps: 211 | lines[j] = ' ' + lines[j] 212 | i += len(caps) + 1 213 | else: 214 | i += 1 215 | 216 | # change :label:my_label: into rst format 217 | lines = delete_lines(lines, deletes) 218 | deletes = [] 219 | 220 | for i, line in enumerate(lines): 221 | pos, new_line = 0, '' 222 | while True: 223 | match = common.rst_mark_pattern.search(line, pos) 224 | if match is None: 225 | new_line += line[pos:] 226 | break 227 | # e.g., case :math:`x`, :numref:`y`, match[0] = ':math:' 228 | elif match[2] is None: 229 | end = match.end() 230 | new_line += line[pos:end] 231 | pos = end 232 | continue 233 | 234 | start, end = match.start(), match.end() 235 | # e.g., origin=':label:``fig_jupyter``', key='label', value='fig_jupyter' 236 | origin, key, value = match[0], match[1], match[2] 237 | assert value.startswith('``') and value.endswith('``'), value 238 | value = value[2:-2] 239 | new_line += line[pos:start] 240 | pos = end 241 | # assert key in ['label', 'eqlabel', 'ref', 'numref', 'eqref', 'width', 'height', 'citet', 'citep'], 'unknown key: ' + key 242 | if key == 'label': 243 | new_line += '.. _' + value + ':' 244 | elif key in ['ref', 'numref', 'cite']: 245 | new_line += ':'+key+':`'+value+'`' 246 | elif key == 'citet': 247 | new_line += ':cite:t:`'+value+'`' 248 | elif key == 'citep': 249 | new_line += ':cite:p:`'+value+'`' 250 | elif key == 'eqref': 251 | new_line += ':eq:`'+value+'`' 252 | elif key in ['class', 'func', 'mod']: 253 | new_line += ':py:'+key+':`'+value+'`' 254 | # .. math:: f 255 | # 256 | # :eqlabel:``gd-taylor`` 257 | # -> 258 | # .. 
math:: f 259 | # :label: gd-taylor 260 | elif key == 'eqlabel': 261 | new_line += ' :label: '+value 262 | if blank(lines[i-1]): 263 | deletes.append(i-1) 264 | elif key in ['width', 'height']: 265 | new_line += ' :'+key+': '+value 266 | elif key == 'bibliography': 267 | # a hard-coded plain bibtex style... 268 | new_line += ('.. bibliography:: ' + value + 269 | '\n :style: apa') 270 | # '\n :style: apa\n :all:') MM 20200104 removed ':all:' so only the cited references get printed 271 | else: 272 | logging.fatal(f'unknown key {key}') 273 | 274 | lines[i] = new_line 275 | lines = delete_lines(lines, deletes) 276 | 277 | def move(i, j): # move line i to line j 278 | lines.insert(j, lines[i]) 279 | if i > j: 280 | del lines[i+1] 281 | else: 282 | del lines[i] 283 | 284 | # move :width: or :height: just below .. figure:: 285 | for i, line in enumerate(lines): 286 | if line.startswith('.. figure::'): 287 | for j in range(i+1, len(lines)): 288 | line_j = lines[j] 289 | if not indented(line_j) and not blank(line_j): 290 | break 291 | if line_j.startswith(' :width:') or line_j.startswith(' :height:'): 292 | move(j, i+1) 293 | 294 | # move .. _label: before an image, a section, or a table 295 | lines.insert(0, '') 296 | i = 0 297 | while i < len(lines): 298 | line = lines[i] 299 | if line.startswith('.. _'): 300 | for j in range(i-1, -1, -1): 301 | line_j = lines[j] 302 | if (line_j.startswith('.. table:') 303 | or line_j.startswith('.. figure:')): 304 | move(i, j-1) 305 | lines.insert(j-1, '') 306 | i += 1 # Due to insertion of a blank line 307 | break 308 | if (len(set(line_j)) == 1 309 | and line_j[0] in ['=','~','_', '-']): 310 | k = max(j-2, 0) 311 | move(i, k) 312 | lines.insert(k, '') 313 | i += 1 # Due to insertion of a blank line 314 | break 315 | i += 1 316 | 317 | # change .. image:: to .. figure:: so they will be center aligned 318 | for i, line in enumerate(lines): 319 | if '.. image::' in line: 320 | lines[i] = line.replace('.. image::', '.. figure::') 321 | 322 | # sometimes the code results contain vt100 codes, widely used for 323 | # coloring, while they are not supported by latex. 
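# e.g. '\x1b[0;31mError\x1b[0m' -> 'Error' after the two substitutions below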
324 | for i, l in enumerate(lines): 325 | lines[i] = re.sub(r'\x1b\[[\d;]*K', '', 326 | re.sub(r'\x1b\[[\d;]*m', '', l)) 327 | 328 | return '\n'.join(lines) 329 | -------------------------------------------------------------------------------- /d2lbook/notebook.py: -------------------------------------------------------------------------------- 1 | """utilities to handle notebooks""" 2 | 3 | from typing import Union, List, Optional 4 | 5 | import os 6 | import copy 7 | import notedown 8 | import nbformat 9 | import nbconvert 10 | from nbformat import notebooknode 11 | from d2lbook import markdown 12 | from d2lbook import common 13 | from d2lbook import config 14 | 15 | def create_new_notebook( 16 | nb: notebooknode.NotebookNode, 17 | cells: List[notebooknode.NotebookNode]) -> notebooknode.NotebookNode: 18 | """Create a new notebook with the given cells, copying metadata from nb""" 19 | new_nb = copy.deepcopy(nb) 20 | new_nb.cells = cells 21 | return new_nb 22 | 23 | def read(fname: str): 24 | if not os.path.exists(fname) or os.stat(fname).st_size == 0: 25 | return None 26 | with open(fname, 'r') as f: 27 | return nbformat.read(f, as_version=4) 28 | 29 | def read_markdown(source: Union[str, List[str]]) -> notebooknode.NotebookNode: 30 | """Returns a notebook from markdown source""" 31 | if not isinstance(source, str): 32 | source = '\n'.join(source) 33 | reader = notedown.MarkdownReader(match='strict') 34 | return reader.reads(source) 35 | 36 | def split_markdown_cell( 37 | nb: notebooknode.NotebookNode) -> notebooknode.NotebookNode: 38 | """Split a markdown cell if it contains a tab block. 39 | 40 | A new property `class` is added to the metadata for a tab cell. 41 | """ 42 | # merge continuous markdown cells 43 | grouped_cells = common.group_list( 44 | nb.cells, lambda cell, _: cell.cell_type == 'markdown') 45 | new_cells = [] 46 | for is_md, group in grouped_cells: 47 | if not is_md: 48 | new_cells.extend(group) 49 | else: 50 | src = '\n\n'.join(cell.source for cell in group) 51 | md_cells = markdown.split_markdown(src) 52 | is_tab_cell = lambda cell, _: cell['class'] if (cell[ 53 | 'type'] == 'markdown' and 'class' in cell) else 'not_tab_cell' 54 | grouped_md_cells = common.group_list(md_cells, is_tab_cell) 55 | for tab, md_group in grouped_md_cells: 56 | new_cell = nbformat.v4.new_markdown_cell( 57 | markdown.join_markdown_cells(md_group)) 58 | if tab != 'not_tab_cell': 59 | assert tab.startswith('`') and tab.endswith('`'), tab 60 | new_cell.metadata['tab'] = [ 61 | t.strip() for t in tab[1:-1].split(',')] 62 | new_cells.append(new_cell) 63 | new_cells = [cell for cell in new_cells if cell.source] 64 | return create_new_notebook(nb, new_cells) 65 | 66 | def get_cell_tab(cell: notebooknode.NotebookNode, 67 | default_tab: str = '') -> List[str]: 68 | """Get the cell tab""" 69 | if 'tab' in cell.metadata: 70 | tab = cell.metadata['tab'] 71 | return [tab] if type(tab) == str else tab 72 | if cell.cell_type != 'code': 73 | return [] 74 | match = (common.source_tab_pattern.search(cell.source) or 75 | common.source_tab_pattern_2.search(cell.source)) 76 | if match: 77 | return [tab.strip() for tab in match[1].split(',')] 78 | return [default_tab,] 79 | 80 | def get_tab_notebook(nb: notebooknode.NotebookNode, tab: str, 81 | default_tab: str) -> notebooknode.NotebookNode: 82 | """Returns a notebook with the code/markdown cells that don't match the tab 83 | removed. 84 | 85 | Return None if no cell matched the tab and nb contains code blocks. 
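For example (tab names illustrative): get_tab_notebook(nb, 'pytorch', 'mxnet')
keeps cells marked `#@tab pytorch` or `#@tab all`, drops cells marked only for
other tabs, and treats unmarked code cells as default-tab ('mxnet') cells.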
86 | 87 | An `origin_pos` property is added to the metadata for each cell, which 88 | records its position in the original notebook `nb`. 89 | """ 90 | if tab != default_tab: 91 | has_tab = False 92 | for cell in nb.cells: 93 | if tab in get_cell_tab(cell): 94 | has_tab = True 95 | break 96 | if not has_tab and any( 97 | [cell.cell_type == 'code' for cell in nb.cells]): 98 | return None 99 | 100 | matched_tab = False 101 | required_libs = [] 102 | new_cells = [] 103 | for i, cell in enumerate(nb.cells): 104 | new_cell = copy.deepcopy(cell) 105 | new_cell.metadata['origin_pos'] = i 106 | cell_tab = get_cell_tab(new_cell, default_tab) 107 | if not cell_tab: 108 | new_cells.append(new_cell) 109 | else: 110 | if cell_tab == ['all'] or tab in cell_tab: 111 | # drop the cell that contains `%load_ext d2lbook.tab` 112 | if '%load_ext d2lbook.tab' in new_cell.source: 113 | # Check if chapter specific libraries are required 114 | match = common.source_libs_required_pattern.search(new_cell.source) 115 | if match: 116 | # Extract the content within the square brackets 117 | libs_content = match.group(1) 118 | # Split the content into individual libraries 119 | required_libs = [lib.strip("'") for lib in libs_content.split(", ")] 120 | continue 121 | new_cell.metadata['tab'] = [tab] 122 | matched_tab = True 123 | # remove the tab from source 124 | lines = new_cell.source.split('\n') 125 | for j, line in enumerate(lines): 126 | src_tab = (common.source_tab_pattern.search(line) or 127 | common.source_tab_pattern_2.search(line)) 128 | text_tab = common.md_mark_pattern.search(line) 129 | if src_tab or (text_tab and (text_tab[1] == 'begin_tab' or 130 | text_tab[1] == 'end_tab')): 131 | del lines[j] 132 | 133 | # TODO, also remove the trailing #@save 134 | lines = _clean_if_branches(lines, tab) 135 | new_cell.source = '\n'.join(lines) 136 | new_cells.append(new_cell) 137 | 138 | nb.metadata['required_libs'] = required_libs 139 | if not matched_tab and any([cell.cell_type == 'code' 140 | for cell in nb.cells]): 141 | return None 142 | return create_new_notebook(nb, new_cells) 143 | 144 | def _clean_if_branches(lines, tab): 145 | """Handle special if branches 146 | """ 147 | #TODO make it more general purpose 148 | mark = 'tab.selected' 149 | if not any([mark in l for l in lines]): 150 | return _clean_if_branches_old(lines, tab) 151 | # 1 means in a matched if branch, 152 | # 2 means in a not matched if branch 153 | # 0 means others 154 | mode = 0 155 | indent = 0 156 | _leading_spaces = lambda l: len(l) - len(l.lstrip()) 157 | new_lines = [] 158 | for i, l in enumerate(lines): 159 | assert '\t' not in l, 'please use spaces in ' + l 160 | if 'if' in l and mark in l: 161 | mode = 1 if (f'"{tab}"' in l or f"'{tab}'" in l) else 2 162 | indent = _leading_spaces(l) 163 | continue 164 | if mode != 0 and l.strip() != '' and _leading_spaces(l) <= indent: 165 | # out of the if branch 166 | mode = 0 167 | if mode == 0: 168 | new_lines.append(l) 169 | if mode == 1: 170 | new_lines.append(l[4:]) 171 | return new_lines 172 | 173 | def _clean_if_branches_old(lines, tab): 174 | """Handle special if branches 175 | """ 176 | #TODO make it more general purpose 177 | mark = 'd2l.USE_' 178 | matched = False 179 | for l in lines: 180 | if mark in l: 181 | matched = True 182 | break 183 | if not matched: 184 | return lines 185 | # 1 means in a matched if branch, 186 | # 2 means in a not matched if branch 187 | # 0 means others 188 | mode = 0 189 | indent = 0 190 | _leading_spaces = lambda l: len(l) - len(l.lstrip()) 191 | new_lines = [] 192 

def _clean_if_branches_old(lines, tab):
    """Handle special if branches such as `if d2l.USE_...`."""
    # TODO: make it more general purpose
    mark = 'd2l.USE_'
    matched = False
    for l in lines:
        if mark in l:
            matched = True
            break
    if not matched:
        return lines
    # 1 means in a matched if branch,
    # 2 means in a not matched if branch,
    # 0 means others
    mode = 0
    indent = 0
    _leading_spaces = lambda l: len(l) - len(l.lstrip())
    new_lines = []
    for l in lines:
        assert '\t' not in l, 'please use spaces in ' + l
        if 'if' in l and mark in l:
            mode = 1 if mark + tab.upper() in l else 2
            indent = _leading_spaces(l)
            continue
        if mode != 0 and l.strip() != '' and _leading_spaces(l) <= indent:
            # out of the if branch
            mode = 0
        if mode == 0:
            new_lines.append(l)
        if mode == 1:
            new_lines.append(l[4:])
    return new_lines

def _has_output(cell):
    """Return whether a cell has output."""
    return 'outputs' in cell and len(cell['outputs'])

def merge_tab_notebooks(
        src_notebooks: List[notebooknode.NotebookNode]
) -> notebooknode.NotebookNode:
    """Merge the tab notebooks into a single one.

    The reverse operation of `get_tab_notebook`.
    """
    n = max([
        max([cell.metadata['origin_pos'] for cell in nb.cells])
        for nb in src_notebooks])
    new_cells = [[] for _ in range(n + 1)]  # type: ignore

    # for compatibility, the tab metadata may be a str or a list
    tab_list = lambda tab: [tab] if isinstance(tab, str) else tab
    for nb in src_notebooks:
        for cell in nb.cells:
            cell = copy.deepcopy(cell)
            p = cell.metadata['origin_pos']
            if len(new_cells[p]):
                if _has_output(new_cells[p][-1]) or _has_output(
                        cell) or new_cells[p][-1].source != cell.source:
                    new_cells[p].append(cell)
                else:
                    if 'tab' in cell.metadata:
                        tab = tab_list(new_cells[p][-1].metadata['tab'])
                        tab.extend(tab_list(cell.metadata['tab']))
                        new_cells[p][-1].metadata['tab'] = tab
            else:
                new_cells[p].append(cell)
    expanded_cells = []
    for cell in new_cells:
        expanded_cells.extend(cell)
    return create_new_notebook(src_notebooks[0], expanded_cells)
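
# A sketch of the intended round trip (the file name is a placeholder):
# split a multi-tab markdown notebook into one notebook per tab with
# `get_tab_notebook`, then recombine the per-tab notebooks with
# `merge_tab_notebooks`, which aligns cells via their `origin_pos`.
if __name__ == '__main__':
    tabs = ['mxnet', 'pytorch']
    with open('index.md', 'r', encoding='UTF-8') as f:  # hypothetical file
        nb = read_markdown(f.read())
    per_tab = [get_tab_notebook(copy.deepcopy(nb), tab, tabs[0])
               for tab in tabs]
    merged = merge_tab_notebooks([t for t in per_tab if t])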
" 247 | for i, tab in enumerate(tabs): 248 | active = 'is-active' if tab == default_tab else '' 249 | code += f'{tab}' 250 | code += "
\n```" 251 | return nbformat.v4.new_markdown_cell(code) 252 | 253 | def _get_tab_panel(cells, tab, tab_id, default_tab): 254 | active = 'is-active' if tab == default_tab else '' 255 | tab_panel_begin = nbformat.v4.new_markdown_cell( 256 | f"```eval_rst\n.. raw:: html\n\n
\n```" 257 | ) 258 | tab_panel_end = nbformat.v4.new_markdown_cell( 259 | "```eval_rst\n.. raw:: html\n\n
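
# A quick illustration (the tab names are arbitrary) of the markdown cell
# that `_get_tab_bar` emits for two tabs:
if __name__ == '__main__':
    bar = _get_tab_bar(['mxnet', 'pytorch'], 0, 'mxnet', 'code')
    # prints an ```eval_rst / .. raw:: html block wrapping the tab-bar divs
    print(bar.source)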

def _merge_tabs(nb: notebooknode.NotebookNode, tabs: List[str]):
    """Merge side-by-side tabs into a single one.

    Returns a list of items, where an item is either
    (False, a list of cells not in any tab) or
    (True, a list of (tab_name, a list of cells in this tab)).
    """
    tab_status = lambda cell, _: 1 if get_cell_tab(cell) else 0
    cell_groups = common.group_list(nb.cells, tab_status)
    new_groups = []
    for in_tab, cells in cell_groups:
        if not in_tab:
            new_groups.append((False, cells))
            continue
        # a special case where we can merge into non-tab cells
        mergable = True
        for cell in cells:
            if set(cell.metadata['tab']) != set(tabs):
                mergable = False
                break
        if mergable:
            new_groups.append((False, cells))
            continue
        # the general case
        group_dict = {tab: [] for tab in tabs}  # type: ignore
        for cell in cells:
            for tab in cell.metadata['tab']:
                group_dict[tab].append(cell)
        group = [
            (tab, group_dict[tab]) for tab in tabs if len(group_dict[tab])]
        new_groups.append((True, group))
    # merge two consecutive code blocks; the first
    # code block must not contain output
    for is_tab, group in new_groups:
        if not is_tab:
            continue
        for i, (tab, tab_cell) in enumerate(group):
            new_tab_cell = []
            for cell in tab_cell:
                if (len(new_tab_cell) > 0 and
                        new_tab_cell[-1].cell_type == 'code' and
                        cell.cell_type == 'code' and
                        not _has_output(new_tab_cell[-1])):
                    cell = copy.deepcopy(cell)
                    cell.source = new_tab_cell[-1].source + '\n\n' + cell.source
                    new_tab_cell[-1] = cell
                else:
                    new_tab_cell.append(cell)
            group[i] = (tab, new_tab_cell)
    return new_groups

def add_html_tab(nb: notebooknode.NotebookNode,
                 tabs: List[str]) -> notebooknode.NotebookNode:
    """Add the HTML code for the tabs."""
    cell_groups = _merge_tabs(nb, tabs)
    all_tabs = common.flatten([[tab for tab, _ in group]
                               for in_tab, group in cell_groups if in_tab])
    # If there is only one tab, assume it's the default tab.
    if len(set(all_tabs)) <= 1:
        return nb
    new_cells = []
    for i, (in_tab, group) in enumerate(cell_groups):
        if not in_tab:
            new_cells.extend(group)
        else:
            cur_tabs = [tab for tab, _ in group]
            div_class = "code"
            for _, cells in group:
                if cells[0].cell_type != "code":
                    div_class = "text"
            new_cells.append(_get_tab_bar(cur_tabs, i, tabs[0], div_class))
            for j, (tab, cells) in enumerate(group):
                new_cells.extend(
                    _get_tab_panel(cells, tab, f'{i}-{j}', tabs[0]))
            # close the enclosing div opened by _get_tab_bar
            new_cells.append(
                nbformat.v4.new_markdown_cell(
                    '```eval_rst\n.. raw:: html\n\n    </div>\n```'))
    return create_new_notebook(nb, new_cells)

def get_toc(root, flat=True):
    """Return a list of files in the order defined by the TOC."""
    subpages = _get_subpages(root)
    res = [root]
    for fn in subpages:
        if flat:
            res.extend(get_toc(fn))
        else:
            res.append(get_toc(fn))
    return res

def _get_subpages(input_fn):
    """Read the toc in input_fn and return the files it contains."""
    subpages = []
    reader = notedown.MarkdownReader()
    with open(input_fn, 'r', encoding='UTF-8') as f:
        nb = reader.read(f)
    for cell in nb.cells:
        if (cell.cell_type == 'code' and 'attributes' in cell.metadata and
                'toc' in cell.metadata.attributes['classes']):
            for l in cell.source.split('\n'):
                l = l.strip()
                if not l.startswith(':'):
                    fn = os.path.join(os.path.dirname(input_fn), l + '.md')
                    if os.path.exists(fn):
                        subpages.append(fn)
    return subpages

--------------------------------------------------------------------------------
/d2lbook/library.py:
--------------------------------------------------------------------------------
"""Save code blocks into the library"""
from typing import List
from d2lbook import notebook
from d2lbook import common
import logging
import os
import copy
import re
import pathlib
import ast
import astor
from yapf.yapflib.yapf_api import FormatCode
import isort

HEADER = '################# WARNING ################\n'

def _write_header(f):
    f.write(HEADER)
    f.write('# The below part is generated automatically through:\n')
    f.write('# d2lbook build lib\n')
    f.write('# Don\'t edit it directly\n\n')

def save_tab(notebooks: List[str], lib_fname: str, tab: str, default_tab: str):
    logging.info(
        f'Matching with the pattern: "#@save", searching for tab {tab}')
    custom_header = []
    if os.path.exists(lib_fname):
        with open(lib_fname, 'r') as f:
            lines = f.readlines()
        for i, l in enumerate(lines):
            if l.strip() == HEADER.strip():
                custom_header = lines[:i]
                break

    with open(lib_fname, 'w') as f:
        if custom_header:
            f.write(''.join(custom_header))
        _write_header(f)
        saved = []
        for nb in notebooks:
            saved.extend(_save_code(nb, tab=tab, default_tab=default_tab))
        f.write(_refactor_blocks(saved))
        logging.info('Saved %d blocks into %s', len(saved), lib_fname)

def save_version(version: str, version_fn: str):
    if version and version_fn:
        with open(version_fn, 'r', encoding='UTF-8') as f:
            lines = f.read().split('\n')
        for i, l in enumerate(lines):
            if '__version__' in l:
                lines[i] = f'__version__ = "{version}"'
                logging.info(f'save {lines[i]} into {version_fn}')
        with open(version_fn, 'w') as f:
            f.write('\n'.join(lines))

def _save_block(source: str, save_mark: str):
    if not save_mark: return ''
    lines = source.splitlines()
    block = []
    for i, l in enumerate(lines):
        m = re.search(f'# *{save_mark}', l)
        if m:
            # keep the marked line itself, with the mark stripped
            l = l[:m.span()[0]].rstrip()
            if l: block.append(l)
            for j in range(i + 1, len(lines)):
                l = lines[j]
                if not l.startswith(' ') and len(l):
                    block.append(lines[j])
                else:
                    # consume the indented (or blank) run that follows
                    for k in range(j, len(lines)):
                        if lines[k].startswith(' ') or not len(lines[k]):
                            block.append(lines[k])
                        else:
                            break
                    break
    return format_code('\n'.join(block))
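
# An illustration of what `_save_block` extracts. Given a hypothetical
# cell source
#
#     def cube(x):  #@save
#         return x ** 3
#
#     cube(2)
#
# `_save_block(source, '@save')` returns only the marked definition with
# the mark stripped:
#
#     def cube(x):
#         return x ** 3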

def _save_code(input_fn, save_mark='@save', tab=None, default_tab=None):
    """Get the code blocks (import, class, def) that will be saved."""
    with open(input_fn, 'r', encoding='UTF-8') as f:
        nb = notebook.read_markdown(f.read())
    if tab:
        nb = notebook.get_tab_notebook(nb, tab, default_tab)
        if not nb:
            return []
    saved = []
    for i, cell in enumerate(nb.cells):
        if cell.cell_type == 'code':
            block = _save_block(cell.source, save_mark)
            if block:
                # search the markdown cells before this one for the
                # enclosing section label
                label = _find_latest_label(nb.cells[:i])
                saved.append([block, label, input_fn])
    return saved

def _find_latest_label(cells):
    for cell in reversed(cells):
        if cell.cell_type == 'markdown':
            matches = re.findall(common.md_mark_pattern, cell.source)
            for m in reversed(matches):
                if m[0] == 'label' and 'sec_' in m[1]:
                    return m[1]
    return ''

def _refactor_blocks(saved_blocks):
    # add the section label into the docstring
    for i, (block, label, _) in enumerate(saved_blocks):
        if not label: continue
        modules = common.split_list(
            block.split('\n'),
            lambda l: l.startswith('def') or l.startswith('class'))
        new_block = []
        if modules[0]: new_block.append('\n'.join(modules[0]))
        for m in modules[1:]:
            parts = common.split_list(m, lambda l: '):' in l)
            # find the docstring
            if len(parts) > 1:
                docstr = parts[1][1] if len(parts[1]) > 1 else (
                    common.head_spaces(m[0]) + '    ')
                loc = f'Defined in :numref:`{label}`"""'
                if docstr.lstrip().startswith('"""') and docstr.endswith('"""'):
                    parts[1][1] = docstr[:-3] + (
                        f'\n\n{common.head_spaces(docstr)}{loc}')
                else:
                    parts[1].insert(1, f'{common.head_spaces(docstr)}"""{loc}')
            new_block.append('\n'.join(common.flatten(parts)))
        saved_blocks[i][0] = '\n'.join(new_block)

    # merge blocks registered through @d2l.add_to_class into their class
    new_blocks = []
    class_blocks = {}
    for i, (block, _, _) in enumerate(saved_blocks):
        lines = block.split('\n')
        if lines[0].startswith('class'):
            new_blocks.append(block)
            m = re.search(r'class +([\w_]+)', lines[0])
            if m:
                class_blocks[m.groups()[0]] = len(new_blocks) - 1
            continue
        register = '@d2l.add_to_class'
        if register in block:
            parts = common.split_list(lines, lambda x: x.startswith(register))
            if parts[0]:
                # join the lines so the block stays a single string
                new_blocks.append('\n'.join(parts[0]))
            if len(parts) > 1:
                for p in parts[1:]:
                    m = re.search(r'@d2l\.add_to_class\(([.\w_]+)\)', p[0])
                    if m:
                        cls = m.groups()[0].split('.')[-1]
                        new_blocks[class_blocks[cls]] += '\n\n' + '\n'.join(
                            ['    ' + l for l in p[1:]])
            continue
        new_blocks.append(block)

    return '\n\n'.join(new_blocks)
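
# An illustration of the `@d2l.add_to_class` merging (the blocks are
# hypothetical). Two saved blocks such as
#
#     class Trainer:
#         def __init__(self):
#             ...
#
#     @d2l.add_to_class(Trainer)
#     def fit(self, model):
#         ...
#
# are folded into a single `class Trainer:` block in which `fit` appears
# as an indented, ordinary method (the decorator line is dropped), so the
# generated library file defines it directly on the class.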

def _parse_mapping_config(config: str, split_line=True):
    """Parse a config such as: numpy -> asnumpy, reshape, ...

    Returns a list of string pairs.
    """
    terms = []
    for line in config.splitlines():
        if split_line:
            terms.extend(line.split(','))
        else:
            terms.append(line)
    mapping = []
    for term in terms:
        term = term.strip()
        if not term:
            continue
        if len(term.split('->')) == 2:
            a, b = term.split('->')
            mapping.append((a.strip(), b.strip()))
        else:
            mapping.append((term, term))
    return mapping

def node_to_source(node):
    if isinstance(node, ast.Constant):
        return str(node.value)
    return astor.to_source(node).rstrip()

def save_alias(tab_lib):
    """Save the aliases into the library file."""
    alias = ''
    if 'alias' in tab_lib:
        alias += tab_lib['alias'].strip() + '\n'
    if 'lib_name' in tab_lib:
        lib_name = tab_lib["lib_name"]
        if 'simple_alias' in tab_lib:
            mapping = _parse_mapping_config(tab_lib['simple_alias'])
            for a, b in mapping:
                if a.endswith('('): a = a[:-1]
                if b.endswith('('): b = b[:-1]
                alias += f'\n{a} = {lib_name}.{b}'
        if 'fluent_alias' in tab_lib:
            mapping = _parse_mapping_config(tab_lib['fluent_alias'])
            alias += '\n' + '\n'.join([
                f'{a} = lambda x, *args, **kwargs: x.{b}(*args, **kwargs)'
                for a, b in mapping])
        if 'args_alias' in tab_lib:
            mapping = _parse_mapping_config(tab_lib['args_alias'],
                                            split_line=False)
            for a, b in mapping:
                alias += f'\ndef {a}:\n    return {b}'
    if alias:
        lib_file = tab_lib['lib_file']
        with open(lib_file, 'a') as f:
            logging.info(
                f'Wrote {len(alias.splitlines())} aliases into {lib_file}')
            f.write('\n\n\n# Alias defined in config.ini\n')
            f.write(alias + '\n\n')

def replace_call(source: str, mapping, replace_fn):
    matched = False
    for a in mapping:
        if 'd2l.' + a in source:
            matched = True
    if not matched:
        return source
    lines = source.splitlines()
    if lines[0].startswith('%'):
        # strip a leading magic line; ast cannot parse it
        source = '\n'.join(lines[1:])
    for _ in range(100):  # 100 is a (random) big enough number
        replaced = False
        tree = ast.parse(source)
        for node in ast.walk(tree):
            if (isinstance(node, ast.Call) and
                    isinstance(node.func, ast.Attribute) and
                    isinstance(node.func.value, ast.Name) and
                    node.func.value.id == 'd2l' and
                    node.func.attr in mapping):
                new_node = replace_fn(node, mapping[node.func.attr])
                if new_node:
                    source = source.replace(
                        ast.get_source_segment(source, node),
                        new_node if isinstance(new_node, str) else
                        node_to_source(new_node))
                    replaced = True
                    break
        if not replaced:
            break
    if lines[0].startswith('%'):
        source = lines[0] + '\n' + source
    return source

def replace_fluent_alias(source, fluent_mapping):
    def _replace(node, b):
        return ast.Call(
            ast.Attribute(value=node.args[0], attr=b),
            node.args[1:], node.keywords)
    return replace_call(source, fluent_mapping, _replace)
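
# Small sketches (the alias names are assumptions) of the two helpers
# above: parsing a mapping config, and rewriting a fluent alias so that a
# d2l call becomes a method call on its first argument.
if __name__ == '__main__':
    print(_parse_mapping_config('numpy -> asnumpy, reshape'))
    # -> [('numpy', 'asnumpy'), ('reshape', 'reshape')]
    print(replace_fluent_alias('y = d2l.reshape(x, (2, 3))',
                               {'reshape': 'reshape'}))
    # -> y = x.reshape((2, 3))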

def replace_args_alias(source, args_mapping):
    def _replace(node, b):
        a_args, b = b
        a_kwargs = {a: b for a, b in a_args if not a.startswith('a_')}
        a_args = [a for a, _ in a_args if a.startswith('a_')]
        if len(node.args) != len(a_args):
            return None
        key_value = {
            a: node_to_source(arg) for arg, a in zip(node.args, a_args)}
        for kw in node.keywords:
            assert kw.arg in a_kwargs, (kw.arg, a_kwargs)
            key_value['=' + kw.arg] = '=' + node_to_source(kw.value)
        # remove keywords that do not appear in the call
        b_call = ast.parse(b).body[0].value
        if isinstance(b_call, ast.Call):
            new_keywords = [
                kw for kw in b_call.keywords
                if '=' + kw.value.id in key_value]
            b_call.keywords = new_keywords
            b = node_to_source(b_call)
        for k, v in key_value.items():
            b = b.replace(k, v)
        return b
    return replace_call(source, dict(args_mapping), _replace)

def call_args(call_str):
    call = ast.parse(call_str).body[0].value
    assert isinstance(call, ast.Call), call_str
    name = call.func.id
    args = [(a.id, None) for a in call.args] + [
        (k.arg, k.value) for k in call.keywords]
    return name, args

def replace_alias(nb, tab_lib):
    nb = copy.deepcopy(nb)
    patterns = []
    fluent_mapping = {}
    args_mapping = {}
    if 'reverse_alias' in tab_lib:
        patterns += _parse_mapping_config(tab_lib['reverse_alias'],
                                          split_line=False)
    if 'lib_name' in tab_lib:
        lib_name = tab_lib["lib_name"]
        if 'simple_alias' in tab_lib:
            mapping = _parse_mapping_config(tab_lib['simple_alias'])
            patterns += [(f'd2l.{a}', f'{lib_name}.{b}') for a, b in mapping]
        if 'fluent_alias' in tab_lib:
            fluent_mapping = dict(
                _parse_mapping_config(tab_lib['fluent_alias']))
        if 'args_alias' in tab_lib:
            for a, b in _parse_mapping_config(tab_lib['args_alias'],
                                              split_line=False):
                name, args = call_args(a)
                args_mapping[name] = (args, b)

    for cell in nb.cells:
        if cell.cell_type == 'code':
            for p, r in patterns:
                cell.source = cell.source.replace(p, r)
            if fluent_mapping:
                cell.source = replace_fluent_alias(cell.source, fluent_mapping)
            if args_mapping:
                cell.source = replace_args_alias(cell.source, args_mapping)
    return nb
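
# A sketch of an args_alias rewrite, mimicking a config.ini entry such as
# `transpose(a_x) -> tf.transpose(a_x)` (all names here are hypothetical):
if __name__ == '__main__':
    _name, _args = call_args('transpose(a_x)')
    print(replace_args_alias('y = d2l.transpose(m)',
                             {_name: (_args, 'tf.transpose(a_x)')}))
    # -> y = tf.transpose(m)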

def format_code(source: str):
    if 'import ' in source:
        config = isort.settings.Config(no_lines_before=[
            isort.settings.FUTURE, isort.settings.STDLIB,
            isort.settings.THIRDPARTY, isort.settings.FIRSTPARTY,
            isort.settings.LOCALFOLDER])
        source = isort.code(source, config=config)

    # remove trailing spaces
    source = '\n'.join([l.rstrip() for l in source.split('\n')]).strip()

    # Disable yapf, as it doesn't work well for long sentences; the
    # yapf-based path below is therefore unreachable and kept for reference.
    return source

    # fix the bug that yapf cannot handle jupyter magic
    for l in source.splitlines():
        if l.startswith('%') or l.startswith('!'):
            return source

    # fix the bug that yapf removes the trailing ;
    has_trailing_semicolon = source.rstrip().endswith(';')

    style = {
        'DISABLE_ENDING_COMMA_HEURISTIC': True,
        'SPACE_BETWEEN_ENDING_COMMA_AND_CLOSING_BRACKET': False,
        'SPLIT_BEFORE_CLOSING_BRACKET': False,
        'SPLIT_BEFORE_DICT_SET_GENERATOR': False,
        'SPLIT_BEFORE_LOGICAL_OPERATOR': False,
        'SPLIT_BEFORE_NAMED_ASSIGNS': False,
        'COLUMN_LIMIT': 78,
        'BLANK_LINES_AROUND_TOP_LEVEL_DEFINITION': 1,
    }
    source = FormatCode(source, style_config=style)[0].strip()
    if has_trailing_semicolon: source += ';'
    return source

def format_code_nb(nb):
    for cell in nb.cells:
        if cell.cell_type == 'code':
            cell.source = format_code(cell.source)
    return nb


# DEPRECATED
# def save_file(root_dir: str, nbfile: str):
#     nbfile = pathlib.Path(nbfile)
#     pyfile = root_dir / nbfile.with_suffix('.py')
#
#     with nbfile.open('r') as f:
#         nb = notebook.read_markdown(f.read())
#
#     saved = []
#     save_all = False
#     for cell in nb.cells:
#         if cell.cell_type == 'code':
#             src = cell.source.lstrip()
#             if re.search('# *@save_all', src):
#                 save_all = True
#             if save_all or re.search('# *@save_cell', src):
#                 saved.append(src)
#             else:
#                 blk = _save_block(src, '@save')
#                 if blk:
#                     saved.append(blk)
#     if saved:
#         with pyfile.open('w') as f:
#             f.write(
#                 f'# This file is generated from {str(nbfile)} automatically through:\n'
#             )
#             f.write('# d2lbook build lib\n')
#             f.write('# Don\'t edit it directly\n\n')
#             for blk in saved:
#                 f.write(blk + '\n\n')
#         logging.info(f'Found {len(saved)} blocks in {str(nbfile)}')

# DEPRECATED
# def save_mark(notebooks: List[str], lib_fname: str, save_mark: str):
#     logging.info('Matching with the pattern: "%s"', save_mark)
#     with open(lib_fname, 'w') as f:
#         _write_header(f)
#         lib_name = os.path.dirname(lib_fname)
#         lib_name = lib_name.split('/')[-1]
#         f.write('import sys\n' + lib_name + ' = sys.modules[__name__]\n\n')
#
#         for nb in notebooks:
#             _save_code(nb, f, save_mark=save_mark)
#     logging.info('Saved into %s', lib_fname)
--------------------------------------------------------------------------------