├── .gitignore ├── Makefile ├── README.rst ├── cheatsheet.rst ├── conf.py ├── crawler ├── docs │ ├── src │ ├── step1 │ │ ├── Makefile │ │ ├── conf.py │ │ ├── index.rst │ │ ├── index.txt │ │ ├── install.rst │ │ ├── install.txt │ │ ├── support.rst │ │ └── support.txt │ ├── step2 │ │ ├── Makefile │ │ ├── api.rst │ │ ├── api.txt │ │ ├── cli.rst │ │ ├── cli.txt │ │ ├── conf.py │ │ ├── cookbook.rst │ │ ├── cookbook.txt │ │ ├── index.rst │ │ ├── index.txt │ │ ├── install.rst │ │ ├── install.txt │ │ ├── support.rst │ │ └── support.txt │ └── step3 │ │ ├── Makefile │ │ ├── api.rst │ │ ├── api.txt │ │ ├── cli.rst │ │ ├── cli.txt │ │ ├── conf.py │ │ ├── cookbook.rst │ │ ├── cookbook.txt │ │ ├── index.rst │ │ ├── install.rst │ │ ├── install.txt │ │ ├── support.rst │ │ ├── support.txt │ │ └── utils.txt ├── setup.py └── src │ ├── __init__.py │ └── crawler │ ├── __init__.py │ ├── main.py │ └── utils.py ├── files ├── cheatsheet-back-full.png └── cheatsheet-front-full.png ├── finish.rst ├── glossary.rst ├── index.rst ├── quickstart-output.txt ├── requirements.txt ├── start.rst ├── step-1.rst ├── step-2.rst ├── step-3.rst └── useful-links.rst /.gitignore: -------------------------------------------------------------------------------- 1 | _build 2 | *.pyc 3 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. 
Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 21 | # Every target in this file names a command, not an output file, so all of them are declared phony. 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf latexpdfja text man texinfo info gettext changes linkcheck doctest xml pseudoxml 23 | 24 | help: 25 | @echo "Please use \`make <target>' where <target> is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests 
embedded in the documentation (if enabled)" 48 | # Guard: with an empty BUILDDIR the glob below would expand to /*, so abort instead of deleting. 49 | clean: 50 | rm -rf $(if $(strip $(BUILDDIR)),$(BUILDDIR)/*,$(error BUILDDIR is empty; refusing to run rm -rf on /*)) 51 | 52 | html: 53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 56 | 57 | dirhtml: 58 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 59 | @echo 60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 61 | 62 | singlehtml: 63 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 64 | @echo 65 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 66 | 67 | pickle: 68 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 69 | @echo 70 | @echo "Build finished; now you can process the pickle files." 71 | 72 | json: 73 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 74 | @echo 75 | @echo "Build finished; now you can process the JSON files." 76 | 77 | htmlhelp: 78 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 79 | @echo 80 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 81 | ".hhp project file in $(BUILDDIR)/htmlhelp." 82 | 83 | qthelp: 84 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 85 | @echo 86 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 87 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 88 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/TheRestructuredTextBook.qhcp" 89 | @echo "To view the help file:" 90 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/TheRestructuredTextBook.qhc" 91 | 92 | devhelp: 93 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 94 | @echo 95 | @echo "Build finished." 
96 | @echo "To view the help file:" 97 | @echo "# mkdir -p $$HOME/.local/share/devhelp/TheRestructuredTextBook" 98 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/TheRestructuredTextBook" 99 | @echo "# devhelp" 100 | 101 | epub: 102 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 103 | @echo 104 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 105 | 106 | latex: 107 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 108 | @echo 109 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 110 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 111 | "(use \`make latexpdf' here to do that automatically)." 112 | 113 | latexpdf: 114 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 115 | @echo "Running LaTeX files through pdflatex..." 116 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 117 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 118 | 119 | latexpdfja: 120 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 121 | @echo "Running LaTeX files through platex and dvipdfmx..." 122 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 123 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 124 | 125 | text: 126 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 127 | @echo 128 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 129 | 130 | man: 131 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 132 | @echo 133 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 134 | 135 | texinfo: 136 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 137 | @echo 138 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 139 | @echo "Run \`make' in that directory to run these through makeinfo" \ 140 | "(use \`make info' here to do that automatically)." 
141 | # Recurse through $(MAKE) rather than a literal make so -n, -j and command-line variables reach the sub-make. 142 | info: 143 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 144 | @echo "Running Texinfo files through makeinfo..." 145 | $(MAKE) -C $(BUILDDIR)/texinfo info 146 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 147 | 148 | gettext: 149 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 150 | @echo 151 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 152 | 153 | changes: 154 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 155 | @echo 156 | @echo "The overview file is in $(BUILDDIR)/changes." 157 | 158 | linkcheck: 159 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 160 | @echo 161 | @echo "Link check complete; look for any errors in the above output " \ 162 | "or in $(BUILDDIR)/linkcheck/output.txt." 163 | 164 | doctest: 165 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 166 | @echo "Testing of doctests in the sources finished, look at the " \ 167 | "results in $(BUILDDIR)/doctest/output.txt." 168 | 169 | xml: 170 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 171 | @echo 172 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 173 | 174 | pseudoxml: 175 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 176 | @echo 177 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 178 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | index.rst -------------------------------------------------------------------------------- /cheatsheet.rst: -------------------------------------------------------------------------------- 1 | Cheat Sheet 2 | =========== 3 | 4 | We have made a cheat sheet for helping you remember the syntax for RST & Sphinx programs. 5 | 6 | RST Cheat Sheet 7 | --------------- 8 | 9 | .. 
image:: files/cheatsheet-front-full.png 10 | 11 | Sphinx Cheat Sheet 12 | ------------------ 13 | 14 | .. image:: files/cheatsheet-back-full.png -------------------------------------------------------------------------------- /conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # The RestructuredText Book documentation build configuration file, created by 4 | # sphinx-quickstart on Sun Feb 23 11:06:14 2014. 5 | # 6 | # This file is execfile()d with the current directory set to its containing dir. 7 | # 8 | # Note that not all possible configuration values are present in this 9 | # autogenerated file. 10 | # 11 | # All configuration values have a default; values that are commented out 12 | # serve to show the default. 13 | 14 | import sys, os 15 | 16 | 17 | import sphinx_rtd_theme 18 | html_theme = 'sphinx_rtd_theme' 19 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] 20 | 21 | # If extensions (or modules to document with autodoc) are in another directory, 22 | # add these directories to sys.path here. If the directory is relative to the 23 | # documentation root, use os.path.abspath to make it absolute, like shown here. 24 | sys.path.insert(0, os.path.abspath('crawler/src')) 25 | 26 | # -- General configuration ----------------------------------------------------- 27 | 28 | # If your documentation needs a minimal Sphinx version, state it here. 29 | #needs_sphinx = '1.0' 30 | 31 | # Add any Sphinx extension module names here, as strings. They can be extensions 32 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 33 | extensions = [ 34 | 'sphinx.ext.intersphinx', 35 | 'sphinx.ext.autodoc', 36 | 'sphinx.ext.doctest', 37 | 'sphinx.ext.todo', 38 | ] 39 | 40 | autodoc_mock_imports = ['bs4', 'requests'] 41 | 42 | # Add any paths that contain templates here, relative to this directory. 43 | templates_path = ['_templates'] 44 | 45 | # The suffix of source filenames. 
46 | source_suffix = '.rst' 47 | 48 | # The encoding of source files. 49 | #source_encoding = 'utf-8-sig' 50 | 51 | # The master toctree document. 52 | master_doc = 'index' 53 | 54 | # General information about the project. 55 | project = u'Sphinx Tutorial' 56 | copyright = u'2014, Eric Holscher' 57 | 58 | # The version info for the project you're documenting, acts as replacement for 59 | # |version| and |release|, also used in various other places throughout the 60 | # built documents. 61 | # 62 | # The short X.Y version. 63 | version = '1.0' 64 | # The full version, including alpha/beta/rc tags. 65 | release = '1.0' 66 | 67 | # The language for content autogenerated by Sphinx. Refer to documentation 68 | # for a list of supported languages. 69 | #language = None 70 | 71 | # There are two options for replacing |today|: either, you set today to some 72 | # non-false value, then it is used: 73 | #today = '' 74 | # Else, today_fmt is used as the format for a strftime call. 75 | #today_fmt = '%B %d, %Y' 76 | 77 | # List of patterns, relative to source directory, that match files and 78 | # directories to ignore when looking for source files. 79 | exclude_patterns = ['_build', 'README.rst'] 80 | 81 | # The reST default role (used for this markup: `text`) to use for all documents. 82 | #default_role = None 83 | 84 | # If true, '()' will be appended to :func: etc. cross-reference text. 85 | #add_function_parentheses = True 86 | 87 | # If true, the current module name will be prepended to all description 88 | # unit titles (such as .. function::). 89 | #add_module_names = True 90 | 91 | # If true, sectionauthor and moduleauthor directives will be shown in the 92 | # output. They are ignored by default. 93 | #show_authors = False 94 | 95 | # The name of the Pygments (syntax highlighting) style to use. 96 | pygments_style = 'sphinx' 97 | 98 | # A list of ignored prefixes for module index sorting. 
99 | #modindex_common_prefix = [] 100 | 101 | # If true, keep warnings as "system message" paragraphs in the built documents. 102 | #keep_warnings = False 103 | 104 | 105 | # -- Options for HTML output --------------------------------------------------- 106 | 107 | # The theme to use for HTML and HTML Help pages. See the documentation for 108 | # a list of builtin themes. 109 | 110 | # Theme options are theme-specific and customize the look and feel of a theme 111 | # further. For a list of options available for each theme, see the 112 | # documentation. 113 | #html_theme_options = {} 114 | 115 | # Add any paths that contain custom themes here, relative to this directory. 116 | #html_theme_path = [] 117 | 118 | # The name for this set of Sphinx documents. If None, it defaults to 119 | # "<project> v<release> documentation". 120 | #html_title = None 121 | 122 | # A shorter title for the navigation bar. Default is the same as html_title. 123 | #html_short_title = None 124 | 125 | # The name of an image file (relative to this directory) to place at the top 126 | # of the sidebar. 127 | #html_logo = None 128 | 129 | # The name of an image file (within the static path) to use as favicon of the 130 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 131 | # pixels large. 132 | #html_favicon = None 133 | 134 | # Add any paths that contain custom static files (such as style sheets) here, 135 | # relative to this directory. They are copied after the builtin static files, 136 | # so a file named "default.css" will overwrite the builtin "default.css". 137 | # html_static_path = ['_static'] 138 | 139 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 140 | # using the given strftime format. 141 | #html_last_updated_fmt = '%b %d, %Y' 142 | 143 | # If true, SmartyPants will be used to convert quotes and dashes to 144 | # typographically correct entities. 
145 | #html_use_smartypants = True 146 | 147 | # Custom sidebar templates, maps document names to template names. 148 | #html_sidebars = {} 149 | 150 | # Additional templates that should be rendered to pages, maps page names to 151 | # template names. 152 | #html_additional_pages = {} 153 | 154 | # If false, no module index is generated. 155 | #html_domain_indices = True 156 | 157 | # If false, no index is generated. 158 | #html_use_index = True 159 | 160 | # If true, the index is split into individual pages for each letter. 161 | #html_split_index = False 162 | 163 | # If true, links to the reST sources are added to the pages. 164 | html_show_sourcelink = False 165 | 166 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 167 | #html_show_sphinx = True 168 | 169 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 170 | #html_show_copyright = True 171 | 172 | # If true, an OpenSearch description file will be output, and all pages will 173 | # contain a <link> tag referring to it. The value of this option must be the 174 | # base URL from which the finished HTML is served. 175 | #html_use_opensearch = '' 176 | 177 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 178 | #html_file_suffix = None 179 | 180 | # Output file base name for HTML help builder. 181 | htmlhelp_basename = 'TheRestructuredTextBookdoc' 182 | 183 | 184 | # -- Options for LaTeX output -------------------------------------------------- 185 | 186 | latex_elements = { 187 | # The paper size ('letterpaper' or 'a4paper'). 188 | #'papersize': 'letterpaper', 189 | 190 | # The font size ('10pt', '11pt' or '12pt'). 191 | #'pointsize': '10pt', 192 | 'pointsize': '12pt', 193 | 194 | # Additional stuff for the LaTeX preamble. 
195 | #'preamble': '', 196 | 197 | # Eliminate blank pages per http://stackoverflow.com/questions/5422997/ 198 | 'babel' : r'\usepackage[english]{babel}', 199 | 'classoptions': ',openany,oneside', 200 | 'fncychap': r'\usepackage[Lenny]{fncychap}', 201 | } 202 | 203 | # Grouping the document tree into LaTeX files. List of tuples 204 | # (source start file, target name, title, author, documentclass [howto/manual]). 205 | latex_documents = [ 206 | ('index', 'TheRestructuredTextBook.tex', u'The RestructuredText Book Documentation', 207 | u'Daniel Greenfeld, Eric Holscher', 'manual'), 208 | ] 209 | 210 | # The name of an image file (relative to this directory) to place at the top of 211 | # the title page. 212 | #latex_logo = None 213 | 214 | # For "manual" documents, if this is true, then toplevel headings are parts, 215 | # not chapters. 216 | #latex_use_parts = False 217 | 218 | # If true, show page references after internal links. 219 | #latex_show_pagerefs = False 220 | 221 | # If true, show URL addresses after external links. 222 | #latex_show_urls = False 223 | 224 | # Documents to append as an appendix to all manuals. 225 | #latex_appendices = [] 226 | 227 | # If false, no module index is generated. 228 | #latex_domain_indices = True 229 | 230 | 231 | # -- Options for manual page output -------------------------------------------- 232 | 233 | # One entry per manual page. List of tuples 234 | # (source start file, name, description, authors, manual section). 235 | man_pages = [ 236 | ('index', 'therestructuredtextbook', u'The RestructuredText Book Documentation', 237 | [u'Daniel Greenfeld, Eric Holscher'], 1) 238 | ] 239 | 240 | # If true, show URL addresses after external links. 241 | #man_show_urls = False 242 | 243 | 244 | # -- Options for Texinfo output ------------------------------------------------ 245 | 246 | # Grouping the document tree into Texinfo files. 
List of tuples 247 | # (source start file, target name, title, author, 248 | # dir menu entry, description, category) 249 | texinfo_documents = [ 250 | ('index', 'Sphinx Tutorial', u'Sphinx Tutorial', 251 | u'Eric Holscher', 'SphinxTutorial', 'One line description of project.', 252 | 'Miscellaneous'), 253 | ] 254 | 255 | # Documents to append as an appendix to all manuals. 256 | #texinfo_appendices = [] 257 | 258 | # If false, no module index is generated. 259 | #texinfo_domain_indices = True 260 | 261 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 262 | #texinfo_show_urls = 'footnote' 263 | 264 | # If true, do not generate a @detailmenu in the "Top" node's menu. 265 | #texinfo_no_detailmenu = False 266 | 267 | 268 | # -- Options for Epub output --------------------------------------------------- 269 | 270 | # Bibliographic Dublin Core info. 271 | epub_title = u'The RestructuredText Book' 272 | epub_author = u'Daniel Greenfeld, Eric Holscher' 273 | epub_publisher = u'Daniel Greenfeld, Eric Holscher' 274 | epub_copyright = u'2014, Daniel Greenfeld, Eric Holscher' 275 | 276 | # The language of the text. It defaults to the language option 277 | # or en if the language is not set. 278 | #epub_language = '' 279 | 280 | # The scheme of the identifier. Typical schemes are ISBN or URL. 281 | #epub_scheme = '' 282 | 283 | # The unique identifier of the text. This can be a ISBN number 284 | # or the project homepage. 285 | #epub_identifier = '' 286 | 287 | # A unique identification for the text. 288 | #epub_uid = '' 289 | 290 | # A tuple containing the cover image and cover page html template filenames. 291 | #epub_cover = () 292 | 293 | # A sequence of (type, uri, title) tuples for the guide element of content.opf. 294 | #epub_guide = () 295 | 296 | # HTML files that should be inserted before the pages created by sphinx. 297 | # The format is a list of tuples containing the path and title. 
298 | #epub_pre_files = [] 299 | 300 | # HTML files that should be inserted after the pages created by sphinx. 301 | # The format is a list of tuples containing the path and title. 302 | #epub_post_files = [] 303 | 304 | # A list of files that should not be packed into the epub file. 305 | #epub_exclude_files = [] 306 | 307 | # The depth of the table of contents in toc.ncx. 308 | #epub_tocdepth = 3 309 | 310 | # Allow duplicate toc entries. 311 | #epub_tocdup = True 312 | 313 | # Fix unsupported image types using the PIL. 314 | #epub_fix_images = False 315 | 316 | # Scale large images. 317 | #epub_max_image_width = 0 318 | 319 | # If 'no', URL addresses will not be shown. 320 | #epub_show_urls = 'inline' 321 | 322 | # If false, no index is generated. 323 | #epub_use_index = True 324 | 325 | 326 | # Example configuration for intersphinx: refer to the Sphinx documentation. 327 | on_rtd = os.environ.get('READTHEDOCS', None) == 'True' 328 | # if on_rtd: 329 | # intersphinx_mapping = { 330 | # 'sphinx': ('/Users/eric/checkouts/sphinx/doc/_build/html/', None), 331 | # } 332 | # else: 333 | intersphinx_mapping = { 334 | 'sphinx': ('http://sphinx-doc.org/', None), 335 | } 336 | 337 | 338 | rst_epilog = """ 339 | .. _Sphinx: http://sphinx-doc.org/ 340 | .. _reStructuredText: http://sphinx-doc.org/rest.html 341 | .. _Read the Docs: https://readthedocs.org 342 | """ 343 | -------------------------------------------------------------------------------- /crawler/docs/src: -------------------------------------------------------------------------------- 1 | ../src -------------------------------------------------------------------------------- /crawler/docs/step1/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 
5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 21 | # Every target in this file names a command, not an output file, so all of them are declared phony. 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf latexpdfja text man texinfo info gettext changes linkcheck doctest xml pseudoxml 23 | 24 | help: 25 | @echo "Please use \`make <target>' where <target> is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo 
files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | # Guard: with an empty BUILDDIR the glob below would expand to /*, so abort instead of deleting. 49 | clean: 50 | rm -rf $(if $(strip $(BUILDDIR)),$(BUILDDIR)/*,$(error BUILDDIR is empty; refusing to run rm -rf on /*)) 51 | 52 | html: 53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 56 | 57 | dirhtml: 58 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 59 | @echo 60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 61 | 62 | singlehtml: 63 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 64 | @echo 65 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 66 | 67 | pickle: 68 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 69 | @echo 70 | @echo "Build finished; now you can process the pickle files." 71 | 72 | json: 73 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 74 | @echo 75 | @echo "Build finished; now you can process the JSON files." 76 | 77 | htmlhelp: 78 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 79 | @echo 80 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 81 | ".hhp project file in $(BUILDDIR)/htmlhelp." 
82 | 83 | qthelp: 84 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 85 | @echo 86 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 87 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 88 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Crawler.qhcp" 89 | @echo "To view the help file:" 90 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Crawler.qhc" 91 | 92 | devhelp: 93 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 94 | @echo 95 | @echo "Build finished." 96 | @echo "To view the help file:" 97 | @echo "# mkdir -p $$HOME/.local/share/devhelp/Crawler" 98 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Crawler" 99 | @echo "# devhelp" 100 | 101 | epub: 102 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 103 | @echo 104 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 105 | 106 | latex: 107 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 108 | @echo 109 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 110 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 111 | "(use \`make latexpdf' here to do that automatically)." 112 | 113 | latexpdf: 114 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 115 | @echo "Running LaTeX files through pdflatex..." 116 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 117 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 118 | 119 | latexpdfja: 120 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 121 | @echo "Running LaTeX files through platex and dvipdfmx..." 122 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 123 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 124 | 125 | text: 126 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 127 | @echo 128 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 
129 | 130 | man: 131 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 132 | @echo 133 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 134 | 135 | texinfo: 136 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 137 | @echo 138 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 139 | @echo "Run \`make' in that directory to run these through makeinfo" \ 140 | "(use \`make info' here to do that automatically)." 141 | # Recurse through $(MAKE) rather than a literal make so -n, -j and command-line variables reach the sub-make. 142 | info: 143 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 144 | @echo "Running Texinfo files through makeinfo..." 145 | $(MAKE) -C $(BUILDDIR)/texinfo info 146 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 147 | 148 | gettext: 149 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 150 | @echo 151 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 152 | 153 | changes: 154 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 155 | @echo 156 | @echo "The overview file is in $(BUILDDIR)/changes." 157 | 158 | linkcheck: 159 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 160 | @echo 161 | @echo "Link check complete; look for any errors in the above output " \ 162 | "or in $(BUILDDIR)/linkcheck/output.txt." 163 | 164 | doctest: 165 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 166 | @echo "Testing of doctests in the sources finished, look at the " \ 167 | "results in $(BUILDDIR)/doctest/output.txt." 168 | 169 | xml: 170 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 171 | @echo 172 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 173 | 174 | pseudoxml: 175 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 176 | @echo 177 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 
178 | -------------------------------------------------------------------------------- /crawler/docs/step1/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Crawler documentation build configuration file, created by 4 | # sphinx-quickstart on Wed Mar 5 14:52:11 2014. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | import sys 16 | import os 17 | 18 | # If extensions (or modules to document with autodoc) are in another directory, 19 | # add these directories to sys.path here. If the directory is relative to the 20 | # documentation root, use os.path.abspath to make it absolute, like shown here. 21 | #sys.path.insert(0, os.path.abspath('.')) 22 | 23 | # -- General configuration ------------------------------------------------ 24 | 25 | # If your documentation needs a minimal Sphinx version, state it here. 26 | #needs_sphinx = '1.0' 27 | 28 | # Add any Sphinx extension module names here, as strings. They can be 29 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 30 | # ones. 31 | extensions = [] 32 | 33 | # Add any paths that contain templates here, relative to this directory. 34 | templates_path = ['_templates'] 35 | 36 | # The suffix of source filenames. 37 | source_suffix = '.rst' 38 | 39 | # The encoding of source files. 40 | #source_encoding = 'utf-8-sig' 41 | 42 | # The master toctree document. 43 | master_doc = 'index' 44 | 45 | # General information about the project. 
46 | project = u'Crawler' 47 | copyright = u'2014, Eric Holscher' 48 | 49 | # The version info for the project you're documenting, acts as replacement for 50 | # |version| and |release|, also used in various other places throughout the 51 | # built documents. 52 | # 53 | # The short X.Y version. 54 | version = '1.0' 55 | # The full version, including alpha/beta/rc tags. 56 | release = '1.0' 57 | 58 | # The language for content autogenerated by Sphinx. Refer to documentation 59 | # for a list of supported languages. 60 | #language = None 61 | 62 | # There are two options for replacing |today|: either, you set today to some 63 | # non-false value, then it is used: 64 | #today = '' 65 | # Else, today_fmt is used as the format for a strftime call. 66 | #today_fmt = '%B %d, %Y' 67 | 68 | # List of patterns, relative to source directory, that match files and 69 | # directories to ignore when looking for source files. 70 | exclude_patterns = ['_build'] 71 | 72 | # The reST default role (used for this markup: `text`) to use for all 73 | # documents. 74 | #default_role = None 75 | 76 | # If true, '()' will be appended to :func: etc. cross-reference text. 77 | #add_function_parentheses = True 78 | 79 | # If true, the current module name will be prepended to all description 80 | # unit titles (such as .. function::). 81 | #add_module_names = True 82 | 83 | # If true, sectionauthor and moduleauthor directives will be shown in the 84 | # output. They are ignored by default. 85 | #show_authors = False 86 | 87 | # The name of the Pygments (syntax highlighting) style to use. 88 | pygments_style = 'sphinx' 89 | 90 | # A list of ignored prefixes for module index sorting. 91 | #modindex_common_prefix = [] 92 | 93 | # If true, keep warnings as "system message" paragraphs in the built documents. 94 | #keep_warnings = False 95 | 96 | 97 | # -- Options for HTML output ---------------------------------------------- 98 | 99 | # The theme to use for HTML and HTML Help pages. 
See the documentation for 100 | # a list of builtin themes. 101 | html_theme = 'sphinx_rtd_theme' 102 | 103 | # Theme options are theme-specific and customize the look and feel of a theme 104 | # further. For a list of options available for each theme, see the 105 | # documentation. 106 | #html_theme_options = {} 107 | 108 | # Add any paths that contain custom themes here, relative to this directory. 109 | #html_theme_path = [] 110 | 111 | # The name for this set of Sphinx documents. If None, it defaults to 112 | # " v documentation". 113 | #html_title = None 114 | 115 | # A shorter title for the navigation bar. Default is the same as html_title. 116 | #html_short_title = None 117 | 118 | # The name of an image file (relative to this directory) to place at the top 119 | # of the sidebar. 120 | #html_logo = None 121 | 122 | # The name of an image file (within the static path) to use as favicon of the 123 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 124 | # pixels large. 125 | #html_favicon = None 126 | 127 | # Add any paths that contain custom static files (such as style sheets) here, 128 | # relative to this directory. They are copied after the builtin static files, 129 | # so a file named "default.css" will overwrite the builtin "default.css". 130 | html_static_path = ['_static'] 131 | 132 | # Add any extra paths that contain custom files (such as robots.txt or 133 | # .htaccess) here, relative to this directory. These files are copied 134 | # directly to the root of the documentation. 135 | #html_extra_path = [] 136 | 137 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 138 | # using the given strftime format. 139 | #html_last_updated_fmt = '%b %d, %Y' 140 | 141 | # If true, SmartyPants will be used to convert quotes and dashes to 142 | # typographically correct entities. 143 | #html_use_smartypants = True 144 | 145 | # Custom sidebar templates, maps document names to template names. 
146 | #html_sidebars = {} 147 | 148 | # Additional templates that should be rendered to pages, maps page names to 149 | # template names. 150 | #html_additional_pages = {} 151 | 152 | # If false, no module index is generated. 153 | #html_domain_indices = True 154 | 155 | # If false, no index is generated. 156 | #html_use_index = True 157 | 158 | # If true, the index is split into individual pages for each letter. 159 | #html_split_index = False 160 | 161 | # If true, links to the reST sources are added to the pages. 162 | #html_show_sourcelink = True 163 | 164 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 165 | #html_show_sphinx = True 166 | 167 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 168 | #html_show_copyright = True 169 | 170 | # If true, an OpenSearch description file will be output, and all pages will 171 | # contain a tag referring to it. The value of this option must be the 172 | # base URL from which the finished HTML is served. 173 | #html_use_opensearch = '' 174 | 175 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 176 | #html_file_suffix = None 177 | 178 | # Output file base name for HTML help builder. 179 | htmlhelp_basename = 'Crawlerdoc' 180 | 181 | 182 | # -- Options for LaTeX output --------------------------------------------- 183 | 184 | latex_elements = { 185 | # The paper size ('letterpaper' or 'a4paper'). 186 | #'papersize': 'letterpaper', 187 | 188 | # The font size ('10pt', '11pt' or '12pt'). 189 | #'pointsize': '10pt', 190 | 191 | # Additional stuff for the LaTeX preamble. 192 | #'preamble': '', 193 | } 194 | 195 | # Grouping the document tree into LaTeX files. List of tuples 196 | # (source start file, target name, title, 197 | # author, documentclass [howto, manual, or own class]). 
198 | latex_documents = [ 199 | ('index', 'Crawler.tex', u'Crawler Documentation', 200 | u'Eric Holscher', 'manual'), 201 | ] 202 | 203 | # The name of an image file (relative to this directory) to place at the top of 204 | # the title page. 205 | #latex_logo = None 206 | 207 | # For "manual" documents, if this is true, then toplevel headings are parts, 208 | # not chapters. 209 | #latex_use_parts = False 210 | 211 | # If true, show page references after internal links. 212 | #latex_show_pagerefs = False 213 | 214 | # If true, show URL addresses after external links. 215 | #latex_show_urls = False 216 | 217 | # Documents to append as an appendix to all manuals. 218 | #latex_appendices = [] 219 | 220 | # If false, no module index is generated. 221 | #latex_domain_indices = True 222 | 223 | 224 | # -- Options for manual page output --------------------------------------- 225 | 226 | # One entry per manual page. List of tuples 227 | # (source start file, name, description, authors, manual section). 228 | man_pages = [ 229 | ('index', 'crawler', u'Crawler Documentation', 230 | [u'Eric Holscher'], 1) 231 | ] 232 | 233 | # If true, show URL addresses after external links. 234 | man_show_urls = False 235 | 236 | 237 | # -- Options for Texinfo output ------------------------------------------- 238 | 239 | # Grouping the document tree into Texinfo files. List of tuples 240 | # (source start file, target name, title, author, 241 | # dir menu entry, description, category) 242 | texinfo_documents = [ 243 | ('index', 'Crawler', u'Crawler Documentation', 244 | u'Eric Holscher', 'Crawler', 'One line description of project.', 245 | 'Miscellaneous'), 246 | ] 247 | 248 | # Documents to append as an appendix to all manuals. 249 | #texinfo_appendices = [] 250 | 251 | # If false, no module index is generated. 252 | #texinfo_domain_indices = True 253 | 254 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 
255 | #texinfo_show_urls = 'footnote' 256 | 257 | # If true, do not generate a @detailmenu in the "Top" node's menu. 258 | #texinfo_no_detailmenu = False 259 | -------------------------------------------------------------------------------- /crawler/docs/step1/index.rst: -------------------------------------------------------------------------------- 1 | Crawler Step 1 Documentation 2 | ============================ 3 | 4 | Our Crawler will make your life as a web developer easier. 5 | You can learn more about it in our documentation. 6 | 7 | .. toctree:: 8 | :maxdepth: 2 9 | 10 | install 11 | support 12 | 13 | -------------------------------------------------------------------------------- /crawler/docs/step1/index.txt: -------------------------------------------------------------------------------- 1 | Crawler Step 1 Documentation 2 | 3 | Our Crawler will make your life as a web developer easier. 4 | You can learn more about it in our documentation. 5 | 6 | toctree: 7 | 8 | install 9 | support 10 | 11 | -------------------------------------------------------------------------------- /crawler/docs/step1/install.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Installation 3 | ============ 4 | 5 | At the command line:: 6 | 7 | easy_install crawler 8 | 9 | Or, if you have pip installed: 10 | 11 | .. 
code-block:: bash 12 | 13 | pip install crawler 14 | -------------------------------------------------------------------------------- /crawler/docs/step1/install.txt: -------------------------------------------------------------------------------- 1 | Installation 2 | 3 | At the command line: 4 | 5 | easy_install crawler 6 | 7 | Or, if you have pip installed: 8 | 9 | pip install crawler 10 | -------------------------------------------------------------------------------- /crawler/docs/step1/support.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | Support 3 | ======= 4 | 5 | The easiest way to get help with the project is to join the ``#crawler`` 6 | channel on Freenode_. 7 | We hang out there and you can get real-time help with your projects. 8 | The other good way is to open an issue on Github_. 9 | 10 | The mailing list at https://groups.google.com/forum/#!forum/crawler 11 | is also available for support. 12 | 13 | .. _Freenode: irc://freenode.net 14 | .. _Github: http://github.com/example/crawler/issues 15 | -------------------------------------------------------------------------------- /crawler/docs/step1/support.txt: -------------------------------------------------------------------------------- 1 | Support 2 | 3 | The easiest way to get help with the project is to join the #crawler 4 | channel on Freenode. 5 | We hang out there and you can get real-time help with your projects. 6 | The other good way is to open an issue on Github. 7 | 8 | The mailing list at https://groups.google.com/forum/#!forum/crawler 9 | is also available for support. 
10 | 11 | Freenode: irc://freenode.net 12 | Github: http://github.com/example/crawler/issues 13 | 14 | -------------------------------------------------------------------------------- /crawler/docs/step2/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
21 | # These targets are commands rather than files, so declare each one phony. 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf latexpdfja text man texinfo info gettext changes xml pseudoxml linkcheck doctest 23 | 24 | help: 25 | @echo "Please use \`make <target>' where <target> is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | # An empty BUILDDIR would turn the recipe below into 'rm -rf /*'; abort instead. 49 | clean: 50 | $(if $(strip $(BUILDDIR)),,$(error BUILDDIR is empty; refusing to run 'rm -rf /*'))rm -rf $(BUILDDIR)/* 51 | 52 | html: 53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 56 | 57 | dirhtml: 58 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 59 | @echo 60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
61 | 62 | singlehtml: 63 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 64 | @echo 65 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 66 | 67 | pickle: 68 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 69 | @echo 70 | @echo "Build finished; now you can process the pickle files." 71 | 72 | json: 73 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 74 | @echo 75 | @echo "Build finished; now you can process the JSON files." 76 | 77 | htmlhelp: 78 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 79 | @echo 80 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 81 | ".hhp project file in $(BUILDDIR)/htmlhelp." 82 | # The inner quotes in the first echo are escaped so the shell prints them instead of closing the string. 83 | qthelp: 84 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 85 | @echo 86 | @echo "Build finished; now you can run \"qcollectiongenerator\" with the" \ 87 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 88 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Crawler.qhcp" 89 | @echo "To view the help file:" 90 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Crawler.qhc" 91 | 92 | devhelp: 93 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 94 | @echo 95 | @echo "Build finished." 96 | @echo "To view the help file:" 97 | @echo "# mkdir -p $$HOME/.local/share/devhelp/Crawler" 98 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Crawler" 99 | @echo "# devhelp" 100 | 101 | epub: 102 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 103 | @echo 104 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 105 | 106 | latex: 107 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 108 | @echo 109 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 110 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 111 | "(use \`make latexpdf' here to do that automatically)."
112 | 113 | latexpdf: 114 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 115 | @echo "Running LaTeX files through pdflatex..." 116 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 117 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 118 | 119 | latexpdfja: 120 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 121 | @echo "Running LaTeX files through platex and dvipdfmx..." 122 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 123 | @echo "platex and dvipdfmx finished; the PDF files are in $(BUILDDIR)/latex." 124 | 125 | text: 126 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 127 | @echo 128 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 129 | 130 | man: 131 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 132 | @echo 133 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 134 | 135 | texinfo: 136 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 137 | @echo 138 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 139 | @echo "Run \`make' in that directory to run these through makeinfo" \ 140 | "(use \`make info' here to do that automatically)." 141 | # Recurse with $(MAKE), not a literal make, so -n and the -j jobserver reach the sub-make (as latexpdf does). 142 | info: 143 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 144 | @echo "Running Texinfo files through makeinfo..." 145 | $(MAKE) -C $(BUILDDIR)/texinfo info 146 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 147 | 148 | gettext: 149 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 150 | @echo 151 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 152 | 153 | changes: 154 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 155 | @echo 156 | @echo "The overview file is in $(BUILDDIR)/changes." 157 | 158 | linkcheck: 159 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 160 | @echo 161 | @echo "Link check complete; look for any errors in the above output " \ 162 | "or in $(BUILDDIR)/linkcheck/output.txt."
163 | 164 | doctest: 165 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 166 | @echo "Testing of doctests in the sources finished, look at the " \ 167 | "results in $(BUILDDIR)/doctest/output.txt." 168 | 169 | xml: 170 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 171 | @echo 172 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 173 | 174 | pseudoxml: 175 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 176 | @echo 177 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 178 | -------------------------------------------------------------------------------- /crawler/docs/step2/api.rst: -------------------------------------------------------------------------------- 1 | Crawler Python API 2 | ================== 3 | 4 | Getting started with Crawler is easy. 5 | The main class you need to care about is :class:`~crawler.main.Crawler` 6 | 7 | crawler.main 8 | ------------ 9 | 10 | .. automodule:: crawler.main 11 | :members: 12 | -------------------------------------------------------------------------------- /crawler/docs/step2/api.txt: -------------------------------------------------------------------------------- 1 | Crawler Python API 2 | 3 | Getting started with Crawler is easy. 4 | The main class you need to care about is crawler.main.Crawler 5 | 6 | crawler.main 7 | 8 | automodule: crawler.main 9 | -------------------------------------------------------------------------------- /crawler/docs/step2/cli.rst: -------------------------------------------------------------------------------- 1 | ==================== 2 | Command Line Options 3 | ==================== 4 | 5 | These flags allow you to change the behavior of :program:`Crawler`. 6 | Check out how to use them in the :doc:`cookbook`. 7 | 8 | .. option:: -d <sec>, --delay <sec> 9 | 10 | Use a delay in between page fetches so we don't overwhelm the remote server. 11 | Value in seconds. 12 | 13 | Default: 1 second 14 | 15 | .. 
option:: -i <regex>, --ignore <regex> 16 | 17 | Ignore pages that match a specific pattern. 18 | 19 | Default: None 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /crawler/docs/step2/cli.txt: -------------------------------------------------------------------------------- 1 | Command Line Options 2 | 3 | These flags allow you to change the behavior of Crawler. 4 | Check out how to use them in the Cookbook. 5 | 6 | -d <sec>, --delay <sec> 7 | 8 | Use a delay in between page fetches so we don't overwhelm the remote server. 9 | Value in seconds. 10 | 11 | Default: 1 second 12 | 13 | -i <regex>, --ignore <regex> 14 | 15 | Ignore pages that match a specific pattern. 16 | 17 | Default: None 18 | -------------------------------------------------------------------------------- /crawler/docs/step2/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Crawler documentation build configuration file, created by 4 | # sphinx-quickstart on Wed Mar 5 14:52:11 2014. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | import sys 16 | import os 17 | 18 | # If extensions (or modules to document with autodoc) are in another directory, 19 | # add these directories to sys.path here. If the directory is relative to the 20 | # documentation root, use os.path.abspath to make it absolute, like shown here. 21 | 22 | sys.path.insert(0, os.path.abspath('../src')) 23 | 24 | # -- General configuration ------------------------------------------------ 25 | 26 | # If your documentation needs a minimal Sphinx version, state it here. 27 | #needs_sphinx = '1.0' 28 | 29 | # Add any Sphinx extension module names here, as strings. 
They can be 30 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 31 | # ones. 32 | extensions = [ 33 | 'sphinx.ext.intersphinx', 34 | 'sphinx.ext.autodoc', 35 | 'sphinx.ext.doctest', 36 | 'sphinx.ext.todo', 37 | ] 38 | 39 | # Add any paths that contain templates here, relative to this directory. 40 | templates_path = ['_templates'] 41 | 42 | # The suffix of source filenames. 43 | source_suffix = '.rst' 44 | 45 | # The encoding of source files. 46 | #source_encoding = 'utf-8-sig' 47 | 48 | # The master toctree document. 49 | master_doc = 'index' 50 | 51 | # General information about the project. 52 | project = u'Crawler' 53 | copyright = u'2014, Eric Holscher' 54 | 55 | # The version info for the project you're documenting, acts as replacement for 56 | # |version| and |release|, also used in various other places throughout the 57 | # built documents. 58 | # 59 | # The short X.Y version. 60 | version = '1.0' 61 | # The full version, including alpha/beta/rc tags. 62 | release = '1.0' 63 | 64 | # The language for content autogenerated by Sphinx. Refer to documentation 65 | # for a list of supported languages. 66 | #language = None 67 | 68 | # There are two options for replacing |today|: either, you set today to some 69 | # non-false value, then it is used: 70 | #today = '' 71 | # Else, today_fmt is used as the format for a strftime call. 72 | #today_fmt = '%B %d, %Y' 73 | 74 | # List of patterns, relative to source directory, that match files and 75 | # directories to ignore when looking for source files. 76 | exclude_patterns = ['_build'] 77 | 78 | # The reST default role (used for this markup: `text`) to use for all 79 | # documents. 80 | #default_role = None 81 | 82 | # If true, '()' will be appended to :func: etc. cross-reference text. 83 | #add_function_parentheses = True 84 | 85 | # If true, the current module name will be prepended to all description 86 | # unit titles (such as .. function::). 
87 | #add_module_names = True 88 | 89 | # If true, sectionauthor and moduleauthor directives will be shown in the 90 | # output. They are ignored by default. 91 | #show_authors = False 92 | 93 | # The name of the Pygments (syntax highlighting) style to use. 94 | pygments_style = 'sphinx' 95 | 96 | # A list of ignored prefixes for module index sorting. 97 | #modindex_common_prefix = [] 98 | 99 | # If true, keep warnings as "system message" paragraphs in the built documents. 100 | #keep_warnings = False 101 | 102 | 103 | # -- Options for HTML output ---------------------------------------------- 104 | 105 | # The theme to use for HTML and HTML Help pages. See the documentation for 106 | # a list of builtin themes. 107 | html_theme = 'sphinx_rtd_theme' 108 | 109 | # Theme options are theme-specific and customize the look and feel of a theme 110 | # further. For a list of options available for each theme, see the 111 | # documentation. 112 | #html_theme_options = {} 113 | 114 | # Add any paths that contain custom themes here, relative to this directory. 115 | #html_theme_path = [] 116 | 117 | # The name for this set of Sphinx documents. If None, it defaults to 118 | # " v documentation". 119 | #html_title = None 120 | 121 | # A shorter title for the navigation bar. Default is the same as html_title. 122 | #html_short_title = None 123 | 124 | # The name of an image file (relative to this directory) to place at the top 125 | # of the sidebar. 126 | #html_logo = None 127 | 128 | # The name of an image file (within the static path) to use as favicon of the 129 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 130 | # pixels large. 131 | #html_favicon = None 132 | 133 | # Add any paths that contain custom static files (such as style sheets) here, 134 | # relative to this directory. They are copied after the builtin static files, 135 | # so a file named "default.css" will overwrite the builtin "default.css". 
136 | html_static_path = ['_static'] 137 | 138 | # Add any extra paths that contain custom files (such as robots.txt or 139 | # .htaccess) here, relative to this directory. These files are copied 140 | # directly to the root of the documentation. 141 | #html_extra_path = [] 142 | 143 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 144 | # using the given strftime format. 145 | #html_last_updated_fmt = '%b %d, %Y' 146 | 147 | # If true, SmartyPants will be used to convert quotes and dashes to 148 | # typographically correct entities. 149 | #html_use_smartypants = True 150 | 151 | # Custom sidebar templates, maps document names to template names. 152 | #html_sidebars = {} 153 | 154 | # Additional templates that should be rendered to pages, maps page names to 155 | # template names. 156 | #html_additional_pages = {} 157 | 158 | # If false, no module index is generated. 159 | #html_domain_indices = True 160 | 161 | # If false, no index is generated. 162 | #html_use_index = True 163 | 164 | # If true, the index is split into individual pages for each letter. 165 | #html_split_index = False 166 | 167 | # If true, links to the reST sources are added to the pages. 168 | #html_show_sourcelink = True 169 | 170 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 171 | #html_show_sphinx = True 172 | 173 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 174 | #html_show_copyright = True 175 | 176 | # If true, an OpenSearch description file will be output, and all pages will 177 | # contain a tag referring to it. The value of this option must be the 178 | # base URL from which the finished HTML is served. 179 | #html_use_opensearch = '' 180 | 181 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 182 | #html_file_suffix = None 183 | 184 | # Output file base name for HTML help builder. 
185 | htmlhelp_basename = 'Crawlerdoc' 186 | 187 | 188 | # -- Options for LaTeX output --------------------------------------------- 189 | 190 | latex_elements = { 191 | # The paper size ('letterpaper' or 'a4paper'). 192 | #'papersize': 'letterpaper', 193 | 194 | # The font size ('10pt', '11pt' or '12pt'). 195 | #'pointsize': '10pt', 196 | 197 | # Additional stuff for the LaTeX preamble. 198 | #'preamble': '', 199 | } 200 | 201 | # Grouping the document tree into LaTeX files. List of tuples 202 | # (source start file, target name, title, 203 | # author, documentclass [howto, manual, or own class]). 204 | latex_documents = [ 205 | ('index', 'Crawler.tex', u'Crawler Documentation', 206 | u'Eric Holscher', 'manual'), 207 | ] 208 | 209 | # The name of an image file (relative to this directory) to place at the top of 210 | # the title page. 211 | #latex_logo = None 212 | 213 | # For "manual" documents, if this is true, then toplevel headings are parts, 214 | # not chapters. 215 | #latex_use_parts = False 216 | 217 | # If true, show page references after internal links. 218 | #latex_show_pagerefs = False 219 | 220 | # If true, show URL addresses after external links. 221 | #latex_show_urls = False 222 | 223 | # Documents to append as an appendix to all manuals. 224 | #latex_appendices = [] 225 | 226 | # If false, no module index is generated. 227 | #latex_domain_indices = True 228 | 229 | 230 | # -- Options for manual page output --------------------------------------- 231 | 232 | # One entry per manual page. List of tuples 233 | # (source start file, name, description, authors, manual section). 234 | man_pages = [ 235 | ('index', 'crawler', u'Crawler Documentation', 236 | [u'Eric Holscher'], 1) 237 | ] 238 | 239 | # If true, show URL addresses after external links. 240 | man_show_urls = False 241 | 242 | 243 | # -- Options for Texinfo output ------------------------------------------- 244 | 245 | # Grouping the document tree into Texinfo files. 
List of tuples 246 | # (source start file, target name, title, author, 247 | # dir menu entry, description, category) 248 | texinfo_documents = [ 249 | ('index', 'Crawler', u'Crawler Documentation', 250 | u'Eric Holscher', 'Crawler', 'One line description of project.', 251 | 'Miscellaneous'), 252 | ] 253 | 254 | # Documents to append as an appendix to all manuals. 255 | #texinfo_appendices = [] 256 | 257 | # If false, no module index is generated. 258 | #texinfo_domain_indices = True 259 | 260 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 261 | #texinfo_show_urls = 'footnote' 262 | 263 | # If true, do not generate a @detailmenu in the "Top" node's menu. 264 | #texinfo_no_detailmenu = False 265 | -------------------------------------------------------------------------------- /crawler/docs/step2/cookbook.rst: -------------------------------------------------------------------------------- 1 | ======== 2 | Cookbook 3 | ======== 4 | 5 | Crawl a web page 6 | ---------------- 7 | 8 | The most simple way to use our program is with no arguments. 9 | Simply run:: 10 | 11 | python main.py -u <url> 12 | 13 | to crawl a webpage. 14 | 15 | Crawl a page slowly 16 | ------------------- 17 | 18 | To add a delay to your crawler, 19 | use :option:`-d`:: 20 | 21 | python main.py -d 10 -u <url> 22 | 23 | This will wait 10 seconds between page fetches. 24 | 25 | Crawl only your blog 26 | -------------------- 27 | 28 | You will want to use the :option:`-i` flag, 29 | which will ignore URLs matching the passed regex:: 30 | 31 | python main.py -i "^blog" -u <url> 32 | 33 | This will only crawl pages that contain your blog URL. 34 | -------------------------------------------------------------------------------- /crawler/docs/step2/cookbook.txt: -------------------------------------------------------------------------------- 1 | Cookbook 2 | 3 | Crawl a web page 4 | 5 | The most simple way to use our program is with no arguments. 
6 | Simply run: 7 | 8 | python main.py -u <url> 9 | 10 | to crawl a webpage. 11 | 12 | Crawl a page slowly 13 | 14 | To add a delay to your crawler, 15 | use -d: 16 | 17 | python main.py -d 10 -u <url> 18 | 19 | This will wait 10 seconds between page fetches. 20 | 21 | Crawl only your blog 22 | 23 | You will want to use the -i flag, 24 | which will ignore URLs matching the passed regex: 25 | 26 | python main.py -i "^blog" -u <url> 27 | 28 | This will only crawl pages that contain your blog URL. 29 | -------------------------------------------------------------------------------- /crawler/docs/step2/index.rst: -------------------------------------------------------------------------------- 1 | Crawler Step 2 Documentation 2 | ============================ 3 | 4 | Our Crawler will make your life as a web developer easier. 5 | You can learn more about it in our documentation. 6 | 7 | .. toctree:: 8 | :caption: User Guide 9 | 10 | install 11 | support 12 | cookbook 13 | 14 | .. toctree:: 15 | :caption: Programmer Reference 16 | 17 | cli 18 | api 19 | 20 | -------------------------------------------------------------------------------- /crawler/docs/step2/index.txt: -------------------------------------------------------------------------------- 1 | Crawler Step 2 Documentation 2 | 3 | User Guide 4 | 5 | toctree: 6 | 7 | install 8 | support 9 | cookbook 10 | 11 | Programmer Reference 12 | 13 | toctree: 14 | 15 | cli 16 | api 17 | 18 | -------------------------------------------------------------------------------- /crawler/docs/step2/install.rst: -------------------------------------------------------------------------------- 1 | ../step1/install.rst -------------------------------------------------------------------------------- /crawler/docs/step2/install.txt: -------------------------------------------------------------------------------- 1 | ../step1/install.txt -------------------------------------------------------------------------------- /crawler/docs/step2/support.rst: 
-------------------------------------------------------------------------------- 1 | ../step1/support.rst -------------------------------------------------------------------------------- /crawler/docs/step2/support.txt: -------------------------------------------------------------------------------- 1 | ../step1/support.txt -------------------------------------------------------------------------------- /crawler/docs/step3/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
# Every target in this Makefile is a command rather than a file, so all of
# them are declared phony (including latexpdfja, texinfo, info, xml and
# pseudoxml, which were previously missing from the list).
.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp \
        epub latex latexpdf latexpdfja text man texinfo info gettext changes \
        linkcheck doctest xml pseudoxml

# Print the list of supported builder targets.
help:
	@echo "Please use \`make <target>' where <target> is one of"
	@echo " html to make standalone HTML files"
	@echo " dirhtml to make HTML files named index.html in directories"
	@echo " singlehtml to make a single large HTML file"
	@echo " pickle to make pickle files"
	@echo " json to make JSON files"
	@echo " htmlhelp to make HTML files and a HTML help project"
	@echo " qthelp to make HTML files and a qthelp project"
	@echo " devhelp to make HTML files and a Devhelp project"
	@echo " epub to make an epub"
	@echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
	@echo " latexpdf to make LaTeX files and run them through pdflatex"
	@echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
	@echo " text to make text files"
	@echo " man to make manual pages"
	@echo " texinfo to make Texinfo files"
	@echo " info to make Texinfo files and run them through makeinfo"
	@echo " gettext to make PO message catalogs"
	@echo " changes to make an overview of all changed/added/deprecated items"
	@echo " xml to make Docutils-native XML files"
	@echo " pseudoxml to make pseudoxml-XML files for display purposes"
	@echo " linkcheck to check all external links for integrity"
	@echo " doctest to run all doctests embedded in the documentation (if enabled)"

# Remove everything under the build directory.
# The guard aborts when BUILDDIR is empty (e.g. `make BUILDDIR= clean`),
# because the recipe would otherwise expand to `rm -rf /*`.
clean:
	$(if $(strip $(BUILDDIR)),,$(error BUILDDIR is empty - refusing to run rm -rf on the filesystem root))
	rm -rf $(BUILDDIR)/*

# Standalone HTML pages.
html:
	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

# HTML pages written as <page>/index.html directories.
dirhtml:
	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."

# One large HTML file containing the whole documentation.
singlehtml:
	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
	@echo
	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."

# Pickle serialization of the documents.
pickle:
	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
	@echo
	@echo "Build finished; now you can process the pickle files."

# JSON serialization of the documents.
json:
	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
	@echo
	@echo "Build finished; now you can process the JSON files."

# HTML plus an HTML Help project file.
htmlhelp:
	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
	@echo
	@echo "Build finished; now you can run HTML Help Workshop with the" \
	      ".hhp project file in $(BUILDDIR)/htmlhelp."

# HTML plus a Qt help project.
# The inner quotes around qcollectiongenerator are escaped so that the shell
# prints them instead of treating them as string delimiters.
qthelp:
	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
	@echo
	@echo "Build finished; now you can run \"qcollectiongenerator\" with the" \
	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Crawler.qhcp"
	@echo "To view the help file:"
	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Crawler.qhc"

# HTML plus a Devhelp project. $$HOME is escaped so the shell, not make,
# expands it.
devhelp:
	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
	@echo
	@echo "Build finished."
	@echo "To view the help file:"
	@echo "# mkdir -p $$HOME/.local/share/devhelp/Crawler"
	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Crawler"
	@echo "# devhelp"

# EPUB output.
epub:
	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
	@echo
	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."

# LaTeX sources only; PAPER=a4 or PAPER=letter selects the paper size.
latex:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo
	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
	@echo "Run \`make' in that directory to run these through (pdf)latex" \
	      "(use \`make latexpdf' here to do that automatically)."

# LaTeX sources, then PDF via the Makefile Sphinx generates in the latex dir.
latexpdf:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through pdflatex..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

# Same as latexpdf but through the platex/dvipdfmx toolchain.
latexpdfja:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through platex and dvipdfmx..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

# Plain-text output.
text:
	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
	@echo
	@echo "Build finished. The text files are in $(BUILDDIR)/text."

# Manual pages.
man:
	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
	@echo
	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."

# Texinfo sources only.
texinfo:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo
	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
	@echo "Run \`make' in that directory to run these through makeinfo" \
	      "(use \`make info' here to do that automatically)."

# Texinfo sources, then Info files via the generated Makefile.
# Uses $(MAKE) rather than a literal `make` so that -j/-n and command-line
# variables propagate to the sub-make, matching latexpdf/latexpdfja above.
info:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo "Running Texinfo files through makeinfo..."
	$(MAKE) -C $(BUILDDIR)/texinfo info
	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."

# PO message catalogs; uses I18NSPHINXOPTS (no shared doctree directory).
gettext:
	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
	@echo
	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."

# Overview of changed/added/deprecated items.
changes:
	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
	@echo
	@echo "The overview file is in $(BUILDDIR)/changes."

# Check all external links.
linkcheck:
	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
	@echo
	@echo "Link check complete; look for any errors in the above output " \
	      "or in $(BUILDDIR)/linkcheck/output.txt."
163 | 164 | doctest: 165 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 166 | @echo "Testing of doctests in the sources finished, look at the " \ 167 | "results in $(BUILDDIR)/doctest/output.txt." 168 | 169 | xml: 170 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 171 | @echo 172 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 173 | 174 | pseudoxml: 175 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 176 | @echo 177 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 178 | -------------------------------------------------------------------------------- /crawler/docs/step3/api.rst: -------------------------------------------------------------------------------- 1 | Crawler Python API 2 | ================== 3 | 4 | Getting started with Crawler is easy. 5 | The main class you need to care about is :class:`~crawler.main.Crawler` 6 | 7 | crawler.main 8 | ------------ 9 | 10 | .. automodule:: crawler.main 11 | :members: 12 | 13 | crawler.utils 14 | ------------- 15 | 16 | .. testsetup:: * 17 | 18 | from crawler.utils import should_ignore, log 19 | 20 | .. automethod:: crawler.utils.should_ignore 21 | 22 | .. doctest:: 23 | 24 | >>> should_ignore(['blog/$'], 'http://ericholscher.com/blog/') 25 | True 26 | 27 | .. doctest:: 28 | 29 | # This test should fail 30 | >>> should_ignore(['home'], 'http://ericholscher.com/blog/') 31 | True 32 | 33 | .. automethod:: crawler.utils.log 34 | 35 | .. doctest:: 36 | 37 | >>> log('http://ericholscher.com/blog/', 200) 38 | OK: 200 http://ericholscher.com/blog/ 39 | 40 | .. doctest:: 41 | 42 | >>> log('http://ericholscher.com/blog/', 500) 43 | ERR: 500 http://ericholscher.com/blog/ 44 | 45 | .. 
doctest:: 46 | 47 | # This test should fail 48 | >>> log('http://ericholscher.com/blog/', 500) 49 | OK: 500 http://ericholscher.com/blog/ 50 | 51 | -------------------------------------------------------------------------------- /crawler/docs/step3/api.txt: -------------------------------------------------------------------------------- 1 | Crawler Python API 2 | 3 | Getting started with Crawler is easy. 4 | The main class you need to care about is crawler.main.Crawler 5 | 6 | crawler.main 7 | 8 | crawler.utils 9 | 10 | crawler.utils.should_ignore 11 | 12 | should_ignore(['blog/$'], 'http://ericholscher.com/blog/') 13 | True 14 | 15 | # This test should fail 16 | should_ignore(['home'], 'http://ericholscher.com/blog/') 17 | True 18 | 19 | crawler.utils.log 20 | 21 | log('http://ericholscher.com/blog/', 200) 22 | OK: 200 http://ericholscher.com/blog/ 23 | 24 | log('http://ericholscher.com/blog/', 500) 25 | ERR: 500 http://ericholscher.com/blog/ 26 | 27 | # This test should fail 28 | log('http://ericholscher.com/blog/', 500) 29 | OK: 500 http://ericholscher.com/blog/ 30 | 31 | -------------------------------------------------------------------------------- /crawler/docs/step3/cli.rst: -------------------------------------------------------------------------------- 1 | ../step2/cli.rst -------------------------------------------------------------------------------- /crawler/docs/step3/cli.txt: -------------------------------------------------------------------------------- 1 | ../step2/cli.txt -------------------------------------------------------------------------------- /crawler/docs/step3/conf.py: -------------------------------------------------------------------------------- 1 | ../step2/conf.py -------------------------------------------------------------------------------- /crawler/docs/step3/cookbook.rst: -------------------------------------------------------------------------------- 1 | ../step2/cookbook.rst 
-------------------------------------------------------------------------------- /crawler/docs/step3/cookbook.txt: -------------------------------------------------------------------------------- 1 | ../step2/cookbook.txt -------------------------------------------------------------------------------- /crawler/docs/step3/index.rst: -------------------------------------------------------------------------------- 1 | Crawler Step 3 Documentation 2 | ============================ 3 | 4 | Our Crawler will make your life as a web developer easier. 5 | You can learn more about it in our documentation. 6 | 7 | .. toctree:: 8 | :caption: User Guide 9 | 10 | install 11 | support 12 | cookbook 13 | 14 | .. toctree:: 15 | :caption: Programmer Reference 16 | 17 | cli 18 | api 19 | 20 | -------------------------------------------------------------------------------- /crawler/docs/step3/install.rst: -------------------------------------------------------------------------------- 1 | ../step2/install.rst -------------------------------------------------------------------------------- /crawler/docs/step3/install.txt: -------------------------------------------------------------------------------- 1 | ../step2/install.txt -------------------------------------------------------------------------------- /crawler/docs/step3/support.rst: -------------------------------------------------------------------------------- 1 | ../step2/support.rst -------------------------------------------------------------------------------- /crawler/docs/step3/support.txt: -------------------------------------------------------------------------------- 1 | ../step2/support.txt -------------------------------------------------------------------------------- /crawler/docs/step3/utils.txt: -------------------------------------------------------------------------------- 1 | Utils 2 | 3 | from crawler.utils import should_ignore, log 4 | 5 | automethod: crawler.utils.should_ignore 6 | 7 | >>> 
should_ignore(['blog/$'], 'http://ericholscher.com/blog/') 8 | True 9 | 10 | # This test should fail 11 | >>> should_ignore(['home'], 'http://ericholscher.com/blog/') 12 | True 13 | 14 | automethod: crawler.utils.log 15 | 16 | >>> log('http://ericholscher.com/blog/', 200) 17 | OK: 200 http://ericholscher.com/blog/ 18 | 19 | >>> log('http://ericholscher.com/blog/', 500) 20 | ERR: 500 http://ericholscher.com/blog/ 21 | 22 | # This test should fail 23 | >>> log('http://ericholscher.com/blog/', 500) 24 | OK: 500 http://ericholscher.com/blog/ 25 | 26 | -------------------------------------------------------------------------------- /crawler/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name='crawler', 5 | version='1.0.0', 6 | author='Eric Holscher', 7 | author_email='eric@ericholscher.com', 8 | license='BSD', 9 | description='A simple web crawler', 10 | package_dir={'': 'src'}, 11 | packages=find_packages('src'), 12 | zip_safe=True, 13 | install_requires=['requests', 'beautifulsoup4'], 14 | entry_points={ 15 | 'console_scripts': [ 16 | 'crawler=crawler.main:run_main', 17 | ] 18 | }, 19 | ) 20 | -------------------------------------------------------------------------------- /crawler/src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ericholscher/sphinx-tutorial/56cfc35553955f656fbc2b4fc87b7e95ff085cec/crawler/src/__init__.py -------------------------------------------------------------------------------- /crawler/src/crawler/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ericholscher/sphinx-tutorial/56cfc35553955f656fbc2b4fc87b7e95ff085cec/crawler/src/crawler/__init__.py -------------------------------------------------------------------------------- /crawler/src/crawler/main.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | Main Module 3 | """ 4 | import time 5 | from optparse import OptionParser 6 | # Python 3 compat 7 | try: 8 | from urlparse import urlparse 9 | except ImportError: 10 | from urllib.parse import urlparse 11 | 12 | import requests 13 | from bs4 import BeautifulSoup 14 | 15 | from utils import log, should_ignore 16 | 17 | 18 | class Crawler(object): 19 | 20 | """ 21 | Main Crawler object. 22 | 23 | Example:: 24 | 25 | c = Crawler('http://example.com') 26 | c.crawl() 27 | 28 | :param delay: Number of seconds to wait between searches 29 | :param ignore: Paths to ignore 30 | 31 | """ 32 | 33 | def __init__(self, url, delay, ignore): 34 | self.url = url 35 | self.delay = delay 36 | if ignore: 37 | self.ignore = ignore.split(',') 38 | else: 39 | self.ignore = [] 40 | 41 | def get(self, url): 42 | """ 43 | Get a specific URL, log its response, and return its content. 44 | 45 | :param url: The fully qualified URL to retrieve 46 | """ 47 | response = requests.get(url) 48 | log(url, response.status_code) 49 | return response.content 50 | 51 | def crawl(self): 52 | """ 53 | Crawl the URL set up in the crawler. 54 | 55 | This is the main entry point, and will block while it runs. 56 | """ 57 | html = self.get(self.url) 58 | soup = BeautifulSoup(html, "html.parser") 59 | for tag in soup.findAll('a', href=True): 60 | link = tag['href'] 61 | parsed = urlparse(link) 62 | if parsed.scheme: 63 | to_get = link 64 | else: 65 | to_get = self.url + link 66 | if should_ignore(self.ignore, to_get): 67 | print('Ignoring URL: {url}'.format(url=to_get)) 68 | continue 69 | self.get(to_get) 70 | time.sleep(self.delay) 71 | 72 | 73 | def run_main(): 74 | """ 75 | A small wrapper that is used for running as a CLI Script. 
76 | """ 77 | 78 | parser = OptionParser() 79 | parser.add_option("-u", "--url", dest="url", default="http://docs.readthedocs.org/en/latest/", 80 | help="URL to fetch") 81 | parser.add_option("-d", "--delay", dest="delay", type="int", default=1, 82 | help="Delay between fetching") 83 | parser.add_option("-i", "--ignore", dest="ignore", default='', 84 | help="Ignore a subset of URL's") 85 | 86 | (options, args) = parser.parse_args() 87 | 88 | c = Crawler(url=options.url, delay=options.delay, ignore=options.ignore) 89 | c.crawl() 90 | 91 | if __name__ == '__main__': 92 | run_main() 93 | -------------------------------------------------------------------------------- /crawler/src/crawler/utils.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | 4 | def log(url, status): 5 | """ 6 | Log information about a response to the console. 7 | 8 | :param url: The URL that was retrieved. 9 | :param status: A status code for the `Response`. 10 | 11 | """ 12 | if 200 <= int(status) < 300: 13 | prose = 'OK' 14 | else: 15 | prose = 'ERR' 16 | print("{prose}: {status} {url}".format(prose=prose, url=url, status=status)) 17 | 18 | 19 | def should_ignore(ignore_list, url): 20 | """ 21 | Returns True if the URL should be ignored 22 | 23 | :param ignore_list: The list of regexs to ignore. 24 | :param url: The fully qualified URL to compare against. 
25 | """ 26 | for pattern in ignore_list: 27 | compiled = re.compile(pattern) 28 | if compiled.search(url): 29 | return True 30 | return False 31 | -------------------------------------------------------------------------------- /files/cheatsheet-back-full.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ericholscher/sphinx-tutorial/56cfc35553955f656fbc2b4fc87b7e95ff085cec/files/cheatsheet-back-full.png -------------------------------------------------------------------------------- /files/cheatsheet-front-full.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ericholscher/sphinx-tutorial/56cfc35553955f656fbc2b4fc87b7e95ff085cec/files/cheatsheet-front-full.png -------------------------------------------------------------------------------- /finish.rst: -------------------------------------------------------------------------------- 1 | Finishing Up: Additional Extensions & Individual Exploration 2 | ============================================================ 3 | 4 | If there is much time left in the session, 5 | take some time to play around and get to know Sphinx better. 6 | There is a large ecosystem of extensions, 7 | and lots of builtin features we haven't covered. 8 | 9 | I'm happy to consult with you about interesting challenges you might be facing with docs. 10 | 11 | Part of being a good user of Sphinx is knowing what all is there. 12 | Here are a few options for what to look at: 13 | 14 | * :ref:`dev-extensions` 15 | * Read through all the existing :ref:`extensions` 16 | * `Breathe `_ 17 | * Explore the Read the Docs Admin Panel 18 | * Apply these docs to a project you have 19 | * Show a neighbor what you've done & talk about the concepts learned. 20 | 21 | Also, here are a number of more thought out examples of things you might do: 22 | 23 | .. 
contents:: 24 | :local: 25 | 26 | Markdown Support 27 | ---------------- 28 | 29 | You can use Markdown and reStructuredText in the same Sphinx project. 30 | We support this natively on Read the Docs, and you can do it locally:: 31 | 32 | $ pip install recommonmark 33 | 34 | Then in your ``conf.py``: 35 | 36 | .. code-block:: python 37 | 38 | from recommonmark.parser import CommonMarkParser 39 | 40 | source_parsers = { 41 | '.md': CommonMarkParser, 42 | } 43 | 44 | source_suffix = ['.rst', '.md'] 45 | 46 | .. note:: Markdown doesn't support a lot of the features of Sphinx, 47 | like inline markup and directives. 48 | However, it works for basic prose content. 49 | 50 | You can now add a Markdown file with a ``.md`` extension, 51 | and Sphinx will build it into the project. 52 | You can do things like include it in your normal TOC Tree, 53 | and Sphinx will search it. 54 | 55 | Go ahead and add a new Markdown File with an ``.md`` extension. 56 | Since we haven't covered Markdown in this text, 57 | here is an example ``community.md``:: 58 | 59 | # Community Standards 60 | 61 | The Crawler community is quite large, 62 | and with that we have a specific set of standards that we apply in our community. 63 | 64 | All of our project spaces are covered by the [Django Community Code of Conduct](https://djangoproject.com/conduct/). 65 | 66 | ### Feedback 67 | 68 | Any issues can be sent directly to our [project mailing list](mailto:community@crawler.com). 69 | 70 | Add it to your ``toctree`` in your ``index.rst`` as well, 71 | and you will see it appear properly in Sphinx. 72 | 73 | Generate i18n Files 74 | ------------------- 75 | 76 | Sphinx has support for i18n. 77 | If you do a ``make gettext`` on your project, 78 | you should get a gettext catalog for your documentation. 79 | Check for it in ``_build/locale``. 80 | 81 | You can then use these files to translate your documentation using most standard tools. 
82 | You can read more about this in Sphinx's :ref:`sphinx:intl` doc. 83 | 84 | Play with Sphinx autoapi 85 | ------------------------ 86 | 87 | ``sphinx-autoapi`` is a tool that I am helping develop which will make doing API docs easier. 88 | It depends on parsing, 89 | instead of importing code. 90 | This means you don't need to change your PYTHONPATH at all, 91 | and we have a few other different design decisions. 92 | 93 | First you need to install autoapi: 94 | 95 | .. code:: bash 96 | 97 | pip install sphinx-autoapi 98 | 99 | Then add it to your Sphinx project's ``conf.py``: 100 | 101 | .. code:: python 102 | 103 | extensions = ['autoapi.extension'] 104 | 105 | # Document Python Code 106 | autoapi_type = 'python' 107 | autoapi_dir = '../src' 108 | 109 | AutoAPI will automatically add itself to the last TOCTree in your top-level ``index.rst``. 110 | 111 | This is needed because we will be outputting rst files into the ``autoapi`` directory. 112 | This adds it into the global TOCTree for your project, 113 | so that it appears in the menus. 114 | 115 | Add Django Support 116 | ------------------ 117 | 118 | Have a Django project laying around? 119 | Add Sphinx documentation to it! 120 | There isn't anything special for Django projects except for the ``DJANGO_SETTINGS_MODULE``. 121 | 122 | You can set it in your ``conf.py``, 123 | similar to ``autodoc``. 124 | Try this piece of code:: 125 | 126 | # Set this to whatever your settings file should default to. 127 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "settings.test") 128 | 129 | 130 | Tables 131 | ------ 132 | 133 | Tables can be a tricky part of a lot of lightweight markup languages. 134 | Luckily, 135 | RST has some really nice features around tables. 136 | It supports tables in a couple easier to use formats: 137 | 138 | * `CSV `_ 139 | * `List `_ 140 | 141 | So for example, 142 | you can manage your tables in Google Docs, 143 | then export them as CSV in your docs. 
144 | 145 | 146 | An example of a CSV table: 147 | 148 | .. code-block:: rst 149 | 150 | .. csv-table:: 151 | :header: "Treat", "Quantity", "Description" 152 | :widths: 15, 10, 30 153 | 154 | "Albatross", 2.99, "On a stick!" 155 | "Crunchy Frog", 1.49, "If we took the bones out, it wouldn't be 156 | crunchy, now would it?" 157 | "Gannet Ripple", 1.99, "On a stick!" 158 | 159 | And a rendered example: 160 | 161 | .. csv-table:: 162 | :header: "Treat", "Quantity", "Description" 163 | :widths: 15, 10, 30 164 | 165 | "Albatross", 2.99, "On a stick!" 166 | "Crunchy Frog", 1.49, "If we took the bones out, it wouldn't be 167 | crunchy, now would it?" 168 | "Gannet Ripple", 1.99, "On a stick!" 169 | 170 | Go ahead and try it yourself! -------------------------------------------------------------------------------- /glossary.rst: -------------------------------------------------------------------------------- 1 | Glossary 2 | ======== 3 | 4 | .. glossary:: 5 | 6 | Semantic Markup 7 | Semantic means that you are specifying *what* something is, instead of *how* it is being used. In Sphinx this means you specify what an object is, and Sphinx handles displaying and linking it properly in the page. 8 | 9 | Example: In HTML, ``issue 72`` to link an issue is bad. You're specifying how it is displayed. ``issue 72`` is much better, because you are saying that the exist is an ``issue``. Issues can then be styled independently. 10 | 11 | -------------------------------------------------------------------------------- /index.rst: -------------------------------------------------------------------------------- 1 | =============== 2 | Sphinx Tutorial 3 | =============== 4 | 5 | Welcome to the Introduction to Sphinx & Read the Docs. 6 | This tutorial will walk you through the initial steps 7 | writing reStructuredText and Sphinx, 8 | and deploying that code to Read the Docs. 9 | 10 | Please provide feedback to `@ericholscher`_. 11 | 12 | .. 
_@ericholscher: http://twitter.com/ericholscher 13 | 14 | Schedule 15 | -------- 16 | 17 | * 9-9:30 Introduction to the Tutorial 18 | * 9:30-9:50 :doc:`start` 19 | * 9:50-10:20 :doc:`step-1` 20 | * 10:20-10:40 *Break* 21 | * 10:40-11:10 :doc:`step-2` 22 | * 11:10-11:40 :doc:`step-3` 23 | * 11:40-12:20 :doc:`finish` 24 | 25 | Thanks for coming 26 | 27 | .. toctree:: 28 | :maxdepth: 2 29 | :caption: Tutorial 30 | 31 | start 32 | step-1 33 | step-2 34 | step-3 35 | finish 36 | 37 | .. toctree:: 38 | :maxdepth: 2 39 | :caption: Supplemental Material 40 | 41 | cheatsheet 42 | 43 | .. toctree:: 44 | :caption: Step 1 45 | :glob: 46 | 47 | crawler/docs/step1/index 48 | 49 | .. toctree:: 50 | :caption: Step 2 51 | :glob: 52 | 53 | crawler/docs/step2/index 54 | 55 | .. toctree:: 56 | :caption: Step 3 57 | :glob: 58 | 59 | crawler/docs/step3/index 60 | 61 | 62 | -------------------------------------------------------------------------------- /quickstart-output.txt: -------------------------------------------------------------------------------- 1 | -> sphinx-quickstart 2 | Welcome to the Sphinx 1.3.1 quickstart utility. 3 | 4 | Please enter values for the following settings (just press Enter to 5 | accept a default value, if one is given in brackets). 6 | 7 | Enter the root path for documentation. 8 | > Root path for the documentation [.]: 9 | 10 | You have two options for placing the build directory for Sphinx output. 11 | Either, you use a directory "_build" within the root path, or you separate 12 | "source" and "build" directories within the root path. 13 | > Separate source and build directories (y/n) [n]: 14 | 15 | Inside the root directory, two more directories will be created; "_templates" 16 | for custom HTML templates and "_static" for custom stylesheets and other static 17 | files. You can enter another prefix (such as ".") to replace the underscore. 
18 | > Name prefix for templates and static dir [_]: 19 | 20 | The project name will occur in several places in the built documentation. 21 | > Project name: Crawler 22 | > Author name(s): Eric Holscher 23 | 24 | Sphinx has the notion of a "version" and a "release" for the 25 | software. Each version can have multiple releases. For example, for 26 | Python the version is something like 2.5 or 3.0, while the release is 27 | something like 2.5.1 or 3.0a1. If you don't need this dual structure, 28 | just set both to the same value. 29 | > Project version: 1.0 30 | > Project release [1.0]: 31 | 32 | If the documents are to be written in a language other than English, 33 | you can select a language here by its language code. Sphinx will then 34 | translate text that it generates into that language. 35 | 36 | For a list of supported codes, see 37 | http://sphinx-doc.org/config.html#confval-language. 38 | > Project language [en]: 39 | 40 | The file name suffix for source files. Commonly, this is either ".txt" 41 | or ".rst". Only files with this suffix are considered documents. 42 | > Source file suffix [.rst]: 43 | 44 | One document is special in that it is considered the top node of the 45 | "contents tree", that is, it is the root of the hierarchical structure 46 | of the documents. Normally, this is "index", but if your "index" 47 | document is a custom template, you can also set this to another filename. 
48 | > Name of your master document (without suffix) [index]: 49 | 50 | Sphinx can also add configuration for epub output: 51 | > Do you want to use the epub builder (y/n) [n]: 52 | 53 | Please indicate if you want to use one of the following Sphinx extensions: 54 | > autodoc: automatically insert docstrings from modules (y/n) [n]: 55 | > doctest: automatically test code snippets in doctest blocks (y/n) [n]: 56 | > intersphinx: link between Sphinx documentation of different projects (y/n) [n]: 57 | > todo: write "todo" entries that can be shown or hidden on build (y/n) [n]: 58 | > coverage: checks for documentation coverage (y/n) [n]: 59 | > pngmath: include math, rendered as PNG images (y/n) [n]: 60 | > mathjax: include math, rendered in the browser by MathJax (y/n) [n]: 61 | > ifconfig: conditional inclusion of content based on config values (y/n) [n]: 62 | > viewcode: include links to the source code of documented Python objects (y/n) [n]: 63 | 64 | A Makefile and a Windows command file can be generated for you so that you 65 | only have to run e.g. `make html' instead of invoking sphinx-build 66 | directly. 67 | > Create Makefile? (y/n) [y]: 68 | > Create Windows command file? (y/n) [y]: 69 | 70 | Creating file ./conf.py. 71 | Creating file ./index.rst. 72 | Creating file ./Makefile. 73 | Creating file ./make.bat. 74 | 75 | Finished: An initial directory structure has been created. 76 | 77 | You should now populate your master file ./index.rst and create other documentation 78 | source files. Use the Makefile to build the docs, like so: 79 | make builder 80 | where "builder" is one of the supported builders, e.g. html, latex or linkcheck. 
81 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4 2 | requests 3 | -------------------------------------------------------------------------------- /start.rst: -------------------------------------------------------------------------------- 1 | Getting Started: Overview & Installing Initial Project 2 | ====================================================== 3 | 4 | Concepts 5 | ******** 6 | 7 | Sphinx Philosophy 8 | ----------------- 9 | 10 | `Sphinx`_ is what is called a documentation generator. 11 | This means that it takes a bunch of source files in plain text, 12 | and generates a bunch of other awesome things, mainly HTML. 13 | For our use case you can think of it as a program that takes in plain text 14 | files in `reStructuredText`_ format, and outputs HTML. 15 | 16 | :: 17 | 18 | reST -> Sphinx -> HTML 19 | 20 | So as a user of Sphinx, your main job will be writing these text files. 21 | This means that you should be minimally familiar with `reStructuredText`_ as 22 | a language. 23 | It's similar to Markdown in a lot of ways, 24 | if you are already familiar with Markdown. 25 | 26 | Tasks 27 | ***** 28 | 29 | Installing Sphinx 30 | ----------------- 31 | 32 | The first step is installing `Sphinx`_. 33 | Sphinx is a python project, 34 | so it can be installed like any other python library. 35 | Every Operating System should have Python pre-installed, 36 | so you should just have to run:: 37 | 38 | pip install sphinx 39 | 40 | .. note:: Advanced users can install this in a virtualenv if they wish. 41 | Also, ``easy_install Sphinx`` works fine if you don't have pip. 42 | 43 | Get this repo 44 | ------------- 45 | 46 | To do this tutorial, 47 | you need the actual repository. 48 | It contains the example code that we will be documenting. 
49 | 50 | You can clone it here:: 51 | 52 | git clone https://github.com/ericholscher/pycon-sphinx-tutorial 53 | 54 | 55 | Getting Started 56 | --------------- 57 | 58 | Now you are ready to start creating documentation. 59 | You should have a directory called ``crawler``, 60 | which contains source code in its ``src`` directory. 61 | Inside the ``crawler`` you should create a ``docs`` directory, 62 | and move into it:: 63 | 64 | cd crawler 65 | mkdir docs 66 | cd docs 67 | 68 | Then you can create the Sphinx project skeleton in this directory:: 69 | 70 | sphinx-quickstart 71 | 72 | Have the *Project name* be ``Crawler``, 73 | put in your own *Author name*, 74 | and put in ``1.0`` as the *Project version*. 75 | Otherwise you can accept the default options. 76 | 77 | My output looks like this: 78 | 79 | .. literalinclude:: quickstart-output.txt 80 | :language: text 81 | :linenos: 82 | 83 | Your file system should now look similar to this:: 84 | 85 | crawler 86 | ├── src 87 | └── docs 88 | ├── index.rst 89 | ├── conf.py 90 | ├── Makefile 91 | ├── make.bat 92 | ├── _build 93 | ├── _static 94 | ├── _templates 95 | 96 | We have a top-level ``docs`` directory in the main project directory. 97 | Inside of this is: 98 | 99 | ``index.rst``: 100 | This is the index file for the documentation, or what lives at ``/``. 101 | It normally contains a *Table of Contents* that will link to all other 102 | pages of the documentation. 103 | 104 | ``conf.py``: 105 | Allows for customization of Sphinx. 106 | You won't need to use this too much yet, 107 | but it's good to be familiar with this file. 108 | 109 | ``Makefile`` & ``make.bat``: 110 | This is the main interface for local development, 111 | and shouldn't be changed. 112 | 113 | ``_build``: 114 | The directory that your output files go into. 115 | 116 | ``_static``: 117 | The directory to include all your static files, like images. 
118 | 119 | ``_templates``: 120 | Allows you to override Sphinx templates to customize look and feel. 121 | 122 | Building docs 123 | ------------- 124 | 125 | Let's build our docs into HTML to see how it works. 126 | Simply run: 127 | 128 | .. code-block:: bash 129 | 130 | # Inside top-level docs/ directory. 131 | make html 132 | 133 | This should run Sphinx in your shell, and output HTML. 134 | At the end, it should say something about the documents being ready in 135 | ``_build/html``. 136 | You can now open them in your browser by typing:: 137 | 138 | # On OS X 139 | open _build/html/index.html 140 | 141 | You can also view it by running a web server in that directory:: 142 | 143 | # Inside docs/_build/html directory. 144 | python -m SimpleHTTPServer 145 | 146 | # For python 3 147 | python3 -m http.server 148 | 149 | Then open your browser to http://localhost:8000. 150 | 151 | This should display a rendered HTML page that says **Welcome to Crawler’s documentation!** at the top. 152 | 153 | .. note:: ``make html`` is the main way you will build HTML documentation locally. 154 | It is simply a wrapper around a more complex call to Sphinx, 155 | which you can see as the first line of output. 156 | 157 | Custom Theme 158 | ------------ 159 | 160 | You'll notice your docs look a bit different than mine. 161 | 162 | First, 163 | you need to install the theme: 164 | 165 | .. code:: bash 166 | 167 | $ pip install sphinx_rtd_theme 168 | 169 | Then you need to update a few settings in your ``conf.py``. 170 | 171 | .. code:: python 172 | 173 | import sphinx_rtd_theme 174 | 175 | html_theme = 'sphinx_rtd_theme' 176 | 177 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] 178 | 179 | If you rebuild your documentation, 180 | you will see the new theme:: 181 | 182 | make html 183 | 184 | .. warning:: Didn't see your new theme? 185 | That's because Sphinx is smart, 186 | and only rebuilds pages that have changed. 
187 | It might have thought none of your pages changed, 188 | so it didn't rebuild anything. 189 | Fix this by running a ``make clean html``, 190 | which will force a full rebuild. 191 | 192 | Extra Credit 193 | ************ 194 | 195 | Have some extra time left? 196 | Check out these other cool things you can do with Sphinx. 197 | 198 | Understanding ``conf.py`` 199 | ------------------------- 200 | 201 | Sphinx is quite configurable, 202 | which can be a bit overwhelming. 203 | However, 204 | the ``conf.py`` file is quite well documented. 205 | You can read through it and get some ideas about what all it can do. 206 | 207 | A few of the more useful settings are: 208 | 209 | * project 210 | * html_theme 211 | * extensions 212 | * exclude_patterns 213 | 214 | This is all well documented in the Sphinx :ref:`sphinx:build-config` doc. 215 | 216 | Moving on 217 | --------- 218 | 219 | Now it is time to move on to :doc:`step-1`. 220 | -------------------------------------------------------------------------------- /step-1.rst: -------------------------------------------------------------------------------- 1 | Step 1: Getting started with RST 2 | ================================ 3 | 4 | Now that we have our basic skeleton, 5 | let's document the project. 6 | As you might have guessed from the name, 7 | we'll be documenting a basic web crawler. 8 | 9 | For this project, 10 | we'll have the following pages: 11 | 12 | * Index Page 13 | * Support 14 | * Installation 15 | * Cookbook 16 | * Command Line Options 17 | * API 18 | 19 | Let's go over the concepts we'll cover, 20 | and then we can talk more about the pages to create. 21 | 22 | Concepts 23 | ******** 24 | 25 | A lot of these RST syntax examples are covered in the Sphinx :ref:`sphinx:rst-primer`. 26 | 27 | .. index:: 28 | pair: Syntax; Hyperlink 29 | 30 | Sections 31 | -------- 32 | 33 | .. 
code-block:: rst 34 | 35 | Title 36 | ===== 37 | 38 | Section 39 | ------- 40 | 41 | Subsection 42 | ~~~~~~~~~~ 43 | 44 | Every Sphinx document has multiple levels of headings. 45 | Section headers are created by underlining 46 | the section title with a punctuation character, at least 47 | as long as the text. 48 | 49 | They give structure to the document, 50 | which is used in navigation and in the display in all output formats. 51 | 52 | Code Samples 53 | ------------ 54 | 55 | .. code-block:: rst 56 | 57 | You can use ``backticks`` for showing ``highlighted`` code. 58 | 59 | If you want to make sure that text is shown in monospaced fonts for code examples or concepts, 60 | use double backticks around it. 61 | It looks ``like this`` on output. 62 | 63 | Hyperlink Syntax 64 | ---------------- 65 | 66 | .. code-block:: rst 67 | 68 | `A cool website`_ 69 | 70 | .. _A cool website: http://sphinx-doc.org 71 | 72 | The link text is set by putting a ``_`` after some text. 73 | The ````` is used to group text, 74 | allowing you to include multiple words in your link text. 75 | You should use the `````, 76 | even when the link text is only one word. 77 | This keeps the syntax consistent. 78 | 79 | The link target is defined at the bottom of the section with ``.. _<link text>: <target>``. 80 | 81 | .. index:: 82 | pair: Syntax; Code Example 83 | 84 | Code Example Syntax 85 | ------------------- 86 | 87 | .. code-block:: rst 88 | 89 | A cool bit of code:: 90 | 91 | Some cool Code 92 | 93 | .. code-block:: rst 94 | 95 | A bit of **rst** which should be *highlighted* properly. 96 | 97 | The syntax for displaying code is ``::``. 98 | When it is used at the end of a sentence, 99 | Sphinx is smart and displays one ``:`` in the output, 100 | and knows there is a code example in the following indented block. 101 | 102 | Sphinx, 103 | like Python, 104 | uses meaningful whitespace. 105 | Blocks of content are structured based on the indentation level they are on. 
106 | You can see this concept with our ``code-block`` directive later. 107 | 108 | .. index:: 109 | pair: Syntax; TOC Tree 110 | 111 | .. _toctree-syntax: 112 | 113 | Table of Contents Tree 114 | ---------------------- 115 | 116 | .. code-block:: rst 117 | 118 | .. toctree:: 119 | :maxdepth: 2 120 | 121 | install 122 | support 123 | 124 | Now would be a good time to introduce the ``toctree``. 125 | One of the main concepts in Sphinx is that it allows multiple pages to be combined into a cohesive hierarchy. 126 | The ``toctree`` directive is a fundamental part of this structure. 127 | 128 | The above example will output a Table of Contents in the page where it occurs. 129 | The ``maxdepth`` argument tells Sphinx to include 2 levels of headers in its output. 130 | It will output the 2 top-level headers of the pages listed. 131 | This also tells Sphinx that the other pages are sub-pages of the current page, 132 | creating a "tree" structure of the pages:: 133 | 134 | index 135 | ├── install 136 | ├── support 137 | 138 | .. note:: The TOC Tree is also used for generating the navigation elements inside Sphinx. 139 | It is quite important, 140 | and one of the most powerful concepts in Sphinx. 141 | 142 | Tasks 143 | ***** 144 | 145 | Create Installation page 146 | ------------------------ 147 | 148 | Installation documentation is really important. 149 | Anyone who is coming to the project will need to install it. 150 | For our example, 151 | we are installing a basic Python script, 152 | so it will be pretty easy. 153 | 154 | Include the following in your ``install.rst``, 155 | on the same level as ``index.rst``, properly marked up: 156 | 157 | .. literalinclude:: crawler/docs/step1/install.txt 158 | :linenos: 159 | 160 | .. note:: 161 | Live Preview: :doc:`crawler/docs/step1/install` 162 | 163 | Create Support page 164 | ------------------- 165 | 166 | It's always important that users can ask questions when they get stuck. 
167 | There are many ways to handle this, 168 | but normal approaches are to have an IRC channel and mailing list. 169 | 170 | Go ahead and put this in your ``support.rst``, but add the proper RST markup: 171 | 172 | .. literalinclude:: crawler/docs/step1/support.txt 173 | :linenos: 174 | 175 | .. note:: 176 | Live Preview: :doc:`crawler/docs/step1/support` 177 | 178 | You can now open the ``support.html`` file directly, 179 | but it isn't showing on the navigation. 180 | 181 | Add TocTree 182 | ----------- 183 | 184 | Now you need to tie all these files together. 185 | As we mentioned above, 186 | the :ref:`toctree-syntax` is the best way to do this. 187 | Go ahead and complete the ``toctree`` directive in your ``index.rst`` file, 188 | adding the new ``install`` and ``support``. 189 | 190 | Sanity Check 191 | ------------ 192 | 193 | Your filesystem should now look something like this:: 194 | 195 | crawler 196 | ├── src 197 | └── docs 198 | ├── index.rst 199 | ├── support.rst 200 | ├── install.rst 201 | ├── Makefile 202 | ├── conf.py 203 | 204 | Build Docs 205 | ---------- 206 | 207 | Now that you have a few pages of content, 208 | go ahead and build your docs again:: 209 | 210 | make html 211 | 212 | If you open up your ``index.html``, 213 | you should see the basic structure of your docs from the included ``toctree`` directive. 214 | 215 | Extra Credit 216 | ************ 217 | 218 | Have some extra time left? 219 | Check out these other cool things you can do with Sphinx. 220 | 221 | Make a manpage 222 | --------------- 223 | 224 | The beauty of Sphinx is that it can output in multiple formats, 225 | not just HTML. 226 | All of those formats share the same base format though, 227 | so you only have to change things in one place. 228 | So you can generate a manpage for your docs:: 229 | 230 | make man 231 | 232 | This will place a manpage in ``_build/man``. 
233 | You can then view it with:: 234 | 235 | man _build/man/crawler.1 236 | 237 | Create a single page document 238 | ----------------------------- 239 | 240 | Some people prefer one large HTML document, 241 | instead of having to look through multiple pages. 242 | This is another area where Sphinx shines. 243 | You can write your documentation in multiple files to make editing and updating easier. 244 | Then if you want to distribute a single page HTML version:: 245 | 246 | make singlehtml 247 | 248 | This will combine all of your HTML pages into a single page. 249 | Check it out by opening it in your browser:: 250 | 251 | open _build/singlehtml/index.html 252 | 253 | .. note:: You'll notice that it included the documents in the order 254 | that your :ref:`TOC Tree <toctree-syntax>` was defined. 255 | 256 | Play with RST 257 | ------------- 258 | 259 | RST takes a bit of practice to wrap your head around. 260 | Go over to http://rst.ninjs.org, 261 | which is a live preview. 262 | 263 | .. note:: Use the :doc:`cheatsheet` for lots more ideas! 264 | 265 | Looking for some ideas of what the syntax contains? 266 | The :ref:`rst-primer` in the Sphinx docs is a great place to start. 267 | 268 | .. 269 | Local setup 270 | ~~~~~~~~~~~ 271 | 272 | Want to be able to run this locally? 273 | Go ahead and clone the repo and get it set up:: 274 | 275 | git clone https://github.com/anru/rsted 276 | cd rsted 277 | pip install -r pip-requirements.txt 278 | python application.py 279 | 280 | .. note:: If you already have the repository for this project, 281 | the ``rsted`` repo should already be in your ``usb`` directory. 282 | 283 | You can now view the application by going to http://localhost:5000. 284 | 285 | Moving on 286 | --------- 287 | 288 | Now it is time to move on to :doc:`step-2`. 
289 | -------------------------------------------------------------------------------- /step-2.rst: -------------------------------------------------------------------------------- 1 | Step 2: Building References & API docs 2 | ====================================== 3 | 4 | .. note:: Finish at 11:15 5 | 6 | Concepts 7 | ******** 8 | 9 | Referencing 10 | ----------- 11 | 12 | Another important Sphinx feature is that it allows referencing across documents. 13 | This is another powerful way to tie documents together. 14 | 15 | The simplest way to do this is to define an explicit reference object: 16 | 17 | .. code-block:: rst 18 | 19 | .. _reference-name: 20 | 21 | Cool section 22 | ------------ 23 | 24 | Which can then be referenced with ``:ref:``: 25 | 26 | .. code-block:: rst 27 | 28 | :ref:`reference-name` 29 | 30 | Which will then be rendered with the title of the section *Cool section*. 31 | 32 | Sphinx also supports ``:doc:`docname``` for linking to a document. 33 | 34 | Semantic Descriptions and References 35 | ------------------------------------ 36 | 37 | Sphinx also has much more powerful semantic referencing capabilities, 38 | which know all about software development concepts. 39 | 40 | Say you're creating a CLI application. 41 | You can define an option for that program quite easily: 42 | 43 | .. code-block:: rst 44 | 45 | .. option:: -i <pattern>, --ignore <pattern> 46 | 47 | Ignore pages that match a specific pattern. 48 | 49 | That can also be referenced quite simply: 50 | 51 | .. code-block:: rst 52 | 53 | :option:`-i` 54 | 55 | Sphinx includes a large number of these semantic types, including: 56 | 57 | * :rst:dir:`Module <py:module>` 58 | * :rst:dir:`Class <py:class>` 59 | * :rst:dir:`Method <py:method>` 60 | 61 | External References 62 | ------------------- 63 | 64 | Sphinx also includes a number of pre-defined references for external concepts. 65 | Things like PEP's and RFC's: 66 | 67 | .. code-block:: rst 68 | 69 | You can learn more about this at :pep:`8` or :rfc:`1984`. 
70 | 71 | You can read more about this in the Sphinx :ref:`inline-markup` docs. 72 | 73 | Automatically generating this markup 74 | ------------------------------------ 75 | 76 | Of course, Sphinx wants to make your life easy. 77 | It includes ways to automatically create these object definitions for your own code. 78 | This is called ``autodoc``, 79 | which allows you to use syntax like this: 80 | 81 | .. code-block:: rst 82 | 83 | .. automodule:: crawler 84 | 85 | and have it document the full Python module importable as ``crawler``. 86 | You can also do a full range of auto functions: 87 | 88 | .. code-block:: rst 89 | 90 | .. autoclass:: 91 | .. autofunction:: 92 | .. autoexception:: 93 | 94 | .. warning:: The module must be importable by Sphinx when running. 95 | We'll cover how to do this in the Tasks below. 96 | 97 | You can read more about this in the Sphinx :mod:`~sphinx.ext.autodoc` docs. 98 | 99 | Tasks 100 | ***** 101 | 102 | Referencing Code 103 | ---------------- 104 | 105 | Let's go ahead and add a cookbook to our documentation. 106 | Users will often come to your project to solve the same problems. 107 | Including a Cookbook or Examples section will be a great resource for this content. 108 | 109 | In your ``cookbook.rst``, 110 | add the following: 111 | 112 | .. literalinclude:: crawler/docs/step2/cookbook.txt 113 | :language: rst 114 | :linenos: 115 | 116 | .. note:: 117 | Live Preview: :doc:`crawler/docs/step2/cookbook` 118 | 119 | Remember, you will need to use ``:option:`` blocks here. 120 | This is because they are referencing a command line option for our program. 121 | 122 | Adding Reference Targets 123 | ------------------------ 124 | 125 | Now that we have pointed at our CLI options, 126 | we need to actually define them. 127 | In your ``cli.rst`` file, 128 | add the following: 129 | 130 | .. literalinclude:: crawler/docs/step2/cli.txt 131 | :language: rst 132 | :linenos: 133 | 134 | .. 
note:: 135 | Live Preview: :doc:`crawler/docs/step2/cli` 136 | 137 | Here you are documenting the actual options your code takes. 138 | 139 | Try it out 140 | ~~~~~~~~~~ 141 | 142 | Let's go ahead and build the docs and see what happens. 143 | Do a:: 144 | 145 | make html 146 | 147 | Here you will see that the ``:option:`` blocks magically become links to the definition. 148 | This is your first taste of Semantic Markup. 149 | With Sphinx, 150 | we are able to simply say that something is an ``option``, 151 | and then it handles everything for us; 152 | linking between the definition and the usage. 153 | 154 | Importing Code 155 | -------------- 156 | 157 | Being able to define options and link to them is pretty neat. 158 | Wouldn't it be great if we could do that with actual code too? 159 | Sphinx makes this easy, 160 | let's take a look. 161 | 162 | We'll go ahead and create an ``api.rst`` that will hold our API reference: 163 | 164 | .. literalinclude:: crawler/docs/step2/api.txt 165 | :language: rst 166 | :linenos: 167 | 168 | .. note:: 169 | Live Preview: :doc:`crawler/docs/step2/api` 170 | 171 | Remember, you'll need to use the ``.. autoclass::`` directive to pull in your source code. 172 | This will render the docstrings of your Python code nicely. 173 | 174 | Requirements 175 | ------------ 176 | 177 | In order to build your code, 178 | it needs to be able to import it. 179 | This means it needs all of the required Python modules you import in the code. 180 | 181 | If you have third party dependencies, 182 | that means that you have to have them installed in your Python environment. 183 | Luckily, 184 | for most cases you can actually mock these variables using `autodoc_mock_imports <https://www.sphinx-doc.org/en/master/usage/extensions/autodoc.html#confval-autodoc_mock_imports>`_. 185 | 186 | In your ``conf.py`` go ahead and add:: 187 | 188 | autodoc_mock_imports = ['bs4', 'requests'] 189 | 190 | This will allow your docs to import the example code without requiring those modules be installed. 
191 | 192 | Tell Sphinx about your code 193 | --------------------------- 194 | 195 | When Sphinx runs autodoc, 196 | it imports your Python code to pull off the docstrings. 197 | This means that Sphinx has to be able to see your code. 198 | We'll need to add our ``PYTHONPATH`` to our ``conf.py`` so it can import the code. 199 | 200 | If you open up your ``conf.py`` file, 201 | you should see something close to this on line 18: 202 | 203 | .. literalinclude:: crawler/docs/step1/conf.py 204 | :lines: 18-21 205 | 206 | As it notes, 207 | you need to let it know the path to your Python source. 208 | In our example it will be ``../src/``, 209 | so go ahead and put that in this setting. 210 | 211 | .. note:: You should always use relative paths here. 212 | Part of the value of Sphinx is having your docs build on other people's computers, 213 | and if you hard code local paths that won't work! 214 | 215 | Try it out 216 | ~~~~~~~~~~ 217 | 218 | Now go ahead and regenerate your docs and look at the magic that happened:: 219 | 220 | make html 221 | 222 | Your Python docstrings have been magically imported into the project. 223 | 224 | Tie it all together 225 | ------------------- 226 | 227 | Now let's link directly to that for users who come in to the project. 228 | Update your ``index.rst`` to look like: 229 | 230 | .. literalinclude:: crawler/docs/step2/index.txt 231 | :language: rst 232 | :linenos: 233 | 234 | .. note:: 235 | Live Preview: :doc:`crawler/docs/step2/index` 236 | 237 | One last time, 238 | let's rebuild those docs:: 239 | 240 | make html 241 | 242 | .. warning:: You now have awesome documentation! :) 243 | 244 | Now you have a beautiful documentation reference that is coming directly from your code. 245 | This means that every time you change your code, 246 | it will automatically be reflected in your documentation. 247 | 248 | The beauty of this approach is that it allows you to keep your prose and reference documentation in the same place. 
249 | It even lets you semantically reference the code from inside the docs. 250 | This is amazingly powerful and a great way to write documentation. 251 | 252 | Extra Credit 253 | ************ 254 | 255 | Have some extra time left? 256 | Let's look through the code to understand what's happening here more. 257 | 258 | Look through intersphinx 259 | ------------------------ 260 | 261 | Intersphinx allows you to bring the power of Sphinx references to multiple projects. 262 | It lets you pull in references, 263 | and semantically link them across projects. 264 | For example, 265 | in this guide we reference the Sphinx docs a lot, 266 | so we have this intersphinx setting:: 267 | 268 | intersphinx_mapping = { 269 | 'sphinx': ('http://sphinx-doc.org/', None), 270 | } 271 | 272 | Which allows us to add a prefix to references and have them resolve: 273 | 274 | .. code-block:: rst 275 | 276 | :ref:`sphinx:inline-markup` 277 | 278 | We can also ignore the prefix, 279 | and Sphinx will fall back to intersphinx references if none exist in the current project: 280 | 281 | .. code-block:: rst 282 | 283 | :ref:`inline-markup` 284 | 285 | You can read more about this in the :mod:`~sphinx.ext.intersphinx` docs. 286 | 287 | Understand the code 288 | ------------------- 289 | 290 | A lot of the magic that is happening in `Importing Code`_ above is actually in the source code. 291 | 292 | Check out the code for ``crawler/main.py``: 293 | 294 | .. literalinclude:: crawler/src/crawler/main.py 295 | :linenos: 296 | 297 | As you can see, 298 | we're heavily using RST in our docstrings. 299 | This gives us the same power as we have in Sphinx, 300 | but allows it to live within the code base. 301 | 302 | This approach of having the docs live inside the code is great for some things. 303 | However, 304 | the power of Sphinx allows you to mix docstrings and prose documentation together. 
305 | This lets you keep the amount of documentation inside your source code manageable, while longer explanations live in your prose docs. 306 | 307 | Moving on 308 | --------- 309 | 310 | Could it get better? 311 | In fact, 312 | it can and it will. 313 | Let's go on to :doc:`step-3`. 314 | -------------------------------------------------------------------------------- /step-3.rst: -------------------------------------------------------------------------------- 1 | Step 3: Keeping Documentation Up to Date 2 | ======================================== 3 | 4 | Now we have a wonderful set of documentation, 5 | so we want to make sure it stays up to date and correct. 6 | 7 | There are two factors here: 8 | 9 | * The documentation is up to date with the code 10 | * The user is seeing the latest version of the docs 11 | 12 | We will solve the first problem with Sphinx's :mod:`~sphinx.ext.doctest` module. 13 | The second problem we will solve by deploying our docs to `Read the Docs`_. 14 | 15 | .. _step-3-concepts: 16 | 17 | Concepts 18 | ******** 19 | 20 | Testing your code 21 | ----------------- 22 | 23 | Sphinx ships with a ``doctest`` module which is quite powerful. 24 | It allows you to run tests against your code inside your docs. 25 | This means that you can verify all of the code examples work, 26 | so that your docs are always up to date with your code! 27 | 28 | .. warning:: This only works for Python currently. 29 | 30 | You can read the full Sphinx docs for :mod:`~sphinx.ext.doctest`, 31 | but here is a basic example: 32 | 33 | .. code-block:: rst 34 | 35 | .. doctest:: 36 | 37 | >>> sum([2, 2]) 38 | 4 39 | 40 | When you run this example, 41 | Sphinx will validate the return is what is expected. 42 | 43 | If you need any other code to be run, 44 | but not output to the user, 45 | you can use ``testsetup``: 46 | 47 | 48 | .. code-block:: rst 49 | 50 | .. testsetup:: 51 | 52 | import os 53 | 54 | x = 4 55 | 56 | This will then be available in the examples that you actually show your user. 
57 | 58 | Hosting docs on Read the Docs 59 | ----------------------------- 60 | 61 | Read the Docs (https://readthedocs.org) is an open source doc hosting site. 62 | It's built in Django, 63 | and is free to use for open source projects. 64 | It hosts Sphinx documentation, 65 | automatically building it each time you make a commit. 66 | 67 | Read the Docs gives you a number of additional features, 68 | over hosting Sphinx yourself: 69 | 70 | * You can add Versions to your project for each tag & branch. 71 | * You can get alerts for when your doc build fails 72 | * You can search across the full set of docs with Elastic Search 73 | 74 | We'll be putting your docs up on Read the Docs at the end of this tutorial. 75 | 76 | Tasks 77 | ***** 78 | 79 | Add doctests to our utils 80 | ------------------------- 81 | 82 | The utils module inside ``crawler`` is a good candidate for testing. 83 | It has small, 84 | self-contained pieces of logic that will work great as doctests. 85 | 86 | Open your ``api.rst``, and update it to look like: 87 | 88 | .. literalinclude:: crawler/docs/step3/api.txt 89 | :language: rst 90 | :linenos: 91 | 92 | .. note:: 93 | Live Preview: :doc:`crawler/docs/step3/api` 94 | 95 | Now go ahead and add the RST markup that is covered above in the :ref:`step-3-concepts` section. 96 | 97 | As you can see here, 98 | we are actually testing our logic. 99 | It also acts as documentation for your users, 100 | and is included in the output of your documentation. 101 | 102 | These doctests do double duty, 103 | acting as **tests and documentation**. 104 | 105 | Caveats 106 | ~~~~~~~ 107 | 108 | Note that we have to import our code in the ``testsetup::`` block. 109 | This is so that Sphinx can call the functions properly in our doctest blocks. 110 | This is hidden in the output of the docs though, 111 | so users won't be confused. 112 | 113 | .. note:: You can also put doctest blocks directly in your docstrings. 
114 | They will need to include full import paths though, 115 | as Sphinx can't guarantee the ``testsetup::`` directive will be called. 116 | 117 | Test your docs 118 | -------------- 119 | 120 | You can now go ahead and test your docs:: 121 | 122 | make doctest 123 | 124 | .. note:: You will need to make sure to add ``sphinx.ext.doctest`` to your ``extensions``. 125 | Open up your ``conf.py`` file and make sure that you have it there. 126 | 127 | It should provide output that looks similar to this:: 128 | 129 | Doctest summary 130 | =============== 131 | 5 tests 132 | 2 failures in tests 133 | 0 failures in setup code 134 | 0 failures in cleanup code 135 | build finished with problems. 136 | 137 | As you can see, 138 | some of the tests are broken! 139 | You should go ahead and fix the tests :) 140 | 141 | Requirements 142 | ------------ 143 | 144 | In order for Read the Docs to build your code, 145 | it needs to be able to import it. 146 | This means it needs all of the required Python modules you import in the code. 147 | 148 | You can add a ``requirements.txt`` to the top-level of your project: 149 | 150 | .. literalinclude:: requirements.txt 151 | 152 | Read the Docs 153 | ------------- 154 | 155 | Last but not least, 156 | once you've written your documentation you have to put it somewhere for the world to see! 157 | Read the Docs makes this quite simple, 158 | and is free for all open source projects. 159 | 160 | * Register for an account at http://readthedocs.org 161 | * Click the *Import Project* button 162 | * Add the URL for a specific repository you want to build docs for 163 | * Sit back and have a drink while Read the Docs does the rest. 164 | 165 | It will: 166 | 167 | * Pull down your code 168 | * Install your ``requirements.txt`` 169 | * Build HTML, PDF, and ePub of your docs 170 | * Serve it up online at ``http://<project name>.readthedocs.org`` 171 | 172 | 173 | 174 | Extra Credit 175 | ************ 176 | 177 | Have some extra time left? 
178 | Let's run the code and see if it actually works! 179 | 180 | Explore doctests more 181 | --------------------- 182 | 183 | Sphinx's :mod:`~sphinx.ext.doctest` module has more interesting options. 184 | You can do things that look more like normal unit tests, 185 | as well as specific "doctest-style" testing. 186 | Go in and re-write one of the existing tests to use the ``testcode`` directive instead of the ``doctest`` directive. 187 | 188 | Run the crawler 189 | --------------- 190 | 191 | Go ahead and run the crawler against the Read the Docs documentation:: 192 | 193 | # in crawler/src/crawler 194 | python main.py -u https://docs.readthedocs.org/en/latest/ 195 | 196 | You should see your terminal start printing output, 197 | if your internet is working. 198 | 199 | Can you add another command line option, 200 | and document it? 201 | 202 | Moving on 203 | --------- 204 | 205 | Now we are at the last part of our Tutorial. 206 | Let's head on over to :doc:`finish`. -------------------------------------------------------------------------------- /useful-links.rst: -------------------------------------------------------------------------------- 1 | Useful Links 2 | ============ 3 | 4 | During Tutorial 5 | --------------- 6 | 7 | * Local ``rsted`` instance: http://localhost:5000/ 8 | 9 | Documentation 10 | ------------- 11 | 12 | * Docutils documentation: http://docutils.sourceforge.net/docs/ 13 | * Sphinx Documentation: http://sphinx-doc.org 14 | * Read the Docs Documentation: https://docs.readthedocs.org 15 | 16 | Third party links 17 | ----------------- 18 | 19 | --------------------------------------------------------------------------------