├── .gitignore ├── ChangeLog ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.rst ├── bin ├── copyexample └── stresstester ├── docs ├── Makefile └── source │ ├── conf.py │ └── index.rst ├── example ├── __init__.py ├── certs │ ├── ca.cert │ ├── ca.key │ ├── server.cert │ └── server.key └── example.py ├── setup.py ├── tests ├── __init__.py ├── httpserver_test.py └── test.py └── zygote ├── __init__.py ├── _httpserver.py ├── _httpserver_2.py ├── accounting.py ├── handlers.py ├── main.py ├── master.py ├── message.py ├── resources ├── __init__.py ├── static │ ├── __init__.py │ ├── base.css │ ├── favicon.ico │ ├── main.js │ ├── mustache.js │ ├── reset-min.css │ └── template.html └── templates │ ├── __init__.py │ └── home.html ├── util.py └── worker.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[co] 2 | *~ 3 | current 4 | tmp_* 5 | zygote.egg-info 6 | docs/build 7 | -------------------------------------------------------------------------------- /ChangeLog: -------------------------------------------------------------------------------- 1 | 2013-12-30 David Selassie 2 | 3 | * Version is now pegged in setup.py 4 | * Package is actually declared in setup.py 5 | * Resources now live inside the package and are distributed 6 | 7 | 2013-07-17 Baris Metin 8 | 9 | * merge @tranminhh's patch to sanitize headers 10 | * Bump version to 0.5.2 11 | 12 | 2013-05-29 Baris Metin 13 | 14 | * Remove redundant sys.path.insert from zygote worker 15 | * Bump version to 0.5.1 16 | 17 | 2013-05-24 Baris Metin 18 | 19 | * Add load .pth files from basepath at zygote initialization time 20 | * Bump version to 0.5 21 | 22 | 2013-05-09 Baris Metin 23 | 24 | * Merge David Selassie's install-ioloop branch (worker installs its ioloop as the global ioloop) 25 | * bump version to 0.4 26 | * ChangeLog format is now GNU style 27 | 28 | 2013-04-30 Baris Metin 29 | 30 | * fix remote_ip in status handler 31 | * bump version to 0.3 32 | 33 | 
2013-01-02 Eskil Olsen 34 | 35 | * Add a --name/-n option to set proctitle name (eskil@eskil.org). 36 | * bump version to 0.2 37 | 38 | 2012-11-14 Eskil Olsen 39 | 40 | * Starting a ChangeLog file (eskil@yelp.com) 41 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 
35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include zygote/resources * 2 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: default pyflakes clean test production docs 2 | 3 | default: docs 4 | 5 | pyflakes: 6 | find zygote tests -name '*.py' -print0 | xargs -0 pyflakes 7 | 8 | clean: 9 | find . 
-name '*.py[co]' -delete 10 | rm -rf tmp_* current 11 | 12 | test: 13 | @testify -v tests 14 | 15 | production: 16 | 17 | docs: 18 | make -C docs html 19 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | docs/source/index.rst -------------------------------------------------------------------------------- /bin/copyexample: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import shutil 5 | import signal 6 | import sys 7 | 8 | if __name__ == '__main__': 9 | revision = os.urandom(4).encode('hex') 10 | directory = './tmp_%s' % revision 11 | os.makedirs(directory) 12 | shutil.copyfile('example/example.py', os.path.join(directory, 'example.py')) 13 | if os.path.exists('current'): 14 | os.unlink('current') 15 | os.symlink(directory, 'current') 16 | if len(sys.argv) >= 2: 17 | os.kill(int(sys.argv[1]), signal.SIGHUP) 18 | print directory 19 | -------------------------------------------------------------------------------- /bin/stresstester: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import optparse 4 | import sys 5 | import time 6 | 7 | import tornado.ioloop 8 | import tornado.httpclient 9 | 10 | class ClientLoop(object): 11 | 12 | def __init__(self, url, io_loop, delay=0): 13 | self.url = url 14 | self.delay = delay 15 | self.http_client = tornado.httpclient.AsyncHTTPClient(io_loop) 16 | 17 | def fetch(self): 18 | self.http_client.fetch(self.url, self.callback) 19 | 20 | def callback(self, response): 21 | io_loop = self.http_client.io_loop 22 | if self.delay: 23 | io_loop.add_timeout(time.time() + self.delay, self.fetch) 24 | else: 25 | io_loop.add_callback(self.fetch) 26 | 27 | if __name__ == '__main__': 28 | parser = optparse.OptionParser() 29 | parser.add_option('-c', '--concurrency', 
type='int', default=4, help='The number of concurrent clients to run') 30 | parser.add_option('-d', '--delay', type='float', default=0, help='The delay between fetches') 31 | opts, args = parser.parse_args() 32 | if len(args) != 1: 33 | parser.error('must pass exactly one url as an arg') 34 | sys.exit(1) 35 | url = args[0] 36 | 37 | io_loop = tornado.ioloop.IOLoop.instance() 38 | for x in xrange(opts.concurrency): 39 | ClientLoop(url, io_loop, opts.delay).fetch() 40 | try: 41 | io_loop.start() 42 | except KeyboardInterrupt: 43 | pass 44 | 45 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = build 9 | 10 | # Internal variables. 11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 14 | 15 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest 16 | 17 | help: 18 | @echo "Please use \`make ' where is one of" 19 | @echo " html to make standalone HTML files" 20 | @echo " dirhtml to make HTML files named index.html in directories" 21 | @echo " singlehtml to make a single large HTML file" 22 | @echo " pickle to make pickle files" 23 | @echo " json to make JSON files" 24 | @echo " htmlhelp to make HTML files and a HTML help project" 25 | @echo " qthelp to make HTML files and a qthelp project" 26 | @echo " devhelp to make HTML files and a Devhelp project" 27 | @echo " epub to make an epub" 28 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 29 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 30 | @echo " text to make 
text files" 31 | @echo " man to make manual pages" 32 | @echo " changes to make an overview of all changed/added/deprecated items" 33 | @echo " linkcheck to check all external links for integrity" 34 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 35 | 36 | clean: 37 | -rm -rf $(BUILDDIR)/* 38 | 39 | html: 40 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 41 | @echo 42 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 43 | 44 | dirhtml: 45 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 46 | @echo 47 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 48 | 49 | singlehtml: 50 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 51 | @echo 52 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 53 | 54 | pickle: 55 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 56 | @echo 57 | @echo "Build finished; now you can process the pickle files." 58 | 59 | json: 60 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 61 | @echo 62 | @echo "Build finished; now you can process the JSON files." 63 | 64 | htmlhelp: 65 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 66 | @echo 67 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 68 | ".hhp project file in $(BUILDDIR)/htmlhelp." 69 | 70 | qthelp: 71 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 72 | @echo 73 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 74 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 75 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Zygote.qhcp" 76 | @echo "To view the help file:" 77 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Zygote.qhc" 78 | 79 | devhelp: 80 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 81 | @echo 82 | @echo "Build finished." 
83 | @echo "To view the help file:" 84 | @echo "# mkdir -p $$HOME/.local/share/devhelp/Zygote" 85 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Zygote" 86 | @echo "# devhelp" 87 | 88 | epub: 89 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 90 | @echo 91 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 92 | 93 | latex: 94 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 95 | @echo 96 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 97 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 98 | "(use \`make latexpdf' here to do that automatically)." 99 | 100 | latexpdf: 101 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 102 | @echo "Running LaTeX files through pdflatex..." 103 | make -C $(BUILDDIR)/latex all-pdf 104 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 105 | 106 | text: 107 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 108 | @echo 109 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 110 | 111 | man: 112 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 113 | @echo 114 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 115 | 116 | changes: 117 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 118 | @echo 119 | @echo "The overview file is in $(BUILDDIR)/changes." 120 | 121 | linkcheck: 122 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 123 | @echo 124 | @echo "Link check complete; look for any errors in the above output " \ 125 | "or in $(BUILDDIR)/linkcheck/output.txt." 126 | 127 | doctest: 128 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 129 | @echo "Testing of doctests in the sources finished, look at the " \ 130 | "results in $(BUILDDIR)/doctest/output.txt." 
131 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Zygote documentation build configuration file, created by 4 | # sphinx-quickstart on Fri Jun 24 18:18:33 2011. 5 | # 6 | # This file is execfile()d with the current directory set to its containing dir. 7 | # 8 | # Note that not all possible configuration values are present in this 9 | # autogenerated file. 10 | # 11 | # All configuration values have a default; values that are commented out 12 | # serve to show the default. 13 | 14 | import sys, os 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | #sys.path.insert(0, os.path.abspath('.')) 20 | 21 | # -- General configuration ----------------------------------------------------- 22 | 23 | # If your documentation needs a minimal Sphinx version, state it here. 24 | #needs_sphinx = '1.0' 25 | 26 | # Add any Sphinx extension module names here, as strings. They can be extensions 27 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 28 | extensions = [] 29 | 30 | # Add any paths that contain templates here, relative to this directory. 31 | templates_path = ['_templates'] 32 | 33 | # The suffix of source filenames. 34 | source_suffix = '.rst' 35 | 36 | # The encoding of source files. 37 | #source_encoding = 'utf-8-sig' 38 | 39 | # The master toctree document. 40 | master_doc = 'index' 41 | 42 | # General information about the project. 
43 | project = u'Zygote' 44 | copyright = u'2011, Evan Klitzke' 45 | 46 | # The version info for the project you're documenting, acts as replacement for 47 | # |version| and |release|, also used in various other places throughout the 48 | # built documents. 49 | # 50 | # The short X.Y version. 51 | version = '0.1.0' 52 | # The full version, including alpha/beta/rc tags. 53 | release = '0.1.0' 54 | 55 | # The language for content autogenerated by Sphinx. Refer to documentation 56 | # for a list of supported languages. 57 | #language = None 58 | 59 | # There are two options for replacing |today|: either, you set today to some 60 | # non-false value, then it is used: 61 | #today = '' 62 | # Else, today_fmt is used as the format for a strftime call. 63 | #today_fmt = '%B %d, %Y' 64 | 65 | # List of patterns, relative to source directory, that match files and 66 | # directories to ignore when looking for source files. 67 | exclude_patterns = [] 68 | 69 | # The reST default role (used for this markup: `text`) to use for all documents. 70 | #default_role = None 71 | 72 | # If true, '()' will be appended to :func: etc. cross-reference text. 73 | #add_function_parentheses = True 74 | 75 | # If true, the current module name will be prepended to all description 76 | # unit titles (such as .. function::). 77 | #add_module_names = True 78 | 79 | # If true, sectionauthor and moduleauthor directives will be shown in the 80 | # output. They are ignored by default. 81 | #show_authors = False 82 | 83 | # The name of the Pygments (syntax highlighting) style to use. 84 | pygments_style = 'sphinx' 85 | 86 | # A list of ignored prefixes for module index sorting. 87 | #modindex_common_prefix = [] 88 | 89 | 90 | # -- Options for HTML output --------------------------------------------------- 91 | 92 | # The theme to use for HTML and HTML Help pages. See the documentation for 93 | # a list of builtin themes. 
94 | html_theme = 'default' 95 | 96 | # Theme options are theme-specific and customize the look and feel of a theme 97 | # further. For a list of options available for each theme, see the 98 | # documentation. 99 | #html_theme_options = {} 100 | 101 | # Add any paths that contain custom themes here, relative to this directory. 102 | #html_theme_path = [] 103 | 104 | # The name for this set of Sphinx documents. If None, it defaults to 105 | # " v documentation". 106 | #html_title = None 107 | 108 | # A shorter title for the navigation bar. Default is the same as html_title. 109 | #html_short_title = None 110 | 111 | # The name of an image file (relative to this directory) to place at the top 112 | # of the sidebar. 113 | #html_logo = None 114 | 115 | # The name of an image file (within the static path) to use as favicon of the 116 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 117 | # pixels large. 118 | #html_favicon = None 119 | 120 | # Add any paths that contain custom static files (such as style sheets) here, 121 | # relative to this directory. They are copied after the builtin static files, 122 | # so a file named "default.css" will overwrite the builtin "default.css". 123 | html_static_path = ['_static'] 124 | 125 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 126 | # using the given strftime format. 127 | #html_last_updated_fmt = '%b %d, %Y' 128 | 129 | # If true, SmartyPants will be used to convert quotes and dashes to 130 | # typographically correct entities. 131 | #html_use_smartypants = True 132 | 133 | # Custom sidebar templates, maps document names to template names. 134 | #html_sidebars = {} 135 | 136 | # Additional templates that should be rendered to pages, maps page names to 137 | # template names. 138 | #html_additional_pages = {} 139 | 140 | # If false, no module index is generated. 141 | #html_domain_indices = True 142 | 143 | # If false, no index is generated. 
144 | #html_use_index = True 145 | 146 | # If true, the index is split into individual pages for each letter. 147 | #html_split_index = False 148 | 149 | # If true, links to the reST sources are added to the pages. 150 | #html_show_sourcelink = True 151 | 152 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 153 | #html_show_sphinx = True 154 | 155 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 156 | #html_show_copyright = True 157 | 158 | # If true, an OpenSearch description file will be output, and all pages will 159 | # contain a tag referring to it. The value of this option must be the 160 | # base URL from which the finished HTML is served. 161 | #html_use_opensearch = '' 162 | 163 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 164 | #html_file_suffix = None 165 | 166 | # Output file base name for HTML help builder. 167 | htmlhelp_basename = 'Zygotedoc' 168 | 169 | 170 | # -- Options for LaTeX output -------------------------------------------------- 171 | 172 | # The paper size ('letter' or 'a4'). 173 | #latex_paper_size = 'letter' 174 | 175 | # The font size ('10pt', '11pt' or '12pt'). 176 | #latex_font_size = '10pt' 177 | 178 | # Grouping the document tree into LaTeX files. List of tuples 179 | # (source start file, target name, title, author, documentclass [howto/manual]). 180 | latex_documents = [ 181 | ('index', 'Zygote.tex', u'Zygote Documentation', 182 | u'Evan Klitzke', 'manual'), 183 | ] 184 | 185 | # The name of an image file (relative to this directory) to place at the top of 186 | # the title page. 187 | #latex_logo = None 188 | 189 | # For "manual" documents, if this is true, then toplevel headings are parts, 190 | # not chapters. 191 | #latex_use_parts = False 192 | 193 | # If true, show page references after internal links. 194 | #latex_show_pagerefs = False 195 | 196 | # If true, show URL addresses after external links. 
197 | #latex_show_urls = False 198 | 199 | # Additional stuff for the LaTeX preamble. 200 | #latex_preamble = '' 201 | 202 | # Documents to append as an appendix to all manuals. 203 | #latex_appendices = [] 204 | 205 | # If false, no module index is generated. 206 | #latex_domain_indices = True 207 | 208 | 209 | # -- Options for manual page output -------------------------------------------- 210 | 211 | # One entry per manual page. List of tuples 212 | # (source start file, name, description, authors, manual section). 213 | man_pages = [ 214 | ('index', 'zygote', u'Zygote Documentation', 215 | [u'Evan Klitzke'], 1) 216 | ] 217 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | Zygote 2 | ====== 3 | 4 | Zygote is a Python program that assists in running pre-forked Python web 5 | applications. The problem it attempts to solve is the ability to deploy new 6 | code, and have HTTP workers efficiently move over to serving the new code, 7 | without causing any service interruptions. 8 | 9 | Let's say you're serving an application, and the currently deployed version is 10 | called `A`. You're trying to deploy a new version of your web app, and that 11 | version is called `B`. The ideal way this would work is like so: 12 | 13 | * A new Python interpreter `P` starts up, imports code from `B` and does all of 14 | the static initialization and loads modules. This process will only happen 15 | once. 16 | 17 | * New HTTP workers are created by forking `P`. Due to the use of forking, new 18 | workers don't need to reimport lots of code (so starting a worker is cheap), 19 | and workers can share static data structures (so starting a new worker 20 | consumes significantly less memory). 
21 | 22 | * In progress requests that are being run from the `A` version of the code 23 | should be allowed to complete, and not be interrupted; deploying new code 24 | should not cause anyone to get an HTTP 500 response, or even be noticeable by 25 | users. However, as requests from `A` complete, they should exit and allow `B` 26 | workers to be spawned. 27 | 28 | * The deploy code needs to be cognizant of how many HTTP workers the system is 29 | capable of running (usually this means don't run more workers than you have 30 | RAM allocated for), so if a machine is capable of supporting 200 workers, and 31 | 100 of them are serving requests for `A` at the time of the deploy, at first 32 | the 100 idle `A` workers can be killed and 100 `B` workers can be spawned, 33 | and then `A` workers are killed and `B` workers are spawned as the `A` 34 | workers complete their requests. 35 | 36 | This is what Zygote does. New code deployments happen more quickly, use less CPU 37 | and disk at startup, and workers use less memory. Zygote has an embedded HTTP 38 | server based on the one provided by Tornado, but this is complementary to a 39 | real, full-fledged HTTP server like Apache or Nginx -- Zygote's expertise is 40 | just in managing Python web processes. It's OK to run Apache or Nginx in front 41 | of Zygote. 42 | 43 | Zygote is known to work with Python 2.5+. Zygote has been tested with PyPy 1.5+, 44 | and should work fine. 45 | 46 | Zygote is licensed under the `Apache License, Version 2.0 47 | <http://www.apache.org/licenses/LICENSE-2.0>`_. You should find a copy of 48 | this license along with the Zygote source, in the ``LICENSE`` file. 49 | 50 | How It Works 51 | ------------ 52 | 53 | The concept of "zygote" processes on Unix systems is not new; see Chromium's 54 | `LinuxZygote <http://code.google.com/p/chromium/wiki/LinuxZygote>`_ wiki page for 55 | a description of how they're used in the Chromium browser.
In the Zygote process 56 | model there is a process tree that looks something like this:: 57 | 58 | zygote-master 59 | \ 60 | `--- zygote A 61 | | `--- worker 62 | | --- worker 63 | | 64 | `--- zygote B 65 | `--- worker 66 | --- worker 67 | 68 | (Some other zygote models like those used by HAProxy and Chrome have a slightly 69 | different, flatter process tree, but the diagram shows how it works in Zygote). 70 | 71 | When the master zygote process wants to spawn a copy of `B`, it forks, and the 72 | forked process, the `B` zygote, can then fork again to create workers. Because 73 | the workers are created using the ``fork(2)`` system call, the zygotes can 74 | import Python modules once and the workers spawned will automatically have all 75 | of the code available to them, initialized and in memory. Not only is this 76 | faster, it also saves a lot of memory compared to reimporting the code multiple 77 | times, and having identical pages in memory that are unshared. 78 | 79 | Transitioning code from `A` to `B` as described in the previous section consists 80 | of the master killing idle workers and instructing the appropriate zygote to 81 | fork. 82 | 83 | Internally, communication between the different processes is done using abstract 84 | unix domain sockets. 85 | 86 | If you use a command like ``pstree`` or ``ps -eFH`` you can verify that the process 87 | tree looks as expected. Additionally, if you have the `setproctitle` Python 88 | module available, the processes will set their titles such that it's easy to see 89 | what version of the code everything is running. 90 | 91 | How to Use It 92 | ------------- 93 | 94 | To use Zygote, you need to write a module that implements a `get_application()` 95 | method. That method can take any number of string arguments, and must return an 96 | object that can be used by a `Tornado `_ 97 | ``HTTPServer`` object (typically this would be an instance of 98 | ``tornado.web.Application``). 
Any extra arguments passed to ``zygote`` on the 99 | command line will be fed in as positional arguments to `get_application()`, so 100 | you can pass in extra data (e.g. the path to a config file) using this 101 | mechanism; however, it is strongly encouraged that any arguments to 102 | `get_application()` be made optional arguments, since the ``zygote`` command 103 | line tool doesn't have any knowledge of the expected arguments and cannot 104 | display useful help or error messages to users. 105 | 106 | Your application can be a "pure" Tornado web application, or a WSGI 107 | application. If you're using WSGI, make sure you first wrap the application 108 | using ``tornado.wsgi.WSGIContainer``. 109 | 110 | After that, an invocation of Zygote would be done like this:: 111 | 112 | python -m zygote.main -p 8000 -b ./example -m example 113 | 114 | Let's break that down. The ``python -m zygote.main`` part instructs Python to 115 | run Zygote's `main` module. The parts after that are options and arguments. The 116 | ``-p 8000`` option instructs Zygote that your application will be served from 117 | port 8000. The ``-b ./example`` option states that the symlink for your 118 | application exists at ``./example``. This does not strictly need to be a symlink, 119 | but the code versioning will only work if it is a symlink. The final option is 120 | ``-m example`` and that states that the module name for the application is 121 | ``example``. 122 | 123 | The example invocation given above will work if you run it from a clone of the 124 | Zygote source code. The ``-b`` option tells Zygote what to insert into `sys.path` 125 | to make your code runnable, and in the Zygote source tree there's a file named 126 | ``example/example.py``. In other words, `example` gets added to `sys.path` and 127 | that makes ``example.py`` importable by doing ``import example``. 
128 | 129 | Caveats 130 | ------- 131 | 132 | Zygote is essentially a wrapper around Tornado's HTTP server, and therefore only 133 | works with Tornado applications. This means your application must be a valid 134 | Tornado application to use Zygote. Note, however, that this does *not* preclude 135 | you from using WSGI applications with Zygote. On the contrary, you can use 136 | Tornado's ``tornado.wsgi.WSGIContainer`` class to wrap a WSGI application for 137 | use with Zygote. 138 | 139 | Your application must be fork-safe to use Zygote. That means that it's best if 140 | creating non-forksafe resources such as database connections is not done as a 141 | side-effect of importing your code, and only done upon initialization of the 142 | code. If you *do* have non-forksafe resources in your code, you need to write 143 | code that reinitializes those resources when the application is instantiated (or 144 | by detecting when the current PID changes). 145 | 146 | Zygote supports IPv4 only. Support for IPv6 should be easy to add (Tornado 147 | already supports it), if there's a need. 148 | 149 | The Zygote project is developed by `Yelp `_, but 150 | Zygote is not currently considered stable enough to run as the frontend for the 151 | main site (i.e. http://www.yelp.com/ is not running on Zygote). At Yelp we are 152 | using Zygote for other internal services. When the main site runs off of Zygote, 153 | you're sure to hear about it in a blog post or other announcement. 154 | 155 | Testing 156 | ------- 157 | 158 | There are unit tests, which exist in the ``tests`` directory. You should be able 159 | to run them by invoking ``make test``, e.g.:: 160 | 161 | evan@zeno ~/code/zygote (master) $ make test 162 | tests.test ZygoteTests.test_http_get ... ok in 2.53s 163 | 164 | PASSED. 1 test / 1 case: 1 passed (0 unexpected), 0 failed (0 expected). (Total test time 2.53s) 165 | 166 | Some caveats. You need a very recent version of Tornado to run the tests.
This 167 | is to force Tornado to use the "simple" http client. Hopefully the API will be 168 | stable going forward from Tornado 0.2.0. 169 | 170 | You will also need `Testify `_ to run the 171 | tests. Any version of Testify should work. 172 | -------------------------------------------------------------------------------- /example/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yelp/zygote/9798a4a66747737888950dc923ffc890f2831818/example/__init__.py -------------------------------------------------------------------------------- /example/certs/ca.cert: -------------------------------------------------------------------------------- 1 | -----BEGIN CERTIFICATE----- 2 | MIICQTCCAaoCCQDOnsDkrUiGaDANBgkqhkiG9w0BAQUFADBlMQswCQYDVQQGEwJV 3 | UzELMAkGA1UECBMCQ0ExFjAUBgNVBAcTDVNhbiBGcmFuY2lzY28xEzARBgNVBAoT 4 | ClllbHAsIEluYy4xHDAaBgkqhkiG9w0BCQEWDWZha2VAeWVscC5jb20wHhcNMTIw 5 | MzI4MjE1MzI0WhcNMTMwMzI4MjE1MzI0WjBlMQswCQYDVQQGEwJVUzELMAkGA1UE 6 | CBMCQ0ExFjAUBgNVBAcTDVNhbiBGcmFuY2lzY28xEzARBgNVBAoTClllbHAsIElu 7 | Yy4xHDAaBgkqhkiG9w0BCQEWDWZha2VAeWVscC5jb20wgZ8wDQYJKoZIhvcNAQEB 8 | BQADgY0AMIGJAoGBAMghIEzv4fr6sRjQY+kHq82zpjEfy0b2i/fDZM1ne43mxOzO 9 | LJ7NlWMwdRf9A3b9ZAA/dC1t85JezSkPx98y0zz1iSVhLJoFoAR4Pa5SHUSNuZvd 10 | hvAvES/swPUlMoawm41Qyg9PTTcvz8MWjnx7o749KE7wrCb9FAtF9IZJddCHAgMB 11 | AAEwDQYJKoZIhvcNAQEFBQADgYEAx91P6rcMxYrYMAn9EkjrAYG4fFrodp8jWX2M 12 | ujp3vvEKenj3nQAyq0tIcOIYi46Irb6aBT9X6jie0lshEvdnEkEaNbP96DfqXXBQ 13 | 0PBHNRLDryXKXkKG4nNmm0YA5oz4MWMJ4/4Y4Wuul0iwRjq9ygCoQ+cg+EwPX56s 14 | 7iYoNqY= 15 | -----END CERTIFICATE----- 16 | -------------------------------------------------------------------------------- /example/certs/ca.key: -------------------------------------------------------------------------------- 1 | -----BEGIN RSA PRIVATE KEY----- 2 | MIICXQIBAAKBgQDIISBM7+H6+rEY0GPpB6vNs6YxH8tG9ov3w2TNZ3uN5sTsziye 3 | zZVjMHUX/QN2/WQAP3QtbfOSXs0pD8ffMtM89YklYSyaBaAEeD2uUh1Ejbmb3Ybw 4 | 
LxEv7MD1JTKGsJuNUMoPT003L8/DFo58e6O+PShO8Kwm/RQLRfSGSXXQhwIDAQAB 5 | AoGBALhJcNIIL4MK6JueAfKrIrSIEqi3y1tsWxVrM17GRQs1ju81J/eP/llZXOob 6 | Dd67lSN5SwUuc5W8gJWoN3g7+DFOT7NitV2lL/JEoO9p2PR9LTAO9ECY8XRygUCE 7 | DvcUmVEuKQmltj/gzSoGfwQ32FOVNTr56JwxZ7+AaKC/35nJAkEA+dCXt9S1Yyok 8 | 96Ws3qWyvo2hcLTq3ZroHnWTQpDLPWjNuZYJEd7hmFRPxU+JcU0j9cIISj1eTPIY 9 | 8d6vG9+bhQJBAM0VnHofXfdKD1DMYrdzsOKy9RnMjgP9hI46mu4mpnVOCKlCrVIE 10 | j99Lc/sF4hCEPY0Jb1ji+uvvvAntHvi/O5sCQDXiQFdukhFprb+dBMShiQSBGClv 11 | XJmgKCEpyzG4eZ4tVPKK4jnwkUiCJxKwLT07Hl6ME62vvv9p2OeS2MJyYOECQE4N 12 | zptQOuQ2ZLPcximKN6VgdRaXmul74Kp9NaA0R6BXzcYV4X9YyyUyQ3cjmxGsMvzt 13 | vVo4MUYA3TFt9R65MccCQQCDPzvzl4jLecGpKQrSqfY6bkEbrM0s+d3FG/bP+KzC 14 | kw4emei4iYJ6RC7CMB18uEScDD0Y+mjB6rJPE4GXzjcG 15 | -----END RSA PRIVATE KEY----- 16 | -------------------------------------------------------------------------------- /example/certs/server.cert: -------------------------------------------------------------------------------- 1 | -----BEGIN CERTIFICATE----- 2 | MIICbjCCAdcCCQD/wqBrOU2+kTANBgkqhkiG9w0BAQUFADBlMQswCQYDVQQGEwJV 3 | UzELMAkGA1UECBMCQ0ExFjAUBgNVBAcTDVNhbiBGcmFuY2lzY28xEzARBgNVBAoT 4 | ClllbHAsIEluYy4xHDAaBgkqhkiG9w0BCQEWDWZha2VAeWVscC5jb20wHhcNMTIw 5 | MzI4MjE1ODI2WhcNMTMwMzI4MjE1ODI2WjCBkTELMAkGA1UEBhMCVVMxCzAJBgNV 6 | BAgTAkNBMRYwFAYDVQQHEw1TYW4gRnJhbmNpc2NvMRMwEQYDVQQKEwpZZWxwLCBJ 7 | bmMuMRUwEwYDVQQLEwxSZXZlbnVlIFRlYW0xEjAQBgNVBAMTCWxvY2FsaG9zdDEd 8 | MBsGCSqGSIb3DQEJARYOYnJ5Y2VAeWVscC5jb20wgZ8wDQYJKoZIhvcNAQEBBQAD 9 | gY0AMIGJAoGBALA1ppWfixI8BiVRwvSChthxsQqVzWgwG5dFQi0xEu00yk6g3MMX 10 | kY3FOHpqAF77oTuhxmKhWQ858KzHags1s3Xyb2IUdKqI5QfsX0QL7rkCx+tHDiQe 11 | tmn1GeQxx97NgKCN1oMtiQ2ejYo/3U7CUayOFE/kRbaSzyr08pj0hmdXAgMBAAEw 12 | DQYJKoZIhvcNAQEFBQADgYEASP7MMJVqbPNKMEAV8X2lNBUB0672oS3p7pFiBf24 13 | Z2B2o3ZD4DhHbMMByClLA38EYs74eZjkihxd0e2Vf+lK8YSyZN99/AYiy/h1Uf7d 14 | qchqbMZXiE0D7j4J7G0+jr/10nEJdj/DEXNuYSxzB6Us1sq6G3drM9hK6Tt5Iufz 15 | hcQ= 16 | -----END CERTIFICATE----- 17 | 
-------------------------------------------------------------------------------- /example/certs/server.key: -------------------------------------------------------------------------------- 1 | -----BEGIN RSA PRIVATE KEY----- 2 | MIICXAIBAAKBgQCwNaaVn4sSPAYlUcL0gobYcbEKlc1oMBuXRUItMRLtNMpOoNzD 3 | F5GNxTh6agBe+6E7ocZioVkPOfCsx2oLNbN18m9iFHSqiOUH7F9EC+65AsfrRw4k 4 | HrZp9RnkMcfezYCgjdaDLYkNno2KP91OwlGsjhRP5EW2ks8q9PKY9IZnVwIDAQAB 5 | AoGAQEYBkngUgT01vK0bIJbv2sl1m3fS6dsKZV4U6mkJD07/MDkK7XqOVBkJJdW+ 6 | dubwA9FZjKZv1tb6i/tdGeOnppXFaAZ18cQCdDgr6Oj0GWWVJnZ46EI1zX4QFvdr 7 | tP0pkDDukgG8Q0Sl4C57U44WtaRem1gVm/nhrLvl5uBrBuECQQDkKPxdHLnscW8O 8 | PDwg7lQtWWPL/177OAP2uQultRXVarDoBqskSxxTpXvCdRxOb2+HIEXS9ixdrG+6 9 | zsR6Z+rRAkEAxbXkOUQgLmcmmGtkIpeXX4em9WpYmeG3ptNWCoeH4X6PUH7R4AfX 10 | IZ8bu6955ihB0DH5DeJhvlVIP6B8XsTppwJAN9EzFBBwB8EkeyYPS7siismgmYqL 11 | EQh+J8DTcaGgisqEJu9itQlPD8OfSE5gM2wdq8AgdODWr7/8wYXOGWgM0QJAMXUA 12 | tDqQeksfYn1qvSSCn0kFwNprc4L9N+Qh39xrZ0MLgq1Wvt33ONfeTiLlMWfcnsIB 13 | dTGuFbirrA7vTZ2gfQJBAJhCllG0fQs2g4ppjYqPQPUzHdTBfSm17gsb5OT8slXp 14 | Tnnc6hNaJvnR2dh2gOxgQXLjNwGC8ShWW1MvjRbhtjU= 15 | -----END RSA PRIVATE KEY----- 16 | -------------------------------------------------------------------------------- /example/example.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import time 3 | import tornado.web 4 | 5 | start_time = time.time() 6 | log = logging.getLogger('example') 7 | log.debug('started up') 8 | 9 | class StatusHandler(tornado.web.RequestHandler): 10 | 11 | def get(self): 12 | self.content_type = 'text/plain' 13 | self.write('uptime: %1.3f\n' % (time.time() - start_time)) 14 | 15 | def initialize(*args, **kwargs): 16 | pass 17 | 18 | def get_application(*args, **kwargs): 19 | log.debug('creating application for \'example\'') 20 | return tornado.web.Application([('/', StatusHandler)], debug=False) 21 | -------------------------------------------------------------------------------- /setup.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from setuptools import find_packages 4 | from setuptools import setup 5 | 6 | 7 | setup( 8 | name = 'zygote', 9 | version = '0.5.3', 10 | author = 'Evan Klitzke', 11 | author_email = 'evan@eklitzke.org', 12 | description = 'A tornado HTTP worker management tool', 13 | license = 'Apache License 2.0', 14 | entry_points = {'console_scripts': 'zygote = zygote.main:main'}, 15 | packages = find_packages(exclude=['tests']), 16 | install_requires = ['setuptools', 'tornado'], 17 | include_package_data = True, 18 | ) 19 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yelp/zygote/9798a4a66747737888950dc923ffc890f2831818/tests/__init__.py -------------------------------------------------------------------------------- /tests/httpserver_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import testify as T 3 | 4 | from zygote._httpserver import HTTPRequest as HTTPRequest_1 5 | from zygote._httpserver_2 import HTTPRequest as HTTPRequest_2 6 | 7 | 8 | class HTTPRequestReprTest(T.TestCase): 9 | 10 | def test_http_request_repr_does_not_show_body_or_auth_headers(self): 11 | self._verify_safe_repr(HTTPRequest_1) 12 | 13 | def test_http_request_2_repr_does_not_show_body_or_auth_headers(self): 14 | self._verify_safe_repr(HTTPRequest_2) 15 | 16 | def _verify_safe_repr(self, http_request_cls): 17 | request = http_request_cls( 18 | 'POST', '/path', 19 | version='HTTP/1.1', 20 | remote_ip='127.0.0.1', 21 | host='127.0.0.1', 22 | protocol='http', 23 | body='sensitive post information', 24 | headers={'Authorization': 'Basic credentials'}) 25 | 26 | T.assert_equal(repr(request), 27 | "HTTPRequest(protocol='http', host='127.0.0.1', 
method='POST', uri='/path', version='HTTP/1.1', remote_ip='127.0.0.1', headers={'Authorization': '***redacted***'})") 28 | 29 | 30 | if __name__ == '__main__': 31 | T.run() 32 | -------------------------------------------------------------------------------- /tests/test.py: -------------------------------------------------------------------------------- 1 | import errno 2 | import random 3 | import re 4 | import signal 5 | import socket 6 | import subprocess 7 | import sys 8 | import time 9 | import os 10 | 11 | import tornado.simple_httpclient 12 | from tornado.httpclient import HTTPRequest, HTTPClient, HTTPError 13 | 14 | from testify import * 15 | 16 | num_re = re.compile(r'\d+$') 17 | stat_re = re.compile(r'^(?P<pid>\d+) \((?P<comm>[^)]+)\) (?P<state>[A-Z]) (?P<ppid>\d+) (?P<pgrp>\d+) (?P<session>\d+) (?P<tty_nr>\d+) (?P<tpgid>-?\d+) (?P<flags>\d+) \d+ (\d+) \d+ \d+.*') 18 | 19 | class ZygoteTest(TestCase): 20 | 21 | __test__ = False 22 | 23 | USE_DEVNULL = True 24 | 25 | basedir = './example' 26 | control_port = None 27 | port = None 28 | protocol = 'http' 29 | num_workers = 4 30 | extra_proc_args = [] 31 | 32 | def get_url(self, path, protocol=None): 33 | protocol = protocol or self.protocol 34 | ca_cert_path = os.path.join(self.basedir, 'certs', 'ca.cert') 35 | req = HTTPRequest('%s://localhost:%d%s' % (protocol, self.port, path), validate_cert=True, ca_certs=ca_cert_path) 36 | try: 37 | response = self.http_client.fetch(req) 38 | except socket.error, e: 39 | if e.errno == errno.ECONNREFUSED: 40 | assert False, 'socket was not connected' 41 | raise 42 | #if not self.http_client._io_loop._stopped: 43 | # self.http_client._io_loop.stop() 44 | return response 45 | 46 | def check_response(self, resp, code=200): 47 | assert_equals(resp.code, code) 48 | 49 | @setup 50 | def create_http_client(self): 51 | self.http_client = HTTPClient() 52 | if not isinstance(self.http_client._async_client, tornado.simple_httpclient.SimpleAsyncHTTPClient): 53 | self.http_client._async_client = 
tornado.simple_httpclient.SimpleAsyncHTTPClient(self.http_client._io_loop) 54 | 55 | @class_setup 56 | def choose_ports(self): 57 | if self.port is None: 58 | self.port = random.randrange(29000, 30000) 59 | if self.control_port is None: 60 | self.control_port = random.randrange(5000, 6000) 61 | 62 | @setup 63 | def create_process(self): 64 | env = os.environ.copy() 65 | #zygote_path = os.path.join(os.getcwd(), 'zygote') 66 | zygote_path = os.getcwd() 67 | if not env.get('PYTHONPATH'): 68 | env['PYTHONPATH'] = zygote_path 69 | else: 70 | parts = env['PYTHONPATH'].split(':') 71 | if parts[0] != zygote_path: 72 | env['PYTHONPATH'] = zygote_path + ':' + env['PYTHONPATH'] 73 | 74 | kw = {'env': env} 75 | if self.USE_DEVNULL: 76 | devnull = open(os.devnull, 'w') 77 | kw['stdout'] = kw['stderr'] = devnull 78 | else: 79 | kw['stdout'] = sys.stdout 80 | kw['stderr'] = sys.stderr 81 | 82 | 83 | proc_args = ['python', 'zygote/main.py', 84 | '-d', 85 | '-b', self.basedir, 86 | '-p', str(self.port), 87 | '--control-port', str(self.control_port), 88 | '--num-workers', str(self.num_workers), 89 | '-m', 'example', 90 | ] 91 | 92 | if self.extra_proc_args: 93 | proc_args += self.extra_proc_args 94 | 95 | self.proc = subprocess.Popen(proc_args, **kw) 96 | 97 | @setup 98 | def sanity_check_process(self): 99 | """Ensure the process didn't crash immediately""" 100 | assert_equals(self.proc.returncode, None) 101 | time.sleep(1) 102 | 103 | def get_process_tree(self): 104 | pid_map = {} 105 | for potential_pid in os.listdir('/proc'): 106 | if not num_re.match(potential_pid): 107 | continue 108 | pid = int(potential_pid) 109 | try: 110 | with open('/proc/%d/stat' % pid) as stat_file: 111 | data = stat_file.read().strip() 112 | except IOError: 113 | continue 114 | try: 115 | m = stat_re.match(data) 116 | ppid = int(m.group('ppid')) 117 | except AttributeError: 118 | print >>sys.stderr, "Error reading /proc/%d/stat: %s" % (pid, data) 119 | raise 120 | pid_map.setdefault(pid, []) 121 | 
pid_map.setdefault(ppid, []).append(pid) 122 | return pid_map 123 | 124 | @setup 125 | def check_process_tree(self): 126 | pid_map = self.get_process_tree() 127 | self.processes = set([self.proc.pid]) 128 | for zygote_pid in pid_map[self.proc.pid]: 129 | self.processes.add(zygote_pid) 130 | for child in pid_map.get(zygote_pid, []): 131 | self.processes.add(child) 132 | 133 | # there should be one master process, one worker process, and num_workers workers 134 | assert_equal(len(self.processes), self.num_workers + 2) 135 | 136 | @teardown 137 | def remove_process(self): 138 | self.proc.send_signal(signal.SIGTERM) 139 | assert_equals(self.proc.wait(), 0) 140 | 141 | # make sure all of the processes in the process tree terminated 142 | for pid in self.processes: 143 | try: 144 | os.kill(pid, 0) 145 | except OSError, e: 146 | if e.errno == errno.ESRCH: 147 | continue 148 | 149 | assert False, 'pid %d still alive' % (pid,) 150 | 151 | self.assert_(not self.is_port_connected(self.port)) 152 | self.assert_(not self.is_port_connected(self.control_port)) 153 | 154 | self.removed = True 155 | 156 | def is_port_connected(self, port): 157 | s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 158 | try: 159 | s.connect(('127.0.0.1', port)) 160 | except socket.error, e: 161 | if e.errno == errno.ECONNREFUSED: 162 | return False 163 | raise 164 | s.close() 165 | return True 166 | 167 | def get_zygote(self, process_tree, num_expected=1): 168 | assert_equal(len(process_tree[self.proc.pid]), num_expected) 169 | return process_tree[self.proc.pid][0] 170 | 171 | class ZygoteTests(ZygoteTest): 172 | 173 | def test_http_get(self): 174 | for x in xrange(self.num_workers + 1): 175 | resp = self.get_url('/') 176 | self.check_response(resp) 177 | assert resp.body.startswith('uptime: ') 178 | 179 | def test_kill_intermediate_zygote(self): 180 | pid_map = self.get_process_tree() 181 | zygote = self.get_zygote(pid_map) 182 | workers = pid_map[zygote] 183 | assert_equal(len(workers), 
self.num_workers) 184 | 185 | os.kill(zygote, signal.SIGKILL) 186 | time.sleep(1) 187 | 188 | new_pid_map = self.get_process_tree() 189 | for w in workers: 190 | try: 191 | os.kill(w, 0) 192 | except OSError, e: 193 | if e.errno == errno.ESRCH: 194 | continue 195 | else: 196 | raise 197 | assert False, 'worker pid %d was still alive' % (w,) 198 | assert_equal(len(new_pid_map[self.proc.pid]), 1) 199 | new_zygote = new_pid_map[self.proc.pid][0] 200 | assert_equal(len(new_pid_map[new_zygote]), self.num_workers) 201 | 202 | def test_hup(self): 203 | """Test sending SIGHUP to the master""" 204 | process_tree = self.get_process_tree() 205 | initial_zygote = self.get_zygote(process_tree) 206 | os.kill(self.proc.pid, signal.SIGHUP) 207 | time.sleep(1) 208 | 209 | process_tree = self.get_process_tree() 210 | final_zygote = self.get_zygote(process_tree) 211 | assert_not_equal(initial_zygote, final_zygote) 212 | 213 | def test_hup_intermediate(self): 214 | """Test sending SIGHUP to the zygote (this is an abnormal case!)""" 215 | process_tree = self.get_process_tree() 216 | initial_zygote = self.get_zygote(process_tree) 217 | 218 | # this should cause the intermediate to die, since it should not have a 219 | # SIGHUP handler 220 | os.kill(initial_zygote, signal.SIGHUP) 221 | time.sleep(1) 222 | 223 | process_tree = self.get_process_tree() 224 | final_zygote = self.get_zygote(process_tree) 225 | assert_not_equal(initial_zygote, final_zygote) 226 | 227 | 228 | class SecureZygoteTests(ZygoteTest): 229 | protocol = 'https' 230 | extra_proc_args = ['--cert', 'certs/server.cert', '--key', 'certs/server.key'] 231 | 232 | def test_https_get(self): 233 | for x in xrange(self.num_workers + 1): 234 | resp = self.get_url('/') 235 | self.check_response(resp) 236 | assert resp.body.startswith('uptime: ') 237 | 238 | def test_http_get(self): 239 | for x in xrange(self.num_workers + 1): 240 | assert_raises(HTTPError, self.get_url, '/', protocol='http') 241 | 242 | 243 | if __name__ == 
'__main__': 244 | main() 245 | -------------------------------------------------------------------------------- /zygote/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import pkg_resources 3 | 4 | 5 | # We should define the package version external to 6 | # it. http://stackoverflow.com/questions/2058802/how-can-i-get-the-version-defined-in-setup-py-setuptools-in-my-package 7 | version = pkg_resources.require('zygote')[0].version 8 | -------------------------------------------------------------------------------- /zygote/_httpserver.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | ###### THIS IS A MODIFIED VERSION OF TORNADO'S HTTPSERVER FROM TORNADO 1.2 ####### 3 | # 4 | # It has been modified to support a callback after headers finish, and 5 | # another callback on close. 6 | # 7 | # HTTPRequest.__repr__ has also been modified to not show body (POST can 8 | # contain sensitive data) or sensitive headers, since HTTPRequest is repr'ed 9 | # when tornado logs errors. 10 | # 11 | # These changes will most likely need to be ported to a new version if you 12 | # ever want to upgrade tornado. 13 | ################################################################################## 14 | # 15 | # Copyright 2009 Facebook 16 | # 17 | # Licensed under the Apache License, Version 2.0 (the "License"); you may 18 | # not use this file except in compliance with the License. You may obtain 19 | # a copy of the License at 20 | # 21 | # http://www.apache.org/licenses/LICENSE-2.0 22 | # 23 | # Unless required by applicable law or agreed to in writing, software 24 | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 25 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 26 | # License for the specific language governing permissions and limitations 27 | # under the License. 
28 | 29 | """A non-blocking, single-threaded HTTP server.""" 30 | 31 | import cgi 32 | import errno 33 | import logging 34 | import os 35 | import socket 36 | import time 37 | import urlparse 38 | 39 | from tornado import httputil 40 | from tornado import ioloop 41 | from tornado import iostream 42 | from tornado import stack_context 43 | 44 | from zygote.util import sanitize_headers 45 | 46 | 47 | try: 48 | import fcntl 49 | except ImportError: 50 | if os.name == 'nt': 51 | from tornado import win32_support as fcntl 52 | else: 53 | raise 54 | 55 | try: 56 | import ssl # Python 2.6+ 57 | except ImportError: 58 | ssl = None 59 | 60 | try: 61 | import multiprocessing # Python 2.6+ 62 | except ImportError: 63 | multiprocessing = None 64 | 65 | def _cpu_count(): 66 | if multiprocessing is not None: 67 | try: 68 | return multiprocessing.cpu_count() 69 | except NotImplementedError: 70 | pass 71 | try: 72 | return os.sysconf("SC_NPROCESSORS_CONF") 73 | except ValueError: 74 | pass 75 | logging.error("Could not detect number of processors; " 76 | "running with one process") 77 | return 1 78 | 79 | 80 | class HTTPServer(object): 81 | """A non-blocking, single-threaded HTTP server. 82 | 83 | A server is defined by a request callback that takes an HTTPRequest 84 | instance as an argument and writes a valid HTTP response with 85 | request.write(). request.finish() finishes the request (but does not 86 | necessarily close the connection in the case of HTTP/1.1 keep-alive 87 | requests). 
A simple example server that echoes back the URI you 88 | requested: 89 | 90 | import httpserver 91 | import ioloop 92 | 93 | def handle_request(request): 94 | message = "You requested %s\n" % request.uri 95 | request.write("HTTP/1.1 200 OK\r\nContent-Length: %d\r\n\r\n%s" % ( 96 | len(message), message)) 97 | request.finish() 98 | 99 | http_server = httpserver.HTTPServer(handle_request) 100 | http_server.listen(8888) 101 | ioloop.IOLoop.instance().start() 102 | 103 | HTTPServer is a very basic connection handler. Beyond parsing the 104 | HTTP request body and headers, the only HTTP semantics implemented 105 | in HTTPServer is HTTP/1.1 keep-alive connections. We do not, however, 106 | implement chunked encoding, so the request callback must provide a 107 | Content-Length header or implement chunked encoding for HTTP/1.1 108 | requests for the server to run correctly for HTTP/1.1 clients. If 109 | the request handler is unable to do this, you can provide the 110 | no_keep_alive argument to the HTTPServer constructor, which will 111 | ensure the connection is closed on every request no matter what HTTP 112 | version the client is using. 113 | 114 | If xheaders is True, we support the X-Real-Ip and X-Scheme headers, 115 | which override the remote IP and HTTP scheme for all requests. These 116 | headers are useful when running Tornado behind a reverse proxy or 117 | load balancer. 118 | 119 | HTTPServer can serve HTTPS (SSL) traffic with Python 2.6+ and OpenSSL. 120 | To make this server serve SSL traffic, send the ssl_options dictionary 121 | argument with the arguments required for the ssl.wrap_socket() method, 122 | including "certfile" and "keyfile": 123 | 124 | HTTPServer(applicaton, ssl_options={ 125 | "certfile": os.path.join(data_dir, "mydomain.crt"), 126 | "keyfile": os.path.join(data_dir, "mydomain.key"), 127 | }) 128 | 129 | By default, listen() runs in a single thread in a single process. 
You 130 | can utilize all available CPUs on this machine by calling bind() and 131 | start() instead of listen(): 132 | 133 | http_server = httpserver.HTTPServer(handle_request) 134 | http_server.bind(8888) 135 | http_server.start(0) # Forks multiple sub-processes 136 | ioloop.IOLoop.instance().start() 137 | 138 | start(0) detects the number of CPUs on this machine and "pre-forks" that 139 | number of child processes so that we have one Tornado process per CPU, 140 | all with their own IOLoop. You can also pass in the specific number of 141 | child processes you want to run with if you want to override this 142 | auto-detection. 143 | """ 144 | def __init__(self, request_callback, no_keep_alive=False, io_loop=None, 145 | xheaders=False, ssl_options=None, headers_callback=None, close_callback=None): 146 | """Initializes the server with the given request callback. 147 | 148 | If you use pre-forking/start() instead of the listen() method to 149 | start your server, you should not pass an IOLoop instance to this 150 | constructor. Each pre-forked child process will create its own 151 | IOLoop instance after the forking process. 152 | """ 153 | self.request_callback = request_callback 154 | self.no_keep_alive = no_keep_alive 155 | self.io_loop = io_loop 156 | self.xheaders = xheaders 157 | self.ssl_options = ssl_options 158 | self._socket = None 159 | self._started = False 160 | self._headers_callback = headers_callback 161 | self._close_callback = close_callback 162 | 163 | def listen(self, port, address=""): 164 | """Binds to the given port and starts the server in a single process. 165 | 166 | This method is a shortcut for: 167 | 168 | server.bind(port, address) 169 | server.start(1) 170 | 171 | """ 172 | self.bind(port, address) 173 | self.start(1) 174 | 175 | def bind(self, port, address=""): 176 | """Binds this server to the given port on the given IP address. 177 | 178 | To start the server, call start(). 
If you want to run this server 179 | in a single process, you can call listen() as a shortcut to the 180 | sequence of bind() and start() calls. 181 | """ 182 | assert not self._socket 183 | self._socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0) 184 | flags = fcntl.fcntl(self._socket.fileno(), fcntl.F_GETFD) 185 | flags |= fcntl.FD_CLOEXEC 186 | fcntl.fcntl(self._socket.fileno(), fcntl.F_SETFD, flags) 187 | self._socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) 188 | self._socket.setblocking(0) 189 | self._socket.bind((address, port)) 190 | self._socket.listen(128) 191 | 192 | def start(self, num_processes=1): 193 | """Starts this server in the IOLoop. 194 | 195 | By default, we run the server in this process and do not fork any 196 | additional child process. 197 | 198 | If num_processes is None or <= 0, we detect the number of cores 199 | available on this machine and fork that number of child 200 | processes. If num_processes is given and > 1, we fork that 201 | specific number of sub-processes. 202 | 203 | Since we use processes and not threads, there is no shared memory 204 | between any server code. 205 | 206 | Note that multiple processes are not compatible with the autoreload 207 | module (or the debug=True option to tornado.web.Application). 208 | When using multiple processes, no IOLoops can be created or 209 | referenced until after the call to HTTPServer.start(n). 210 | """ 211 | assert not self._started 212 | self._started = True 213 | if num_processes is None or num_processes <= 0: 214 | num_processes = _cpu_count() 215 | if num_processes > 1 and ioloop.IOLoop.initialized(): 216 | logging.error("Cannot run in multiple processes: IOLoop instance " 217 | "has already been initialized. 
You cannot call " 218 | "IOLoop.instance() before calling start()") 219 | num_processes = 1 220 | if num_processes > 1: 221 | logging.info("Pre-forking %d server processes", num_processes) 222 | for i in range(num_processes): 223 | if os.fork() == 0: 224 | import random 225 | from binascii import hexlify 226 | try: 227 | # If available, use the same method as 228 | # random.py 229 | seed = long(hexlify(os.urandom(16)), 16) 230 | except NotImplementedError: 231 | # Include the pid to avoid initializing two 232 | # processes to the same value 233 | seed = int(time.time() * 1000) ^ os.getpid() 234 | random.seed(seed) 235 | self.io_loop = ioloop.IOLoop.instance() 236 | self.io_loop.add_handler( 237 | self._socket.fileno(), self._handle_events, 238 | ioloop.IOLoop.READ) 239 | return 240 | os.waitpid(-1, 0) 241 | else: 242 | if not self.io_loop: 243 | self.io_loop = ioloop.IOLoop.instance() 244 | self.io_loop.add_handler(self._socket.fileno(), 245 | self._handle_events, 246 | ioloop.IOLoop.READ) 247 | 248 | def stop(self): 249 | self.io_loop.remove_handler(self._socket.fileno()) 250 | self._socket.close() 251 | 252 | def _handle_events(self, fd, events): 253 | while True: 254 | try: 255 | connection, address = self._socket.accept() 256 | except socket.error, e: 257 | if e.args[0] in (errno.EWOULDBLOCK, errno.EAGAIN): 258 | return 259 | raise 260 | if self.ssl_options is not None: 261 | assert ssl, "Python 2.6+ and OpenSSL required for SSL" 262 | try: 263 | connection = ssl.wrap_socket(connection, 264 | server_side=True, 265 | do_handshake_on_connect=False, 266 | **self.ssl_options) 267 | except ssl.SSLError, err: 268 | if err.args[0] == ssl.SSL_ERROR_EOF: 269 | return connection.close() 270 | else: 271 | raise 272 | except socket.error, err: 273 | if err.args[0] == errno.ECONNABORTED: 274 | return connection.close() 275 | else: 276 | raise 277 | try: 278 | if self.ssl_options is not None: 279 | stream = iostream.SSLIOStream(connection, io_loop=self.io_loop) 280 | else: 
                    stream = iostream.IOStream(connection, io_loop=self.io_loop)
                HTTPConnection(stream, address, self.request_callback,
                               self.no_keep_alive, self.xheaders, self._close_callback, self._headers_callback)
            # NOTE(review): bare except also swallows KeyboardInterrupt /
            # SystemExit; kept as-is since a callback error must not kill
            # the accept loop, but `except Exception:` would be safer.
            except:
                logging.error("Error in connection callback", exc_info=True)

class _BadRequestException(Exception):
    """Exception class for malformed HTTP requests."""
    pass

class HTTPConnection(object):
    """Handles a connection to an HTTP client, executing HTTP requests.

    We parse HTTP headers and bodies, and execute the request callback
    until the HTTP connection is closed.
    """
    def __init__(self, stream, address, request_callback, no_keep_alive=False,
                 xheaders=False, close_callback=None, headers_callback=None):
        self.stream = stream
        self.address = address
        self.request_callback = request_callback
        self.no_keep_alive = no_keep_alive
        self.xheaders = xheaders
        self._request = None
        self._request_finished = False
        # Save stack context here, outside of any request. This keeps
        # contexts from one request from leaking into the next.
        self._header_callback = stack_context.wrap(self._on_headers)
        # zygote additions: master is notified when headers arrive and when
        # the connection closes (see module header comment).
        self._headers_callback = stack_context.wrap(headers_callback)
        self._close_callback = stack_context.wrap(close_callback)
        self.stream.read_until("\r\n\r\n", self._header_callback)

    def write(self, chunk):
        """Writes a chunk of output to the stream (request must be open)."""
        assert self._request, "Request closed"
        if not self.stream.closed():
            self.stream.write(chunk, self._on_write_complete)

    def finish(self):
        """Marks the request finished; completes once the write buffer drains."""
        if self._close_callback is not None:
            self.stream.set_close_callback(self._close_callback)
        assert self._request, "Request closed"
        self._request_finished = True
        if not self.stream.writing():
            self._finish_request()

    def _on_write_complete(self):
        if self._request_finished:
            self._finish_request()

    def _finish_request(self):
        # Decide whether to keep the connection alive per HTTP/1.0 vs 1.1
        # semantics, then either close or re-arm the header read.
        if self.no_keep_alive:
            disconnect = True
        else:
            connection_header = self._request.headers.get("Connection")
            if self._request.supports_http_1_1():
                disconnect = connection_header == "close"
            elif ("Content-Length" in self._request.headers
                    or self._request.method in ("HEAD", "GET")):
                # NOTE(review): case-sensitive comparison; a client sending
                # "keep-alive" (lowercase) would be disconnected -- the
                # tornado 2.2 copy in _httpserver_2.py lowercases first.
                disconnect = connection_header != "Keep-Alive"
            else:
                disconnect = True
        self._request = None
        self._request_finished = False
        if disconnect:
            self.stream.close()
            return
        self.stream.read_until("\r\n\r\n", self._header_callback)

    def _on_headers(self, data):
        """Parses the request line + headers; reads the body if one is declared."""
        try:
            eol = data.find("\r\n")
            start_line = data[:eol]
            try:
                method, uri, version = start_line.split(" ")
            except ValueError:
                raise _BadRequestException("Malformed HTTP request line")
            if not version.startswith("HTTP/"):
                raise _BadRequestException("Malformed HTTP version in HTTP Request-Line")
            headers = httputil.HTTPHeaders.parse(data[eol:])
            self._request = HTTPRequest(
                connection=self, method=method, uri=uri, version=version,
                headers=headers, remote_ip=self.address[0])

            content_length = headers.get("Content-Length")
            if content_length:
                content_length = int(content_length)
                if content_length > self.stream.max_buffer_size:
                    raise _BadRequestException("Content-Length too long")
                if headers.get("Expect") == "100-continue":
                    self.stream.write("HTTP/1.1 100 (Continue)\r\n\r\n")
                self.stream.read_bytes(content_length, self._on_request_body)
                return

            if self._headers_callback is not None:
                self._headers_callback(start_line, self.address[0], headers)
            self.request_callback(self._request)
        except _BadRequestException, e:
            logging.info("Malformed HTTP request from %s: %s",
                         self.address[0], e)
            self.stream.close()
            return

    def _on_request_body(self, data):
        """Parses urlencoded/multipart bodies, then dispatches the request."""
        self._request.body = data
        content_type = self._request.headers.get("Content-Type", "")
        if self._request.method in ("POST", "PUT"):
            if content_type.startswith("application/x-www-form-urlencoded"):
                arguments = cgi.parse_qs(self._request.body)
                for name, values in arguments.iteritems():
                    values = [v for v in values if v]
                    if values:
                        self._request.arguments.setdefault(name, []).extend(
                            values)
            elif content_type.startswith("multipart/form-data"):
                fields = content_type.split(";")
                for field in fields:
                    k, sep, v = field.strip().partition("=")
                    if k == "boundary" and v:
                        self._parse_mime_body(v, data)
                        break
                else:
                    logging.warning("Invalid multipart/form-data")
        self.request_callback(self._request)

    def _parse_mime_body(self, boundary, data):
        # The standard allows for the boundary to be quoted in the header,
        # although it's rare (it happens at least for google app engine
        # xmpp).
        # I think we're also supposed to handle backslash-escapes
        # here but I'll save that until we see a client that uses them
        # in the wild.
        if boundary.startswith('"') and boundary.endswith('"'):
            boundary = boundary[1:-1]
        # Footer is "--boundary--" plus optional trailing CRLF.
        if data.endswith("\r\n"):
            footer_length = len(boundary) + 6
        else:
            footer_length = len(boundary) + 4
        parts = data[:-footer_length].split("--" + boundary + "\r\n")
        for part in parts:
            if not part: continue
            eoh = part.find("\r\n\r\n")
            if eoh == -1:
                logging.warning("multipart/form-data missing headers")
                continue
            headers = httputil.HTTPHeaders.parse(part[:eoh])
            name_header = headers.get("Content-Disposition", "")
            if not name_header.startswith("form-data;") or \
               not part.endswith("\r\n"):
                logging.warning("Invalid multipart/form-data")
                continue
            # Skip the blank line after headers (4 bytes) and the trailing CRLF.
            value = part[eoh + 4:-2]
            name_values = {}
            for name_part in name_header[10:].split(";"):
                name, name_value = name_part.strip().split("=", 1)
                name_values[name] = name_value.strip('"').decode("utf-8")
            if not name_values.get("name"):
                logging.warning("multipart/form-data value missing name")
                continue
            name = name_values["name"]
            if name_values.get("filename"):
                # File upload: record filename/body/content-type.
                ctype = headers.get("Content-Type", "application/unknown")
                self._request.files.setdefault(name, []).append(dict(
                    filename=name_values["filename"], body=value,
                    content_type=ctype))
            else:
                self._request.arguments.setdefault(name, []).append(value)


class HTTPRequest(object):
    """A single HTTP request.

    GET/POST arguments are available in the arguments property, which
    maps arguments names to lists of values (to support multiple values
    for individual names). Names and values are both unicode always.

    File uploads are available in the files property, which maps file
    names to list of files. Each file is a dictionary of the form
    {"filename":..., "content_type":..., "body":...}. The content_type
    comes from the provided HTTP header and should not be trusted
    outright given that it can be easily forged.

    An HTTP request is attached to a single HTTP connection, which can
    be accessed through the "connection" attribute. Since connections
    are typically kept open in HTTP/1.1, multiple requests can be handled
    sequentially on a single connection.
    """
    def __init__(self, method, uri, version="HTTP/1.0", headers=None,
                 body=None, remote_ip=None, protocol=None, host=None,
                 files=None, connection=None):
        self.method = method
        self.uri = uri
        self.version = version
        self.headers = headers or httputil.HTTPHeaders()
        self.body = body or ""
        if connection and connection.xheaders:
            # Squid uses X-Forwarded-For, others use X-Real-Ip
            # NOTE(review): header value is trusted as-is here; the tornado
            # 2.2 copy additionally validates it with _valid_ip().
            self.remote_ip = self.headers.get(
                "X-Real-Ip", self.headers.get("X-Forwarded-For", remote_ip))
            # AWS uses X-Forwarded-Proto
            self.protocol = self.headers.get(
                "X-Scheme", self.headers.get("X-Forwarded-Proto", protocol))
            if self.protocol not in ("http", "https"):
                self.protocol = "http"
        else:
            self.remote_ip = remote_ip
            if protocol:
                self.protocol = protocol
            elif connection and isinstance(connection.stream,
                                           iostream.SSLIOStream):
                self.protocol = "https"
            else:
                self.protocol = "http"
        self.host = host or self.headers.get("Host") or "127.0.0.1"
        self.files = files or {}
        self.connection = connection
        self._start_time = time.time()
        self._finish_time = None

        scheme, netloc, path, query, fragment = urlparse.urlsplit(uri)
        self.path = path
        self.query = query
        arguments = cgi.parse_qs(query)
        self.arguments = {}
        for name, values in arguments.iteritems():
            values = [v for v in values if v]
            if values: self.arguments[name] = values

    def supports_http_1_1(self):
        """Returns True if this request supports HTTP/1.1 semantics"""
        return self.version == "HTTP/1.1"

    def write(self, chunk):
        """Writes the given chunk to the response stream."""
        assert isinstance(chunk, str)
        self.connection.write(chunk)

    def finish(self):
        """Finishes this HTTP request on the open connection."""
        self.connection.finish()
        self._finish_time = time.time()

    def full_url(self):
        """Reconstructs the full URL for this request."""
        return self.protocol + "://" + self.host + self.uri

    def request_time(self):
        """Returns the amount of time it took for this request to execute."""
        if self._finish_time is None:
            return time.time() - self._start_time
        else:
            return self._finish_time - self._start_time

    def get_ssl_certificate(self):
        """Returns the client's SSL certificate, if any.

        To use client certificates, the HTTPServer must have been constructed
        with cert_reqs set in ssl_options, e.g.:
            server = HTTPServer(app,
                                ssl_options=dict(
                                    certfile="foo.crt",
                                    keyfile="foo.key",
                                    cert_reqs=ssl.CERT_REQUIRED,
                                    ca_certs="cacert.crt"))

        The return value is a dictionary, see SSLSocket.getpeercert() in
        the standard library for more details.
546 | http://docs.python.org/library/ssl.html#sslsocket-objects 547 | """ 548 | try: 549 | return self.connection.stream.socket.getpeercert() 550 | except ssl.SSLError: 551 | return None 552 | 553 | def __repr__(self): 554 | attrs = ("protocol", "host", "method", "uri", "version", "remote_ip") 555 | args = ", ".join(["%s=%r" % (n, getattr(self, n)) for n in attrs]) 556 | return "%s(%s, headers=%s)" % ( 557 | self.__class__.__name__, args, sanitize_headers(self.headers)) 558 | -------------------------------------------------------------------------------- /zygote/_httpserver_2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | ###### THIS IS A MODIFIED VERSION OF TORNADO'S HTTPSERVER FROM TORNADO 2.2 ####### 5 | # 6 | # It has been modified to support a callback after headers finish, and 7 | # another callback on close. 8 | # 9 | # HTTPRequest.__repr__ has also been modified to not show body (POST can 10 | # contain sensitive data) or sensitive headers, since HTTPRequest is repr'ed 11 | # when tornado logs errors. 12 | # 13 | # These changes will most likely need to be ported to a new version if you 14 | # ever want to upgrade tornado. 15 | ################################################################################## 16 | # 17 | # 18 | # Copyright 2009 Facebook 19 | # 20 | # Licensed under the Apache License, Version 2.0 (the "License"); you may 21 | # not use this file except in compliance with the License. You may obtain 22 | # a copy of the License at 23 | # 24 | # http://www.apache.org/licenses/LICENSE-2.0 25 | # 26 | # Unless required by applicable law or agreed to in writing, software 27 | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 28 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 29 | # License for the specific language governing permissions and limitations 30 | # under the License. 
31 | 32 | """A non-blocking, single-threaded HTTP server. 33 | 34 | Typical applications have little direct interaction with the `HTTPServer` 35 | class except to start a server at the beginning of the process 36 | (and even that is often done indirectly via `tornado.web.Application.listen`). 37 | 38 | This module also defines the `HTTPRequest` class which is exposed via 39 | `tornado.web.RequestHandler.request`. 40 | """ 41 | 42 | from __future__ import absolute_import, division, with_statement 43 | 44 | import Cookie 45 | import logging 46 | import socket 47 | import time 48 | import urlparse 49 | 50 | from tornado.escape import utf8, native_str, parse_qs_bytes 51 | from tornado import httputil 52 | from tornado import iostream 53 | from tornado.netutil import TCPServer 54 | from tornado import stack_context 55 | from tornado.util import b, bytes_type 56 | 57 | from zygote.util import sanitize_headers 58 | 59 | 60 | try: 61 | import ssl # Python 2.6+ 62 | except ImportError: 63 | ssl = None 64 | 65 | 66 | class HTTPServer(TCPServer): 67 | r"""A non-blocking, single-threaded HTTP server. 68 | 69 | A server is defined by a request callback that takes an HTTPRequest 70 | instance as an argument and writes a valid HTTP response with 71 | `HTTPRequest.write`. `HTTPRequest.finish` finishes the request (but does 72 | not necessarily close the connection in the case of HTTP/1.1 keep-alive 73 | requests). A simple example server that echoes back the URI you 74 | requested:: 75 | 76 | import httpserver 77 | import ioloop 78 | 79 | def handle_request(request): 80 | message = "You requested %s\n" % request.uri 81 | request.write("HTTP/1.1 200 OK\r\nContent-Length: %d\r\n\r\n%s" % ( 82 | len(message), message)) 83 | request.finish() 84 | 85 | http_server = httpserver.HTTPServer(handle_request) 86 | http_server.listen(8888) 87 | ioloop.IOLoop.instance().start() 88 | 89 | `HTTPServer` is a very basic connection handler. 
Beyond parsing the 90 | HTTP request body and headers, the only HTTP semantics implemented 91 | in `HTTPServer` is HTTP/1.1 keep-alive connections. We do not, however, 92 | implement chunked encoding, so the request callback must provide a 93 | ``Content-Length`` header or implement chunked encoding for HTTP/1.1 94 | requests for the server to run correctly for HTTP/1.1 clients. If 95 | the request handler is unable to do this, you can provide the 96 | ``no_keep_alive`` argument to the `HTTPServer` constructor, which will 97 | ensure the connection is closed on every request no matter what HTTP 98 | version the client is using. 99 | 100 | If ``xheaders`` is ``True``, we support the ``X-Real-Ip`` and ``X-Scheme`` 101 | headers, which override the remote IP and HTTP scheme for all requests. 102 | These headers are useful when running Tornado behind a reverse proxy or 103 | load balancer. 104 | 105 | `HTTPServer` can serve SSL traffic with Python 2.6+ and OpenSSL. 106 | To make this server serve SSL traffic, send the ssl_options dictionary 107 | argument with the arguments required for the `ssl.wrap_socket` method, 108 | including "certfile" and "keyfile":: 109 | 110 | HTTPServer(applicaton, ssl_options={ 111 | "certfile": os.path.join(data_dir, "mydomain.crt"), 112 | "keyfile": os.path.join(data_dir, "mydomain.key"), 113 | }) 114 | 115 | `HTTPServer` initialization follows one of three patterns (the 116 | initialization methods are defined on `tornado.netutil.TCPServer`): 117 | 118 | 1. `~tornado.netutil.TCPServer.listen`: simple single-process:: 119 | 120 | server = HTTPServer(app) 121 | server.listen(8888) 122 | IOLoop.instance().start() 123 | 124 | In many cases, `tornado.web.Application.listen` can be used to avoid 125 | the need to explicitly create the `HTTPServer`. 126 | 127 | 2. 
`~tornado.netutil.TCPServer.bind`/`~tornado.netutil.TCPServer.start`: 128 | simple multi-process:: 129 | 130 | server = HTTPServer(app) 131 | server.bind(8888) 132 | server.start(0) # Forks multiple sub-processes 133 | IOLoop.instance().start() 134 | 135 | When using this interface, an `IOLoop` must *not* be passed 136 | to the `HTTPServer` constructor. `start` will always start 137 | the server on the default singleton `IOLoop`. 138 | 139 | 3. `~tornado.netutil.TCPServer.add_sockets`: advanced multi-process:: 140 | 141 | sockets = tornado.netutil.bind_sockets(8888) 142 | tornado.process.fork_processes(0) 143 | server = HTTPServer(app) 144 | server.add_sockets(sockets) 145 | IOLoop.instance().start() 146 | 147 | The `add_sockets` interface is more complicated, but it can be 148 | used with `tornado.process.fork_processes` to give you more 149 | flexibility in when the fork happens. `add_sockets` can 150 | also be used in single-process servers if you want to create 151 | your listening sockets in some way other than 152 | `tornado.netutil.bind_sockets`. 

    """
    def __init__(self, request_callback, no_keep_alive=False, io_loop=None,
                 xheaders=False, ssl_options=None, headers_callback = None,
                 close_callback = None, **kwargs):
        # zygote additions: headers_callback fires when a request's headers
        # are parsed; close_callback fires when a request finishes.
        self.request_callback = request_callback
        self.no_keep_alive = no_keep_alive
        self.xheaders = xheaders
        self._headers_callback = headers_callback
        self._close_callback = close_callback
        TCPServer.__init__(self, io_loop=io_loop, ssl_options=ssl_options,
                           **kwargs)

    def handle_stream(self, stream, address):
        """TCPServer hook: wrap each accepted stream in an HTTPConnection."""
        HTTPConnection(stream, address, self.request_callback,
                       self.no_keep_alive, self.xheaders,
                       self._headers_callback, self._close_callback)


class _BadRequestException(Exception):
    """Exception class for malformed HTTP requests."""
    pass


class HTTPConnection(object):
    """Handles a connection to an HTTP client, executing HTTP requests.

    We parse HTTP headers and bodies, and execute the request callback
    until the HTTP connection is closed.
    """
    def __init__(self, stream, address, request_callback, no_keep_alive=False,
                 xheaders=False, headers_callback=None, close_callback=None):
        self.stream = stream
        if self.stream.socket.family not in (socket.AF_INET, socket.AF_INET6):
            # Unix (or other) socket; fake the remote address
            address = ('0.0.0.0', 0)
        self.address = address
        self.request_callback = request_callback
        self.no_keep_alive = no_keep_alive
        self.xheaders = xheaders
        self._request = None
        self._request_finished = False
        # Save stack context here, outside of any request. This keeps
        # contexts from one request from leaking into the next.
        self._header_callback = stack_context.wrap(self._on_headers)
        # zygote additions: no-op lambdas keep the call sites unconditional.
        if headers_callback:
            self.on_headers = stack_context.wrap(headers_callback)
        else:
            self.on_headers = lambda *args: None
        if close_callback:
            self.on_finish = stack_context.wrap(close_callback)
        else:
            self.on_finish = lambda *args: None
        self.stream.read_until(b("\r\n\r\n"), self._header_callback)
        self._write_callback = None

    def write(self, chunk, callback=None):
        """Writes a chunk of output to the stream."""
        assert self._request, "Request closed"
        if not self.stream.closed():
            self._write_callback = stack_context.wrap(callback)
            self.stream.write(chunk, self._on_write_complete)

    def finish(self):
        """Finishes the request."""
        assert self._request, "Request closed"
        self._request_finished = True
        if not self.stream.writing():
            self._finish_request()

    def _on_write_complete(self):
        if self._write_callback is not None:
            callback = self._write_callback
            self._write_callback = None
            callback()
        # _on_write_complete is enqueued on the IOLoop whenever the
        # IOStream's write buffer becomes empty, but it's possible for
        # another callback that runs on the IOLoop before it to
        # simultaneously write more data and finish the request. If
        # there is still data in the IOStream, a future
        # _on_write_complete will be responsible for calling
        # _finish_request.
        if self._request_finished and not self.stream.writing():
            self._finish_request()

    def _finish_request(self):
        # Keep-alive decision; unlike the 1.x copy, the Connection header
        # is lowercased before comparison.
        if self.no_keep_alive:
            disconnect = True
        else:
            connection_header = self._request.headers.get("Connection")
            if connection_header is not None:
                connection_header = connection_header.lower()
            if self._request.supports_http_1_1():
                disconnect = connection_header == "close"
            elif ("Content-Length" in self._request.headers
                    or self._request.method in ("HEAD", "GET")):
                disconnect = connection_header != "keep-alive"
            else:
                disconnect = True
        self._request = None
        self._request_finished = False
        # zygote addition: notify the master that this request is done.
        self.on_finish(disconnect)
        if disconnect:
            self.stream.close()
            return
        self.stream.read_until(b("\r\n\r\n"), self._header_callback)

    def _on_headers(self, data):
        """Parses the request line + headers; reads the body if one is declared."""
        try:
            data = native_str(data.decode('latin1'))
            eol = data.find("\r\n")
            start_line = data[:eol]
            try:
                method, uri, version = start_line.split(" ")
            except ValueError:
                raise _BadRequestException("Malformed HTTP request line")
            if not version.startswith("HTTP/"):
                raise _BadRequestException("Malformed HTTP version in HTTP Request-Line")
            headers = httputil.HTTPHeaders.parse(data[eol:])
            self._request = HTTPRequest(
                connection=self, method=method, uri=uri, version=version,
                headers=headers, remote_ip=self.address[0])

            content_length = headers.get("Content-Length")
            if content_length:
                content_length = int(content_length)
                if content_length > self.stream.max_buffer_size:
                    raise _BadRequestException("Content-Length too long")
                if headers.get("Expect") == "100-continue":
                    self.stream.write(b("HTTP/1.1 100 (Continue)\r\n\r\n"))
                self.stream.read_bytes(content_length, self._on_request_body)
                return

            # zygote addition: notify the master of the new request's headers.
            self.on_headers(start_line, self.address[0], headers)
            self.request_callback(self._request)
        except _BadRequestException, e:
            logging.info("Malformed HTTP request from %s: %s",
                         self.address[0], e)
            self.stream.close()
            return

    def _on_request_body(self, data):
        """Parses urlencoded/multipart bodies, then dispatches the request."""
        self._request.body = data
        content_type = self._request.headers.get("Content-Type", "")
        if self._request.method in ("POST", "PUT"):
            if content_type.startswith("application/x-www-form-urlencoded"):
                arguments = parse_qs_bytes(native_str(self._request.body))
                for name, values in arguments.iteritems():
                    values = [v for v in values if v]
                    if values:
                        self._request.arguments.setdefault(name, []).extend(
                            values)
            elif content_type.startswith("multipart/form-data"):
                fields = content_type.split(";")
                for field in fields:
                    k, sep, v = field.strip().partition("=")
                    if k == "boundary" and v:
                        httputil.parse_multipart_form_data(
                            utf8(v), data,
                            self._request.arguments,
                            self._request.files)
                        break
                else:
                    logging.warning("Invalid multipart/form-data")
        self.request_callback(self._request)


class HTTPRequest(object):
    """A single HTTP request.

    All attributes are type `str` unless otherwise noted.

    .. attribute:: method

       HTTP request method, e.g. "GET" or "POST"

    .. attribute:: uri

       The requested uri.

    .. attribute:: path

       The path portion of `uri`

    .. attribute:: query

       The query portion of `uri`

    .. attribute:: version

       HTTP version specified in request, e.g. "HTTP/1.1"

    .. attribute:: headers

       `HTTPHeader` dictionary-like object for request headers. Acts like
       a case-insensitive dictionary with additional methods for repeated
       headers.

    .. attribute:: body

       Request body, if present, as a byte string.

    .. attribute:: remote_ip

       Client's IP address as a string. If `HTTPServer.xheaders` is set,
       will pass along the real IP address provided by a load balancer
       in the ``X-Real-Ip`` header

    .. attribute:: protocol

       The protocol used, either "http" or "https". If `HTTPServer.xheaders`
       is set, will pass along the protocol used by a load balancer if
       reported via an ``X-Scheme`` header.

    .. attribute:: host

       The requested hostname, usually taken from the ``Host`` header.

    .. attribute:: arguments

       GET/POST arguments are available in the arguments property, which
       maps arguments names to lists of values (to support multiple values
       for individual names). Names are of type `str`, while arguments
       are byte strings. Note that this is different from
       `RequestHandler.get_argument`, which returns argument values as
       unicode strings.

    .. attribute:: files

       File uploads are available in the files property, which maps file
       names to lists of :class:`HTTPFile`.

    .. attribute:: connection

       An HTTP request is attached to a single HTTP connection, which can
       be accessed through the "connection" attribute. Since connections
       are typically kept open in HTTP/1.1, multiple requests can be handled
       sequentially on a single connection.
    """
    def __init__(self, method, uri, version="HTTP/1.0", headers=None,
                 body=None, remote_ip=None, protocol=None, host=None,
                 files=None, connection=None):
        self.method = method
        self.uri = uri
        self.version = version
        self.headers = headers or httputil.HTTPHeaders()
        self.body = body or ""
        if connection and connection.xheaders:
            # Squid uses X-Forwarded-For, others use X-Real-Ip
            self.remote_ip = self.headers.get(
                "X-Real-Ip", self.headers.get("X-Forwarded-For", remote_ip))
            # Fall back to the socket address if the header is not a
            # well-formed numeric IP.
            if not self._valid_ip(self.remote_ip):
                self.remote_ip = remote_ip
            # AWS uses X-Forwarded-Proto
            self.protocol = self.headers.get(
                "X-Scheme", self.headers.get("X-Forwarded-Proto", protocol))
            if self.protocol not in ("http", "https"):
                self.protocol = "http"
        else:
            self.remote_ip = remote_ip
            if protocol:
                self.protocol = protocol
            elif connection and isinstance(connection.stream,
                                           iostream.SSLIOStream):
                self.protocol = "https"
            else:
                self.protocol = "http"
        self.host = host or self.headers.get("Host") or "127.0.0.1"
        self.files = files or {}
        self.connection = connection
        self._start_time = time.time()
        self._finish_time = None

        scheme, netloc, path, query, fragment = urlparse.urlsplit(native_str(uri))
        self.path = path
        self.query = query
        arguments = parse_qs_bytes(query)
        self.arguments = {}
        for name, values in arguments.iteritems():
            values = [v for v in values if v]
            if values:
                self.arguments[name] = values

    def supports_http_1_1(self):
        """Returns True if this request supports HTTP/1.1 semantics"""
        return self.version == "HTTP/1.1"

    @property
    def cookies(self):
        """A dictionary of Cookie.Morsel objects."""
        if not hasattr(self, "_cookies"):
            self._cookies = Cookie.SimpleCookie()
            if "Cookie" in self.headers:
                try:
                    self._cookies.load(
                        native_str(self.headers["Cookie"]))
                except Exception:
                    # Malformed cookie header: expose an empty mapping.
                    self._cookies = {}
        return self._cookies

    def write(self, chunk, callback=None):
        """Writes the given chunk to the response stream."""
        assert isinstance(chunk, bytes_type)
        self.connection.write(chunk, callback=callback)

    def finish(self):
        """Finishes this HTTP request on the open connection."""
        self.connection.finish()
        self._finish_time = time.time()

    def full_url(self):
        """Reconstructs the full URL for this request."""
        return self.protocol + "://" + self.host + self.uri

    def request_time(self):
        """Returns the amount of time it took for this request to execute."""
        if self._finish_time is None:
            return time.time() - self._start_time
        else:
            return self._finish_time - self._start_time

    def get_ssl_certificate(self):
        """Returns the client's SSL certificate, if any.

        To use client certificates, the HTTPServer must have been constructed
        with cert_reqs set in ssl_options, e.g.::

            server = HTTPServer(app,
                                ssl_options=dict(
                                    certfile="foo.crt",
                                    keyfile="foo.key",
                                    cert_reqs=ssl.CERT_REQUIRED,
                                    ca_certs="cacert.crt"))

        The return value is a dictionary, see SSLSocket.getpeercert() in
        the standard library for more details.
        http://docs.python.org/library/ssl.html#sslsocket-objects
        """
        try:
            return self.connection.stream.socket.getpeercert()
        except ssl.SSLError:
            return None

    def __repr__(self):
        # Body and sensitive headers are deliberately omitted (see module
        # header): tornado repr's requests when logging errors.
        attrs = ("protocol", "host", "method", "uri", "version", "remote_ip")
        args = ", ".join(["%s=%r" % (n, getattr(self, n)) for n in attrs])
        return "%s(%s, headers=%s)" % (
            self.__class__.__name__, args, sanitize_headers(self.headers))

    def _valid_ip(self, ip):
        """Returns True if `ip` parses as a numeric IPv4/IPv6 address."""
        try:
            res = socket.getaddrinfo(ip, 0, socket.AF_UNSPEC,
                                     socket.SOCK_STREAM,
                                     0, socket.AI_NUMERICHOST)
            return bool(res)
        except socket.gaierror, e:
            if e.args[0] == socket.EAI_NONAME:
                return False
            raise
        # NOTE(review): unreachable -- both branches above return or raise.
        return True

# -- file boundary: zygote/accounting.py --

import datetime
import os
import signal
import time

import zygote.util
from zygote import message
from zygote.util import meminfo_fmt

log = zygote.util.get_logger('zygote.accounting')

def format_millis(v):
    # Formats a duration in seconds as milliseconds with one decimal;
    # returns None (implicitly) when v is None.
    if v is not None:
        return '%1.1f' % (v * 1000.0)

class Worker(object):
    # Master-side bookkeeping record for a single worker process.

    def __init__(self, pid, time_created=None):
        # time_created arrives in microseconds since the epoch (from the
        # worker's message); fall back to "now" when absent.
        if time_created:
            self.time_created = datetime.datetime.fromtimestamp(time_created / 1e6)
        else:
            self.time_created = datetime.datetime.now()
        self.pid = pid
        self.vsz = ''
        self.rss = ''
        self.shr = ''
        self.remote_ip = None
        self.request_count = 0
        self.request_started = None
        self.http = None

    def update_meminfo(self):
        """Refreshes vsz/rss/shr from the process's current memory usage."""
        for k, v in meminfo_fmt(self.pid).iteritems():
            setattr(self, k, v)

    def __eq__(self, other_pid):
        # Workers compare equal to their pid (used for lookups).
        # NOTE(review): __hash__ is not defined alongside __eq__; fine under
        # Python 2 (inherited id-hash) but would make Worker unhashable in
        # Python 3.
        return self.pid == other_pid

    def start_request(self, remote_ip, http):
        """Records that this worker started serving a request."""
        self.remote_ip = remote_ip
        self.request_started = time.time()
        self.request_count += 1
        self.http = http

    def end_request(self):
        """Clears per-request state once the worker goes idle."""
        self.remote_ip = None
        self.http = None
        self.request_started = None

    def to_dict(self):
        """Returns a JSON-friendly snapshot of this worker's state."""
        d = {'pid': self.pid,
             'vsz': self.vsz,
             'rss': self.rss,
             'shr': self.shr,
             'time_created': self.time_created,
             'remote_ip': self.remote_ip,
             'request_count': self.request_count,
             'http': self.http}
        if self.request_started is None:
            d['elapsed'] = None
            d['elapsed_formatted'] = None
        else:
            now = time.time()
            d['elapsed'] = now - self.request_started
            d['elapsed_formatted'] = format_millis(d['elapsed'])
        return d

    def request_exit(self):
        """Instruct this worker to exit"""
        os.kill(self.pid, signal.SIGTERM)

class Zygote(object):
    """Stub representing the zygote from the master side of the fork. Is not
    actually the zygote, but sends some commands over the unix domain socket
    to the zygote.

    TODO: Move parsing of messages *from* the unix domain socket into this object,
    and use a regular callback system
    """
    # Class-wide monotonically increasing generation counter.
    __generation = 0

    def __init__(self, pid, basepath, io_loop, canary=False):
        """Initialize using real Zygote's pid, basepath and master's
        io_loop.

        Master also marks the zygote as 'canary' at initialization if
        it's an update to a newer revision of the source. Only if the
        canary is live, master continues on transition workers.
        """
        self.basepath = basepath
        self.pid = pid
        self.worker_map = {}
        self.time_created = datetime.datetime.now()
        self.vsz = ''
        self.rss = ''
        self.shr = ''
        self.connected = False
        self.send_queue = []
        self.write_queue_active = False
        self.canary = canary

        # wait until the control_socket can be connected, since it might take a
        # moment before the forked child creates their socket. a better way to
        # do this would be to have the parent create the control_socket and then
        # the child inherits it through forking
        # (the leading NUL byte makes this a Linux abstract-namespace socket)
        self.control_socket = zygote.util.AFUnixSender(io_loop)
        self.control_socket.connect('\0zygote_%d' % self.pid)

        self.shutting_down = False

        self.generation = self.__class__.__generation
        self.__class__.__generation += 1

    def update_meminfo(self):
        """Refreshes memory stats for this zygote and all of its workers."""
        for k, v in meminfo_fmt(self.pid).iteritems():
            setattr(self, k, v)
        for worker in self.worker_map.itervalues():
            worker.update_meminfo()

    def workers(self):
        """Returns the list of known Worker records."""
        return self.worker_map.values()

    def add_worker(self, pid, time_created=None):
        """Registers a newly spawned worker process by pid."""
        worker = Worker(pid, time_created)
        self.worker_map[pid] = worker

    def remove_worker(self, pid):
        """Forgets a worker; logs instead of raising if the pid is unknown."""
        try:
            del self.worker_map[pid]
        except KeyError:
            log.warning("Tried to delete unknown worker %d (did worker initialization fail?)", pid)

    def begin_http(self, pid, http):
        self.worker_map[pid].http = http

    def end_http(self, pid, http):
        self.worker_map[pid].http = None

    def idle_workers(self):
        """Returns workers not currently serving an HTTP request."""
        return [w for w in self.worker_map.itervalues() if w.http is None]

    def get_worker(self, pid):
        return self.worker_map.get(pid)

    def request_spawn(self):
        """Instruct this zygote to spawn a new worker"""
        log.debug('requesting spawn on Zygote %d', self.generation)
        self.control_socket.send(message.MessageCreateWorker.emit(''))

    def request_kill_workers(self, num_workers_to_kill):
        """Instruct this zygote to kill an idle worker"""
        self.control_socket.send(message.MessageKillWorkers.emit('%d' % num_workers_to_kill))

    def request_shut_down(self):
        """Instruct this zygote to shut down all workers"""
        self.control_socket.send(message.MessageShutDown.emit(""))
        self.shutting_down = True
| @property 160 | def worker_count(self): 161 | return len(self.worker_map) 162 | 163 | def to_dict(self): 164 | return { 165 | 'basepath': self.basepath, 166 | 'pid': self.pid, 167 | 'workers': sorted(self.worker_map.values(), key=lambda x: x.time_created), 168 | 'vsz': self.vsz, 169 | 'rss': self.rss, 170 | 'shr': self.shr, 171 | 'time_created': self.time_created, 172 | 'generation': self.generation, 173 | } 174 | 175 | class ZygoteCollection(object): 176 | 177 | def __init__(self): 178 | self.zygote_map = {} 179 | 180 | def add_zygote(self, pid, basepath, io_loop, canary=False): 181 | z = Zygote(pid, basepath, io_loop, canary=canary) 182 | self.zygote_map[pid] = z 183 | return z 184 | 185 | def update_meminfo(self): 186 | for z in self.zygote_map.values(): 187 | z.update_meminfo() 188 | 189 | def remove_zygote(self, pid): 190 | del self.zygote_map[pid] 191 | 192 | def get_worker(self, pid): 193 | for zygote in self.zygote_map.itervalues(): 194 | w = zygote.get_worker(pid) 195 | if w: 196 | return w 197 | return None 198 | 199 | def basepath_to_zygote(self, basepath): 200 | for zygote in self.zygote_map.itervalues(): 201 | if zygote.basepath == basepath: 202 | return zygote 203 | return None 204 | 205 | def __getitem__(self, pid): 206 | # ZygoteMaster requests a zygote using it's pid when it 207 | # recieves a message from the zygote. In certain cases, 208 | # ZygoteMaster can request a zygote that is already 209 | # removed. This can happen if zygote dies and master handles 210 | # the signal accordingly before reading the message from 211 | # socket. 
212 | return self.zygote_map.get(pid, None) 213 | 214 | def __iter__(self): 215 | return self.zygote_map.itervalues() 216 | 217 | def other_zygotes(self, target): 218 | return [z for z in self.zygote_map.itervalues() if z != target] 219 | 220 | def to_dict(self): 221 | return {'zygotes': sorted(self.zygote_map.values(), key=lambda x: x.time_created)} 222 | 223 | def pids(self): 224 | return self.zygote_map.keys() 225 | 226 | def worker_count(self): 227 | """Return the total number of workers""" 228 | return sum(len(z.workers()) for z in self.zygote_map.itervalues()) 229 | -------------------------------------------------------------------------------- /zygote/handlers.py: -------------------------------------------------------------------------------- 1 | from __future__ import with_statement 2 | 3 | import datetime 4 | import os 5 | from pkg_resources import resource_filename 6 | import socket 7 | import time 8 | import traceback 9 | 10 | import tornado.httpserver 11 | import tornado.web 12 | import zygote.util 13 | 14 | try: 15 | import simplejson as json 16 | except ImportError: 17 | import json 18 | 19 | class JSONEncoder(json.JSONEncoder): 20 | 21 | def default(self, obj): 22 | if hasattr(obj, 'to_dict'): 23 | return obj.to_dict() 24 | elif type(obj) is datetime.datetime: 25 | return time.mktime(obj.timetuple()) + obj.microsecond / 1e6 26 | else: 27 | return super(JSONEncoder, self).default(obj) 28 | 29 | class RequestHandler(tornado.web.RequestHandler): 30 | 31 | def get_error_html(self, status_code, **kwargs): 32 | if 500 <= status_code <= 599: 33 | self.set_header('Content-Type', 'text/plain') 34 | return traceback.format_exc() 35 | else: 36 | return super(RequestHandler, self).get_error_html(status_code, **kwargs) 37 | 38 | class TemplateHandler(RequestHandler): 39 | 40 | def get(self): 41 | self.set_header('Content-Type', 'text/plain') 42 | self.set_header('Cache-Control', 'max-age=0') 43 | static_path = self.application.settings['static_path'] 44 | with 
open(os.path.join(static_path, 'template.html')) as template: 45 | self.write(template.read()) 46 | 47 | class HTMLHandler(RequestHandler): 48 | 49 | def get(self): 50 | self.render('home.html') 51 | 52 | class JSONHandler(RequestHandler): 53 | 54 | def get(self): 55 | 56 | self.zygote_master.zygote_collection.update_meminfo() 57 | env = self.zygote_master.zygote_collection.to_dict() 58 | env['hostname'] = socket.gethostname() 59 | env['interface'], env['port'] = self.application.settings['worker_sockname'] 60 | env['pid'] = os.getpid() 61 | env['basepath'] = self.zygote_master.basepath 62 | env['time_created'] = self.zygote_master.time_created 63 | env.update(zygote.util.meminfo_fmt()) 64 | 65 | self.set_header('Content-Type', 'application/json') 66 | self.write(json.dumps(env, cls=JSONEncoder, indent=2)) 67 | 68 | def get_httpserver(io_loop, port, zygote_master, zygote_base=None, ssl_options=None): 69 | if zygote_base is not None: 70 | static_path = os.path.realpath(os.path.join(zygote_base, 'zygote', 'resources', 'static')) 71 | template_path = os.path.realpath(os.path.join(zygote_base, 'zygote', 'resources', 'templates')) 72 | else: 73 | static_path = os.path.realpath(resource_filename('zygote.resources', 'static')) 74 | template_path = os.path.realpath(resource_filename('zygote.resources', 'templates')) 75 | 76 | # We need to ensure that we keep file handles open to the static path 77 | # and template path. If they go away (from some kind of clean up) while 78 | # the app is still running, we won't be able to serve the status page. 79 | # Bad! 
80 | # 81 | # TODO: when implementing #24, these FDs will need to be cleaned up 82 | open_fds = [] 83 | open_fds.append(os.open(static_path, os.O_DIRECTORY|os.O_RDONLY)) 84 | open_fds.append(os.open(template_path, os.O_DIRECTORY|os.O_RDONLY)) 85 | 86 | JSONHandler.zygote_master = zygote_master 87 | app = tornado.web.Application([('/', HTMLHandler), 88 | ('/json', JSONHandler), 89 | ('/template', TemplateHandler)], 90 | debug=False, 91 | static_path=static_path, 92 | template_path=template_path) 93 | app.settings['worker_sockname'] = zygote_master.sock.getsockname() 94 | http_server = tornado.httpserver.HTTPServer(app, 95 | io_loop=io_loop, 96 | no_keep_alive=True, 97 | ssl_options=ssl_options, 98 | ) 99 | http_server.listen(port) 100 | return open_fds, http_server 101 | -------------------------------------------------------------------------------- /zygote/main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Copyright 2011, Yelp Inc. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | import optparse 18 | import os 19 | import sys 20 | 21 | import zygote.master 22 | import zygote.util 23 | 24 | def main(): 25 | usage = 'usage: %prog -b -m -p [module_args...]' 26 | parser = optparse.OptionParser(usage=usage) 27 | parser.add_option('-b', '--basepath', default=os.environ.get('BASEPATH', ''), help='The basepath to use') 28 | parser.add_option('--control-port', type='int', default=5100, help='The control port to listen on') 29 | parser.add_option('-d', '--debug', default=False, action='store_true', help='Enable debugging') 30 | parser.add_option('-n', '--name', default=None, help='The name of the application to set in proctitle, otherwise use the app module.') 31 | parser.add_option('--version', default=None, help='The version of the application to set in proctitle.') 32 | parser.add_option('-m', '--module', default=None, help='The name of the module holding get_application()') 33 | parser.add_option('-p', '--port', type='int', default=0, help='The port to bind on') 34 | parser.add_option('-i', '--interface', default='', help='The interface to bind on') 35 | parser.add_option('--num-workers', type='int', default=8, help='How many workers to run') 36 | parser.add_option('--max-requests', type='int', default=None, help='The maximum number of requests a child can run') 37 | parser.add_option('--zygote-base', default=None, help='The base path to the zygote') 38 | parser.add_option('--cert', default=None, help='Certificate to use for HTTPS traffic') 39 | parser.add_option('--key', default=None, help='Private key for HTTPS traffic') 40 | parser.add_option('--cacerts', default=None, help='File containing a list of root certificates') 41 | parser.add_option( 42 | '--control-socket', 43 | dest='control_socket_path', 44 | default=os.path.join(zygote.util.get_rundir(), "zygote_master.sock"), 45 | help='The socket to control zygote master at run time' 46 | ) 47 | 48 | opts, args = parser.parse_args() 49 | 50 | if not opts.basepath: 51 | 
parser.error('The `basepath` cannot be empty; specify one with -b or exporting BASEPATH') 52 | sys.exit(1) 53 | if not opts.port: 54 | parser.error('No port was specified') 55 | sys.exit(1) 56 | if not opts.module: 57 | parser.error('You must specify a module argument using -m') 58 | sys.exit(1) 59 | 60 | zygote.master.main(opts, args) 61 | 62 | if __name__ == '__main__': 63 | main() 64 | -------------------------------------------------------------------------------- /zygote/master.py: -------------------------------------------------------------------------------- 1 | import atexit 2 | import datetime 3 | import errno 4 | import fcntl 5 | import logging 6 | import os 7 | import signal 8 | import socket 9 | import struct 10 | import sys 11 | import time 12 | 13 | import tornado.ioloop 14 | 15 | from zygote import accounting 16 | from zygote import handlers 17 | from zygote import message 18 | from zygote.util import close_fds 19 | from zygote.util import safe_kill 20 | from zygote.util import setproctitle 21 | from zygote.util import wait_for_pids 22 | from zygote.util import ZygoteIOLoop 23 | from zygote.util import get_logger 24 | from zygote.util import NullHandler 25 | from zygote.util import LocklessHandler 26 | from zygote.worker import INIT_FAILURE_EXIT_CODE 27 | from zygote.worker import ZygoteWorker 28 | 29 | 30 | try: 31 | import ssl # Python 2.6+ 32 | except ImportError: 33 | ssl = None 34 | 35 | 36 | class ZygoteMaster(object): 37 | 38 | instantiated = False 39 | 40 | RECV_SIZE = 8192 41 | 42 | # number of seconds to wait between polls 43 | POLL_INTERVAL = 1.0 44 | 45 | # how many seconds to wait before sending SIGKILL to children 46 | WAIT_FOR_KILL_TIME = 10.0 47 | 48 | def __init__( 49 | self, 50 | sock, 51 | basepath, 52 | module, 53 | name, 54 | version, 55 | num_workers, 56 | control_port, 57 | control_socket_path, 58 | application_args=None, 59 | max_requests=None, 60 | zygote_base=None, 61 | ssl_options=None, 62 | debug=False 63 | ): 64 | 
self.logger = get_logger('zygote.master', debug) 65 | if self.__class__.instantiated: 66 | self.logger.error('cannot instantiate zygote master more than once') 67 | sys.exit(1) 68 | self.__class__.instantiated = True 69 | 70 | self.sock = sock 71 | self.basepath = basepath 72 | self.module = module 73 | self.name = name 74 | self.version = version 75 | self.num_workers = num_workers 76 | self.control_port = control_port 77 | self.control_socket_path = control_socket_path 78 | self.application_args = application_args or [] 79 | self.max_requests = max_requests 80 | self.zygote_base = zygote_base 81 | self.ssl_options = ssl_options 82 | self.debug = debug 83 | 84 | self.stopped = False 85 | self.started_transition = None 86 | self.prev_zygote = None 87 | self.current_zygote = None 88 | self.time_created = datetime.datetime.now() 89 | self.io_loop = ZygoteIOLoop(log_name='zygote.master.ioloop') 90 | self.zygote_collection = accounting.ZygoteCollection() 91 | 92 | self.setup_master_socket() 93 | self.setup_control_socket() 94 | 95 | signal.signal(signal.SIGCHLD, self.reap_child) 96 | signal.signal(signal.SIGHUP, self.update_revision) 97 | for sig in (signal.SIGINT, signal.SIGTERM, signal.SIGQUIT): 98 | signal.signal(sig, self.stop) 99 | 100 | self.open_fds, self.status_http_server = handlers.get_httpserver( 101 | self.io_loop, 102 | self.control_port, 103 | self, 104 | zygote_base=self.zygote_base, 105 | ssl_options=self.ssl_options, 106 | ) 107 | 108 | def setup_master_socket(self): 109 | """Create an abstract unix domain socket for master. This 110 | socket will be used to receive messages from zygotes and their 111 | children. 
112 | """ 113 | self.logger.debug("Binding to master domain socket") 114 | self.master_socket = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM, 0) 115 | self.master_socket.bind('\0zygote_%d' % os.getpid()) 116 | self.io_loop.add_handler(self.master_socket.fileno(), self.handle_protocol_msg, self.io_loop.READ) 117 | 118 | def setup_control_socket(self): 119 | try: 120 | socket_path = self.control_socket_path 121 | if os.path.exists(socket_path): 122 | # NOTE: Starting the same application twice we won't get 123 | # here since main() won't be able to bind. We can add a 124 | # (ex|nb) file lock if needed. 125 | self.logger.error("Control socket exitsts %s. Probably from a previous run. Removing...", socket_path) 126 | self.cleanup_control_socket() 127 | self.logger.debug("Binding to control socket %s", socket_path) 128 | self.control_socket = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM, 0) 129 | self.control_socket.bind(socket_path) 130 | self.io_loop.add_handler(self.control_socket.fileno(), self.handle_control_msg, self.io_loop.READ) 131 | except Exception, e: 132 | # This is treated as a fatal error as only way to recover 133 | # from this is to restart zygote master and it's not what 134 | # we want. 135 | self.logger.error("Can not bind to control socket: %s", e) 136 | self.logger.error("Control socket is needed to make configuration changes on the running zygote master.") 137 | sys.exit(1) 138 | 139 | def cleanup_control_socket(self): 140 | if os.path.exists(self.control_socket_path): 141 | self.logger.debug("Removing control socket at %s" % self.control_socket_path) 142 | os.unlink(self.control_socket_path) 143 | 144 | def handle_control_msg(self, fd, events): 145 | assert fd == self.control_socket.fileno() 146 | data = self.control_socket.recv(self.RECV_SIZE) 147 | msg = message.ControlMessage.parse(data) 148 | msg_type = type(msg) 149 | 150 | # NOTE: We can possibly use SO_PEERCRED on control socket to 151 | # get more information about the client. 
152 | self.logger.info('received message of type %s', msg_type.__name__,) 153 | 154 | if msg_type is message.ControlMessageScaleWorkers: 155 | self.scale_workers(msg.num_workers) 156 | 157 | def handle_protocol_msg(self, fd, events): 158 | """Callback for messages received on the master_socket""" 159 | assert fd == self.master_socket.fileno() 160 | data = self.master_socket.recv(self.RECV_SIZE) 161 | msg = message.Message.parse(data) 162 | msg_type = type(msg) 163 | self.logger.debug('received message of type %s from pid %d', msg_type.__name__, msg.pid) 164 | 165 | if msg_type is message.MessageCanaryInit: 166 | self.logger.info("Canary zygote initialized. Transitioning idle workers.") 167 | # This is not the canary zygote anymore 168 | self.current_zygote.canary = False 169 | # We can also release the handle on the previous 170 | # zygote. It is already in the zygote_collection for 171 | # accounting purposses, but we won't need to keep track of 172 | # it anymore. 173 | self.prev_zygote = None 174 | # Canary initialization was successful, we can now transition workers 175 | self.io_loop.add_callback(self.transition_idle_workers) 176 | elif msg_type is message.MessageWorkerStart: 177 | # a new worker was spawned by one of our zygotes; add it to 178 | # zygote_collection, and note the time created and the zygote parent 179 | zygote = self.zygote_collection[msg.worker_ppid] 180 | if zygote: 181 | zygote.add_worker(msg.pid, msg.time_created) 182 | elif msg_type is message.MessageWorkerExitInitFail: 183 | if not self.current_zygote.canary: 184 | self.logger.error("A worker initialization failed, giving up") 185 | self.stop() 186 | return 187 | elif msg_type is message.MessageWorkerExit: 188 | # a worker exited. tell the current/active zygote to spawn a new 189 | # child. 
if this was the last child of a different (non-current) 190 | # zygote, kill that zygote 191 | zygote = self.zygote_collection[msg.pid] 192 | if not zygote: 193 | return 194 | 195 | zygote.remove_worker(msg.child_pid) 196 | if zygote.shutting_down: 197 | self.logger.debug('Removed a worker from shutting down zygote %d, %d left', msg.pid, len(zygote.workers())) 198 | return 199 | else: 200 | self.logger.debug('Removed a worker from zygote %d, %d left', msg.pid, len(zygote.workers())) 201 | 202 | if not self.stopped: 203 | if zygote in (self.current_zygote, self.prev_zygote): 204 | if self.num_workers > zygote.worker_count: 205 | # Only start a new if we're below quota. This 206 | # is how we scale down the number of workers. 207 | zygote.request_spawn() 208 | else: 209 | # Not a zygote that we care about. Request shutdown. 210 | zygote.request_shut_down() 211 | elif msg_type is message.MessageHTTPBegin: 212 | # a worker started servicing an HTTP request 213 | worker = self.zygote_collection.get_worker(msg.pid) 214 | if worker: 215 | worker.start_request(msg.remote_ip, msg.http_line) 216 | elif msg_type is message.MessageHTTPEnd: 217 | # a worker finished servicing an HTTP request 218 | worker = self.zygote_collection.get_worker(msg.pid) 219 | if worker: 220 | worker.end_request() 221 | if self.max_requests is not None and worker.request_count >= self.max_requests: 222 | self.logger.info('Worker %d reached max_requests %d, killing it', worker.pid, self.max_requests) 223 | safe_kill(worker.pid, signal.SIGQUIT) 224 | else: 225 | self.logger.warning('master got unexpected message of type %s', msg_type) 226 | 227 | 228 | def scale_workers(self, num_workers): 229 | prev_num_workers = self.num_workers 230 | diff_num_workers = num_workers - self.num_workers 231 | self.num_workers = num_workers 232 | if not diff_num_workers: 233 | return 234 | elif diff_num_workers > 0: 235 | self.logger.info('Increasing number of workers from %d to %d.', prev_num_workers, num_workers) 236 | 
for _ in range(diff_num_workers): 237 | self.current_zygote.request_spawn() 238 | else: 239 | self.logger.info('Reducing number of workers from %d to %d.', prev_num_workers, num_workers) 240 | self.current_zygote.request_kill_workers(-diff_num_workers) 241 | 242 | def reap_child(self, signum, frame): 243 | """Signal handler for SIGCHLD. Reaps children and updates 244 | self.zygote_collection. 245 | """ 246 | assert signum == signal.SIGCHLD 247 | while True: 248 | try: 249 | # The Zygotes are in their own process group, so need to 250 | # call waitpid() with -1 instead of 0. See waitpid(2). 251 | pid, status = os.waitpid(-1, os.WNOHANG) 252 | except OSError, e: 253 | if e.errno == errno.ECHILD: 254 | break 255 | elif e.errno == errno.EINTR: 256 | continue 257 | raise 258 | if pid == 0: 259 | break 260 | 261 | status_code = os.WEXITSTATUS(status) 262 | self.logger.info('zygote %d exited with status %d', pid, status_code) 263 | 264 | # the zygote died. if the zygote was not the current zygote it's OK; 265 | # otherwise, we need to start a new one 266 | try: 267 | self.zygote_collection.remove_zygote(pid) 268 | except KeyError: 269 | pass 270 | 271 | if status_code == INIT_FAILURE_EXIT_CODE: 272 | if pid == self.current_zygote.pid and self.current_zygote.canary: 273 | if self.prev_zygote: 274 | self.curent_zygote = self.prev_zygote 275 | self.logger.error("Could not initialize canary worker. Giving up trying to respawn") 276 | else: 277 | self.logger.error("Could not initialize zygote worker, giving up") 278 | self.really_stop() 279 | return 280 | 281 | if not self.stopped: 282 | active_zygote = self.current_zygote 283 | 284 | if pid == self.current_zygote.pid: 285 | self.current_zygote = self.create_zygote() 286 | active_zygote = self.current_zygote 287 | elif self.prev_zygote and pid == self.prev_zygote.pid: 288 | self.prev_zygote = self.create_zygote() 289 | active_zygote = self.prev_zygote 290 | 291 | # we may need to create new workers for the active zygote... 
this 292 | # is a bit racy, although that seems to be pretty unlikely in 293 | # practice 294 | workers_needed = self.num_workers - self.zygote_collection.worker_count() 295 | for x in xrange(workers_needed): 296 | active_zygote.request_spawn() 297 | 298 | elif len(self.zygote_collection.zygote_map.values()) == 0: 299 | self.really_stop() 300 | 301 | def stop(self, signum=None, frame=None): 302 | """ 303 | Stop the zygote master. Steps: 304 | * Ask all zygotes to kill and wait on their children 305 | * Wait for zygotes to exit 306 | * Kill anything left over if necessary 307 | """ 308 | if self.stopped: 309 | return 310 | # kill all of the workers 311 | self.logger.info('stopping all zygotes and workers') 312 | pids = set() 313 | for zygote in self.zygote_collection: 314 | pids.add(zygote.pid) 315 | self.logger.debug('requesting shutdown on %d', zygote.pid) 316 | zygote.request_shut_down() 317 | 318 | self.logger.debug('setting self.stopped') 319 | self.stopped = True 320 | 321 | self.logger.debug('master is stopping. will not try to update anymore.') 322 | signal.signal(signal.SIGHUP, signal.SIG_IGN) 323 | 324 | self.logger.debug('stopping io_loop.') 325 | if getattr(self, 'io_loop', None) is not None: 326 | self.io_loop.stop() 327 | 328 | self.logger.info('waiting for workers to exit before stoping master.') 329 | wait_for_pids(pids, self.WAIT_FOR_KILL_TIME, self.logger, kill_pgroup=True) 330 | self.logger.info('all zygotes exited; good night') 331 | 332 | self.really_stop(0) 333 | 334 | def really_stop(self, status=0): 335 | self.cleanup_control_socket() 336 | sys.exit(status) 337 | 338 | def transition_idle_workers(self): 339 | """Transition idle HTTP workers from old zygotes to the current 340 | zygote. 
341 | """ 342 | if not self.started_transition: 343 | self.started_transition = time.time() 344 | if (time.time() - self.started_transition) > self.WAIT_FOR_KILL_TIME: 345 | self.logger.debug("sending SIGKILL for transition because it was Too Damn Slow") 346 | sig = signal.SIGKILL 347 | else: 348 | sig = signal.SIGQUIT 349 | 350 | other_zygotes = self.zygote_collection.other_zygotes(self.current_zygote) 351 | if self.current_zygote.canary and self.prev_zygote: 352 | if self.prev_zygote in other_zygotes: 353 | other_zygotes.remove(self.prev_zygote) 354 | 355 | kill_count = 0 356 | other_zygote_count = len(other_zygotes) 357 | for zygote in other_zygotes: 358 | for worker in zygote.idle_workers(): 359 | self.logger.debug("killing worker %d with signal %d", worker.pid, sig) 360 | if safe_kill(worker.pid, sig): 361 | kill_count += 1 362 | self.logger.info('Attempted to transition %d workers from %d zygotes', kill_count, other_zygote_count) 363 | 364 | if other_zygote_count: 365 | # The list of other zygotes was at least one, so we should 366 | # reschedule another call to transition_idle_workers. When a zygote 367 | # runs out of worker children, the handle_protocol_msg function will 368 | # notice this fact when it receives the final MessageWorkerExit, and 369 | # at that time it will kill the worker, which is how this timeout 370 | # loop gets ended. 371 | self.io_loop.add_timeout(time.time() + self.POLL_INTERVAL, self.transition_idle_workers) 372 | else: 373 | self.started_transition = None 374 | 375 | # Cleanup empty zygotes for the next iteration of the transition. 376 | for zygote in other_zygotes: 377 | if zygote.worker_count == 0: 378 | self.kill_empty_zygote(zygote, sig) 379 | 380 | def kill_empty_zygote(self, zygote, sig=signal.SIGQUIT): 381 | """Send zygote SIGQUIT if it has zero workers. """ 382 | # The only valid time to kill a zygote is if it doesn't have 383 | # any workers left. 
384 | if zygote.worker_count == 0: 385 | self.logger.info("killing zygote with pid %d" % zygote.pid) 386 | safe_kill(zygote.pid, sig) 387 | 388 | def update_revision(self, signum=None, frame=None): 389 | """The SIGHUP handler, calls create_zygote and possibly initiates the 390 | transition of idle workers. 391 | 392 | This preserves the current zygote and initializes a "canary" 393 | zygote as the current one. 394 | """ 395 | self.prev_zygote = self.current_zygote 396 | self.current_zygote = self.create_zygote(canary=True) 397 | 398 | def create_zygote(self, canary=False): 399 | """"Create a new zygote""" 400 | # read the basepath symlink 401 | realbase = os.path.realpath(self.basepath) 402 | 403 | pid = os.fork() 404 | if pid: 405 | self.logger.info('started zygote %d pointed at base %r', pid, realbase) 406 | z = self.zygote_collection.add_zygote(pid, realbase, self.io_loop, canary=canary) 407 | if not canary: 408 | self.io_loop.add_callback(self.transition_idle_workers) 409 | return z 410 | else: 411 | # Try to clean up some of the file descriptors and whatnot that 412 | # exist in the parent before continuing. Strictly speaking, this 413 | # isn't necessary, but it seems good to remove these resources 414 | # if they're not needed in the child. 
415 | del self.io_loop 416 | close_fds(self.sock.fileno()) 417 | signal.signal(signal.SIGHUP, signal.SIG_DFL) 418 | 419 | # Make the zygote a process group leader 420 | os.setpgid(os.getpid(), os.getpid()) 421 | # create the zygote 422 | z = ZygoteWorker( 423 | sock=self.sock, 424 | basepath=realbase, 425 | module=self.module, 426 | name=self.name, 427 | version=self.version, 428 | args=self.application_args, 429 | ssl_options=self.ssl_options, 430 | canary=canary, 431 | debug=self.debug 432 | ) 433 | z.loop() 434 | 435 | def start(self): 436 | self.current_zygote = self.create_zygote() 437 | for x in xrange(self.num_workers): 438 | self.current_zygote.request_spawn() 439 | self.io_loop.start() 440 | 441 | def main(opts, extra_args): 442 | setproctitle('zygote master %s' % (opts.name or opts.module,)) 443 | zygote_logger = get_logger('zygote', opts.debug) 444 | 445 | if not logging.root.handlers: 446 | # XXX: WARNING 447 | # 448 | # We're disabling the root logger. Tornado's RequestHandler ONLY 449 | # supports logging uncaught errors to the root logger. This will end 450 | # poorly for you! 451 | # 452 | # We should probably provide a RequestHandler subclass that has 453 | # _handle_request_exception overridden to do something useful. 454 | # That might be hard to do without adding a tight version dependency 455 | # on tornado. 
456 | logging.root.addHandler(NullHandler()) 457 | 458 | if opts.debug: 459 | logging.root.setLevel(logging.DEBUG) 460 | else: 461 | logging.root.setLevel(logging.INFO) 462 | 463 | zygote_logger.info('main started') 464 | 465 | # Create the TCP listen socket 466 | sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 467 | flags = fcntl.fcntl(sock.fileno(), fcntl.F_GETFD) 468 | flags |= fcntl.FD_CLOEXEC 469 | fcntl.fcntl(sock.fileno(), fcntl.F_SETFD, flags) 470 | sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) 471 | sock.setblocking(0) 472 | sock.bind((opts.interface, opts.port)) 473 | sock.listen(128) 474 | 475 | ssl_options=None 476 | if opts.cert: 477 | ssl_options = dict( 478 | certfile=opts.cert, 479 | keyfile=opts.key, 480 | ca_certs=opts.cacerts, 481 | cert_reqs=ssl.CERT_OPTIONAL if opts.cacerts else ssl.CERT_NONE, 482 | ) 483 | zygote_logger.info('using SSL with %s', ssl_options) 484 | 485 | sock = ssl.wrap_socket(sock, 486 | server_side=True, 487 | do_handshake_on_connect=False, 488 | **ssl_options 489 | ) 490 | 491 | master = ZygoteMaster( 492 | sock, 493 | basepath=opts.basepath, 494 | module=opts.module, 495 | name=opts.name or opts.module, 496 | version=opts.version, 497 | num_workers=opts.num_workers, 498 | control_port=opts.control_port, 499 | control_socket_path=opts.control_socket_path, 500 | application_args=extra_args, 501 | max_requests=opts.max_requests, 502 | zygote_base=opts.zygote_base, 503 | ssl_options=ssl_options, 504 | debug=opts.debug 505 | ) 506 | atexit.register(master.stop) 507 | master.start() 508 | -------------------------------------------------------------------------------- /zygote/message.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | class Message(object): 4 | 5 | CANARY_INIT = 'I' 6 | 7 | CREATE_WORKER = 'C' 8 | KILL_WORKERS = 'L' 9 | SHUT_DOWN = 'K' 10 | 11 | WORKER_START = 'S' 12 | WORKER_EXIT = 'X' 13 | WORKER_EXIT_INIT_FAIL = 'Y' 14 | 15 | 
HTTP_BEGIN = 'B' 16 | HTTP_END = 'E' 17 | 18 | @classmethod 19 | def emit(cls, body): 20 | return '%d %s %s' % (os.getpid(), cls.msg_type, body) 21 | 22 | @classmethod 23 | def parse(cls, msg): 24 | pid, type, body = msg.split(' ', 2) 25 | pid = int(pid) 26 | if type == cls.CREATE_WORKER: 27 | return MessageCreateWorker(pid, body) 28 | elif type == cls.KILL_WORKERS: 29 | return MessageKillWorkers(pid, body) 30 | elif type == cls.CANARY_INIT: 31 | return MessageCanaryInit(pid, body) 32 | elif type == cls.WORKER_START: 33 | return MessageWorkerStart(pid, body) 34 | elif type == cls.WORKER_EXIT: 35 | return MessageWorkerExit(pid, body) 36 | elif type == cls.HTTP_BEGIN: 37 | return MessageHTTPBegin(pid, body) 38 | elif type == cls.HTTP_END: 39 | return MessageHTTPEnd(pid, body) 40 | elif type == cls.WORKER_EXIT_INIT_FAIL: 41 | return MessageWorkerExitInitFail(pid, body) 42 | elif type == cls.SHUT_DOWN: 43 | return MessageShutDown(pid, body) 44 | else: 45 | assert False 46 | 47 | def __init__(self, pid): 48 | self.pid = int(pid) 49 | 50 | class MessageCanaryInit(Message): 51 | 52 | msg_type = Message.CANARY_INIT 53 | 54 | def __init__(self, pid, body): 55 | assert body == '' 56 | super(MessageCanaryInit, self).__init__(pid) 57 | 58 | class MessageCreateWorker(Message): 59 | 60 | msg_type = Message.CREATE_WORKER 61 | 62 | def __init__(self, pid, body): 63 | assert body == '' 64 | super(MessageCreateWorker, self).__init__(pid) 65 | 66 | class MessageKillWorkers(Message): 67 | 68 | msg_type = Message.KILL_WORKERS 69 | 70 | def __init__(self, pid, body): 71 | super(MessageKillWorkers, self).__init__(pid) 72 | self.num_workers_to_kill = int(body) 73 | 74 | class MessageWorkerStart(Message): 75 | 76 | msg_type = Message.WORKER_START 77 | 78 | def __init__(self, pid, body): 79 | super(MessageWorkerStart, self).__init__(pid) 80 | created, ppid = body.split(' ') 81 | self.time_created = int(created) 82 | self.worker_ppid = int(ppid) 83 | 84 | class MessageWorkerExit(Message): 85 
| 86 | msg_type = Message.WORKER_EXIT 87 | 88 | def __init__(self, pid, body): 89 | super(MessageWorkerExit, self).__init__(pid) 90 | child_pid, status = body.split() 91 | self.payload = body 92 | self.child_pid = int(child_pid) 93 | self.status = int(status) 94 | 95 | class MessageWorkerExitInitFail(Message): 96 | 97 | msg_type = Message.WORKER_EXIT_INIT_FAIL 98 | 99 | def __init__(self, pid, body): 100 | super(MessageWorkerExitInitFail, self).__init__(pid) 101 | child_pid, status = body.split() 102 | self.payload = body 103 | self.child_pid = int(child_pid) 104 | self.status = int(status) 105 | 106 | class MessageHTTPBegin(Message): 107 | 108 | msg_type = Message.HTTP_BEGIN 109 | 110 | def __init__(self, pid, body): 111 | super(MessageHTTPBegin, self).__init__(pid) 112 | self.remote_ip, self.http_line = body.split(' ', 1) 113 | 114 | class MessageHTTPEnd(Message): 115 | 116 | msg_type = Message.HTTP_END 117 | 118 | def __init__(self, pid, body): 119 | super(MessageHTTPEnd, self).__init__(pid) 120 | assert body == '' # just ignore the body, it should be empty 121 | 122 | class MessageShutDown(Message): 123 | 124 | msg_type = Message.SHUT_DOWN 125 | 126 | def __init__(self, pid, body): 127 | super(MessageShutDown, self).__init__(pid) 128 | assert body == '' # just ignore the body, it should be empty 129 | 130 | 131 | class ControlMessage(object): 132 | """Control Message type used by zygote master""" 133 | 134 | UNKNOWN = 'I' 135 | SCALE_WORKERS = 'S' 136 | 137 | @classmethod 138 | def emit(cls, body): 139 | return '%s %s' % (cls.msg_type, body) 140 | 141 | @classmethod 142 | def parse(cls, msg): 143 | type, body = msg.split(' ', 1) 144 | if type == cls.SCALE_WORKERS: 145 | return ControlMessageScaleWorkers(body) 146 | else: 147 | return ControlMessageUnknown(body) 148 | 149 | class ControlMessageUnknown(ControlMessage): 150 | 151 | msg_type = ControlMessage.UNKNOWN 152 | 153 | def __init__(self, body): 154 | super(ControlMessageUnknown, self).__init__() 155 | 156 
| class ControlMessageScaleWorkers(ControlMessage): 157 | 158 | msg_type = ControlMessage.SCALE_WORKERS 159 | 160 | def __init__(self, body): 161 | super(ControlMessageScaleWorkers, self).__init__() 162 | self.num_workers = int(body) 163 | -------------------------------------------------------------------------------- /zygote/resources/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yelp/zygote/9798a4a66747737888950dc923ffc890f2831818/zygote/resources/__init__.py -------------------------------------------------------------------------------- /zygote/resources/static/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yelp/zygote/9798a4a66747737888950dc923ffc890f2831818/zygote/resources/static/__init__.py -------------------------------------------------------------------------------- /zygote/resources/static/base.css: -------------------------------------------------------------------------------- 1 | @import url(http://fonts.googleapis.com/css?family=Droid+Sans+Mono); 2 | html { font-family: 'Droid Sans Mono', monospace; color: #eeeeec; background-color: #1f1f1f; } 3 | body { margin: 1em; } 4 | 5 | dt { color: #babafa; display: inline; } 6 | dd { display: inline; } 7 | 8 | th { font-weight: bold; padding-right: 1em; color: #70e691} 9 | td { padding-right: 1em; } 10 | 11 | h1 { color: #fc9c54; font-weight: bold; } 12 | h2 { color: #f9e181; font-weight: bold; } 13 | h3 { color: #f9e181; font-weight: normal; } 14 | 15 | a { color: #babafa; } 16 | a:visited { color: #babafa; } 17 | 18 | th { padding-right: 1.5em; } 19 | td { padding-right: 1.5em; } 20 | 21 | .right { text-align: right; } 22 | 23 | .ft { margin-top: 1em; } 24 | .vgap { margin-top: 1em; } 25 | 26 | tr:nth-child(even) { background-color: #2e2e2e; } 27 | -------------------------------------------------------------------------------- 
/zygote/resources/static/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yelp/zygote/9798a4a66747737888950dc923ffc890f2831818/zygote/resources/static/favicon.ico -------------------------------------------------------------------------------- /zygote/resources/static/main.js: -------------------------------------------------------------------------------- 1 | var format10 = function (d) { 2 | if (d < 10) 3 | return "0" + d; 4 | else 5 | return d; 6 | } 7 | 8 | var mungeTimes = function (obj) { 9 | if (!obj) { 10 | return obj; 11 | } else if (typeof obj === 'object') { 12 | if (obj.hasOwnProperty('length')) { // overly simplistic array check 13 | var a = new Array(obj.length); 14 | for (var i = 0; i < obj.length; i++) { 15 | a[i] = mungeTimes(obj[i]); 16 | } 17 | return a; 18 | } else { 19 | var o = {}; 20 | for (var prop in obj) { 21 | if (obj.hasOwnProperty(prop)) { 22 | if (prop.match(/time_/)) { 23 | if (typeof obj[prop] === 'number') { 24 | var d = new Date(obj[prop] * 1000); 25 | var s = d.getFullYear() + "-" + format10(d.getMonth() + 1) + "-" + format10(d.getDate()) + " " + format10(d.getHours()) + ":" + format10(d.getMinutes()) + ":" + format10(d.getSeconds());a 26 | o[prop] = obj[prop] 27 | o[prop + "_fmt"] = s; 28 | } else { 29 | o[prop] = obj[prop] 30 | o[prop + "_fmt"] = null; 31 | } 32 | } else { 33 | o[prop] = mungeTimes(obj[prop]) 34 | } 35 | } 36 | } 37 | return o; 38 | } 39 | } else { 40 | return obj; 41 | } 42 | } 43 | 44 | $(document).ready(function () { 45 | var template = null; 46 | var vars = null; 47 | 48 | var render = function () { 49 | $('#content').html(Mustache.to_html(template, vars)); 50 | }; 51 | 52 | var updateVars = function () { 53 | $.get('/json', function (data) { 54 | lastRefreshed = new Date(); 55 | vars = mungeTimes(data); 56 | if (template) { 57 | render(); 58 | } 59 | }); 60 | }; 61 | 62 | /* update template */ 63 | $.get('/template', function 
(data) { 64 | template = data; 65 | if (vars) { 66 | render(); 67 | } 68 | }); 69 | 70 | updateVars(); 71 | setInterval(updateVars, 1000); 72 | }); 73 | -------------------------------------------------------------------------------- /zygote/resources/static/mustache.js: -------------------------------------------------------------------------------- 1 | /* 2 | mustache.js — Logic-less templates in JavaScript 3 | 4 | See http://mustache.github.com/ for more info. 5 | */ 6 | 7 | var Mustache = function() { 8 | var Renderer = function() {}; 9 | 10 | Renderer.prototype = { 11 | otag: "{{", 12 | ctag: "}}", 13 | pragmas: {}, 14 | buffer: [], 15 | pragmas_implemented: { 16 | "IMPLICIT-ITERATOR": true 17 | }, 18 | context: {}, 19 | 20 | render: function(template, context, partials, in_recursion) { 21 | // reset buffer & set context 22 | if(!in_recursion) { 23 | this.context = context; 24 | this.buffer = []; // TODO: make this non-lazy 25 | } 26 | 27 | // fail fast 28 | if(!this.includes("", template)) { 29 | if(in_recursion) { 30 | return template; 31 | } else { 32 | this.send(template); 33 | return; 34 | } 35 | } 36 | 37 | template = this.render_pragmas(template); 38 | var html = this.render_section(template, context, partials); 39 | if(in_recursion) { 40 | return this.render_tags(html, context, partials, in_recursion); 41 | } 42 | 43 | this.render_tags(html, context, partials, in_recursion); 44 | }, 45 | 46 | /* 47 | Sends parsed lines 48 | */ 49 | send: function(line) { 50 | if(line != "") { 51 | this.buffer.push(line); 52 | } 53 | }, 54 | 55 | /* 56 | Looks for %PRAGMAS 57 | */ 58 | render_pragmas: function(template) { 59 | // no pragmas 60 | if(!this.includes("%", template)) { 61 | return template; 62 | } 63 | 64 | var that = this; 65 | var regex = new RegExp(this.otag + "%([\\w-]+) ?([\\w]+=[\\w]+)?" 
+ 66 | this.ctag); 67 | return template.replace(regex, function(match, pragma, options) { 68 | if(!that.pragmas_implemented[pragma]) { 69 | throw({message: 70 | "This implementation of mustache doesn't understand the '" + 71 | pragma + "' pragma"}); 72 | } 73 | that.pragmas[pragma] = {}; 74 | if(options) { 75 | var opts = options.split("="); 76 | that.pragmas[pragma][opts[0]] = opts[1]; 77 | } 78 | return ""; 79 | // ignore unknown pragmas silently 80 | }); 81 | }, 82 | 83 | /* 84 | Tries to find a partial in the curent scope and render it 85 | */ 86 | render_partial: function(name, context, partials) { 87 | name = this.trim(name); 88 | if(!partials || partials[name] === undefined) { 89 | throw({message: "unknown_partial '" + name + "'"}); 90 | } 91 | if(typeof(context[name]) != "object") { 92 | return this.render(partials[name], context, partials, true); 93 | } 94 | return this.render(partials[name], context[name], partials, true); 95 | }, 96 | 97 | /* 98 | Renders inverted (^) and normal (#) sections 99 | */ 100 | render_section: function(template, context, partials) { 101 | if(!this.includes("#", template) && !this.includes("^", template)) { 102 | return template; 103 | } 104 | 105 | var that = this; 106 | // CSW - Added "+?" so it finds the tighest bound, not the widest 107 | var regex = new RegExp(this.otag + "(\\^|\\#)\\s*(.+)\\s*" + this.ctag + 108 | "\n*([\\s\\S]+?)" + this.otag + "\\/\\s*\\2\\s*" + this.ctag + 109 | "\\s*", "mg"); 110 | 111 | // for each {{#foo}}{{/foo}} section do... 112 | return template.replace(regex, function(match, type, name, content) { 113 | var value = that.find(name, context); 114 | if(type == "^") { // inverted section 115 | if(!value || that.is_array(value) && value.length === 0) { 116 | // false or empty list, render it 117 | return that.render(content, context, partials, true); 118 | } else { 119 | return ""; 120 | } 121 | } else if(type == "#") { // normal section 122 | if(that.is_array(value)) { // Enumerable, Let's loop! 
123 | return that.map(value, function(row) { 124 | return that.render(content, that.create_context(row), 125 | partials, true); 126 | }).join(""); 127 | } else if(that.is_object(value)) { // Object, Use it as subcontext! 128 | return that.render(content, that.create_context(value), 129 | partials, true); 130 | } else if(typeof value === "function") { 131 | // higher order section 132 | return value.call(context, content, function(text) { 133 | return that.render(text, context, partials, true); 134 | }); 135 | } else if(value) { // boolean section 136 | return that.render(content, context, partials, true); 137 | } else { 138 | return ""; 139 | } 140 | } 141 | }); 142 | }, 143 | 144 | /* 145 | Replace {{foo}} and friends with values from our view 146 | */ 147 | render_tags: function(template, context, partials, in_recursion) { 148 | // tit for tat 149 | var that = this; 150 | 151 | var new_regex = function() { 152 | return new RegExp(that.otag + "(=|!|>|\\{|%)?([^\\/#\\^]+?)\\1?" + 153 | that.ctag + "+", "g"); 154 | }; 155 | 156 | var regex = new_regex(); 157 | var tag_replace_callback = function(match, operator, name) { 158 | switch(operator) { 159 | case "!": // ignore comments 160 | return ""; 161 | case "=": // set new delimiters, rebuild the replace regexp 162 | that.set_delimiters(name); 163 | regex = new_regex(); 164 | return ""; 165 | case ">": // render partial 166 | return that.render_partial(name, context, partials); 167 | case "{": // the triple mustache is unescaped 168 | return that.find(name, context); 169 | default: // escape the value 170 | return that.escape(that.find(name, context)); 171 | } 172 | }; 173 | var lines = template.split("\n"); 174 | for(var i = 0; i < lines.length; i++) { 175 | lines[i] = lines[i].replace(regex, tag_replace_callback, this); 176 | if(!in_recursion) { 177 | this.send(lines[i]); 178 | } 179 | } 180 | 181 | if(in_recursion) { 182 | return lines.join("\n"); 183 | } 184 | }, 185 | 186 | set_delimiters: function(delimiters) { 
187 | var dels = delimiters.split(" "); 188 | this.otag = this.escape_regex(dels[0]); 189 | this.ctag = this.escape_regex(dels[1]); 190 | }, 191 | 192 | escape_regex: function(text) { 193 | // thank you Simon Willison 194 | if(!arguments.callee.sRE) { 195 | var specials = [ 196 | '/', '.', '*', '+', '?', '|', 197 | '(', ')', '[', ']', '{', '}', '\\' 198 | ]; 199 | arguments.callee.sRE = new RegExp( 200 | '(\\' + specials.join('|\\') + ')', 'g' 201 | ); 202 | } 203 | return text.replace(arguments.callee.sRE, '\\$1'); 204 | }, 205 | 206 | /* 207 | find `name` in current `context`. That is find me a value 208 | from the view object 209 | */ 210 | find: function(name, context) { 211 | name = this.trim(name); 212 | 213 | // Checks whether a value is thruthy or false or 0 214 | function is_kinda_truthy(bool) { 215 | return bool === false || bool === 0 || bool; 216 | } 217 | 218 | var value; 219 | if(is_kinda_truthy(context[name])) { 220 | value = context[name]; 221 | } else if(is_kinda_truthy(this.context[name])) { 222 | value = this.context[name]; 223 | } 224 | 225 | if(typeof value === "function") { 226 | return value.apply(context); 227 | } 228 | if(value !== undefined) { 229 | return value; 230 | } 231 | // silently ignore unkown variables 232 | return ""; 233 | }, 234 | 235 | // Utility methods 236 | 237 | /* includes tag */ 238 | includes: function(needle, haystack) { 239 | return haystack.indexOf(this.otag + needle) != -1; 240 | }, 241 | 242 | /* 243 | Does away with nasty characters 244 | */ 245 | escape: function(s) { 246 | s = String(s === null ? 
"" : s); 247 | return s.replace(/&(?!\w+;)|["'<>\\]/g, function(s) { 248 | switch(s) { 249 | case "&": return "&"; 250 | case "\\": return "\\\\"; 251 | case '"': return '"'; 252 | case "'": return '''; 253 | case "<": return "<"; 254 | case ">": return ">"; 255 | default: return s; 256 | } 257 | }); 258 | }, 259 | 260 | // by @langalex, support for arrays of strings 261 | create_context: function(_context) { 262 | if(this.is_object(_context)) { 263 | return _context; 264 | } else { 265 | var iterator = "."; 266 | if(this.pragmas["IMPLICIT-ITERATOR"]) { 267 | iterator = this.pragmas["IMPLICIT-ITERATOR"].iterator; 268 | } 269 | var ctx = {}; 270 | ctx[iterator] = _context; 271 | return ctx; 272 | } 273 | }, 274 | 275 | is_object: function(a) { 276 | return a && typeof a == "object"; 277 | }, 278 | 279 | is_array: function(a) { 280 | return Object.prototype.toString.call(a) === '[object Array]'; 281 | }, 282 | 283 | /* 284 | Gets rid of leading and trailing whitespace 285 | */ 286 | trim: function(s) { 287 | return s.replace(/^\s*|\s*$/g, ""); 288 | }, 289 | 290 | /* 291 | Why, why, why? Because IE. Cry, cry cry. 
292 | */ 293 | map: function(array, fn) { 294 | if (typeof array.map == "function") { 295 | return array.map(fn); 296 | } else { 297 | var r = []; 298 | var l = array.length; 299 | for(var i = 0; i < l; i++) { 300 | r.push(fn(array[i])); 301 | } 302 | return r; 303 | } 304 | } 305 | }; 306 | 307 | return({ 308 | name: "mustache.js", 309 | version: "0.3.1-dev", 310 | 311 | /* 312 | Turns a template and view into HTML 313 | */ 314 | to_html: function(template, view, partials, send_fun) { 315 | var renderer = new Renderer(); 316 | if(send_fun) { 317 | renderer.send = send_fun; 318 | } 319 | renderer.render(template, view, partials); 320 | if(!send_fun) { 321 | return renderer.buffer.join("\n"); 322 | } 323 | } 324 | }); 325 | }(); 326 | -------------------------------------------------------------------------------- /zygote/resources/static/reset-min.css: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2010, Yahoo! Inc. All rights reserved. 
3 | Code licensed under the BSD License: 4 | http://developer.yahoo.com/yui/license.html 5 | version: 3.3.0 6 | build: 3167 7 | */ 8 | html{color:#000;background:#FFF;}body,div,dl,dt,dd,ul,ol,li,h1,h2,h3,h4,h5,h6,pre,code,form,fieldset,legend,input,textarea,p,blockquote,th,td{margin:0;padding:0;}table{border-collapse:collapse;border-spacing:0;}fieldset,img{border:0;}address,caption,cite,code,dfn,em,strong,th,var{font-style:normal;font-weight:normal;}li{list-style:none;}caption,th{text-align:left;}h1,h2,h3,h4,h5,h6{font-size:100%;font-weight:normal;}q:before,q:after{content:'';}abbr,acronym{border:0;font-variant:normal;}sup{vertical-align:text-top;}sub{vertical-align:text-bottom;}input,textarea,select{font-family:inherit;font-size:inherit;font-weight:inherit;}input,textarea,select{*font-size:100%;}legend{color:#000;} -------------------------------------------------------------------------------- /zygote/resources/static/template.html: -------------------------------------------------------------------------------- 1 |
2 |
pid:
{{pid}}

3 |
created:
{{time_created_fmt}}

4 |
basepath:
{{basepath}}

5 |
6 |
hostname:
{{hostname}}

7 |
interface:
{{interface}}

8 |
port:
{{port}}

9 |
10 |
RSS:
{{rss}}

11 |
VSZ:
{{vsz}}

12 |
13 | 14 | {{#zygotes}} 15 |
16 |

Zygote {{pid}}

17 |
18 |
created:
{{time_created_fmt}}

19 |
basepath:
{{basepath}}

20 |
21 |
RSS:
{{rss}}

22 |
VSZ:
{{vsz}}

23 |
24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | {{#workers}} 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | {{/workers}} 47 |
PIDTime CreatedVSZRSSReqsΔt msRemote IPHTTP Line
{{pid}}{{time_created_fmt}}{{vsz}}{{rss}}{{request_count}}{{elapsed_formatted}}{{remote_ip}}{{http}}
48 |
49 | {{/zygotes}} 50 | -------------------------------------------------------------------------------- /zygote/resources/templates/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yelp/zygote/9798a4a66747737888950dc923ffc890f2831818/zygote/resources/templates/__init__.py -------------------------------------------------------------------------------- /zygote/resources/templates/home.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | zygote 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 |

Zygote Master

13 |
loading...
14 |
15 | Note: you can access all of the data on this page programatically at /json 16 |
17 | 18 | 19 | -------------------------------------------------------------------------------- /zygote/util.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import with_statement 3 | 4 | import errno 5 | import fcntl 6 | import functools 7 | import logging 8 | import os 9 | import resource 10 | import signal 11 | import socket 12 | import sys 13 | import time 14 | 15 | import tornado.ioloop 16 | 17 | try: 18 | import setproctitle as _setproctitle 19 | has_proc_title = True 20 | except ImportError: 21 | has_proc_title = False 22 | 23 | log = logging.getLogger('zygote.util') 24 | 25 | def setproctitle(name): 26 | if has_proc_title: 27 | _setproctitle.setproctitle(name) 28 | 29 | def is_eintr(exc): 30 | """Returns True if an exception is an EINTR, False otherwise.""" 31 | if hasattr(exc, 'errno'): 32 | return exc.errno == errno.EINTR 33 | elif getattr(exc, 'args', None) and hasattr(exc, 'message'): 34 | return exc.args[0] == errno.EINTR 35 | return False 36 | 37 | def set_nonblocking(fd): 38 | flags = fcntl.fcntl(fd, fcntl.F_GETFL) 39 | fcntl.fcntl(fd, fcntl.F_SETFL, flags | os.O_NONBLOCK) 40 | 41 | def get_rundir(): 42 | return os.getenv('RUNDIR') if os.getenv('RUNDIR') else os.getcwd() 43 | 44 | def get_meminfo(pid=None): 45 | """Get the memory statistics for the current process. Values are returned 46 | as kilobytes. 
The meanings of the fields are: 47 | virt -- virtual size 48 | res -- RSS size 49 | shr -- shared memory 50 | trs -- kilobytes from 'code' pages 51 | drs -- kilobytes from data/stack pages 52 | lrs -- kilobytes from library pages 53 | dt -- kilobytes from diry pages 54 | """ 55 | try: 56 | with open('/proc/%d/statm' % (pid or os.getpid())) as raw_file: 57 | data = raw_file.read().rstrip('\n') 58 | except IOError: 59 | return dict() 60 | 61 | fields = ['virt', 'res', 'shr', 'trs', 'drs', 'lrs', 'dt'] 62 | pagesize = resource.getpagesize() 63 | return dict((k, int(v) * pagesize >> 10) for k, v in zip(fields, data.split())) 64 | 65 | def meminfo_fmt(pid=None): 66 | d = get_meminfo(pid) 67 | return { 68 | 'rss': '%1.2f' % (d['res'] / 1024.0), 69 | 'vsz': '%1.2f' % (d['virt'] / 1024.0), 70 | 'shr': '%1.2f' % (d['shr'] / 1024.0) 71 | } 72 | 73 | def retry_eintr(func, max_retries=5): 74 | """Retry a function on EINTR""" 75 | for x in xrange(max_retries): 76 | try: 77 | return func() 78 | except Exception, e: 79 | if not is_eintr(e) or x == max_retries - 1: 80 | raise 81 | 82 | def close_fds(*exclude): 83 | """Try to close open file descriptors. This will probably only work on 84 | Linux, since it uses /proc/PID/fd to get information on what file 85 | descriptors to close. 86 | 87 | An alternative for non-Linux systems would be to just try to close random 88 | file descriptors (say, the first 16k), but it doesn't seem like it's really 89 | worth the trouble (and doing this is potentially slow). 
90 | """ 91 | return # XXX: fixme 92 | if not os.path.exists('/proc/self/fd'): 93 | log.warn('no /proc fd information running, not closing fds') 94 | return 95 | excl = list(exclude) + [sys.stdin.fileno(), sys.stdout.fileno(), sys.stderr.fileno()] 96 | for fd_name in os.listdir('/proc/self/fd'): 97 | fd = int(fd_name) 98 | if fd not in excl: 99 | try: 100 | retry_eintr(lambda: os.close(fd)) 101 | except OSError, e: 102 | if e.errno == errno.EBADF: 103 | # for some reason the fd was bad. nothing we can do about 104 | # that 105 | pass 106 | else: 107 | raise 108 | 109 | def is_pid_alive(pid): 110 | """Sends null signal to a process to check if it's alive""" 111 | try: 112 | os.kill(pid, 0) 113 | return True 114 | except OSError, e: 115 | # Access denied, but process is alive 116 | return e.errno == errno.EPERM 117 | except: 118 | return False 119 | 120 | def safe_kill(pid, sig=signal.SIGUSR1, process_group=False): 121 | try: 122 | log.debug('killing %d', pid) 123 | if process_group: 124 | os.killpg(pid, sig) 125 | else: 126 | os.kill(pid, sig) 127 | except OSError, e: 128 | # Process may have died before we send the signal 129 | if not is_pid_alive(pid): 130 | return True 131 | log.warning('failed to safe_kill pid %d because of %r' % (pid, e)) 132 | return False 133 | return True 134 | 135 | def wait_for_pids(pids, timeout, log, kill_pgroup=False): 136 | """Wait for the given Set of pids to die. If they 137 | haven't died after timeout seconds, send them all SIGKILL. 138 | 139 | If kill_pgroup is true, the kill will be sent to 140 | the process group instead of to the process itself. 
141 | """ 142 | start = time.time() 143 | elapsed = 0 144 | while elapsed < timeout: 145 | if not pids: 146 | break 147 | pid, _, _ = os.wait3(os.WNOHANG) 148 | if pid == 0: 149 | time.sleep(0.5) 150 | else: 151 | pids.remove(pid) 152 | elapsed = time.time() - start 153 | else: 154 | # will only get here if we ran out of time 155 | log.warning("PIDs [%s] didn't quit after %f seconds, sending SIGKILL", ",".join(str(p) for p in pids), timeout) 156 | for pid in pids: 157 | safe_kill(pid, signal.SIGKILL, kill_pgroup) 158 | 159 | class AFUnixSender(object): 160 | """Sender abstraction for an AF_UNIX socket (using the SOCK_DGRAM 161 | protocol). This handles connecting in a non-blocking fashion, and sending 162 | messages asynchronously. Messages that are scheduled to be sent before the 163 | socket is connected will be sent once the socket finishes connecting. 164 | """ 165 | 166 | CONNECT_FREQUENCY = 0.1 167 | 168 | def __init__(self, io_loop, sock=None, logger=None): 169 | self.io_loop = io_loop 170 | if sock is None: 171 | self.socket = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM, 0) 172 | else: 173 | self.socket = sock 174 | set_nonblocking(self.socket) 175 | self.log = logger or log 176 | 177 | self.connected = False # is the socket connected> 178 | self.send_queue = [] # queue of messages to send 179 | self.sending = False # are there queued messages? 180 | 181 | def connect(self, target): 182 | try: 183 | self.socket.connect(target) 184 | except socket.error, e: 185 | if e.errno == errno.EINPROGRESS: 186 | # usual case -- the nonblocking connect causes EINPROGRESS. When 187 | # the socket is writeable, then the connect has finished, and we 188 | # call _finish_connecting 189 | self.io_loop.add_handler(self.socket.fileno(), self._finish_connecting, self.io_loop.WRITE) 190 | elif e.errno == errno.ECONNREFUSED: 191 | # the connection was refused. 
Retry the connection in 192 | # CONNECT_FREQUENCY seconds 193 | self.io_loop.add_timeout(time.time() + self.CONNECT_FREQUENCY, lambda: self.connect(target)) 194 | else: 195 | raise 196 | else: 197 | # we were able to connect immediately 198 | self._finish_connecting() 199 | 200 | def _finish_connecting(self, fd=None, events=None): 201 | if fd is not None: 202 | assert fd == self.socket.fileno() 203 | self.io_loop.remove_handler(fd) 204 | error = self.socket.getsockopt(socket.SOL_SOCKET, socket.SO_ERROR) 205 | if error != 0: 206 | self.log.error('got socket connect error %r' % (error,)) 207 | raise IOError('errno %d' % (error,)) 208 | 209 | self.connected = True 210 | if self.send_queue: 211 | self._sendall() 212 | 213 | def _sendall(self): 214 | if not self.connected: 215 | return 216 | if not self.send_queue: 217 | self.log.error('got _sendall with no send_queue') 218 | return 219 | if self.sending: 220 | # could happen if we schedule multiple messages to be sent in a row 221 | self.log.debug('already in send loop, be patient') 222 | return 223 | 224 | def maybe_send_queue(fd, _): 225 | """Try to send the message queue. Returns True if the entire 226 | queue was sent""" 227 | assert fd == self.socket.fileno() 228 | while self.send_queue: 229 | self.sending = True 230 | try: 231 | self.socket.send(self.send_queue.pop(0)) 232 | except IOError, e: 233 | if e.errno == errno.EWOULDBLOCK: 234 | self.log.debug("got EWOULDBLOCK") 235 | return False 236 | elif e.errno in (errno.ECONNREFUSED, errno.ENOTCONN): 237 | # If the worker has already shut down, and needs to be reaped, 238 | # then we'll get back an ENOTCONN or an ECONNREFUSED from this 239 | # send call. 
We should be reaping this process shortly hereafter 240 | # anyway, so just ignore it 241 | return False 242 | else: 243 | self.sending = False 244 | raise 245 | except IndexError: 246 | # We'll get the IndexError when we've sent the entire 247 | # send_queue; if this ever gets multithreaded, we might 248 | # get an IndexError instead of breaking out of the loop 249 | # naturally, so gotta handle that! 250 | break 251 | except Exception, e: 252 | # Sometimes we seem to get errors that are not IOErrors? 253 | if getattr(e, 'errno', None) not in (errno.ECONNREFUSED, errno.ENOTCONN): 254 | raise 255 | return False 256 | self.sending = False 257 | return True 258 | 259 | def sender(fd, *args, **kwargs): 260 | success = maybe_send_queue(fd, []) 261 | if success: 262 | self.io_loop.remove_handler(fd) 263 | 264 | # Try and send immediately 265 | success = maybe_send_queue(self.socket.fileno(), []) 266 | # if that fails, put it in the ioloop to send later 267 | if not success: 268 | self.io_loop.add_handler(self.socket.fileno(), sender, self.io_loop.WRITE) 269 | 270 | def send(self, msg): 271 | self.send_queue.append(msg) 272 | self._sendall() 273 | 274 | class ZygoteIOLoop(tornado.ioloop.IOLoop): 275 | """Override IOLoop to log to our own logger instead of the root logger""" 276 | 277 | def __init__(self, *args, **kwargs): 278 | log_name = kwargs.pop('log_name', 'zygote.io_loop') 279 | self.log = logging.getLogger(log_name) 280 | super(ZygoteIOLoop, self).__init__(*args, **kwargs) 281 | 282 | def handle_callback_exception(self, callback): 283 | self.log.exception("Error in callback %s", callback) 284 | 285 | def add_handler(self, fd, handler, events): 286 | """Add a handler to the IOLoop, with exception handling""" 287 | 288 | # Get the list of available attributes from the handler. 
289 | # A workaround for: http://bugs.python.org/issue3445 290 | available_attrs = (a for a in functools.WRAPPER_ASSIGNMENTS if hasattr(handler, a)) 291 | 292 | @functools.wraps(handler, assigned=available_attrs) 293 | def wrapped_handler(*args, **kwargs): 294 | try: 295 | handler(*args, **kwargs) 296 | except Exception: 297 | self.handle_callback_exception(handler) 298 | 299 | return super(ZygoteIOLoop, self).add_handler(fd, wrapped_handler, events) 300 | 301 | 302 | 303 | if hasattr(logging, 'NullHandler'): 304 | NullHandler = logging.NullHandler 305 | else: 306 | class NullHandler(logging.Handler): 307 | def emit(self, record): 308 | pass 309 | 310 | # TODO: support logging to things other than stderr 311 | # 312 | # Why do we need this? Because we can not log in our signal handlers 313 | # (http://docs.python.org/library/logging.html#thread-safety). 314 | # 315 | # This OTOH may cause log lines from master & worker processes merge 316 | # into a single line. Happens very rarely and it's still better than 317 | # master stuck in a deadlock. 
318 | class LocklessHandler(logging.StreamHandler): 319 | def createLock(self): 320 | self.lock = None 321 | 322 | def get_logger(logger_name, debug=False): 323 | logger = logging.getLogger(logger_name) 324 | formatter = logging.Formatter('[%(process)d] %(asctime)s :: %(levelname)-7s :: %(name)s - %(message)s') 325 | handler = LocklessHandler() 326 | handler.setFormatter(formatter) 327 | handler.setLevel(logging.DEBUG if debug else logging.INFO) 328 | logger.handlers = [handler] 329 | logger.propagate = False 330 | return logger 331 | 332 | 333 | def sanitize_headers(headers): 334 | """Sanitize sensitive request headers for logging""" 335 | results = dict(headers) 336 | # Redact instead of remove Authorization header so that those 337 | # using Basic Auth can debug if needed 338 | if results.get('Authorization'): 339 | results['Authorization'] = '***redacted***' 340 | return results 341 | -------------------------------------------------------------------------------- /zygote/worker.py: -------------------------------------------------------------------------------- 1 | import atexit 2 | import errno 3 | import os 4 | import random 5 | import signal 6 | import site 7 | import socket 8 | import sys 9 | import time 10 | 11 | import tornado 12 | 13 | if tornado.version_info >= (2,1,0): 14 | from ._httpserver_2 import HTTPServer 15 | else: 16 | from ._httpserver import HTTPServer 17 | 18 | from zygote.util import AFUnixSender 19 | from zygote.util import get_logger 20 | from zygote.util import safe_kill 21 | from zygote.util import set_nonblocking 22 | from zygote.util import setproctitle 23 | from zygote.util import wait_for_pids 24 | from zygote.util import ZygoteIOLoop 25 | import message 26 | 27 | # Exit with this exit code when there was a failure to init the worker 28 | # (which might be hard to represent otherwise if it, for example, occurs 29 | # while setting up the domain socket) 30 | INIT_FAILURE_EXIT_CODE = 4 31 | 32 | WORKER_INIT_FAILURE_EXIT_CODE = 5 33 | 
34 | def establish_signal_handlers(logger): 35 | # delete atexit handlers from parent 36 | del atexit._exithandlers[:] 37 | 38 | def zygote_exit(signum, frame): 39 | if signum == signal.SIGINT: 40 | logger.info('received SIGINT, exiting') 41 | elif signum == signal.SIGTERM: 42 | logger.info('received SIGTERM, exiting') 43 | elif signum == signal.SIGQUIT: 44 | logger.info('recieved SIGQUIT (clean exit), exiting') 45 | else: 46 | logger.info('received signal %d, exiting', signum) 47 | sys.exit(0) 48 | # we explicitly ignore SIGINT and SIGTERM 49 | signal.signal(signal.SIGINT, signal.SIG_IGN) 50 | signal.signal(signal.SIGTERM, signal.SIG_IGN) 51 | signal.signal(signal.SIGCHLD, signal.SIG_DFL) 52 | for sig in (signal.SIGQUIT,): 53 | signal.signal(sig, zygote_exit) 54 | 55 | def notify(sock, msg_cls, body=''): 56 | """Send a message to the zygote master. Should be using AFUnixSender?""" 57 | sock.send(msg_cls.emit(str(body))) 58 | 59 | 60 | class ZygoteWorker(object): 61 | """A Zygote is a process that manages children worker processes. 

    When the zygote process is instantiated it does a few things:
     * chdirs to the absolute position pointed by a basepath symlink
     * munges sys.path to point to the new version of the code
     * imports the target module, to pre-fork load resources
     * creates read and write pipes to the parent process
    """

    # Maximum datagram size read from the control socket per recv()
    RECV_SIZE = 8192

    # how many seconds to wait before sending SIGKILL to children
    WAIT_FOR_KILL_TIME = 10.0

    def __init__(self, sock, basepath, module, name, version, args, ssl_options=None, canary=False, debug=False):
        """Set up the zygote process.

        sock        -- listening HTTP socket handed down from the master;
                       inherited by forked workers
        basepath    -- directory holding the code version to run; also
                       used to derive the version string (see _version)
        module      -- dotted name of the application module providing
                       get_application() and optionally initialize()
        name        -- display name used in the process title
        version     -- explicit version string, or falsy to derive it
                       from basepath
        args        -- positional args forwarded to initialize() and
                       get_application()
        ssl_options -- passed through to each worker's HTTPServer
        canary      -- if True, report successful init to the master
        debug       -- enables DEBUG-level logging

        Exits with INIT_FAILURE_EXIT_CODE if the control socket cannot
        be bound or if _real_init() raises.
        """
        self.module = module
        self.name = name
        self.version = version
        self.basepath = basepath
        self.args = args
        self.ssl_options = ssl_options
        # Master's pid, remembered so this process (and its forked
        # workers) can connect back to the master's notify socket.
        self.ppid = os.getppid()
        self.canary = canary
        # pids of currently running worker children
        self.children = set()
        self.debug = debug
        self.logger = get_logger('zygote.worker.zygote_process', self.debug)

        establish_signal_handlers(self.logger)

        # Set up the control socket nice and early
        try:
            self.control_socket = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM, 0)
            # The leading NUL byte selects the Linux abstract socket
            # namespace, so no filesystem entry is created.
            self.control_socket.bind('\0zygote_%d' % os.getpid())
        except Exception:
            # If we can't bind to the control socket, just give up
            self.logger.error("Could not bind to control socket, aborting early!")
            sys.exit(INIT_FAILURE_EXIT_CODE)

        try:
            self._real_init(sock, self.basepath, self.module, args)
        except Exception:
            self.logger.exception("Error performing initialization of %s", self)
            sys.exit(INIT_FAILURE_EXIT_CODE)

    def _version(self):
        """Get the actual version as specified or from the basepath."""
        return self.version or self.basepath.split('/')[-1]

    def _real_init(self, sock, basepath, module, args):
        """Actual initialization function.
        Broken out for error handling"""

        setproctitle('zygote name=%s version=%s' % (self.name, self._version(),))

        # Create a pipe(2) pair. This will be used so workers can detect when
        # the intermediate zygote exits -- when this happens, a read event will
        # happen on the read_pipe file descriptor, and the child can exit. We do
        # this so that if the intermediate zygote exits unexpectedly for some
        # reason, while it still has children workers running (which is an
        # abnormal situation in and of itself), we aren't left with orphaned
        # worker processes. Note that the write_pipe is normally never written
        # on, we're just using this hack to get a read event on the read pipe.
        self.read_pipe, self.write_pipe = os.pipe()

        self.io_loop = ZygoteIOLoop(log_name='zygote.worker.ioloop')

        os.chdir(basepath)

        # Add basepath to sys.path so that application will be able to
        # load what's required. We do this at zygote initialization
        # to have the correct paths at new code reload.
        site.addsitedir(os.path.realpath(basepath))

        # Import the application module pre-fork so resources are loaded
        # once before workers exist; the fromlist makes __import__ return
        # the leaf module itself rather than the top-level package.
        t = __import__(module, [], [], ['initialize', 'get_application'], 0)

        self.sock = sock

        self.get_application = t.get_application

        # Start dispatching control messages from the master.
        set_nonblocking(self.control_socket)
        self.io_loop.add_handler(self.control_socket.fileno(), self.handle_control, self.io_loop.READ)

        # Datagram channel for sending status messages back to the master.
        self.notify_socket = AFUnixSender(self.io_loop)
        self.notify_socket.connect('\0zygote_%d' % self.ppid)

        # Reap exited workers asynchronously (see reap_child).
        signal.signal(signal.SIGCHLD, self.reap_child)

        # If there is an initialize function defined then call it.
        if hasattr(t, 'initialize'):
            self.logger.info('initializing zygote')
            t.initialize(*self.args)

        if self.canary:
            notify(self.notify_socket, message.MessageCanaryInit)
            # Initialization is successful. This is not the canary zygote anymore.
            self.canary = False

        self.logger.info('new zygote started')

    def handle_control(self, fd, events):
        """IOLoop READ callback: dispatch one control datagram from the master."""
        assert fd == self.control_socket.fileno()
        data = self.control_socket.recv(self.RECV_SIZE)
        msg = message.Message.parse(data)
        if type(msg) is message.MessageCreateWorker:
            self.spawn_worker()
        elif type(msg) is message.MessageKillWorkers:
            self.kill_workers(msg.num_workers_to_kill)
        elif type(msg) is message.MessageShutDown:
            self.kill_all_workers()
        else:
            # Unknown message types are a protocol violation, not a
            # recoverable condition.
            assert False

    def kill_workers(self, num_workers_to_kill):
        """Kill a random subset of workers and wait for them to exit.

        Logs an error and does nothing if asked to kill more workers
        than currently exist.
        """
        if num_workers_to_kill > len(self.children):
            self.logger.error(
                'Request to kill %d workers out of %d current workers',
                num_workers_to_kill,
                len(self.children)
            )
            return
        worker_pids = random.sample(self.children, num_workers_to_kill)
        for pid in worker_pids:
            safe_kill(pid)
        wait_for_pids(worker_pids, self.WAIT_FOR_KILL_TIME, self.logger)

    def kill_all_workers(self):
        """Kill all workers and wait (synchronously) for them
        to exit"""
        # reset the signal handler so that we don't get interrupted
        # by SIGCHLDs
        signal.signal(signal.SIGCHLD, signal.SIG_DFL)
        waiting_pids = set()

        self.logger.debug('zygote requesting kill on %d pids', len(self.children))
        for pid in self.children:
            if safe_kill(pid, signal.SIGQUIT):
                waiting_pids.add(pid)
        wait_for_pids(waiting_pids, self.WAIT_FOR_KILL_TIME, self.logger)
        self.logger.debug('zygote done killing children, terminating')
        sys.exit(0)

    def reap_child(self, signum, frame):
        """SIGCHLD handler: reap all exited workers and notify the master."""
        assert signum == signal.SIGCHLD
        # Loop because a single SIGCHLD may cover multiple exited children.
        while True:
            try:
                # Non-blocking wait for any child in our process group.
                pid, status = os.waitpid(0, os.WNOHANG)
            except OSError, e:
                if e.errno == errno.ECHILD:
                    # no children left to reap
                    break
                elif e.errno == errno.EINTR:
                    continue
                raise # should just be EINVAL on Linux

            if pid == 0:
                # children exist but none have exited yet
                break

            status_code = os.WEXITSTATUS(status)
            self.logger.info('reaped worker %d, status %d', pid, status_code)
            # Report init failures distinctly so the master can react
            # (e.g. stop respawning a worker that can never come up).
            if status_code == WORKER_INIT_FAILURE_EXIT_CODE:
                notify(self.notify_socket, message.MessageWorkerExitInitFail, '%d %d' % (pid, status_code))
            else:
                notify(self.notify_socket, message.MessageWorkerExit, '%d %d' % (pid, status_code))

            self.children.remove(pid)

    def loop(self):
        """Run the zygote's event loop; blocks until the loop is stopped."""
        self.io_loop.start()

    def spawn_worker(self):
        """Fork a new worker process.

        Parent: records the child pid in self.children and returns.
        Child: runs _initialize_worker(), which normally does not return
        (it starts the worker's ioloop); if initialization raises, the
        child exits with WORKER_INIT_FAILURE_EXIT_CODE.
        """
        time_created = time.time()
        pid = os.fork()
        if pid:
            self.children.add(pid)
            return
        try:
            self.logger.debug("Calling _initialize_worker")
            self._initialize_worker(time_created)
            self.logger.debug("Worker initialized")
        except Exception, e:
            self.logger.exception("Error initializing worker process: %s", e)
            sys.exit(WORKER_INIT_FAILURE_EXIT_CODE)
        self.logger.debug("Looks okay to me, smooth sailing!")

    def _initialize_worker(self, time_created):
        # We're the child. We need to close the write_pipe in order for the
        # read_pipe to get an event when the parent's write_pipe closes
        # (otherwise the kernel is too smart and thinks that it's waiting
        # for writes from *this* process' write_pipe).
        os.close(self.write_pipe)

        logger = get_logger('zygote.worker.worker_process')
        logger.debug('new worker started')

        def on_parent_exit(fd, events):
            # Fires when the zygote's write_pipe end closes (zygote died).
            logger.error('detected that intermediate zygote died, exiting')
            sys.exit(0)

        # create a new i/o loop
        del self.io_loop
        io_loop = ZygoteIOLoop(log_name='zygote.worker.worker_process.ioloop')
        # Install this worker's io_loop as the global io_loop; only applies in
        # this fork. Programs that use this io_loop instance should NOT use
        # io_loop.start() because start() is invoked by the corresponding
        # zygote worker.
        if tornado.version_info >= (2,1,0):
            io_loop.install()
        else:
            # Older tornado has no install(); set the singleton directly.
            tornado.ioloop.IOLoop._instance = io_loop

        # add the read pipe, so the worker exits if the zygote dies
        io_loop.add_handler(self.read_pipe, on_parent_exit, io_loop.READ)

        # Connect straight to the master's notify socket (self.ppid is the
        # master's pid, captured before this process was forked).
        sock = AFUnixSender(io_loop, logger=logger)
        sock.connect('\0zygote_%d' % self.ppid)

        establish_signal_handlers(logger)
        # HTTP accounting callbacks passed to the bundled HTTPServer:
        # report request start/end to the master.
        def on_headers(line, remote_ip, headers):
            logger.debug('sending MessageHTTPBegin')
            notify(sock, message.MessageHTTPBegin, "%s %s" % (remote_ip, line))
        def on_close(disconnected=False):
            logger.debug('sending MessageHTTPEnd')
            notify(sock, message.MessageHTTPEnd)

        # Announce ourselves: "<creation time in microseconds> <zygote pid>"
        notify(sock, message.MessageWorkerStart, '%d %d' % (int(time_created * 1e6), os.getppid()))
        setproctitle('zygote-worker name=%s version=%s' % (self.name, self._version(),))
        try:
            # io_loop is passed into get_application for program to add handler
            # or schedule task on the main io_loop. Program that uses this
            # io_loop instance should NOT use io_loop.start() because start()
            # is invoked by the corresponding zygote worker.
            kwargs = {'io_loop': io_loop}
            logger.debug("Invoking get_application")
            app = self.get_application(*self.args, **kwargs)
        except Exception:
            # Re-raise so spawn_worker's handler exits the child with
            # WORKER_INIT_FAILURE_EXIT_CODE.
            logger.error("Unable to get application")
            raise
        # TODO: make keep-alive servers work
        logger.debug("Creating HTTPServer")
        http_server = HTTPServer(app,
                io_loop=io_loop,
                no_keep_alive=True,
                close_callback=on_close,
                headers_callback=on_headers,
                ssl_options=self.ssl_options
                )
        # Serve on the pre-bound socket inherited from the master.
        if tornado.version_info >= (2,1,0):
            http_server.add_socket(self.sock)
        else:
            # Older tornado: attach the socket and register its read
            # handler manually.
            http_server._socket = self.sock
            io_loop.add_handler(self.sock.fileno(), http_server._handle_events, io_loop.READ)
        logger.debug("Started ioloop...")
        # Blocks for the life of the worker.
        io_loop.start()