├── .gitignore
├── .travis.yml
├── HISTORY.rst
├── LICENSE
├── MANIFEST.in
├── NOTICES
├── README.rst
├── docs
│   ├── Makefile
│   ├── make.bat
│   └── source
│       ├── api.rst
│       ├── conf.py
│       └── index.rst
├── setup.py
├── src
│   └── pycohttpparser
│       ├── __init__.py
│       ├── api.py
│       ├── build.py
│       ├── picohttpparser.c
│       └── picohttpparser.h
├── test.py
├── test_requirements.txt
└── tox.ini
/.gitignore: -------------------------------------------------------------------------------- 1 | env/ 2 | __pycache__/ 3 | *.pyc 4 | *.egg-info 5 | .coverage 6 | *.egg 7 | build/ 8 | *.so 9 | dist/ 10 | .tox/ 11 | .cache/ -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 4 | - "2.7" 5 | - "3.3" 6 | - "3.4" 7 | - "3.5" 8 | #- "pypy" 9 | 10 | sudo: false 11 | addons: 12 | apt: 13 | packages: 14 | - libffi-dev 15 | - python-dev 16 | install: 17 | - "pip install ." 18 | - "pip install -r test_requirements.txt" 19 | - "pip install flake8" 20 | before_script: "flake8 --max-complexity 10 src/ test.py" 21 | script: py.test --cov pycohttpparser test.py 22 | -------------------------------------------------------------------------------- /HISTORY.rst: -------------------------------------------------------------------------------- 1 | Release History 2 | =============== 3 | 4 | 1.0.0 (2015-03-21) 5 | ------------------ 6 | 7 | - Initial release. 8 | - Contains picohttpparser at revision 98bcc1c3b431d05d4584af66082da48e4638a675 9 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015 Cory Benfield 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE.
20 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.rst NOTICES LICENSE HISTORY.rst src/pycohttpparser/picohttpparser.c src/pycohttpparser/picohttpparser.h -------------------------------------------------------------------------------- /NOTICES: -------------------------------------------------------------------------------- 1 | The following license applies to picohttpparser: 2 | 3 | Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase, 4 | Shigeo Mitsunari 5 | 6 | The software is licensed under either the MIT License (below) or the Perl 7 | license. 8 | 9 | Permission is hereby granted, free of charge, to any person obtaining a copy 10 | of this software and associated documentation files (the "Software"), to 11 | deal in the Software without restriction, including without limitation the 12 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 13 | sell copies of the Software, and to permit persons to whom the Software is 14 | furnished to do so, subject to the following conditions: 15 | 16 | The above copyright notice and this permission notice shall be included in 17 | all copies or substantial portions of the Software. 18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 24 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 25 | IN THE SOFTWARE. 26 | 27 | 28 | The following license applies to the work borrowed from Donald Stufft's blog 29 | post "Distributing a CFFI Project", and from the Python Cryptographic 30 | Authority's 'cryptography' project, for distributing CFFI projects: 31 | 32 | Copyright (c) Individual contributors. 33 | All rights reserved. 34 | 35 | Redistribution and use in source and binary forms, with or without 36 | modification, are permitted provided that the following conditions are met: 37 | 38 | 1. Redistributions of source code must retain the above copyright notice, 39 | this list of conditions and the following disclaimer. 40 | 41 | 2. Redistributions in binary form must reproduce the above copyright 42 | notice, this list of conditions and the following disclaimer in the 43 | documentation and/or other materials provided with the distribution. 44 | 45 | 3. Neither the name of PyCA Cryptography nor the names of its contributors 46 | may be used to endorse or promote products derived from this software 47 | without specific prior written permission. 48 | 49 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 50 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 51 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 52 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 53 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 54 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 55 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 56 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 57 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 58 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 59 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | pycohttpparser 2 | ============== 3 | 4 | pycohttpparser is a Python wrapper library around the excellent 5 | `picohttpparser`_ C library. It aims to provide a speedy C implementation of 6 | the HTTP/1.1 parsing API used in the `hyper`_ project. Using CFFI, it supports 7 | both CPython 2.7 and 3.4, and PyPy. 8 | 9 | To get started with pycohttpparser, simply install it from the cheeseshop: 10 | 11 | .. code-block:: bash 12 | 13 | $ pip install pycohttpparser 14 | 15 | You'll need to make sure your system is set up for using CFFI. For more 16 | information, `consult CFFI's documentation`_. 17 | 18 | Then, you can start parsing your HTTP messages! For example: 19 | 20 | .. code-block:: pycon 21 | 22 | >>> import pycohttpparser.api as p 23 | >>> message = socket.recv() 24 | >>> m = memoryview(message) 25 | >>> c = p.Parser() 26 | >>> r = c.parse_request(m) 27 | >>> r.method.tobytes() 28 | b'POST' 29 | >>> r.path.tobytes() 30 | b'/post' 31 | 32 | For more information, see `the documentation`_. 33 | 34 | 35 | .. _picohttpparser: https://github.com/h2o/picohttpparser 36 | .. _hyper: http://hyper.readthedocs.org/ 37 | .. _consult CFFI's documentation: https://cffi.readthedocs.org/en/latest/#installation-and-status 38 | .. _the documentation: http://pycohttpparser.readthedocs.org/ 39 | 40 | License 41 | ------- 42 | 43 | The Python wrapper library here is licensed under the MIT license. See LICENSE 44 | for more details. 45 | 46 | The original picohttpparser C code, which is included in this project in its 47 | entirety, is licensed under the MIT license. See the source files or the 48 | NOTICES file for more details. 49 | 50 | Maintainers 51 | ----------- 52 | 53 | The Python wrapper library is maintained by Cory Benfield. 54 | 55 | picohttpparser is maintained by the picohttpparser team: see NOTICES for more. 56 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables.
16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 23 | 24 | help: 25 | @echo "Please use \`make ' where is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | 49 | clean: 50 | rm -rf $(BUILDDIR)/* 51 | 52 | html: 53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 56 | 57 | dirhtml: 58 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 59 | @echo 60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 61 | 62 | singlehtml: 63 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 64 | @echo 65 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 66 | 67 | pickle: 68 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 69 | @echo 70 | @echo "Build finished; now you can process the pickle files." 71 | 72 | json: 73 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 74 | @echo 75 | @echo "Build finished; now you can process the JSON files." 76 | 77 | htmlhelp: 78 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 79 | @echo 80 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 81 | ".hhp project file in $(BUILDDIR)/htmlhelp." 82 | 83 | qthelp: 84 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 85 | @echo 86 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 87 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 88 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/pycohttpparser.qhcp" 89 | @echo "To view the help file:" 90 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/pycohttpparser.qhc" 91 | 92 | devhelp: 93 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 94 | @echo 95 | @echo "Build finished." 
96 | @echo "To view the help file:" 97 | @echo "# mkdir -p $$HOME/.local/share/devhelp/pycohttpparser" 98 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/pycohttpparser" 99 | @echo "# devhelp" 100 | 101 | epub: 102 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 103 | @echo 104 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 105 | 106 | latex: 107 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 108 | @echo 109 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 110 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 111 | "(use \`make latexpdf' here to do that automatically)." 112 | 113 | latexpdf: 114 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 115 | @echo "Running LaTeX files through pdflatex..." 116 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 117 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 118 | 119 | latexpdfja: 120 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 121 | @echo "Running LaTeX files through platex and dvipdfmx..." 122 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 123 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 124 | 125 | text: 126 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 127 | @echo 128 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 129 | 130 | man: 131 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 132 | @echo 133 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 134 | 135 | texinfo: 136 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 137 | @echo 138 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 139 | @echo "Run \`make' in that directory to run these through makeinfo" \ 140 | "(use \`make info' here to do that automatically)." 141 | 142 | info: 143 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 144 | @echo "Running Texinfo files through makeinfo..." 145 | make -C $(BUILDDIR)/texinfo info 146 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 147 | 148 | gettext: 149 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 150 | @echo 151 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 152 | 153 | changes: 154 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 155 | @echo 156 | @echo "The overview file is in $(BUILDDIR)/changes." 157 | 158 | linkcheck: 159 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 160 | @echo 161 | @echo "Link check complete; look for any errors in the above output " \ 162 | "or in $(BUILDDIR)/linkcheck/output.txt." 163 | 164 | doctest: 165 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 166 | @echo "Testing of doctests in the sources finished, look at the " \ 167 | "results in $(BUILDDIR)/doctest/output.txt." 168 | 169 | xml: 170 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 171 | @echo 172 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 173 | 174 | pseudoxml: 175 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 176 | @echo 177 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 
178 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source 10 | set I18NSPHINXOPTS=%SPHINXOPTS% source 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. xml to make Docutils-native XML files 37 | echo. pseudoxml to make pseudoxml-XML files for display purposes 38 | echo. linkcheck to check all external links for integrity 39 | echo. doctest to run all doctests embedded in the documentation if enabled 40 | goto end 41 | ) 42 | 43 | if "%1" == "clean" ( 44 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 45 | del /q /s %BUILDDIR%\* 46 | goto end 47 | ) 48 | 49 | 50 | %SPHINXBUILD% 2> nul 51 | if errorlevel 9009 ( 52 | echo. 53 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 54 | echo.installed, then set the SPHINXBUILD environment variable to point 55 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 56 | echo.may add the Sphinx directory to PATH. 57 | echo. 58 | echo.If you don't have Sphinx installed, grab it from 59 | echo.http://sphinx-doc.org/ 60 | exit /b 1 61 | ) 62 | 63 | if "%1" == "html" ( 64 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 65 | if errorlevel 1 exit /b 1 66 | echo. 67 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 68 | goto end 69 | ) 70 | 71 | if "%1" == "dirhtml" ( 72 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 73 | if errorlevel 1 exit /b 1 74 | echo. 75 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 76 | goto end 77 | ) 78 | 79 | if "%1" == "singlehtml" ( 80 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 84 | goto end 85 | ) 86 | 87 | if "%1" == "pickle" ( 88 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can process the pickle files. 92 | goto end 93 | ) 94 | 95 | if "%1" == "json" ( 96 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 97 | if errorlevel 1 exit /b 1 98 | echo. 99 | echo.Build finished; now you can process the JSON files. 
100 | goto end 101 | ) 102 | 103 | if "%1" == "htmlhelp" ( 104 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 105 | if errorlevel 1 exit /b 1 106 | echo. 107 | echo.Build finished; now you can run HTML Help Workshop with the ^ 108 | .hhp project file in %BUILDDIR%/htmlhelp. 109 | goto end 110 | ) 111 | 112 | if "%1" == "qthelp" ( 113 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 114 | if errorlevel 1 exit /b 1 115 | echo. 116 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 117 | .qhcp project file in %BUILDDIR%/qthelp, like this: 118 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\pycohttpparser.qhcp 119 | echo.To view the help file: 120 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\pycohttpparser.ghc 121 | goto end 122 | ) 123 | 124 | if "%1" == "devhelp" ( 125 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished. 129 | goto end 130 | ) 131 | 132 | if "%1" == "epub" ( 133 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 134 | if errorlevel 1 exit /b 1 135 | echo. 136 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 137 | goto end 138 | ) 139 | 140 | if "%1" == "latex" ( 141 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 142 | if errorlevel 1 exit /b 1 143 | echo. 144 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 145 | goto end 146 | ) 147 | 148 | if "%1" == "latexpdf" ( 149 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 150 | cd %BUILDDIR%/latex 151 | make all-pdf 152 | cd %BUILDDIR%/.. 153 | echo. 154 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 155 | goto end 156 | ) 157 | 158 | if "%1" == "latexpdfja" ( 159 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 160 | cd %BUILDDIR%/latex 161 | make all-pdf-ja 162 | cd %BUILDDIR%/.. 163 | echo. 164 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 165 | goto end 166 | ) 167 | 168 | if "%1" == "text" ( 169 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 170 | if errorlevel 1 exit /b 1 171 | echo. 172 | echo.Build finished. The text files are in %BUILDDIR%/text. 173 | goto end 174 | ) 175 | 176 | if "%1" == "man" ( 177 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 178 | if errorlevel 1 exit /b 1 179 | echo. 180 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 181 | goto end 182 | ) 183 | 184 | if "%1" == "texinfo" ( 185 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 186 | if errorlevel 1 exit /b 1 187 | echo. 188 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 189 | goto end 190 | ) 191 | 192 | if "%1" == "gettext" ( 193 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 194 | if errorlevel 1 exit /b 1 195 | echo. 196 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 197 | goto end 198 | ) 199 | 200 | if "%1" == "changes" ( 201 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 202 | if errorlevel 1 exit /b 1 203 | echo. 204 | echo.The overview file is in %BUILDDIR%/changes. 205 | goto end 206 | ) 207 | 208 | if "%1" == "linkcheck" ( 209 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 210 | if errorlevel 1 exit /b 1 211 | echo. 212 | echo.Link check complete; look for any errors in the above output ^ 213 | or in %BUILDDIR%/linkcheck/output.txt. 
214 | goto end 215 | ) 216 | 217 | if "%1" == "doctest" ( 218 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 219 | if errorlevel 1 exit /b 1 220 | echo. 221 | echo.Testing of doctests in the sources finished, look at the ^ 222 | results in %BUILDDIR%/doctest/output.txt. 223 | goto end 224 | ) 225 | 226 | if "%1" == "xml" ( 227 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 228 | if errorlevel 1 exit /b 1 229 | echo. 230 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 231 | goto end 232 | ) 233 | 234 | if "%1" == "pseudoxml" ( 235 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 236 | if errorlevel 1 exit /b 1 237 | echo. 238 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 239 | goto end 240 | ) 241 | 242 | :end 243 | -------------------------------------------------------------------------------- /docs/source/api.rst: -------------------------------------------------------------------------------- 1 | pycohttpparser Python API 2 | ========================= 3 | 4 | .. module:: pycohttpparser.api 5 | 6 | This page documents pycohttpparser's Python API. 7 | 8 | An important feature to note is that, wherever possible, pycohttpparser uses 9 | ``memoryview`` objects to avoid copying data. The only objects that are not 10 | returned as ``memoryview``s are response status codes and the HTTP minor version 11 | number. 12 | 13 | .. autoclass:: pycohttpparser.api.Parser 14 | :inherited-members: 15 | 16 | .. autoclass:: pycohttpparser.api.Request 17 | :inherited-members: 18 | 19 | .. autoclass:: pycohttpparser.api.Response 20 | :inherited-members: 21 | 22 | .. autoclass:: pycohttpparser.api.ParseError 23 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # pycohttpparser documentation build configuration file, created by 4 | # sphinx-quickstart on Sat Mar 21 11:33:49 2015. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | import sys 16 | import os 17 | 18 | # If extensions (or modules to document with autodoc) are in another directory, 19 | # add these directories to sys.path here. If the directory is relative to the 20 | # documentation root, use os.path.abspath to make it absolute, like shown here. 21 | #sys.path.insert(0, os.path.abspath('.')) 22 | 23 | # -- General configuration ------------------------------------------------ 24 | 25 | # If your documentation needs a minimal Sphinx version, state it here. 26 | #needs_sphinx = '1.0' 27 | 28 | # Add any Sphinx extension module names here, as strings. They can be 29 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 30 | # ones. 31 | extensions = [ 32 | 'sphinx.ext.autodoc', 33 | 'sphinx.ext.intersphinx', 34 | 'sphinx.ext.viewcode', 35 | ] 36 | 37 | # Add any paths that contain templates here, relative to this directory. 38 | templates_path = ['_templates'] 39 | 40 | # The suffix of source filenames. 41 | source_suffix = '.rst' 42 | 43 | # The encoding of source files. 44 | #source_encoding = 'utf-8-sig' 45 | 46 | # The master toctree document. 47 | master_doc = 'index' 48 | 49 | # General information about the project.
50 | project = u'pycohttpparser' 51 | copyright = u'2015, Cory Benfield' 52 | 53 | # The version info for the project you're documenting, acts as replacement for 54 | # |version| and |release|, also used in various other places throughout the 55 | # built documents. 56 | # 57 | # The short X.Y version. 58 | version = '1.0.0' 59 | # The full version, including alpha/beta/rc tags. 60 | release = '1.0.0' 61 | 62 | # The language for content autogenerated by Sphinx. Refer to documentation 63 | # for a list of supported languages. 64 | #language = None 65 | 66 | # There are two options for replacing |today|: either, you set today to some 67 | # non-false value, then it is used: 68 | #today = '' 69 | # Else, today_fmt is used as the format for a strftime call. 70 | #today_fmt = '%B %d, %Y' 71 | 72 | # List of patterns, relative to source directory, that match files and 73 | # directories to ignore when looking for source files. 74 | exclude_patterns = [] 75 | 76 | # The reST default role (used for this markup: `text`) to use for all 77 | # documents. 78 | #default_role = None 79 | 80 | # If true, '()' will be appended to :func: etc. cross-reference text. 81 | #add_function_parentheses = True 82 | 83 | # If true, the current module name will be prepended to all description 84 | # unit titles (such as .. function::). 85 | #add_module_names = True 86 | 87 | # If true, sectionauthor and moduleauthor directives will be shown in the 88 | # output. They are ignored by default. 89 | #show_authors = False 90 | 91 | # The name of the Pygments (syntax highlighting) style to use. 92 | pygments_style = 'sphinx' 93 | 94 | # A list of ignored prefixes for module index sorting. 95 | #modindex_common_prefix = [] 96 | 97 | # If true, keep warnings as "system message" paragraphs in the built documents. 98 | #keep_warnings = False 99 | 100 | 101 | # -- Options for HTML output ---------------------------------------------- 102 | 103 | # The theme to use for HTML and HTML Help pages. See the documentation for 104 | # a list of builtin themes. 105 | html_theme = 'default' 106 | 107 | # Theme options are theme-specific and customize the look and feel of a theme 108 | # further. For a list of options available for each theme, see the 109 | # documentation. 110 | #html_theme_options = {} 111 | 112 | # Add any paths that contain custom themes here, relative to this directory. 113 | #html_theme_path = [] 114 | 115 | # The name for this set of Sphinx documents. If None, it defaults to 116 | # " v documentation". 117 | #html_title = None 118 | 119 | # A shorter title for the navigation bar. Default is the same as html_title. 120 | #html_short_title = None 121 | 122 | # The name of an image file (relative to this directory) to place at the top 123 | # of the sidebar. 124 | #html_logo = None 125 | 126 | # The name of an image file (within the static path) to use as favicon of the 127 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 128 | # pixels large. 129 | #html_favicon = None 130 | 131 | # Add any paths that contain custom static files (such as style sheets) here, 132 | # relative to this directory. They are copied after the builtin static files, 133 | # so a file named "default.css" will overwrite the builtin "default.css". 134 | html_static_path = ['_static'] 135 | 136 | # Add any extra paths that contain custom files (such as robots.txt or 137 | # .htaccess) here, relative to this directory. These files are copied 138 | # directly to the root of the documentation. 
139 | #html_extra_path = [] 140 | 141 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 142 | # using the given strftime format. 143 | #html_last_updated_fmt = '%b %d, %Y' 144 | 145 | # If true, SmartyPants will be used to convert quotes and dashes to 146 | # typographically correct entities. 147 | #html_use_smartypants = True 148 | 149 | # Custom sidebar templates, maps document names to template names. 150 | #html_sidebars = {} 151 | 152 | # Additional templates that should be rendered to pages, maps page names to 153 | # template names. 154 | #html_additional_pages = {} 155 | 156 | # If false, no module index is generated. 157 | #html_domain_indices = True 158 | 159 | # If false, no index is generated. 160 | #html_use_index = True 161 | 162 | # If true, the index is split into individual pages for each letter. 163 | #html_split_index = False 164 | 165 | # If true, links to the reST sources are added to the pages. 166 | #html_show_sourcelink = True 167 | 168 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 169 | #html_show_sphinx = True 170 | 171 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 172 | #html_show_copyright = True 173 | 174 | # If true, an OpenSearch description file will be output, and all pages will 175 | # contain a tag referring to it. The value of this option must be the 176 | # base URL from which the finished HTML is served. 177 | #html_use_opensearch = '' 178 | 179 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 180 | #html_file_suffix = None 181 | 182 | # Output file base name for HTML help builder. 183 | htmlhelp_basename = 'pycohttpparserdoc' 184 | 185 | 186 | # -- Options for LaTeX output --------------------------------------------- 187 | 188 | latex_elements = { 189 | # The paper size ('letterpaper' or 'a4paper'). 190 | #'papersize': 'letterpaper', 191 | 192 | # The font size ('10pt', '11pt' or '12pt'). 193 | #'pointsize': '10pt', 194 | 195 | # Additional stuff for the LaTeX preamble. 196 | #'preamble': '', 197 | } 198 | 199 | # Grouping the document tree into LaTeX files. List of tuples 200 | # (source start file, target name, title, 201 | # author, documentclass [howto, manual, or own class]). 202 | latex_documents = [ 203 | ('index', 'pycohttpparser.tex', u'pycohttpparser Documentation', 204 | u'Cory Benfield', 'manual'), 205 | ] 206 | 207 | # The name of an image file (relative to this directory) to place at the top of 208 | # the title page. 209 | #latex_logo = None 210 | 211 | # For "manual" documents, if this is true, then toplevel headings are parts, 212 | # not chapters. 213 | #latex_use_parts = False 214 | 215 | # If true, show page references after internal links. 216 | #latex_show_pagerefs = False 217 | 218 | # If true, show URL addresses after external links. 219 | #latex_show_urls = False 220 | 221 | # Documents to append as an appendix to all manuals. 222 | #latex_appendices = [] 223 | 224 | # If false, no module index is generated. 225 | #latex_domain_indices = True 226 | 227 | 228 | # -- Options for manual page output --------------------------------------- 229 | 230 | # One entry per manual page. List of tuples 231 | # (source start file, name, description, authors, manual section). 232 | man_pages = [ 233 | ('index', 'pycohttpparser', u'pycohttpparser Documentation', 234 | [u'Cory Benfield'], 1) 235 | ] 236 | 237 | # If true, show URL addresses after external links. 
238 | #man_show_urls = False 239 | 240 | 241 | # -- Options for Texinfo output ------------------------------------------- 242 | 243 | # Grouping the document tree into Texinfo files. List of tuples 244 | # (source start file, target name, title, author, 245 | # dir menu entry, description, category) 246 | texinfo_documents = [ 247 | ('index', 'pycohttpparser', u'pycohttpparser Documentation', 248 | u'Cory Benfield', 'pycohttpparser', 'One line description of project.', 249 | 'Miscellaneous'), 250 | ] 251 | 252 | # Documents to append as an appendix to all manuals. 253 | #texinfo_appendices = [] 254 | 255 | # If false, no module index is generated. 256 | #texinfo_domain_indices = True 257 | 258 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 259 | #texinfo_show_urls = 'footnote' 260 | 261 | # If true, do not generate a @detailmenu in the "Top" node's menu. 262 | #texinfo_no_detailmenu = False 263 | 264 | 265 | # Example configuration for intersphinx: refer to the Python standard library. 266 | intersphinx_mapping = {'http://docs.python.org/': None} 267 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | pycohttpparser 2 | ============== 3 | 4 | pycohttpparser is a Python wrapper library around the excellent 5 | `picohttpparser`_ C library. It aims to provide a speedy C implementation of 6 | the HTTP/1.1 parsing API used in the `hyper`_ project. Using CFFI, it supports 7 | both CPython 2.7 and 3.4, and PyPy. 8 | 9 | To get started with pycohttpparser, simply install it from the cheeseshop: 10 | 11 | .. code-block:: bash 12 | 13 | $ pip install pycohttpparser 14 | 15 | You'll need to make sure your system is set up for using CFFI. For more 16 | information, `consult CFFI's documentation`_. 17 | 18 | Then, you can start parsing your HTTP messages! For example: 19 | 20 | .. code-block:: pycon 21 | 22 | >>> import pycohttpparser.api as p 23 | >>> message = socket.recv() 24 | >>> m = memoryview(message) 25 | >>> c = p.Parser() 26 | >>> r = c.parse_request(m) 27 | >>> r.method.tobytes() 28 | b'POST' 29 | >>> r.path.tobytes() 30 | b'/post' 31 | 32 | Contents 33 | -------- 34 | 35 | .. toctree:: 36 | :maxdepth: 2 37 | 38 | api 39 | 40 | 41 | .. _picohttpparser: https://github.com/h2o/picohttpparser 42 | .. _hyper: http://hyper.readthedocs.org/ 43 | ..
_consult CFFI's documentation: https://cffi.readthedocs.org/en/latest/#installation-and-status 44 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import os 4 | import re 5 | import sys 6 | 7 | from setuptools import setup, find_packages 8 | 9 | 10 | # Get the version 11 | version_regex = r'__version__ = ["\']([^"\']*)["\']' 12 | with open('src/pycohttpparser/__init__.py', 'r') as f: 13 | text = f.read() 14 | match = re.search(version_regex, text) 15 | 16 | if match: 17 | version = match.group(1) 18 | else: 19 | raise RuntimeError("No version number found!") 20 | 21 | # Stealing this from Kenneth Reitz 22 | if sys.argv[-1] == 'publish': 23 | os.system('python setup.py sdist upload') 24 | sys.exit() 25 | 26 | setup( 27 | name='pycohttpparser', 28 | version=version, 29 | description='Python wrapper for picohttpparser', 30 | long_description=open('README.rst').read() + '\r\n' + open('HISTORY.rst').read(), 31 | author='Cory Benfield', 32 | author_email='cory@lukasa.co.uk', 33 | url='https://github.com/Lukasa/pycohttpparser', 34 | packages=find_packages('src'), 35 | package_dir={'': 'src'}, 36 | license='MIT License', 37 | classifiers=[ 38 | 'Development Status :: 4 - Beta', 39 | 'Intended Audience :: Developers', 40 | 'License :: OSI Approved :: MIT License', 41 | 'Programming Language :: Python', 42 | 'Programming Language :: Python :: 2', 43 | 'Programming Language :: Python :: 2.7', 44 | 'Programming Language :: Python :: 3', 45 | 'Programming Language :: Python :: 3.4', 46 | ], 47 | install_requires=['cffi>=1.0.0'], 48 | setup_requires=['cffi>=1.0.0'], 49 | zip_safe=False, 50 | cffi_modules=["src/pycohttpparser/build.py:ffi"], 51 | ext_package="pycohttpparser", 52 | ) 53 | -------------------------------------------------------------------------------- /src/pycohttpparser/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | pycohttpparser 4 | ~~~~~~~~~~~~~~ 5 | """ 6 | __version__ = '1.0.0' 7 | -------------------------------------------------------------------------------- /src/pycohttpparser/api.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | pycohttpparser/api 4 | ~~~~~~~~~~~~~~~~~~ 5 | 6 | Defines the public API to pycohttpparser. 7 | """ 8 | from collections import namedtuple 9 | 10 | from ._pycohttpparser import lib, ffi 11 | 12 | 13 | Request = namedtuple( 14 | 'Request', ['method', 'path', 'minor_version', 'headers', 'consumed'] 15 | ) 16 | Response = namedtuple( 17 | 'Response', ['status', 'msg', 'minor_version', 'headers', 'consumed'] 18 | ) 19 | 20 | 21 | class ParseError(Exception): 22 | """ 23 | An invalid HTTP message was passed to the parser. 24 | """ 25 | 26 | 27 | class Parser(object): 28 | """ 29 | A single HTTP parser object. This object can parse HTTP requests and 30 | responses using picohttpparser. 31 | 32 | This object is not thread-safe, and it does maintain state that is shared 33 | across parsing requests. For this reason, make sure that access to this 34 | object is synchronized if you use it across multiple threads. 35 | """ 36 | def __init__(self): 37 | # Store some instance variables. This represents essentially static 38 | # allocations that are used repeatedly in some of the parsing code. 
39 | # This avoids the overhead of repeatedly allocating large chunks of 40 | # memory each time a parse is called. 41 | # Allocate all the data that will come out of the method. 42 | self._method = self._msg = ffi.new("char **") 43 | self._method_len = self._msg_len = ffi.new("size_t *") 44 | self._path = ffi.new("char **") 45 | self._path_len = ffi.new("size_t *") 46 | self._minor_version = ffi.new("int *") 47 | self._status = ffi.new("int *") 48 | 49 | # Allow space for 1000 headers. Anything more is clearly nonsense. 50 | self._header_count = 1000 51 | self._headers = ffi.new("struct phr_header [1000]") 52 | self._num_headers = ffi.new("size_t *", self._header_count) 53 | 54 | def parse_request(self, buffer): 55 | """ 56 | Parses a single HTTP request from a buffer. 57 | 58 | :param buffer: A ``memoryview`` object wrapping a buffer containing a 59 | HTTP request. 60 | :returns: A :class:`Request ` object, or 61 | ``None`` if there is not enough data in the buffer. 62 | """ 63 | # Allocate function inputs 64 | buffer_size = ffi.cast("size_t", len(buffer)) 65 | phr_buffer = ffi.new("char []", buffer.tobytes()) 66 | last_len = ffi.cast("size_t", 0) 67 | 68 | # Reset the header count. 69 | self._num_headers[0] = self._header_count 70 | 71 | # Do the parse. 72 | pret = lib.phr_parse_request( 73 | phr_buffer, 74 | buffer_size, 75 | self._method, 76 | self._method_len, 77 | self._path, 78 | self._path_len, 79 | self._minor_version, 80 | self._headers, 81 | self._num_headers, 82 | last_len 83 | ) 84 | 85 | # Check for insufficient data or parse errors. 86 | if pret == -2: 87 | return None 88 | elif pret == -1: 89 | raise ParseError("Invalid message") 90 | 91 | # If we got here we have a full request. We need to return useful 92 | # data. A useful trick here: all the returned char pointers are 93 | # pointers into buffer. This means we can use them as offsets and 94 | # return memoryviews to their data. Snazzy, right? 95 | method = b'' 96 | path = b'' 97 | minor_version = -1 98 | 99 | offset = self._method[0] - phr_buffer 100 | element_len = self._method_len[0] 101 | method = buffer[offset:offset+element_len] 102 | 103 | offset = self._path[0] - phr_buffer 104 | element_len = self._path_len[0] 105 | path = buffer[offset:offset+element_len] 106 | 107 | minor_version = self._minor_version[0] 108 | 109 | # We can create the Request object now, because all the scalar fields 110 | # are ready. We can put the headers into a list already hung from it. 111 | req = Request(method, path, minor_version, [], pret) 112 | 113 | for header in self._build_headers(phr_buffer, buffer): 114 | req.headers.append(header) 115 | 116 | return req 117 | 118 | def parse_response(self, buffer): 119 | """ 120 | Parses a single HTTP response from a buffer. 121 | 122 | :param buffer: A ``memoryview`` object wrapping a buffer containing a 123 | HTTP response. 124 | :returns: A :class:`Response ` object, or 125 | ``None`` if there is not enough data in the buffer. 126 | """ 127 | # Allocate function inputs 128 | buffer_size = ffi.cast("size_t", len(buffer)) 129 | phr_buffer = ffi.new("char []", buffer.tobytes()) 130 | last_len = ffi.cast("size_t", 0) 131 | 132 | # Reset the header count. 133 | self._num_headers[0] = self._header_count 134 | 135 | # Do the parse. 
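# (phr_parse_response follows the same convention as phr_parse_request
# above: it returns the number of bytes consumed on success, -2 when the
# buffer does not yet contain a complete message, and -1 when the message
# is malformed.)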
136 | pret = lib.phr_parse_response( 137 | phr_buffer, 138 | buffer_size, 139 | self._minor_version, 140 | self._status, 141 | self._msg, 142 | self._msg_len, 143 | self._headers, 144 | self._num_headers, 145 | last_len 146 | ) 147 | 148 | # Check for insufficient data or parse errors. 149 | if pret == -2: 150 | return None 151 | elif pret == -1: 152 | raise ParseError("Invalid message") 153 | 154 | # If we got here we have a full response. We need to return useful 155 | # data. A useful trick here: all the returned char pointers are 156 | # pointers into buffer. This means we can use them as offsets and 157 | # return memoryviews to their data. Snazzy, right? 158 | msg = b'' 159 | status = 0 160 | minor_version = -1 161 | 162 | status = self._status[0] 163 | 164 | offset = self._msg[0] - phr_buffer 165 | element_len = self._msg_len[0] 166 | msg = buffer[offset:offset+element_len] 167 | 168 | minor_version = self._minor_version[0] 169 | 170 | # We can create the Response object now, because all the scalar fields 171 | # are ready. We can put the headers into a list already hung from it. 172 | req = Response(status, msg, minor_version, [], pret) 173 | 174 | for header in self._build_headers(phr_buffer, buffer): 175 | req.headers.append(header) 176 | 177 | return req 178 | 179 | def _build_headers(self, phr_buffer, orig_buffer): 180 | """ 181 | Called by a parsing routine to build a collection of header names and 182 | values. 183 | """ 184 | for index in range(self._num_headers[0]): 185 | header_struct = self._headers[index] 186 | name_index = header_struct.name - phr_buffer 187 | value_index = header_struct.value - phr_buffer 188 | name_len = header_struct.name_len 189 | value_len = header_struct.value_len 190 | 191 | name = orig_buffer[name_index:name_index+name_len] 192 | 193 | value = orig_buffer[value_index:value_index+value_len] 194 | 195 | yield (name, value) 196 | -------------------------------------------------------------------------------- /src/pycohttpparser/build.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | pycohttpparser/build.py 4 | ~~~~~~~~~~~~~~~~~~~~~~~ 5 | 6 | The CFFI build code in pycohttpparser.
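It reads picohttpparser.c from this directory, declares the phr_* API to
CFFI via ``ffi.cdef``, and builds the ``_pycohttpparser`` extension module
that ``pycohttpparser.api`` imports.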
7 | """ 8 | import os.path 9 | 10 | from cffi import FFI 11 | ffi = FFI() 12 | 13 | 14 | path = os.path.join(os.path.dirname(__file__), 'picohttpparser.c') 15 | with open(path, 'r') as f: 16 | data = f.read() 17 | 18 | ffi.cdef(""" 19 | struct phr_header { 20 | const char* name; 21 | size_t name_len; 22 | const char* value; 23 | size_t value_len; 24 | }; 25 | 26 | int phr_parse_request(const char* buf, size_t len, const char** method, 27 | size_t* method_len, const char** path, 28 | size_t* path_len, int* minor_version, 29 | struct phr_header* headers, size_t* num_headers, 30 | size_t last_len); 31 | 32 | int phr_parse_response(const char* _buf, size_t len, int *minor_version, 33 | int *status, const char **msg, size_t *msg_len, 34 | struct phr_header* headers, size_t* num_headers, 35 | size_t last_len); 36 | 37 | int phr_parse_headers(const char* buf, size_t len, 38 | struct phr_header* headers, size_t* num_headers, 39 | size_t last_len); 40 | 41 | struct phr_chunked_decoder { 42 | size_t bytes_left_in_chunk; /* number of bytes left in current chunk */ 43 | char consume_trailer; /* if trailing headers should be consumed */ 44 | char _hex_count; 45 | char _state; 46 | }; 47 | 48 | ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, 49 | size_t *bufsz); 50 | 51 | """) 52 | 53 | ffi.set_source( 54 | '_pycohttpparser', 55 | """#include 56 | """ + data, 57 | include_dirs=["src/pycohttpparser"], 58 | ) 59 | 60 | if __name__ == '__main__': 61 | ffi.compile() 62 | -------------------------------------------------------------------------------- /src/pycohttpparser/picohttpparser.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase, 3 | * Shigeo Mitsunari 4 | * 5 | * The software is licensed under either the MIT License (below) or the Perl 6 | * license. 7 | * 8 | * Permission is hereby granted, free of charge, to any person obtaining a copy 9 | * of this software and associated documentation files (the "Software"), to 10 | * deal in the Software without restriction, including without limitation the 11 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 12 | * sell copies of the Software, and to permit persons to whom the Software is 13 | * furnished to do so, subject to the following conditions: 14 | * 15 | * The above copyright notice and this permission notice shall be included in 16 | * all copies or substantial portions of the Software. 17 | * 18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 24 | * IN THE SOFTWARE. 
25 | */ 26 | 27 | #include 28 | #include 29 | #include 30 | #ifdef __SSE4_2__ 31 | #ifdef _MSC_VER 32 | #include 33 | #else 34 | #include 35 | #endif 36 | #endif 37 | #include "picohttpparser.h" 38 | 39 | /* $Id: 8e21379070e9a13462ee692b40548e5fd59a547c $ */ 40 | 41 | #if __GNUC__ >= 3 42 | #define likely(x) __builtin_expect(!!(x), 1) 43 | #define unlikely(x) __builtin_expect(!!(x), 0) 44 | #else 45 | #define likely(x) (x) 46 | #define unlikely(x) (x) 47 | #endif 48 | 49 | #ifdef _MSC_VER 50 | #define ALIGNED(n) _declspec(align(n)) 51 | #else 52 | #define ALIGNED(n) __attribute__((aligned(n))) 53 | #endif 54 | 55 | #define IS_PRINTABLE_ASCII(c) ((unsigned char)(c)-040u < 0137u) 56 | 57 | #define CHECK_EOF() \ 58 | if (buf == buf_end) { \ 59 | *ret = -2; \ 60 | return NULL; \ 61 | } 62 | 63 | #define EXPECT_CHAR(ch) \ 64 | CHECK_EOF(); \ 65 | if (*buf++ != ch) { \ 66 | *ret = -1; \ 67 | return NULL; \ 68 | } 69 | 70 | #define ADVANCE_TOKEN(tok, toklen) \ 71 | do { \ 72 | const char *tok_start = buf; \ 73 | static const char ALIGNED(16) ranges2[] = "\000\040\177\177"; \ 74 | int found2; \ 75 | buf = findchar_fast(buf, buf_end, ranges2, sizeof(ranges2) - 1, &found2); \ 76 | if (!found2) { \ 77 | CHECK_EOF(); \ 78 | } \ 79 | while (1) { \ 80 | if (*buf == ' ') { \ 81 | break; \ 82 | } else if (unlikely(!IS_PRINTABLE_ASCII(*buf))) { \ 83 | if ((unsigned char)*buf < '\040' || *buf == '\177') { \ 84 | *ret = -1; \ 85 | return NULL; \ 86 | } \ 87 | } \ 88 | ++buf; \ 89 | CHECK_EOF(); \ 90 | } \ 91 | tok = tok_start; \ 92 | toklen = buf - tok_start; \ 93 | } while (0) 94 | 95 | static const char *token_char_map = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" 96 | "\0\1\1\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0" 97 | "\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1" 98 | "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0" 99 | "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" 100 | "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" 101 | "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" 102 | "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; 103 | 104 | static const char *findchar_fast(const char *buf, const char *buf_end, const char *ranges, size_t ranges_size, int *found) 105 | { 106 | *found = 0; 107 | #if __SSE4_2__ 108 | if (likely(buf_end - buf >= 16)) { 109 | __m128i ranges16 = _mm_loadu_si128((const __m128i *)ranges); 110 | 111 | size_t left = (buf_end - buf) & ~15; 112 | do { 113 | __m128i b16 = _mm_loadu_si128((void *)buf); 114 | int r = _mm_cmpestri(ranges16, ranges_size, b16, 16, _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS); 115 | if (unlikely(r != 16)) { 116 | buf += r; 117 | *found = 1; 118 | break; 119 | } 120 | buf += 16; 121 | left -= 16; 122 | } while (likely(left != 0)); 123 | } 124 | #else 125 | /* suppress unused parameter warning */ 126 | (void)buf_end; 127 | (void)ranges; 128 | (void)ranges_size; 129 | #endif 130 | return buf; 131 | } 132 | 133 | static const char *get_token_to_eol(const char *buf, const char *buf_end, const char **token, size_t *token_len, int *ret) 134 | { 135 | const char *token_start = buf; 136 | 137 | #ifdef __SSE4_2__ 138 | static const char ranges1[] = "\0\010" 139 | /* allow HT */ 140 | "\012\037" 141 | /* allow SP and up to but not including DEL */ 142 | "\177\177" 143 | /* allow chars w. 
MSB set */ 144 | ; 145 | int found; 146 | buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found); 147 | if (found) 148 | goto FOUND_CTL; 149 | #else 150 | /* find non-printable char within the next 8 bytes, this is the hottest code; manually inlined */ 151 | while (likely(buf_end - buf >= 8)) { 152 | #define DOIT() \ 153 | do { \ 154 | if (unlikely(!IS_PRINTABLE_ASCII(*buf))) \ 155 | goto NonPrintable; \ 156 | ++buf; \ 157 | } while (0) 158 | DOIT(); 159 | DOIT(); 160 | DOIT(); 161 | DOIT(); 162 | DOIT(); 163 | DOIT(); 164 | DOIT(); 165 | DOIT(); 166 | #undef DOIT 167 | continue; 168 | NonPrintable: 169 | if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) { 170 | goto FOUND_CTL; 171 | } 172 | ++buf; 173 | } 174 | #endif 175 | for (;; ++buf) { 176 | CHECK_EOF(); 177 | if (unlikely(!IS_PRINTABLE_ASCII(*buf))) { 178 | if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) { 179 | goto FOUND_CTL; 180 | } 181 | } 182 | } 183 | FOUND_CTL: 184 | if (likely(*buf == '\015')) { 185 | ++buf; 186 | EXPECT_CHAR('\012'); 187 | *token_len = buf - 2 - token_start; 188 | } else if (*buf == '\012') { 189 | *token_len = buf - token_start; 190 | ++buf; 191 | } else { 192 | *ret = -1; 193 | return NULL; 194 | } 195 | *token = token_start; 196 | 197 | return buf; 198 | } 199 | 200 | static const char *is_complete(const char *buf, const char *buf_end, size_t last_len, int *ret) 201 | { 202 | int ret_cnt = 0; 203 | buf = last_len < 3 ? buf : buf + last_len - 3; 204 | 205 | while (1) { 206 | CHECK_EOF(); 207 | if (*buf == '\015') { 208 | ++buf; 209 | CHECK_EOF(); 210 | EXPECT_CHAR('\012'); 211 | ++ret_cnt; 212 | } else if (*buf == '\012') { 213 | ++buf; 214 | ++ret_cnt; 215 | } else { 216 | ++buf; 217 | ret_cnt = 0; 218 | } 219 | if (ret_cnt == 2) { 220 | return buf; 221 | } 222 | } 223 | 224 | *ret = -2; 225 | return NULL; 226 | } 227 | 228 | /* *_buf is always within [buf, buf_end) upon success */ 229 | static const char *parse_int(const char *buf, const char *buf_end, int *value, int *ret) 230 | { 231 | int v; 232 | CHECK_EOF(); 233 | if (!('0' <= *buf && *buf <= '9')) { 234 | *ret = -1; 235 | return NULL; 236 | } 237 | v = 0; 238 | for (;; ++buf) { 239 | CHECK_EOF(); 240 | if ('0' <= *buf && *buf <= '9') { 241 | v = v * 10 + *buf - '0'; 242 | } else { 243 | break; 244 | } 245 | } 246 | 247 | *value = v; 248 | return buf; 249 | } 250 | 251 | /* returned pointer is always within [buf, buf_end), or null */ 252 | static const char *parse_http_version(const char *buf, const char *buf_end, int *minor_version, int *ret) 253 | { 254 | EXPECT_CHAR('H'); 255 | EXPECT_CHAR('T'); 256 | EXPECT_CHAR('T'); 257 | EXPECT_CHAR('P'); 258 | EXPECT_CHAR('/'); 259 | EXPECT_CHAR('1'); 260 | EXPECT_CHAR('.'); 261 | return parse_int(buf, buf_end, minor_version, ret); 262 | } 263 | 264 | static const char *parse_headers(const char *buf, const char *buf_end, struct phr_header *headers, size_t *num_headers, 265 | size_t max_headers, int *ret) 266 | { 267 | for (;; ++*num_headers) { 268 | CHECK_EOF(); 269 | if (*buf == '\015') { 270 | ++buf; 271 | EXPECT_CHAR('\012'); 272 | break; 273 | } else if (*buf == '\012') { 274 | ++buf; 275 | break; 276 | } 277 | if (*num_headers == max_headers) { 278 | *ret = -1; 279 | return NULL; 280 | } 281 | if (!(*num_headers != 0 && (*buf == ' ' || *buf == '\t'))) { 282 | static const char ALIGNED(16) ranges1[] = "::\x00\037"; 283 | int found; 284 | if (!token_char_map[(unsigned char)*buf]) { 285 | 
*ret = -1; 286 | return NULL; 287 | } 288 | /* parsing name, but do not discard SP before colon, see 289 | * http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */ 290 | headers[*num_headers].name = buf; 291 | buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found); 292 | if (!found) { 293 | CHECK_EOF(); 294 | } 295 | while (1) { 296 | if (*buf == ':') { 297 | break; 298 | } else if (*buf < ' ') { 299 | *ret = -1; 300 | return NULL; 301 | } 302 | ++buf; 303 | CHECK_EOF(); 304 | } 305 | headers[*num_headers].name_len = buf - headers[*num_headers].name; 306 | ++buf; 307 | for (;; ++buf) { 308 | CHECK_EOF(); 309 | if (!(*buf == ' ' || *buf == '\t')) { 310 | break; 311 | } 312 | } 313 | } else { 314 | headers[*num_headers].name = NULL; 315 | headers[*num_headers].name_len = 0; 316 | } 317 | if ((buf = get_token_to_eol(buf, buf_end, &headers[*num_headers].value, &headers[*num_headers].value_len, ret)) == NULL) { 318 | return NULL; 319 | } 320 | } 321 | return buf; 322 | } 323 | 324 | static const char *parse_request(const char *buf, const char *buf_end, const char **method, size_t *method_len, const char **path, 325 | size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers, 326 | size_t max_headers, int *ret) 327 | { 328 | /* skip first empty line (some clients add CRLF after POST content) */ 329 | CHECK_EOF(); 330 | if (*buf == '\015') { 331 | ++buf; 332 | EXPECT_CHAR('\012'); 333 | } else if (*buf == '\012') { 334 | ++buf; 335 | } 336 | 337 | /* parse request line */ 338 | ADVANCE_TOKEN(*method, *method_len); 339 | ++buf; 340 | ADVANCE_TOKEN(*path, *path_len); 341 | ++buf; 342 | if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) { 343 | return NULL; 344 | } 345 | if (*buf == '\015') { 346 | ++buf; 347 | EXPECT_CHAR('\012'); 348 | } else if (*buf == '\012') { 349 | ++buf; 350 | } else { 351 | *ret = -1; 352 | return NULL; 353 | } 354 | 355 | return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret); 356 | } 357 | 358 | int phr_parse_request(const char *buf_start, size_t len, const char **method, size_t *method_len, const char **path, 359 | size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers, size_t last_len) 360 | { 361 | const char *buf = buf_start, *buf_end = buf_start + len; 362 | size_t max_headers = *num_headers; 363 | int r; 364 | 365 | *method = NULL; 366 | *method_len = 0; 367 | *path = NULL; 368 | *path_len = 0; 369 | *minor_version = -1; 370 | *num_headers = 0; 371 | 372 | /* if last_len != 0, check if the request is complete (a fast countermeasure 373 | againt slowloris */ 374 | if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) { 375 | return r; 376 | } 377 | 378 | if ((buf = parse_request(buf, buf_end, method, method_len, path, path_len, minor_version, headers, num_headers, max_headers, 379 | &r)) == NULL) { 380 | return r; 381 | } 382 | 383 | return (int)(buf - buf_start); 384 | } 385 | 386 | static const char *parse_response(const char *buf, const char *buf_end, int *minor_version, int *status, const char **msg, 387 | size_t *msg_len, struct phr_header *headers, size_t *num_headers, size_t max_headers, int *ret) 388 | { 389 | /* parse "HTTP/1.x" */ 390 | if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) { 391 | return NULL; 392 | } 393 | /* skip space */ 394 | if (*buf++ != ' ') { 395 | *ret = -1; 396 | return NULL; 397 | } 398 | /* parse status code */ 399 | if ((buf = parse_int(buf, buf_end, status, ret)) 
== NULL) { 400 | return NULL; 401 | } 402 | /* skip space */ 403 | if (*buf++ != ' ') { 404 | *ret = -1; 405 | return NULL; 406 | } 407 | /* get message */ 408 | if ((buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret)) == NULL) { 409 | return NULL; 410 | } 411 | 412 | return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret); 413 | } 414 | 415 | int phr_parse_response(const char *buf_start, size_t len, int *minor_version, int *status, const char **msg, size_t *msg_len, 416 | struct phr_header *headers, size_t *num_headers, size_t last_len) 417 | { 418 | const char *buf = buf_start, *buf_end = buf + len; 419 | size_t max_headers = *num_headers; 420 | int r; 421 | 422 | *minor_version = -1; 423 | *status = 0; 424 | *msg = NULL; 425 | *msg_len = 0; 426 | *num_headers = 0; 427 | 428 | /* if last_len != 0, check if the response is complete (a fast countermeasure 429 | against slowloris */ 430 | if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) { 431 | return r; 432 | } 433 | 434 | if ((buf = parse_response(buf, buf_end, minor_version, status, msg, msg_len, headers, num_headers, max_headers, &r)) == NULL) { 435 | return r; 436 | } 437 | 438 | return (int)(buf - buf_start); 439 | } 440 | 441 | int phr_parse_headers(const char *buf_start, size_t len, struct phr_header *headers, size_t *num_headers, size_t last_len) 442 | { 443 | const char *buf = buf_start, *buf_end = buf + len; 444 | size_t max_headers = *num_headers; 445 | int r; 446 | 447 | *num_headers = 0; 448 | 449 | /* if last_len != 0, check if the response is complete (a fast countermeasure 450 | against slowloris */ 451 | if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) { 452 | return r; 453 | } 454 | 455 | if ((buf = parse_headers(buf, buf_end, headers, num_headers, max_headers, &r)) == NULL) { 456 | return r; 457 | } 458 | 459 | return (int)(buf - buf_start); 460 | } 461 | 462 | enum { 463 | CHUNKED_IN_CHUNK_SIZE, 464 | CHUNKED_IN_CHUNK_EXT, 465 | CHUNKED_IN_CHUNK_DATA, 466 | CHUNKED_IN_CHUNK_CRLF, 467 | CHUNKED_IN_TRAILERS_LINE_HEAD, 468 | CHUNKED_IN_TRAILERS_LINE_MIDDLE 469 | }; 470 | 471 | static int decode_hex(int ch) 472 | { 473 | if ('0' <= ch && ch <= '9') { 474 | return ch - '0'; 475 | } else if ('A' <= ch && ch <= 'F') { 476 | return ch - 'A' + 0xa; 477 | } else if ('a' <= ch && ch <= 'f') { 478 | return ch - 'a' + 0xa; 479 | } else { 480 | return -1; 481 | } 482 | } 483 | 484 | ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *_bufsz) 485 | { 486 | size_t dst = 0, src = 0, bufsz = *_bufsz; 487 | ssize_t ret = -2; /* incomplete */ 488 | 489 | while (1) { 490 | switch (decoder->_state) { 491 | case CHUNKED_IN_CHUNK_SIZE: 492 | for (;; ++src) { 493 | int v; 494 | if (src == bufsz) 495 | goto Exit; 496 | if ((v = decode_hex(buf[src])) == -1) { 497 | if (decoder->_hex_count == 0) { 498 | ret = -1; 499 | goto Exit; 500 | } 501 | break; 502 | } 503 | if (decoder->_hex_count == sizeof(size_t) * 2) { 504 | ret = -1; 505 | goto Exit; 506 | } 507 | decoder->bytes_left_in_chunk = decoder->bytes_left_in_chunk * 16 + v; 508 | ++decoder->_hex_count; 509 | } 510 | decoder->_hex_count = 0; 511 | decoder->_state = CHUNKED_IN_CHUNK_EXT; 512 | /* fallthru */ 513 | case CHUNKED_IN_CHUNK_EXT: 514 | /* RFC 7230 A.2 "Line folding in chunk extensions is disallowed" */ 515 | for (;; ++src) { 516 | if (src == bufsz) 517 | goto Exit; 518 | if (buf[src] == '\012') 519 | break; 520 | } 521 | ++src; 522 | if (decoder->bytes_left_in_chunk == 0) { 523 | if 
(decoder->consume_trailer) { 524 | decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD; 525 | break; 526 | } else { 527 | goto Complete; 528 | } 529 | } 530 | decoder->_state = CHUNKED_IN_CHUNK_DATA; 531 | /* fallthru */ 532 | case CHUNKED_IN_CHUNK_DATA: { 533 | size_t avail = bufsz - src; 534 | if (avail < decoder->bytes_left_in_chunk) { 535 | if (dst != src) 536 | memmove(buf + dst, buf + src, avail); 537 | src += avail; 538 | dst += avail; 539 | decoder->bytes_left_in_chunk -= avail; 540 | goto Exit; 541 | } 542 | if (dst != src) 543 | memmove(buf + dst, buf + src, decoder->bytes_left_in_chunk); 544 | src += decoder->bytes_left_in_chunk; 545 | dst += decoder->bytes_left_in_chunk; 546 | decoder->bytes_left_in_chunk = 0; 547 | decoder->_state = CHUNKED_IN_CHUNK_CRLF; 548 | } 549 | /* fallthru */ 550 | case CHUNKED_IN_CHUNK_CRLF: 551 | for (;; ++src) { 552 | if (src == bufsz) 553 | goto Exit; 554 | if (buf[src] != '\015') 555 | break; 556 | } 557 | if (buf[src] != '\012') { 558 | ret = -1; 559 | goto Exit; 560 | } 561 | ++src; 562 | decoder->_state = CHUNKED_IN_CHUNK_SIZE; 563 | break; 564 | case CHUNKED_IN_TRAILERS_LINE_HEAD: 565 | for (;; ++src) { 566 | if (src == bufsz) 567 | goto Exit; 568 | if (buf[src] != '\015') 569 | break; 570 | } 571 | if (buf[src++] == '\012') 572 | goto Complete; 573 | decoder->_state = CHUNKED_IN_TRAILERS_LINE_MIDDLE; 574 | /* fallthru */ 575 | case CHUNKED_IN_TRAILERS_LINE_MIDDLE: 576 | for (;; ++src) { 577 | if (src == bufsz) 578 | goto Exit; 579 | if (buf[src] == '\012') 580 | break; 581 | } 582 | ++src; 583 | decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD; 584 | break; 585 | default: 586 | assert(!"decoder is corrupt"); 587 | } 588 | } 589 | 590 | Complete: 591 | ret = bufsz - src; 592 | Exit: 593 | if (dst != src) 594 | memmove(buf + dst, buf + src, bufsz - src); 595 | *_bufsz = dst; 596 | return ret; 597 | } 598 | 599 | #undef CHECK_EOF 600 | #undef EXPECT_CHAR 601 | #undef ADVANCE_TOKEN 602 | -------------------------------------------------------------------------------- /src/pycohttpparser/picohttpparser.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase, 3 | * Shigeo Mitsunari 4 | * 5 | * The software is licensed under either the MIT License (below) or the Perl 6 | * license. 7 | * 8 | * Permission is hereby granted, free of charge, to any person obtaining a copy 9 | * of this software and associated documentation files (the "Software"), to 10 | * deal in the Software without restriction, including without limitation the 11 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 12 | * sell copies of the Software, and to permit persons to whom the Software is 13 | * furnished to do so, subject to the following conditions: 14 | * 15 | * The above copyright notice and this permission notice shall be included in 16 | * all copies or substantial portions of the Software. 17 | * 18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 24 | * IN THE SOFTWARE. 
25 | */ 26 | 27 | #ifndef picohttpparser_h 28 | #define picohttpparser_h 29 | 30 | #include <sys/types.h> 31 | 32 | #ifdef _MSC_VER 33 | #define ssize_t intptr_t 34 | #endif 35 | 36 | /* $Id: ded2259d5094ae4620381807de0d16f25b6d617c $ */ 37 | 38 | #ifdef __cplusplus 39 | extern "C" { 40 | #endif 41 | 42 | /* contains name and value of a header (name == NULL if is a continuing line 43 | * of a multiline header */ 44 | struct phr_header { 45 | const char *name; 46 | size_t name_len; 47 | const char *value; 48 | size_t value_len; 49 | }; 50 | 51 | /* returns number of bytes consumed if successful, -2 if request is partial, 52 | * -1 if failed */ 53 | int phr_parse_request(const char *buf, size_t len, const char **method, size_t *method_len, const char **path, size_t *path_len, 54 | int *minor_version, struct phr_header *headers, size_t *num_headers, size_t last_len); 55 | 56 | /* ditto */ 57 | int phr_parse_response(const char *_buf, size_t len, int *minor_version, int *status, const char **msg, size_t *msg_len, 58 | struct phr_header *headers, size_t *num_headers, size_t last_len); 59 | 60 | /* ditto */ 61 | int phr_parse_headers(const char *buf, size_t len, struct phr_header *headers, size_t *num_headers, size_t last_len); 62 | 63 | /* should be zero-filled before start */ 64 | struct phr_chunked_decoder { 65 | size_t bytes_left_in_chunk; /* number of bytes left in current chunk */ 66 | char consume_trailer; /* if trailing headers should be consumed */ 67 | char _hex_count; 68 | char _state; 69 | }; 70 | 71 | /* the function rewrites the buffer given as (buf, bufsz) removing the chunked- 72 | * encoding headers. When the function returns without an error, bufsz is 73 | * updated to the length of the decoded data available. Applications should 74 | * repeatedly call the function while it returns -2 (incomplete) every time 75 | * supplying newly arrived data. If the end of the chunked-encoded data is 76 | * found, the function returns a non-negative number indicating the number of 77 | * octets left undecoded at the tail of the supplied buffer. Returns -1 on 78 | * error.
79 | */ 80 | ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *bufsz); 81 | 82 | #ifdef __cplusplus 83 | } 84 | #endif 85 | 86 | #endif 87 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import pycohttpparser.api as p 4 | 5 | 6 | class Test(object): 7 | def test_basic_parsing(self): 8 | data = ( 9 | b"POST /post HTTP/1.1\r\n" 10 | b"User-Agent: hyper\r\n" 11 | b"content-length: 2\r\n" 12 | b"host: http2bin.org\r\n" 13 | b"\r\n" 14 | b"hi" 15 | ) 16 | m = memoryview(data) 17 | 18 | c = p.Parser() 19 | r = c.parse_request(m) 20 | 21 | assert r 22 | assert r.method.tobytes() == b'POST' 23 | assert r.path.tobytes() == b'/post' 24 | assert r.minor_version == 1 25 | 26 | expected_headers = [ 27 | (b'User-Agent', b'hyper'), 28 | (b'content-length', b'2'), 29 | (b'host', b'http2bin.org'), 30 | ] 31 | 32 | assert len(expected_headers) == len(r.headers) 33 | 34 | for (n1, v1), (n2, v2) in zip(r.headers, expected_headers): 35 | assert n1.tobytes() == n2 36 | assert v1.tobytes() == v2 37 | 38 | assert r.consumed == len(data) - 2 39 | 40 | def test_basic_response_parsing(self): 41 | data = ( 42 | b"HTTP/1.1 200 OK\r\n" 43 | b"Server: h2o\r\n" 44 | b"content-length: 2\r\n" 45 | b"Vary: accept-encoding\r\n" 46 | b"\r\n" 47 | b"hi" 48 | ) 49 | m = memoryview(data) 50 | 51 | c = p.Parser() 52 | r = c.parse_response(m) 53 | 54 | assert r 55 | assert r.status == 200 56 | assert r.msg.tobytes() == b'OK' 57 | assert r.minor_version == 1 58 | 59 | expected_headers = [ 60 | (b'Server', b'h2o'), 61 | (b'content-length', b'2'), 62 | (b'Vary', b'accept-encoding'), 63 | ] 64 | 65 | assert len(expected_headers) == len(r.headers) 66 | 67 | for (n1, v1), (n2, v2) in zip(r.headers, expected_headers): 68 | assert n1.tobytes() == n2 69 | assert v1.tobytes() == v2 70 | 71 | assert r.consumed == len(data) - 2 72 | 73 | def test_short_request(self): 74 | data = ( 75 | b"POST /post HTTP/1.1\r\n" 76 | b"User-Agent: hyper\r\n" 77 | b"content-length: 2\r\n" 78 | ) 79 | m = memoryview(data) 80 | 81 | c = p.Parser() 82 | r = c.parse_request(m) 83 | 84 | assert r is None 85 | 86 | def test_short_repsonse(self): 87 | data = ( 88 | b"HTTP/1.1 200 OK\r\n" 89 | b"Server: h2o\r\n" 90 | b"content" 91 | ) 92 | m = memoryview(data) 93 | 94 | c = p.Parser() 95 | r = c.parse_response(m) 96 | 97 | assert r is None 98 | 99 | def test_invalid_request(self): 100 | data = ( 101 | b"POST /post HTTP/1.1\r\n" 102 | b"User-Agent: hyper\r\n" 103 | b"content- 2\r\n" 104 | b"\r\n" 105 | ) 106 | m = memoryview(data) 107 | 108 | c = p.Parser() 109 | 110 | with pytest.raises(p.ParseError): 111 | c.parse_request(m) 112 | 113 | def test_invalid_repsonse(self): 114 | data = ( 115 | b"HTTP/1.1 200 OK\r\n" 116 | b"Server: h2o\r\n" 117 | b"content\r\n" 118 | b"\r\n" 119 | ) 120 | m = memoryview(data) 121 | 122 | c = p.Parser() 123 | 124 | with pytest.raises(p.ParseError): 125 | c.parse_response(m) 126 | -------------------------------------------------------------------------------- /test_requirements.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | pytest-cov 3 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py27, py33, py34, py35, pypy, lint 3 | 4 | [testenv] 5 | deps= 
-r{toxinidir}/test_requirements.txt 6 | commands= py.test test.py 7 | 8 | [testenv:lint] 9 | basepython=python3.4 10 | deps = flake8==2.5.1 11 | commands = flake8 --max-complexity 10 src/ test.py 12 | --------------------------------------------------------------------------------
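
The header comments in picohttpparser.h above specify the parser's contract: a non-negative byte count on success, -2 for a partial message, -1 on failure, with *num_headers carrying the header capacity on input and the parsed count on output, and last_len enabling the fast is_complete re-check. The repository only exercises this through the Python binding in test.py, so the following is a minimal illustrative C sketch of that contract, not code from the repository; the request bytes, the 4096-byte buffer, and the simulated 10-byte "reads" are invented for the example, and it assumes picohttpparser.{c,h} are compiled into the same program.

/* Illustrative sketch only -- not shipped in this repository. */
#include <stdio.h>
#include <string.h>
#include "picohttpparser.h"

int main(void)
{
    static const char wire[] =
        "GET /hello HTTP/1.1\r\n"
        "Host: example.org\r\n"
        "User-Agent: sketch\r\n"
        "\r\n";
    char buf[4096];
    size_t buflen = 0, prevbuflen = 0, wireoff = 0;

    while (wireoff < sizeof(wire) - 1) {
        /* pretend a socket handed us up to 10 more bytes */
        size_t n = sizeof(wire) - 1 - wireoff;
        if (n > 10)
            n = 10;
        memcpy(buf + buflen, wire + wireoff, n);
        prevbuflen = buflen;
        buflen += n;
        wireoff += n;

        const char *method, *path;
        size_t method_len, path_len;
        size_t num_headers = 16;          /* in: capacity, out: headers parsed */
        struct phr_header headers[16];
        int minor_version;

        /* passing the previous length as last_len lets the parser cheaply
         * re-check whether the newly appended bytes completed the request */
        int pret = phr_parse_request(buf, buflen, &method, &method_len,
                                     &path, &path_len, &minor_version,
                                     headers, &num_headers, prevbuflen);
        if (pret > 0) {
            /* pret is the number of bytes consumed by the request head */
            printf("parsed %.*s %.*s HTTP/1.%d, %zu headers, %d bytes consumed\n",
                   (int)method_len, method, (int)path_len, path,
                   minor_version, num_headers, pret);
            return 0;
        } else if (pret == -1) {
            printf("malformed request\n");
            return 1;
        }
        /* pret == -2: incomplete, keep reading */
    }
    printf("ran out of input before the request was complete\n");
    return 1;
}

Because only newly appended bytes can complete a message, passing the previous buffer length as last_len keeps repeated calls cheap while data trickles in, which is the "fast countermeasure against slowloris" the source comments refer to.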
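
Similarly, the phr_decode_chunked comment describes in-place decoding: the buffer is rewritten, *bufsz is updated to the decoded length, -2 asks for more data, -1 signals a broken stream, and a non-negative result gives the number of trailing octets that were not part of the chunked body. A short, hedged sketch of that loop, using an invented two-chunk payload, might look like this:

/* Illustrative sketch only -- not shipped in this repository. */
#include <stdio.h>
#include <string.h>
#include "picohttpparser.h"

int main(void)
{
    /* two chunks ("Wiki", "pedia") followed by the terminating zero-length chunk */
    char buf[] = "4\r\nWiki\r\n5\r\npedia\r\n0\r\n\r\n";
    size_t bufsz = sizeof(buf) - 1;

    struct phr_chunked_decoder decoder = {0}; /* must be zero-filled before use */
    decoder.consume_trailer = 1;              /* swallow any trailer section too */

    ssize_t rret = phr_decode_chunked(&decoder, buf, &bufsz);
    if (rret == -1) {
        printf("broken chunked encoding\n");
    } else if (rret == -2) {
        /* incomplete: keep the decoder, append newly received bytes after the
         * bufsz decoded bytes already in buf, and call again */
        printf("need more data; %zu bytes decoded so far\n", bufsz);
    } else {
        /* done: buf now holds bufsz decoded bytes, and rret octets at the tail
         * of the input were not part of the chunked data */
        printf("decoded %zu bytes: %.*s (%zd trailing octets)\n",
               bufsz, (int)bufsz, buf, rret);
    }
    return 0;
}

As the state machine above shows, setting consume_trailer makes the decoder also consume the trailer lines after the zero-length chunk; leaving it zero makes the call complete as soon as the terminating chunk-size line is seen, so any trailer bytes are left in the undecoded tail for the caller to parse.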