├── debian ├── compat ├── pyversions ├── source │ └── format ├── clean ├── changelog ├── rules ├── python-http-parser.preinst ├── control ├── watch └── copyright ├── THANKS ├── MANIFEST.in ├── .gitignore ├── http_parser ├── __init__.py ├── http_parser.gyp ├── pyversion_compat.h ├── reader.py ├── http.py ├── util.py ├── parser.pyx ├── http_parser.h ├── pyparser.py ├── py25.py └── http_parser.c ├── examples ├── httpstream.py └── httpparser.py ├── LICENSE ├── README.rst ├── NOTICE └── setup.py /debian/compat: -------------------------------------------------------------------------------- 1 | 7 2 | -------------------------------------------------------------------------------- /debian/pyversions: -------------------------------------------------------------------------------- 1 | 2.5- 2 | -------------------------------------------------------------------------------- /debian/source/format: -------------------------------------------------------------------------------- 1 | 3.0 (native) 2 | -------------------------------------------------------------------------------- /debian/clean: -------------------------------------------------------------------------------- 1 | http-parser.egg-info/* 2 | -------------------------------------------------------------------------------- /THANKS: -------------------------------------------------------------------------------- 1 | Benoit Calvez 2 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include .gitignore 2 | include LICENSE 3 | include NOTICE 4 | include README.rst 5 | include THANKS 6 | recursive-include http_parser * 7 | recursive-include examples * 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.gem 2 | *.swp 3 | *.pyc 4 | *.pyo 5 | *#* 6 | *.sw* 7 | 
build 8 | dist 9 | setuptools-* 10 | .svn/* 11 | .DS_Store 12 | *.so 13 | http_parser.egg-info 14 | nohup.out 15 | .coverage 16 | doc/.sass-cache 17 | -------------------------------------------------------------------------------- /http_parser/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 - 2 | # 3 | # This file is part of http_parser released under the MIT license. 4 | # See the NOTICE for more information. 5 | 6 | version_info = (0, 7, 5) 7 | __version__ = ".".join(map(str, version_info)) 8 | -------------------------------------------------------------------------------- /debian/changelog: -------------------------------------------------------------------------------- 1 | python-http-parser (0.6.0-1) unstable; urgency=low 2 | 3 | * bump version. 4 | 5 | -- Benoit Chesneau Mon, 20 Jun 2011 17:20:00 +0100 6 | 7 | python-http-parser (0.5.4-1) unstable; urgency=low 8 | 9 | * bump version. 10 | 11 | -- Benoit Chesneau Mon, 20 Jun 2011 15:52:00 +0100 12 | -------------------------------------------------------------------------------- /debian/rules: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | # -*- makefile -*- 3 | # Sample debian/rules that uses debhelper. 4 | # This file was originally written by Joey Hess and Craig Small. 5 | # As a special exception, when this file is copied by dh-make into a 6 | # dh-make output file, you may use that output file without restriction. 7 | # This special exception was added by Craig Small in version 0.37 of dh-make. 8 | 9 | # Uncomment this to turn on verbose mode. 10 | # export DH_VERBOSE=1 11 | 12 | %: 13 | dh $@ 14 | -------------------------------------------------------------------------------- /debian/python-http-parser.preinst: -------------------------------------------------------------------------------- 1 | #! 
/bin/sh 2 | 3 | set -e 4 | 5 | # This was added by stdeb to workaround Debian #479852. In a nutshell, 6 | # pycentral does not remove normally remove its symlinks on an 7 | # upgrade. Since we're using python-support, however, those symlinks 8 | # will be broken. This tells python-central to clean up any symlinks. 9 | if [ -e /var/lib/dpkg/info/python-http-parser.list ] && which pycentral >/dev/null 2>&1 10 | then 11 | pycentral pkgremove python-http-parser 12 | fi 13 | 14 | #DEBHELPER# 15 | -------------------------------------------------------------------------------- /debian/control: -------------------------------------------------------------------------------- 1 | Source: python-http-parser 2 | Section: python 3 | Priority: optional 4 | Maintainer: Benoit Chesneau 5 | Build-Depends: debhelper (>= 7), python-support, python-setuptools 6 | Standards-Version: 3.9.0.0 7 | Homepage: http://github.com/benoitc/http-parser 8 | 9 | Package: python-http-parser 10 | Architecture: all 11 | Depends: ${python:Depends}, ${shlibs:Depends}, ${misc:Depends} 12 | Provides: ${python:Provides} 13 | Description: Python http request/response parser 14 | HTTP request/response parser for Python in C under MIT License, based on 15 | http-parser from Ryan Dahl. 
16 | -------------------------------------------------------------------------------- /examples/httpstream.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import socket 3 | 4 | from http_parser.http import HttpStream 5 | from http_parser.reader import SocketReader 6 | 7 | from http_parser.util import b 8 | 9 | def main(): 10 | s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 11 | try: 12 | s.connect(('gunicorn.org', 80)) 13 | s.send(b("GET / HTTP/1.1\r\nHost: gunicorn.org\r\n\r\n")) 14 | p = HttpStream(SocketReader(s)) 15 | print(p.headers()) 16 | 17 | print(p.body_file().read()) 18 | finally: 19 | s.close() 20 | 21 | if __name__ == "__main__": 22 | main() 23 | 24 | 25 | -------------------------------------------------------------------------------- /debian/watch: -------------------------------------------------------------------------------- 1 | # Example watch control file for uscan 2 | # Rename this file to "watch" and then you can run the "uscan" command 3 | # to check for upstream updates and more. 
4 | # See uscan(1) for format 5 | 6 | # Compulsory line, this is a version 3 file 7 | version=3 8 | 9 | # Uncomment to examine a Webpage 10 | # 11 | #http://www.example.com/downloads.php python-couchdbkit-(.*)\.tar\.gz 12 | opts=dversionmangle=s/\+dfsg$// \ 13 | http://pypi.python.org/packages/source/c/http-parser/http-parser-(.*).tar.gz 14 | # http://github.com/benoitc/couchdbkit/downloads/ /benoitc/couchdbkit/tarball/([0-9].*) 15 | 16 | # Uncomment to examine a Webserver directory 17 | #http://www.example.com/pub/python-couchdbkit-(.*)\.tar\.gz 18 | 19 | # Uncommment to examine a FTP server 20 | #ftp://ftp.example.com/pub/python-couchdbkit-(.*)\.tar\.gz debian uupdate 21 | 22 | # Uncomment to find new files on sourceforge, for devscripts >= 2.9 23 | # http://sf.net/python-couchdbkit/python-couchdbkit-(.*)\.tar\.gz 24 | 25 | # Uncomment to find new files on GooglePages 26 | # http://example.googlepages.com/foo.html python-couchdbkit-(.*)\.tar\.gz 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2011,2012 (c) Benoît Chesneau 2 | 3 | Permission is hereby granted, free of charge, to any person 4 | obtaining a copy of this software and associated documentation 5 | files (the "Software"), to deal in the Software without 6 | restriction, including without limitation the rights to use, 7 | copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the 9 | Software is furnished to do so, subject to the following 10 | conditions: 11 | 12 | The above copyright notice and this permission notice shall be 13 | included in all copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 17 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 19 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 20 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /debian/copyright: -------------------------------------------------------------------------------- 1 | 2011 (c) Benoît Chesneau 2 | 3 | Permission is hereby granted, free of charge, to any person 4 | obtaining a copy of this software and associated documentation 5 | files (the "Software"), to deal in the Software without 6 | restriction, including without limitation the rights to use, 7 | copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the 9 | Software is furnished to do so, subject to the following 10 | conditions: 11 | 12 | The above copyright notice and this permission notice shall be 13 | included in all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 17 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 19 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 20 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 
23 | 24 | -------------------------------------------------------------------------------- /examples/httpparser.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import socket 3 | 4 | try: 5 | from http_parser.parser import HttpParser 6 | except ImportError: 7 | from http_parser.pyparser import HttpParser 8 | from http_parser.util import b 9 | 10 | def main(): 11 | 12 | p = HttpParser() 13 | s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 14 | body = [] 15 | header_done = False 16 | try: 17 | s.connect(('gunicorn.org', 80)) 18 | s.send(b("GET / HTTP/1.1\r\nHost: gunicorn.org\r\n\r\n")) 19 | 20 | while True: 21 | data = s.recv(1024) 22 | if not data: 23 | break 24 | 25 | recved = len(data) 26 | nparsed = p.execute(data, recved) 27 | assert nparsed == recved 28 | 29 | if p.is_headers_complete() and not header_done: 30 | print(p.get_headers()) 31 | print(p.get_headers()['content-length']) 32 | header_done = True 33 | 34 | if p.is_partial_body(): 35 | body.append(p.recv_body()) 36 | 37 | if p.is_message_complete(): 38 | break 39 | 40 | print(b("").join(body)) 41 | 42 | finally: 43 | s.close() 44 | 45 | if __name__ == "__main__": 46 | main() 47 | 48 | 49 | -------------------------------------------------------------------------------- /http_parser/http_parser.gyp: -------------------------------------------------------------------------------- 1 | # This file is used with the GYP meta build system. 
2 | # http://code.google.com/p/gyp/ 3 | # To build try this: 4 | # svn co http://gyp.googlecode.com/svn/trunk gyp 5 | # ./gyp/gyp -f make --depth=`pwd` http_parser.gyp 6 | # ./out/Debug/test 7 | { 8 | 'target_defaults': { 9 | 'default_configuration': 'Debug', 10 | 'configurations': { 11 | # TODO: hoist these out and put them somewhere common, because 12 | # RuntimeLibrary MUST MATCH across the entire project 13 | 'Debug': { 14 | 'defines': [ 'DEBUG', '_DEBUG' ], 15 | 'msvs_settings': { 16 | 'VCCLCompilerTool': { 17 | 'RuntimeLibrary': 1, # static debug 18 | }, 19 | }, 20 | }, 21 | 'Release': { 22 | 'defines': [ 'NDEBUG' ], 23 | 'msvs_settings': { 24 | 'VCCLCompilerTool': { 25 | 'RuntimeLibrary': 0, # static release 26 | }, 27 | }, 28 | } 29 | }, 30 | 'msvs_settings': { 31 | 'VCCLCompilerTool': { 32 | }, 33 | 'VCLibrarianTool': { 34 | }, 35 | 'VCLinkerTool': { 36 | 'GenerateDebugInformation': 'true', 37 | }, 38 | }, 39 | 'conditions': [ 40 | ['OS == "win"', { 41 | 'defines': [ 42 | 'WIN32' 43 | ], 44 | }] 45 | ], 46 | }, 47 | 48 | 'targets': [ 49 | { 50 | 'target_name': 'http_parser', 51 | 'type': 'static_library', 52 | 'include_dirs': [ '.' ], 53 | 'direct_dependent_settings': { 54 | 'include_dirs': [ '.' ], 55 | }, 56 | 'defines': [ 'HTTP_PARSER_STRICT=0' ], 57 | 'sources': [ './http_parser.c', ], 58 | 'conditions': [ 59 | ['OS=="win"', { 60 | 'msvs_settings': { 61 | 'VCCLCompilerTool': { 62 | # Compile as C++. http_parser.c is actually C99, but C++ is 63 | # close enough in this case. 
64 | 'CompileAs': 2, 65 | }, 66 | }, 67 | }] 68 | ], 69 | }, 70 | 71 | { 72 | 'target_name': 'test', 73 | 'type': 'executable', 74 | 'dependencies': [ 'http_parser' ], 75 | 'sources': [ 'test.c' ] 76 | } 77 | ] 78 | } 79 | 80 | -------------------------------------------------------------------------------- /http_parser/pyversion_compat.h: -------------------------------------------------------------------------------- 1 | #include "Python.h" 2 | 3 | #if PY_VERSION_HEX < 0x02070000 4 | #if PY_VERSION_HEX < 0x02060000 5 | #define PyObject_CheckBuffer(object) (0) 6 | 7 | #define PyObject_GetBuffer(obj, view, flags) (PyErr_SetString(PyExc_NotImplementedError, \ 8 | "new buffer interface is not available"), -1) 9 | #define PyBuffer_FillInfo(view, obj, buf, len, readonly, flags) (PyErr_SetString(PyExc_NotImplementedError, \ 10 | "new buffer interface is not available"), -1) 11 | #define PyBuffer_Release(obj) (PyErr_SetString(PyExc_NotImplementedError, \ 12 | "new buffer interface is not available"), -1) 13 | // Bytes->String 14 | #define PyBytes_FromStringAndSize PyString_FromStringAndSize 15 | #define PyBytes_FromString PyString_FromString 16 | #define PyBytes_AsString PyString_AsString 17 | #define PyBytes_Size PyString_Size 18 | #endif 19 | 20 | #define PyMemoryView_FromBuffer(info) (PyErr_SetString(PyExc_NotImplementedError, \ 21 | "new buffer interface is not available"), (PyObject *)NULL) 22 | #define PyMemoryView_FromObject(object) (PyErr_SetString(PyExc_NotImplementedError, \ 23 | "new buffer interface is not available"), (PyObject *)NULL) 24 | #endif 25 | 26 | #if PY_VERSION_HEX >= 0x03000000 27 | // for buffers 28 | #define Py_END_OF_BUFFER ((Py_ssize_t) 0) 29 | 30 | #define PyObject_CheckReadBuffer(object) (0) 31 | 32 | #define PyBuffer_FromMemory(ptr, s) (PyErr_SetString(PyExc_NotImplementedError, \ 33 | "old buffer interface is not available"), (PyObject *)NULL) 34 | #define PyBuffer_FromReadWriteMemory(ptr, s) (PyErr_SetString(PyExc_NotImplementedError, \ 
35 | "old buffer interface is not available"), (PyObject *)NULL) 36 | #define PyBuffer_FromObject(object, offset, size) (PyErr_SetString(PyExc_NotImplementedError, \ 37 | "old buffer interface is not available"), (PyObject *)NULL) 38 | #define PyBuffer_FromReadWriteObject(object, offset, size) (PyErr_SetString(PyExc_NotImplementedError, \ 39 | "old buffer interface is not available"), (PyObject *)NULL) 40 | 41 | #endif 42 | 43 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | http-parser 2 | ----------- 3 | 4 | HTTP request/response parser for Python compatible with Python 2.x 5 | (>=2.5.4), Python 3 and Pypy. If possible a C parser based on 6 | http-parser_ from Ryan Dahl will be used. 7 | 8 | http-parser is under the MIT license. 9 | 10 | Project url: https://github.com/benoitc/http-parser/ 11 | 12 | Requirements: 13 | ------------- 14 | 15 | - Python 2.5 or sup. Pypy latest version. 16 | - Cython if you need to rebuild the C code (Not needed for Pypy) 17 | 18 | Installation 19 | ------------ 20 | 21 | :: 22 | 23 | $ pip install http-parser 24 | 25 | Or install from source:: 26 | 27 | $ git clone git://github.com/benoitc/http-parser.git 28 | $ cd http-parser && python setup.py install 29 | 30 | 31 | Note: if you get an error on MacOSX try to install with the following 32 | arguments: 33 | 34 | $ env ARCHFLAGS="-arch i386 -arch x86_64" python setup.py install 35 | 36 | Usage 37 | ----- 38 | 39 | http-parser provide you **parser.HttpParser** low-level parser in C that 40 | you can access in your python program and **http.HttpStream** providing 41 | higher-level access to a readable,sequential io.RawIOBase object. 
42 | 43 | To help you in your day work, http-parser provides you 3 kind of readers 44 | in the reader module: IterReader to read iterables, StringReader to 45 | reads strings and StringIO objects, SocketReader to read sockets or 46 | objects with the same api (recv_into needed). You can of course use any 47 | io.RawIOBase object. 48 | 49 | Example of HttpStream 50 | +++++++++++++++++++++ 51 | 52 | ex:: 53 | 54 | #!/usr/bin/env python 55 | import socket 56 | 57 | from http_parser.http import HttpStream 58 | from http_parser.reader import SocketReader 59 | 60 | def main(): 61 | s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 62 | try: 63 | s.connect(('gunicorn.org', 80)) 64 | s.send("GET / HTTP/1.1\r\nHost: gunicorn.org\r\n\r\n") 65 | r = SocketReader(s) 66 | p = HttpStream(r) 67 | print p.headers() 68 | print p.body_file().read() 69 | finally: 70 | s.close() 71 | 72 | if __name__ == "__main__": 73 | main() 74 | 75 | Example of HttpParser: 76 | ++++++++++++++++++++++ 77 | 78 | :: 79 | 80 | #!/usr/bin/env python 81 | import socket 82 | 83 | # try to import C parser then fallback in pure python parser. 
84 | try: 85 | from http_parser.parser import HttpParser 86 | except ImportError: 87 | from http_parser.pyparser import HttpParser 88 | 89 | 90 | def main(): 91 | 92 | p = HttpParser() 93 | s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 94 | body = [] 95 | try: 96 | s.connect(('gunicorn.org', 80)) 97 | s.send("GET / HTTP/1.1\r\nHost: gunicorn.org\r\n\r\n") 98 | 99 | while True: 100 | data = s.recv(1024) 101 | if not data: 102 | break 103 | 104 | recved = len(data) 105 | nparsed = p.execute(data, recved) 106 | assert nparsed == recved 107 | 108 | if p.is_headers_complete(): 109 | print p.get_headers() 110 | 111 | if p.is_partial_body(): 112 | body.append(p.recv_body()) 113 | 114 | if p.is_message_complete(): 115 | break 116 | 117 | print "".join(body) 118 | 119 | finally: 120 | s.close() 121 | 122 | if __name__ == "__main__": 123 | main() 124 | 125 | 126 | You can find more docs in the code (or use a doc generator). 127 | 128 | 129 | Copyright 130 | --------- 131 | 132 | 2011,2012 (c) Benoît Chesneau 133 | 134 | 135 | .. http-parser_ https://github.com/ry/http-parser 136 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | http-parser 2 | 2011,2012 (c) Benoît Chesneau 3 | 4 | http-parser is released under the MIT license. See the LICENSE 5 | file for the complete license. 6 | 7 | 8 | http-parser.c, http-parser.h under MIT license 9 | ---------------------------------------------- 10 | 11 | Copyright Joyent, Inc. and other Node contributors. All rights reserved. 
12 | 13 | Permission is hereby granted, free of charge, to any person obtaining a 14 | copy of this software and associated documentation files (the 15 | "Software"), to deal in the Software without restriction, including 16 | without limitation the rights to use, copy, modify, merge, publish, 17 | distribute, sublicense, and/or sell copies of the Software, and to 18 | permit persons to whom the Software is furnished to do so, subject to 19 | the following conditions: 20 | 21 | The above copyright notice and this permission notice shall be included 22 | in all copies or substantial portions of the Software. 23 | 24 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 25 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 27 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 28 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 29 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 30 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 31 | 32 | 33 | 34 | setup.py my_build_ext function under MIT License 35 | ------------------------------------------------ 36 | 37 | Copyright Denis Bilenko and the contributors, http://www.gevent.org 38 | 39 | Permission is hereby granted, free of charge, to any person obtaining a 40 | copy of this software and associated documentation files (the 41 | "Software"), to deal in the Software without restriction, including 42 | without limitation the rights to use, copy, modify, merge, publish, 43 | distribute, sublicense, and/or sell copies of the Software, and to 44 | permit persons to whom the Software is furnished to do so, subject to 45 | the following conditions: 46 | 47 | The above copyright notice and this permission notice shall be included 48 | in all copies or substantial portions of the Software. 
49 | 50 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 51 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 52 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 53 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 54 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 55 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 56 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 57 | 58 | 59 | util.py - IOrderedDict 60 | ---------------------- 61 | 62 | IOrderedDict is based on collections.OrderedDict module, with 63 | insensitive key search support. 64 | 65 | Under PSF license. 66 | 67 | Copyright © 2001-2010 Python Software Foundation; All Rights Reserved 68 | 69 | This LICENSE AGREEMENT is between the Python Software Foundation 70 | (“PSF”), and the Individual or Organization (“Licensee”) accessing and 71 | otherwise using Python 2.7.1 software in source or binary form and its 72 | associated documentation. 73 | 74 | Subject to the terms and conditions of this License Agreement, PSF 75 | hereby grants Licensee a nonexclusive, royalty-free, world-wide license 76 | to reproduce, analyze, test, perform and/or display publicly, prepare 77 | derivative works, distribute, and otherwise use Python 2.7.1 alone or in 78 | any derivative version, provided, however, that PSF’s License Agreement 79 | and PSF’s notice of copyright, i.e., “Copyright © 2001-2010 Python 80 | Software Foundation; All Rights Reserved” are retained in Python 2.7.1 81 | alone or in any derivative version prepared by Licensee. 82 | 83 | In the event Licensee prepares a derivative work that is based on or 84 | incorporates Python 2.7.1 or any part thereof, and wants to make the 85 | derivative work available to others as provided herein, then Licensee 86 | hereby agrees to include in any such work a brief summary of the changes 87 | made to Python 2.7.1. 
88 | 89 | PSF is making Python 2.7.1 available to Licensee on an “AS IS” basis. 90 | PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY 91 | OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND DISCLAIMS ANY 92 | REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY 93 | PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 2.7.1 WILL NOT INFRINGE ANY 94 | THIRD PARTY RIGHTS. 95 | 96 | PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON 2.7.1 97 | FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A 98 | RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 2.7.1, OR 99 | ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 100 | 101 | This License Agreement will automatically terminate upon a material 102 | breach of its terms and conditions. 103 | 104 | Nothing in this License Agreement shall be deemed to create any 105 | relationship of agency, partnership, or joint venture between PSF and 106 | Licensee. This License Agreement does not grant permission to use PSF 107 | trademarks or trade name in a trademark sense to endorse or promote 108 | products or services of Licensee, or any third party. 109 | 110 | By copying, installing or otherwise using Python 2.7.1, Licensee agrees 111 | to be bound by the terms and conditions of this License Agreement. 112 | 113 | 114 | py25.IOBase, py25.RawIOBase, py25.BufferedReader, py25.TextIOWrapper: 115 | --------------------------------------------------------------------- 116 | 117 | Partial implementation of io classes from python 2.7. Only read 118 | functions have been ported. 119 | 120 | 121 | Under PSF license. 
122 | 123 | Copyright © 2001-2010 Python Software Foundation; All Rights Reserved 124 | 125 | 126 | -------------------------------------------------------------------------------- /http_parser/reader.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 - 2 | # 3 | # This file is part of http-parser released under the MIT license. 4 | 5 | # See the NOTICE for more information. 6 | 7 | from errno import EINTR, EAGAIN, EWOULDBLOCK 8 | import socket 9 | import sys 10 | import types 11 | try: 12 | from cStringIO import StringIO 13 | except ImportError: 14 | from StringIO import StringIO 15 | 16 | try: 17 | from io import DEFAULT_BUFFER_SIZE, RawIOBase 18 | except ImportError: 19 | from http_parser.py25 import DEFAULT_BUFFER_SIZE, RawIOBase 20 | 21 | try: 22 | bytes 23 | bytearray 24 | except (NameError, AttributeError): 25 | # python < 2.6 26 | from py25 import bytes, bytearray 27 | 28 | 29 | _blocking_errnos = ( EAGAIN, EWOULDBLOCK ) 30 | 31 | if sys.version_info < (2, 7, 0, 'final'): 32 | # in python 2.6 socket.recv_into doesn't support bytesarray 33 | import array 34 | def _readinto(sock, b): 35 | l = max(len(b), DEFAULT_BUFFER_SIZE) 36 | while True: 37 | try: 38 | buf = sock.recv(l) 39 | recved = len(buf) 40 | b[0:recved] = buf 41 | return recved 42 | except socket.error, e: 43 | n = e.args[0] 44 | if n == EINTR: 45 | continue 46 | if n in _blocking_errnos: 47 | return None 48 | raise 49 | else: 50 | _readinto = None 51 | 52 | class HttpBodyReader(RawIOBase): 53 | """ Raw implementation to stream http body """ 54 | 55 | def __init__(self, http_stream): 56 | self.http_stream = http_stream 57 | self.eof = False 58 | 59 | def readinto(self, b): 60 | if self.http_stream.parser.is_message_complete() or self.eof: 61 | if self.http_stream.parser.is_partial_body(): 62 | return self.http_stream.parser.recv_body_into(b) 63 | return 0 64 | 65 | self._checkReadable() 66 | try: 67 | self._checkClosed() 68 | except 
AttributeError: 69 | pass 70 | 71 | while True: 72 | buf = bytearray(DEFAULT_BUFFER_SIZE) 73 | recved = self.http_stream.stream.readinto(buf) 74 | if recved is None: 75 | break 76 | 77 | del buf[recved:] 78 | nparsed = self.http_stream.parser.execute(bytes(buf), recved) 79 | if nparsed != recved: 80 | return None 81 | 82 | if self.http_stream.parser.is_partial_body() or recved == 0: 83 | break 84 | elif self.http_stream.parser.is_message_complete(): 85 | break 86 | 87 | if not self.http_stream.parser.is_partial_body(): 88 | self.eof = True 89 | b = bytes('') 90 | return len(b) 91 | 92 | return self.http_stream.parser.recv_body_into(b) 93 | 94 | def readable(self): 95 | return not self.closed or self.http_stream.parser.is_partial_body() 96 | 97 | def close(self): 98 | if self.closed: 99 | return 100 | RawIOBase.close(self) 101 | self.http_stream = None 102 | 103 | class IterReader(RawIOBase): 104 | """ A raw reader implementation for iterable """ 105 | def __init__(self, iterable): 106 | self.iter = iter(iterable) 107 | self._buffer = "" 108 | 109 | def readinto(self, b): 110 | self._checkClosed() 111 | self._checkReadable() 112 | 113 | l = len(b) 114 | try: 115 | chunk = self.iter.next() 116 | self._buffer += chunk 117 | m = min(len(self._buffer), l) 118 | data, self._buffer = self._buffer[:m], self._buffer[m:] 119 | b[0:m] = data 120 | return len(data) 121 | except StopIteration: 122 | del b[0:] 123 | return 0 124 | 125 | def readable(self): 126 | return not self.closed 127 | 128 | def close(self): 129 | if self.closed: 130 | return 131 | RawIOBase.close(self) 132 | self.iter = None 133 | 134 | class StringReader(IterReader): 135 | """ a raw reader for strings or StringIO.StringIO, 136 | cStringIO.StringIO objects """ 137 | 138 | def __init__(self, string): 139 | if isinstance(string, types.StringTypes): 140 | iterable = StringIO(string) 141 | else: 142 | iterable = string 143 | IterReader.__init__(self, iterable) 144 | 145 | 146 | 147 | 148 | class 
SocketReader(RawIOBase): 149 | """ a raw reader for sockets or socket like interface. based 150 | on SocketIO object from python3.2 """ 151 | 152 | def __init__(self, sock): 153 | RawIOBase.__init__(self) 154 | self._sock = sock 155 | 156 | if _readinto is not None: 157 | def readinto(self, b): 158 | try: 159 | self._checkClosed() 160 | except AttributeError: 161 | pass 162 | self._checkReadable() 163 | return _readinto(self._sock, b) 164 | 165 | else: 166 | def readinto(self, b): 167 | try: 168 | self._checkClosed() 169 | except AttributeError: 170 | pass 171 | self._checkReadable() 172 | 173 | while True: 174 | try: 175 | return self._sock.recv_into(b) 176 | except socket.error, e: 177 | n = e.args[0] 178 | if n == EINTR: 179 | continue 180 | if n in _blocking_errnos: 181 | return None 182 | raise 183 | 184 | def readable(self): 185 | """True if the SocketIO is open for reading. 186 | """ 187 | return not self.closed 188 | 189 | def fileno(self): 190 | """Return the file descriptor of the underlying socket. 191 | """ 192 | self._checkClosed() 193 | return self._sock.fileno() 194 | 195 | def close(self): 196 | """Close the SocketIO object. This doesn't close the underlying 197 | socket, except if all references to it have disappeared. 198 | """ 199 | if self.closed: 200 | return 201 | RawIOBase.close(self) 202 | self._sock = None 203 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 - 2 | # 3 | # This file is part of http-parser released under the MIT license. 4 | # See the NOTICE for more information. 
5 | 6 | from __future__ import with_statement 7 | 8 | from distutils.core import setup 9 | from distutils.command import build_ext 10 | from distutils.command.install import INSTALL_SCHEMES 11 | from distutils.extension import Extension 12 | from distutils.errors import CCompilerError, DistutilsExecError 13 | import glob 14 | from imp import load_source 15 | import os 16 | import sys 17 | import traceback 18 | 19 | if not hasattr(sys, 'version_info') or \ 20 | sys.version_info < (2, 5, 0, 'final'): 21 | raise SystemExit("http-parser requires Python 2.6x or later") 22 | 23 | is_pypy = hasattr(sys, 'pypy_version_info') 24 | 25 | CLASSIFIERS = [ 26 | 'Development Status :: 4 - Beta', 27 | 'Environment :: Other Environment', 28 | 'Intended Audience :: Developers', 29 | 'License :: OSI Approved :: MIT License', 30 | 'Operating System :: MacOS :: MacOS X', 31 | 'Operating System :: POSIX', 32 | 'Programming Language :: Python', 33 | 'Topic :: Internet', 34 | 'Topic :: Utilities', 35 | 'Topic :: Software Development :: Libraries :: Python Modules', 36 | ] 37 | 38 | 39 | MODULES = ["http_parser"] 40 | 41 | INCLUDE_DIRS = ["http_parser"] 42 | SOURCES = [os.path.join("http_parser", "parser.c"), 43 | os.path.join("http_parser", "http_parser.c")] 44 | 45 | for scheme in INSTALL_SCHEMES.values(): 46 | scheme['data'] = scheme['purelib'] 47 | 48 | class my_build_ext(build_ext.build_ext): 49 | user_options = (build_ext.build_ext.user_options 50 | + [("cython=", None, "path to the cython executable")]) 51 | 52 | def initialize_options(self): 53 | build_ext.build_ext.initialize_options(self) 54 | self.cython = "cython" 55 | 56 | def compile_cython(self): 57 | sources = glob.glob('http_parser/*.pyx') 58 | if not sources: 59 | if not os.path.exists('http_parser/parser.c'): 60 | sys.stderr.write('Could not find http_parser/parser.c\n') 61 | 62 | if os.path.exists('http_parser/parser.c'): 63 | core_c_mtime = os.stat('http_parser/parser.c').st_mtime 64 | changed = [filename for filename 
in sources if \ 65 | (os.stat(filename).st_mtime - core_c_mtime) > 1] 66 | if not changed: 67 | return 68 | sys.stderr.write('Running %s (changed: %s)\n' % (self.cython, 69 | ', '.join(changed))) 70 | else: 71 | sys.stderr.write('Running %s' % self.cython) 72 | cython_result = os.system('%s http_parser/parser.pyx' % self.cython) 73 | if cython_result: 74 | if os.system('%s -V 2> %s' % (self.cython, os.devnull)): 75 | # there's no cython in the system 76 | sys.stderr.write('No cython found, cannot rebuild parser.c\n') 77 | return 78 | sys.exit(1) 79 | 80 | def build_extension(self, ext): 81 | if self.cython: 82 | self.compile_cython() 83 | try: 84 | result = build_ext.build_ext.build_extension(self, ext) 85 | # hack: create a symlink from build/../parser.so to http_parser/parser.so 86 | # to prevent "ImportError: cannot import name core" failures 87 | 88 | fullname = self.get_ext_fullname(ext.name) 89 | modpath = fullname.split('.') 90 | filename = self.get_ext_filename(ext.name) 91 | filename = os.path.split(filename)[-1] 92 | if not self.inplace: 93 | filename = os.path.join(*modpath[:-1] + [filename]) 94 | path_to_build_core_so = os.path.abspath( 95 | os.path.join(self.build_lib, filename)) 96 | path_to_core_so = os.path.abspath( 97 | os.path.join('http_parser', 98 | os.path.basename(path_to_build_core_so))) 99 | if path_to_build_core_so != path_to_core_so: 100 | try: 101 | os.unlink(path_to_core_so) 102 | except OSError: 103 | pass 104 | if hasattr(os, 'symlink'): 105 | print('Linking %s to %s' % (path_to_build_core_so, 106 | path_to_core_so)) 107 | os.symlink(path_to_build_core_so, path_to_core_so) 108 | else: 109 | print('Copying %s to %s' % (path_to_build_core_so, 110 | path_to_core_so)) 111 | import shutil 112 | shutil.copyfile(path_to_build_core_so, path_to_core_so) 113 | return result 114 | 115 | except (Exception, CCompilerError,): 116 | traceback.print_exc() 117 | sys.stderr.write("warning: can't build parser.c speedup.\n\n") 118 | sys.stderr.write("You 
can can safely ignire previous error.\n") 119 | 120 | 121 | 122 | def main(): 123 | http_parser = load_source("http_parser", os.path.join("http_parser", 124 | "__init__.py")) 125 | 126 | # read long description 127 | with open(os.path.join(os.path.dirname(__file__), 'README.rst')) as f: 128 | long_description = f.read() 129 | 130 | PACKAGES = {} 131 | for name in MODULES: 132 | PACKAGES[name] = name.replace(".", "/") 133 | 134 | DATA_FILES = [ 135 | ('http_parser', ["LICENSE", "MANIFEST.in", "NOTICE", "README.rst", 136 | "THANKS",]) 137 | ] 138 | 139 | 140 | options = dict( 141 | name = 'http-parser', 142 | version = http_parser.__version__, 143 | description = 'http request/response parser', 144 | long_description = long_description, 145 | author = 'Benoit Chesneau', 146 | author_email = 'benoitc@e-engura.com', 147 | license = 'MIT', 148 | url = 'http://github.com/benoitc/http-parser', 149 | classifiers = CLASSIFIERS, 150 | platforms=['any'], 151 | packages = PACKAGES.keys(), 152 | package_dir = PACKAGES, 153 | data_files = DATA_FILES, 154 | 155 | ) 156 | 157 | 158 | if not is_pypy: 159 | EXT_MODULES = [Extension("http_parser.parser", 160 | sources=SOURCES, include_dirs=INCLUDE_DIRS)] 161 | 162 | 163 | options.update(dict( 164 | cmdclass = {'build_ext': my_build_ext}, 165 | ext_modules = EXT_MODULES)) 166 | 167 | # Python 3: run 2to3 168 | try: 169 | from distutils.command.build_py import build_py_2to3 170 | from distutils.command.build_scripts import build_scripts_2to3 171 | except ImportError: 172 | pass 173 | else: 174 | options['cmdclass'].update({ 175 | 'build_py': build_py_2to3, 176 | 'build_scripts': build_scripts_2to3, 177 | }) 178 | 179 | 180 | setup(**options) 181 | 182 | if __name__ == "__main__": 183 | main() 184 | 185 | -------------------------------------------------------------------------------- /http_parser/http.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 - 2 | # 3 | # This file is 
part of http-parser released under the MIT license. 4 | # See the NOTICE for more information. 5 | 6 | try: 7 | from io import DEFAULT_BUFFER_SIZE, BufferedReader, TextIOWrapper 8 | except ImportError: 9 | from py25 import DEFAULT_BUFFER_SIZE, BufferedReader, TextIOWrapper 10 | 11 | 12 | try: 13 | bytes 14 | bytearray 15 | except (NameError, AttributeError): 16 | # python < 2.6 17 | from py25 import bytes, bytearray 18 | 19 | try: 20 | from http_parser.parser import HttpParser 21 | except ImportError: 22 | from http_parser.pyparser import HttpParser 23 | 24 | from http_parser.reader import HttpBodyReader 25 | from http_parser.util import status_reasons 26 | 27 | HTTP_BOTH = 2 28 | HTTP_RESPONSE = 1 29 | HTTP_REQUEST = 0 30 | 31 | class NoMoreData(Exception): 32 | """ exception raised when trying to parse headers but 33 | we didn't get all data needed. 34 | """ 35 | 36 | class ParserError(Exception): 37 | """ error while parsing http request """ 38 | 39 | class BadStatusLine(Exception): 40 | """ error when status line is invalid """ 41 | 42 | class HttpStream(object): 43 | """ An HTTP parser providing higher-level access to a readable, 44 | sequential io.RawIOBase object. You can use implementions of 45 | http_parser.reader (IterReader, StringReader, SocketReader) or 46 | create your own. 47 | """ 48 | 49 | def __init__(self, stream, kind=HTTP_BOTH, decompress=False): 50 | """ constructor of HttpStream. 51 | 52 | :attr stream: an io.RawIOBase object 53 | :attr kind: Int, could be 0 to parseonly requests, 54 | 1 to parse only responses or 2 if we want to let 55 | the parser detect the type. 
56 | """ 57 | self.parser = HttpParser(kind=kind, decompress=decompress) 58 | self.stream = stream 59 | 60 | def _check_headers_complete(self): 61 | if self.parser.is_headers_complete(): 62 | return 63 | 64 | while True: 65 | try: 66 | data = self.next() 67 | except StopIteration: 68 | if self.parser.is_headers_complete(): 69 | return 70 | raise NoMoreData("Can't parse headers") 71 | 72 | if self.parser.is_headers_complete(): 73 | return 74 | 75 | 76 | def _wait_status_line(self, cond): 77 | if self.parser.is_headers_complete(): 78 | return True 79 | 80 | data = "" 81 | if not cond(): 82 | while True: 83 | try: 84 | data += self.next() 85 | except StopIteration: 86 | if self.parser.is_headers_complete(): 87 | return True 88 | raise BadStatusLine(data) 89 | if cond(): 90 | return True 91 | return True 92 | 93 | def _wait_on_url(self): 94 | return self._wait_status_line(self.parser.get_url) 95 | 96 | def _wait_on_status(self): 97 | return self._wait_status_line(self.parser.get_status_code) 98 | 99 | def url(self): 100 | """ get full url of the request """ 101 | self._wait_on_url() 102 | return self.parser.get_url() 103 | 104 | def path(self): 105 | """ get path of the request (url without query string and 106 | fragment """ 107 | self._wait_on_url() 108 | return self.parser.get_path() 109 | 110 | def query_string(self): 111 | """ get query string of the url """ 112 | self._wait_on_url() 113 | return self.parser.get_query_string() 114 | 115 | def fragment(self): 116 | """ get fragment of the url """ 117 | self._wait_on_url() 118 | return self.parser.get_fragment() 119 | 120 | def version(self): 121 | self._wait_on_status() 122 | return self.parser.get_version() 123 | 124 | def status_code(self): 125 | """ get status code of a response as integer """ 126 | self._wait_on_status() 127 | return self.parser.get_status_code() 128 | 129 | def status(self): 130 | """ return complete status with reason """ 131 | status_code = self.status_code() 132 | reason = 
status_reasons.get(int(status_code), 'unknown') 133 | return "%s %s" % (status_code, reason) 134 | 135 | 136 | def method(self): 137 | """ get HTTP method as string""" 138 | self._wait_on_status() 139 | return self.parser.get_method() 140 | 141 | def headers(self): 142 | """ get request/response headers, headers are returned in a 143 | OrderedDict that allows you to get value using insensitive 144 | keys.""" 145 | self._check_headers_complete() 146 | return self.parser.get_headers() 147 | 148 | def should_keep_alive(self): 149 | """ return True if the connection should be kept alive 150 | """ 151 | self._check_headers_complete() 152 | return self.parser.should_keep_alive() 153 | 154 | def is_chunked(self): 155 | """ return True if Transfer-Encoding header value is chunked""" 156 | self._check_headers_complete() 157 | return self.parser.is_chunked() 158 | 159 | def wsgi_environ(self, initial=None): 160 | """ get WSGI environ based on the current request. 161 | 162 | :attr initial: dict, initial values to fill in environ. 163 | """ 164 | self._check_headers_complete() 165 | return self.parser.get_wsgi_environ() 166 | 167 | def body_file(self, buffering=None, binary=True, encoding=None, 168 | errors=None, newline=None): 169 | """ return the body as a buffered stream object. If binary is 170 | true an io.BufferedReader will be returned, else an 171 | io.TextIOWrapper. 
172 | """ 173 | self._check_headers_complete() 174 | 175 | if buffering is None: 176 | buffering = -1 177 | if buffering < 0: 178 | buffering = DEFAULT_BUFFER_SIZE 179 | 180 | raw = HttpBodyReader(self) 181 | buf = BufferedReader(raw, buffering) 182 | if binary: 183 | return buf 184 | text = TextIOWrapper(buf, encoding, errors, newline) 185 | return text 186 | 187 | def body_string(self, binary=True, encoding=None, errors=None, 188 | newline=None): 189 | """ return body as string """ 190 | return self.body_file(binary=binary, encoding=encoding, 191 | newline=newline).read() 192 | 193 | def __iter__(self): 194 | return self 195 | 196 | def next(self): 197 | if self.parser.is_message_complete(): 198 | raise StopIteration 199 | 200 | # fetch data 201 | b = bytearray(DEFAULT_BUFFER_SIZE) 202 | recved = self.stream.readinto(b) 203 | if recved is None: 204 | raise NoMoreData("no more data") 205 | 206 | del b[recved:] 207 | to_parse = bytes(b) 208 | # parse data 209 | nparsed = self.parser.execute(to_parse, recved) 210 | if nparsed != recved and not self.parser.is_message_complete(): 211 | raise ParserError("nparsed != recved (%s != %s)" % (nparsed, 212 | recved)) 213 | 214 | if recved == 0: 215 | raise StopIteration 216 | 217 | return to_parse 218 | -------------------------------------------------------------------------------- /http_parser/util.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 - 2 | # 3 | # This file is part of http-parser released under the MIT license. 4 | # See the NOTICE for more information. 
5 | 6 | 7 | import sys 8 | 9 | if sys.version_info[0] == 3: 10 | from urllib.parse import unquote 11 | def b(s): 12 | return s.encode("latin-1") 13 | 14 | def bytes_to_str(b): 15 | return str(b, 'latin1') 16 | 17 | import io 18 | StringIO = io.StringIO 19 | 20 | else: 21 | from urllib import unquote 22 | def b(s): 23 | return s 24 | 25 | def bytes_to_str(s): 26 | return s 27 | 28 | try: 29 | import cStringIO 30 | StringIO = BytesIO = cStringIO.StringIO 31 | except ImportError: 32 | import StringIO 33 | StringIO = BytesIO = StringIO.StringIO 34 | 35 | if sys.version_info < (2, 6, 0, 'final'): 36 | from py25 import IOrderedDict 37 | else: 38 | from collections import MutableMapping 39 | from itertools import imap 40 | 41 | class IOrderedDict(dict, MutableMapping): 42 | 'Dictionary that remembers insertion order with insensitive key' 43 | # An inherited dict maps keys to values. 44 | # The inherited dict provides __getitem__, __len__, __contains__, and get. 45 | # The remaining methods are order-aware. 46 | # Big-O running times for all methods are the same as for regular dictionaries. 47 | 48 | # The internal self.__map dictionary maps keys to links in a doubly linked list. 49 | # The circular doubly linked list starts and ends with a sentinel element. 50 | # The sentinel element never gets deleted (this simplifies the algorithm). 51 | # Each link is stored as a list of length three: [PREV, NEXT, KEY]. 52 | 53 | def __init__(self, *args, **kwds): 54 | '''Initialize an ordered dictionary. Signature is the same as for 55 | regular dictionaries, but keyword arguments are not recommended 56 | because their insertion order is arbitrary. 
57 | 58 | ''' 59 | if len(args) > 1: 60 | raise TypeError('expected at most 1 arguments, got %d' % len(args)) 61 | try: 62 | self.__root 63 | except AttributeError: 64 | self.__root = root = [None, None, None] # sentinel node 65 | PREV = 0 66 | NEXT = 1 67 | root[PREV] = root[NEXT] = root 68 | self.__map = {} 69 | self.__lower = {} 70 | self.update(*args, **kwds) 71 | 72 | def __setitem__(self, key, value, PREV=0, NEXT=1, dict_setitem=dict.__setitem__): 73 | 'od.__setitem__(i, y) <==> od[i]=y' 74 | # Setting a new item creates a new link which goes at the end of the linked 75 | # list, and the inherited dictionary is updated with the new key/value pair. 76 | if key not in self: 77 | root = self.__root 78 | last = root[PREV] 79 | last[NEXT] = root[PREV] = self.__map[key] = [last, root, key] 80 | self.__lower[key.lower()] = key 81 | key = self.__lower[key.lower()] 82 | dict_setitem(self, key, value) 83 | 84 | def __delitem__(self, key, PREV=0, NEXT=1, dict_delitem=dict.__delitem__): 85 | 'od.__delitem__(y) <==> del od[y]' 86 | # Deleting an existing item uses self.__map to find the link which is 87 | # then removed by updating the links in the predecessor and successor nodes. 88 | if key in self: 89 | key = self.__lower.pop(key.lower()) 90 | 91 | dict_delitem(self, key) 92 | link = self.__map.pop(key) 93 | link_prev = link[PREV] 94 | link_next = link[NEXT] 95 | link_prev[NEXT] = link_next 96 | link_next[PREV] = link_prev 97 | 98 | def __getitem__(self, key, dict_getitem=dict.__getitem__): 99 | if key in self: 100 | key = self.__lower.get(key.lower()) 101 | return dict_getitem(self, key) 102 | 103 | def __contains__(self, key): 104 | return key.lower() in self.__lower 105 | 106 | def __iter__(self, NEXT=1, KEY=2): 107 | 'od.__iter__() <==> iter(od)' 108 | # Traverse the linked list in order. 
109 | root = self.__root 110 | curr = root[NEXT] 111 | while curr is not root: 112 | yield curr[KEY] 113 | curr = curr[NEXT] 114 | 115 | def __reversed__(self, PREV=0, KEY=2): 116 | 'od.__reversed__() <==> reversed(od)' 117 | # Traverse the linked list in reverse order. 118 | root = self.__root 119 | curr = root[PREV] 120 | while curr is not root: 121 | yield curr[KEY] 122 | curr = curr[PREV] 123 | 124 | def __reduce__(self): 125 | 'Return state information for pickling' 126 | items = [[k, self[k]] for k in self] 127 | tmp = self.__map, self.__root 128 | del self.__map, self.__root 129 | inst_dict = vars(self).copy() 130 | self.__map, self.__root = tmp 131 | if inst_dict: 132 | return (self.__class__, (items,), inst_dict) 133 | return self.__class__, (items,) 134 | 135 | def clear(self): 136 | 'od.clear() -> None. Remove all items from od.' 137 | try: 138 | for node in self.__map.itervalues(): 139 | del node[:] 140 | self.__root[:] = [self.__root, self.__root, None] 141 | self.__map.clear() 142 | except AttributeError: 143 | pass 144 | dict.clear(self) 145 | 146 | def get(self, key, default=None): 147 | if key in self: 148 | return self[key] 149 | return default 150 | 151 | setdefault = MutableMapping.setdefault 152 | update = MutableMapping.update 153 | pop = MutableMapping.pop 154 | keys = MutableMapping.keys 155 | values = MutableMapping.values 156 | items = MutableMapping.items 157 | __ne__ = MutableMapping.__ne__ 158 | 159 | def popitem(self, last=True): 160 | '''od.popitem() -> (k, v), return and remove a (key, value) pair. 161 | Pairs are returned in LIFO order if last is true or FIFO order if false. 
162 | 163 | ''' 164 | if not self: 165 | raise KeyError('dictionary is empty') 166 | key = next(reversed(self) if last else iter(self)) 167 | value = self.pop(key) 168 | return key, value 169 | 170 | def __repr__(self): 171 | 'od.__repr__() <==> repr(od)' 172 | if not self: 173 | return '%s()' % (self.__class__.__name__,) 174 | return '%s(%r)' % (self.__class__.__name__, self.items()) 175 | 176 | def copy(self): 177 | 'od.copy() -> a shallow copy of od' 178 | return self.__class__(self) 179 | 180 | @classmethod 181 | def fromkeys(cls, iterable, value=None): 182 | '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S 183 | and values equal to v (which defaults to None). 184 | 185 | ''' 186 | d = cls() 187 | for key in iterable: 188 | d[key] = value 189 | return d 190 | 191 | def __eq__(self, other): 192 | '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive 193 | while comparison to a regular mapping is order-insensitive. 194 | 195 | ''' 196 | if isinstance(other, OrderedDict): 197 | return len(self)==len(other) and \ 198 | all(imap(_eq, self.iteritems(), other.iteritems())) 199 | return dict.__eq__(self, other) 200 | 201 | def __del__(self): 202 | self.clear() # eliminate cyclical references 203 | 204 | 205 | status_reasons = { 206 | # Status Codes 207 | # Informational 208 | 100: 'Continue', 209 | 101: 'Switching Protocols', 210 | 102: 'Processing', 211 | 212 | # Successful 213 | 200: 'OK', 214 | 201: 'Created', 215 | 202: 'Accepted', 216 | 203: 'Non Authoritative Information', 217 | 204: 'No Content', 218 | 205: 'Reset Content', 219 | 206: 'Partial Content', 220 | 207: 'Multi Status', 221 | 226: 'IM Used', 222 | 223 | # Redirection 224 | 300: 'Multiple Choices', 225 | 301: 'Moved Permanently', 226 | 302: 'Found', 227 | 303: 'See Other', 228 | 304: 'Not Modified', 229 | 305: 'Use Proxy', 230 | 307: 'Temporary Redirect', 231 | 232 | # Client Error 233 | 400: 'Bad Request', 234 | 401: 'Unauthorized', 235 | 402: 'Payment Required', 236 
| 403: 'Forbidden', 237 | 404: 'Not Found', 238 | 405: 'Method Not Allowed', 239 | 406: 'Not Acceptable', 240 | 407: 'Proxy Authentication Required', 241 | 408: 'Request Timeout', 242 | 409: 'Conflict', 243 | 410: 'Gone', 244 | 411: 'Length Required', 245 | 412: 'Precondition Failed', 246 | 413: 'Request Entity Too Large', 247 | 414: 'Request URI Too Long', 248 | 415: 'Unsupported Media Type', 249 | 416: 'Requested Range Not Satisfiable', 250 | 417: 'Expectation Failed', 251 | 422: 'Unprocessable Entity', 252 | 423: 'Locked', 253 | 424: 'Failed Dependency', 254 | 426: 'Upgrade Required', 255 | 256 | # Server Error 257 | 500: 'Internal Server Error', 258 | 501: 'Not Implemented', 259 | 502: 'Bad Gateway', 260 | 503: 'Service Unavailable', 261 | 504: 'Gateway Timeout', 262 | 505: 'HTTP Version Not Supported', 263 | 507: 'Insufficient Storage', 264 | 510: 'Not Extended', 265 | } 266 | -------------------------------------------------------------------------------- /http_parser/parser.pyx: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 - 2 | # 3 | # This file is part of http-parser released under the MIT license. 4 | # See the NOTICE for more information. 
5 | 6 | from libc.stdlib cimport * 7 | import os 8 | import urlparse 9 | import zlib 10 | 11 | from http_parser.util import b, bytes_to_str, IOrderedDict, unquote 12 | 13 | cdef extern from "pyversion_compat.h": 14 | pass 15 | 16 | from cpython cimport PyBytes_FromStringAndSize 17 | 18 | cdef extern from "http_parser.h" nogil: 19 | 20 | cdef enum http_method: 21 | HTTP_DELETE, HTTP_GET, HTTP_HEAD, HTTP_POST, HTTP_PUT, 22 | HTTP_CONNECT, HTTP_OPTIONS, HTTP_TRACE, HTTP_COPY, HTTP_LOCK, 23 | HTTP_MKCOL, HTTP_MOVE, HTTP_PROPFIND, HTTP_PROPPATCH, HTTP_UNLOCK, 24 | HTTP_REPORT, HTTP_MKACTIVITY, HTTP_CHECKOUT, HTTP_MERGE, HTTP_MSEARCH, 25 | HTTP_NOTIFY, HTTP_SUBSCRIBE, HTTP_UNSUBSCRIBE, HTTP_PATCH 26 | 27 | 28 | cdef enum http_parser_type: 29 | HTTP_REQUEST, HTTP_RESPONSE, HTTP_BOTH 30 | 31 | cdef struct http_parser: 32 | int content_length 33 | unsigned short http_major 34 | unsigned short http_minor 35 | unsigned short status_code 36 | unsigned char method 37 | char upgrade 38 | void *data 39 | 40 | ctypedef int (*http_data_cb) (http_parser*, char *at, size_t length) 41 | ctypedef int (*http_cb) (http_parser*) 42 | 43 | struct http_parser_settings: 44 | http_cb on_message_begin 45 | http_data_cb on_url 46 | http_data_cb on_header_field 47 | http_data_cb on_header_value 48 | http_cb on_headers_complete 49 | http_data_cb on_body 50 | http_cb on_message_complete 51 | 52 | void http_parser_init(http_parser *parser, 53 | http_parser_type ptype) 54 | 55 | size_t http_parser_execute(http_parser *parser, 56 | http_parser_settings *settings, char *data, 57 | size_t len) 58 | 59 | int http_should_keep_alive(http_parser *parser) 60 | 61 | char *http_method_str(http_method) 62 | 63 | 64 | cdef int on_url_cb(http_parser *parser, char *at, 65 | size_t length): 66 | res = parser.data 67 | value = bytes_to_str(PyBytes_FromStringAndSize(at, length)) 68 | 69 | res.url = value 70 | return 0 71 | 72 | cdef int on_header_field_cb(http_parser *parser, char *at, 73 | size_t length): 74 | 
header_field = PyBytes_FromStringAndSize(at, length) 75 | res = parser.data 76 | 77 | if res._last_was_value: 78 | res._last_field = "" 79 | res._last_field += bytes_to_str(header_field) 80 | res._last_was_value = False 81 | return 0 82 | 83 | cdef int on_header_value_cb(http_parser *parser, char *at, 84 | size_t length): 85 | res = parser.data 86 | header_value = bytes_to_str(PyBytes_FromStringAndSize(at, length)) 87 | 88 | if res._last_field in res.headers: 89 | header_value = "%s, %s" % (res.headers[res._last_field], 90 | header_value) 91 | 92 | # update wsgi environ 93 | key = 'HTTP_%s' % res._last_field.upper().replace('-','_') 94 | res.environ[key] = header_value 95 | 96 | # add to headers 97 | res.headers[res._last_field] = header_value 98 | res._last_was_value = True 99 | return 0 100 | 101 | cdef int on_headers_complete_cb(http_parser *parser): 102 | res = parser.data 103 | res.headers_complete = True 104 | 105 | if res.decompress: 106 | encoding = res.headers.get('content-encoding') 107 | if encoding == 'gzip': 108 | res.decompressobj = zlib.decompressobj(16+zlib.MAX_WBITS) 109 | del res.headers['content-encoding'] 110 | elif encoding == 'deflate': 111 | res.decompressobj = zlib.decompressobj() 112 | del res.headers['content-encoding'] 113 | else: 114 | res.decompress = False 115 | 116 | return 0 117 | 118 | cdef int on_message_begin_cb(http_parser *parser): 119 | res = parser.data 120 | res.message_begin = True 121 | return 0 122 | 123 | cdef int on_body_cb(http_parser *parser, char *at, 124 | size_t length): 125 | res = parser.data 126 | value = PyBytes_FromStringAndSize(at, length) 127 | 128 | res.partial_body = True 129 | 130 | # decompress the value if needed 131 | if res.decompress: 132 | value = res.decompressobj.decompress(value) 133 | res.body.append(value) 134 | return 0 135 | 136 | cdef int on_message_complete_cb(http_parser *parser): 137 | res = parser.data 138 | res.message_complete = True 139 | return 0 140 | 141 | 142 | class 
_ParserData(object): 143 | 144 | def __init__(self, decompress=False): 145 | self.url = "" 146 | self.body = [] 147 | self.headers = IOrderedDict() 148 | self.environ = {} 149 | 150 | self.decompress = decompress 151 | self.decompressobj = None 152 | 153 | self.chunked = False 154 | 155 | self.headers_complete = False 156 | self.partial_body = False 157 | self.message_begin = False 158 | self.message_complete = False 159 | 160 | self._last_field = "" 161 | self._last_was_value = False 162 | 163 | cdef class HttpParser: 164 | """ Low level HTTP parser. """ 165 | 166 | cdef http_parser _parser 167 | cdef http_parser_settings _settings 168 | cdef object _data 169 | 170 | cdef str _path 171 | cdef str _query_string 172 | cdef str _fragment 173 | cdef object _parsed_url 174 | 175 | def __init__(self, kind=2, decompress=False): 176 | """ constructor of HttpParser object. 177 | 178 | 179 | :attr kind: Int, could be 0 to parseonly requests, 180 | 1 to parse only responses or 2 if we want to let 181 | the parser detect the type. 
182 | """ 183 | 184 | # set parser type 185 | if kind == 2: 186 | parser_type = HTTP_BOTH 187 | elif kind == 1: 188 | parser_type = HTTP_RESPONSE 189 | elif kind == 0: 190 | parser_type = HTTP_REQUEST 191 | 192 | # initialize parser 193 | http_parser_init(&self._parser, parser_type) 194 | self._data = _ParserData(decompress=decompress) 195 | self._parser.data = self._data 196 | self._parsed_url = None 197 | self._path = "" 198 | self._query_string = "" 199 | self._fragment = "" 200 | 201 | # set callback 202 | self._settings.on_url = on_url_cb 203 | self._settings.on_body = on_body_cb 204 | self._settings.on_header_field = on_header_field_cb 205 | self._settings.on_header_value = on_header_value_cb 206 | self._settings.on_headers_complete = on_headers_complete_cb 207 | self._settings.on_message_begin = on_message_begin_cb 208 | self._settings.on_message_complete = on_message_complete_cb 209 | 210 | def execute(self, char *data, size_t length): 211 | """ Execute the parser with the last chunk. We pass the length 212 | to let the parser know when EOF has been received. In this case 213 | length == 0. 214 | 215 | :return recved: Int, received length of the data parsed. if 216 | recvd != length you should return an error. 
217 | """ 218 | return http_parser_execute(&self._parser, &self._settings, 219 | data, length) 220 | 221 | def get_version(self): 222 | """ get HTTP version """ 223 | return (self._parser.http_major, self._parser.http_minor) 224 | 225 | def get_method(self): 226 | """ get HTTP method as string""" 227 | return http_method_str(self._parser.method) 228 | 229 | 230 | 231 | def get_status_code(self): 232 | """ get status code of a response as integer """ 233 | return self._parser.status_code 234 | 235 | def get_url(self): 236 | """ get full url of the request """ 237 | return self._data.url 238 | 239 | def maybe_parse_url(self): 240 | raw_url = self.get_url() 241 | if not self._parsed_url and raw_url: 242 | self._parsed_url = urlparse.urlsplit(raw_url) 243 | self._path = self._parsed_url.path or "" 244 | self._query_string = self._parsed_url.query or "" 245 | self._fragment = self._parsed_url.fragment or "" 246 | 247 | def get_path(self): 248 | """ get path of the request (url without query string and 249 | fragment """ 250 | self.maybe_parse_url() 251 | return self._path 252 | 253 | def get_query_string(self): 254 | """ get query string of the url """ 255 | self.maybe_parse_url() 256 | return self._query_string 257 | 258 | def get_fragment(self): 259 | """ get fragment of the url """ 260 | self.maybe_parse_url() 261 | return self._fragment 262 | 263 | def get_headers(self): 264 | """ get request/response headers, headers are returned in a 265 | OrderedDict that allows you to get value using insensitive keys. 
""" 266 | return self._data.headers 267 | 268 | def get_wsgi_environ(self): 269 | """ get WSGI environ based on the current request """ 270 | self.maybe_parse_url() 271 | environ = self._data.environ 272 | 273 | # clean special keys 274 | for key in ("CONTENT_LENGTH", "CONTENT_TYPE", "SCRIPT_NAME"): 275 | hkey = "HTTP_%s" % key 276 | if hkey in environ: 277 | environ[key] = environ.pop(hkey) 278 | 279 | script_name = environ.get('HTTP_SCRIPT_NAME', 280 | os.environ.get("SCRIPT_NAME", "")) 281 | 282 | if script_name: 283 | path_info = self._path.split(script_name, 1)[1] 284 | else: 285 | path_info = self._path 286 | 287 | environ.update({ 288 | 'REQUEST_METHOD': self.get_method(), 289 | 'SERVER_PROTOCOL': "HTTP/%s" % ".".join(map(str, 290 | self.get_version())), 291 | 'PATH_INFO': path_info, 292 | 'SCRIPT_NAME': script_name, 293 | 'QUERY_STRING': self._query_string, 294 | 'RAW_URI': self._data.url}) 295 | 296 | return environ 297 | 298 | def recv_body(self): 299 | """ return last chunk of the parsed body""" 300 | body = b("").join(self._data.body) 301 | self._data.body = [] 302 | self._data.partial_body = False 303 | return body 304 | 305 | def recv_body_into(self, barray): 306 | """ Receive the last chunk of the parsed bodyand store the data 307 | in a buffer rather than creating a new string. """ 308 | l = len(barray) 309 | body = b("").join(self._data.body) 310 | m = min(len(body), l) 311 | data, rest = body[:m], body[m:] 312 | barray[0:m] = bytes(data) 313 | if not rest: 314 | self._data.body = [] 315 | self._data.partial_body = False 316 | else: 317 | self._data.body = [rest] 318 | return m 319 | 320 | def is_upgrade(self): 321 | """ Do we get upgrade header in the request. Useful for 322 | websockets """ 323 | return self._parser_upgrade 324 | 325 | def is_headers_complete(self): 326 | """ return True if all headers have been parsed. 
""" 327 | return self._data.headers_complete 328 | 329 | def is_partial_body(self): 330 | """ return True if a chunk of body have been parsed """ 331 | return self._data.partial_body 332 | 333 | def is_message_begin(self): 334 | """ return True if the parsing start """ 335 | return self._data.message_begin 336 | 337 | def is_message_complete(self): 338 | """ return True if the parsing is done (we get EOF) """ 339 | return self._data.message_complete 340 | 341 | def is_chunked(self): 342 | """ return True if Transfer-Encoding header value is chunked""" 343 | te = self._data.headers.get('transfer-encoding', '').lower() 344 | return te == 'chunked' 345 | 346 | def should_keep_alive(self): 347 | """ return True if the connection should be kept alive 348 | """ 349 | return http_should_keep_alive(&self._parser) 350 | -------------------------------------------------------------------------------- /http_parser/http_parser.h: -------------------------------------------------------------------------------- 1 | /* Copyright Joyent, Inc. and other Node contributors. All rights reserved. 2 | * 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy 4 | * of this software and associated documentation files (the "Software"), to 5 | * deal in the Software without restriction, including without limitation the 6 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 7 | * sell copies of the Software, and to permit persons to whom the Software is 8 | * furnished to do so, subject to the following conditions: 9 | * 10 | * The above copyright notice and this permission notice shall be included in 11 | * all copies or substantial portions of the Software. 12 | * 13 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 16 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 19 | * IN THE SOFTWARE. 20 | */ 21 | #ifndef http_parser_h 22 | #define http_parser_h 23 | #ifdef __cplusplus 24 | extern "C" { 25 | #endif 26 | 27 | #define HTTP_PARSER_VERSION_MAJOR 1 28 | #define HTTP_PARSER_VERSION_MINOR 0 29 | 30 | #include 31 | #if defined(_WIN32) && !defined(__MINGW32__) && (!defined(_MSC_VER) || _MSC_VER<1600) 32 | typedef __int8 int8_t; 33 | typedef unsigned __int8 uint8_t; 34 | typedef __int16 int16_t; 35 | typedef unsigned __int16 uint16_t; 36 | typedef __int32 int32_t; 37 | typedef unsigned __int32 uint32_t; 38 | typedef __int64 int64_t; 39 | typedef unsigned __int64 uint64_t; 40 | 41 | typedef unsigned int size_t; 42 | typedef int ssize_t; 43 | #else 44 | #include 45 | #endif 46 | 47 | /* Compile with -DHTTP_PARSER_STRICT=0 to make less checks, but run 48 | * faster 49 | */ 50 | #ifndef HTTP_PARSER_STRICT 51 | # define HTTP_PARSER_STRICT 1 52 | #endif 53 | 54 | /* Compile with -DHTTP_PARSER_DEBUG=1 to add extra debugging information to 55 | * the error reporting facility. 56 | */ 57 | #ifndef HTTP_PARSER_DEBUG 58 | # define HTTP_PARSER_DEBUG 0 59 | #endif 60 | 61 | 62 | /* Maximium header size allowed */ 63 | #define HTTP_MAX_HEADER_SIZE (80*1024) 64 | 65 | 66 | typedef struct http_parser http_parser; 67 | typedef struct http_parser_settings http_parser_settings; 68 | typedef struct http_parser_result http_parser_result; 69 | 70 | 71 | /* Callbacks should return non-zero to indicate an error. The parser will 72 | * then halt execution. 73 | * 74 | * The one exception is on_headers_complete. In a HTTP_RESPONSE parser 75 | * returning '1' from on_headers_complete will tell the parser that it 76 | * should not expect a body. 
This is used when receiving a response to a 77 | * HEAD request which may contain 'Content-Length' or 'Transfer-Encoding: 78 | * chunked' headers that indicate the presence of a body. 79 | * 80 | * http_data_cb does not return data chunks. It will be call arbitrarally 81 | * many times for each string. E.G. you might get 10 callbacks for "on_path" 82 | * each providing just a few characters more data. 83 | */ 84 | typedef int (*http_data_cb) (http_parser*, const char *at, size_t length); 85 | typedef int (*http_cb) (http_parser*); 86 | 87 | 88 | /* Request Methods */ 89 | enum http_method 90 | { HTTP_DELETE = 0 91 | , HTTP_GET 92 | , HTTP_HEAD 93 | , HTTP_POST 94 | , HTTP_PUT 95 | /* pathological */ 96 | , HTTP_CONNECT 97 | , HTTP_OPTIONS 98 | , HTTP_TRACE 99 | /* webdav */ 100 | , HTTP_COPY 101 | , HTTP_LOCK 102 | , HTTP_MKCOL 103 | , HTTP_MOVE 104 | , HTTP_PROPFIND 105 | , HTTP_PROPPATCH 106 | , HTTP_UNLOCK 107 | /* subversion */ 108 | , HTTP_REPORT 109 | , HTTP_MKACTIVITY 110 | , HTTP_CHECKOUT 111 | , HTTP_MERGE 112 | /* upnp */ 113 | , HTTP_MSEARCH 114 | , HTTP_NOTIFY 115 | , HTTP_SUBSCRIBE 116 | , HTTP_UNSUBSCRIBE 117 | /* RFC-5789 */ 118 | , HTTP_PATCH 119 | }; 120 | 121 | 122 | enum http_parser_type { HTTP_REQUEST, HTTP_RESPONSE, HTTP_BOTH }; 123 | 124 | 125 | /* Flag values for http_parser.flags field */ 126 | enum flags 127 | { F_CHUNKED = 1 << 0 128 | , F_CONNECTION_KEEP_ALIVE = 1 << 1 129 | , F_CONNECTION_CLOSE = 1 << 2 130 | , F_TRAILING = 1 << 3 131 | , F_UPGRADE = 1 << 4 132 | , F_SKIPBODY = 1 << 5 133 | }; 134 | 135 | 136 | /* Map for errno-related constants 137 | * 138 | * The provided argument should be a macro that takes 2 arguments. 
139 | */ 140 | #define HTTP_ERRNO_MAP(XX) \ 141 | /* No error */ \ 142 | XX(OK, "success") \ 143 | \ 144 | /* Callback-related errors */ \ 145 | XX(CB_message_begin, "the on_message_begin callback failed") \ 146 | XX(CB_path, "the on_path callback failed") \ 147 | XX(CB_query_string, "the on_query_string callback failed") \ 148 | XX(CB_url, "the on_url callback failed") \ 149 | XX(CB_fragment, "the on_fragment callback failed") \ 150 | XX(CB_header_field, "the on_header_field callback failed") \ 151 | XX(CB_header_value, "the on_header_value callback failed") \ 152 | XX(CB_headers_complete, "the on_headers_complete callback failed") \ 153 | XX(CB_body, "the on_body callback failed") \ 154 | XX(CB_message_complete, "the on_message_complete callback failed") \ 155 | \ 156 | /* Parsing-related errors */ \ 157 | XX(INVALID_EOF_STATE, "stream ended at an unexpected time") \ 158 | XX(HEADER_OVERFLOW, \ 159 | "too many header bytes seen; overflow detected") \ 160 | XX(CLOSED_CONNECTION, \ 161 | "data received after completed connection: close message") \ 162 | XX(INVALID_VERSION, "invalid HTTP version") \ 163 | XX(INVALID_STATUS, "invalid HTTP status code") \ 164 | XX(INVALID_METHOD, "invalid HTTP method") \ 165 | XX(INVALID_URL, "invalid URL") \ 166 | XX(INVALID_HOST, "invalid host") \ 167 | XX(INVALID_PORT, "invalid port") \ 168 | XX(INVALID_PATH, "invalid path") \ 169 | XX(INVALID_QUERY_STRING, "invalid query string") \ 170 | XX(INVALID_FRAGMENT, "invalid fragment") \ 171 | XX(LF_EXPECTED, "LF character expected") \ 172 | XX(INVALID_HEADER_TOKEN, "invalid character in header") \ 173 | XX(INVALID_CONTENT_LENGTH, \ 174 | "invalid character in content-length header") \ 175 | XX(INVALID_CHUNK_SIZE, \ 176 | "invalid character in chunk size header") \ 177 | XX(INVALID_CONSTANT, "invalid constant string") \ 178 | XX(INVALID_INTERNAL_STATE, "encountered unexpected internal state")\ 179 | XX(STRICT, "strict mode assertion failed") \ 180 | XX(PAUSED, "parser is paused") \ 181 | 
XX(UNKNOWN, "an unknown error occurred") 182 | 183 | 184 | /* Define HPE_* values for each errno value above */ 185 | #define HTTP_ERRNO_GEN(n, s) HPE_##n, 186 | enum http_errno { 187 | HTTP_ERRNO_MAP(HTTP_ERRNO_GEN) 188 | }; 189 | #undef HTTP_ERRNO_GEN 190 | 191 | 192 | /* Get an http_errno value from an http_parser */ 193 | #define HTTP_PARSER_ERRNO(p) ((enum http_errno) (p)->http_errno) 194 | 195 | /* Get the line number that generated the current error */ 196 | #if HTTP_PARSER_DEBUG 197 | #define HTTP_PARSER_ERRNO_LINE(p) ((p)->error_lineno) 198 | #else 199 | #define HTTP_PARSER_ERRNO_LINE(p) 0 200 | #endif 201 | 202 | 203 | struct http_parser { 204 | /** PRIVATE **/ 205 | unsigned char type : 2; /* enum http_parser_type */ 206 | unsigned char flags : 6; /* F_* values from 'flags' enum; semi-public */ 207 | unsigned char state; /* enum state from http_parser.c */ 208 | unsigned char header_state; /* enum header_state from http_parser.c */ 209 | unsigned char index; /* index into current matcher */ 210 | 211 | uint32_t nread; /* # bytes read in various scenarios */ 212 | int64_t content_length; /* # bytes in body (0 if no Content-Length header) */ 213 | 214 | /** READ-ONLY **/ 215 | unsigned short http_major; 216 | unsigned short http_minor; 217 | unsigned short status_code; /* responses only */ 218 | unsigned char method; /* requests only */ 219 | unsigned char http_errno : 7; 220 | 221 | /* 1 = Upgrade header was present and the parser has exited because of that. 222 | * 0 = No upgrade header present. 223 | * Should be checked when http_parser_execute() returns in addition to 224 | * error checking. 
225 | */ 226 | unsigned char upgrade : 1; 227 | 228 | #if HTTP_PARSER_DEBUG 229 | uint32_t error_lineno; 230 | #endif 231 | 232 | /** PUBLIC **/ 233 | void *data; /* A pointer to get hook to the "connection" or "socket" object */ 234 | }; 235 | 236 | 237 | struct http_parser_settings { 238 | http_cb on_message_begin; 239 | http_data_cb on_url; 240 | http_data_cb on_header_field; 241 | http_data_cb on_header_value; 242 | http_cb on_headers_complete; 243 | http_data_cb on_body; 244 | http_cb on_message_complete; 245 | }; 246 | 247 | 248 | enum http_parser_url_fields 249 | { UF_SCHEMA = 0 250 | , UF_HOST = 1 251 | , UF_PORT = 2 252 | , UF_PATH = 3 253 | , UF_QUERY = 4 254 | , UF_FRAGMENT = 5 255 | , UF_MAX = 6 256 | }; 257 | 258 | 259 | /* Result structure for http_parser_parse_url(). 260 | * 261 | * Callers should index into field_data[] with UF_* values iff field_set 262 | * has the relevant (1 << UF_*) bit set. As a courtesy to clients (and 263 | * because we probably have padding left over), we convert any port to 264 | * a uint16_t. 265 | */ 266 | struct http_parser_url { 267 | uint16_t field_set; /* Bitmask of (1 << UF_*) values */ 268 | uint16_t port; /* Converted UF_PORT string */ 269 | 270 | struct { 271 | uint16_t off; /* Offset into buffer in which field starts */ 272 | uint16_t len; /* Length of run in buffer */ 273 | } field_data[UF_MAX]; 274 | }; 275 | 276 | 277 | void http_parser_init(http_parser *parser, enum http_parser_type type); 278 | 279 | 280 | size_t http_parser_execute(http_parser *parser, 281 | const http_parser_settings *settings, 282 | const char *data, 283 | size_t len); 284 | 285 | 286 | /* If http_should_keep_alive() in the on_headers_complete or 287 | * on_message_complete callback returns true, then this will be should be 288 | * the last message on the connection. 289 | * If you are the server, respond with the "Connection: close" header. 290 | * If you are the client, close the connection. 
291 | */ 292 | int http_should_keep_alive(http_parser *parser); 293 | 294 | /* Returns a string version of the HTTP method. */ 295 | const char *http_method_str(enum http_method m); 296 | 297 | /* Return a string name of the given error */ 298 | const char *http_errno_name(enum http_errno err); 299 | 300 | /* Return a string description of the given error */ 301 | const char *http_errno_description(enum http_errno err); 302 | 303 | /* Parse a URL; return nonzero on failure */ 304 | int http_parser_parse_url(const char *buf, size_t buflen, 305 | int is_connect, 306 | struct http_parser_url *u); 307 | 308 | /* Pause or un-pause the parser; a nonzero value pauses */ 309 | void http_parser_pause(http_parser *parser, int paused); 310 | 311 | #ifdef __cplusplus 312 | } 313 | #endif 314 | #endif 315 | -------------------------------------------------------------------------------- /http_parser/pyparser.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 - 2 | # 3 | # This file is part of http-parser released under the MIT license. 4 | # See the NOTICE for more information. 
5 | 6 | import os 7 | import re 8 | import sys 9 | import urlparse 10 | import zlib 11 | 12 | from http_parser.util import b, bytes_to_str, IOrderedDict, StringIO, unquote 13 | 14 | 15 | METHOD_RE = re.compile("[A-Z0-9$-_.]{3,20}") 16 | VERSION_RE = re.compile("HTTP/(\d+).(\d+)") 17 | STATUS_RE = re.compile("(\d{3})\s*(\w*)") 18 | HEADER_RE = re.compile("[\x00-\x1F\x7F()<>@,;:\[\]={} \t\\\\\"]") 19 | 20 | # errors 21 | BAD_FIRST_LINE = 0 22 | INVALID_HEADER = 1 23 | INVALID_CHUNK = 2 24 | 25 | class InvalidRequestLine(Exception): 26 | """ error raised when first line is invalid """ 27 | 28 | class InvalidHeader(Exception): 29 | """ error raised on invalid header """ 30 | 31 | class InvalidChunkSize(Exception): 32 | """ error raised when we parse an invalid chunk size """ 33 | 34 | class HttpParser(object): 35 | 36 | def __init__(self, kind=2, decompress=False): 37 | self.kind = kind 38 | self.decompress = decompress 39 | 40 | # errors vars 41 | self.errno = None 42 | self.errstr = "" 43 | 44 | # protected variables 45 | self._buf = [] 46 | self._version = None 47 | self._method = None 48 | self._status_code = None 49 | self._status = None 50 | self._reason = None 51 | self._url = None 52 | self._path = None 53 | self._query_string = None 54 | self._fragment= None 55 | self._headers = IOrderedDict() 56 | self._environ = dict() 57 | self._chunked = False 58 | self._body = [] 59 | self._trailers = None 60 | self._partial_body = False 61 | self._clen = None 62 | self._clen_rest = None 63 | 64 | # private events 65 | self.__on_firstline = False 66 | self.__on_headers_complete = False 67 | self.__on_message_begin = False 68 | self.__on_message_complete = False 69 | 70 | self.__decompress_obj = None 71 | 72 | def get_version(self): 73 | return self._version 74 | 75 | def get_method(self): 76 | return self._method 77 | 78 | def get_status_code(self): 79 | return self._status_code 80 | 81 | def get_url(self): 82 | return self._url 83 | 84 | def get_path(self): 85 | return 
self._path 86 | 87 | def get_query_string(self): 88 | return self._query_string 89 | 90 | def get_fragment(self): 91 | return self._fragment 92 | 93 | def get_headers(self): 94 | return self._headers 95 | 96 | def get_environ(self): 97 | if not self.__on_headers_complete: 98 | return None 99 | 100 | environ = self._environ.copy() 101 | # clean special keys 102 | for key in ("CONTENT_LENGTH", "CONTENT_TYPE", "SCRIPT_NAME"): 103 | hkey = "HTTP_%s" % key 104 | if hkey in environ: 105 | environ[key] = environ.pop(hkey) 106 | 107 | script_name = environ.get('HTTP_SCRIPT_NAME', 108 | os.environ.get("SCRIPT_NAME", "")) 109 | if script_name: 110 | path_info = self._path.split(script_name, 1)[1] 111 | environ.update({ 112 | "PATH_INFO": unquote(path_info), 113 | "SCRIPT_NAME": script_name}) 114 | else: 115 | environ['SCRIPT_NAME'] = "" 116 | 117 | if environ.get('HTTP_X_FORWARDED_PROTOCOL', '').lower() == "ssl": 118 | environ['wsgi.url_scheme'] = "https" 119 | elif environ.get('HTTP_X_FORWARDED_SSL', '').lower() == "on": 120 | environ['wsgi.url_scheme'] = "https" 121 | else: 122 | environ['wsgi.url_scheme'] = "http" 123 | 124 | return environ 125 | 126 | def recv_body(self): 127 | """ return last chunk of the parsed body""" 128 | body = b("").join(self._body) 129 | self._body = [] 130 | self._partial_body = False 131 | return body 132 | 133 | def recv_body_into(self, barray): 134 | """ Receive the last chunk of the parsed bodyand store the data 135 | in a buffer rather than creating a new string. """ 136 | l = len(barray) 137 | body = b("").join(self._body) 138 | m = min(len(body), l) 139 | data, rest = body[:m], body[m:] 140 | barray[0:m] = data 141 | if not rest: 142 | self._body = [] 143 | self._partial_body = False 144 | else: 145 | self._body = [rest] 146 | return m 147 | 148 | def is_upgrade(self): 149 | """ Do we get upgrade header in the request. 
Useful for 150 | websockets """ 151 | return self._headers.get('connection', "") == "upgrade" 152 | 153 | def is_headers_complete(self): 154 | """ return True if all headers have been parsed. """ 155 | return self.__on_headers_complete 156 | 157 | def is_partial_body(self): 158 | """ return True if a chunk of body have been parsed """ 159 | return self._partial_body 160 | 161 | def is_message_begin(self): 162 | """ return True if the parsing start """ 163 | return self.__on_message_begin 164 | 165 | def is_message_complete(self): 166 | """ return True if the parsing is done (we get EOF) """ 167 | return self.__on_message_complete 168 | 169 | def is_chunked(self): 170 | """ return True if Transfer-Encoding header value is chunked""" 171 | return self._chunked 172 | 173 | def should_keep_alive(self): 174 | """ return True if the connection should be kept alive 175 | """ 176 | hconn = self._headers.get('connection', "").lower() 177 | if hconn == "close": 178 | return False 179 | elif hconn == "keep-alive": 180 | return True 181 | return self._version == (1, 1) 182 | 183 | def execute(self, data, length): 184 | # end of body can be passed manually by putting a length of 0 185 | 186 | if length == 0: 187 | self.on_message_complete = True 188 | return length 189 | 190 | # start to parse 191 | nb_parsed = 0 192 | while True: 193 | if not self.__on_firstline: 194 | idx = data.find(b("\r\n")) 195 | if idx < 0: 196 | self._buf.append(data) 197 | return len(data) 198 | else: 199 | self.__on_firstline = True 200 | self._buf.append(data[:idx]) 201 | first_line = bytes_to_str(b("").join(self._buf)) 202 | nb_parsed = nb_parsed + idx + 2 203 | 204 | rest = data[idx+2:] 205 | data = b("") 206 | if self._parse_firstline(first_line): 207 | self._buf = [rest] 208 | else: 209 | return nb_parsed 210 | elif not self.__on_headers_complete: 211 | if data: 212 | self._buf.append(data) 213 | data = b("") 214 | 215 | try: 216 | to_parse = b("").join(self._buf) 217 | ret = 
self._parse_headers(to_parse) 218 | if not ret: 219 | return length 220 | nb_parsed = nb_parsed + (len(to_parse) - ret) 221 | except InvalidHeader, e: 222 | self.errno = INVALID_HEADER 223 | self.errstr = str(e) 224 | return nb_parsed 225 | elif not self.__on_message_complete: 226 | if not self.__on_message_begin: 227 | self.__on_message_begin = True 228 | 229 | if data: 230 | self._buf.append(data) 231 | data = b("") 232 | 233 | ret = self._parse_body() 234 | if ret is None: 235 | return length 236 | 237 | elif ret < 0: 238 | return ret 239 | elif ret == 0: 240 | self.__on_message_complete = True 241 | return length 242 | else: 243 | nb_parsed = max(length, ret) 244 | 245 | else: 246 | return 0 247 | 248 | def _parse_firstline(self, line): 249 | try: 250 | if self.kind == 2: # auto detect 251 | try: 252 | self._parse_request_line(line) 253 | except InvalidRequestLine: 254 | self._parse_response_line(line) 255 | elif self.kind == 1: 256 | self._parse_response_line(line) 257 | elif self.kind == 0: 258 | self._parse_request_line(line) 259 | except InvalidRequestLine, e: 260 | self.errno = BAD_FIRST_LINE 261 | self.errstr = str(e) 262 | return False 263 | return True 264 | 265 | def _parse_response_line(self, line): 266 | bits = line.split(None, 1) 267 | if len(bits) != 2: 268 | raise InvalidRequestLine(line) 269 | 270 | # version 271 | matchv = VERSION_RE.match(bits[0]) 272 | if matchv is None: 273 | raise InvalidRequestLine("Invalid HTTP version: %s" % bits[0]) 274 | self._version = (int(matchv.group(1)), int(matchv.group(2))) 275 | 276 | # status 277 | matchs = STATUS_RE.match(bits[1]) 278 | if matchs is None: 279 | raise InvalidRequestLine("Invalid status %" % bits[1]) 280 | 281 | self._status = bits[1] 282 | self._status_code = int(matchs.group(1)) 283 | self._reason = matchs.group(2) 284 | 285 | def _parse_request_line(self, line): 286 | bits = line.split(None, 2) 287 | if len(bits) != 3: 288 | raise InvalidRequestLine(line) 289 | 290 | # Method 291 | if not 
METHOD_RE.match(bits[0]): 292 | raise InvalidRequestLine("invalid Method: %s" % bits[0]) 293 | self._method = bits[0].upper() 294 | 295 | # URI 296 | self._url = bits[1] 297 | parts = urlparse.urlsplit(bits[1]) 298 | self._path = parts.path or "" 299 | self._query_string = parts.query or "" 300 | self._fragment = parts.fragment or "" 301 | 302 | # Version 303 | match = VERSION_RE.match(bits[2]) 304 | if match is None: 305 | raise InvalidRequestLine("Invalid HTTP version: %s" % bits[2]) 306 | self._version = (int(match.group(1)), int(match.group(2))) 307 | 308 | # update environ 309 | self.environ.update({ 310 | "PATH_INFO": self._path, 311 | "QUERY_STRING": self._query_string, 312 | "RAW_URI": self._url, 313 | "REQUEST_METHOD": self._method, 314 | "SERVER_PROTOCOL": bits[2]}) 315 | 316 | def _parse_headers(self, data): 317 | idx = data.find(b("\r\n\r\n")) 318 | if idx < 0: # we don't have all headers 319 | return False 320 | 321 | # Split lines on \r\n keeping the \r\n on each line 322 | lines = [bytes_to_str(line) + "\r\n" for line in 323 | data[:idx].split(b("\r\n"))] 324 | 325 | # Parse headers into key/value pairs paying attention 326 | # to continuation lines. 327 | while len(lines): 328 | # Parse initial header name : value pair. 
329 | curr = lines.pop(0) 330 | if curr.find(":") < 0: 331 | raise InvalidHeader("invalid line %s" % curr.strip()) 332 | name, value = curr.split(":", 1) 333 | name = name.rstrip(" \t").upper() 334 | if HEADER_RE.search(name): 335 | raise InvalidHeader("invalid header name %s" % name) 336 | name, value = name.strip(), [value.lstrip()] 337 | 338 | # Consume value continuation lines 339 | while len(lines) and lines[0].startswith((" ", "\t")): 340 | value.append(lines.pop(0)) 341 | value = ''.join(value).rstrip() 342 | 343 | # multiple headers 344 | if name in self._headers: 345 | value = "%s, %s" % (self._headers[name], value) 346 | 347 | # store new header value 348 | self._headers[name] = value 349 | 350 | # update WSGI environ 351 | key = 'HTTP_%s' % name.upper().replace('-','_') 352 | self._environ[key] = value 353 | 354 | # detect now if body is sent by chunks. 355 | clen = self._headers.get('content-length') 356 | te = self._headers.get('transfer-encoding', '').lower() 357 | 358 | if clen is not None: 359 | try: 360 | self._clen_rest = self._clen = int(clen) 361 | except ValueError: 362 | pass 363 | else: 364 | self._chunked = (te == 'chunked') 365 | if not self._chunked: 366 | self._clen_rest = sys.maxint 367 | 368 | # detect encoding and set decompress object 369 | encoding = self._headers.get('content-encoding') 370 | if encoding == "gzip": 371 | self.__decompress_obj = zlib.decompressobj(16+zlib.MAX_WBITS) 372 | elif encoding == "deflate": 373 | self.__decompress_obj = zlib.decompressobj() 374 | 375 | rest = data[idx+4:] 376 | self._buf = [rest] 377 | self.__on_headers_complete = True 378 | return len(rest) 379 | 380 | def _parse_body(self): 381 | if not self._chunked: 382 | body_part = b("").join(self._buf) 383 | self._clen_rest -= len(body_part) 384 | 385 | # maybe decompress 386 | if self.__decompress_obj is not None: 387 | body_part = self.__decompress_obj.decompress(body_part) 388 | 389 | self._partial_body = True 390 | self._body.append(body_part) 391 
| self._buf = [] 392 | 393 | if self._clen_rest <= 0: 394 | self.__on_message_complete = True 395 | return 396 | else: 397 | data = b("").join(self._buf) 398 | try: 399 | 400 | size, rest = self._parse_chunk_size(data) 401 | except InvalidChunkSize, e: 402 | self.errno = INVALID_CHUNK 403 | self.errstr = "invalid chunk size [%s]" % str(e) 404 | return -1 405 | 406 | if size == 0: 407 | return size 408 | 409 | if size is None or len(rest) < size: 410 | return None 411 | 412 | 413 | body_part, rest = rest[:size], rest[size:] 414 | if len(rest) < 2: 415 | self.errno = INVALID_CHUNK 416 | self.errstr = "chunk missing terminator [%s]" % data 417 | return -1 418 | 419 | # maybe decompress 420 | if self.__decompress_obj is not None: 421 | body_part = self.__decompress_obj.decompress(body_part) 422 | 423 | self._partial_body = True 424 | self._body.append(body_part) 425 | 426 | self._buf = [rest[2:]] 427 | return len(rest) 428 | 429 | def _parse_chunk_size(self, data): 430 | idx = data.find(b("\r\n")) 431 | if idx < 0: 432 | return None, None 433 | line, rest_chunk = data[:idx], data[idx+2:] 434 | chunk_size = line.split(b(";"), 1)[0].strip() 435 | try: 436 | chunk_size = int(chunk_size, 16) 437 | except ValueError: 438 | raise InvalidChunkSize(chunk_size) 439 | 440 | if chunk_size == 0: 441 | self._parse_trailers(rest_chunk) 442 | return 0, None 443 | return chunk_size, rest_chunk 444 | 445 | def _parse_trailers(self, data): 446 | idx = data.find(b("\r\n\r\n")) 447 | 448 | if data[:2] == b("\r\n"): 449 | self._trailers = self._parse_headers(data[:idx]) 450 | -------------------------------------------------------------------------------- /http_parser/py25.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 - 2 | # 3 | # This file is part of http-parser released under the MIT license. 4 | # See the NOTICE for more information. 
5 | 6 | 7 | 8 | import array 9 | import codecs 10 | from UserDict import DictMixin 11 | try: 12 | from thread import allocate_lock as Lock 13 | except ImportError: 14 | from dummy_thread import allocate_lock as Lock 15 | 16 | # open() uses st_blksize whenever we can 17 | DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes 18 | 19 | bytes = str 20 | # bytearray is a mutable type that is easily turned into an instance of 21 | # bytes 22 | class bytearray(list): 23 | 24 | def __init__(self, i=0): 25 | if isinstance(i, int): 26 | list.__init__(self, [' '] * i) 27 | else: 28 | list.__init__(self, i) 29 | 30 | # for bytes(bytearray()) usage 31 | def __str__(self): return ''.join(self) 32 | # append automatically converts integers to characters 33 | def append(self, item): 34 | if isinstance(item, str): 35 | list.append(self, item) 36 | else: 37 | list.append(self, chr(item)) 38 | # += 39 | def __iadd__(self, other): 40 | for byte in other: 41 | self.append(byte) 42 | return self 43 | 44 | def endswith(self, c): 45 | return self[-1] == c 46 | 47 | def startswith(self, c): 48 | return self[0] == c 49 | 50 | 51 | class IOrderedDict(dict, DictMixin): 52 | 53 | 54 | def __init__(self, *args, **kwds): 55 | if len(args) > 1: 56 | raise TypeError('expected at most 1 arguments, got %d' % len(args)) 57 | try: 58 | self.__end 59 | except AttributeError: 60 | self.clear() 61 | self.update(*args, **kwds) 62 | self.__lower = {} 63 | 64 | def get(self, key, default=None): 65 | if key in self: 66 | return self[key] 67 | return default 68 | 69 | def clear(self): 70 | self.__end = end = [] 71 | end += [None, end, end] # sentinel node for doubly linked list 72 | self.__map = {} # key --> [key, prev, next] 73 | self.__lower = {} # key -> key.lower() 74 | dict.clear(self) 75 | 76 | def __setitem__(self, key, value): 77 | if key not in self: 78 | end = self.__end 79 | curr = end[1] 80 | curr[2] = end[1] = self.__map[key] = [key, curr, end] 81 | self.__lower[key.lower()] = key 82 | key = 
self.__lower[key.lower()] 83 | dict.__setitem__(self, key, value) 84 | 85 | def __delitem__(self, key): 86 | if key in self: 87 | key = self.__lower.pop(key.lower()) 88 | 89 | dict.__delitem__(self, key) 90 | key, prev, next = self.__map.pop(key) 91 | prev[2] = next 92 | next[1] = prev 93 | 94 | def __getitem__(self, key, dict_getitem=dict.__getitem__): 95 | if key in self: 96 | key = self.__lower.get(key.lower()) 97 | return dict_getitem(self, key) 98 | 99 | def __contains__(self, key): 100 | return key.lower() in self.__lower 101 | 102 | def __iter__(self): 103 | end = self.__end 104 | curr = end[2] 105 | while curr is not end: 106 | yield curr[0] 107 | curr = curr[2] 108 | 109 | def __reversed__(self): 110 | end = self.__end 111 | curr = end[1] 112 | while curr is not end: 113 | yield curr[0] 114 | curr = curr[1] 115 | 116 | def popitem(self, last=True): 117 | if not self: 118 | raise KeyError('dictionary is empty') 119 | if last: 120 | key = reversed(self).next() 121 | else: 122 | key = iter(self).next() 123 | value = self.pop(key) 124 | return key, value 125 | 126 | def __reduce__(self): 127 | items = [[k, self[k]] for k in self] 128 | tmp = self.__map, self.__end 129 | del self.__map, self.__end 130 | inst_dict = vars(self).copy() 131 | self.__map, self.__end = tmp 132 | if inst_dict: 133 | return (self.__class__, (items,), inst_dict) 134 | return self.__class__, (items,) 135 | 136 | def keys(self): 137 | return list(self) 138 | 139 | setdefault = DictMixin.setdefault 140 | update = DictMixin.update 141 | pop = DictMixin.pop 142 | values = DictMixin.values 143 | items = DictMixin.items 144 | iterkeys = DictMixin.iterkeys 145 | itervalues = DictMixin.itervalues 146 | iteritems = DictMixin.iteritems 147 | 148 | def __repr__(self): 149 | if not self: 150 | return '%s()' % (self.__class__.__name__,) 151 | return '%s(%r)' % (self.__class__.__name__, self.items()) 152 | 153 | def copy(self): 154 | return self.__class__(self) 155 | 156 | @classmethod 157 | def 
fromkeys(cls, iterable, value=None): 158 | d = cls() 159 | for key in iterable: 160 | d[key] = value 161 | return d 162 | 163 | def __eq__(self, other): 164 | if isinstance(other, OrderedDict): 165 | if len(self) != len(other): 166 | return False 167 | for p, q in zip(self.items(), other.items()): 168 | if p != q: 169 | return False 170 | return True 171 | return dict.__eq__(self, other) 172 | 173 | def __ne__(self, other): 174 | return not self == other 175 | 176 | 177 | 178 | class IOBase(object): 179 | 180 | ### Flush and close ### 181 | 182 | def seek(self, pos, whence=0): 183 | raise NotImplementedError 184 | 185 | def tell(self): 186 | """Return current stream position.""" 187 | return self.seek(0, 1) 188 | 189 | def truncate(self, pos=None): 190 | raise NotImplementedError 191 | 192 | def flush(self): 193 | """Flush write buffers, if applicable. 194 | 195 | This is not implemented for read-only and non-blocking streams. 196 | """ 197 | self._checkClosed() 198 | # XXX Should this return the number of bytes written??? 199 | 200 | __closed = False 201 | 202 | def close(self): 203 | """Flush and close the IO object. 204 | 205 | This method has no effect if the file is already closed. 206 | """ 207 | if not self.__closed: 208 | self.flush() 209 | self.__closed = True 210 | 211 | def __del__(self): 212 | """Destructor. Calls close().""" 213 | # The try/except block is in case this is called at program 214 | # exit time, when it's possible that globals have already been 215 | # deleted, and then the close() call might fail. Since 216 | # there's nothing we can do about such failures and they annoy 217 | # the end users, we suppress the traceback. 218 | try: 219 | self.close() 220 | except: 221 | pass 222 | 223 | 224 | @property 225 | def closed(self): 226 | """closed: bool. True iff the file has been closed. 227 | 228 | For backwards compatibility, this is a property, not a predicate. 
229 | """ 230 | return self.__closed 231 | 232 | def _checkClosed(self, msg=None): 233 | """Internal: raise an ValueError if file is closed 234 | """ 235 | if self.closed: 236 | raise ValueError("I/O operation on closed file." 237 | if msg is None else msg) 238 | 239 | ### Context manager ### 240 | 241 | def __enter__(self): 242 | """Context management protocol. Returns self.""" 243 | self._checkClosed() 244 | return self 245 | 246 | def __exit__(self, *args): 247 | """Context management protocol. Calls close()""" 248 | self.close() 249 | 250 | 251 | def readable(self): 252 | """Return whether object was opened for reading. 253 | 254 | If False, read() will raise IOError. 255 | """ 256 | return False 257 | 258 | def _checkReadable(self, msg=None): 259 | """Internal: raise an IOError if file is not readable 260 | """ 261 | if not self.readable(): 262 | raise IOError("File or stream is not readable." 263 | if msg is None else msg) 264 | 265 | 266 | ### Readline[s] ### 267 | 268 | def readline(self, limit=-1): 269 | r"""Read and return a line from the stream. 270 | 271 | If limit is specified, at most limit bytes will be read. 272 | 273 | The line terminator is always b'\n' for binary files; for text 274 | files, the newlines argument to open can be used to select the line 275 | terminator(s) recognized. 276 | """ 277 | # For backwards compatibility, a (slowish) readline(). 
278 | if hasattr(self, "peek"): 279 | def nreadahead(): 280 | readahead = self.peek(1) 281 | if not readahead: 282 | return 1 283 | 284 | readahead = "".join(readahead) 285 | n = (readahead.find("\n") + 1) or len(readahead) 286 | if limit >= 0: 287 | n = min(n, limit) 288 | return n 289 | else: 290 | def nreadahead(): 291 | return 1 292 | if limit is None: 293 | limit = -1 294 | elif not isinstance(limit, (int, long)): 295 | raise TypeError("limit must be an integer") 296 | res = bytearray() 297 | while limit < 0 or len(res) < limit: 298 | b = self.read(nreadahead()) 299 | if not b: 300 | break 301 | res += b 302 | if res.endswith("\n"): 303 | break 304 | return bytes(res) 305 | 306 | def __iter__(self): 307 | self._checkClosed() 308 | return self 309 | 310 | def next(self): 311 | line = self.readline() 312 | if not line: 313 | raise StopIteration 314 | return line 315 | 316 | def readlines(self, hint=None): 317 | """Return a list of lines from the stream. 318 | 319 | hint can be specified to control the number of lines read: no more 320 | lines will be read if the total size (in bytes/characters) of all 321 | lines so far exceeds hint. 322 | """ 323 | if hint is not None and not isinstance(hint, (int, long)): 324 | raise TypeError("integer or None expected") 325 | if hint is None or hint <= 0: 326 | return list(self) 327 | n = 0 328 | lines = [] 329 | for line in self: 330 | lines.append(line) 331 | n += len(line) 332 | if n >= hint: 333 | break 334 | return lines 335 | 336 | 337 | class RawIOBase(IOBase): 338 | 339 | """Base class for raw binary I/O.""" 340 | 341 | # The read() method is implemented by calling readinto(); derived 342 | # classes that want to support read() only need to implement 343 | # readinto() as a primitive operation. In general, readinto() can be 344 | # more efficient than read(). 
345 | 346 | # (It would be tempting to also provide an implementation of 347 | # readinto() in terms of read(), in case the latter is a more suitable 348 | # primitive operation, but that would lead to nasty recursion in case 349 | # a subclass doesn't implement either.) 350 | 351 | 352 | def read(self, n=-1): 353 | """Read and return up to n bytes. 354 | 355 | Returns an empty bytes object on EOF, or None if the object is 356 | set not to block and has no data to read. 357 | """ 358 | if n is None: 359 | n = -1 360 | if n < 0: 361 | return self.readall() 362 | b = bytearray(n.__index__()) 363 | n = self.readinto(b) 364 | if n is None: 365 | return None 366 | del b[n:] 367 | return bytes(b) 368 | 369 | def readall(self): 370 | """Read until EOF, using multiple read() call.""" 371 | res = bytearray() 372 | while True: 373 | data = self.read(DEFAULT_BUFFER_SIZE) 374 | if not data: 375 | break 376 | res += data 377 | return bytes(res) 378 | 379 | def readinto(self, b): 380 | """Read up to len(b) bytes into b. 381 | 382 | Returns number of bytes read (0 for EOF), or None if the object 383 | is set not to block and has no data to read. 384 | """ 385 | raise NotImplementedError 386 | 387 | class BufferedReader(RawIOBase): 388 | 389 | def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE): 390 | self.raw = raw 391 | if buffer_size <= 0: 392 | raise ValueError("invalid buffer size") 393 | self.buffer_size = buffer_size 394 | self._reset_read_buf() 395 | self._read_lock = Lock() 396 | 397 | def _reset_read_buf(self): 398 | self._read_buf = "" 399 | self._read_pos = 0 400 | 401 | def read(self, n=None): 402 | """Read n bytes. 403 | 404 | Returns exactly n bytes of data unless the underlying raw IO 405 | stream reaches EOF or if the call would block in non-blocking 406 | mode. If n is negative, read until EOF or until read() would 407 | block. 
408 | """ 409 | if n is not None and n < -1: 410 | raise ValueError("invalid number of bytes to read") 411 | 412 | self._read_lock.acquire() 413 | try: 414 | return self._read_unlocked(n) 415 | finally: 416 | self._read_lock.release() 417 | 418 | def readinto(self, b): 419 | """Read up to len(b) bytes into b. 420 | 421 | Like read(), this may issue multiple reads to the underlying raw 422 | stream, unless the latter is 'interactive'. 423 | 424 | Returns the number of bytes read (0 for EOF). 425 | 426 | Raises BlockingIOError if the underlying raw stream has no 427 | data at the moment. 428 | """ 429 | # XXX This ought to work with anything that supports the buffer API 430 | data = self.read(len(b)) 431 | n = len(data) 432 | try: 433 | b[:n] = data 434 | except TypeError, err: 435 | import array 436 | if not isinstance(b, array.array): 437 | raise err 438 | b[:n] = array.array('b', data) 439 | return n 440 | 441 | def _read_unlocked(self, n=None): 442 | nodata_val = "" 443 | empty_values = ("", None) 444 | buf = self._read_buf 445 | pos = self._read_pos 446 | 447 | # Special case for when the number of bytes to read is unspecified. 448 | if n is None or n == -1: 449 | self._reset_read_buf() 450 | chunks = [buf[pos:]] # Strip the consumed bytes. 451 | current_size = 0 452 | while True: 453 | # Read until EOF or until read() would block. 454 | chunk = self.raw.read() 455 | if chunk in empty_values: 456 | nodata_val = chunk 457 | break 458 | current_size += len(chunk) 459 | chunks.append(chunk) 460 | return "".join(chunks) or nodata_val 461 | 462 | # The number of bytes to read is specified, return at most n bytes. 463 | avail = len(buf) - pos # Length of the available buffered data. 464 | if n <= avail: 465 | # Fast path: the data to read is fully buffered. 466 | self._read_pos += n 467 | return buf[pos:pos+n] 468 | # Slow path: read from the stream until enough bytes are read, 469 | # or until an EOF occurs or until read() would block. 
470 | chunks = [buf[pos:]] 471 | wanted = max(self.buffer_size, n) 472 | while avail < n: 473 | chunk = self.raw.read(wanted) 474 | if chunk in empty_values: 475 | nodata_val = chunk 476 | break 477 | avail += len(chunk) 478 | chunks.append(chunk) 479 | # n is more then avail only when an EOF occurred or when 480 | # read() would have blocked. 481 | n = min(n, avail) 482 | out = "".join(chunks) 483 | self._read_buf = out[n:] # Save the extra data in the buffer. 484 | self._read_pos = 0 485 | return out[:n] if out else nodata_val 486 | 487 | def peek(self, n=0): 488 | """Returns buffered bytes without advancing the position. 489 | 490 | The argument indicates a desired minimal number of bytes; we 491 | do at most one raw read to satisfy it. We never return more 492 | than self.buffer_size. 493 | """ 494 | self._read_lock.acquire() 495 | try: 496 | return self._peek_unlocked(n) 497 | finally: 498 | self._read_lock.release() 499 | 500 | def _peek_unlocked(self, n=0): 501 | want = min(n, self.buffer_size) 502 | have = len(self._read_buf) - self._read_pos 503 | if have < want or have <= 0: 504 | to_read = self.buffer_size - have 505 | current = self.raw.read(to_read) 506 | if current: 507 | self._read_buf = self._read_buf[self._read_pos:] + current 508 | self._read_pos = 0 509 | return self._read_buf[self._read_pos:] 510 | 511 | def read1(self, n): 512 | """Reads up to n bytes, with at most one read() system call.""" 513 | # Returns up to n bytes. If at least one byte is buffered, we 514 | # only return buffered bytes. Otherwise, we do one raw read. 
515 | if n < 0: 516 | raise ValueError("number of bytes to read must be positive") 517 | if n == 0: 518 | return "" 519 | self._read_lock.acquire() 520 | try: 521 | self._peek_unlocked(1) 522 | return self._read_unlocked( 523 | min(n, len(self._read_buf) - self._read_pos)) 524 | finally: 525 | self._read_lock.release() 526 | 527 | def tell(self): 528 | return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos 529 | 530 | def seek(self, pos, whence=0): 531 | if not (0 <= whence <= 2): 532 | raise ValueError("invalid whence value") 533 | self._read_lock.acquire() 534 | try: 535 | if whence == 1: 536 | pos -= len(self._read_buf) - self._read_pos 537 | 538 | pos = self.raw.seek(pos, whence) 539 | if pos < 0: 540 | raise IOError("tell() returned an invalid position") 541 | 542 | self._reset_read_buf() 543 | return pos 544 | finally: 545 | self._read_lock.release() 546 | 547 | def truncate(self, pos=None): 548 | # Flush the stream. We're mixing buffered I/O with lower-level I/O, 549 | # and a flush may be necessary to synch both views of the current 550 | # file state. 551 | self.flush() 552 | 553 | if pos is None: 554 | pos = self.tell() 555 | # XXX: Should seek() be used, instead of passing the position 556 | # XXX directly to truncate? 
557 | return self.raw.truncate(pos) 558 | 559 | ### Flush and close ### 560 | 561 | def flush(self): 562 | if self.closed: 563 | raise ValueError("flush of closed file") 564 | self.raw.flush() 565 | 566 | def close(self): 567 | if self.raw is not None and not self.closed: 568 | self.flush() 569 | self.raw.close() 570 | 571 | def detach(self): 572 | if self.raw is None: 573 | raise ValueError("raw stream already detached") 574 | self.flush() 575 | raw = self.raw 576 | self.raw = None 577 | return raw 578 | 579 | ### Inquiries ### 580 | 581 | def seekable(self): 582 | return self.raw.seekable() 583 | 584 | def readable(self): 585 | return self.raw.readable() 586 | 587 | @property 588 | def closed(self): 589 | return self.raw.closed 590 | 591 | class IncrementalNewlineDecoder(codecs.IncrementalDecoder): 592 | r"""Codec used when reading a file in universal newlines mode. It wraps 593 | another incremental decoder, translating \r\n and \r into \n. It also 594 | records the types of newlines encountered. When used with 595 | translate=False, it ensures that the newline sequence is returned in 596 | one piece. 
597 | """ 598 | def __init__(self, decoder, translate, errors='strict'): 599 | codecs.IncrementalDecoder.__init__(self, errors=errors) 600 | self.translate = translate 601 | self.decoder = decoder 602 | self.seennl = 0 603 | self.pendingcr = False 604 | 605 | def decode(self, input, final=False): 606 | # decode input (with the eventual \r from a previous pass) 607 | if self.decoder is None: 608 | output = input 609 | else: 610 | output = self.decoder.decode(input, final=final) 611 | if self.pendingcr and (output or final): 612 | output = "\r" + output 613 | self.pendingcr = False 614 | 615 | # retain last \r even when not translating data: 616 | # then readline() is sure to get \r\n in one pass 617 | if output.endswith("\r") and not final: 618 | output = output[:-1] 619 | self.pendingcr = True 620 | 621 | # Record which newlines are read 622 | crlf = output.count('\r\n') 623 | cr = output.count('\r') - crlf 624 | lf = output.count('\n') - crlf 625 | self.seennl |= (lf and self._LF) | (cr and self._CR) \ 626 | | (crlf and self._CRLF) 627 | 628 | if self.translate: 629 | if crlf: 630 | output = output.replace("\r\n", "\n") 631 | if cr: 632 | output = output.replace("\r", "\n") 633 | 634 | return output 635 | 636 | def getstate(self): 637 | if self.decoder is None: 638 | buf = "" 639 | flag = 0 640 | else: 641 | buf, flag = self.decoder.getstate() 642 | flag <<= 1 643 | if self.pendingcr: 644 | flag |= 1 645 | return buf, flag 646 | 647 | def setstate(self, state): 648 | buf, flag = state 649 | self.pendingcr = bool(flag & 1) 650 | if self.decoder is not None: 651 | self.decoder.setstate((buf, flag >> 1)) 652 | 653 | def reset(self): 654 | self.seennl = 0 655 | self.pendingcr = False 656 | if self.decoder is not None: 657 | self.decoder.reset() 658 | 659 | _LF = 1 660 | _CR = 2 661 | _CRLF = 4 662 | 663 | @property 664 | def newlines(self): 665 | return (None, 666 | "\n", 667 | "\r", 668 | ("\r", "\n"), 669 | "\r\n", 670 | ("\n", "\r\n"), 671 | ("\r", "\r\n"), 672 | 
("\r", "\n", "\r\n") 673 | )[self.seennl] 674 | 675 | 676 | 677 | class TextIOWrapper(IOBase): 678 | 679 | _CHUNK_SIZE = 2048 680 | 681 | def __init__(self, buffer, encoding=None, errors=None, newline=None, 682 | line_buffering=False): 683 | if newline is not None and not isinstance(newline, basestring): 684 | raise TypeError("illegal newline type: %r" % (type(newline),)) 685 | if newline not in (None, "", "\n", "\r", "\r\n"): 686 | raise ValueError("illegal newline value: %r" % (newline,)) 687 | if encoding is None: 688 | try: 689 | import locale 690 | except ImportError: 691 | # Importing locale may fail if Python is being built 692 | encoding = "ascii" 693 | else: 694 | encoding = locale.getpreferredencoding() 695 | 696 | if not isinstance(encoding, basestring): 697 | raise ValueError("invalid encoding: %r" % encoding) 698 | 699 | if errors is None: 700 | errors = "strict" 701 | else: 702 | if not isinstance(errors, basestring): 703 | raise ValueError("invalid errors: %r" % errors) 704 | 705 | self.buffer = buffer 706 | self._line_buffering = line_buffering 707 | self._encoding = encoding 708 | self._errors = errors 709 | self._readuniversal = not newline 710 | self._readtranslate = newline is None 711 | self._readnl = newline 712 | self._writetranslate = newline != '' 713 | self._writenl = newline or os.linesep 714 | self._encoder = None 715 | self._decoder = None 716 | self._decoded_chars = '' # buffer for text returned from decoder 717 | self._decoded_chars_used = 0 # offset into _decoded_chars for read() 718 | self._snapshot = None # info for reconstructing decoder state 719 | self._seekable = self._telling = self.buffer.seekable() 720 | 721 | 722 | @property 723 | def encoding(self): 724 | return self._encoding 725 | 726 | @property 727 | def errors(self): 728 | return self._errors 729 | 730 | @property 731 | def line_buffering(self): 732 | return self._line_buffering 733 | 734 | def seekable(self): 735 | return self._seekable 736 | 737 | def 
readable(self): 738 | return self.buffer.readable() 739 | 740 | def flush(self): 741 | self.buffer.flush() 742 | self._telling = self._seekable 743 | 744 | def close(self): 745 | if self.buffer is not None and not self.closed: 746 | self.flush() 747 | self.buffer.close() 748 | 749 | @property 750 | def closed(self): 751 | return self.buffer.closed 752 | 753 | if self.closed: 754 | raise ValueError("write to closed file") 755 | if not isinstance(s, unicode): 756 | raise TypeError("can't write %s to text stream" % 757 | s.__class__.__name__) 758 | length = len(s) 759 | haslf = (self._writetranslate or self._line_buffering) and "\n" in s 760 | if haslf and self._writetranslate and self._writenl != "\n": 761 | s = s.replace("\n", self._writenl) 762 | encoder = self._encoder or self._get_encoder() 763 | # XXX What if we were just reading? 764 | b = encoder.encode(s) 765 | self.buffer.write(b) 766 | if self._line_buffering and (haslf or "\r" in s): 767 | self.flush() 768 | self._snapshot = None 769 | if self._decoder: 770 | self._decoder.reset() 771 | return length 772 | 773 | def _get_encoder(self): 774 | make_encoder = codecs.getincrementalencoder(self._encoding) 775 | self._encoder = make_encoder(self._errors) 776 | return self._encoder 777 | 778 | def _get_decoder(self): 779 | make_decoder = codecs.getincrementaldecoder(self._encoding) 780 | decoder = make_decoder(self._errors) 781 | if self._readuniversal: 782 | decoder = IncrementalNewlineDecoder(decoder, self._readtranslate) 783 | self._decoder = decoder 784 | return decoder 785 | 786 | # The following three methods implement an ADT for _decoded_chars. 787 | # Text returned from the decoder is buffered here until the client 788 | # requests it by calling our read() or readline() method. 
789 | def _set_decoded_chars(self, chars): 790 | """Set the _decoded_chars buffer.""" 791 | self._decoded_chars = chars 792 | self._decoded_chars_used = 0 793 | 794 | def _get_decoded_chars(self, n=None): 795 | """Advance into the _decoded_chars buffer.""" 796 | offset = self._decoded_chars_used 797 | if n is None: 798 | chars = self._decoded_chars[offset:] 799 | else: 800 | chars = self._decoded_chars[offset:offset + n] 801 | self._decoded_chars_used += len(chars) 802 | return chars 803 | 804 | def _rewind_decoded_chars(self, n): 805 | """Rewind the _decoded_chars buffer.""" 806 | if self._decoded_chars_used < n: 807 | raise AssertionError("rewind decoded_chars out of bounds") 808 | self._decoded_chars_used -= n 809 | 810 | def _read_chunk(self): 811 | """ 812 | Read and decode the next chunk of data from the BufferedReader. 813 | """ 814 | 815 | # The return value is True unless EOF was reached. The decoded 816 | # string is placed in self._decoded_chars (replacing its previous 817 | # value). The entire input chunk is sent to the decoder, though 818 | # some of it may remain buffered in the decoder, yet to be 819 | # converted. 820 | 821 | if self._decoder is None: 822 | raise ValueError("no decoder") 823 | 824 | if self._telling: 825 | # To prepare for tell(), we need to snapshot a point in the 826 | # file where the decoder's input buffer is empty. 827 | 828 | dec_buffer, dec_flags = self._decoder.getstate() 829 | # Given this, we know there was a valid snapshot point 830 | # len(dec_buffer) bytes ago with decoder state (b'', dec_flags). 831 | 832 | # Read a chunk, decode it, and put the result in self._decoded_chars. 833 | input_chunk = self.buffer.read1(self._CHUNK_SIZE) 834 | eof = not input_chunk 835 | self._set_decoded_chars(self._decoder.decode(input_chunk, eof)) 836 | 837 | if self._telling: 838 | # At the snapshot point, len(dec_buffer) bytes before the read, 839 | # the next input to be decoded is dec_buffer + input_chunk. 
840 | self._snapshot = (dec_flags, dec_buffer + input_chunk) 841 | 842 | return not eof 843 | 844 | def _pack_cookie(self, position, dec_flags=0, 845 | bytes_to_feed=0, need_eof=0, chars_to_skip=0): 846 | # The meaning of a tell() cookie is: seek to position, set the 847 | # decoder flags to dec_flags, read bytes_to_feed bytes, feed them 848 | # into the decoder with need_eof as the EOF flag, then skip 849 | # chars_to_skip characters of the decoded result. For most simple 850 | # decoders, tell() will often just give a byte offset in the file. 851 | return (position | (dec_flags<<64) | (bytes_to_feed<<128) | 852 | (chars_to_skip<<192) | bool(need_eof)<<256) 853 | 854 | def _unpack_cookie(self, bigint): 855 | rest, position = divmod(bigint, 1<<64) 856 | rest, dec_flags = divmod(rest, 1<<64) 857 | rest, bytes_to_feed = divmod(rest, 1<<64) 858 | need_eof, chars_to_skip = divmod(rest, 1<<64) 859 | return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip 860 | 861 | def tell(self): 862 | if not self._seekable: 863 | raise IOError("underlying stream is not seekable") 864 | if not self._telling: 865 | raise IOError("telling position disabled by next() call") 866 | self.flush() 867 | position = self.buffer.tell() 868 | decoder = self._decoder 869 | if decoder is None or self._snapshot is None: 870 | if self._decoded_chars: 871 | # This should never happen. 872 | raise AssertionError("pending decoded text") 873 | return position 874 | 875 | # Skip backward to the snapshot point (see _read_chunk). 876 | dec_flags, next_input = self._snapshot 877 | position -= len(next_input) 878 | 879 | # How many decoded characters have been used up since the snapshot? 880 | chars_to_skip = self._decoded_chars_used 881 | if chars_to_skip == 0: 882 | # We haven't moved from the snapshot point. 883 | return self._pack_cookie(position, dec_flags) 884 | 885 | # Starting from the snapshot position, we will walk the decoder 886 | # forward until it gives us enough decoded characters. 
887 | saved_state = decoder.getstate() 888 | try: 889 | # Note our initial start point. 890 | decoder.setstate(('', dec_flags)) 891 | start_pos = position 892 | start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0 893 | need_eof = 0 894 | 895 | # Feed the decoder one byte at a time. As we go, note the 896 | # nearest "safe start point" before the current location 897 | # (a point where the decoder has nothing buffered, so seek() 898 | # can safely start from there and advance to this location). 899 | for next_byte in next_input: 900 | bytes_fed += 1 901 | chars_decoded += len(decoder.decode(next_byte)) 902 | dec_buffer, dec_flags = decoder.getstate() 903 | if not dec_buffer and chars_decoded <= chars_to_skip: 904 | # Decoder buffer is empty, so this is a safe start point. 905 | start_pos += bytes_fed 906 | chars_to_skip -= chars_decoded 907 | start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0 908 | if chars_decoded >= chars_to_skip: 909 | break 910 | else: 911 | # We didn't get enough decoded data; signal EOF to get more. 912 | chars_decoded += len(decoder.decode('', final=True)) 913 | need_eof = 1 914 | if chars_decoded < chars_to_skip: 915 | raise IOError("can't reconstruct logical file position") 916 | 917 | # The returned cookie corresponds to the last safe start point. 
918 | return self._pack_cookie( 919 | start_pos, start_flags, bytes_fed, need_eof, chars_to_skip) 920 | finally: 921 | decoder.setstate(saved_state) 922 | 923 | def truncate(self, pos=None): 924 | self.flush() 925 | if pos is None: 926 | pos = self.tell() 927 | return self.buffer.truncate(pos) 928 | 929 | def detach(self): 930 | if self.buffer is None: 931 | raise ValueError("buffer is already detached") 932 | self.flush() 933 | buffer = self.buffer 934 | self.buffer = None 935 | return buffer 936 | 937 | def seek(self, cookie, whence=0): 938 | if self.closed: 939 | raise ValueError("tell on closed file") 940 | if not self._seekable: 941 | raise IOError("underlying stream is not seekable") 942 | if whence == 1: # seek relative to current position 943 | if cookie != 0: 944 | raise IOError("can't do nonzero cur-relative seeks") 945 | # Seeking to the current position should attempt to 946 | # sync the underlying buffer with the current position. 947 | whence = 0 948 | cookie = self.tell() 949 | if whence == 2: # seek relative to end of file 950 | if cookie != 0: 951 | raise IOError("can't do nonzero end-relative seeks") 952 | self.flush() 953 | position = self.buffer.seek(0, 2) 954 | self._set_decoded_chars('') 955 | self._snapshot = None 956 | if self._decoder: 957 | self._decoder.reset() 958 | return position 959 | if whence != 0: 960 | raise ValueError("invalid whence (%r, should be 0, 1 or 2)" % 961 | (whence,)) 962 | if cookie < 0: 963 | raise ValueError("negative seek position %r" % (cookie,)) 964 | self.flush() 965 | 966 | # The strategy of seek() is to go back to the safe start point 967 | # and replay the effect of read(chars_to_skip) from there. 968 | start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \ 969 | self._unpack_cookie(cookie) 970 | 971 | # Seek back to the safe start point. 
972 | self.buffer.seek(start_pos) 973 | self._set_decoded_chars('') 974 | self._snapshot = None 975 | 976 | # Restore the decoder to its state from the safe start point. 977 | if cookie == 0 and self._decoder: 978 | self._decoder.reset() 979 | elif self._decoder or dec_flags or chars_to_skip: 980 | self._decoder = self._decoder or self._get_decoder() 981 | self._decoder.setstate(('', dec_flags)) 982 | self._snapshot = (dec_flags, '') 983 | 984 | if chars_to_skip: 985 | # Just like _read_chunk, feed the decoder and save a snapshot. 986 | input_chunk = self.buffer.read(bytes_to_feed) 987 | self._set_decoded_chars( 988 | self._decoder.decode(input_chunk, need_eof)) 989 | self._snapshot = (dec_flags, input_chunk) 990 | 991 | # Skip chars_to_skip of the decoded characters. 992 | if len(self._decoded_chars) < chars_to_skip: 993 | raise IOError("can't restore logical file position") 994 | self._decoded_chars_used = chars_to_skip 995 | 996 | # Finally, reset the encoder (merely useful for proper BOM handling) 997 | try: 998 | encoder = self._encoder or self._get_encoder() 999 | except LookupError: 1000 | # Sometimes the encoder doesn't exist 1001 | pass 1002 | else: 1003 | if cookie != 0: 1004 | encoder.setstate(0) 1005 | else: 1006 | encoder.reset() 1007 | return cookie 1008 | 1009 | def read(self, n=None): 1010 | self._checkReadable() 1011 | if n is None: 1012 | n = -1 1013 | decoder = self._decoder or self._get_decoder() 1014 | try: 1015 | n.__index__ 1016 | except AttributeError: 1017 | raise TypeError("an integer is required") 1018 | if n < 0: 1019 | # Read everything. 1020 | result = (self._get_decoded_chars() + 1021 | decoder.decode(self.buffer.read(), final=True)) 1022 | self._set_decoded_chars('') 1023 | self._snapshot = None 1024 | return result 1025 | else: 1026 | # Keep reading chunks until we have n characters to return. 
1027 | eof = False 1028 | result = self._get_decoded_chars(n) 1029 | while len(result) < n and not eof: 1030 | eof = not self._read_chunk() 1031 | result += self._get_decoded_chars(n - len(result)) 1032 | return result 1033 | 1034 | def next(self): 1035 | self._telling = False 1036 | line = self.readline() 1037 | if not line: 1038 | self._snapshot = None 1039 | self._telling = self._seekable 1040 | raise StopIteration 1041 | return line 1042 | 1043 | def readline(self, limit=None): 1044 | if self.closed: 1045 | raise ValueError("read from closed file") 1046 | if limit is None: 1047 | limit = -1 1048 | elif not isinstance(limit, (int, long)): 1049 | raise TypeError("limit must be an integer") 1050 | 1051 | # Grab all the decoded text (we will rewind any extra bits later). 1052 | line = self._get_decoded_chars() 1053 | 1054 | start = 0 1055 | # Make the decoder if it doesn't already exist. 1056 | if not self._decoder: 1057 | self._get_decoder() 1058 | 1059 | pos = endpos = None 1060 | while True: 1061 | if self._readtranslate: 1062 | # Newlines are already translated, only search for \n 1063 | pos = line.find('\n', start) 1064 | if pos >= 0: 1065 | endpos = pos + 1 1066 | break 1067 | else: 1068 | start = len(line) 1069 | 1070 | elif self._readuniversal: 1071 | # Universal newline search. Find any of \r, \r\n, \n 1072 | # The decoder ensures that \r\n are not split in two pieces 1073 | 1074 | # In C we'd look for these in parallel of course. 
1075 | nlpos = line.find("\n", start) 1076 | crpos = line.find("\r", start) 1077 | if crpos == -1: 1078 | if nlpos == -1: 1079 | # Nothing found 1080 | start = len(line) 1081 | else: 1082 | # Found \n 1083 | endpos = nlpos + 1 1084 | break 1085 | elif nlpos == -1: 1086 | # Found lone \r 1087 | endpos = crpos + 1 1088 | break 1089 | elif nlpos < crpos: 1090 | # Found \n 1091 | endpos = nlpos + 1 1092 | break 1093 | elif nlpos == crpos + 1: 1094 | # Found \r\n 1095 | endpos = crpos + 2 1096 | break 1097 | else: 1098 | # Found \r 1099 | endpos = crpos + 1 1100 | break 1101 | else: 1102 | # non-universal 1103 | pos = line.find(self._readnl) 1104 | if pos >= 0: 1105 | endpos = pos + len(self._readnl) 1106 | break 1107 | 1108 | if limit >= 0 and len(line) >= limit: 1109 | endpos = limit # reached length limit 1110 | break 1111 | 1112 | # No line ending seen yet - get more data' 1113 | while self._read_chunk(): 1114 | if self._decoded_chars: 1115 | break 1116 | if self._decoded_chars: 1117 | line += self._get_decoded_chars() 1118 | else: 1119 | # end of file 1120 | self._set_decoded_chars('') 1121 | self._snapshot = None 1122 | return line 1123 | 1124 | if limit >= 0 and endpos > limit: 1125 | endpos = limit # don't exceed limit 1126 | 1127 | # Rewind _decoded_chars to just after the line ending we found. 1128 | self._rewind_decoded_chars(len(line) - endpos) 1129 | return line[:endpos] 1130 | 1131 | @property 1132 | def newlines(self): 1133 | return self._decoder.newlines if self._decoder else None 1134 | 1135 | -------------------------------------------------------------------------------- /http_parser/http_parser.c: -------------------------------------------------------------------------------- 1 | /* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev 2 | * 3 | * Additional changes are licensed under the same terms as NGINX and 4 | * copyright Joyent, Inc. and other Node contributors. All rights reserved. 
5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to 8 | * deal in the Software without restriction, including without limitation the 9 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 10 | * sell copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 22 | * IN THE SOFTWARE. 23 | */ 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | 30 | 31 | #ifndef MIN 32 | # define MIN(a,b) ((a) < (b) ? 
(a) : (b)) 33 | #endif 34 | 35 | 36 | #if HTTP_PARSER_DEBUG 37 | #define SET_ERRNO(e) \ 38 | do { \ 39 | parser->http_errno = (e); \ 40 | parser->error_lineno = __LINE__; \ 41 | } while (0) 42 | #else 43 | #define SET_ERRNO(e) \ 44 | do { \ 45 | parser->http_errno = (e); \ 46 | } while(0) 47 | #endif 48 | 49 | 50 | /* Run the notify callback FOR, returning ER if it fails */ 51 | #define CALLBACK_NOTIFY_(FOR, ER) \ 52 | do { \ 53 | assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \ 54 | \ 55 | if (settings->on_##FOR) { \ 56 | if (0 != settings->on_##FOR(parser)) { \ 57 | SET_ERRNO(HPE_CB_##FOR); \ 58 | } \ 59 | \ 60 | /* We either errored above or got paused; get out */ \ 61 | if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \ 62 | return (ER); \ 63 | } \ 64 | } \ 65 | } while (0) 66 | 67 | /* Run the notify callback FOR and consume the current byte */ 68 | #define CALLBACK_NOTIFY(FOR) CALLBACK_NOTIFY_(FOR, p - data + 1) 69 | 70 | /* Run the notify callback FOR and don't consume the current byte */ 71 | #define CALLBACK_NOTIFY_NOADVANCE(FOR) CALLBACK_NOTIFY_(FOR, p - data) 72 | 73 | /* Run data callback FOR with LEN bytes, returning ER if it fails */ 74 | #define CALLBACK_DATA_(FOR, LEN, ER) \ 75 | do { \ 76 | assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \ 77 | \ 78 | if (FOR##_mark) { \ 79 | if (settings->on_##FOR) { \ 80 | if (0 != settings->on_##FOR(parser, FOR##_mark, (LEN))) { \ 81 | SET_ERRNO(HPE_CB_##FOR); \ 82 | } \ 83 | \ 84 | /* We either errored above or got paused; get out */ \ 85 | if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \ 86 | return (ER); \ 87 | } \ 88 | } \ 89 | FOR##_mark = NULL; \ 90 | } \ 91 | } while (0) 92 | 93 | /* Run the data callback FOR and consume the current byte */ 94 | #define CALLBACK_DATA(FOR) \ 95 | CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1) 96 | 97 | /* Run the data callback FOR and don't consume the current byte */ 98 | #define CALLBACK_DATA_NOADVANCE(FOR) \ 99 | CALLBACK_DATA_(FOR, p - FOR##_mark, p - data) 100 | 101 | /* Set the 
/* Human-readable names for the HTTP method enum, indexed by its value.
 * The order must match the method enum in http_parser.h. */
static const char *method_strings[] = {
  "DELETE",     "GET",        "HEAD",         "POST",       "PUT",
  "CONNECT",    "OPTIONS",    "TRACE",        "COPY",       "LOCK",
  "MKCOL",      "MOVE",       "PROPFIND",     "PROPPATCH",  "UNLOCK",
  "REPORT",     "MKACTIVITY", "CHECKOUT",     "MERGE",      "M-SEARCH",
  "NOTIFY",     "SUBSCRIBE",  "UNSUBSCRIBE",  "PATCH"
};
/* Map an ASCII byte to its hexadecimal digit value, or -1 when the byte
 * is not a hex digit.  Accepts 0-9, A-F and a-f.
 * NOTE(review): only the first 128 entries are spelled out; entries
 * 128-255 are zero-initialized, so bytes >= 0x80 read as digit 0 rather
 * than -1.  This matches the table as written upstream — confirm it is
 * intended before changing. */
static const int8_t unhex[256] = {
  -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
   0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,  /* '0'..'9' */
  -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* 'A'..'F' */
  -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* 'a'..'f' */
  -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
};
/* Parser FSM states.  Response-line states come first, then
 * request-line states, then the header/body states shared by both. */
enum state {
  s_dead = 1,  /* important that this is > 0 */

  s_start_req_or_res,
  s_res_or_resp_H,
  s_start_res,
  s_res_H,
  s_res_HT,
  s_res_HTT,
  s_res_HTTP,
  s_res_first_http_major,
  s_res_http_major,
  s_res_first_http_minor,
  s_res_http_minor,
  s_res_first_status_code,
  s_res_status_code,
  s_res_status,
  s_res_line_almost_done,

  s_start_req,

  s_req_method,
  s_req_spaces_before_url,
  s_req_schema,
  s_req_schema_slash,
  s_req_schema_slash_slash,
  s_req_host,
  s_req_port,
  s_req_path,
  s_req_query_string_start,
  s_req_query_string,
  s_req_fragment_start,
  s_req_fragment,
  s_req_http_start,
  s_req_http_H,
  s_req_http_HT,
  s_req_http_HTT,
  s_req_http_HTTP,
  s_req_first_http_major,
  s_req_http_major,
  s_req_first_http_minor,
  s_req_http_minor,
  s_req_line_almost_done,

  s_header_field_start,
  s_header_field,
  s_header_value_start,
  s_header_value,
  s_header_value_lws,

  s_header_almost_done,

  s_chunk_size_start,
  s_chunk_size,
  s_chunk_parameters,
  s_chunk_size_almost_done,

  s_headers_almost_done,
  s_headers_done,

  /* Important: 's_headers_done' must be the last 'header' state. All
   * states beyond this must be 'body' states. It is used for overflow
   * checking. See the PARSING_HEADER() macro.
   */

  s_chunk_data,
  s_chunk_data_almost_done,
  s_chunk_data_done,

  s_body_identity,
  s_body_identity_eof,

  s_message_done
};


#define PARSING_HEADER(state) (state <= s_headers_done)


/* Sub-states used while matching well-known header names and values. */
enum header_states {
  h_general = 0,
  h_C,
  h_CO,
  h_CON,

  h_matching_connection,
  h_matching_proxy_connection,
  h_matching_content_length,
  h_matching_transfer_encoding,
  h_matching_upgrade,

  h_connection,
  h_content_length,
  h_transfer_encoding,
  h_upgrade,

  h_matching_transfer_encoding_chunked,
  h_matching_connection_keep_alive,
  h_matching_connection_close,

  h_transfer_encoding_chunked,
  h_connection_keep_alive,
  h_connection_close
};
' ' : tokens[(unsigned char)c]) 357 | #define IS_URL_CHAR(c) \ 358 | (normal_url_char[(unsigned char) (c)] || ((c) & 0x80)) 359 | #define IS_HOST_CHAR(c) \ 360 | (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_') 361 | #endif 362 | 363 | 364 | #define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res) 365 | 366 | 367 | #if HTTP_PARSER_STRICT 368 | # define STRICT_CHECK(cond) \ 369 | do { \ 370 | if (cond) { \ 371 | SET_ERRNO(HPE_STRICT); \ 372 | goto error; \ 373 | } \ 374 | } while (0) 375 | # define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead) 376 | #else 377 | # define STRICT_CHECK(cond) 378 | # define NEW_MESSAGE() start_state 379 | #endif 380 | 381 | 382 | /* Map errno values to strings for human-readable output */ 383 | #define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s }, 384 | static struct { 385 | const char *name; 386 | const char *description; 387 | } http_strerror_tab[] = { 388 | HTTP_ERRNO_MAP(HTTP_STRERROR_GEN) 389 | }; 390 | #undef HTTP_STRERROR_GEN 391 | 392 | int http_message_needs_eof(http_parser *parser); 393 | 394 | /* Our URL parser. 395 | * 396 | * This is designed to be shared by http_parser_execute() for URL validation, 397 | * hence it has a state transition + byte-for-byte interface. In addition, it 398 | * is meant to be embedded in http_parser_parse_url(), which does the dirty 399 | * work of turning state transitions URL components for its API. 400 | * 401 | * This function should only be invoked with non-space characters. It is 402 | * assumed that the caller cares about (and can detect) the transition between 403 | * URL and non-URL states by looking for these. 
 */
/* Advance the URL sub-state machine by one byte.
 *
 * s          - current URL state (one of the s_req_* states)
 * ch         - next input byte; must not be whitespace (asserted below)
 * is_connect - non-zero for CONNECT requests, whose request-target is a
 *              bare host[:port] rather than a scheme or path
 *
 * Returns the next state, or s_dead when ch is not permitted in state s.
 */
static inline enum state
parse_url_char(enum state s, const char ch, int is_connect)
{
  assert(!isspace(ch));

  switch (s) {
    case s_req_spaces_before_url:
      if (ch == '/' || ch == '*') {
        return s_req_path;
      }

      /* Proxied requests are followed by scheme of an absolute URI (alpha).
       * CONNECT is followed by a hostname, which begins with alphanum.
       * All other methods are followed by '/' or '*' (handled above).
       */
      if (IS_ALPHA(ch) || (is_connect && IS_NUM(ch))) {
        return (is_connect) ? s_req_host : s_req_schema;
      }

      break;

    case s_req_schema:
      if (IS_ALPHA(ch)) {
        return s;
      }

      if (ch == ':') {
        return s_req_schema_slash;
      }

      break;

    case s_req_schema_slash:
      if (ch == '/') {
        return s_req_schema_slash_slash;
      }

      break;

    case s_req_schema_slash_slash:
      if (ch == '/') {
        return s_req_host;
      }

      break;

    case s_req_host:
      if (IS_HOST_CHAR(ch)) {
        return s;
      }

      /* End of host: a port, path, or query may follow. */
      switch (ch) {
        case ':':
          return s_req_port;

        case '/':
          return s_req_path;

        case '?':
          return s_req_query_string_start;
      }

      break;

    case s_req_port:
      if (IS_NUM(ch)) {
        return s;
      }

      switch (ch) {
        case '/':
          return s_req_path;

        case '?':
          return s_req_query_string_start;
      }

      break;

    case s_req_path:
      if (IS_URL_CHAR(ch)) {
        return s;
      }

      switch (ch) {
        case '?':
          return s_req_query_string_start;

        case '#':
          return s_req_fragment_start;
      }

      break;

    case s_req_query_string_start:
      if (IS_URL_CHAR(ch)) {
        return s_req_query_string;
      }

      switch (ch) {
        case '?':
          /* XXX ignore extra '?' ... is this right? */
          return s;

        case '#':
          return s_req_fragment_start;
      }

      break;

    case s_req_query_string:
      if (IS_URL_CHAR(ch)) {
        return s;
      }

      switch (ch) {
        case '?':
          /* allow extra '?' in query string */
          return s;

        case '#':
          return s_req_fragment_start;
      }

      break;

    case s_req_fragment_start:
      if (IS_URL_CHAR(ch)) {
        return s_req_fragment;
      }

      switch (ch) {
        case '?':
          return s_req_fragment;

        case '#':
          /* extra '#' markers are collapsed into the fragment start */
          return s;
      }

      break;

    case s_req_fragment:
      if (IS_URL_CHAR(ch)) {
        return s;
      }

      switch (ch) {
        case '?':
        case '#':
          return s;
      }

      break;

    default:
      break;
  }

  /* We should never fall out of the switch above unless there's an error */
  return s_dead;
}

size_t http_parser_execute (http_parser *parser,
                            const http_parser_settings *settings,
                            const char *data,
                            size_t len)
{
  char c, ch;
  int8_t unhex_val;
  const char *p = data;
  const char *header_field_mark = 0;
  const char *header_value_mark = 0;
  const char *url_mark = 0;
  const char *body_mark = 0;

  /* We're in an error state. Don't bother doing anything. */
  if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
    return 0;
  }

  if (len == 0) {
    switch (parser->state) {
      case s_body_identity_eof:
        /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
         * we got paused.
590 | */ 591 | CALLBACK_NOTIFY_NOADVANCE(message_complete); 592 | return 0; 593 | 594 | case s_dead: 595 | case s_start_req_or_res: 596 | case s_start_res: 597 | case s_start_req: 598 | return 0; 599 | 600 | default: 601 | SET_ERRNO(HPE_INVALID_EOF_STATE); 602 | return 1; 603 | } 604 | } 605 | 606 | 607 | if (parser->state == s_header_field) 608 | header_field_mark = data; 609 | if (parser->state == s_header_value) 610 | header_value_mark = data; 611 | if (parser->state == s_req_path || 612 | parser->state == s_req_schema || 613 | parser->state == s_req_schema_slash || 614 | parser->state == s_req_schema_slash_slash || 615 | parser->state == s_req_port || 616 | parser->state == s_req_query_string_start || 617 | parser->state == s_req_query_string || 618 | parser->state == s_req_host || 619 | parser->state == s_req_fragment_start || 620 | parser->state == s_req_fragment) 621 | url_mark = data; 622 | 623 | for (p=data; p != data + len; p++) { 624 | ch = *p; 625 | 626 | if (PARSING_HEADER(parser->state)) { 627 | ++parser->nread; 628 | /* Buffer overflow attack */ 629 | if (parser->nread > HTTP_MAX_HEADER_SIZE) { 630 | SET_ERRNO(HPE_HEADER_OVERFLOW); 631 | goto error; 632 | } 633 | } 634 | 635 | reexecute_byte: 636 | switch (parser->state) { 637 | 638 | case s_dead: 639 | /* this state is used after a 'Connection: close' message 640 | * the parser will error out if it reads another message 641 | */ 642 | SET_ERRNO(HPE_CLOSED_CONNECTION); 643 | goto error; 644 | 645 | case s_start_req_or_res: 646 | { 647 | if (ch == CR || ch == LF) 648 | break; 649 | parser->flags = 0; 650 | parser->content_length = -1; 651 | 652 | if (ch == 'H') { 653 | parser->state = s_res_or_resp_H; 654 | 655 | CALLBACK_NOTIFY(message_begin); 656 | } else { 657 | parser->type = HTTP_REQUEST; 658 | parser->state = s_start_req; 659 | goto reexecute_byte; 660 | } 661 | 662 | break; 663 | } 664 | 665 | case s_res_or_resp_H: 666 | if (ch == 'T') { 667 | parser->type = HTTP_RESPONSE; 668 | parser->state = 
s_res_HT; 669 | } else { 670 | if (ch != 'E') { 671 | SET_ERRNO(HPE_INVALID_CONSTANT); 672 | goto error; 673 | } 674 | 675 | parser->type = HTTP_REQUEST; 676 | parser->method = HTTP_HEAD; 677 | parser->index = 2; 678 | parser->state = s_req_method; 679 | } 680 | break; 681 | 682 | case s_start_res: 683 | { 684 | parser->flags = 0; 685 | parser->content_length = -1; 686 | 687 | switch (ch) { 688 | case 'H': 689 | parser->state = s_res_H; 690 | break; 691 | 692 | case CR: 693 | case LF: 694 | break; 695 | 696 | default: 697 | SET_ERRNO(HPE_INVALID_CONSTANT); 698 | goto error; 699 | } 700 | 701 | CALLBACK_NOTIFY(message_begin); 702 | break; 703 | } 704 | 705 | case s_res_H: 706 | STRICT_CHECK(ch != 'T'); 707 | parser->state = s_res_HT; 708 | break; 709 | 710 | case s_res_HT: 711 | STRICT_CHECK(ch != 'T'); 712 | parser->state = s_res_HTT; 713 | break; 714 | 715 | case s_res_HTT: 716 | STRICT_CHECK(ch != 'P'); 717 | parser->state = s_res_HTTP; 718 | break; 719 | 720 | case s_res_HTTP: 721 | STRICT_CHECK(ch != '/'); 722 | parser->state = s_res_first_http_major; 723 | break; 724 | 725 | case s_res_first_http_major: 726 | if (ch < '0' || ch > '9') { 727 | SET_ERRNO(HPE_INVALID_VERSION); 728 | goto error; 729 | } 730 | 731 | parser->http_major = ch - '0'; 732 | parser->state = s_res_http_major; 733 | break; 734 | 735 | /* major HTTP version or dot */ 736 | case s_res_http_major: 737 | { 738 | if (ch == '.') { 739 | parser->state = s_res_first_http_minor; 740 | break; 741 | } 742 | 743 | if (!IS_NUM(ch)) { 744 | SET_ERRNO(HPE_INVALID_VERSION); 745 | goto error; 746 | } 747 | 748 | parser->http_major *= 10; 749 | parser->http_major += ch - '0'; 750 | 751 | if (parser->http_major > 999) { 752 | SET_ERRNO(HPE_INVALID_VERSION); 753 | goto error; 754 | } 755 | 756 | break; 757 | } 758 | 759 | /* first digit of minor HTTP version */ 760 | case s_res_first_http_minor: 761 | if (!IS_NUM(ch)) { 762 | SET_ERRNO(HPE_INVALID_VERSION); 763 | goto error; 764 | } 765 | 766 | 
parser->http_minor = ch - '0'; 767 | parser->state = s_res_http_minor; 768 | break; 769 | 770 | /* minor HTTP version or end of request line */ 771 | case s_res_http_minor: 772 | { 773 | if (ch == ' ') { 774 | parser->state = s_res_first_status_code; 775 | break; 776 | } 777 | 778 | if (!IS_NUM(ch)) { 779 | SET_ERRNO(HPE_INVALID_VERSION); 780 | goto error; 781 | } 782 | 783 | parser->http_minor *= 10; 784 | parser->http_minor += ch - '0'; 785 | 786 | if (parser->http_minor > 999) { 787 | SET_ERRNO(HPE_INVALID_VERSION); 788 | goto error; 789 | } 790 | 791 | break; 792 | } 793 | 794 | case s_res_first_status_code: 795 | { 796 | if (!IS_NUM(ch)) { 797 | if (ch == ' ') { 798 | break; 799 | } 800 | 801 | SET_ERRNO(HPE_INVALID_STATUS); 802 | goto error; 803 | } 804 | parser->status_code = ch - '0'; 805 | parser->state = s_res_status_code; 806 | break; 807 | } 808 | 809 | case s_res_status_code: 810 | { 811 | if (!IS_NUM(ch)) { 812 | switch (ch) { 813 | case ' ': 814 | parser->state = s_res_status; 815 | break; 816 | case CR: 817 | parser->state = s_res_line_almost_done; 818 | break; 819 | case LF: 820 | parser->state = s_header_field_start; 821 | break; 822 | default: 823 | SET_ERRNO(HPE_INVALID_STATUS); 824 | goto error; 825 | } 826 | break; 827 | } 828 | 829 | parser->status_code *= 10; 830 | parser->status_code += ch - '0'; 831 | 832 | if (parser->status_code > 999) { 833 | SET_ERRNO(HPE_INVALID_STATUS); 834 | goto error; 835 | } 836 | 837 | break; 838 | } 839 | 840 | case s_res_status: 841 | /* the human readable status. e.g. 
"NOT FOUND" 842 | * we are not humans so just ignore this */ 843 | if (ch == CR) { 844 | parser->state = s_res_line_almost_done; 845 | break; 846 | } 847 | 848 | if (ch == LF) { 849 | parser->state = s_header_field_start; 850 | break; 851 | } 852 | break; 853 | 854 | case s_res_line_almost_done: 855 | STRICT_CHECK(ch != LF); 856 | parser->state = s_header_field_start; 857 | break; 858 | 859 | case s_start_req: 860 | { 861 | if (ch == CR || ch == LF) 862 | break; 863 | parser->flags = 0; 864 | parser->content_length = -1; 865 | 866 | if (!IS_ALPHA(ch)) { 867 | SET_ERRNO(HPE_INVALID_METHOD); 868 | goto error; 869 | } 870 | 871 | parser->method = (enum http_method) 0; 872 | parser->index = 1; 873 | switch (ch) { 874 | case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break; 875 | case 'D': parser->method = HTTP_DELETE; break; 876 | case 'G': parser->method = HTTP_GET; break; 877 | case 'H': parser->method = HTTP_HEAD; break; 878 | case 'L': parser->method = HTTP_LOCK; break; 879 | case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break; 880 | case 'N': parser->method = HTTP_NOTIFY; break; 881 | case 'O': parser->method = HTTP_OPTIONS; break; 882 | case 'P': parser->method = HTTP_POST; 883 | /* or PROPFIND or PROPPATCH or PUT or PATCH */ 884 | break; 885 | case 'R': parser->method = HTTP_REPORT; break; 886 | case 'S': parser->method = HTTP_SUBSCRIBE; break; 887 | case 'T': parser->method = HTTP_TRACE; break; 888 | case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break; 889 | default: 890 | SET_ERRNO(HPE_INVALID_METHOD); 891 | goto error; 892 | } 893 | parser->state = s_req_method; 894 | 895 | CALLBACK_NOTIFY(message_begin); 896 | 897 | break; 898 | } 899 | 900 | case s_req_method: 901 | { 902 | const char *matcher; 903 | if (ch == '\0') { 904 | SET_ERRNO(HPE_INVALID_METHOD); 905 | goto error; 906 | } 907 | 908 | matcher = method_strings[parser->method]; 909 | if (ch == ' ' && matcher[parser->index] == '\0') { 
910 | parser->state = s_req_spaces_before_url; 911 | } else if (ch == matcher[parser->index]) { 912 | ; /* nada */ 913 | } else if (parser->method == HTTP_CONNECT) { 914 | if (parser->index == 1 && ch == 'H') { 915 | parser->method = HTTP_CHECKOUT; 916 | } else if (parser->index == 2 && ch == 'P') { 917 | parser->method = HTTP_COPY; 918 | } else { 919 | goto error; 920 | } 921 | } else if (parser->method == HTTP_MKCOL) { 922 | if (parser->index == 1 && ch == 'O') { 923 | parser->method = HTTP_MOVE; 924 | } else if (parser->index == 1 && ch == 'E') { 925 | parser->method = HTTP_MERGE; 926 | } else if (parser->index == 1 && ch == '-') { 927 | parser->method = HTTP_MSEARCH; 928 | } else if (parser->index == 2 && ch == 'A') { 929 | parser->method = HTTP_MKACTIVITY; 930 | } else { 931 | goto error; 932 | } 933 | } else if (parser->index == 1 && parser->method == HTTP_POST) { 934 | if (ch == 'R') { 935 | parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */ 936 | } else if (ch == 'U') { 937 | parser->method = HTTP_PUT; 938 | } else if (ch == 'A') { 939 | parser->method = HTTP_PATCH; 940 | } else { 941 | goto error; 942 | } 943 | } else if (parser->index == 2 && parser->method == HTTP_UNLOCK && ch == 'S') { 944 | parser->method = HTTP_UNSUBSCRIBE; 945 | } else if (parser->index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') { 946 | parser->method = HTTP_PROPPATCH; 947 | } else { 948 | SET_ERRNO(HPE_INVALID_METHOD); 949 | goto error; 950 | } 951 | 952 | ++parser->index; 953 | break; 954 | } 955 | 956 | case s_req_spaces_before_url: 957 | { 958 | if (ch == ' ') break; 959 | 960 | MARK(url); 961 | 962 | parser->state = parse_url_char( 963 | (enum state)parser->state, ch, parser->method == HTTP_CONNECT); 964 | if (parser->state == s_dead) { 965 | SET_ERRNO(HPE_INVALID_URL); 966 | goto error; 967 | } 968 | 969 | break; 970 | } 971 | 972 | case s_req_schema: 973 | case s_req_schema_slash: 974 | case s_req_schema_slash_slash: 975 | { 976 | switch (ch) { 977 | /* No 
whitespace allowed here */ 978 | case ' ': 979 | case CR: 980 | case LF: 981 | SET_ERRNO(HPE_INVALID_URL); 982 | goto error; 983 | default: 984 | parser->state = parse_url_char( 985 | (enum state)parser->state, ch, parser->method == HTTP_CONNECT); 986 | if (parser->state == s_dead) { 987 | SET_ERRNO(HPE_INVALID_URL); 988 | goto error; 989 | } 990 | } 991 | 992 | break; 993 | } 994 | 995 | case s_req_host: 996 | case s_req_port: 997 | case s_req_path: 998 | case s_req_query_string_start: 999 | case s_req_query_string: 1000 | case s_req_fragment_start: 1001 | case s_req_fragment: 1002 | { 1003 | /* XXX: There is a bug here where if we're on the first character 1004 | * of s_req_host (e.g. our URL is 'http://' and we see a whitespace 1005 | * character, we'll consider this a valid URL. This seems incorrect, 1006 | * but at least it's bug-compatible with what we had before. 1007 | */ 1008 | switch (ch) { 1009 | case ' ': 1010 | parser->state = s_req_http_start; 1011 | CALLBACK_DATA(url); 1012 | break; 1013 | case CR: 1014 | case LF: 1015 | parser->http_major = 0; 1016 | parser->http_minor = 9; 1017 | parser->state = (ch == CR) ? 
1018 | s_req_line_almost_done : 1019 | s_header_field_start; 1020 | CALLBACK_DATA(url); 1021 | break; 1022 | default: 1023 | parser->state = parse_url_char( 1024 | (enum state)parser->state, ch, parser->method == HTTP_CONNECT); 1025 | if (parser->state == s_dead) { 1026 | SET_ERRNO(HPE_INVALID_URL); 1027 | goto error; 1028 | } 1029 | } 1030 | break; 1031 | } 1032 | 1033 | case s_req_http_start: 1034 | switch (ch) { 1035 | case 'H': 1036 | parser->state = s_req_http_H; 1037 | break; 1038 | case ' ': 1039 | break; 1040 | default: 1041 | SET_ERRNO(HPE_INVALID_CONSTANT); 1042 | goto error; 1043 | } 1044 | break; 1045 | 1046 | case s_req_http_H: 1047 | STRICT_CHECK(ch != 'T'); 1048 | parser->state = s_req_http_HT; 1049 | break; 1050 | 1051 | case s_req_http_HT: 1052 | STRICT_CHECK(ch != 'T'); 1053 | parser->state = s_req_http_HTT; 1054 | break; 1055 | 1056 | case s_req_http_HTT: 1057 | STRICT_CHECK(ch != 'P'); 1058 | parser->state = s_req_http_HTTP; 1059 | break; 1060 | 1061 | case s_req_http_HTTP: 1062 | STRICT_CHECK(ch != '/'); 1063 | parser->state = s_req_first_http_major; 1064 | break; 1065 | 1066 | /* first digit of major HTTP version */ 1067 | case s_req_first_http_major: 1068 | if (ch < '1' || ch > '9') { 1069 | SET_ERRNO(HPE_INVALID_VERSION); 1070 | goto error; 1071 | } 1072 | 1073 | parser->http_major = ch - '0'; 1074 | parser->state = s_req_http_major; 1075 | break; 1076 | 1077 | /* major HTTP version or dot */ 1078 | case s_req_http_major: 1079 | { 1080 | if (ch == '.') { 1081 | parser->state = s_req_first_http_minor; 1082 | break; 1083 | } 1084 | 1085 | if (!IS_NUM(ch)) { 1086 | SET_ERRNO(HPE_INVALID_VERSION); 1087 | goto error; 1088 | } 1089 | 1090 | parser->http_major *= 10; 1091 | parser->http_major += ch - '0'; 1092 | 1093 | if (parser->http_major > 999) { 1094 | SET_ERRNO(HPE_INVALID_VERSION); 1095 | goto error; 1096 | } 1097 | 1098 | break; 1099 | } 1100 | 1101 | /* first digit of minor HTTP version */ 1102 | case s_req_first_http_minor: 1103 | if 
(!IS_NUM(ch)) { 1104 | SET_ERRNO(HPE_INVALID_VERSION); 1105 | goto error; 1106 | } 1107 | 1108 | parser->http_minor = ch - '0'; 1109 | parser->state = s_req_http_minor; 1110 | break; 1111 | 1112 | /* minor HTTP version or end of request line */ 1113 | case s_req_http_minor: 1114 | { 1115 | if (ch == CR) { 1116 | parser->state = s_req_line_almost_done; 1117 | break; 1118 | } 1119 | 1120 | if (ch == LF) { 1121 | parser->state = s_header_field_start; 1122 | break; 1123 | } 1124 | 1125 | /* XXX allow spaces after digit? */ 1126 | 1127 | if (!IS_NUM(ch)) { 1128 | SET_ERRNO(HPE_INVALID_VERSION); 1129 | goto error; 1130 | } 1131 | 1132 | parser->http_minor *= 10; 1133 | parser->http_minor += ch - '0'; 1134 | 1135 | if (parser->http_minor > 999) { 1136 | SET_ERRNO(HPE_INVALID_VERSION); 1137 | goto error; 1138 | } 1139 | 1140 | break; 1141 | } 1142 | 1143 | /* end of request line */ 1144 | case s_req_line_almost_done: 1145 | { 1146 | if (ch != LF) { 1147 | SET_ERRNO(HPE_LF_EXPECTED); 1148 | goto error; 1149 | } 1150 | 1151 | parser->state = s_header_field_start; 1152 | break; 1153 | } 1154 | 1155 | case s_header_field_start: 1156 | { 1157 | if (ch == CR) { 1158 | parser->state = s_headers_almost_done; 1159 | break; 1160 | } 1161 | 1162 | if (ch == LF) { 1163 | /* they might be just sending \n instead of \r\n so this would be 1164 | * the second \n to denote the end of headers*/ 1165 | parser->state = s_headers_almost_done; 1166 | goto reexecute_byte; 1167 | } 1168 | 1169 | c = TOKEN(ch); 1170 | 1171 | if (!c) { 1172 | SET_ERRNO(HPE_INVALID_HEADER_TOKEN); 1173 | goto error; 1174 | } 1175 | 1176 | MARK(header_field); 1177 | 1178 | parser->index = 0; 1179 | parser->state = s_header_field; 1180 | 1181 | switch (c) { 1182 | case 'c': 1183 | parser->header_state = h_C; 1184 | break; 1185 | 1186 | case 'p': 1187 | parser->header_state = h_matching_proxy_connection; 1188 | break; 1189 | 1190 | case 't': 1191 | parser->header_state = h_matching_transfer_encoding; 1192 | break; 1193 
| 1194 | case 'u': 1195 | parser->header_state = h_matching_upgrade; 1196 | break; 1197 | 1198 | default: 1199 | parser->header_state = h_general; 1200 | break; 1201 | } 1202 | break; 1203 | } 1204 | 1205 | case s_header_field: 1206 | { 1207 | c = TOKEN(ch); 1208 | 1209 | if (c) { 1210 | switch (parser->header_state) { 1211 | case h_general: 1212 | break; 1213 | 1214 | case h_C: 1215 | parser->index++; 1216 | parser->header_state = (c == 'o' ? h_CO : h_general); 1217 | break; 1218 | 1219 | case h_CO: 1220 | parser->index++; 1221 | parser->header_state = (c == 'n' ? h_CON : h_general); 1222 | break; 1223 | 1224 | case h_CON: 1225 | parser->index++; 1226 | switch (c) { 1227 | case 'n': 1228 | parser->header_state = h_matching_connection; 1229 | break; 1230 | case 't': 1231 | parser->header_state = h_matching_content_length; 1232 | break; 1233 | default: 1234 | parser->header_state = h_general; 1235 | break; 1236 | } 1237 | break; 1238 | 1239 | /* connection */ 1240 | 1241 | case h_matching_connection: 1242 | parser->index++; 1243 | if (parser->index > sizeof(CONNECTION)-1 1244 | || c != CONNECTION[parser->index]) { 1245 | parser->header_state = h_general; 1246 | } else if (parser->index == sizeof(CONNECTION)-2) { 1247 | parser->header_state = h_connection; 1248 | } 1249 | break; 1250 | 1251 | /* proxy-connection */ 1252 | 1253 | case h_matching_proxy_connection: 1254 | parser->index++; 1255 | if (parser->index > sizeof(PROXY_CONNECTION)-1 1256 | || c != PROXY_CONNECTION[parser->index]) { 1257 | parser->header_state = h_general; 1258 | } else if (parser->index == sizeof(PROXY_CONNECTION)-2) { 1259 | parser->header_state = h_connection; 1260 | } 1261 | break; 1262 | 1263 | /* content-length */ 1264 | 1265 | case h_matching_content_length: 1266 | parser->index++; 1267 | if (parser->index > sizeof(CONTENT_LENGTH)-1 1268 | || c != CONTENT_LENGTH[parser->index]) { 1269 | parser->header_state = h_general; 1270 | } else if (parser->index == sizeof(CONTENT_LENGTH)-2) { 1271 | 
parser->header_state = h_content_length; 1272 | } 1273 | break; 1274 | 1275 | /* transfer-encoding */ 1276 | 1277 | case h_matching_transfer_encoding: 1278 | parser->index++; 1279 | if (parser->index > sizeof(TRANSFER_ENCODING)-1 1280 | || c != TRANSFER_ENCODING[parser->index]) { 1281 | parser->header_state = h_general; 1282 | } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) { 1283 | parser->header_state = h_transfer_encoding; 1284 | } 1285 | break; 1286 | 1287 | /* upgrade */ 1288 | 1289 | case h_matching_upgrade: 1290 | parser->index++; 1291 | if (parser->index > sizeof(UPGRADE)-1 1292 | || c != UPGRADE[parser->index]) { 1293 | parser->header_state = h_general; 1294 | } else if (parser->index == sizeof(UPGRADE)-2) { 1295 | parser->header_state = h_upgrade; 1296 | } 1297 | break; 1298 | 1299 | case h_connection: 1300 | case h_content_length: 1301 | case h_transfer_encoding: 1302 | case h_upgrade: 1303 | if (ch != ' ') parser->header_state = h_general; 1304 | break; 1305 | 1306 | default: 1307 | assert(0 && "Unknown header_state"); 1308 | break; 1309 | } 1310 | break; 1311 | } 1312 | 1313 | if (ch == ':') { 1314 | parser->state = s_header_value_start; 1315 | CALLBACK_DATA(header_field); 1316 | break; 1317 | } 1318 | 1319 | if (ch == CR) { 1320 | parser->state = s_header_almost_done; 1321 | CALLBACK_DATA(header_field); 1322 | break; 1323 | } 1324 | 1325 | if (ch == LF) { 1326 | parser->state = s_header_field_start; 1327 | CALLBACK_DATA(header_field); 1328 | break; 1329 | } 1330 | 1331 | SET_ERRNO(HPE_INVALID_HEADER_TOKEN); 1332 | goto error; 1333 | } 1334 | 1335 | case s_header_value_start: 1336 | { 1337 | if (ch == ' ' || ch == '\t') break; 1338 | 1339 | MARK(header_value); 1340 | 1341 | parser->state = s_header_value; 1342 | parser->index = 0; 1343 | 1344 | if (ch == CR) { 1345 | parser->header_state = h_general; 1346 | parser->state = s_header_almost_done; 1347 | CALLBACK_DATA(header_value); 1348 | break; 1349 | } 1350 | 1351 | if (ch == LF) { 1352 | 
parser->state = s_header_field_start; 1353 | CALLBACK_DATA(header_value); 1354 | break; 1355 | } 1356 | 1357 | c = LOWER(ch); 1358 | 1359 | switch (parser->header_state) { 1360 | case h_upgrade: 1361 | parser->flags |= F_UPGRADE; 1362 | parser->header_state = h_general; 1363 | break; 1364 | 1365 | case h_transfer_encoding: 1366 | /* looking for 'Transfer-Encoding: chunked' */ 1367 | if ('c' == c) { 1368 | parser->header_state = h_matching_transfer_encoding_chunked; 1369 | } else { 1370 | parser->header_state = h_general; 1371 | } 1372 | break; 1373 | 1374 | case h_content_length: 1375 | if (!IS_NUM(ch)) { 1376 | SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); 1377 | goto error; 1378 | } 1379 | 1380 | parser->content_length = ch - '0'; 1381 | break; 1382 | 1383 | case h_connection: 1384 | /* looking for 'Connection: keep-alive' */ 1385 | if (c == 'k') { 1386 | parser->header_state = h_matching_connection_keep_alive; 1387 | /* looking for 'Connection: close' */ 1388 | } else if (c == 'c') { 1389 | parser->header_state = h_matching_connection_close; 1390 | } else { 1391 | parser->header_state = h_general; 1392 | } 1393 | break; 1394 | 1395 | default: 1396 | parser->header_state = h_general; 1397 | break; 1398 | } 1399 | break; 1400 | } 1401 | 1402 | case s_header_value: 1403 | { 1404 | 1405 | if (ch == CR) { 1406 | parser->state = s_header_almost_done; 1407 | CALLBACK_DATA(header_value); 1408 | break; 1409 | } 1410 | 1411 | if (ch == LF) { 1412 | parser->state = s_header_almost_done; 1413 | CALLBACK_DATA_NOADVANCE(header_value); 1414 | goto reexecute_byte; 1415 | } 1416 | 1417 | c = LOWER(ch); 1418 | 1419 | switch (parser->header_state) { 1420 | case h_general: 1421 | break; 1422 | 1423 | case h_connection: 1424 | case h_transfer_encoding: 1425 | assert(0 && "Shouldn't get here."); 1426 | break; 1427 | 1428 | case h_content_length: 1429 | if (ch == ' ') break; 1430 | if (!IS_NUM(ch)) { 1431 | SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); 1432 | goto error; 1433 | } 1434 | 1435 | 
parser->content_length *= 10; 1436 | parser->content_length += ch - '0'; 1437 | break; 1438 | 1439 | /* Transfer-Encoding: chunked */ 1440 | case h_matching_transfer_encoding_chunked: 1441 | parser->index++; 1442 | if (parser->index > sizeof(CHUNKED)-1 1443 | || c != CHUNKED[parser->index]) { 1444 | parser->header_state = h_general; 1445 | } else if (parser->index == sizeof(CHUNKED)-2) { 1446 | parser->header_state = h_transfer_encoding_chunked; 1447 | } 1448 | break; 1449 | 1450 | /* looking for 'Connection: keep-alive' */ 1451 | case h_matching_connection_keep_alive: 1452 | parser->index++; 1453 | if (parser->index > sizeof(KEEP_ALIVE)-1 1454 | || c != KEEP_ALIVE[parser->index]) { 1455 | parser->header_state = h_general; 1456 | } else if (parser->index == sizeof(KEEP_ALIVE)-2) { 1457 | parser->header_state = h_connection_keep_alive; 1458 | } 1459 | break; 1460 | 1461 | /* looking for 'Connection: close' */ 1462 | case h_matching_connection_close: 1463 | parser->index++; 1464 | if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) { 1465 | parser->header_state = h_general; 1466 | } else if (parser->index == sizeof(CLOSE)-2) { 1467 | parser->header_state = h_connection_close; 1468 | } 1469 | break; 1470 | 1471 | case h_transfer_encoding_chunked: 1472 | case h_connection_keep_alive: 1473 | case h_connection_close: 1474 | if (ch != ' ') parser->header_state = h_general; 1475 | break; 1476 | 1477 | default: 1478 | parser->state = s_header_value; 1479 | parser->header_state = h_general; 1480 | break; 1481 | } 1482 | break; 1483 | } 1484 | 1485 | case s_header_almost_done: 1486 | { 1487 | STRICT_CHECK(ch != LF); 1488 | 1489 | parser->state = s_header_value_lws; 1490 | 1491 | switch (parser->header_state) { 1492 | case h_connection_keep_alive: 1493 | parser->flags |= F_CONNECTION_KEEP_ALIVE; 1494 | break; 1495 | case h_connection_close: 1496 | parser->flags |= F_CONNECTION_CLOSE; 1497 | break; 1498 | case h_transfer_encoding_chunked: 1499 | parser->flags |= 
F_CHUNKED; 1500 | break; 1501 | default: 1502 | break; 1503 | } 1504 | 1505 | break; 1506 | } 1507 | 1508 | case s_header_value_lws: 1509 | { 1510 | if (ch == ' ' || ch == '\t') 1511 | parser->state = s_header_value_start; 1512 | else 1513 | { 1514 | parser->state = s_header_field_start; 1515 | goto reexecute_byte; 1516 | } 1517 | break; 1518 | } 1519 | 1520 | case s_headers_almost_done: 1521 | { 1522 | STRICT_CHECK(ch != LF); 1523 | 1524 | if (parser->flags & F_TRAILING) { 1525 | /* End of a chunked request */ 1526 | parser->state = NEW_MESSAGE(); 1527 | CALLBACK_NOTIFY(message_complete); 1528 | break; 1529 | } 1530 | 1531 | parser->state = s_headers_done; 1532 | 1533 | /* Set this here so that on_headers_complete() callbacks can see it */ 1534 | parser->upgrade = 1535 | (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT); 1536 | 1537 | /* Here we call the headers_complete callback. This is somewhat 1538 | * different than other callbacks because if the user returns 1, we 1539 | * will interpret that as saying that this message has no body. This 1540 | * is needed for the annoying case of recieving a response to a HEAD 1541 | * request. 1542 | * 1543 | * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so 1544 | * we have to simulate it by handling a change in errno below. 1545 | */ 1546 | if (settings->on_headers_complete) { 1547 | switch (settings->on_headers_complete(parser)) { 1548 | case 0: 1549 | break; 1550 | 1551 | case 1: 1552 | parser->flags |= F_SKIPBODY; 1553 | break; 1554 | 1555 | default: 1556 | SET_ERRNO(HPE_CB_headers_complete); 1557 | return p - data; /* Error */ 1558 | } 1559 | } 1560 | 1561 | if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { 1562 | return p - data; 1563 | } 1564 | 1565 | goto reexecute_byte; 1566 | } 1567 | 1568 | case s_headers_done: 1569 | { 1570 | STRICT_CHECK(ch != LF); 1571 | 1572 | parser->nread = 0; 1573 | 1574 | /* Exit, the rest of the connect is in a different protocol. 
*/ 1575 | if (parser->upgrade) { 1576 | parser->state = NEW_MESSAGE(); 1577 | CALLBACK_NOTIFY(message_complete); 1578 | return (p - data) + 1; 1579 | } 1580 | 1581 | if (parser->flags & F_SKIPBODY) { 1582 | parser->state = NEW_MESSAGE(); 1583 | CALLBACK_NOTIFY(message_complete); 1584 | } else if (parser->flags & F_CHUNKED) { 1585 | /* chunked encoding - ignore Content-Length header */ 1586 | parser->state = s_chunk_size_start; 1587 | } else { 1588 | if (parser->content_length == 0) { 1589 | /* Content-Length header given but zero: Content-Length: 0\r\n */ 1590 | parser->state = NEW_MESSAGE(); 1591 | CALLBACK_NOTIFY(message_complete); 1592 | } else if (parser->content_length > 0) { 1593 | /* Content-Length header given and non-zero */ 1594 | parser->state = s_body_identity; 1595 | } else { 1596 | if (parser->type == HTTP_REQUEST || 1597 | !http_message_needs_eof(parser)) { 1598 | /* Assume content-length 0 - read the next */ 1599 | parser->state = NEW_MESSAGE(); 1600 | CALLBACK_NOTIFY(message_complete); 1601 | } else { 1602 | /* Read body until EOF */ 1603 | parser->state = s_body_identity_eof; 1604 | } 1605 | } 1606 | } 1607 | 1608 | break; 1609 | } 1610 | 1611 | case s_body_identity: 1612 | { 1613 | uint64_t to_read = MIN(parser->content_length, (data + len) - p); 1614 | 1615 | assert(parser->content_length > 0); 1616 | 1617 | /* The difference between advancing content_length and p is because 1618 | * the latter will automaticaly advance on the next loop iteration. 1619 | * Further, if content_length ends up at 0, we want to see the last 1620 | * byte again for our message complete callback. 1621 | */ 1622 | MARK(body); 1623 | parser->content_length -= to_read; 1624 | p += to_read - 1; 1625 | 1626 | if (parser->content_length == 0) { 1627 | parser->state = s_message_done; 1628 | 1629 | /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte. 
1630 | * 1631 | * The alternative to doing this is to wait for the next byte to 1632 | * trigger the data callback, just as in every other case. The 1633 | * problem with this is that this makes it difficult for the test 1634 | * harness to distinguish between complete-on-EOF and 1635 | * complete-on-length. It's not clear that this distinction is 1636 | * important for applications, but let's keep it for now. 1637 | */ 1638 | CALLBACK_DATA_(body, p - body_mark + 1, p - data); 1639 | goto reexecute_byte; 1640 | } 1641 | 1642 | break; 1643 | } 1644 | 1645 | /* read until EOF */ 1646 | case s_body_identity_eof: 1647 | MARK(body); 1648 | p = data + len - 1; 1649 | 1650 | break; 1651 | 1652 | case s_message_done: 1653 | parser->state = NEW_MESSAGE(); 1654 | CALLBACK_NOTIFY(message_complete); 1655 | break; 1656 | 1657 | case s_chunk_size_start: 1658 | { 1659 | assert(parser->nread == 1); 1660 | assert(parser->flags & F_CHUNKED); 1661 | 1662 | unhex_val = unhex[(unsigned char)ch]; 1663 | if (unhex_val == -1) { 1664 | SET_ERRNO(HPE_INVALID_CHUNK_SIZE); 1665 | goto error; 1666 | } 1667 | 1668 | parser->content_length = unhex_val; 1669 | parser->state = s_chunk_size; 1670 | break; 1671 | } 1672 | 1673 | case s_chunk_size: 1674 | { 1675 | assert(parser->flags & F_CHUNKED); 1676 | 1677 | if (ch == CR) { 1678 | parser->state = s_chunk_size_almost_done; 1679 | break; 1680 | } 1681 | 1682 | unhex_val = unhex[(unsigned char)ch]; 1683 | 1684 | if (unhex_val == -1) { 1685 | if (ch == ';' || ch == ' ') { 1686 | parser->state = s_chunk_parameters; 1687 | break; 1688 | } 1689 | 1690 | SET_ERRNO(HPE_INVALID_CHUNK_SIZE); 1691 | goto error; 1692 | } 1693 | 1694 | parser->content_length *= 16; 1695 | parser->content_length += unhex_val; 1696 | break; 1697 | } 1698 | 1699 | case s_chunk_parameters: 1700 | { 1701 | assert(parser->flags & F_CHUNKED); 1702 | /* just ignore the chunk extensions here.
TODO check for overflow */ 1703 | if (ch == CR) { 1704 | parser->state = s_chunk_size_almost_done; 1705 | break; 1706 | } 1707 | break; 1708 | } 1709 | 1710 | case s_chunk_size_almost_done: 1711 | { 1712 | assert(parser->flags & F_CHUNKED); 1713 | STRICT_CHECK(ch != LF); 1714 | 1715 | parser->nread = 0; 1716 | 1717 | if (parser->content_length == 0) { 1718 | parser->flags |= F_TRAILING; 1719 | parser->state = s_header_field_start; 1720 | } else { 1721 | parser->state = s_chunk_data; 1722 | } 1723 | break; 1724 | } 1725 | 1726 | case s_chunk_data: 1727 | { 1728 | uint64_t to_read = MIN(parser->content_length, (data + len) - p); 1729 | 1730 | assert(parser->flags & F_CHUNKED); 1731 | assert(parser->content_length > 0); 1732 | 1733 | /* See the explanation in s_body_identity for why the content 1734 | * length and data pointers are managed this way. 1735 | */ 1736 | MARK(body); 1737 | parser->content_length -= to_read; 1738 | p += to_read - 1; 1739 | 1740 | if (parser->content_length == 0) { 1741 | parser->state = s_chunk_data_almost_done; 1742 | } 1743 | 1744 | break; 1745 | } 1746 | 1747 | case s_chunk_data_almost_done: 1748 | assert(parser->flags & F_CHUNKED); 1749 | assert(parser->content_length == 0); 1750 | STRICT_CHECK(ch != CR); 1751 | parser->state = s_chunk_data_done; 1752 | CALLBACK_DATA(body); 1753 | break; 1754 | 1755 | case s_chunk_data_done: 1756 | assert(parser->flags & F_CHUNKED); 1757 | STRICT_CHECK(ch != LF); 1758 | parser->nread = 0; 1759 | parser->state = s_chunk_size_start; 1760 | break; 1761 | 1762 | default: 1763 | assert(0 && "unhandled state"); 1764 | SET_ERRNO(HPE_INVALID_INTERNAL_STATE); 1765 | goto error; 1766 | } 1767 | } 1768 | 1769 | /* Run callbacks for any marks that we have leftover after we ran out of
1772 | * 1773 | * We use the NOADVANCE() variety of callbacks here because 'p' has already 1774 | * overflowed 'data' and this allows us to correct for the off-by-one that 1775 | * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p' 1776 | * value that's in-bounds). 1777 | */ 1778 | 1779 | assert(((header_field_mark ? 1 : 0) + 1780 | (header_value_mark ? 1 : 0) + 1781 | (url_mark ? 1 : 0) + 1782 | (body_mark ? 1 : 0)) <= 1); 1783 | 1784 | CALLBACK_DATA_NOADVANCE(header_field); 1785 | CALLBACK_DATA_NOADVANCE(header_value); 1786 | CALLBACK_DATA_NOADVANCE(url); 1787 | CALLBACK_DATA_NOADVANCE(body); 1788 | 1789 | return len; 1790 | 1791 | error: 1792 | if (HTTP_PARSER_ERRNO(parser) == HPE_OK) { 1793 | SET_ERRNO(HPE_UNKNOWN); 1794 | } 1795 | 1796 | return (p - data); 1797 | } 1798 | 1799 | 1800 | /* Does the parser need to see an EOF to find the end of the message? */ 1801 | int 1802 | http_message_needs_eof (http_parser *parser) 1803 | { 1804 | if (parser->type == HTTP_REQUEST) { 1805 | return 0; 1806 | } 1807 | 1808 | /* See RFC 2616 section 4.4 */ 1809 | if (parser->status_code / 100 == 1 || /* 1xx e.g. 
Continue */ 1810 | parser->status_code == 204 || /* No Content */ 1811 | parser->status_code == 304 || /* Not Modified */ 1812 | parser->flags & F_SKIPBODY) { /* response to a HEAD request */ 1813 | return 0; 1814 | } 1815 | 1816 | if ((parser->flags & F_CHUNKED) || parser->content_length >= 0) { 1817 | return 0; 1818 | } 1819 | 1820 | return 1; 1821 | } 1822 | 1823 | 1824 | int 1825 | http_should_keep_alive (http_parser *parser) 1826 | { 1827 | if (parser->http_major > 0 && parser->http_minor > 0) { 1828 | /* HTTP/1.1 */ 1829 | if (parser->flags & F_CONNECTION_CLOSE) { 1830 | return 0; 1831 | } 1832 | } else { 1833 | /* HTTP/1.0 or earlier */ 1834 | if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) { 1835 | return 0; 1836 | } 1837 | } 1838 | 1839 | return !http_message_needs_eof(parser); 1840 | } 1841 | 1842 | 1843 | const char * http_method_str (enum http_method m) 1844 | { 1845 | return method_strings[m]; 1846 | } 1847 | 1848 | 1849 | void 1850 | http_parser_init (http_parser *parser, enum http_parser_type t) 1851 | { 1852 | parser->type = t; 1853 | parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? 
s_start_res : s_start_req_or_res)); 1854 | parser->nread = 0; 1855 | parser->upgrade = 0; 1856 | parser->flags = 0; 1857 | parser->method = 0; 1858 | parser->http_errno = HPE_OK; /* NOTE(review): content_length, http_major and http_minor are not reset here — presumably set during parsing; confirm callers don't read them before then. */ 1859 | } 1860 | 1861 | /* Name / description lookups for an http_errno value. NOTE(review): the 1862 | * range asserts below compile out under NDEBUG, so an out-of-range err then 1863 | * reads past http_strerror_tab. */ 1861 | const char * 1862 | http_errno_name(enum http_errno err) { 1863 | assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0]))); 1864 | return http_strerror_tab[err].name; 1865 | } 1866 | 1867 | const char * 1868 | http_errno_description(enum http_errno err) { 1869 | assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0]))); 1870 | return http_strerror_tab[err].description; 1871 | } 1872 | 1873 | int 1874 | http_parser_parse_url(const char *buf, size_t buflen, int is_connect, 1875 | struct http_parser_url *u) 1876 | { 1877 | enum state s; 1878 | const char *p; 1879 | enum http_parser_url_fields uf, old_uf; 1880 | 1881 | u->port = u->field_set = 0; 1882 | s = s_req_spaces_before_url; 1883 | uf = old_uf = UF_MAX; 1884 | 1885 | for (p = buf; p < buf + buflen; p++) { 1886 | if ((s = parse_url_char(s, *p, is_connect)) == s_dead) { 1887 | return 1; 1888 | } 1889 | 1890 | /* Figure out the next field that we're operating on */ 1891 | switch (s) { 1892 | case s_req_schema: 1893 | case s_req_schema_slash: 1894 | case s_req_schema_slash_slash: 1895 | uf = UF_SCHEMA; 1896 | break; 1897 | 1898 | case s_req_host: 1899 | uf = UF_HOST; 1900 | break; 1901 | 1902 | case s_req_port: 1903 | uf = UF_PORT; 1904 | break; 1905 | 1906 | case s_req_path: 1907 | uf = UF_PATH; 1908 | break; 1909 | 1910 | case s_req_query_string_start: 1911 | case s_req_query_string: 1912 | uf = UF_QUERY; 1913 | break; 1914 | 1915 | case s_req_fragment_start: 1916 | case s_req_fragment: 1917 | uf = UF_FRAGMENT; 1918 | break; 1919 | 1920 | default: 1921 | assert(!"Unexpected state"); 1922 | return 1; 1923 | } 1924 | 1925 | /* Nothing's changed; soldier on */ 1926 | if (uf == old_uf) { 1927 | u->field_data[uf].len++; 1928 | continue; 1929 | } 1930 | 1931 | /* We ignore the first character in
some fields; without this, we end up 1932 | * with the query being "?foo=bar" rather than "foo=bar". Callers probably 1933 | * don't want this. 1934 | */ 1935 | switch (uf) { 1936 | case UF_QUERY: 1937 | case UF_FRAGMENT: 1938 | case UF_PORT: 1939 | u->field_data[uf].off = p - buf + 1; 1940 | u->field_data[uf].len = 0; 1941 | break; 1942 | 1943 | default: 1944 | u->field_data[uf].off = p - buf; 1945 | u->field_data[uf].len = 1; 1946 | break; 1947 | } 1948 | 1949 | u->field_set |= (1 << uf); 1950 | old_uf = uf; 1951 | } 1952 | 1953 | if (u->field_set & (1 << UF_PORT)) { 1954 | /* Don't bother with endp; we've already validated the string */ 1955 | unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10); 1956 | 1957 | /* Ports are 16 bits wide: maximum value 0xffff (65535) */ 1958 | if (v > 0xffff) { 1959 | return 1; 1960 | } 1961 | 1962 | u->port = (uint16_t) v; 1963 | } 1964 | 1965 | return 0; 1966 | } 1967 | 1968 | void 1969 | http_parser_pause(http_parser *parser, int paused) { 1970 | /* Users should only be pausing/unpausing a parser that is not in an error 1971 | * state. In non-debug builds, there's not much that we can do about this 1972 | * other than ignore it. 1973 | */ 1974 | if (HTTP_PARSER_ERRNO(parser) == HPE_OK || 1975 | HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) { 1976 | SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK); 1977 | } else { 1978 | assert(0 && "Attempting to pause parser in error state"); 1979 | } 1980 | } 1981 | --------------------------------------------------------------------------------