├── debian ├── compat ├── pyversions ├── source │ └── format ├── clean ├── changelog ├── rules ├── python-http-parser.preinst ├── control ├── watch └── copyright ├── THANKS ├── MANIFEST.in ├── .gitignore ├── http_parser ├── __init__.py ├── http_parser.gyp ├── pyversion_compat.h ├── reader.py ├── http.py ├── util.py ├── parser.pyx ├── http_parser.h ├── pyparser.py ├── py25.py └── http_parser.c ├── examples ├── httpstream.py └── httpparser.py ├── LICENSE ├── README.rst ├── NOTICE └── setup.py /debian/compat: -------------------------------------------------------------------------------- 1 | 7 2 | -------------------------------------------------------------------------------- /debian/pyversions: -------------------------------------------------------------------------------- 1 | 2.5- 2 | -------------------------------------------------------------------------------- /debian/source/format: -------------------------------------------------------------------------------- 1 | 3.0 (native) 2 | -------------------------------------------------------------------------------- /debian/clean: -------------------------------------------------------------------------------- 1 | http-parser.egg-info/* 2 | -------------------------------------------------------------------------------- /THANKS: -------------------------------------------------------------------------------- 1 | Benoit Calvez 2 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include .gitignore 2 | include LICENSE 3 | include NOTICE 4 | include README.rst 5 | include THANKS 6 | recursive-include http_parser * 7 | recursive-include examples * 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.gem 2 | *.swp 3 | *.pyc 4 | *.pyo 5 | *#* 6 | *.sw* 7 | 
build 8 | dist 9 | setuptools-* 10 | .svn/* 11 | .DS_Store 12 | *.so 13 | http_parser.egg-info 14 | nohup.out 15 | .coverage 16 | doc/.sass-cache 17 | -------------------------------------------------------------------------------- /http_parser/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 - 2 | # 3 | # This file is part of http_parser released under the MIT license. 4 | # See the NOTICE for more information. 5 | 6 | version_info = (0, 7, 5) 7 | __version__ = ".".join(map(str, version_info)) 8 | -------------------------------------------------------------------------------- /debian/changelog: -------------------------------------------------------------------------------- 1 | python-http-parser (0.6.0-1) unstable; urgency=low 2 | 3 | * bump version. 4 | 5 | -- Benoit Chesneau Mon, 20 Jun 2011 17:20:00 +0100 6 | 7 | python-http-parser (0.5.4-1) unstable; urgency=low 8 | 9 | * bump version. 10 | 11 | -- Benoit Chesneau Mon, 20 Jun 2011 15:52:00 +0100 12 | -------------------------------------------------------------------------------- /debian/rules: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | # -*- makefile -*- 3 | # Sample debian/rules that uses debhelper. 4 | # This file was originally written by Joey Hess and Craig Small. 5 | # As a special exception, when this file is copied by dh-make into a 6 | # dh-make output file, you may use that output file without restriction. 7 | # This special exception was added by Craig Small in version 0.37 of dh-make. 8 | 9 | # Uncomment this to turn on verbose mode. 10 | # export DH_VERBOSE=1 11 | 12 | %: 13 | dh $@ 14 | -------------------------------------------------------------------------------- /debian/python-http-parser.preinst: -------------------------------------------------------------------------------- 1 | #! 
/bin/sh 2 | 3 | set -e 4 | 5 | # This was added by stdeb to workaround Debian #479852. In a nutshell, 6 | # pycentral does not remove normally remove its symlinks on an 7 | # upgrade. Since we're using python-support, however, those symlinks 8 | # will be broken. This tells python-central to clean up any symlinks. 9 | if [ -e /var/lib/dpkg/info/python-http-parser.list ] && which pycentral >/dev/null 2>&1 10 | then 11 | pycentral pkgremove python-http-parser 12 | fi 13 | 14 | #DEBHELPER# 15 | -------------------------------------------------------------------------------- /debian/control: -------------------------------------------------------------------------------- 1 | Source: python-http-parser 2 | Section: python 3 | Priority: optional 4 | Maintainer: Benoit Chesneau 5 | Build-Depends: debhelper (>= 7), python-support, python-setuptools 6 | Standards-Version: 3.9.0.0 7 | Homepage: http://github.com/benoitc/http-parser 8 | 9 | Package: python-http-parser 10 | Architecture: all 11 | Depends: ${python:Depends}, ${shlibs:Depends}, ${misc:Depends} 12 | Provides: ${python:Provides} 13 | Description: Python http request/response parser 14 | HTTP request/response parser for Python in C under MIT License, based on 15 | http-parser from Ryan Dahl. 
16 | -------------------------------------------------------------------------------- /examples/httpstream.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import socket 3 | 4 | from http_parser.http import HttpStream 5 | from http_parser.reader import SocketReader 6 | 7 | from http_parser.util import b 8 | 9 | def main(): 10 | s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 11 | try: 12 | s.connect(('gunicorn.org', 80)) 13 | s.send(b("GET / HTTP/1.1\r\nHost: gunicorn.org\r\n\r\n")) 14 | p = HttpStream(SocketReader(s)) 15 | print(p.headers()) 16 | 17 | print(p.body_file().read()) 18 | finally: 19 | s.close() 20 | 21 | if __name__ == "__main__": 22 | main() 23 | 24 | 25 | -------------------------------------------------------------------------------- /debian/watch: -------------------------------------------------------------------------------- 1 | # Example watch control file for uscan 2 | # Rename this file to "watch" and then you can run the "uscan" command 3 | # to check for upstream updates and more. 
4 | # See uscan(1) for format 5 | 6 | # Compulsory line, this is a version 3 file 7 | version=3 8 | 9 | # Uncomment to examine a Webpage 10 | # 11 | #http://www.example.com/downloads.php python-couchdbkit-(.*)\.tar\.gz 12 | opts=dversionmangle=s/\+dfsg$// \ 13 | http://pypi.python.org/packages/source/c/http-parser/http-parser-(.*).tar.gz 14 | # http://github.com/benoitc/couchdbkit/downloads/ /benoitc/couchdbkit/tarball/([0-9].*) 15 | 16 | # Uncomment to examine a Webserver directory 17 | #http://www.example.com/pub/python-couchdbkit-(.*)\.tar\.gz 18 | 19 | # Uncommment to examine a FTP server 20 | #ftp://ftp.example.com/pub/python-couchdbkit-(.*)\.tar\.gz debian uupdate 21 | 22 | # Uncomment to find new files on sourceforge, for devscripts >= 2.9 23 | # http://sf.net/python-couchdbkit/python-couchdbkit-(.*)\.tar\.gz 24 | 25 | # Uncomment to find new files on GooglePages 26 | # http://example.googlepages.com/foo.html python-couchdbkit-(.*)\.tar\.gz 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2011,2012 (c) Benoît Chesneau 2 | 3 | Permission is hereby granted, free of charge, to any person 4 | obtaining a copy of this software and associated documentation 5 | files (the "Software"), to deal in the Software without 6 | restriction, including without limitation the rights to use, 7 | copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the 9 | Software is furnished to do so, subject to the following 10 | conditions: 11 | 12 | The above copyright notice and this permission notice shall be 13 | included in all copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 17 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 19 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 20 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /debian/copyright: -------------------------------------------------------------------------------- 1 | 2011 (c) Benoît Chesneau 2 | 3 | Permission is hereby granted, free of charge, to any person 4 | obtaining a copy of this software and associated documentation 5 | files (the "Software"), to deal in the Software without 6 | restriction, including without limitation the rights to use, 7 | copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the 9 | Software is furnished to do so, subject to the following 10 | conditions: 11 | 12 | The above copyright notice and this permission notice shall be 13 | included in all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 17 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 19 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 20 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 
23 | 24 | -------------------------------------------------------------------------------- /examples/httpparser.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import socket 3 | 4 | try: 5 | from http_parser.parser import HttpParser 6 | except ImportError: 7 | from http_parser.pyparser import HttpParser 8 | from http_parser.util import b 9 | 10 | def main(): 11 | 12 | p = HttpParser() 13 | s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 14 | body = [] 15 | header_done = False 16 | try: 17 | s.connect(('gunicorn.org', 80)) 18 | s.send(b("GET / HTTP/1.1\r\nHost: gunicorn.org\r\n\r\n")) 19 | 20 | while True: 21 | data = s.recv(1024) 22 | if not data: 23 | break 24 | 25 | recved = len(data) 26 | nparsed = p.execute(data, recved) 27 | assert nparsed == recved 28 | 29 | if p.is_headers_complete() and not header_done: 30 | print(p.get_headers()) 31 | print(p.get_headers()['content-length']) 32 | header_done = True 33 | 34 | if p.is_partial_body(): 35 | body.append(p.recv_body()) 36 | 37 | if p.is_message_complete(): 38 | break 39 | 40 | print(b("").join(body)) 41 | 42 | finally: 43 | s.close() 44 | 45 | if __name__ == "__main__": 46 | main() 47 | 48 | 49 | -------------------------------------------------------------------------------- /http_parser/http_parser.gyp: -------------------------------------------------------------------------------- 1 | # This file is used with the GYP meta build system. 
2 | # http://code.google.com/p/gyp/ 3 | # To build try this: 4 | # svn co http://gyp.googlecode.com/svn/trunk gyp 5 | # ./gyp/gyp -f make --depth=`pwd` http_parser.gyp 6 | # ./out/Debug/test 7 | { 8 | 'target_defaults': { 9 | 'default_configuration': 'Debug', 10 | 'configurations': { 11 | # TODO: hoist these out and put them somewhere common, because 12 | # RuntimeLibrary MUST MATCH across the entire project 13 | 'Debug': { 14 | 'defines': [ 'DEBUG', '_DEBUG' ], 15 | 'msvs_settings': { 16 | 'VCCLCompilerTool': { 17 | 'RuntimeLibrary': 1, # static debug 18 | }, 19 | }, 20 | }, 21 | 'Release': { 22 | 'defines': [ 'NDEBUG' ], 23 | 'msvs_settings': { 24 | 'VCCLCompilerTool': { 25 | 'RuntimeLibrary': 0, # static release 26 | }, 27 | }, 28 | } 29 | }, 30 | 'msvs_settings': { 31 | 'VCCLCompilerTool': { 32 | }, 33 | 'VCLibrarianTool': { 34 | }, 35 | 'VCLinkerTool': { 36 | 'GenerateDebugInformation': 'true', 37 | }, 38 | }, 39 | 'conditions': [ 40 | ['OS == "win"', { 41 | 'defines': [ 42 | 'WIN32' 43 | ], 44 | }] 45 | ], 46 | }, 47 | 48 | 'targets': [ 49 | { 50 | 'target_name': 'http_parser', 51 | 'type': 'static_library', 52 | 'include_dirs': [ '.' ], 53 | 'direct_dependent_settings': { 54 | 'include_dirs': [ '.' ], 55 | }, 56 | 'defines': [ 'HTTP_PARSER_STRICT=0' ], 57 | 'sources': [ './http_parser.c', ], 58 | 'conditions': [ 59 | ['OS=="win"', { 60 | 'msvs_settings': { 61 | 'VCCLCompilerTool': { 62 | # Compile as C++. http_parser.c is actually C99, but C++ is 63 | # close enough in this case. 
64 | 'CompileAs': 2, 65 | }, 66 | }, 67 | }] 68 | ], 69 | }, 70 | 71 | { 72 | 'target_name': 'test', 73 | 'type': 'executable', 74 | 'dependencies': [ 'http_parser' ], 75 | 'sources': [ 'test.c' ] 76 | } 77 | ] 78 | } 79 | 80 | -------------------------------------------------------------------------------- /http_parser/pyversion_compat.h: -------------------------------------------------------------------------------- 1 | #include "Python.h" 2 | 3 | #if PY_VERSION_HEX < 0x02070000 4 | #if PY_VERSION_HEX < 0x02060000 5 | #define PyObject_CheckBuffer(object) (0) 6 | 7 | #define PyObject_GetBuffer(obj, view, flags) (PyErr_SetString(PyExc_NotImplementedError, \ 8 | "new buffer interface is not available"), -1) 9 | #define PyBuffer_FillInfo(view, obj, buf, len, readonly, flags) (PyErr_SetString(PyExc_NotImplementedError, \ 10 | "new buffer interface is not available"), -1) 11 | #define PyBuffer_Release(obj) (PyErr_SetString(PyExc_NotImplementedError, \ 12 | "new buffer interface is not available"), -1) 13 | // Bytes->String 14 | #define PyBytes_FromStringAndSize PyString_FromStringAndSize 15 | #define PyBytes_FromString PyString_FromString 16 | #define PyBytes_AsString PyString_AsString 17 | #define PyBytes_Size PyString_Size 18 | #endif 19 | 20 | #define PyMemoryView_FromBuffer(info) (PyErr_SetString(PyExc_NotImplementedError, \ 21 | "new buffer interface is not available"), (PyObject *)NULL) 22 | #define PyMemoryView_FromObject(object) (PyErr_SetString(PyExc_NotImplementedError, \ 23 | "new buffer interface is not available"), (PyObject *)NULL) 24 | #endif 25 | 26 | #if PY_VERSION_HEX >= 0x03000000 27 | // for buffers 28 | #define Py_END_OF_BUFFER ((Py_ssize_t) 0) 29 | 30 | #define PyObject_CheckReadBuffer(object) (0) 31 | 32 | #define PyBuffer_FromMemory(ptr, s) (PyErr_SetString(PyExc_NotImplementedError, \ 33 | "old buffer interface is not available"), (PyObject *)NULL) 34 | #define PyBuffer_FromReadWriteMemory(ptr, s) (PyErr_SetString(PyExc_NotImplementedError, \ 
35 | "old buffer interface is not available"), (PyObject *)NULL) 36 | #define PyBuffer_FromObject(object, offset, size) (PyErr_SetString(PyExc_NotImplementedError, \ 37 | "old buffer interface is not available"), (PyObject *)NULL) 38 | #define PyBuffer_FromReadWriteObject(object, offset, size) (PyErr_SetString(PyExc_NotImplementedError, \ 39 | "old buffer interface is not available"), (PyObject *)NULL) 40 | 41 | #endif 42 | 43 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | http-parser 2 | ----------- 3 | 4 | HTTP request/response parser for Python compatible with Python 2.x 5 | (>=2.5.4), Python 3 and Pypy. If possible a C parser based on 6 | http-parser_ from Ryan Dahl will be used. 7 | 8 | http-parser is under the MIT license. 9 | 10 | Project url: https://github.com/benoitc/http-parser/ 11 | 12 | Requirements: 13 | ------------- 14 | 15 | - Python 2.5 or sup. Pypy latest version. 16 | - Cython if you need to rebuild the C code (Not needed for Pypy) 17 | 18 | Installation 19 | ------------ 20 | 21 | :: 22 | 23 | $ pip install http-parser 24 | 25 | Or install from source:: 26 | 27 | $ git clone git://github.com/benoitc/http-parser.git 28 | $ cd http-parser && python setup.py install 29 | 30 | 31 | Note: if you get an error on MacOSX try to install with the following 32 | arguments: 33 | 34 | $ env ARCHFLAGS="-arch i386 -arch x86_64" python setup.py install 35 | 36 | Usage 37 | ----- 38 | 39 | http-parser provide you **parser.HttpParser** low-level parser in C that 40 | you can access in your python program and **http.HttpStream** providing 41 | higher-level access to a readable,sequential io.RawIOBase object. 
42 | 43 | To help you in your day work, http-parser provides you 3 kind of readers 44 | in the reader module: IterReader to read iterables, StringReader to 45 | reads strings and StringIO objects, SocketReader to read sockets or 46 | objects with the same api (recv_into needed). You can of course use any 47 | io.RawIOBase object. 48 | 49 | Example of HttpStream 50 | +++++++++++++++++++++ 51 | 52 | ex:: 53 | 54 | #!/usr/bin/env python 55 | import socket 56 | 57 | from http_parser.http import HttpStream 58 | from http_parser.reader import SocketReader 59 | 60 | def main(): 61 | s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 62 | try: 63 | s.connect(('gunicorn.org', 80)) 64 | s.send("GET / HTTP/1.1\r\nHost: gunicorn.org\r\n\r\n") 65 | r = SocketReader(s) 66 | p = HttpStream(r) 67 | print p.headers() 68 | print p.body_file().read() 69 | finally: 70 | s.close() 71 | 72 | if __name__ == "__main__": 73 | main() 74 | 75 | Example of HttpParser: 76 | ++++++++++++++++++++++ 77 | 78 | :: 79 | 80 | #!/usr/bin/env python 81 | import socket 82 | 83 | # try to import C parser then fallback in pure python parser. 
84 | try: 85 | from http_parser.parser import HttpParser 86 | except ImportError: 87 | from http_parser.pyparser import HttpParser 88 | 89 | 90 | def main(): 91 | 92 | p = HttpParser() 93 | s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 94 | body = [] 95 | try: 96 | s.connect(('gunicorn.org', 80)) 97 | s.send("GET / HTTP/1.1\r\nHost: gunicorn.org\r\n\r\n") 98 | 99 | while True: 100 | data = s.recv(1024) 101 | if not data: 102 | break 103 | 104 | recved = len(data) 105 | nparsed = p.execute(data, recved) 106 | assert nparsed == recved 107 | 108 | if p.is_headers_complete(): 109 | print p.get_headers() 110 | 111 | if p.is_partial_body(): 112 | body.append(p.recv_body()) 113 | 114 | if p.is_message_complete(): 115 | break 116 | 117 | print "".join(body) 118 | 119 | finally: 120 | s.close() 121 | 122 | if __name__ == "__main__": 123 | main() 124 | 125 | 126 | You can find more docs in the code (or use a doc generator). 127 | 128 | 129 | Copyright 130 | --------- 131 | 132 | 2011,2012 (c) Benoît Chesneau 133 | 134 | 135 | .. http-parser_ https://github.com/ry/http-parser 136 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | http-parser 2 | 2011,2012 (c) Benoît Chesneau 3 | 4 | http-parser is released under the MIT license. See the LICENSE 5 | file for the complete license. 6 | 7 | 8 | http-parser.c, http-parser.h under MIT license 9 | ---------------------------------------------- 10 | 11 | Copyright Joyent, Inc. and other Node contributors. All rights reserved. 
12 | 13 | Permission is hereby granted, free of charge, to any person obtaining a 14 | copy of this software and associated documentation files (the 15 | "Software"), to deal in the Software without restriction, including 16 | without limitation the rights to use, copy, modify, merge, publish, 17 | distribute, sublicense, and/or sell copies of the Software, and to 18 | permit persons to whom the Software is furnished to do so, subject to 19 | the following conditions: 20 | 21 | The above copyright notice and this permission notice shall be included 22 | in all copies or substantial portions of the Software. 23 | 24 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 25 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 27 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 28 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 29 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 30 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 31 | 32 | 33 | 34 | setup.py my_build_ext function under MIT License 35 | ------------------------------------------------ 36 | 37 | Copyright Denis Bilenko and the contributors, http://www.gevent.org 38 | 39 | Permission is hereby granted, free of charge, to any person obtaining a 40 | copy of this software and associated documentation files (the 41 | "Software"), to deal in the Software without restriction, including 42 | without limitation the rights to use, copy, modify, merge, publish, 43 | distribute, sublicense, and/or sell copies of the Software, and to 44 | permit persons to whom the Software is furnished to do so, subject to 45 | the following conditions: 46 | 47 | The above copyright notice and this permission notice shall be included 48 | in all copies or substantial portions of the Software. 
49 | 50 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 51 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 52 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 53 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 54 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 55 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 56 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 57 | 58 | 59 | util.py - IOrderedDict 60 | ---------------------- 61 | 62 | IOrderedDict is based on collections.OrderedDict module, with 63 | insensitive key search support. 64 | 65 | Under PSF license. 66 | 67 | Copyright © 2001-2010 Python Software Foundation; All Rights Reserved 68 | 69 | This LICENSE AGREEMENT is between the Python Software Foundation 70 | (“PSF”), and the Individual or Organization (“Licensee”) accessing and 71 | otherwise using Python 2.7.1 software in source or binary form and its 72 | associated documentation. 73 | 74 | Subject to the terms and conditions of this License Agreement, PSF 75 | hereby grants Licensee a nonexclusive, royalty-free, world-wide license 76 | to reproduce, analyze, test, perform and/or display publicly, prepare 77 | derivative works, distribute, and otherwise use Python 2.7.1 alone or in 78 | any derivative version, provided, however, that PSF’s License Agreement 79 | and PSF’s notice of copyright, i.e., “Copyright © 2001-2010 Python 80 | Software Foundation; All Rights Reserved” are retained in Python 2.7.1 81 | alone or in any derivative version prepared by Licensee. 82 | 83 | In the event Licensee prepares a derivative work that is based on or 84 | incorporates Python 2.7.1 or any part thereof, and wants to make the 85 | derivative work available to others as provided herein, then Licensee 86 | hereby agrees to include in any such work a brief summary of the changes 87 | made to Python 2.7.1. 
88 | 89 | PSF is making Python 2.7.1 available to Licensee on an “AS IS” basis. 90 | PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY 91 | OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND DISCLAIMS ANY 92 | REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY 93 | PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 2.7.1 WILL NOT INFRINGE ANY 94 | THIRD PARTY RIGHTS. 95 | 96 | PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON 2.7.1 97 | FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A 98 | RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 2.7.1, OR 99 | ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 100 | 101 | This License Agreement will automatically terminate upon a material 102 | breach of its terms and conditions. 103 | 104 | Nothing in this License Agreement shall be deemed to create any 105 | relationship of agency, partnership, or joint venture between PSF and 106 | Licensee. This License Agreement does not grant permission to use PSF 107 | trademarks or trade name in a trademark sense to endorse or promote 108 | products or services of Licensee, or any third party. 109 | 110 | By copying, installing or otherwise using Python 2.7.1, Licensee agrees 111 | to be bound by the terms and conditions of this License Agreement. 112 | 113 | 114 | py25.IOBase, py25.RawIOBase, py25.BufferedReader, py25.TextIOWrapper: 115 | --------------------------------------------------------------------- 116 | 117 | Partial implementation of io classes from python 2.7. Only read 118 | functions have been ported. 119 | 120 | 121 | Under PSF license. 
122 | 123 | Copyright © 2001-2010 Python Software Foundation; All Rights Reserved 124 | 125 | 126 | -------------------------------------------------------------------------------- /http_parser/reader.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 - 2 | # 3 | # This file is part of http-parser released under the MIT license. 4 | 5 | # See the NOTICE for more information. 6 | 7 | from errno import EINTR, EAGAIN, EWOULDBLOCK 8 | import socket 9 | import sys 10 | import types 11 | try: 12 | from cStringIO import StringIO 13 | except ImportError: 14 | from StringIO import StringIO 15 | 16 | try: 17 | from io import DEFAULT_BUFFER_SIZE, RawIOBase 18 | except ImportError: 19 | from http_parser.py25 import DEFAULT_BUFFER_SIZE, RawIOBase 20 | 21 | try: 22 | bytes 23 | bytearray 24 | except (NameError, AttributeError): 25 | # python < 2.6 26 | from py25 import bytes, bytearray 27 | 28 | 29 | _blocking_errnos = ( EAGAIN, EWOULDBLOCK ) 30 | 31 | if sys.version_info < (2, 7, 0, 'final'): 32 | # in python 2.6 socket.recv_into doesn't support bytesarray 33 | import array 34 | def _readinto(sock, b): 35 | l = max(len(b), DEFAULT_BUFFER_SIZE) 36 | while True: 37 | try: 38 | buf = sock.recv(l) 39 | recved = len(buf) 40 | b[0:recved] = buf 41 | return recved 42 | except socket.error, e: 43 | n = e.args[0] 44 | if n == EINTR: 45 | continue 46 | if n in _blocking_errnos: 47 | return None 48 | raise 49 | else: 50 | _readinto = None 51 | 52 | class HttpBodyReader(RawIOBase): 53 | """ Raw implementation to stream http body """ 54 | 55 | def __init__(self, http_stream): 56 | self.http_stream = http_stream 57 | self.eof = False 58 | 59 | def readinto(self, b): 60 | if self.http_stream.parser.is_message_complete() or self.eof: 61 | if self.http_stream.parser.is_partial_body(): 62 | return self.http_stream.parser.recv_body_into(b) 63 | return 0 64 | 65 | self._checkReadable() 66 | try: 67 | self._checkClosed() 68 | except 
AttributeError: 69 | pass 70 | 71 | while True: 72 | buf = bytearray(DEFAULT_BUFFER_SIZE) 73 | recved = self.http_stream.stream.readinto(buf) 74 | if recved is None: 75 | break 76 | 77 | del buf[recved:] 78 | nparsed = self.http_stream.parser.execute(bytes(buf), recved) 79 | if nparsed != recved: 80 | return None 81 | 82 | if self.http_stream.parser.is_partial_body() or recved == 0: 83 | break 84 | elif self.http_stream.parser.is_message_complete(): 85 | break 86 | 87 | if not self.http_stream.parser.is_partial_body(): 88 | self.eof = True 89 | b = bytes('') 90 | return len(b) 91 | 92 | return self.http_stream.parser.recv_body_into(b) 93 | 94 | def readable(self): 95 | return not self.closed or self.http_stream.parser.is_partial_body() 96 | 97 | def close(self): 98 | if self.closed: 99 | return 100 | RawIOBase.close(self) 101 | self.http_stream = None 102 | 103 | class IterReader(RawIOBase): 104 | """ A raw reader implementation for iterable """ 105 | def __init__(self, iterable): 106 | self.iter = iter(iterable) 107 | self._buffer = "" 108 | 109 | def readinto(self, b): 110 | self._checkClosed() 111 | self._checkReadable() 112 | 113 | l = len(b) 114 | try: 115 | chunk = self.iter.next() 116 | self._buffer += chunk 117 | m = min(len(self._buffer), l) 118 | data, self._buffer = self._buffer[:m], self._buffer[m:] 119 | b[0:m] = data 120 | return len(data) 121 | except StopIteration: 122 | del b[0:] 123 | return 0 124 | 125 | def readable(self): 126 | return not self.closed 127 | 128 | def close(self): 129 | if self.closed: 130 | return 131 | RawIOBase.close(self) 132 | self.iter = None 133 | 134 | class StringReader(IterReader): 135 | """ a raw reader for strings or StringIO.StringIO, 136 | cStringIO.StringIO objects """ 137 | 138 | def __init__(self, string): 139 | if isinstance(string, types.StringTypes): 140 | iterable = StringIO(string) 141 | else: 142 | iterable = string 143 | IterReader.__init__(self, iterable) 144 | 145 | 146 | 147 | 148 | class 
SocketReader(RawIOBase): 149 | """ a raw reader for sockets or socket like interface. based 150 | on SocketIO object from python3.2 """ 151 | 152 | def __init__(self, sock): 153 | RawIOBase.__init__(self) 154 | self._sock = sock 155 | 156 | if _readinto is not None: 157 | def readinto(self, b): 158 | try: 159 | self._checkClosed() 160 | except AttributeError: 161 | pass 162 | self._checkReadable() 163 | return _readinto(self._sock, b) 164 | 165 | else: 166 | def readinto(self, b): 167 | try: 168 | self._checkClosed() 169 | except AttributeError: 170 | pass 171 | self._checkReadable() 172 | 173 | while True: 174 | try: 175 | return self._sock.recv_into(b) 176 | except socket.error, e: 177 | n = e.args[0] 178 | if n == EINTR: 179 | continue 180 | if n in _blocking_errnos: 181 | return None 182 | raise 183 | 184 | def readable(self): 185 | """True if the SocketIO is open for reading. 186 | """ 187 | return not self.closed 188 | 189 | def fileno(self): 190 | """Return the file descriptor of the underlying socket. 191 | """ 192 | self._checkClosed() 193 | return self._sock.fileno() 194 | 195 | def close(self): 196 | """Close the SocketIO object. This doesn't close the underlying 197 | socket, except if all references to it have disappeared. 198 | """ 199 | if self.closed: 200 | return 201 | RawIOBase.close(self) 202 | self._sock = None 203 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 - 2 | # 3 | # This file is part of http-parser released under the MIT license. 4 | # See the NOTICE for more information. 
5 | 6 | from __future__ import with_statement 7 | 8 | from distutils.core import setup 9 | from distutils.command import build_ext 10 | from distutils.command.install import INSTALL_SCHEMES 11 | from distutils.extension import Extension 12 | from distutils.errors import CCompilerError, DistutilsExecError 13 | import glob 14 | from imp import load_source 15 | import os 16 | import sys 17 | import traceback 18 | 19 | if not hasattr(sys, 'version_info') or \ 20 | sys.version_info < (2, 5, 0, 'final'): 21 | raise SystemExit("http-parser requires Python 2.6x or later") 22 | 23 | is_pypy = hasattr(sys, 'pypy_version_info') 24 | 25 | CLASSIFIERS = [ 26 | 'Development Status :: 4 - Beta', 27 | 'Environment :: Other Environment', 28 | 'Intended Audience :: Developers', 29 | 'License :: OSI Approved :: MIT License', 30 | 'Operating System :: MacOS :: MacOS X', 31 | 'Operating System :: POSIX', 32 | 'Programming Language :: Python', 33 | 'Topic :: Internet', 34 | 'Topic :: Utilities', 35 | 'Topic :: Software Development :: Libraries :: Python Modules', 36 | ] 37 | 38 | 39 | MODULES = ["http_parser"] 40 | 41 | INCLUDE_DIRS = ["http_parser"] 42 | SOURCES = [os.path.join("http_parser", "parser.c"), 43 | os.path.join("http_parser", "http_parser.c")] 44 | 45 | for scheme in INSTALL_SCHEMES.values(): 46 | scheme['data'] = scheme['purelib'] 47 | 48 | class my_build_ext(build_ext.build_ext): 49 | user_options = (build_ext.build_ext.user_options 50 | + [("cython=", None, "path to the cython executable")]) 51 | 52 | def initialize_options(self): 53 | build_ext.build_ext.initialize_options(self) 54 | self.cython = "cython" 55 | 56 | def compile_cython(self): 57 | sources = glob.glob('http_parser/*.pyx') 58 | if not sources: 59 | if not os.path.exists('http_parser/parser.c'): 60 | sys.stderr.write('Could not find http_parser/parser.c\n') 61 | 62 | if os.path.exists('http_parser/parser.c'): 63 | core_c_mtime = os.stat('http_parser/parser.c').st_mtime 64 | changed = [filename for filename 
in sources if \ 65 | (os.stat(filename).st_mtime - core_c_mtime) > 1] 66 | if not changed: 67 | return 68 | sys.stderr.write('Running %s (changed: %s)\n' % (self.cython, 69 | ', '.join(changed))) 70 | else: 71 | sys.stderr.write('Running %s' % self.cython) 72 | cython_result = os.system('%s http_parser/parser.pyx' % self.cython) 73 | if cython_result: 74 | if os.system('%s -V 2> %s' % (self.cython, os.devnull)): 75 | # there's no cython in the system 76 | sys.stderr.write('No cython found, cannot rebuild parser.c\n') 77 | return 78 | sys.exit(1) 79 | 80 | def build_extension(self, ext): 81 | if self.cython: 82 | self.compile_cython() 83 | try: 84 | result = build_ext.build_ext.build_extension(self, ext) 85 | # hack: create a symlink from build/../parser.so to http_parser/parser.so 86 | # to prevent "ImportError: cannot import name core" failures 87 | 88 | fullname = self.get_ext_fullname(ext.name) 89 | modpath = fullname.split('.') 90 | filename = self.get_ext_filename(ext.name) 91 | filename = os.path.split(filename)[-1] 92 | if not self.inplace: 93 | filename = os.path.join(*modpath[:-1] + [filename]) 94 | path_to_build_core_so = os.path.abspath( 95 | os.path.join(self.build_lib, filename)) 96 | path_to_core_so = os.path.abspath( 97 | os.path.join('http_parser', 98 | os.path.basename(path_to_build_core_so))) 99 | if path_to_build_core_so != path_to_core_so: 100 | try: 101 | os.unlink(path_to_core_so) 102 | except OSError: 103 | pass 104 | if hasattr(os, 'symlink'): 105 | print('Linking %s to %s' % (path_to_build_core_so, 106 | path_to_core_so)) 107 | os.symlink(path_to_build_core_so, path_to_core_so) 108 | else: 109 | print('Copying %s to %s' % (path_to_build_core_so, 110 | path_to_core_so)) 111 | import shutil 112 | shutil.copyfile(path_to_build_core_so, path_to_core_so) 113 | return result 114 | 115 | except (Exception, CCompilerError,): 116 | traceback.print_exc() 117 | sys.stderr.write("warning: can't build parser.c speedup.\n\n") 118 | sys.stderr.write("You 
can can safely ignire previous error.\n") 119 | 120 | 121 | 122 | def main(): 123 | http_parser = load_source("http_parser", os.path.join("http_parser", 124 | "__init__.py")) 125 | 126 | # read long description 127 | with open(os.path.join(os.path.dirname(__file__), 'README.rst')) as f: 128 | long_description = f.read() 129 | 130 | PACKAGES = {} 131 | for name in MODULES: 132 | PACKAGES[name] = name.replace(".", "/") 133 | 134 | DATA_FILES = [ 135 | ('http_parser', ["LICENSE", "MANIFEST.in", "NOTICE", "README.rst", 136 | "THANKS",]) 137 | ] 138 | 139 | 140 | options = dict( 141 | name = 'http-parser', 142 | version = http_parser.__version__, 143 | description = 'http request/response parser', 144 | long_description = long_description, 145 | author = 'Benoit Chesneau', 146 | author_email = 'benoitc@e-engura.com', 147 | license = 'MIT', 148 | url = 'http://github.com/benoitc/http-parser', 149 | classifiers = CLASSIFIERS, 150 | platforms=['any'], 151 | packages = PACKAGES.keys(), 152 | package_dir = PACKAGES, 153 | data_files = DATA_FILES, 154 | 155 | ) 156 | 157 | 158 | if not is_pypy: 159 | EXT_MODULES = [Extension("http_parser.parser", 160 | sources=SOURCES, include_dirs=INCLUDE_DIRS)] 161 | 162 | 163 | options.update(dict( 164 | cmdclass = {'build_ext': my_build_ext}, 165 | ext_modules = EXT_MODULES)) 166 | 167 | # Python 3: run 2to3 168 | try: 169 | from distutils.command.build_py import build_py_2to3 170 | from distutils.command.build_scripts import build_scripts_2to3 171 | except ImportError: 172 | pass 173 | else: 174 | options['cmdclass'].update({ 175 | 'build_py': build_py_2to3, 176 | 'build_scripts': build_scripts_2to3, 177 | }) 178 | 179 | 180 | setup(**options) 181 | 182 | if __name__ == "__main__": 183 | main() 184 | 185 | -------------------------------------------------------------------------------- /http_parser/http.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 - 2 | # 3 | # This file is 
part of http-parser released under the MIT license. 4 | # See the NOTICE for more information. 5 | 6 | try: 7 | from io import DEFAULT_BUFFER_SIZE, BufferedReader, TextIOWrapper 8 | except ImportError: 9 | from py25 import DEFAULT_BUFFER_SIZE, BufferedReader, TextIOWrapper 10 | 11 | 12 | try: 13 | bytes 14 | bytearray 15 | except (NameError, AttributeError): 16 | # python < 2.6 17 | from py25 import bytes, bytearray 18 | 19 | try: 20 | from http_parser.parser import HttpParser 21 | except ImportError: 22 | from http_parser.pyparser import HttpParser 23 | 24 | from http_parser.reader import HttpBodyReader 25 | from http_parser.util import status_reasons 26 | 27 | HTTP_BOTH = 2 28 | HTTP_RESPONSE = 1 29 | HTTP_REQUEST = 0 30 | 31 | class NoMoreData(Exception): 32 | """ exception raised when trying to parse headers but 33 | we didn't get all data needed. 34 | """ 35 | 36 | class ParserError(Exception): 37 | """ error while parsing http request """ 38 | 39 | class BadStatusLine(Exception): 40 | """ error when status line is invalid """ 41 | 42 | class HttpStream(object): 43 | """ An HTTP parser providing higher-level access to a readable, 44 | sequential io.RawIOBase object. You can use implementions of 45 | http_parser.reader (IterReader, StringReader, SocketReader) or 46 | create your own. 47 | """ 48 | 49 | def __init__(self, stream, kind=HTTP_BOTH, decompress=False): 50 | """ constructor of HttpStream. 51 | 52 | :attr stream: an io.RawIOBase object 53 | :attr kind: Int, could be 0 to parseonly requests, 54 | 1 to parse only responses or 2 if we want to let 55 | the parser detect the type. 
56 | """ 57 | self.parser = HttpParser(kind=kind, decompress=decompress) 58 | self.stream = stream 59 | 60 | def _check_headers_complete(self): 61 | if self.parser.is_headers_complete(): 62 | return 63 | 64 | while True: 65 | try: 66 | data = self.next() 67 | except StopIteration: 68 | if self.parser.is_headers_complete(): 69 | return 70 | raise NoMoreData("Can't parse headers") 71 | 72 | if self.parser.is_headers_complete(): 73 | return 74 | 75 | 76 | def _wait_status_line(self, cond): 77 | if self.parser.is_headers_complete(): 78 | return True 79 | 80 | data = "" 81 | if not cond(): 82 | while True: 83 | try: 84 | data += self.next() 85 | except StopIteration: 86 | if self.parser.is_headers_complete(): 87 | return True 88 | raise BadStatusLine(data) 89 | if cond(): 90 | return True 91 | return True 92 | 93 | def _wait_on_url(self): 94 | return self._wait_status_line(self.parser.get_url) 95 | 96 | def _wait_on_status(self): 97 | return self._wait_status_line(self.parser.get_status_code) 98 | 99 | def url(self): 100 | """ get full url of the request """ 101 | self._wait_on_url() 102 | return self.parser.get_url() 103 | 104 | def path(self): 105 | """ get path of the request (url without query string and 106 | fragment """ 107 | self._wait_on_url() 108 | return self.parser.get_path() 109 | 110 | def query_string(self): 111 | """ get query string of the url """ 112 | self._wait_on_url() 113 | return self.parser.get_query_string() 114 | 115 | def fragment(self): 116 | """ get fragment of the url """ 117 | self._wait_on_url() 118 | return self.parser.get_fragment() 119 | 120 | def version(self): 121 | self._wait_on_status() 122 | return self.parser.get_version() 123 | 124 | def status_code(self): 125 | """ get status code of a response as integer """ 126 | self._wait_on_status() 127 | return self.parser.get_status_code() 128 | 129 | def status(self): 130 | """ return complete status with reason """ 131 | status_code = self.status_code() 132 | reason = 
status_reasons.get(int(status_code), 'unknown') 133 | return "%s %s" % (status_code, reason) 134 | 135 | 136 | def method(self): 137 | """ get HTTP method as string""" 138 | self._wait_on_status() 139 | return self.parser.get_method() 140 | 141 | def headers(self): 142 | """ get request/response headers, headers are returned in a 143 | OrderedDict that allows you to get value using insensitive 144 | keys.""" 145 | self._check_headers_complete() 146 | return self.parser.get_headers() 147 | 148 | def should_keep_alive(self): 149 | """ return True if the connection should be kept alive 150 | """ 151 | self._check_headers_complete() 152 | return self.parser.should_keep_alive() 153 | 154 | def is_chunked(self): 155 | """ return True if Transfer-Encoding header value is chunked""" 156 | self._check_headers_complete() 157 | return self.parser.is_chunked() 158 | 159 | def wsgi_environ(self, initial=None): 160 | """ get WSGI environ based on the current request. 161 | 162 | :attr initial: dict, initial values to fill in environ. 163 | """ 164 | self._check_headers_complete() 165 | return self.parser.get_wsgi_environ() 166 | 167 | def body_file(self, buffering=None, binary=True, encoding=None, 168 | errors=None, newline=None): 169 | """ return the body as a buffered stream object. If binary is 170 | true an io.BufferedReader will be returned, else an 171 | io.TextIOWrapper. 
172 | """ 173 | self._check_headers_complete() 174 | 175 | if buffering is None: 176 | buffering = -1 177 | if buffering < 0: 178 | buffering = DEFAULT_BUFFER_SIZE 179 | 180 | raw = HttpBodyReader(self) 181 | buf = BufferedReader(raw, buffering) 182 | if binary: 183 | return buf 184 | text = TextIOWrapper(buf, encoding, errors, newline) 185 | return text 186 | 187 | def body_string(self, binary=True, encoding=None, errors=None, 188 | newline=None): 189 | """ return body as string """ 190 | return self.body_file(binary=binary, encoding=encoding, 191 | newline=newline).read() 192 | 193 | def __iter__(self): 194 | return self 195 | 196 | def next(self): 197 | if self.parser.is_message_complete(): 198 | raise StopIteration 199 | 200 | # fetch data 201 | b = bytearray(DEFAULT_BUFFER_SIZE) 202 | recved = self.stream.readinto(b) 203 | if recved is None: 204 | raise NoMoreData("no more data") 205 | 206 | del b[recved:] 207 | to_parse = bytes(b) 208 | # parse data 209 | nparsed = self.parser.execute(to_parse, recved) 210 | if nparsed != recved and not self.parser.is_message_complete(): 211 | raise ParserError("nparsed != recved (%s != %s)" % (nparsed, 212 | recved)) 213 | 214 | if recved == 0: 215 | raise StopIteration 216 | 217 | return to_parse 218 | -------------------------------------------------------------------------------- /http_parser/util.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 - 2 | # 3 | # This file is part of http-parser released under the MIT license. 4 | # See the NOTICE for more information. 
5 | 6 | 7 | import sys 8 | 9 | if sys.version_info[0] == 3: 10 | from urllib.parse import unquote 11 | def b(s): 12 | return s.encode("latin-1") 13 | 14 | def bytes_to_str(b): 15 | return str(b, 'latin1') 16 | 17 | import io 18 | StringIO = io.StringIO 19 | 20 | else: 21 | from urllib import unquote 22 | def b(s): 23 | return s 24 | 25 | def bytes_to_str(s): 26 | return s 27 | 28 | try: 29 | import cStringIO 30 | StringIO = BytesIO = cStringIO.StringIO 31 | except ImportError: 32 | import StringIO 33 | StringIO = BytesIO = StringIO.StringIO 34 | 35 | if sys.version_info < (2, 6, 0, 'final'): 36 | from py25 import IOrderedDict 37 | else: 38 | from collections import MutableMapping 39 | from itertools import imap 40 | 41 | class IOrderedDict(dict, MutableMapping): 42 | 'Dictionary that remembers insertion order with insensitive key' 43 | # An inherited dict maps keys to values. 44 | # The inherited dict provides __getitem__, __len__, __contains__, and get. 45 | # The remaining methods are order-aware. 46 | # Big-O running times for all methods are the same as for regular dictionaries. 47 | 48 | # The internal self.__map dictionary maps keys to links in a doubly linked list. 49 | # The circular doubly linked list starts and ends with a sentinel element. 50 | # The sentinel element never gets deleted (this simplifies the algorithm). 51 | # Each link is stored as a list of length three: [PREV, NEXT, KEY]. 52 | 53 | def __init__(self, *args, **kwds): 54 | '''Initialize an ordered dictionary. Signature is the same as for 55 | regular dictionaries, but keyword arguments are not recommended 56 | because their insertion order is arbitrary. 
57 | 58 | ''' 59 | if len(args) > 1: 60 | raise TypeError('expected at most 1 arguments, got %d' % len(args)) 61 | try: 62 | self.__root 63 | except AttributeError: 64 | self.__root = root = [None, None, None] # sentinel node 65 | PREV = 0 66 | NEXT = 1 67 | root[PREV] = root[NEXT] = root 68 | self.__map = {} 69 | self.__lower = {} 70 | self.update(*args, **kwds) 71 | 72 | def __setitem__(self, key, value, PREV=0, NEXT=1, dict_setitem=dict.__setitem__): 73 | 'od.__setitem__(i, y) <==> od[i]=y' 74 | # Setting a new item creates a new link which goes at the end of the linked 75 | # list, and the inherited dictionary is updated with the new key/value pair. 76 | if key not in self: 77 | root = self.__root 78 | last = root[PREV] 79 | last[NEXT] = root[PREV] = self.__map[key] = [last, root, key] 80 | self.__lower[key.lower()] = key 81 | key = self.__lower[key.lower()] 82 | dict_setitem(self, key, value) 83 | 84 | def __delitem__(self, key, PREV=0, NEXT=1, dict_delitem=dict.__delitem__): 85 | 'od.__delitem__(y) <==> del od[y]' 86 | # Deleting an existing item uses self.__map to find the link which is 87 | # then removed by updating the links in the predecessor and successor nodes. 88 | if key in self: 89 | key = self.__lower.pop(key.lower()) 90 | 91 | dict_delitem(self, key) 92 | link = self.__map.pop(key) 93 | link_prev = link[PREV] 94 | link_next = link[NEXT] 95 | link_prev[NEXT] = link_next 96 | link_next[PREV] = link_prev 97 | 98 | def __getitem__(self, key, dict_getitem=dict.__getitem__): 99 | if key in self: 100 | key = self.__lower.get(key.lower()) 101 | return dict_getitem(self, key) 102 | 103 | def __contains__(self, key): 104 | return key.lower() in self.__lower 105 | 106 | def __iter__(self, NEXT=1, KEY=2): 107 | 'od.__iter__() <==> iter(od)' 108 | # Traverse the linked list in order. 
109 | root = self.__root 110 | curr = root[NEXT] 111 | while curr is not root: 112 | yield curr[KEY] 113 | curr = curr[NEXT] 114 | 115 | def __reversed__(self, PREV=0, KEY=2): 116 | 'od.__reversed__() <==> reversed(od)' 117 | # Traverse the linked list in reverse order. 118 | root = self.__root 119 | curr = root[PREV] 120 | while curr is not root: 121 | yield curr[KEY] 122 | curr = curr[PREV] 123 | 124 | def __reduce__(self): 125 | 'Return state information for pickling' 126 | items = [[k, self[k]] for k in self] 127 | tmp = self.__map, self.__root 128 | del self.__map, self.__root 129 | inst_dict = vars(self).copy() 130 | self.__map, self.__root = tmp 131 | if inst_dict: 132 | return (self.__class__, (items,), inst_dict) 133 | return self.__class__, (items,) 134 | 135 | def clear(self): 136 | 'od.clear() -> None. Remove all items from od.' 137 | try: 138 | for node in self.__map.itervalues(): 139 | del node[:] 140 | self.__root[:] = [self.__root, self.__root, None] 141 | self.__map.clear() 142 | except AttributeError: 143 | pass 144 | dict.clear(self) 145 | 146 | def get(self, key, default=None): 147 | if key in self: 148 | return self[key] 149 | return default 150 | 151 | setdefault = MutableMapping.setdefault 152 | update = MutableMapping.update 153 | pop = MutableMapping.pop 154 | keys = MutableMapping.keys 155 | values = MutableMapping.values 156 | items = MutableMapping.items 157 | __ne__ = MutableMapping.__ne__ 158 | 159 | def popitem(self, last=True): 160 | '''od.popitem() -> (k, v), return and remove a (key, value) pair. 161 | Pairs are returned in LIFO order if last is true or FIFO order if false. 
162 | 163 | ''' 164 | if not self: 165 | raise KeyError('dictionary is empty') 166 | key = next(reversed(self) if last else iter(self)) 167 | value = self.pop(key) 168 | return key, value 169 | 170 | def __repr__(self): 171 | 'od.__repr__() <==> repr(od)' 172 | if not self: 173 | return '%s()' % (self.__class__.__name__,) 174 | return '%s(%r)' % (self.__class__.__name__, self.items()) 175 | 176 | def copy(self): 177 | 'od.copy() -> a shallow copy of od' 178 | return self.__class__(self) 179 | 180 | @classmethod 181 | def fromkeys(cls, iterable, value=None): 182 | '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S 183 | and values equal to v (which defaults to None). 184 | 185 | ''' 186 | d = cls() 187 | for key in iterable: 188 | d[key] = value 189 | return d 190 | 191 | def __eq__(self, other): 192 | '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive 193 | while comparison to a regular mapping is order-insensitive. 194 | 195 | ''' 196 | if isinstance(other, OrderedDict): 197 | return len(self)==len(other) and \ 198 | all(imap(_eq, self.iteritems(), other.iteritems())) 199 | return dict.__eq__(self, other) 200 | 201 | def __del__(self): 202 | self.clear() # eliminate cyclical references 203 | 204 | 205 | status_reasons = { 206 | # Status Codes 207 | # Informational 208 | 100: 'Continue', 209 | 101: 'Switching Protocols', 210 | 102: 'Processing', 211 | 212 | # Successful 213 | 200: 'OK', 214 | 201: 'Created', 215 | 202: 'Accepted', 216 | 203: 'Non Authoritative Information', 217 | 204: 'No Content', 218 | 205: 'Reset Content', 219 | 206: 'Partial Content', 220 | 207: 'Multi Status', 221 | 226: 'IM Used', 222 | 223 | # Redirection 224 | 300: 'Multiple Choices', 225 | 301: 'Moved Permanently', 226 | 302: 'Found', 227 | 303: 'See Other', 228 | 304: 'Not Modified', 229 | 305: 'Use Proxy', 230 | 307: 'Temporary Redirect', 231 | 232 | # Client Error 233 | 400: 'Bad Request', 234 | 401: 'Unauthorized', 235 | 402: 'Payment Required', 236 
| 403: 'Forbidden', 237 | 404: 'Not Found', 238 | 405: 'Method Not Allowed', 239 | 406: 'Not Acceptable', 240 | 407: 'Proxy Authentication Required', 241 | 408: 'Request Timeout', 242 | 409: 'Conflict', 243 | 410: 'Gone', 244 | 411: 'Length Required', 245 | 412: 'Precondition Failed', 246 | 413: 'Request Entity Too Large', 247 | 414: 'Request URI Too Long', 248 | 415: 'Unsupported Media Type', 249 | 416: 'Requested Range Not Satisfiable', 250 | 417: 'Expectation Failed', 251 | 422: 'Unprocessable Entity', 252 | 423: 'Locked', 253 | 424: 'Failed Dependency', 254 | 426: 'Upgrade Required', 255 | 256 | # Server Error 257 | 500: 'Internal Server Error', 258 | 501: 'Not Implemented', 259 | 502: 'Bad Gateway', 260 | 503: 'Service Unavailable', 261 | 504: 'Gateway Timeout', 262 | 505: 'HTTP Version Not Supported', 263 | 507: 'Insufficient Storage', 264 | 510: 'Not Extended', 265 | } 266 | -------------------------------------------------------------------------------- /http_parser/parser.pyx: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 - 2 | # 3 | # This file is part of http-parser released under the MIT license. 4 | # See the NOTICE for more information. 
5 | 6 | from libc.stdlib cimport * 7 | import os 8 | import urlparse 9 | import zlib 10 | 11 | from http_parser.util import b, bytes_to_str, IOrderedDict, unquote 12 | 13 | cdef extern from "pyversion_compat.h": 14 | pass 15 | 16 | from cpython cimport PyBytes_FromStringAndSize 17 | 18 | cdef extern from "http_parser.h" nogil: 19 | 20 | cdef enum http_method: 21 | HTTP_DELETE, HTTP_GET, HTTP_HEAD, HTTP_POST, HTTP_PUT, 22 | HTTP_CONNECT, HTTP_OPTIONS, HTTP_TRACE, HTTP_COPY, HTTP_LOCK, 23 | HTTP_MKCOL, HTTP_MOVE, HTTP_PROPFIND, HTTP_PROPPATCH, HTTP_UNLOCK, 24 | HTTP_REPORT, HTTP_MKACTIVITY, HTTP_CHECKOUT, HTTP_MERGE, HTTP_MSEARCH, 25 | HTTP_NOTIFY, HTTP_SUBSCRIBE, HTTP_UNSUBSCRIBE, HTTP_PATCH 26 | 27 | 28 | cdef enum http_parser_type: 29 | HTTP_REQUEST, HTTP_RESPONSE, HTTP_BOTH 30 | 31 | cdef struct http_parser: 32 | int content_length 33 | unsigned short http_major 34 | unsigned short http_minor 35 | unsigned short status_code 36 | unsigned char method 37 | char upgrade 38 | void *data 39 | 40 | ctypedef int (*http_data_cb) (http_parser*, char *at, size_t length) 41 | ctypedef int (*http_cb) (http_parser*) 42 | 43 | struct http_parser_settings: 44 | http_cb on_message_begin 45 | http_data_cb on_url 46 | http_data_cb on_header_field 47 | http_data_cb on_header_value 48 | http_cb on_headers_complete 49 | http_data_cb on_body 50 | http_cb on_message_complete 51 | 52 | void http_parser_init(http_parser *parser, 53 | http_parser_type ptype) 54 | 55 | size_t http_parser_execute(http_parser *parser, 56 | http_parser_settings *settings, char *data, 57 | size_t len) 58 | 59 | int http_should_keep_alive(http_parser *parser) 60 | 61 | char *http_method_str(http_method) 62 | 63 | 64 | cdef int on_url_cb(http_parser *parser, char *at, 65 | size_t length): 66 | res = parser.data 67 | value = bytes_to_str(PyBytes_FromStringAndSize(at, length)) 68 | 69 | res.url = value 70 | return 0 71 | 72 | cdef int on_header_field_cb(http_parser *parser, char *at, 73 | size_t length): 74 | 
header_field = PyBytes_FromStringAndSize(at, length) 75 | res = parser.data 76 | 77 | if res._last_was_value: 78 | res._last_field = "" 79 | res._last_field += bytes_to_str(header_field) 80 | res._last_was_value = False 81 | return 0 82 | 83 | cdef int on_header_value_cb(http_parser *parser, char *at, 84 | size_t length): 85 | res = parser.data 86 | header_value = bytes_to_str(PyBytes_FromStringAndSize(at, length)) 87 | 88 | if res._last_field in res.headers: 89 | header_value = "%s, %s" % (res.headers[res._last_field], 90 | header_value) 91 | 92 | # update wsgi environ 93 | key = 'HTTP_%s' % res._last_field.upper().replace('-','_') 94 | res.environ[key] = header_value 95 | 96 | # add to headers 97 | res.headers[res._last_field] = header_value 98 | res._last_was_value = True 99 | return 0 100 | 101 | cdef int on_headers_complete_cb(http_parser *parser): 102 | res = parser.data 103 | res.headers_complete = True 104 | 105 | if res.decompress: 106 | encoding = res.headers.get('content-encoding') 107 | if encoding == 'gzip': 108 | res.decompressobj = zlib.decompressobj(16+zlib.MAX_WBITS) 109 | del res.headers['content-encoding'] 110 | elif encoding == 'deflate': 111 | res.decompressobj = zlib.decompressobj() 112 | del res.headers['content-encoding'] 113 | else: 114 | res.decompress = False 115 | 116 | return 0 117 | 118 | cdef int on_message_begin_cb(http_parser *parser): 119 | res = parser.data 120 | res.message_begin = True 121 | return 0 122 | 123 | cdef int on_body_cb(http_parser *parser, char *at, 124 | size_t length): 125 | res = parser.data 126 | value = PyBytes_FromStringAndSize(at, length) 127 | 128 | res.partial_body = True 129 | 130 | # decompress the value if needed 131 | if res.decompress: 132 | value = res.decompressobj.decompress(value) 133 | res.body.append(value) 134 | return 0 135 | 136 | cdef int on_message_complete_cb(http_parser *parser): 137 | res = parser.data 138 | res.message_complete = True 139 | return 0 140 | 141 | 142 | class 
_ParserData(object): 143 | 144 | def __init__(self, decompress=False): 145 | self.url = "" 146 | self.body = [] 147 | self.headers = IOrderedDict() 148 | self.environ = {} 149 | 150 | self.decompress = decompress 151 | self.decompressobj = None 152 | 153 | self.chunked = False 154 | 155 | self.headers_complete = False 156 | self.partial_body = False 157 | self.message_begin = False 158 | self.message_complete = False 159 | 160 | self._last_field = "" 161 | self._last_was_value = False 162 | 163 | cdef class HttpParser: 164 | """ Low level HTTP parser. """ 165 | 166 | cdef http_parser _parser 167 | cdef http_parser_settings _settings 168 | cdef object _data 169 | 170 | cdef str _path 171 | cdef str _query_string 172 | cdef str _fragment 173 | cdef object _parsed_url 174 | 175 | def __init__(self, kind=2, decompress=False): 176 | """ constructor of HttpParser object. 177 | 178 | 179 | :attr kind: Int, could be 0 to parseonly requests, 180 | 1 to parse only responses or 2 if we want to let 181 | the parser detect the type. 
182 | """ 183 | 184 | # set parser type 185 | if kind == 2: 186 | parser_type = HTTP_BOTH 187 | elif kind == 1: 188 | parser_type = HTTP_RESPONSE 189 | elif kind == 0: 190 | parser_type = HTTP_REQUEST 191 | 192 | # initialize parser 193 | http_parser_init(&self._parser, parser_type) 194 | self._data = _ParserData(decompress=decompress) 195 | self._parser.data = self._data 196 | self._parsed_url = None 197 | self._path = "" 198 | self._query_string = "" 199 | self._fragment = "" 200 | 201 | # set callback 202 | self._settings.on_url = on_url_cb 203 | self._settings.on_body = on_body_cb 204 | self._settings.on_header_field = on_header_field_cb 205 | self._settings.on_header_value = on_header_value_cb 206 | self._settings.on_headers_complete = on_headers_complete_cb 207 | self._settings.on_message_begin = on_message_begin_cb 208 | self._settings.on_message_complete = on_message_complete_cb 209 | 210 | def execute(self, char *data, size_t length): 211 | """ Execute the parser with the last chunk. We pass the length 212 | to let the parser know when EOF has been received. In this case 213 | length == 0. 214 | 215 | :return recved: Int, received length of the data parsed. if 216 | recvd != length you should return an error. 
217 | """ 218 | return http_parser_execute(&self._parser, &self._settings, 219 | data, length) 220 | 221 | def get_version(self): 222 | """ get HTTP version """ 223 | return (self._parser.http_major, self._parser.http_minor) 224 | 225 | def get_method(self): 226 | """ get HTTP method as string""" 227 | return http_method_str(self._parser.method) 228 | 229 | 230 | 231 | def get_status_code(self): 232 | """ get status code of a response as integer """ 233 | return self._parser.status_code 234 | 235 | def get_url(self): 236 | """ get full url of the request """ 237 | return self._data.url 238 | 239 | def maybe_parse_url(self): 240 | raw_url = self.get_url() 241 | if not self._parsed_url and raw_url: 242 | self._parsed_url = urlparse.urlsplit(raw_url) 243 | self._path = self._parsed_url.path or "" 244 | self._query_string = self._parsed_url.query or "" 245 | self._fragment = self._parsed_url.fragment or "" 246 | 247 | def get_path(self): 248 | """ get path of the request (url without query string and 249 | fragment """ 250 | self.maybe_parse_url() 251 | return self._path 252 | 253 | def get_query_string(self): 254 | """ get query string of the url """ 255 | self.maybe_parse_url() 256 | return self._query_string 257 | 258 | def get_fragment(self): 259 | """ get fragment of the url """ 260 | self.maybe_parse_url() 261 | return self._fragment 262 | 263 | def get_headers(self): 264 | """ get request/response headers, headers are returned in a 265 | OrderedDict that allows you to get value using insensitive keys. 
""" 266 | return self._data.headers 267 | 268 | def get_wsgi_environ(self): 269 | """ get WSGI environ based on the current request """ 270 | self.maybe_parse_url() 271 | environ = self._data.environ 272 | 273 | # clean special keys 274 | for key in ("CONTENT_LENGTH", "CONTENT_TYPE", "SCRIPT_NAME"): 275 | hkey = "HTTP_%s" % key 276 | if hkey in environ: 277 | environ[key] = environ.pop(hkey) 278 | 279 | script_name = environ.get('HTTP_SCRIPT_NAME', 280 | os.environ.get("SCRIPT_NAME", "")) 281 | 282 | if script_name: 283 | path_info = self._path.split(script_name, 1)[1] 284 | else: 285 | path_info = self._path 286 | 287 | environ.update({ 288 | 'REQUEST_METHOD': self.get_method(), 289 | 'SERVER_PROTOCOL': "HTTP/%s" % ".".join(map(str, 290 | self.get_version())), 291 | 'PATH_INFO': path_info, 292 | 'SCRIPT_NAME': script_name, 293 | 'QUERY_STRING': self._query_string, 294 | 'RAW_URI': self._data.url}) 295 | 296 | return environ 297 | 298 | def recv_body(self): 299 | """ return last chunk of the parsed body""" 300 | body = b("").join(self._data.body) 301 | self._data.body = [] 302 | self._data.partial_body = False 303 | return body 304 | 305 | def recv_body_into(self, barray): 306 | """ Receive the last chunk of the parsed bodyand store the data 307 | in a buffer rather than creating a new string. """ 308 | l = len(barray) 309 | body = b("").join(self._data.body) 310 | m = min(len(body), l) 311 | data, rest = body[:m], body[m:] 312 | barray[0:m] = bytes(data) 313 | if not rest: 314 | self._data.body = [] 315 | self._data.partial_body = False 316 | else: 317 | self._data.body = [rest] 318 | return m 319 | 320 | def is_upgrade(self): 321 | """ Do we get upgrade header in the request. Useful for 322 | websockets """ 323 | return self._parser_upgrade 324 | 325 | def is_headers_complete(self): 326 | """ return True if all headers have been parsed. 
""" 327 | return self._data.headers_complete 328 | 329 | def is_partial_body(self): 330 | """ return True if a chunk of body have been parsed """ 331 | return self._data.partial_body 332 | 333 | def is_message_begin(self): 334 | """ return True if the parsing start """ 335 | return self._data.message_begin 336 | 337 | def is_message_complete(self): 338 | """ return True if the parsing is done (we get EOF) """ 339 | return self._data.message_complete 340 | 341 | def is_chunked(self): 342 | """ return True if Transfer-Encoding header value is chunked""" 343 | te = self._data.headers.get('transfer-encoding', '').lower() 344 | return te == 'chunked' 345 | 346 | def should_keep_alive(self): 347 | """ return True if the connection should be kept alive 348 | """ 349 | return http_should_keep_alive(&self._parser) 350 | -------------------------------------------------------------------------------- /http_parser/http_parser.h: -------------------------------------------------------------------------------- 1 | /* Copyright Joyent, Inc. and other Node contributors. All rights reserved. 2 | * 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy 4 | * of this software and associated documentation files (the "Software"), to 5 | * deal in the Software without restriction, including without limitation the 6 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 7 | * sell copies of the Software, and to permit persons to whom the Software is 8 | * furnished to do so, subject to the following conditions: 9 | * 10 | * The above copyright notice and this permission notice shall be included in 11 | * all copies or substantial portions of the Software. 12 | * 13 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 16 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 19 | * IN THE SOFTWARE. 20 | */ 21 | #ifndef http_parser_h 22 | #define http_parser_h 23 | #ifdef __cplusplus 24 | extern "C" { 25 | #endif 26 | 27 | #define HTTP_PARSER_VERSION_MAJOR 1 28 | #define HTTP_PARSER_VERSION_MINOR 0 29 | 30 | #include 31 | #if defined(_WIN32) && !defined(__MINGW32__) && (!defined(_MSC_VER) || _MSC_VER<1600) 32 | typedef __int8 int8_t; 33 | typedef unsigned __int8 uint8_t; 34 | typedef __int16 int16_t; 35 | typedef unsigned __int16 uint16_t; 36 | typedef __int32 int32_t; 37 | typedef unsigned __int32 uint32_t; 38 | typedef __int64 int64_t; 39 | typedef unsigned __int64 uint64_t; 40 | 41 | typedef unsigned int size_t; 42 | typedef int ssize_t; 43 | #else 44 | #include 45 | #endif 46 | 47 | /* Compile with -DHTTP_PARSER_STRICT=0 to make less checks, but run 48 | * faster 49 | */ 50 | #ifndef HTTP_PARSER_STRICT 51 | # define HTTP_PARSER_STRICT 1 52 | #endif 53 | 54 | /* Compile with -DHTTP_PARSER_DEBUG=1 to add extra debugging information to 55 | * the error reporting facility. 56 | */ 57 | #ifndef HTTP_PARSER_DEBUG 58 | # define HTTP_PARSER_DEBUG 0 59 | #endif 60 | 61 | 62 | /* Maximium header size allowed */ 63 | #define HTTP_MAX_HEADER_SIZE (80*1024) 64 | 65 | 66 | typedef struct http_parser http_parser; 67 | typedef struct http_parser_settings http_parser_settings; 68 | typedef struct http_parser_result http_parser_result; 69 | 70 | 71 | /* Callbacks should return non-zero to indicate an error. The parser will 72 | * then halt execution. 73 | * 74 | * The one exception is on_headers_complete. In a HTTP_RESPONSE parser 75 | * returning '1' from on_headers_complete will tell the parser that it 76 | * should not expect a body. 
This is used when receiving a response to a 77 | * HEAD request which may contain 'Content-Length' or 'Transfer-Encoding: 78 | * chunked' headers that indicate the presence of a body. 79 | * 80 | * http_data_cb does not return data chunks. It will be call arbitrarally 81 | * many times for each string. E.G. you might get 10 callbacks for "on_path" 82 | * each providing just a few characters more data. 83 | */ 84 | typedef int (*http_data_cb) (http_parser*, const char *at, size_t length); 85 | typedef int (*http_cb) (http_parser*); 86 | 87 | 88 | /* Request Methods */ 89 | enum http_method 90 | { HTTP_DELETE = 0 91 | , HTTP_GET 92 | , HTTP_HEAD 93 | , HTTP_POST 94 | , HTTP_PUT 95 | /* pathological */ 96 | , HTTP_CONNECT 97 | , HTTP_OPTIONS 98 | , HTTP_TRACE 99 | /* webdav */ 100 | , HTTP_COPY 101 | , HTTP_LOCK 102 | , HTTP_MKCOL 103 | , HTTP_MOVE 104 | , HTTP_PROPFIND 105 | , HTTP_PROPPATCH 106 | , HTTP_UNLOCK 107 | /* subversion */ 108 | , HTTP_REPORT 109 | , HTTP_MKACTIVITY 110 | , HTTP_CHECKOUT 111 | , HTTP_MERGE 112 | /* upnp */ 113 | , HTTP_MSEARCH 114 | , HTTP_NOTIFY 115 | , HTTP_SUBSCRIBE 116 | , HTTP_UNSUBSCRIBE 117 | /* RFC-5789 */ 118 | , HTTP_PATCH 119 | }; 120 | 121 | 122 | enum http_parser_type { HTTP_REQUEST, HTTP_RESPONSE, HTTP_BOTH }; 123 | 124 | 125 | /* Flag values for http_parser.flags field */ 126 | enum flags 127 | { F_CHUNKED = 1 << 0 128 | , F_CONNECTION_KEEP_ALIVE = 1 << 1 129 | , F_CONNECTION_CLOSE = 1 << 2 130 | , F_TRAILING = 1 << 3 131 | , F_UPGRADE = 1 << 4 132 | , F_SKIPBODY = 1 << 5 133 | }; 134 | 135 | 136 | /* Map for errno-related constants 137 | * 138 | * The provided argument should be a macro that takes 2 arguments. 
139 | */ 140 | #define HTTP_ERRNO_MAP(XX) \ 141 | /* No error */ \ 142 | XX(OK, "success") \ 143 | \ 144 | /* Callback-related errors */ \ 145 | XX(CB_message_begin, "the on_message_begin callback failed") \ 146 | XX(CB_path, "the on_path callback failed") \ 147 | XX(CB_query_string, "the on_query_string callback failed") \ 148 | XX(CB_url, "the on_url callback failed") \ 149 | XX(CB_fragment, "the on_fragment callback failed") \ 150 | XX(CB_header_field, "the on_header_field callback failed") \ 151 | XX(CB_header_value, "the on_header_value callback failed") \ 152 | XX(CB_headers_complete, "the on_headers_complete callback failed") \ 153 | XX(CB_body, "the on_body callback failed") \ 154 | XX(CB_message_complete, "the on_message_complete callback failed") \ 155 | \ 156 | /* Parsing-related errors */ \ 157 | XX(INVALID_EOF_STATE, "stream ended at an unexpected time") \ 158 | XX(HEADER_OVERFLOW, \ 159 | "too many header bytes seen; overflow detected") \ 160 | XX(CLOSED_CONNECTION, \ 161 | "data received after completed connection: close message") \ 162 | XX(INVALID_VERSION, "invalid HTTP version") \ 163 | XX(INVALID_STATUS, "invalid HTTP status code") \ 164 | XX(INVALID_METHOD, "invalid HTTP method") \ 165 | XX(INVALID_URL, "invalid URL") \ 166 | XX(INVALID_HOST, "invalid host") \ 167 | XX(INVALID_PORT, "invalid port") \ 168 | XX(INVALID_PATH, "invalid path") \ 169 | XX(INVALID_QUERY_STRING, "invalid query string") \ 170 | XX(INVALID_FRAGMENT, "invalid fragment") \ 171 | XX(LF_EXPECTED, "LF character expected") \ 172 | XX(INVALID_HEADER_TOKEN, "invalid character in header") \ 173 | XX(INVALID_CONTENT_LENGTH, \ 174 | "invalid character in content-length header") \ 175 | XX(INVALID_CHUNK_SIZE, \ 176 | "invalid character in chunk size header") \ 177 | XX(INVALID_CONSTANT, "invalid constant string") \ 178 | XX(INVALID_INTERNAL_STATE, "encountered unexpected internal state")\ 179 | XX(STRICT, "strict mode assertion failed") \ 180 | XX(PAUSED, "parser is paused") \ 181 | 
XX(UNKNOWN, "an unknown error occurred") 182 | 183 | 184 | /* Define HPE_* values for each errno value above */ 185 | #define HTTP_ERRNO_GEN(n, s) HPE_##n, 186 | enum http_errno { 187 | HTTP_ERRNO_MAP(HTTP_ERRNO_GEN) 188 | }; 189 | #undef HTTP_ERRNO_GEN 190 | 191 | 192 | /* Get an http_errno value from an http_parser */ 193 | #define HTTP_PARSER_ERRNO(p) ((enum http_errno) (p)->http_errno) 194 | 195 | /* Get the line number that generated the current error */ 196 | #if HTTP_PARSER_DEBUG 197 | #define HTTP_PARSER_ERRNO_LINE(p) ((p)->error_lineno) 198 | #else 199 | #define HTTP_PARSER_ERRNO_LINE(p) 0 200 | #endif 201 | 202 | 203 | struct http_parser { 204 | /** PRIVATE **/ 205 | unsigned char type : 2; /* enum http_parser_type */ 206 | unsigned char flags : 6; /* F_* values from 'flags' enum; semi-public */ 207 | unsigned char state; /* enum state from http_parser.c */ 208 | unsigned char header_state; /* enum header_state from http_parser.c */ 209 | unsigned char index; /* index into current matcher */ 210 | 211 | uint32_t nread; /* # bytes read in various scenarios */ 212 | int64_t content_length; /* # bytes in body (0 if no Content-Length header) */ 213 | 214 | /** READ-ONLY **/ 215 | unsigned short http_major; 216 | unsigned short http_minor; 217 | unsigned short status_code; /* responses only */ 218 | unsigned char method; /* requests only */ 219 | unsigned char http_errno : 7; 220 | 221 | /* 1 = Upgrade header was present and the parser has exited because of that. 222 | * 0 = No upgrade header present. 223 | * Should be checked when http_parser_execute() returns in addition to 224 | * error checking. 
225 | */ 226 | unsigned char upgrade : 1; 227 | 228 | #if HTTP_PARSER_DEBUG 229 | uint32_t error_lineno; 230 | #endif 231 | 232 | /** PUBLIC **/ 233 | void *data; /* A pointer to get hook to the "connection" or "socket" object */ 234 | }; 235 | 236 | 237 | struct http_parser_settings { 238 | http_cb on_message_begin; 239 | http_data_cb on_url; 240 | http_data_cb on_header_field; 241 | http_data_cb on_header_value; 242 | http_cb on_headers_complete; 243 | http_data_cb on_body; 244 | http_cb on_message_complete; 245 | }; 246 | 247 | 248 | enum http_parser_url_fields 249 | { UF_SCHEMA = 0 250 | , UF_HOST = 1 251 | , UF_PORT = 2 252 | , UF_PATH = 3 253 | , UF_QUERY = 4 254 | , UF_FRAGMENT = 5 255 | , UF_MAX = 6 256 | }; 257 | 258 | 259 | /* Result structure for http_parser_parse_url(). 260 | * 261 | * Callers should index into field_data[] with UF_* values iff field_set 262 | * has the relevant (1 << UF_*) bit set. As a courtesy to clients (and 263 | * because we probably have padding left over), we convert any port to 264 | * a uint16_t. 265 | */ 266 | struct http_parser_url { 267 | uint16_t field_set; /* Bitmask of (1 << UF_*) values */ 268 | uint16_t port; /* Converted UF_PORT string */ 269 | 270 | struct { 271 | uint16_t off; /* Offset into buffer in which field starts */ 272 | uint16_t len; /* Length of run in buffer */ 273 | } field_data[UF_MAX]; 274 | }; 275 | 276 | 277 | void http_parser_init(http_parser *parser, enum http_parser_type type); 278 | 279 | 280 | size_t http_parser_execute(http_parser *parser, 281 | const http_parser_settings *settings, 282 | const char *data, 283 | size_t len); 284 | 285 | 286 | /* If http_should_keep_alive() in the on_headers_complete or 287 | * on_message_complete callback returns true, then this will be should be 288 | * the last message on the connection. 289 | * If you are the server, respond with the "Connection: close" header. 290 | * If you are the client, close the connection. 
291 | */ 292 | int http_should_keep_alive(http_parser *parser); 293 | 294 | /* Returns a string version of the HTTP method. */ 295 | const char *http_method_str(enum http_method m); 296 | 297 | /* Return a string name of the given error */ 298 | const char *http_errno_name(enum http_errno err); 299 | 300 | /* Return a string description of the given error */ 301 | const char *http_errno_description(enum http_errno err); 302 | 303 | /* Parse a URL; return nonzero on failure */ 304 | int http_parser_parse_url(const char *buf, size_t buflen, 305 | int is_connect, 306 | struct http_parser_url *u); 307 | 308 | /* Pause or un-pause the parser; a nonzero value pauses */ 309 | void http_parser_pause(http_parser *parser, int paused); 310 | 311 | #ifdef __cplusplus 312 | } 313 | #endif 314 | #endif 315 | -------------------------------------------------------------------------------- /http_parser/pyparser.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 - 2 | # 3 | # This file is part of http-parser released under the MIT license. 4 | # See the NOTICE for more information. 
5 | 6 | import os 7 | import re 8 | import sys 9 | import urlparse 10 | import zlib 11 | 12 | from http_parser.util import b, bytes_to_str, IOrderedDict, StringIO, unquote 13 | 14 | 15 | METHOD_RE = re.compile("[A-Z0-9$-_.]{3,20}") 16 | VERSION_RE = re.compile("HTTP/(\d+).(\d+)") 17 | STATUS_RE = re.compile("(\d{3})\s*(\w*)") 18 | HEADER_RE = re.compile("[\x00-\x1F\x7F()<>@,;:\[\]={} \t\\\\\"]") 19 | 20 | # errors 21 | BAD_FIRST_LINE = 0 22 | INVALID_HEADER = 1 23 | INVALID_CHUNK = 2 24 | 25 | class InvalidRequestLine(Exception): 26 | """ error raised when first line is invalid """ 27 | 28 | class InvalidHeader(Exception): 29 | """ error raised on invalid header """ 30 | 31 | class InvalidChunkSize(Exception): 32 | """ error raised when we parse an invalid chunk size """ 33 | 34 | class HttpParser(object): 35 | 36 | def __init__(self, kind=2, decompress=False): 37 | self.kind = kind 38 | self.decompress = decompress 39 | 40 | # errors vars 41 | self.errno = None 42 | self.errstr = "" 43 | 44 | # protected variables 45 | self._buf = [] 46 | self._version = None 47 | self._method = None 48 | self._status_code = None 49 | self._status = None 50 | self._reason = None 51 | self._url = None 52 | self._path = None 53 | self._query_string = None 54 | self._fragment= None 55 | self._headers = IOrderedDict() 56 | self._environ = dict() 57 | self._chunked = False 58 | self._body = [] 59 | self._trailers = None 60 | self._partial_body = False 61 | self._clen = None 62 | self._clen_rest = None 63 | 64 | # private events 65 | self.__on_firstline = False 66 | self.__on_headers_complete = False 67 | self.__on_message_begin = False 68 | self.__on_message_complete = False 69 | 70 | self.__decompress_obj = None 71 | 72 | def get_version(self): 73 | return self._version 74 | 75 | def get_method(self): 76 | return self._method 77 | 78 | def get_status_code(self): 79 | return self._status_code 80 | 81 | def get_url(self): 82 | return self._url 83 | 84 | def get_path(self): 85 | return 
self._path 86 | 87 | def get_query_string(self): 88 | return self._query_string 89 | 90 | def get_fragment(self): 91 | return self._fragment 92 | 93 | def get_headers(self): 94 | return self._headers 95 | 96 | def get_environ(self): 97 | if not self.__on_headers_complete: 98 | return None 99 | 100 | environ = self._environ.copy() 101 | # clean special keys 102 | for key in ("CONTENT_LENGTH", "CONTENT_TYPE", "SCRIPT_NAME"): 103 | hkey = "HTTP_%s" % key 104 | if hkey in environ: 105 | environ[key] = environ.pop(hkey) 106 | 107 | script_name = environ.get('HTTP_SCRIPT_NAME', 108 | os.environ.get("SCRIPT_NAME", "")) 109 | if script_name: 110 | path_info = self._path.split(script_name, 1)[1] 111 | environ.update({ 112 | "PATH_INFO": unquote(path_info), 113 | "SCRIPT_NAME": script_name}) 114 | else: 115 | environ['SCRIPT_NAME'] = "" 116 | 117 | if environ.get('HTTP_X_FORWARDED_PROTOCOL', '').lower() == "ssl": 118 | environ['wsgi.url_scheme'] = "https" 119 | elif environ.get('HTTP_X_FORWARDED_SSL', '').lower() == "on": 120 | environ['wsgi.url_scheme'] = "https" 121 | else: 122 | environ['wsgi.url_scheme'] = "http" 123 | 124 | return environ 125 | 126 | def recv_body(self): 127 | """ return last chunk of the parsed body""" 128 | body = b("").join(self._body) 129 | self._body = [] 130 | self._partial_body = False 131 | return body 132 | 133 | def recv_body_into(self, barray): 134 | """ Receive the last chunk of the parsed bodyand store the data 135 | in a buffer rather than creating a new string. """ 136 | l = len(barray) 137 | body = b("").join(self._body) 138 | m = min(len(body), l) 139 | data, rest = body[:m], body[m:] 140 | barray[0:m] = data 141 | if not rest: 142 | self._body = [] 143 | self._partial_body = False 144 | else: 145 | self._body = [rest] 146 | return m 147 | 148 | def is_upgrade(self): 149 | """ Do we get upgrade header in the request. 
Useful for 150 | websockets """ 151 | return self._headers.get('connection', "") == "upgrade" 152 | 153 | def is_headers_complete(self): 154 | """ return True if all headers have been parsed. """ 155 | return self.__on_headers_complete 156 | 157 | def is_partial_body(self): 158 | """ return True if a chunk of body have been parsed """ 159 | return self._partial_body 160 | 161 | def is_message_begin(self): 162 | """ return True if the parsing start """ 163 | return self.__on_message_begin 164 | 165 | def is_message_complete(self): 166 | """ return True if the parsing is done (we get EOF) """ 167 | return self.__on_message_complete 168 | 169 | def is_chunked(self): 170 | """ return True if Transfer-Encoding header value is chunked""" 171 | return self._chunked 172 | 173 | def should_keep_alive(self): 174 | """ return True if the connection should be kept alive 175 | """ 176 | hconn = self._headers.get('connection', "").lower() 177 | if hconn == "close": 178 | return False 179 | elif hconn == "keep-alive": 180 | return True 181 | return self._version == (1, 1) 182 | 183 | def execute(self, data, length): 184 | # end of body can be passed manually by putting a length of 0 185 | 186 | if length == 0: 187 | self.on_message_complete = True 188 | return length 189 | 190 | # start to parse 191 | nb_parsed = 0 192 | while True: 193 | if not self.__on_firstline: 194 | idx = data.find(b("\r\n")) 195 | if idx < 0: 196 | self._buf.append(data) 197 | return len(data) 198 | else: 199 | self.__on_firstline = True 200 | self._buf.append(data[:idx]) 201 | first_line = bytes_to_str(b("").join(self._buf)) 202 | nb_parsed = nb_parsed + idx + 2 203 | 204 | rest = data[idx+2:] 205 | data = b("") 206 | if self._parse_firstline(first_line): 207 | self._buf = [rest] 208 | else: 209 | return nb_parsed 210 | elif not self.__on_headers_complete: 211 | if data: 212 | self._buf.append(data) 213 | data = b("") 214 | 215 | try: 216 | to_parse = b("").join(self._buf) 217 | ret = 
self._parse_headers(to_parse) 218 | if not ret: 219 | return length 220 | nb_parsed = nb_parsed + (len(to_parse) - ret) 221 | except InvalidHeader, e: 222 | self.errno = INVALID_HEADER 223 | self.errstr = str(e) 224 | return nb_parsed 225 | elif not self.__on_message_complete: 226 | if not self.__on_message_begin: 227 | self.__on_message_begin = True 228 | 229 | if data: 230 | self._buf.append(data) 231 | data = b("") 232 | 233 | ret = self._parse_body() 234 | if ret is None: 235 | return length 236 | 237 | elif ret < 0: 238 | return ret 239 | elif ret == 0: 240 | self.__on_message_complete = True 241 | return length 242 | else: 243 | nb_parsed = max(length, ret) 244 | 245 | else: 246 | return 0 247 | 248 | def _parse_firstline(self, line): 249 | try: 250 | if self.kind == 2: # auto detect 251 | try: 252 | self._parse_request_line(line) 253 | except InvalidRequestLine: 254 | self._parse_response_line(line) 255 | elif self.kind == 1: 256 | self._parse_response_line(line) 257 | elif self.kind == 0: 258 | self._parse_request_line(line) 259 | except InvalidRequestLine, e: 260 | self.errno = BAD_FIRST_LINE 261 | self.errstr = str(e) 262 | return False 263 | return True 264 | 265 | def _parse_response_line(self, line): 266 | bits = line.split(None, 1) 267 | if len(bits) != 2: 268 | raise InvalidRequestLine(line) 269 | 270 | # version 271 | matchv = VERSION_RE.match(bits[0]) 272 | if matchv is None: 273 | raise InvalidRequestLine("Invalid HTTP version: %s" % bits[0]) 274 | self._version = (int(matchv.group(1)), int(matchv.group(2))) 275 | 276 | # status 277 | matchs = STATUS_RE.match(bits[1]) 278 | if matchs is None: 279 | raise InvalidRequestLine("Invalid status %" % bits[1]) 280 | 281 | self._status = bits[1] 282 | self._status_code = int(matchs.group(1)) 283 | self._reason = matchs.group(2) 284 | 285 | def _parse_request_line(self, line): 286 | bits = line.split(None, 2) 287 | if len(bits) != 3: 288 | raise InvalidRequestLine(line) 289 | 290 | # Method 291 | if not 
METHOD_RE.match(bits[0]): 292 | raise InvalidRequestLine("invalid Method: %s" % bits[0]) 293 | self._method = bits[0].upper() 294 | 295 | # URI 296 | self._url = bits[1] 297 | parts = urlparse.urlsplit(bits[1]) 298 | self._path = parts.path or "" 299 | self._query_string = parts.query or "" 300 | self._fragment = parts.fragment or "" 301 | 302 | # Version 303 | match = VERSION_RE.match(bits[2]) 304 | if match is None: 305 | raise InvalidRequestLine("Invalid HTTP version: %s" % bits[2]) 306 | self._version = (int(match.group(1)), int(match.group(2))) 307 | 308 | # update environ 309 | self.environ.update({ 310 | "PATH_INFO": self._path, 311 | "QUERY_STRING": self._query_string, 312 | "RAW_URI": self._url, 313 | "REQUEST_METHOD": self._method, 314 | "SERVER_PROTOCOL": bits[2]}) 315 | 316 | def _parse_headers(self, data): 317 | idx = data.find(b("\r\n\r\n")) 318 | if idx < 0: # we don't have all headers 319 | return False 320 | 321 | # Split lines on \r\n keeping the \r\n on each line 322 | lines = [bytes_to_str(line) + "\r\n" for line in 323 | data[:idx].split(b("\r\n"))] 324 | 325 | # Parse headers into key/value pairs paying attention 326 | # to continuation lines. 327 | while len(lines): 328 | # Parse initial header name : value pair. 
329 | curr = lines.pop(0) 330 | if curr.find(":") < 0: 331 | raise InvalidHeader("invalid line %s" % curr.strip()) 332 | name, value = curr.split(":", 1) 333 | name = name.rstrip(" \t").upper() 334 | if HEADER_RE.search(name): 335 | raise InvalidHeader("invalid header name %s" % name) 336 | name, value = name.strip(), [value.lstrip()] 337 | 338 | # Consume value continuation lines 339 | while len(lines) and lines[0].startswith((" ", "\t")): 340 | value.append(lines.pop(0)) 341 | value = ''.join(value).rstrip() 342 | 343 | # multiple headers 344 | if name in self._headers: 345 | value = "%s, %s" % (self._headers[name], value) 346 | 347 | # store new header value 348 | self._headers[name] = value 349 | 350 | # update WSGI environ 351 | key = 'HTTP_%s' % name.upper().replace('-','_') 352 | self._environ[key] = value 353 | 354 | # detect now if body is sent by chunks. 355 | clen = self._headers.get('content-length') 356 | te = self._headers.get('transfer-encoding', '').lower() 357 | 358 | if clen is not None: 359 | try: 360 | self._clen_rest = self._clen = int(clen) 361 | except ValueError: 362 | pass 363 | else: 364 | self._chunked = (te == 'chunked') 365 | if not self._chunked: 366 | self._clen_rest = sys.maxint 367 | 368 | # detect encoding and set decompress object 369 | encoding = self._headers.get('content-encoding') 370 | if encoding == "gzip": 371 | self.__decompress_obj = zlib.decompressobj(16+zlib.MAX_WBITS) 372 | elif encoding == "deflate": 373 | self.__decompress_obj = zlib.decompressobj() 374 | 375 | rest = data[idx+4:] 376 | self._buf = [rest] 377 | self.__on_headers_complete = True 378 | return len(rest) 379 | 380 | def _parse_body(self): 381 | if not self._chunked: 382 | body_part = b("").join(self._buf) 383 | self._clen_rest -= len(body_part) 384 | 385 | # maybe decompress 386 | if self.__decompress_obj is not None: 387 | body_part = self.__decompress_obj.decompress(body_part) 388 | 389 | self._partial_body = True 390 | self._body.append(body_part) 391 
| self._buf = [] 392 | 393 | if self._clen_rest <= 0: 394 | self.__on_message_complete = True 395 | return 396 | else: 397 | data = b("").join(self._buf) 398 | try: 399 | 400 | size, rest = self._parse_chunk_size(data) 401 | except InvalidChunkSize, e: 402 | self.errno = INVALID_CHUNK 403 | self.errstr = "invalid chunk size [%s]" % str(e) 404 | return -1 405 | 406 | if size == 0: 407 | return size 408 | 409 | if size is None or len(rest) < size: 410 | return None 411 | 412 | 413 | body_part, rest = rest[:size], rest[size:] 414 | if len(rest) < 2: 415 | self.errno = INVALID_CHUNK 416 | self.errstr = "chunk missing terminator [%s]" % data 417 | return -1 418 | 419 | # maybe decompress 420 | if self.__decompress_obj is not None: 421 | body_part = self.__decompress_obj.decompress(body_part) 422 | 423 | self._partial_body = True 424 | self._body.append(body_part) 425 | 426 | self._buf = [rest[2:]] 427 | return len(rest) 428 | 429 | def _parse_chunk_size(self, data): 430 | idx = data.find(b("\r\n")) 431 | if idx < 0: 432 | return None, None 433 | line, rest_chunk = data[:idx], data[idx+2:] 434 | chunk_size = line.split(b(";"), 1)[0].strip() 435 | try: 436 | chunk_size = int(chunk_size, 16) 437 | except ValueError: 438 | raise InvalidChunkSize(chunk_size) 439 | 440 | if chunk_size == 0: 441 | self._parse_trailers(rest_chunk) 442 | return 0, None 443 | return chunk_size, rest_chunk 444 | 445 | def _parse_trailers(self, data): 446 | idx = data.find(b("\r\n\r\n")) 447 | 448 | if data[:2] == b("\r\n"): 449 | self._trailers = self._parse_headers(data[:idx]) 450 | -------------------------------------------------------------------------------- /http_parser/py25.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 - 2 | # 3 | # This file is part of http-parser released under the MIT license. 4 | # See the NOTICE for more information. 
5 | 6 | 7 | 8 | import array 9 | import codecs 10 | from UserDict import DictMixin 11 | try: 12 | from thread import allocate_lock as Lock 13 | except ImportError: 14 | from dummy_thread import allocate_lock as Lock 15 | 16 | # open() uses st_blksize whenever we can 17 | DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes 18 | 19 | bytes = str 20 | # bytearray is a mutable type that is easily turned into an instance of 21 | # bytes 22 | class bytearray(list): 23 | 24 | def __init__(self, i=0): 25 | if isinstance(i, int): 26 | list.__init__(self, [' '] * i) 27 | else: 28 | list.__init__(self, i) 29 | 30 | # for bytes(bytearray()) usage 31 | def __str__(self): return ''.join(self) 32 | # append automatically converts integers to characters 33 | def append(self, item): 34 | if isinstance(item, str): 35 | list.append(self, item) 36 | else: 37 | list.append(self, chr(item)) 38 | # += 39 | def __iadd__(self, other): 40 | for byte in other: 41 | self.append(byte) 42 | return self 43 | 44 | def endswith(self, c): 45 | return self[-1] == c 46 | 47 | def startswith(self, c): 48 | return self[0] == c 49 | 50 | 51 | class IOrderedDict(dict, DictMixin): 52 | 53 | 54 | def __init__(self, *args, **kwds): 55 | if len(args) > 1: 56 | raise TypeError('expected at most 1 arguments, got %d' % len(args)) 57 | try: 58 | self.__end 59 | except AttributeError: 60 | self.clear() 61 | self.update(*args, **kwds) 62 | self.__lower = {} 63 | 64 | def get(self, key, default=None): 65 | if key in self: 66 | return self[key] 67 | return default 68 | 69 | def clear(self): 70 | self.__end = end = [] 71 | end += [None, end, end] # sentinel node for doubly linked list 72 | self.__map = {} # key --> [key, prev, next] 73 | self.__lower = {} # key -> key.lower() 74 | dict.clear(self) 75 | 76 | def __setitem__(self, key, value): 77 | if key not in self: 78 | end = self.__end 79 | curr = end[1] 80 | curr[2] = end[1] = self.__map[key] = [key, curr, end] 81 | self.__lower[key.lower()] = key 82 | key = 
self.__lower[key.lower()] 83 | dict.__setitem__(self, key, value) 84 | 85 | def __delitem__(self, key): 86 | if key in self: 87 | key = self.__lower.pop(key.lower()) 88 | 89 | dict.__delitem__(self, key) 90 | key, prev, next = self.__map.pop(key) 91 | prev[2] = next 92 | next[1] = prev 93 | 94 | def __getitem__(self, key, dict_getitem=dict.__getitem__): 95 | if key in self: 96 | key = self.__lower.get(key.lower()) 97 | return dict_getitem(self, key) 98 | 99 | def __contains__(self, key): 100 | return key.lower() in self.__lower 101 | 102 | def __iter__(self): 103 | end = self.__end 104 | curr = end[2] 105 | while curr is not end: 106 | yield curr[0] 107 | curr = curr[2] 108 | 109 | def __reversed__(self): 110 | end = self.__end 111 | curr = end[1] 112 | while curr is not end: 113 | yield curr[0] 114 | curr = curr[1] 115 | 116 | def popitem(self, last=True): 117 | if not self: 118 | raise KeyError('dictionary is empty') 119 | if last: 120 | key = reversed(self).next() 121 | else: 122 | key = iter(self).next() 123 | value = self.pop(key) 124 | return key, value 125 | 126 | def __reduce__(self): 127 | items = [[k, self[k]] for k in self] 128 | tmp = self.__map, self.__end 129 | del self.__map, self.__end 130 | inst_dict = vars(self).copy() 131 | self.__map, self.__end = tmp 132 | if inst_dict: 133 | return (self.__class__, (items,), inst_dict) 134 | return self.__class__, (items,) 135 | 136 | def keys(self): 137 | return list(self) 138 | 139 | setdefault = DictMixin.setdefault 140 | update = DictMixin.update 141 | pop = DictMixin.pop 142 | values = DictMixin.values 143 | items = DictMixin.items 144 | iterkeys = DictMixin.iterkeys 145 | itervalues = DictMixin.itervalues 146 | iteritems = DictMixin.iteritems 147 | 148 | def __repr__(self): 149 | if not self: 150 | return '%s()' % (self.__class__.__name__,) 151 | return '%s(%r)' % (self.__class__.__name__, self.items()) 152 | 153 | def copy(self): 154 | return self.__class__(self) 155 | 156 | @classmethod 157 | def 
fromkeys(cls, iterable, value=None): 158 | d = cls() 159 | for key in iterable: 160 | d[key] = value 161 | return d 162 | 163 | def __eq__(self, other): 164 | if isinstance(other, OrderedDict): 165 | if len(self) != len(other): 166 | return False 167 | for p, q in zip(self.items(), other.items()): 168 | if p != q: 169 | return False 170 | return True 171 | return dict.__eq__(self, other) 172 | 173 | def __ne__(self, other): 174 | return not self == other 175 | 176 | 177 | 178 | class IOBase(object): 179 | 180 | ### Flush and close ### 181 | 182 | def seek(self, pos, whence=0): 183 | raise NotImplementedError 184 | 185 | def tell(self): 186 | """Return current stream position.""" 187 | return self.seek(0, 1) 188 | 189 | def truncate(self, pos=None): 190 | raise NotImplementedError 191 | 192 | def flush(self): 193 | """Flush write buffers, if applicable. 194 | 195 | This is not implemented for read-only and non-blocking streams. 196 | """ 197 | self._checkClosed() 198 | # XXX Should this return the number of bytes written??? 199 | 200 | __closed = False 201 | 202 | def close(self): 203 | """Flush and close the IO object. 204 | 205 | This method has no effect if the file is already closed. 206 | """ 207 | if not self.__closed: 208 | self.flush() 209 | self.__closed = True 210 | 211 | def __del__(self): 212 | """Destructor. Calls close().""" 213 | # The try/except block is in case this is called at program 214 | # exit time, when it's possible that globals have already been 215 | # deleted, and then the close() call might fail. Since 216 | # there's nothing we can do about such failures and they annoy 217 | # the end users, we suppress the traceback. 218 | try: 219 | self.close() 220 | except: 221 | pass 222 | 223 | 224 | @property 225 | def closed(self): 226 | """closed: bool. True iff the file has been closed. 227 | 228 | For backwards compatibility, this is a property, not a predicate. 
229 | """ 230 | return self.__closed 231 | 232 | def _checkClosed(self, msg=None): 233 | """Internal: raise an ValueError if file is closed 234 | """ 235 | if self.closed: 236 | raise ValueError("I/O operation on closed file." 237 | if msg is None else msg) 238 | 239 | ### Context manager ### 240 | 241 | def __enter__(self): 242 | """Context management protocol. Returns self.""" 243 | self._checkClosed() 244 | return self 245 | 246 | def __exit__(self, *args): 247 | """Context management protocol. Calls close()""" 248 | self.close() 249 | 250 | 251 | def readable(self): 252 | """Return whether object was opened for reading. 253 | 254 | If False, read() will raise IOError. 255 | """ 256 | return False 257 | 258 | def _checkReadable(self, msg=None): 259 | """Internal: raise an IOError if file is not readable 260 | """ 261 | if not self.readable(): 262 | raise IOError("File or stream is not readable." 263 | if msg is None else msg) 264 | 265 | 266 | ### Readline[s] ### 267 | 268 | def readline(self, limit=-1): 269 | r"""Read and return a line from the stream. 270 | 271 | If limit is specified, at most limit bytes will be read. 272 | 273 | The line terminator is always b'\n' for binary files; for text 274 | files, the newlines argument to open can be used to select the line 275 | terminator(s) recognized. 276 | """ 277 | # For backwards compatibility, a (slowish) readline(). 
278 | if hasattr(self, "peek"): 279 | def nreadahead(): 280 | readahead = self.peek(1) 281 | if not readahead: 282 | return 1 283 | 284 | readahead = "".join(readahead) 285 | n = (readahead.find("\n") + 1) or len(readahead) 286 | if limit >= 0: 287 | n = min(n, limit) 288 | return n 289 | else: 290 | def nreadahead(): 291 | return 1 292 | if limit is None: 293 | limit = -1 294 | elif not isinstance(limit, (int, long)): 295 | raise TypeError("limit must be an integer") 296 | res = bytearray() 297 | while limit < 0 or len(res) < limit: 298 | b = self.read(nreadahead()) 299 | if not b: 300 | break 301 | res += b 302 | if res.endswith("\n"): 303 | break 304 | return bytes(res) 305 | 306 | def __iter__(self): 307 | self._checkClosed() 308 | return self 309 | 310 | def next(self): 311 | line = self.readline() 312 | if not line: 313 | raise StopIteration 314 | return line 315 | 316 | def readlines(self, hint=None): 317 | """Return a list of lines from the stream. 318 | 319 | hint can be specified to control the number of lines read: no more 320 | lines will be read if the total size (in bytes/characters) of all 321 | lines so far exceeds hint. 322 | """ 323 | if hint is not None and not isinstance(hint, (int, long)): 324 | raise TypeError("integer or None expected") 325 | if hint is None or hint <= 0: 326 | return list(self) 327 | n = 0 328 | lines = [] 329 | for line in self: 330 | lines.append(line) 331 | n += len(line) 332 | if n >= hint: 333 | break 334 | return lines 335 | 336 | 337 | class RawIOBase(IOBase): 338 | 339 | """Base class for raw binary I/O.""" 340 | 341 | # The read() method is implemented by calling readinto(); derived 342 | # classes that want to support read() only need to implement 343 | # readinto() as a primitive operation. In general, readinto() can be 344 | # more efficient than read(). 
345 | 346 | # (It would be tempting to also provide an implementation of 347 | # readinto() in terms of read(), in case the latter is a more suitable 348 | # primitive operation, but that would lead to nasty recursion in case 349 | # a subclass doesn't implement either.) 350 | 351 | 352 | def read(self, n=-1): 353 | """Read and return up to n bytes. 354 | 355 | Returns an empty bytes object on EOF, or None if the object is 356 | set not to block and has no data to read. 357 | """ 358 | if n is None: 359 | n = -1 360 | if n < 0: 361 | return self.readall() 362 | b = bytearray(n.__index__()) 363 | n = self.readinto(b) 364 | if n is None: 365 | return None 366 | del b[n:] 367 | return bytes(b) 368 | 369 | def readall(self): 370 | """Read until EOF, using multiple read() call.""" 371 | res = bytearray() 372 | while True: 373 | data = self.read(DEFAULT_BUFFER_SIZE) 374 | if not data: 375 | break 376 | res += data 377 | return bytes(res) 378 | 379 | def readinto(self, b): 380 | """Read up to len(b) bytes into b. 381 | 382 | Returns number of bytes read (0 for EOF), or None if the object 383 | is set not to block and has no data to read. 384 | """ 385 | raise NotImplementedError 386 | 387 | class BufferedReader(RawIOBase): 388 | 389 | def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE): 390 | self.raw = raw 391 | if buffer_size <= 0: 392 | raise ValueError("invalid buffer size") 393 | self.buffer_size = buffer_size 394 | self._reset_read_buf() 395 | self._read_lock = Lock() 396 | 397 | def _reset_read_buf(self): 398 | self._read_buf = "" 399 | self._read_pos = 0 400 | 401 | def read(self, n=None): 402 | """Read n bytes. 403 | 404 | Returns exactly n bytes of data unless the underlying raw IO 405 | stream reaches EOF or if the call would block in non-blocking 406 | mode. If n is negative, read until EOF or until read() would 407 | block. 
408 | """ 409 | if n is not None and n < -1: 410 | raise ValueError("invalid number of bytes to read") 411 | 412 | self._read_lock.acquire() 413 | try: 414 | return self._read_unlocked(n) 415 | finally: 416 | self._read_lock.release() 417 | 418 | def readinto(self, b): 419 | """Read up to len(b) bytes into b. 420 | 421 | Like read(), this may issue multiple reads to the underlying raw 422 | stream, unless the latter is 'interactive'. 423 | 424 | Returns the number of bytes read (0 for EOF). 425 | 426 | Raises BlockingIOError if the underlying raw stream has no 427 | data at the moment. 428 | """ 429 | # XXX This ought to work with anything that supports the buffer API 430 | data = self.read(len(b)) 431 | n = len(data) 432 | try: 433 | b[:n] = data 434 | except TypeError, err: 435 | import array 436 | if not isinstance(b, array.array): 437 | raise err 438 | b[:n] = array.array('b', data) 439 | return n 440 | 441 | def _read_unlocked(self, n=None): 442 | nodata_val = "" 443 | empty_values = ("", None) 444 | buf = self._read_buf 445 | pos = self._read_pos 446 | 447 | # Special case for when the number of bytes to read is unspecified. 448 | if n is None or n == -1: 449 | self._reset_read_buf() 450 | chunks = [buf[pos:]] # Strip the consumed bytes. 451 | current_size = 0 452 | while True: 453 | # Read until EOF or until read() would block. 454 | chunk = self.raw.read() 455 | if chunk in empty_values: 456 | nodata_val = chunk 457 | break 458 | current_size += len(chunk) 459 | chunks.append(chunk) 460 | return "".join(chunks) or nodata_val 461 | 462 | # The number of bytes to read is specified, return at most n bytes. 463 | avail = len(buf) - pos # Length of the available buffered data. 464 | if n <= avail: 465 | # Fast path: the data to read is fully buffered. 466 | self._read_pos += n 467 | return buf[pos:pos+n] 468 | # Slow path: read from the stream until enough bytes are read, 469 | # or until an EOF occurs or until read() would block. 
470 | chunks = [buf[pos:]] 471 | wanted = max(self.buffer_size, n) 472 | while avail < n: 473 | chunk = self.raw.read(wanted) 474 | if chunk in empty_values: 475 | nodata_val = chunk 476 | break 477 | avail += len(chunk) 478 | chunks.append(chunk) 479 | # n is more then avail only when an EOF occurred or when 480 | # read() would have blocked. 481 | n = min(n, avail) 482 | out = "".join(chunks) 483 | self._read_buf = out[n:] # Save the extra data in the buffer. 484 | self._read_pos = 0 485 | return out[:n] if out else nodata_val 486 | 487 | def peek(self, n=0): 488 | """Returns buffered bytes without advancing the position. 489 | 490 | The argument indicates a desired minimal number of bytes; we 491 | do at most one raw read to satisfy it. We never return more 492 | than self.buffer_size. 493 | """ 494 | self._read_lock.acquire() 495 | try: 496 | return self._peek_unlocked(n) 497 | finally: 498 | self._read_lock.release() 499 | 500 | def _peek_unlocked(self, n=0): 501 | want = min(n, self.buffer_size) 502 | have = len(self._read_buf) - self._read_pos 503 | if have < want or have <= 0: 504 | to_read = self.buffer_size - have 505 | current = self.raw.read(to_read) 506 | if current: 507 | self._read_buf = self._read_buf[self._read_pos:] + current 508 | self._read_pos = 0 509 | return self._read_buf[self._read_pos:] 510 | 511 | def read1(self, n): 512 | """Reads up to n bytes, with at most one read() system call.""" 513 | # Returns up to n bytes. If at least one byte is buffered, we 514 | # only return buffered bytes. Otherwise, we do one raw read. 
515 | if n < 0: 516 | raise ValueError("number of bytes to read must be positive") 517 | if n == 0: 518 | return "" 519 | self._read_lock.acquire() 520 | try: 521 | self._peek_unlocked(1) 522 | return self._read_unlocked( 523 | min(n, len(self._read_buf) - self._read_pos)) 524 | finally: 525 | self._read_lock.release() 526 | 527 | def tell(self): 528 | return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos 529 | 530 | def seek(self, pos, whence=0): 531 | if not (0 <= whence <= 2): 532 | raise ValueError("invalid whence value") 533 | self._read_lock.acquire() 534 | try: 535 | if whence == 1: 536 | pos -= len(self._read_buf) - self._read_pos 537 | 538 | pos = self.raw.seek(pos, whence) 539 | if pos < 0: 540 | raise IOError("tell() returned an invalid position") 541 | 542 | self._reset_read_buf() 543 | return pos 544 | finally: 545 | self._read_lock.release() 546 | 547 | def truncate(self, pos=None): 548 | # Flush the stream. We're mixing buffered I/O with lower-level I/O, 549 | # and a flush may be necessary to synch both views of the current 550 | # file state. 551 | self.flush() 552 | 553 | if pos is None: 554 | pos = self.tell() 555 | # XXX: Should seek() be used, instead of passing the position 556 | # XXX directly to truncate? 
557 | return self.raw.truncate(pos) 558 | 559 | ### Flush and close ### 560 | 561 | def flush(self): 562 | if self.closed: 563 | raise ValueError("flush of closed file") 564 | self.raw.flush() 565 | 566 | def close(self): 567 | if self.raw is not None and not self.closed: 568 | self.flush() 569 | self.raw.close() 570 | 571 | def detach(self): 572 | if self.raw is None: 573 | raise ValueError("raw stream already detached") 574 | self.flush() 575 | raw = self.raw 576 | self.raw = None 577 | return raw 578 | 579 | ### Inquiries ### 580 | 581 | def seekable(self): 582 | return self.raw.seekable() 583 | 584 | def readable(self): 585 | return self.raw.readable() 586 | 587 | @property 588 | def closed(self): 589 | return self.raw.closed 590 | 591 | class IncrementalNewlineDecoder(codecs.IncrementalDecoder): 592 | r"""Codec used when reading a file in universal newlines mode. It wraps 593 | another incremental decoder, translating \r\n and \r into \n. It also 594 | records the types of newlines encountered. When used with 595 | translate=False, it ensures that the newline sequence is returned in 596 | one piece. 
597 | """ 598 | def __init__(self, decoder, translate, errors='strict'): 599 | codecs.IncrementalDecoder.__init__(self, errors=errors) 600 | self.translate = translate 601 | self.decoder = decoder 602 | self.seennl = 0 603 | self.pendingcr = False 604 | 605 | def decode(self, input, final=False): 606 | # decode input (with the eventual \r from a previous pass) 607 | if self.decoder is None: 608 | output = input 609 | else: 610 | output = self.decoder.decode(input, final=final) 611 | if self.pendingcr and (output or final): 612 | output = "\r" + output 613 | self.pendingcr = False 614 | 615 | # retain last \r even when not translating data: 616 | # then readline() is sure to get \r\n in one pass 617 | if output.endswith("\r") and not final: 618 | output = output[:-1] 619 | self.pendingcr = True 620 | 621 | # Record which newlines are read 622 | crlf = output.count('\r\n') 623 | cr = output.count('\r') - crlf 624 | lf = output.count('\n') - crlf 625 | self.seennl |= (lf and self._LF) | (cr and self._CR) \ 626 | | (crlf and self._CRLF) 627 | 628 | if self.translate: 629 | if crlf: 630 | output = output.replace("\r\n", "\n") 631 | if cr: 632 | output = output.replace("\r", "\n") 633 | 634 | return output 635 | 636 | def getstate(self): 637 | if self.decoder is None: 638 | buf = "" 639 | flag = 0 640 | else: 641 | buf, flag = self.decoder.getstate() 642 | flag <<= 1 643 | if self.pendingcr: 644 | flag |= 1 645 | return buf, flag 646 | 647 | def setstate(self, state): 648 | buf, flag = state 649 | self.pendingcr = bool(flag & 1) 650 | if self.decoder is not None: 651 | self.decoder.setstate((buf, flag >> 1)) 652 | 653 | def reset(self): 654 | self.seennl = 0 655 | self.pendingcr = False 656 | if self.decoder is not None: 657 | self.decoder.reset() 658 | 659 | _LF = 1 660 | _CR = 2 661 | _CRLF = 4 662 | 663 | @property 664 | def newlines(self): 665 | return (None, 666 | "\n", 667 | "\r", 668 | ("\r", "\n"), 669 | "\r\n", 670 | ("\n", "\r\n"), 671 | ("\r", "\r\n"), 672 | 
("\r", "\n", "\r\n") 673 | )[self.seennl] 674 | 675 | 676 | 677 | class TextIOWrapper(IOBase): 678 | 679 | _CHUNK_SIZE = 2048 680 | 681 | def __init__(self, buffer, encoding=None, errors=None, newline=None, 682 | line_buffering=False): 683 | if newline is not None and not isinstance(newline, basestring): 684 | raise TypeError("illegal newline type: %r" % (type(newline),)) 685 | if newline not in (None, "", "\n", "\r", "\r\n"): 686 | raise ValueError("illegal newline value: %r" % (newline,)) 687 | if encoding is None: 688 | try: 689 | import locale 690 | except ImportError: 691 | # Importing locale may fail if Python is being built 692 | encoding = "ascii" 693 | else: 694 | encoding = locale.getpreferredencoding() 695 | 696 | if not isinstance(encoding, basestring): 697 | raise ValueError("invalid encoding: %r" % encoding) 698 | 699 | if errors is None: 700 | errors = "strict" 701 | else: 702 | if not isinstance(errors, basestring): 703 | raise ValueError("invalid errors: %r" % errors) 704 | 705 | self.buffer = buffer 706 | self._line_buffering = line_buffering 707 | self._encoding = encoding 708 | self._errors = errors 709 | self._readuniversal = not newline 710 | self._readtranslate = newline is None 711 | self._readnl = newline 712 | self._writetranslate = newline != '' 713 | self._writenl = newline or os.linesep 714 | self._encoder = None 715 | self._decoder = None 716 | self._decoded_chars = '' # buffer for text returned from decoder 717 | self._decoded_chars_used = 0 # offset into _decoded_chars for read() 718 | self._snapshot = None # info for reconstructing decoder state 719 | self._seekable = self._telling = self.buffer.seekable() 720 | 721 | 722 | @property 723 | def encoding(self): 724 | return self._encoding 725 | 726 | @property 727 | def errors(self): 728 | return self._errors 729 | 730 | @property 731 | def line_buffering(self): 732 | return self._line_buffering 733 | 734 | def seekable(self): 735 | return self._seekable 736 | 737 | def 
readable(self): 738 | return self.buffer.readable() 739 | 740 | def flush(self): 741 | self.buffer.flush() 742 | self._telling = self._seekable 743 | 744 | def close(self): 745 | if self.buffer is not None and not self.closed: 746 | self.flush() 747 | self.buffer.close() 748 | 749 | @property 750 | def closed(self): 751 | return self.buffer.closed 752 | 753 | if self.closed: 754 | raise ValueError("write to closed file") 755 | if not isinstance(s, unicode): 756 | raise TypeError("can't write %s to text stream" % 757 | s.__class__.__name__) 758 | length = len(s) 759 | haslf = (self._writetranslate or self._line_buffering) and "\n" in s 760 | if haslf and self._writetranslate and self._writenl != "\n": 761 | s = s.replace("\n", self._writenl) 762 | encoder = self._encoder or self._get_encoder() 763 | # XXX What if we were just reading? 764 | b = encoder.encode(s) 765 | self.buffer.write(b) 766 | if self._line_buffering and (haslf or "\r" in s): 767 | self.flush() 768 | self._snapshot = None 769 | if self._decoder: 770 | self._decoder.reset() 771 | return length 772 | 773 | def _get_encoder(self): 774 | make_encoder = codecs.getincrementalencoder(self._encoding) 775 | self._encoder = make_encoder(self._errors) 776 | return self._encoder 777 | 778 | def _get_decoder(self): 779 | make_decoder = codecs.getincrementaldecoder(self._encoding) 780 | decoder = make_decoder(self._errors) 781 | if self._readuniversal: 782 | decoder = IncrementalNewlineDecoder(decoder, self._readtranslate) 783 | self._decoder = decoder 784 | return decoder 785 | 786 | # The following three methods implement an ADT for _decoded_chars. 787 | # Text returned from the decoder is buffered here until the client 788 | # requests it by calling our read() or readline() method. 
789 | def _set_decoded_chars(self, chars): 790 | """Set the _decoded_chars buffer.""" 791 | self._decoded_chars = chars 792 | self._decoded_chars_used = 0 793 | 794 | def _get_decoded_chars(self, n=None): 795 | """Advance into the _decoded_chars buffer.""" 796 | offset = self._decoded_chars_used 797 | if n is None: 798 | chars = self._decoded_chars[offset:] 799 | else: 800 | chars = self._decoded_chars[offset:offset + n] 801 | self._decoded_chars_used += len(chars) 802 | return chars 803 | 804 | def _rewind_decoded_chars(self, n): 805 | """Rewind the _decoded_chars buffer.""" 806 | if self._decoded_chars_used < n: 807 | raise AssertionError("rewind decoded_chars out of bounds") 808 | self._decoded_chars_used -= n 809 | 810 | def _read_chunk(self): 811 | """ 812 | Read and decode the next chunk of data from the BufferedReader. 813 | """ 814 | 815 | # The return value is True unless EOF was reached. The decoded 816 | # string is placed in self._decoded_chars (replacing its previous 817 | # value). The entire input chunk is sent to the decoder, though 818 | # some of it may remain buffered in the decoder, yet to be 819 | # converted. 820 | 821 | if self._decoder is None: 822 | raise ValueError("no decoder") 823 | 824 | if self._telling: 825 | # To prepare for tell(), we need to snapshot a point in the 826 | # file where the decoder's input buffer is empty. 827 | 828 | dec_buffer, dec_flags = self._decoder.getstate() 829 | # Given this, we know there was a valid snapshot point 830 | # len(dec_buffer) bytes ago with decoder state (b'', dec_flags). 831 | 832 | # Read a chunk, decode it, and put the result in self._decoded_chars. 833 | input_chunk = self.buffer.read1(self._CHUNK_SIZE) 834 | eof = not input_chunk 835 | self._set_decoded_chars(self._decoder.decode(input_chunk, eof)) 836 | 837 | if self._telling: 838 | # At the snapshot point, len(dec_buffer) bytes before the read, 839 | # the next input to be decoded is dec_buffer + input_chunk. 
840 | self._snapshot = (dec_flags, dec_buffer + input_chunk) 841 | 842 | return not eof 843 | 844 | def _pack_cookie(self, position, dec_flags=0, 845 | bytes_to_feed=0, need_eof=0, chars_to_skip=0): 846 | # The meaning of a tell() cookie is: seek to position, set the 847 | # decoder flags to dec_flags, read bytes_to_feed bytes, feed them 848 | # into the decoder with need_eof as the EOF flag, then skip 849 | # chars_to_skip characters of the decoded result. For most simple 850 | # decoders, tell() will often just give a byte offset in the file. 851 | return (position | (dec_flags<<64) | (bytes_to_feed<<128) | 852 | (chars_to_skip<<192) | bool(need_eof)<<256) 853 | 854 | def _unpack_cookie(self, bigint): 855 | rest, position = divmod(bigint, 1<<64) 856 | rest, dec_flags = divmod(rest, 1<<64) 857 | rest, bytes_to_feed = divmod(rest, 1<<64) 858 | need_eof, chars_to_skip = divmod(rest, 1<<64) 859 | return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip 860 | 861 | def tell(self): 862 | if not self._seekable: 863 | raise IOError("underlying stream is not seekable") 864 | if not self._telling: 865 | raise IOError("telling position disabled by next() call") 866 | self.flush() 867 | position = self.buffer.tell() 868 | decoder = self._decoder 869 | if decoder is None or self._snapshot is None: 870 | if self._decoded_chars: 871 | # This should never happen. 872 | raise AssertionError("pending decoded text") 873 | return position 874 | 875 | # Skip backward to the snapshot point (see _read_chunk). 876 | dec_flags, next_input = self._snapshot 877 | position -= len(next_input) 878 | 879 | # How many decoded characters have been used up since the snapshot? 880 | chars_to_skip = self._decoded_chars_used 881 | if chars_to_skip == 0: 882 | # We haven't moved from the snapshot point. 883 | return self._pack_cookie(position, dec_flags) 884 | 885 | # Starting from the snapshot position, we will walk the decoder 886 | # forward until it gives us enough decoded characters. 
887 | saved_state = decoder.getstate() 888 | try: 889 | # Note our initial start point. 890 | decoder.setstate(('', dec_flags)) 891 | start_pos = position 892 | start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0 893 | need_eof = 0 894 | 895 | # Feed the decoder one byte at a time. As we go, note the 896 | # nearest "safe start point" before the current location 897 | # (a point where the decoder has nothing buffered, so seek() 898 | # can safely start from there and advance to this location). 899 | for next_byte in next_input: 900 | bytes_fed += 1 901 | chars_decoded += len(decoder.decode(next_byte)) 902 | dec_buffer, dec_flags = decoder.getstate() 903 | if not dec_buffer and chars_decoded <= chars_to_skip: 904 | # Decoder buffer is empty, so this is a safe start point. 905 | start_pos += bytes_fed 906 | chars_to_skip -= chars_decoded 907 | start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0 908 | if chars_decoded >= chars_to_skip: 909 | break 910 | else: 911 | # We didn't get enough decoded data; signal EOF to get more. 912 | chars_decoded += len(decoder.decode('', final=True)) 913 | need_eof = 1 914 | if chars_decoded < chars_to_skip: 915 | raise IOError("can't reconstruct logical file position") 916 | 917 | # The returned cookie corresponds to the last safe start point. 
918 | return self._pack_cookie( 919 | start_pos, start_flags, bytes_fed, need_eof, chars_to_skip) 920 | finally: 921 | decoder.setstate(saved_state) 922 | 923 | def truncate(self, pos=None): 924 | self.flush() 925 | if pos is None: 926 | pos = self.tell() 927 | return self.buffer.truncate(pos) 928 | 929 | def detach(self): 930 | if self.buffer is None: 931 | raise ValueError("buffer is already detached") 932 | self.flush() 933 | buffer = self.buffer 934 | self.buffer = None 935 | return buffer 936 | 937 | def seek(self, cookie, whence=0): 938 | if self.closed: 939 | raise ValueError("tell on closed file") 940 | if not self._seekable: 941 | raise IOError("underlying stream is not seekable") 942 | if whence == 1: # seek relative to current position 943 | if cookie != 0: 944 | raise IOError("can't do nonzero cur-relative seeks") 945 | # Seeking to the current position should attempt to 946 | # sync the underlying buffer with the current position. 947 | whence = 0 948 | cookie = self.tell() 949 | if whence == 2: # seek relative to end of file 950 | if cookie != 0: 951 | raise IOError("can't do nonzero end-relative seeks") 952 | self.flush() 953 | position = self.buffer.seek(0, 2) 954 | self._set_decoded_chars('') 955 | self._snapshot = None 956 | if self._decoder: 957 | self._decoder.reset() 958 | return position 959 | if whence != 0: 960 | raise ValueError("invalid whence (%r, should be 0, 1 or 2)" % 961 | (whence,)) 962 | if cookie < 0: 963 | raise ValueError("negative seek position %r" % (cookie,)) 964 | self.flush() 965 | 966 | # The strategy of seek() is to go back to the safe start point 967 | # and replay the effect of read(chars_to_skip) from there. 968 | start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \ 969 | self._unpack_cookie(cookie) 970 | 971 | # Seek back to the safe start point. 
972 | self.buffer.seek(start_pos) 973 | self._set_decoded_chars('') 974 | self._snapshot = None 975 | 976 | # Restore the decoder to its state from the safe start point. 977 | if cookie == 0 and self._decoder: 978 | self._decoder.reset() 979 | elif self._decoder or dec_flags or chars_to_skip: 980 | self._decoder = self._decoder or self._get_decoder() 981 | self._decoder.setstate(('', dec_flags)) 982 | self._snapshot = (dec_flags, '') 983 | 984 | if chars_to_skip: 985 | # Just like _read_chunk, feed the decoder and save a snapshot. 986 | input_chunk = self.buffer.read(bytes_to_feed) 987 | self._set_decoded_chars( 988 | self._decoder.decode(input_chunk, need_eof)) 989 | self._snapshot = (dec_flags, input_chunk) 990 | 991 | # Skip chars_to_skip of the decoded characters. 992 | if len(self._decoded_chars) < chars_to_skip: 993 | raise IOError("can't restore logical file position") 994 | self._decoded_chars_used = chars_to_skip 995 | 996 | # Finally, reset the encoder (merely useful for proper BOM handling) 997 | try: 998 | encoder = self._encoder or self._get_encoder() 999 | except LookupError: 1000 | # Sometimes the encoder doesn't exist 1001 | pass 1002 | else: 1003 | if cookie != 0: 1004 | encoder.setstate(0) 1005 | else: 1006 | encoder.reset() 1007 | return cookie 1008 | 1009 | def read(self, n=None): 1010 | self._checkReadable() 1011 | if n is None: 1012 | n = -1 1013 | decoder = self._decoder or self._get_decoder() 1014 | try: 1015 | n.__index__ 1016 | except AttributeError: 1017 | raise TypeError("an integer is required") 1018 | if n < 0: 1019 | # Read everything. 1020 | result = (self._get_decoded_chars() + 1021 | decoder.decode(self.buffer.read(), final=True)) 1022 | self._set_decoded_chars('') 1023 | self._snapshot = None 1024 | return result 1025 | else: 1026 | # Keep reading chunks until we have n characters to return. 
1027 | eof = False 1028 | result = self._get_decoded_chars(n) 1029 | while len(result) < n and not eof: 1030 | eof = not self._read_chunk() 1031 | result += self._get_decoded_chars(n - len(result)) 1032 | return result 1033 | 1034 | def next(self): 1035 | self._telling = False 1036 | line = self.readline() 1037 | if not line: 1038 | self._snapshot = None 1039 | self._telling = self._seekable 1040 | raise StopIteration 1041 | return line 1042 | 1043 | def readline(self, limit=None): 1044 | if self.closed: 1045 | raise ValueError("read from closed file") 1046 | if limit is None: 1047 | limit = -1 1048 | elif not isinstance(limit, (int, long)): 1049 | raise TypeError("limit must be an integer") 1050 | 1051 | # Grab all the decoded text (we will rewind any extra bits later). 1052 | line = self._get_decoded_chars() 1053 | 1054 | start = 0 1055 | # Make the decoder if it doesn't already exist. 1056 | if not self._decoder: 1057 | self._get_decoder() 1058 | 1059 | pos = endpos = None 1060 | while True: 1061 | if self._readtranslate: 1062 | # Newlines are already translated, only search for \n 1063 | pos = line.find('\n', start) 1064 | if pos >= 0: 1065 | endpos = pos + 1 1066 | break 1067 | else: 1068 | start = len(line) 1069 | 1070 | elif self._readuniversal: 1071 | # Universal newline search. Find any of \r, \r\n, \n 1072 | # The decoder ensures that \r\n are not split in two pieces 1073 | 1074 | # In C we'd look for these in parallel of course. 
1075 | nlpos = line.find("\n", start) 1076 | crpos = line.find("\r", start) 1077 | if crpos == -1: 1078 | if nlpos == -1: 1079 | # Nothing found 1080 | start = len(line) 1081 | else: 1082 | # Found \n 1083 | endpos = nlpos + 1 1084 | break 1085 | elif nlpos == -1: 1086 | # Found lone \r 1087 | endpos = crpos + 1 1088 | break 1089 | elif nlpos < crpos: 1090 | # Found \n 1091 | endpos = nlpos + 1 1092 | break 1093 | elif nlpos == crpos + 1: 1094 | # Found \r\n 1095 | endpos = crpos + 2 1096 | break 1097 | else: 1098 | # Found \r 1099 | endpos = crpos + 1 1100 | break 1101 | else: 1102 | # non-universal 1103 | pos = line.find(self._readnl) 1104 | if pos >= 0: 1105 | endpos = pos + len(self._readnl) 1106 | break 1107 | 1108 | if limit >= 0 and len(line) >= limit: 1109 | endpos = limit # reached length limit 1110 | break 1111 | 1112 | # No line ending seen yet - get more data' 1113 | while self._read_chunk(): 1114 | if self._decoded_chars: 1115 | break 1116 | if self._decoded_chars: 1117 | line += self._get_decoded_chars() 1118 | else: 1119 | # end of file 1120 | self._set_decoded_chars('') 1121 | self._snapshot = None 1122 | return line 1123 | 1124 | if limit >= 0 and endpos > limit: 1125 | endpos = limit # don't exceed limit 1126 | 1127 | # Rewind _decoded_chars to just after the line ending we found. 1128 | self._rewind_decoded_chars(len(line) - endpos) 1129 | return line[:endpos] 1130 | 1131 | @property 1132 | def newlines(self): 1133 | return self._decoder.newlines if self._decoder else None 1134 | 1135 | -------------------------------------------------------------------------------- /http_parser/http_parser.c: -------------------------------------------------------------------------------- 1 | /* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev 2 | * 3 | * Additional changes are licensed under the same terms as NGINX and 4 | * copyright Joyent, Inc. and other Node contributors. All rights reserved. 
5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to 8 | * deal in the Software without restriction, including without limitation the 9 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 10 | * sell copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 22 | * IN THE SOFTWARE. 23 | */ 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | 30 | 31 | #ifndef MIN 32 | # define MIN(a,b) ((a) < (b) ? 
(a) : (b)) 33 | #endif 34 | 35 | 36 | #if HTTP_PARSER_DEBUG 37 | #define SET_ERRNO(e) \ 38 | do { \ 39 | parser->http_errno = (e); \ 40 | parser->error_lineno = __LINE__; \ 41 | } while (0) 42 | #else 43 | #define SET_ERRNO(e) \ 44 | do { \ 45 | parser->http_errno = (e); \ 46 | } while(0) 47 | #endif 48 | 49 | 50 | /* Run the notify callback FOR, returning ER if it fails */ 51 | #define CALLBACK_NOTIFY_(FOR, ER) \ 52 | do { \ 53 | assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \ 54 | \ 55 | if (settings->on_##FOR) { \ 56 | if (0 != settings->on_##FOR(parser)) { \ 57 | SET_ERRNO(HPE_CB_##FOR); \ 58 | } \ 59 | \ 60 | /* We either errored above or got paused; get out */ \ 61 | if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \ 62 | return (ER); \ 63 | } \ 64 | } \ 65 | } while (0) 66 | 67 | /* Run the notify callback FOR and consume the current byte */ 68 | #define CALLBACK_NOTIFY(FOR) CALLBACK_NOTIFY_(FOR, p - data + 1) 69 | 70 | /* Run the notify callback FOR and don't consume the current byte */ 71 | #define CALLBACK_NOTIFY_NOADVANCE(FOR) CALLBACK_NOTIFY_(FOR, p - data) 72 | 73 | /* Run data callback FOR with LEN bytes, returning ER if it fails */ 74 | #define CALLBACK_DATA_(FOR, LEN, ER) \ 75 | do { \ 76 | assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \ 77 | \ 78 | if (FOR##_mark) { \ 79 | if (settings->on_##FOR) { \ 80 | if (0 != settings->on_##FOR(parser, FOR##_mark, (LEN))) { \ 81 | SET_ERRNO(HPE_CB_##FOR); \ 82 | } \ 83 | \ 84 | /* We either errored above or got paused; get out */ \ 85 | if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \ 86 | return (ER); \ 87 | } \ 88 | } \ 89 | FOR##_mark = NULL; \ 90 | } \ 91 | } while (0) 92 | 93 | /* Run the data callback FOR and consume the current byte */ 94 | #define CALLBACK_DATA(FOR) \ 95 | CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1) 96 | 97 | /* Run the data callback FOR and don't consume the current byte */ 98 | #define CALLBACK_DATA_NOADVANCE(FOR) \ 99 | CALLBACK_DATA_(FOR, p - FOR##_mark, p - data) 100 | 101 | /* Set the 
/* Human-readable names for the HTTP method enum, indexed by its value.
 * The order must match the method enum in http_parser.h. */
static const char *method_strings[] = {
  "DELETE",     "GET",        "HEAD",         "POST",       "PUT",
  "CONNECT",    "OPTIONS",    "TRACE",        "COPY",       "LOCK",
  "MKCOL",      "MOVE",       "PROPFIND",     "PROPPATCH",  "UNLOCK",
  "REPORT",     "MKACTIVITY", "CHECKOUT",     "MERGE",      "M-SEARCH",
  "NOTIFY",     "SUBSCRIBE",  "UNSUBSCRIBE",  "PATCH"
};
/* Map an ASCII byte to its hexadecimal digit value, or -1 when the byte
 * is not a hex digit.  Accepts 0-9, A-F and a-f.
 * NOTE(review): only the first 128 entries are spelled out; entries
 * 128-255 are zero-initialized, so bytes >= 0x80 read as digit 0 rather
 * than -1.  This matches the table as written upstream — confirm it is
 * intended before changing. */
static const int8_t unhex[256] = {
  -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
   0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,  /* '0'..'9' */
  -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* 'A'..'F' */
  -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* 'a'..'f' */
  -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
};
/* Parser FSM states.  Response-line states come first, then
 * request-line states, then the header/body states shared by both. */
enum state {
  s_dead = 1,  /* important that this is > 0 */

  s_start_req_or_res,
  s_res_or_resp_H,
  s_start_res,
  s_res_H,
  s_res_HT,
  s_res_HTT,
  s_res_HTTP,
  s_res_first_http_major,
  s_res_http_major,
  s_res_first_http_minor,
  s_res_http_minor,
  s_res_first_status_code,
  s_res_status_code,
  s_res_status,
  s_res_line_almost_done,

  s_start_req,

  s_req_method,
  s_req_spaces_before_url,
  s_req_schema,
  s_req_schema_slash,
  s_req_schema_slash_slash,
  s_req_host,
  s_req_port,
  s_req_path,
  s_req_query_string_start,
  s_req_query_string,
  s_req_fragment_start,
  s_req_fragment,
  s_req_http_start,
  s_req_http_H,
  s_req_http_HT,
  s_req_http_HTT,
  s_req_http_HTTP,
  s_req_first_http_major,
  s_req_http_major,
  s_req_first_http_minor,
  s_req_http_minor,
  s_req_line_almost_done,

  s_header_field_start,
  s_header_field,
  s_header_value_start,
  s_header_value,
  s_header_value_lws,

  s_header_almost_done,

  s_chunk_size_start,
  s_chunk_size,
  s_chunk_parameters,
  s_chunk_size_almost_done,

  s_headers_almost_done,
  s_headers_done,

  /* Important: 's_headers_done' must be the last 'header' state. All
   * states beyond this must be 'body' states. It is used for overflow
   * checking. See the PARSING_HEADER() macro.
   */

  s_chunk_data,
  s_chunk_data_almost_done,
  s_chunk_data_done,

  s_body_identity,
  s_body_identity_eof,

  s_message_done
};


#define PARSING_HEADER(state) (state <= s_headers_done)


/* Sub-states used while matching well-known header names and values. */
enum header_states {
  h_general = 0,
  h_C,
  h_CO,
  h_CON,

  h_matching_connection,
  h_matching_proxy_connection,
  h_matching_content_length,
  h_matching_transfer_encoding,
  h_matching_upgrade,

  h_connection,
  h_content_length,
  h_transfer_encoding,
  h_upgrade,

  h_matching_transfer_encoding_chunked,
  h_matching_connection_keep_alive,
  h_matching_connection_close,

  h_transfer_encoding_chunked,
  h_connection_keep_alive,
  h_connection_close
};
' ' : tokens[(unsigned char)c]) 357 | #define IS_URL_CHAR(c) \ 358 | (normal_url_char[(unsigned char) (c)] || ((c) & 0x80)) 359 | #define IS_HOST_CHAR(c) \ 360 | (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_') 361 | #endif 362 | 363 | 364 | #define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res) 365 | 366 | 367 | #if HTTP_PARSER_STRICT 368 | # define STRICT_CHECK(cond) \ 369 | do { \ 370 | if (cond) { \ 371 | SET_ERRNO(HPE_STRICT); \ 372 | goto error; \ 373 | } \ 374 | } while (0) 375 | # define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead) 376 | #else 377 | # define STRICT_CHECK(cond) 378 | # define NEW_MESSAGE() start_state 379 | #endif 380 | 381 | 382 | /* Map errno values to strings for human-readable output */ 383 | #define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s }, 384 | static struct { 385 | const char *name; 386 | const char *description; 387 | } http_strerror_tab[] = { 388 | HTTP_ERRNO_MAP(HTTP_STRERROR_GEN) 389 | }; 390 | #undef HTTP_STRERROR_GEN 391 | 392 | int http_message_needs_eof(http_parser *parser); 393 | 394 | /* Our URL parser. 395 | * 396 | * This is designed to be shared by http_parser_execute() for URL validation, 397 | * hence it has a state transition + byte-for-byte interface. In addition, it 398 | * is meant to be embedded in http_parser_parse_url(), which does the dirty 399 | * work of turning state transitions URL components for its API. 400 | * 401 | * This function should only be invoked with non-space characters. It is 402 | * assumed that the caller cares about (and can detect) the transition between 403 | * URL and non-URL states by looking for these. 
 */
/* Advance the URL sub-state machine by one byte.
 *
 * s          - current URL state (one of the s_req_* states)
 * ch         - next input byte; must not be whitespace (asserted below)
 * is_connect - non-zero for CONNECT requests, whose request-target is a
 *              bare host[:port] rather than a scheme or path
 *
 * Returns the next state, or s_dead when ch is not permitted in state s.
 */
static inline enum state
parse_url_char(enum state s, const char ch, int is_connect)
{
  assert(!isspace(ch));

  switch (s) {
    case s_req_spaces_before_url:
      if (ch == '/' || ch == '*') {
        return s_req_path;
      }

      /* Proxied requests are followed by scheme of an absolute URI (alpha).
       * CONNECT is followed by a hostname, which begins with alphanum.
       * All other methods are followed by '/' or '*' (handled above).
       */
      if (IS_ALPHA(ch) || (is_connect && IS_NUM(ch))) {
        return (is_connect) ? s_req_host : s_req_schema;
      }

      break;

    case s_req_schema:
      if (IS_ALPHA(ch)) {
        return s;
      }

      if (ch == ':') {
        return s_req_schema_slash;
      }

      break;

    case s_req_schema_slash:
      if (ch == '/') {
        return s_req_schema_slash_slash;
      }

      break;

    case s_req_schema_slash_slash:
      if (ch == '/') {
        return s_req_host;
      }

      break;

    case s_req_host:
      if (IS_HOST_CHAR(ch)) {
        return s;
      }

      /* End of host: a port, path, or query may follow. */
      switch (ch) {
        case ':':
          return s_req_port;

        case '/':
          return s_req_path;

        case '?':
          return s_req_query_string_start;
      }

      break;

    case s_req_port:
      if (IS_NUM(ch)) {
        return s;
      }

      switch (ch) {
        case '/':
          return s_req_path;

        case '?':
          return s_req_query_string_start;
      }

      break;

    case s_req_path:
      if (IS_URL_CHAR(ch)) {
        return s;
      }

      switch (ch) {
        case '?':
          return s_req_query_string_start;

        case '#':
          return s_req_fragment_start;
      }

      break;

    case s_req_query_string_start:
      if (IS_URL_CHAR(ch)) {
        return s_req_query_string;
      }

      switch (ch) {
        case '?':
          /* XXX ignore extra '?' ... is this right? */
          return s;

        case '#':
          return s_req_fragment_start;
      }

      break;

    case s_req_query_string:
      if (IS_URL_CHAR(ch)) {
        return s;
      }

      switch (ch) {
        case '?':
          /* allow extra '?' in query string */
          return s;

        case '#':
          return s_req_fragment_start;
      }

      break;

    case s_req_fragment_start:
      if (IS_URL_CHAR(ch)) {
        return s_req_fragment;
      }

      switch (ch) {
        case '?':
          return s_req_fragment;

        case '#':
          /* extra '#' markers are collapsed into the fragment start */
          return s;
      }

      break;

    case s_req_fragment:
      if (IS_URL_CHAR(ch)) {
        return s;
      }

      switch (ch) {
        case '?':
        case '#':
          return s;
      }

      break;

    default:
      break;
  }

  /* We should never fall out of the switch above unless there's an error */
  return s_dead;
}

size_t http_parser_execute (http_parser *parser,
                            const http_parser_settings *settings,
                            const char *data,
                            size_t len)
{
  char c, ch;
  int8_t unhex_val;
  const char *p = data;
  const char *header_field_mark = 0;
  const char *header_value_mark = 0;
  const char *url_mark = 0;
  const char *body_mark = 0;

  /* We're in an error state. Don't bother doing anything. */
  if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
    return 0;
  }

  if (len == 0) {
    switch (parser->state) {
      case s_body_identity_eof:
        /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
         * we got paused.
590 | */ 591 | CALLBACK_NOTIFY_NOADVANCE(message_complete); 592 | return 0; 593 | 594 | case s_dead: 595 | case s_start_req_or_res: 596 | case s_start_res: 597 | case s_start_req: 598 | return 0; 599 | 600 | default: 601 | SET_ERRNO(HPE_INVALID_EOF_STATE); 602 | return 1; 603 | } 604 | } 605 | 606 | 607 | if (parser->state == s_header_field) 608 | header_field_mark = data; 609 | if (parser->state == s_header_value) 610 | header_value_mark = data; 611 | if (parser->state == s_req_path || 612 | parser->state == s_req_schema || 613 | parser->state == s_req_schema_slash || 614 | parser->state == s_req_schema_slash_slash || 615 | parser->state == s_req_port || 616 | parser->state == s_req_query_string_start || 617 | parser->state == s_req_query_string || 618 | parser->state == s_req_host || 619 | parser->state == s_req_fragment_start || 620 | parser->state == s_req_fragment) 621 | url_mark = data; 622 | 623 | for (p=data; p != data + len; p++) { 624 | ch = *p; 625 | 626 | if (PARSING_HEADER(parser->state)) { 627 | ++parser->nread; 628 | /* Buffer overflow attack */ 629 | if (parser->nread > HTTP_MAX_HEADER_SIZE) { 630 | SET_ERRNO(HPE_HEADER_OVERFLOW); 631 | goto error; 632 | } 633 | } 634 | 635 | reexecute_byte: 636 | switch (parser->state) { 637 | 638 | case s_dead: 639 | /* this state is used after a 'Connection: close' message 640 | * the parser will error out if it reads another message 641 | */ 642 | SET_ERRNO(HPE_CLOSED_CONNECTION); 643 | goto error; 644 | 645 | case s_start_req_or_res: 646 | { 647 | if (ch == CR || ch == LF) 648 | break; 649 | parser->flags = 0; 650 | parser->content_length = -1; 651 | 652 | if (ch == 'H') { 653 | parser->state = s_res_or_resp_H; 654 | 655 | CALLBACK_NOTIFY(message_begin); 656 | } else { 657 | parser->type = HTTP_REQUEST; 658 | parser->state = s_start_req; 659 | goto reexecute_byte; 660 | } 661 | 662 | break; 663 | } 664 | 665 | case s_res_or_resp_H: 666 | if (ch == 'T') { 667 | parser->type = HTTP_RESPONSE; 668 | parser->state = 
s_res_HT; 669 | } else { 670 | if (ch != 'E') { 671 | SET_ERRNO(HPE_INVALID_CONSTANT); 672 | goto error; 673 | } 674 | 675 | parser->type = HTTP_REQUEST; 676 | parser->method = HTTP_HEAD; 677 | parser->index = 2; 678 | parser->state = s_req_method; 679 | } 680 | break; 681 | 682 | case s_start_res: 683 | { 684 | parser->flags = 0; 685 | parser->content_length = -1; 686 | 687 | switch (ch) { 688 | case 'H': 689 | parser->state = s_res_H; 690 | break; 691 | 692 | case CR: 693 | case LF: 694 | break; 695 | 696 | default: 697 | SET_ERRNO(HPE_INVALID_CONSTANT); 698 | goto error; 699 | } 700 | 701 | CALLBACK_NOTIFY(message_begin); 702 | break; 703 | } 704 | 705 | case s_res_H: 706 | STRICT_CHECK(ch != 'T'); 707 | parser->state = s_res_HT; 708 | break; 709 | 710 | case s_res_HT: 711 | STRICT_CHECK(ch != 'T'); 712 | parser->state = s_res_HTT; 713 | break; 714 | 715 | case s_res_HTT: 716 | STRICT_CHECK(ch != 'P'); 717 | parser->state = s_res_HTTP; 718 | break; 719 | 720 | case s_res_HTTP: 721 | STRICT_CHECK(ch != '/'); 722 | parser->state = s_res_first_http_major; 723 | break; 724 | 725 | case s_res_first_http_major: 726 | if (ch < '0' || ch > '9') { 727 | SET_ERRNO(HPE_INVALID_VERSION); 728 | goto error; 729 | } 730 | 731 | parser->http_major = ch - '0'; 732 | parser->state = s_res_http_major; 733 | break; 734 | 735 | /* major HTTP version or dot */ 736 | case s_res_http_major: 737 | { 738 | if (ch == '.') { 739 | parser->state = s_res_first_http_minor; 740 | break; 741 | } 742 | 743 | if (!IS_NUM(ch)) { 744 | SET_ERRNO(HPE_INVALID_VERSION); 745 | goto error; 746 | } 747 | 748 | parser->http_major *= 10; 749 | parser->http_major += ch - '0'; 750 | 751 | if (parser->http_major > 999) { 752 | SET_ERRNO(HPE_INVALID_VERSION); 753 | goto error; 754 | } 755 | 756 | break; 757 | } 758 | 759 | /* first digit of minor HTTP version */ 760 | case s_res_first_http_minor: 761 | if (!IS_NUM(ch)) { 762 | SET_ERRNO(HPE_INVALID_VERSION); 763 | goto error; 764 | } 765 | 766 | 
parser->http_minor = ch - '0'; 767 | parser->state = s_res_http_minor; 768 | break; 769 | 770 | /* minor HTTP version or end of request line */ 771 | case s_res_http_minor: 772 | { 773 | if (ch == ' ') { 774 | parser->state = s_res_first_status_code; 775 | break; 776 | } 777 | 778 | if (!IS_NUM(ch)) { 779 | SET_ERRNO(HPE_INVALID_VERSION); 780 | goto error; 781 | } 782 | 783 | parser->http_minor *= 10; 784 | parser->http_minor += ch - '0'; 785 | 786 | if (parser->http_minor > 999) { 787 | SET_ERRNO(HPE_INVALID_VERSION); 788 | goto error; 789 | } 790 | 791 | break; 792 | } 793 | 794 | case s_res_first_status_code: 795 | { 796 | if (!IS_NUM(ch)) { 797 | if (ch == ' ') { 798 | break; 799 | } 800 | 801 | SET_ERRNO(HPE_INVALID_STATUS); 802 | goto error; 803 | } 804 | parser->status_code = ch - '0'; 805 | parser->state = s_res_status_code; 806 | break; 807 | } 808 | 809 | case s_res_status_code: 810 | { 811 | if (!IS_NUM(ch)) { 812 | switch (ch) { 813 | case ' ': 814 | parser->state = s_res_status; 815 | break; 816 | case CR: 817 | parser->state = s_res_line_almost_done; 818 | break; 819 | case LF: 820 | parser->state = s_header_field_start; 821 | break; 822 | default: 823 | SET_ERRNO(HPE_INVALID_STATUS); 824 | goto error; 825 | } 826 | break; 827 | } 828 | 829 | parser->status_code *= 10; 830 | parser->status_code += ch - '0'; 831 | 832 | if (parser->status_code > 999) { 833 | SET_ERRNO(HPE_INVALID_STATUS); 834 | goto error; 835 | } 836 | 837 | break; 838 | } 839 | 840 | case s_res_status: 841 | /* the human readable status. e.g. 
"NOT FOUND" 842 | * we are not humans so just ignore this */ 843 | if (ch == CR) { 844 | parser->state = s_res_line_almost_done; 845 | break; 846 | } 847 | 848 | if (ch == LF) { 849 | parser->state = s_header_field_start; 850 | break; 851 | } 852 | break; 853 | 854 | case s_res_line_almost_done: 855 | STRICT_CHECK(ch != LF); 856 | parser->state = s_header_field_start; 857 | break; 858 | 859 | case s_start_req: 860 | { 861 | if (ch == CR || ch == LF) 862 | break; 863 | parser->flags = 0; 864 | parser->content_length = -1; 865 | 866 | if (!IS_ALPHA(ch)) { 867 | SET_ERRNO(HPE_INVALID_METHOD); 868 | goto error; 869 | } 870 | 871 | parser->method = (enum http_method) 0; 872 | parser->index = 1; 873 | switch (ch) { 874 | case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break; 875 | case 'D': parser->method = HTTP_DELETE; break; 876 | case 'G': parser->method = HTTP_GET; break; 877 | case 'H': parser->method = HTTP_HEAD; break; 878 | case 'L': parser->method = HTTP_LOCK; break; 879 | case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break; 880 | case 'N': parser->method = HTTP_NOTIFY; break; 881 | case 'O': parser->method = HTTP_OPTIONS; break; 882 | case 'P': parser->method = HTTP_POST; 883 | /* or PROPFIND or PROPPATCH or PUT or PATCH */ 884 | break; 885 | case 'R': parser->method = HTTP_REPORT; break; 886 | case 'S': parser->method = HTTP_SUBSCRIBE; break; 887 | case 'T': parser->method = HTTP_TRACE; break; 888 | case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break; 889 | default: 890 | SET_ERRNO(HPE_INVALID_METHOD); 891 | goto error; 892 | } 893 | parser->state = s_req_method; 894 | 895 | CALLBACK_NOTIFY(message_begin); 896 | 897 | break; 898 | } 899 | 900 | case s_req_method: 901 | { 902 | const char *matcher; 903 | if (ch == '\0') { 904 | SET_ERRNO(HPE_INVALID_METHOD); 905 | goto error; 906 | } 907 | 908 | matcher = method_strings[parser->method]; 909 | if (ch == ' ' && matcher[parser->index] == '\0') { 
910 | parser->state = s_req_spaces_before_url; 911 | } else if (ch == matcher[parser->index]) { 912 | ; /* nada */ 913 | } else if (parser->method == HTTP_CONNECT) { 914 | if (parser->index == 1 && ch == 'H') { 915 | parser->method = HTTP_CHECKOUT; 916 | } else if (parser->index == 2 && ch == 'P') { 917 | parser->method = HTTP_COPY; 918 | } else { 919 | goto error; 920 | } 921 | } else if (parser->method == HTTP_MKCOL) { 922 | if (parser->index == 1 && ch == 'O') { 923 | parser->method = HTTP_MOVE; 924 | } else if (parser->index == 1 && ch == 'E') { 925 | parser->method = HTTP_MERGE; 926 | } else if (parser->index == 1 && ch == '-') { 927 | parser->method = HTTP_MSEARCH; 928 | } else if (parser->index == 2 && ch == 'A') { 929 | parser->method = HTTP_MKACTIVITY; 930 | } else { 931 | goto error; 932 | } 933 | } else if (parser->index == 1 && parser->method == HTTP_POST) { 934 | if (ch == 'R') { 935 | parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */ 936 | } else if (ch == 'U') { 937 | parser->method = HTTP_PUT; 938 | } else if (ch == 'A') { 939 | parser->method = HTTP_PATCH; 940 | } else { 941 | goto error; 942 | } 943 | } else if (parser->index == 2 && parser->method == HTTP_UNLOCK && ch == 'S') { 944 | parser->method = HTTP_UNSUBSCRIBE; 945 | } else if (parser->index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') { 946 | parser->method = HTTP_PROPPATCH; 947 | } else { 948 | SET_ERRNO(HPE_INVALID_METHOD); 949 | goto error; 950 | } 951 | 952 | ++parser->index; 953 | break; 954 | } 955 | 956 | case s_req_spaces_before_url: 957 | { 958 | if (ch == ' ') break; 959 | 960 | MARK(url); 961 | 962 | parser->state = parse_url_char( 963 | (enum state)parser->state, ch, parser->method == HTTP_CONNECT); 964 | if (parser->state == s_dead) { 965 | SET_ERRNO(HPE_INVALID_URL); 966 | goto error; 967 | } 968 | 969 | break; 970 | } 971 | 972 | case s_req_schema: 973 | case s_req_schema_slash: 974 | case s_req_schema_slash_slash: 975 | { 976 | switch (ch) { 977 | /* No 
whitespace allowed here */ 978 | case ' ': 979 | case CR: 980 | case LF: 981 | SET_ERRNO(HPE_INVALID_URL); 982 | goto error; 983 | default: 984 | parser->state = parse_url_char( 985 | (enum state)parser->state, ch, parser->method == HTTP_CONNECT); 986 | if (parser->state == s_dead) { 987 | SET_ERRNO(HPE_INVALID_URL); 988 | goto error; 989 | } 990 | } 991 | 992 | break; 993 | } 994 | 995 | case s_req_host: 996 | case s_req_port: 997 | case s_req_path: 998 | case s_req_query_string_start: 999 | case s_req_query_string: 1000 | case s_req_fragment_start: 1001 | case s_req_fragment: 1002 | { 1003 | /* XXX: There is a bug here where if we're on the first character 1004 | * of s_req_host (e.g. our URL is 'http://' and we see a whitespace 1005 | * character, we'll consider this a valid URL. This seems incorrect, 1006 | * but at least it's bug-compatible with what we had before. 1007 | */ 1008 | switch (ch) { 1009 | case ' ': 1010 | parser->state = s_req_http_start; 1011 | CALLBACK_DATA(url); 1012 | break; 1013 | case CR: 1014 | case LF: 1015 | parser->http_major = 0; 1016 | parser->http_minor = 9; 1017 | parser->state = (ch == CR) ? 
1018 | s_req_line_almost_done : 1019 | s_header_field_start; 1020 | CALLBACK_DATA(url); 1021 | break; 1022 | default: 1023 | parser->state = parse_url_char( 1024 | (enum state)parser->state, ch, parser->method == HTTP_CONNECT); 1025 | if (parser->state == s_dead) { 1026 | SET_ERRNO(HPE_INVALID_URL); 1027 | goto error; 1028 | } 1029 | } 1030 | break; 1031 | } 1032 | 1033 | case s_req_http_start: 1034 | switch (ch) { 1035 | case 'H': 1036 | parser->state = s_req_http_H; 1037 | break; 1038 | case ' ': 1039 | break; 1040 | default: 1041 | SET_ERRNO(HPE_INVALID_CONSTANT); 1042 | goto error; 1043 | } 1044 | break; 1045 | 1046 | case s_req_http_H: 1047 | STRICT_CHECK(ch != 'T'); 1048 | parser->state = s_req_http_HT; 1049 | break; 1050 | 1051 | case s_req_http_HT: 1052 | STRICT_CHECK(ch != 'T'); 1053 | parser->state = s_req_http_HTT; 1054 | break; 1055 | 1056 | case s_req_http_HTT: 1057 | STRICT_CHECK(ch != 'P'); 1058 | parser->state = s_req_http_HTTP; 1059 | break; 1060 | 1061 | case s_req_http_HTTP: 1062 | STRICT_CHECK(ch != '/'); 1063 | parser->state = s_req_first_http_major; 1064 | break; 1065 | 1066 | /* first digit of major HTTP version */ 1067 | case s_req_first_http_major: 1068 | if (ch < '1' || ch > '9') { 1069 | SET_ERRNO(HPE_INVALID_VERSION); 1070 | goto error; 1071 | } 1072 | 1073 | parser->http_major = ch - '0'; 1074 | parser->state = s_req_http_major; 1075 | break; 1076 | 1077 | /* major HTTP version or dot */ 1078 | case s_req_http_major: 1079 | { 1080 | if (ch == '.') { 1081 | parser->state = s_req_first_http_minor; 1082 | break; 1083 | } 1084 | 1085 | if (!IS_NUM(ch)) { 1086 | SET_ERRNO(HPE_INVALID_VERSION); 1087 | goto error; 1088 | } 1089 | 1090 | parser->http_major *= 10; 1091 | parser->http_major += ch - '0'; 1092 | 1093 | if (parser->http_major > 999) { 1094 | SET_ERRNO(HPE_INVALID_VERSION); 1095 | goto error; 1096 | } 1097 | 1098 | break; 1099 | } 1100 | 1101 | /* first digit of minor HTTP version */ 1102 | case s_req_first_http_minor: 1103 | if 
(!IS_NUM(ch)) { 1104 | SET_ERRNO(HPE_INVALID_VERSION); 1105 | goto error; 1106 | } 1107 | 1108 | parser->http_minor = ch - '0'; 1109 | parser->state = s_req_http_minor; 1110 | break; 1111 | 1112 | /* minor HTTP version or end of request line */ 1113 | case s_req_http_minor: 1114 | { 1115 | if (ch == CR) { 1116 | parser->state = s_req_line_almost_done; 1117 | break; 1118 | } 1119 | 1120 | if (ch == LF) { 1121 | parser->state = s_header_field_start; 1122 | break; 1123 | } 1124 | 1125 | /* XXX allow spaces after digit? */ 1126 | 1127 | if (!IS_NUM(ch)) { 1128 | SET_ERRNO(HPE_INVALID_VERSION); 1129 | goto error; 1130 | } 1131 | 1132 | parser->http_minor *= 10; 1133 | parser->http_minor += ch - '0'; 1134 | 1135 | if (parser->http_minor > 999) { 1136 | SET_ERRNO(HPE_INVALID_VERSION); 1137 | goto error; 1138 | } 1139 | 1140 | break; 1141 | } 1142 | 1143 | /* end of request line */ 1144 | case s_req_line_almost_done: 1145 | { 1146 | if (ch != LF) { 1147 | SET_ERRNO(HPE_LF_EXPECTED); 1148 | goto error; 1149 | } 1150 | 1151 | parser->state = s_header_field_start; 1152 | break; 1153 | } 1154 | 1155 | case s_header_field_start: 1156 | { 1157 | if (ch == CR) { 1158 | parser->state = s_headers_almost_done; 1159 | break; 1160 | } 1161 | 1162 | if (ch == LF) { 1163 | /* they might be just sending \n instead of \r\n so this would be 1164 | * the second \n to denote the end of headers*/ 1165 | parser->state = s_headers_almost_done; 1166 | goto reexecute_byte; 1167 | } 1168 | 1169 | c = TOKEN(ch); 1170 | 1171 | if (!c) { 1172 | SET_ERRNO(HPE_INVALID_HEADER_TOKEN); 1173 | goto error; 1174 | } 1175 | 1176 | MARK(header_field); 1177 | 1178 | parser->index = 0; 1179 | parser->state = s_header_field; 1180 | 1181 | switch (c) { 1182 | case 'c': 1183 | parser->header_state = h_C; 1184 | break; 1185 | 1186 | case 'p': 1187 | parser->header_state = h_matching_proxy_connection; 1188 | break; 1189 | 1190 | case 't': 1191 | parser->header_state = h_matching_transfer_encoding; 1192 | break; 1193 
| 1194 | case 'u': 1195 | parser->header_state = h_matching_upgrade; 1196 | break; 1197 | 1198 | default: 1199 | parser->header_state = h_general; 1200 | break; 1201 | } 1202 | break; 1203 | } 1204 | 1205 | case s_header_field: 1206 | { 1207 | c = TOKEN(ch); 1208 | 1209 | if (c) { 1210 | switch (parser->header_state) { 1211 | case h_general: 1212 | break; 1213 | 1214 | case h_C: 1215 | parser->index++; 1216 | parser->header_state = (c == 'o' ? h_CO : h_general); 1217 | break; 1218 | 1219 | case h_CO: 1220 | parser->index++; 1221 | parser->header_state = (c == 'n' ? h_CON : h_general); 1222 | break; 1223 | 1224 | case h_CON: 1225 | parser->index++; 1226 | switch (c) { 1227 | case 'n': 1228 | parser->header_state = h_matching_connection; 1229 | break; 1230 | case 't': 1231 | parser->header_state = h_matching_content_length; 1232 | break; 1233 | default: 1234 | parser->header_state = h_general; 1235 | break; 1236 | } 1237 | break; 1238 | 1239 | /* connection */ 1240 | 1241 | case h_matching_connection: 1242 | parser->index++; 1243 | if (parser->index > sizeof(CONNECTION)-1 1244 | || c != CONNECTION[parser->index]) { 1245 | parser->header_state = h_general; 1246 | } else if (parser->index == sizeof(CONNECTION)-2) { 1247 | parser->header_state = h_connection; 1248 | } 1249 | break; 1250 | 1251 | /* proxy-connection */ 1252 | 1253 | case h_matching_proxy_connection: 1254 | parser->index++; 1255 | if (parser->index > sizeof(PROXY_CONNECTION)-1 1256 | || c != PROXY_CONNECTION[parser->index]) { 1257 | parser->header_state = h_general; 1258 | } else if (parser->index == sizeof(PROXY_CONNECTION)-2) { 1259 | parser->header_state = h_connection; 1260 | } 1261 | break; 1262 | 1263 | /* content-length */ 1264 | 1265 | case h_matching_content_length: 1266 | parser->index++; 1267 | if (parser->index > sizeof(CONTENT_LENGTH)-1 1268 | || c != CONTENT_LENGTH[parser->index]) { 1269 | parser->header_state = h_general; 1270 | } else if (parser->index == sizeof(CONTENT_LENGTH)-2) { 1271 | 
parser->header_state = h_content_length; 1272 | } 1273 | break; 1274 | 1275 | /* transfer-encoding */ 1276 | 1277 | case h_matching_transfer_encoding: 1278 | parser->index++; 1279 | if (parser->index > sizeof(TRANSFER_ENCODING)-1 1280 | || c != TRANSFER_ENCODING[parser->index]) { 1281 | parser->header_state = h_general; 1282 | } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) { 1283 | parser->header_state = h_transfer_encoding; 1284 | } 1285 | break; 1286 | 1287 | /* upgrade */ 1288 | 1289 | case h_matching_upgrade: 1290 | parser->index++; 1291 | if (parser->index > sizeof(UPGRADE)-1 1292 | || c != UPGRADE[parser->index]) { 1293 | parser->header_state = h_general; 1294 | } else if (parser->index == sizeof(UPGRADE)-2) { 1295 | parser->header_state = h_upgrade; 1296 | } 1297 | break; 1298 | 1299 | case h_connection: 1300 | case h_content_length: 1301 | case h_transfer_encoding: 1302 | case h_upgrade: 1303 | if (ch != ' ') parser->header_state = h_general; 1304 | break; 1305 | 1306 | default: 1307 | assert(0 && "Unknown header_state"); 1308 | break; 1309 | } 1310 | break; 1311 | } 1312 | 1313 | if (ch == ':') { 1314 | parser->state = s_header_value_start; 1315 | CALLBACK_DATA(header_field); 1316 | break; 1317 | } 1318 | 1319 | if (ch == CR) { 1320 | parser->state = s_header_almost_done; 1321 | CALLBACK_DATA(header_field); 1322 | break; 1323 | } 1324 | 1325 | if (ch == LF) { 1326 | parser->state = s_header_field_start; 1327 | CALLBACK_DATA(header_field); 1328 | break; 1329 | } 1330 | 1331 | SET_ERRNO(HPE_INVALID_HEADER_TOKEN); 1332 | goto error; 1333 | } 1334 | 1335 | case s_header_value_start: 1336 | { 1337 | if (ch == ' ' || ch == '\t') break; 1338 | 1339 | MARK(header_value); 1340 | 1341 | parser->state = s_header_value; 1342 | parser->index = 0; 1343 | 1344 | if (ch == CR) { 1345 | parser->header_state = h_general; 1346 | parser->state = s_header_almost_done; 1347 | CALLBACK_DATA(header_value); 1348 | break; 1349 | } 1350 | 1351 | if (ch == LF) { 1352 | 
parser->state = s_header_field_start; 1353 | CALLBACK_DATA(header_value); 1354 | break; 1355 | } 1356 | 1357 | c = LOWER(ch); 1358 | 1359 | switch (parser->header_state) { 1360 | case h_upgrade: 1361 | parser->flags |= F_UPGRADE; 1362 | parser->header_state = h_general; 1363 | break; 1364 | 1365 | case h_transfer_encoding: 1366 | /* looking for 'Transfer-Encoding: chunked' */ 1367 | if ('c' == c) { 1368 | parser->header_state = h_matching_transfer_encoding_chunked; 1369 | } else { 1370 | parser->header_state = h_general; 1371 | } 1372 | break; 1373 | 1374 | case h_content_length: 1375 | if (!IS_NUM(ch)) { 1376 | SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); 1377 | goto error; 1378 | } 1379 | 1380 | parser->content_length = ch - '0'; 1381 | break; 1382 | 1383 | case h_connection: 1384 | /* looking for 'Connection: keep-alive' */ 1385 | if (c == 'k') { 1386 | parser->header_state = h_matching_connection_keep_alive; 1387 | /* looking for 'Connection: close' */ 1388 | } else if (c == 'c') { 1389 | parser->header_state = h_matching_connection_close; 1390 | } else { 1391 | parser->header_state = h_general; 1392 | } 1393 | break; 1394 | 1395 | default: 1396 | parser->header_state = h_general; 1397 | break; 1398 | } 1399 | break; 1400 | } 1401 | 1402 | case s_header_value: 1403 | { 1404 | 1405 | if (ch == CR) { 1406 | parser->state = s_header_almost_done; 1407 | CALLBACK_DATA(header_value); 1408 | break; 1409 | } 1410 | 1411 | if (ch == LF) { 1412 | parser->state = s_header_almost_done; 1413 | CALLBACK_DATA_NOADVANCE(header_value); 1414 | goto reexecute_byte; 1415 | } 1416 | 1417 | c = LOWER(ch); 1418 | 1419 | switch (parser->header_state) { 1420 | case h_general: 1421 | break; 1422 | 1423 | case h_connection: 1424 | case h_transfer_encoding: 1425 | assert(0 && "Shouldn't get here."); 1426 | break; 1427 | 1428 | case h_content_length: 1429 | if (ch == ' ') break; 1430 | if (!IS_NUM(ch)) { 1431 | SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); 1432 | goto error; 1433 | } 1434 | 1435 | 
parser->content_length *= 10; 1436 | parser->content_length += ch - '0'; 1437 | break; 1438 | 1439 | /* Transfer-Encoding: chunked */ 1440 | case h_matching_transfer_encoding_chunked: 1441 | parser->index++; 1442 | if (parser->index > sizeof(CHUNKED)-1 1443 | || c != CHUNKED[parser->index]) { 1444 | parser->header_state = h_general; 1445 | } else if (parser->index == sizeof(CHUNKED)-2) { 1446 | parser->header_state = h_transfer_encoding_chunked; 1447 | } 1448 | break; 1449 | 1450 | /* looking for 'Connection: keep-alive' */ 1451 | case h_matching_connection_keep_alive: 1452 | parser->index++; 1453 | if (parser->index > sizeof(KEEP_ALIVE)-1 1454 | || c != KEEP_ALIVE[parser->index]) { 1455 | parser->header_state = h_general; 1456 | } else if (parser->index == sizeof(KEEP_ALIVE)-2) { 1457 | parser->header_state = h_connection_keep_alive; 1458 | } 1459 | break; 1460 | 1461 | /* looking for 'Connection: close' */ 1462 | case h_matching_connection_close: 1463 | parser->index++; 1464 | if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) { 1465 | parser->header_state = h_general; 1466 | } else if (parser->index == sizeof(CLOSE)-2) { 1467 | parser->header_state = h_connection_close; 1468 | } 1469 | break; 1470 | 1471 | case h_transfer_encoding_chunked: 1472 | case h_connection_keep_alive: 1473 | case h_connection_close: 1474 | if (ch != ' ') parser->header_state = h_general; 1475 | break; 1476 | 1477 | default: 1478 | parser->state = s_header_value; 1479 | parser->header_state = h_general; 1480 | break; 1481 | } 1482 | break; 1483 | } 1484 | 1485 | case s_header_almost_done: 1486 | { 1487 | STRICT_CHECK(ch != LF); 1488 | 1489 | parser->state = s_header_value_lws; 1490 | 1491 | switch (parser->header_state) { 1492 | case h_connection_keep_alive: 1493 | parser->flags |= F_CONNECTION_KEEP_ALIVE; 1494 | break; 1495 | case h_connection_close: 1496 | parser->flags |= F_CONNECTION_CLOSE; 1497 | break; 1498 | case h_transfer_encoding_chunked: 1499 | parser->flags |= 
F_CHUNKED; 1500 | break; 1501 | default: 1502 | break; 1503 | } 1504 | 1505 | break; 1506 | } 1507 | 1508 | case s_header_value_lws: 1509 | { 1510 | if (ch == ' ' || ch == '\t') 1511 | parser->state = s_header_value_start; 1512 | else 1513 | { 1514 | parser->state = s_header_field_start; 1515 | goto reexecute_byte; 1516 | } 1517 | break; 1518 | } 1519 | 1520 | case s_headers_almost_done: 1521 | { 1522 | STRICT_CHECK(ch != LF); 1523 | 1524 | if (parser->flags & F_TRAILING) { 1525 | /* End of a chunked request */ 1526 | parser->state = NEW_MESSAGE(); 1527 | CALLBACK_NOTIFY(message_complete); 1528 | break; 1529 | } 1530 | 1531 | parser->state = s_headers_done; 1532 | 1533 | /* Set this here so that on_headers_complete() callbacks can see it */ 1534 | parser->upgrade = 1535 | (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT); 1536 | 1537 | /* Here we call the headers_complete callback. This is somewhat 1538 | * different than other callbacks because if the user returns 1, we 1539 | * will interpret that as saying that this message has no body. This 1540 | * is needed for the annoying case of recieving a response to a HEAD 1541 | * request. 1542 | * 1543 | * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so 1544 | * we have to simulate it by handling a change in errno below. 1545 | */ 1546 | if (settings->on_headers_complete) { 1547 | switch (settings->on_headers_complete(parser)) { 1548 | case 0: 1549 | break; 1550 | 1551 | case 1: 1552 | parser->flags |= F_SKIPBODY; 1553 | break; 1554 | 1555 | default: 1556 | SET_ERRNO(HPE_CB_headers_complete); 1557 | return p - data; /* Error */ 1558 | } 1559 | } 1560 | 1561 | if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { 1562 | return p - data; 1563 | } 1564 | 1565 | goto reexecute_byte; 1566 | } 1567 | 1568 | case s_headers_done: 1569 | { 1570 | STRICT_CHECK(ch != LF); 1571 | 1572 | parser->nread = 0; 1573 | 1574 | /* Exit, the rest of the connect is in a different protocol. 
*/ 1575 | if (parser->upgrade) { 1576 | parser->state = NEW_MESSAGE(); 1577 | CALLBACK_NOTIFY(message_complete); 1578 | return (p - data) + 1; 1579 | } 1580 | 1581 | if (parser->flags & F_SKIPBODY) { 1582 | parser->state = NEW_MESSAGE(); 1583 | CALLBACK_NOTIFY(message_complete); 1584 | } else if (parser->flags & F_CHUNKED) { 1585 | /* chunked encoding - ignore Content-Length header */ 1586 | parser->state = s_chunk_size_start; 1587 | } else { 1588 | if (parser->content_length == 0) { 1589 | /* Content-Length header given but zero: Content-Length: 0\r\n */ 1590 | parser->state = NEW_MESSAGE(); 1591 | CALLBACK_NOTIFY(message_complete); 1592 | } else if (parser->content_length > 0) { 1593 | /* Content-Length header given and non-zero */ 1594 | parser->state = s_body_identity; 1595 | } else { 1596 | if (parser->type == HTTP_REQUEST || 1597 | !http_message_needs_eof(parser)) { 1598 | /* Assume content-length 0 - read the next */ 1599 | parser->state = NEW_MESSAGE(); 1600 | CALLBACK_NOTIFY(message_complete); 1601 | } else { 1602 | /* Read body until EOF */ 1603 | parser->state = s_body_identity_eof; 1604 | } 1605 | } 1606 | } 1607 | 1608 | break; 1609 | } 1610 | 1611 | case s_body_identity: 1612 | { 1613 | uint64_t to_read = MIN(parser->content_length, (data + len) - p); 1614 | 1615 | assert(parser->content_length > 0); 1616 | 1617 | /* The difference between advancing content_length and p is because 1618 | * the latter will automaticaly advance on the next loop iteration. 1619 | * Further, if content_length ends up at 0, we want to see the last 1620 | * byte again for our message complete callback. 1621 | */ 1622 | MARK(body); 1623 | parser->content_length -= to_read; 1624 | p += to_read - 1; 1625 | 1626 | if (parser->content_length == 0) { 1627 | parser->state = s_message_done; 1628 | 1629 | /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte. 
1630 | * 1631 | * The alternative to doing this is to wait for the next byte to 1632 | * trigger the data callback, just as in every other case. The 1633 | * problem with this is that this makes it difficult for the test 1634 | * harness to distinguish between complete-on-EOF and 1635 | * complete-on-length. It's not clear that this distinction is 1636 | * important for applications, but let's keep it for now. 1637 | */ 1638 | CALLBACK_DATA_(body, p - body_mark + 1, p - data); 1639 | goto reexecute_byte; 1640 | } 1641 | 1642 | break; 1643 | } 1644 | 1645 | /* read until EOF */ 1646 | case s_body_identity_eof: 1647 | MARK(body); 1648 | p = data + len - 1; 1649 | 1650 | break; 1651 | 1652 | case s_message_done: 1653 | parser->state = NEW_MESSAGE(); 1654 | CALLBACK_NOTIFY(message_complete); 1655 | break; 1656 | 1657 | case s_chunk_size_start: 1658 | { 1659 | assert(parser->nread == 1); 1660 | assert(parser->flags & F_CHUNKED); 1661 | 1662 | unhex_val = unhex[(unsigned char)ch]; 1663 | if (unhex_val == -1) { 1664 | SET_ERRNO(HPE_INVALID_CHUNK_SIZE); 1665 | goto error; 1666 | } 1667 | 1668 | parser->content_length = unhex_val; 1669 | parser->state = s_chunk_size; 1670 | break; 1671 | } 1672 | 1673 | case s_chunk_size: 1674 | { 1675 | assert(parser->flags & F_CHUNKED); 1676 | 1677 | if (ch == CR) { 1678 | parser->state = s_chunk_size_almost_done; 1679 | break; 1680 | } 1681 | 1682 | unhex_val = unhex[(unsigned char)ch]; 1683 | 1684 | if (unhex_val == -1) { 1685 | if (ch == ';' || ch == ' ') { 1686 | parser->state = s_chunk_parameters; 1687 | break; 1688 | } 1689 | 1690 | SET_ERRNO(HPE_INVALID_CHUNK_SIZE); 1691 | goto error; 1692 | } 1693 | 1694 | parser->content_length *= 16; 1695 | parser->content_length += unhex_val; 1696 | break; 1697 | } 1698 | 1699 | case s_chunk_parameters: 1700 | { 1701 | assert(parser->flags & F_CHUNKED); 1702 | /* just ignore the chunk extensions here.
TODO check for overflow */ 1703 | if (ch == CR) { 1704 | parser->state = s_chunk_size_almost_done; 1705 | break; 1706 | } 1707 | break; 1708 | } 1709 | 1710 | case s_chunk_size_almost_done: 1711 | { 1712 | assert(parser->flags & F_CHUNKED); 1713 | STRICT_CHECK(ch != LF); 1714 | 1715 | parser->nread = 0; 1716 | 1717 | if (parser->content_length == 0) { 1718 | parser->flags |= F_TRAILING; 1719 | parser->state = s_header_field_start; 1720 | } else { 1721 | parser->state = s_chunk_data; 1722 | } 1723 | break; 1724 | } 1725 | 1726 | case s_chunk_data: 1727 | { 1728 | uint64_t to_read = MIN(parser->content_length, (data + len) - p); 1729 | 1730 | assert(parser->flags & F_CHUNKED); 1731 | assert(parser->content_length > 0); 1732 | 1733 | /* See the explanation in s_body_identity for why the content 1734 | * length and data pointers are managed this way. 1735 | */ 1736 | MARK(body); 1737 | parser->content_length -= to_read; 1738 | p += to_read - 1; 1739 | 1740 | if (parser->content_length == 0) { 1741 | parser->state = s_chunk_data_almost_done; 1742 | } 1743 | 1744 | break; 1745 | } 1746 | 1747 | case s_chunk_data_almost_done: 1748 | assert(parser->flags & F_CHUNKED); 1749 | assert(parser->content_length == 0); 1750 | STRICT_CHECK(ch != CR); 1751 | parser->state = s_chunk_data_done; 1752 | CALLBACK_DATA(body); 1753 | break; 1754 | 1755 | case s_chunk_data_done: 1756 | assert(parser->flags & F_CHUNKED); 1757 | STRICT_CHECK(ch != LF); 1758 | parser->nread = 0; 1759 | parser->state = s_chunk_size_start; 1760 | break; 1761 | 1762 | default: 1763 | assert(0 && "unhandled state"); 1764 | SET_ERRNO(HPE_INVALID_INTERNAL_STATE); 1765 | goto error; 1766 | } 1767 | } 1768 | 1769 | /* Run callbacks for any marks that we have leftover after we ran out of
1772 | * 1773 | * We use the NOADVANCE() variety of callbacks here because 'p' has already 1774 | * overflowed 'data' and this allows us to correct for the off-by-one that 1775 | * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p' 1776 | * value that's in-bounds). 1777 | */ 1778 | 1779 | assert(((header_field_mark ? 1 : 0) + 1780 | (header_value_mark ? 1 : 0) + 1781 | (url_mark ? 1 : 0) + 1782 | (body_mark ? 1 : 0)) <= 1); 1783 | 1784 | CALLBACK_DATA_NOADVANCE(header_field); 1785 | CALLBACK_DATA_NOADVANCE(header_value); 1786 | CALLBACK_DATA_NOADVANCE(url); 1787 | CALLBACK_DATA_NOADVANCE(body); 1788 | 1789 | return len; 1790 | 1791 | error: 1792 | if (HTTP_PARSER_ERRNO(parser) == HPE_OK) { 1793 | SET_ERRNO(HPE_UNKNOWN); 1794 | } 1795 | 1796 | return (p - data); 1797 | } 1798 | 1799 | 1800 | /* Does the parser need to see an EOF to find the end of the message? */ 1801 | int 1802 | http_message_needs_eof (http_parser *parser) 1803 | { 1804 | if (parser->type == HTTP_REQUEST) { 1805 | return 0; 1806 | } 1807 | 1808 | /* See RFC 2616 section 4.4 */ 1809 | if (parser->status_code / 100 == 1 || /* 1xx e.g. 
Continue */ 1810 | parser->status_code == 204 || /* No Content */ 1811 | parser->status_code == 304 || /* Not Modified */ 1812 | parser->flags & F_SKIPBODY) { /* response to a HEAD request */ 1813 | return 0; 1814 | } 1815 | 1816 | if ((parser->flags & F_CHUNKED) || parser->content_length >= 0) { 1817 | return 0; 1818 | } 1819 | 1820 | return 1; 1821 | } 1822 | 1823 | 1824 | int 1825 | http_should_keep_alive (http_parser *parser) 1826 | { 1827 | if (parser->http_major > 0 && parser->http_minor > 0) { 1828 | /* HTTP/1.1 */ 1829 | if (parser->flags & F_CONNECTION_CLOSE) { 1830 | return 0; 1831 | } 1832 | } else { 1833 | /* HTTP/1.0 or earlier */ 1834 | if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) { 1835 | return 0; 1836 | } 1837 | } 1838 | 1839 | return !http_message_needs_eof(parser); 1840 | } 1841 | 1842 | 1843 | const char * http_method_str (enum http_method m) 1844 | { 1845 | return method_strings[m]; 1846 | } 1847 | 1848 | 1849 | void 1850 | http_parser_init (http_parser *parser, enum http_parser_type t) 1851 | { 1852 | parser->type = t; 1853 | parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? 
s_start_res : s_start_req_or_res)); 1854 | parser->nread = 0; 1855 | parser->upgrade = 0; 1856 | parser->flags = 0; 1857 | parser->method = 0; 1858 | parser->http_errno = HPE_OK; /* NOTE(review): content_length, http_major and http_minor are not reset here — presumably set during parsing; confirm callers don't read them before then. */ 1859 | } 1860 | 1861 | /* Name / description lookups for an http_errno value. NOTE(review): the 1862 | * range asserts below compile out under NDEBUG, so an out-of-range err then 1863 | * reads past http_strerror_tab. */ 1861 | const char * 1862 | http_errno_name(enum http_errno err) { 1863 | assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0]))); 1864 | return http_strerror_tab[err].name; 1865 | } 1866 | 1867 | const char * 1868 | http_errno_description(enum http_errno err) { 1869 | assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0]))); 1870 | return http_strerror_tab[err].description; 1871 | } 1872 | 1873 | int 1874 | http_parser_parse_url(const char *buf, size_t buflen, int is_connect, 1875 | struct http_parser_url *u) 1876 | { 1877 | enum state s; 1878 | const char *p; 1879 | enum http_parser_url_fields uf, old_uf; 1880 | 1881 | u->port = u->field_set = 0; 1882 | s = s_req_spaces_before_url; 1883 | uf = old_uf = UF_MAX; 1884 | 1885 | for (p = buf; p < buf + buflen; p++) { 1886 | if ((s = parse_url_char(s, *p, is_connect)) == s_dead) { 1887 | return 1; 1888 | } 1889 | 1890 | /* Figure out the next field that we're operating on */ 1891 | switch (s) { 1892 | case s_req_schema: 1893 | case s_req_schema_slash: 1894 | case s_req_schema_slash_slash: 1895 | uf = UF_SCHEMA; 1896 | break; 1897 | 1898 | case s_req_host: 1899 | uf = UF_HOST; 1900 | break; 1901 | 1902 | case s_req_port: 1903 | uf = UF_PORT; 1904 | break; 1905 | 1906 | case s_req_path: 1907 | uf = UF_PATH; 1908 | break; 1909 | 1910 | case s_req_query_string_start: 1911 | case s_req_query_string: 1912 | uf = UF_QUERY; 1913 | break; 1914 | 1915 | case s_req_fragment_start: 1916 | case s_req_fragment: 1917 | uf = UF_FRAGMENT; 1918 | break; 1919 | 1920 | default: 1921 | assert(!"Unexpected state"); 1922 | return 1; 1923 | } 1924 | 1925 | /* Nothing's changed; soldier on */ 1926 | if (uf == old_uf) { 1927 | u->field_data[uf].len++; 1928 | continue; 1929 | } 1930 | 1931 | /* We ignore the first character in
some fields; without this, we end up 1932 | * with the query being "?foo=bar" rather than "foo=bar". Callers probably 1933 | * don't want this. 1934 | */ 1935 | switch (uf) { 1936 | case UF_QUERY: 1937 | case UF_FRAGMENT: 1938 | case UF_PORT: 1939 | u->field_data[uf].off = p - buf + 1; 1940 | u->field_data[uf].len = 0; 1941 | break; 1942 | 1943 | default: 1944 | u->field_data[uf].off = p - buf; 1945 | u->field_data[uf].len = 1; 1946 | break; 1947 | } 1948 | 1949 | u->field_set |= (1 << uf); 1950 | old_uf = uf; 1951 | } 1952 | 1953 | if (u->field_set & (1 << UF_PORT)) { 1954 | /* Don't bother with endp; we've already validated the string */ 1955 | unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10); 1956 | 1957 | /* Ports are 16 bits wide: maximum value 0xffff (65535) */ 1958 | if (v > 0xffff) { 1959 | return 1; 1960 | } 1961 | 1962 | u->port = (uint16_t) v; 1963 | } 1964 | 1965 | return 0; 1966 | } 1967 | 1968 | void 1969 | http_parser_pause(http_parser *parser, int paused) { 1970 | /* Users should only be pausing/unpausing a parser that is not in an error 1971 | * state. In non-debug builds, there's not much that we can do about this 1972 | * other than ignore it. 1973 | */ 1974 | if (HTTP_PARSER_ERRNO(parser) == HPE_OK || 1975 | HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) { 1976 | SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK); 1977 | } else { 1978 | assert(0 && "Attempting to pause parser in error state"); 1979 | } 1980 | } 1981 | --------------------------------------------------------------------------------