├── run.sh
├── .travis.yml
├── .gitignore
├── .mailmap
├── LICENSE-MIT
├── contrib
    ├── url_parser.c
    └── parsertrace.c
├── AUTHORS
├── http_parser.gyp
├── bench.c
├── Makefile
├── demo.c
├── README.md
├── http_parser.h
└── http_parser.c


/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # make
3 | # make parsertrace
4 | # make url_parser
5 | # make http_parser.o
6 | # gcc -Wall -Wextra -O3 http_parser.o demo.c -o demo -g
7 | # ./demo 
8 | 
9 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: c
 2 | 
 3 | compiler:
 4 |   - clang
 5 |   - gcc
 6 | 
 7 | script:
 8 |   - "make"
 9 | 
10 | notifications:
11 |   email: false
12 |   irc:
13 |     - "irc.freenode.net#node-ci"
14 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | /out/
 2 | core
 3 | tags
 4 | *.o
 5 | test
 6 | test_g
 7 | test_fast
 8 | bench
 9 | url_parser
10 | parsertrace
11 | parsertrace_g
12 | *.mk
13 | *.Makefile
14 | *.so.*
15 | *.exe.*
16 | *.exe
17 | *.a
18 | 
19 | 
20 | # Visual Studio uglies
21 | *.suo
22 | *.sln
23 | *.vcxproj
24 | *.vcxproj.filters
25 | *.vcxproj.user
26 | *.opensdf
27 | *.ncrunchsolution*
28 | *.sdf
29 | *.vsp
30 | *.psess
31 | 


--------------------------------------------------------------------------------
/.mailmap:
--------------------------------------------------------------------------------
1 | # update AUTHORS with:
2 | #   git log --all --reverse --format='%aN <%aE>' | perl -ne 'BEGIN{print "# Authors ordered by first contribution.\n"} print unless $h{$_}; $h{$_} = 1' > AUTHORS
3 | Ryan Dahl <ry@tinyclouds.org>
4 | Salman Haq <salman.haq@asti-usa.com>
5 | Simon Zimmermann <simonz05@gmail.com>
6 | Thomas LE ROUX <thomas@november-eleven.fr> LE ROUX Thomas <thomas@procheo.fr>
7 | Thomas LE ROUX <thomas@november-eleven.fr> Thomas LE ROUX <thomas@procheo.fr>
8 | Fedor Indutny <fedor@indutny.com>
9 | 


--------------------------------------------------------------------------------
/LICENSE-MIT:
--------------------------------------------------------------------------------
 1 | http_parser.c is based on src/http/ngx_http_parse.c from NGINX copyright
 2 | Igor Sysoev.
 3 | 
 4 | Additional changes are licensed under the same terms as NGINX and
 5 | copyright Joyent, Inc. and other Node contributors. All rights reserved.
 6 | 
 7 | Permission is hereby granted, free of charge, to any person obtaining a copy
 8 | of this software and associated documentation files (the "Software"), to
 9 | deal in the Software without restriction, including without limitation the
10 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
11 | sell copies of the Software, and to permit persons to whom the Software is
12 | furnished to do so, subject to the following conditions:
13 | 
14 | The above copyright notice and this permission notice shall be included in
15 | all copies or substantial portions of the Software.
16 | 
17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23 | IN THE SOFTWARE. 
24 | 


--------------------------------------------------------------------------------
/contrib/url_parser.c:
--------------------------------------------------------------------------------
 1 | #include "http_parser.h"
 2 | #include <stdio.h>
 3 | #include <string.h>
 4 | 
 5 | void
 6 | dump_url (const char *url, const struct http_parser_url *u)
 7 | {
 8 |   unsigned int i;
 9 | 
10 |   printf("\tfield_set: 0x%x, port: %u\n", u->field_set, u->port);
11 |   for (i = 0; i < UF_MAX; i++) {
12 |     if ((u->field_set & (1 << i)) == 0) {
13 |       printf("\tfield_data[%u]: unset\n", i);
14 |       continue;
15 |     }
16 | 
17 |     printf("\tfield_data[%u]: off: %u, len: %u, part: %.*s\n",
18 |            i,
19 |            u->field_data[i].off,
20 |            u->field_data[i].len,
21 |            u->field_data[i].len,
22 |            url + u->field_data[i].off);
23 |   }
24 | }
25 | 
26 | int main(int argc, char ** argv) {
27 |   struct http_parser_url u;
28 |   int len, connect, result;
29 | 
30 |   if (argc != 3) {
31 |     printf("Syntax : %s connect|get url\n", argv[0]);
32 |     return 1;
33 |   }
34 |   len = strlen(argv[2]);
35 |   connect = strcmp("connect", argv[1]) == 0 ? 1 : 0;
36 |   printf("Parsing %s, connect %d\n", argv[2], connect);
37 | 
38 |   http_parser_url_init(&u);
39 |   result = http_parser_parse_url(argv[2], len, connect, &u);
40 |   if (result != 0) {
41 |     printf("Parse error : %d\n", result);
42 |     return result;
43 |   }
44 |   printf("Parse ok, result : \n");
45 |   dump_url(argv[2], &u);
46 |   return 0;
47 | }
48 | 


--------------------------------------------------------------------------------
/AUTHORS:
--------------------------------------------------------------------------------
 1 | # Authors ordered by first contribution.
 2 | Ryan Dahl <ry@tinyclouds.org>
 3 | Jeremy Hinegardner <jeremy@hinegardner.org>
 4 | Sergey Shepelev <temotor@gmail.com>
 5 | Joe Damato <ice799@gmail.com>
 6 | tomika <tomika_nospam@freemail.hu>
 7 | Phoenix Sol <phoenix@burninglabs.com>
 8 | Cliff Frey <cliff@meraki.com>
 9 | Ewen Cheslack-Postava <ewencp@cs.stanford.edu>
10 | Santiago Gala <sgala@apache.org>
11 | Tim Becker <tim.becker@syngenio.de>
12 | Jeff Terrace <jterrace@gmail.com>
13 | Ben Noordhuis <info@bnoordhuis.nl>
14 | Nathan Rajlich <nathan@tootallnate.net>
15 | Mark Nottingham <mnot@mnot.net>
16 | Aman Gupta <aman@tmm1.net>
17 | Tim Becker <tim.becker@kuriositaet.de>
18 | Sean Cunningham <sean.cunningham@mandiant.com>
19 | Peter Griess <pg@std.in>
20 | Salman Haq <salman.haq@asti-usa.com>
21 | Cliff Frey <clifffrey@gmail.com>
22 | Jon Kolb <jon@b0g.us>
23 | Fouad Mardini <f.mardini@gmail.com>
24 | Paul Querna <pquerna@apache.org>
25 | Felix Geisendörfer <felix@debuggable.com>
26 | koichik <koichik@improvement.jp>
27 | Andre Caron <andre.l.caron@gmail.com>
28 | Ivo Raisr <ivosh@ivosh.net>
29 | James McLaughlin <jamie@lacewing-project.org>
30 | David Gwynne <loki@animata.net>
31 | Thomas LE ROUX <thomas@november-eleven.fr>
32 | Randy Rizun <rrizun@ortivawireless.com>
33 | Andre Louis Caron <andre.louis.caron@usherbrooke.ca>
34 | Simon Zimmermann <simonz05@gmail.com>
35 | Erik Dubbelboer <erik@dubbelboer.com>
36 | Martell Malone <martellmalone@gmail.com>
37 | Bertrand Paquet <bpaquet@octo.com>
38 | BogDan Vatra <bogdan@kde.org>
39 | Peter Faiman <peter@thepicard.org>
40 | Corey Richardson <corey@octayn.net>
41 | Tóth Tamás <tomika_nospam@freemail.hu>
42 | Cam Swords <cam.swords@gmail.com>
43 | Chris Dickinson <christopher.s.dickinson@gmail.com>
44 | Uli Köhler <ukoehler@btronik.de>
45 | Charlie Somerville <charlie@charliesomerville.com>
46 | Patrik Stutz <patrik.stutz@gmail.com>
47 | Fedor Indutny <fedor.indutny@gmail.com>
48 | runner <runner.mei@gmail.com>
49 | Alexis Campailla <alexis@janeasystems.com>
50 | David Wragg <david@wragg.org>
51 | Vinnie Falco <vinnie.falco@gmail.com>
52 | Alex Butum <alexbutum@linux.com>
53 | Rex Feng <rexfeng@gmail.com>
54 | Alex Kocharin <alex@kocharin.ru>
55 | Mark Koopman <markmontymark@yahoo.com>
56 | Helge Heß <me@helgehess.eu>
57 | Alexis La Goutte <alexis.lagoutte@gmail.com>
58 | George Miroshnykov <george.miroshnykov@gmail.com>
59 | Maciej Małecki <me@mmalecki.com>
60 | Marc O'Morain <github.com@marcomorain.com>
61 | Jeff Pinner <jpinner@twitter.com>
62 | Timothy J Fontaine <tjfontaine@gmail.com>
63 | Akagi201 <akagi201@gmail.com>
64 | Romain Giraud <giraud.romain@gmail.com>
65 | Jay Satiro <raysatiro@yahoo.com>
66 | Arne Steen <Arne.Steen@gmx.de>
67 | Kjell Schubert <kjell.schubert@gmail.com>
68 | Olivier Mengué <dolmen@cpan.org>
69 | 


--------------------------------------------------------------------------------
/http_parser.gyp:
--------------------------------------------------------------------------------
  1 | # This file is used with the GYP meta build system.
  2 | # http://code.google.com/p/gyp/
  3 | # To build try this:
  4 | #   svn co http://gyp.googlecode.com/svn/trunk gyp
  5 | #   ./gyp/gyp -f make --depth=`pwd` http_parser.gyp 
  6 | #   ./out/Debug/test 
  7 | {
  8 |   'target_defaults': {
  9 |     'default_configuration': 'Debug',
 10 |     'configurations': {
 11 |       # TODO: hoist these out and put them somewhere common, because
 12 |       #       RuntimeLibrary MUST MATCH across the entire project
 13 |       'Debug': {
 14 |         'defines': [ 'DEBUG', '_DEBUG' ],
 15 |         'cflags': [ '-Wall', '-Wextra', '-O0', '-g', '-ftrapv' ],
 16 |         'msvs_settings': {
 17 |           'VCCLCompilerTool': {
 18 |             'RuntimeLibrary': 1, # static debug
 19 |           },
 20 |         },
 21 |       },
 22 |       'Release': {
 23 |         'defines': [ 'NDEBUG' ],
 24 |         'cflags': [ '-Wall', '-Wextra', '-O3' ],
 25 |         'msvs_settings': {
 26 |           'VCCLCompilerTool': {
 27 |             'RuntimeLibrary': 0, # static release
 28 |           },
 29 |         },
 30 |       }
 31 |     },
 32 |     'msvs_settings': {
 33 |       'VCCLCompilerTool': {
 34 |       },
 35 |       'VCLibrarianTool': {
 36 |       },
 37 |       'VCLinkerTool': {
 38 |         'GenerateDebugInformation': 'true',
 39 |       },
 40 |     },
 41 |     'conditions': [
 42 |       ['OS == "win"', {
 43 |         'defines': [
 44 |           'WIN32'
 45 |         ],
 46 |       }]
 47 |     ],
 48 |   },
 49 | 
 50 |   'targets': [
 51 |     {
 52 |       'target_name': 'http_parser',
 53 |       'type': 'static_library',
 54 |       'include_dirs': [ '.' ],
 55 |       'direct_dependent_settings': {
 56 |         'defines': [ 'HTTP_PARSER_STRICT=0' ],
 57 |         'include_dirs': [ '.' ],
 58 |       },
 59 |       'defines': [ 'HTTP_PARSER_STRICT=0' ],
 60 |       'sources': [ './http_parser.c', ],
 61 |       'conditions': [
 62 |         ['OS=="win"', {
 63 |           'msvs_settings': {
 64 |             'VCCLCompilerTool': {
 65 |               # Compile as C++. http_parser.c is actually C99, but C++ is
 66 |               # close enough in this case.
 67 |               'CompileAs': 2,
 68 |             },
 69 |           },
 70 |         }]
 71 |       ],
 72 |     },
 73 | 
 74 |     {
 75 |       'target_name': 'http_parser_strict',
 76 |       'type': 'static_library',
 77 |       'include_dirs': [ '.' ],
 78 |       'direct_dependent_settings': {
 79 |         'defines': [ 'HTTP_PARSER_STRICT=1' ],
 80 |         'include_dirs': [ '.' ],
 81 |       },
 82 |       'defines': [ 'HTTP_PARSER_STRICT=1' ],
 83 |       'sources': [ './http_parser.c', ],
 84 |       'conditions': [
 85 |         ['OS=="win"', {
 86 |           'msvs_settings': {
 87 |             'VCCLCompilerTool': {
 88 |               # Compile as C++. http_parser.c is actually C99, but C++ is
 89 |               # close enough in this case.
 90 |               'CompileAs': 2,
 91 |             },
 92 |           },
 93 |         }]
 94 |       ],
 95 |     },
 96 | 
 97 |     {
 98 |       'target_name': 'test-nonstrict',
 99 |       'type': 'executable',
100 |       'dependencies': [ 'http_parser' ],
101 |       'sources': [ 'test.c' ]
102 |     },
103 | 
104 |     {
105 |       'target_name': 'test-strict',
106 |       'type': 'executable',
107 |       'dependencies': [ 'http_parser_strict' ],
108 |       'sources': [ 'test.c' ]
109 |     }
110 |   ]
111 | }
112 | 


--------------------------------------------------------------------------------
/bench.c:
--------------------------------------------------------------------------------
  1 | /* Copyright Fedor Indutny. All rights reserved.
  2 |  *
  3 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  4 |  * of this software and associated documentation files (the "Software"), to
  5 |  * deal in the Software without restriction, including without limitation the
  6 |  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  7 |  * sell copies of the Software, and to permit persons to whom the Software is
  8 |  * furnished to do so, subject to the following conditions:
  9 |  *
 10 |  * The above copyright notice and this permission notice shall be included in
 11 |  * all copies or substantial portions of the Software.
 12 |  *
 13 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 14 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 15 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 16 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 17 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 18 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 19 |  * IN THE SOFTWARE.
 20 |  */
 21 | #include "http_parser.h"
 22 | #include <assert.h>
 23 | #include <stdio.h>
 24 | #include <string.h>
 25 | #include <sys/time.h>
 26 | 
 27 | static const char data[] =
 28 |     "POST /joyent/http-parser HTTP/1.1\r\n"
 29 |     "Host: github.com\r\n"
 30 |     "DNT: 1\r\n"
 31 |     "Accept-Encoding: gzip, deflate, sdch\r\n"
 32 |     "Accept-Language: ru-RU,ru;q=0.8,en-US;q=0.6,en;q=0.4\r\n"
 33 |     "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) "
 34 |         "AppleWebKit/537.36 (KHTML, like Gecko) "
 35 |         "Chrome/39.0.2171.65 Safari/537.36\r\n"
 36 |     "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,"
 37 |         "image/webp,*/*;q=0.8\r\n"
 38 |     "Referer: https://github.com/joyent/http-parser\r\n"
 39 |     "Connection: keep-alive\r\n"
 40 |     "Transfer-Encoding: chunked\r\n"
 41 |     "Cache-Control: max-age=0\r\n\r\nb\r\nhello world\r\n0\r\n\r\n";
 42 | static const size_t data_len = sizeof(data) - 1;
 43 | 
 44 | static int on_info(http_parser* p) {
 45 |   return 0;
 46 | }
 47 | 
 48 | 
 49 | static int on_data(http_parser* p, const char *at, size_t length) {
 50 |   return 0;
 51 | }
 52 | 
 53 | static http_parser_settings settings = {
 54 |   .on_message_begin = on_info,
 55 |   .on_headers_complete = on_info,
 56 |   .on_message_complete = on_info,
 57 |   .on_header_field = on_data,
 58 |   .on_header_value = on_data,
 59 |   .on_url = on_data,
 60 |   .on_status = on_data,
 61 |   .on_body = on_data
 62 | };
 63 | 
 64 | int bench(int iter_count, int silent) {
 65 |   struct http_parser parser;
 66 |   int i;
 67 |   int err;
 68 |   struct timeval start;
 69 |   struct timeval end;
 70 |   float rps;
 71 | 
 72 |   if (!silent) {
 73 |     err = gettimeofday(&start, NULL);
 74 |     assert(err == 0);
 75 |   }
 76 | 
 77 |   for (i = 0; i < iter_count; i++) {
 78 |     size_t parsed;
 79 |     http_parser_init(&parser, HTTP_REQUEST);
 80 | 
 81 |     parsed = http_parser_execute(&parser, &settings, data, data_len);
 82 |     assert(parsed == data_len);
 83 |   }
 84 | 
 85 |   if (!silent) {
 86 |     err = gettimeofday(&end, NULL);
 87 |     assert(err == 0);
 88 | 
 89 |     fprintf(stdout, "Benchmark result:\n");
 90 | 
 91 |     rps = (float) (end.tv_sec - start.tv_sec) +
 92 |           (end.tv_usec - start.tv_usec) * 1e-6f;
 93 |     fprintf(stdout, "Took %f seconds to run\n", rps);
 94 | 
 95 |     rps = (float) iter_count / rps;
 96 |     fprintf(stdout, "%f req/sec\n", rps);
 97 |     fflush(stdout);
 98 |   }
 99 | 
100 |   return 0;
101 | }
102 | 
/* Entry point. With the single argument "infinite" the benchmark is repeated
 * silently forever; otherwise it runs once and prints its timing. */
int main(int argc, char** argv) {
  const int endless = (argc == 2 && strcmp(argv[1], "infinite") == 0);

  if (!endless) {
    return bench(5000000, 0);
  }

  for (;;) {
    bench(5000000, 1);
  }
  return 0;
}
112 | 


--------------------------------------------------------------------------------
/contrib/parsertrace.c:
--------------------------------------------------------------------------------
  1 | /* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
  2 |  *
  3 |  * Additional changes are licensed under the same terms as NGINX and
  4 |  * copyright Joyent, Inc. and other Node contributors. All rights reserved.
  5 |  *
  6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  7 |  * of this software and associated documentation files (the "Software"), to
  8 |  * deal in the Software without restriction, including without limitation the
  9 |  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 10 |  * sell copies of the Software, and to permit persons to whom the Software is
 11 |  * furnished to do so, subject to the following conditions:
 12 |  *
 13 |  * The above copyright notice and this permission notice shall be included in
 14 |  * all copies or substantial portions of the Software.
 15 |  *
 16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 19 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 21 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 22 |  * IN THE SOFTWARE.
 23 |  */
 24 | 
 25 | /* Dump what the parser finds to stdout as it happens */
 26 | 
 27 | #include "http_parser.h"
 28 | #include <stdio.h>
 29 | #include <stdlib.h>
 30 | #include <string.h>
 31 | 
 32 | int on_message_begin(http_parser* _) {
 33 |   (void)_;
 34 |   printf("\n***MESSAGE BEGIN***\n\n");
 35 |   return 0;
 36 | }
 37 | 
 38 | int on_headers_complete(http_parser* _) {
 39 |   (void)_;
 40 |   printf("\n***HEADERS COMPLETE***\n\n");
 41 |   return 0;
 42 | }
 43 | 
 44 | int on_message_complete(http_parser* _) {
 45 |   (void)_;
 46 |   printf("\n***MESSAGE COMPLETE***\n\n");
 47 |   return 0;
 48 | }
 49 | 
 50 | int on_url(http_parser* _, const char* at, size_t length) {
 51 |   (void)_;
 52 |   printf("Url: %.*s\n", (int)length, at);
 53 |   return 0;
 54 | }
 55 | 
 56 | int on_header_field(http_parser* _, const char* at, size_t length) {
 57 |   (void)_;
 58 |   printf("Header field: %.*s\n", (int)length, at);
 59 |   return 0;
 60 | }
 61 | 
 62 | int on_header_value(http_parser* _, const char* at, size_t length) {
 63 |   (void)_;
 64 |   printf("Header value: %.*s\n", (int)length, at);
 65 |   return 0;
 66 | }
 67 | 
 68 | int on_body(http_parser* _, const char* at, size_t length) {
 69 |   (void)_;
 70 |   printf("Body: %.*s\n", (int)length, at);
 71 |   return 0;
 72 | }
 73 | 
 74 | void usage(const char* name) {
 75 |   fprintf(stderr,
 76 |           "Usage: %s $type $filename\n"
 77 |           "  type: -x, where x is one of {r,b,q}\n"
 78 |           "  parses file as a Response, reQuest, or Both\n",
 79 |           name);
 80 |   exit(EXIT_FAILURE);
 81 | }
 82 | 
 83 | int main(int argc, char* argv[]) {
 84 |   enum http_parser_type file_type;
 85 | 
 86 |   if (argc != 3) {
 87 |     usage(argv[0]);
 88 |   }
 89 | 
 90 |   char* type = argv[1];
 91 |   if (type[0] != '-') {
 92 |     usage(argv[0]);
 93 |   }
 94 | 
 95 |   switch (type[1]) {
 96 |     /* in the case of "-", type[1] will be NUL */
 97 |     case 'r':
 98 |       file_type = HTTP_RESPONSE;
 99 |       break;
100 |     case 'q':
101 |       file_type = HTTP_REQUEST;
102 |       break;
103 |     case 'b':
104 |       file_type = HTTP_BOTH;
105 |       break;
106 |     default:
107 |       usage(argv[0]);
108 |   }
109 | 
110 |   char* filename = argv[2];
111 |   FILE* file = fopen(filename, "r");
112 |   if (file == NULL) {
113 |     perror("fopen");
114 |     goto fail;
115 |   }
116 | 
117 |   fseek(file, 0, SEEK_END);
118 |   long file_length = ftell(file);
119 |   if (file_length == -1) {
120 |     perror("ftell");
121 |     goto fail;
122 |   }
123 |   fseek(file, 0, SEEK_SET);
124 | 
125 |   char* data = malloc(file_length);
126 |   if (fread(data, 1, file_length, file) != (size_t)file_length) {
127 |     fprintf(stderr, "couldn't read entire file\n");
128 |     free(data);
129 |     goto fail;
130 |   }
131 | 
132 |   http_parser_settings settings;
133 |   memset(&settings, 0, sizeof(settings));
134 |   settings.on_message_begin = on_message_begin;
135 |   settings.on_url = on_url;
136 |   settings.on_header_field = on_header_field;
137 |   settings.on_header_value = on_header_value;
138 |   settings.on_headers_complete = on_headers_complete;
139 |   settings.on_body = on_body;
140 |   settings.on_message_complete = on_message_complete;
141 | 
142 |   http_parser parser;
143 |   http_parser_init(&parser, file_type);
144 |   size_t nparsed = http_parser_execute(&parser, &settings, data, file_length);
145 |   free(data);
146 | 
147 |   if (nparsed != (size_t)file_length) {
148 |     fprintf(stderr,
149 |             "Error: %s (%s)\n",
150 |             http_errno_description(HTTP_PARSER_ERRNO(&parser)),
151 |             http_errno_name(HTTP_PARSER_ERRNO(&parser)));
152 |     goto fail;
153 |   }
154 | 
155 |   return EXIT_SUCCESS;
156 | 
157 | fail:
158 |   fclose(file);
159 |   return EXIT_FAILURE;
160 | }
161 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
  1 | # Copyright Joyent, Inc. and other Node contributors. All rights reserved.
  2 | #
  3 | # Permission is hereby granted, free of charge, to any person obtaining a copy
  4 | # of this software and associated documentation files (the "Software"), to
  5 | # deal in the Software without restriction, including without limitation the
  6 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  7 | # sell copies of the Software, and to permit persons to whom the Software is
  8 | # furnished to do so, subject to the following conditions:
  9 | #
 10 | # The above copyright notice and this permission notice shall be included in
 11 | # all copies or substantial portions of the Software.
 12 | #
 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 16 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 18 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 19 | # IN THE SOFTWARE.
 20 | 
 21 | PLATFORM ?= $(shell sh -c 'uname -s | tr "[A-Z]" "[a-z]"')
 22 | HELPER ?=
 23 | BINEXT ?=
 24 | ifeq (darwin,$(PLATFORM))
 25 | SONAME ?= libhttp_parser.2.7.1.dylib
 26 | SOEXT ?= dylib
 27 | else ifeq (wine,$(PLATFORM))
 28 | CC = winegcc
 29 | BINEXT = .exe.so
 30 | HELPER = wine
 31 | else
 32 | SONAME ?= libhttp_parser.so.2.7.1
 33 | SOEXT ?= so
 34 | endif
 35 | 
 36 | CC?=gcc
 37 | AR?=ar
 38 | 
 39 | CPPFLAGS ?=
 40 | LDFLAGS ?=
 41 | 
 42 | CPPFLAGS += -I.
 43 | CPPFLAGS_DEBUG = $(CPPFLAGS) -DHTTP_PARSER_STRICT=1
 44 | CPPFLAGS_DEBUG += $(CPPFLAGS_DEBUG_EXTRA)
 45 | CPPFLAGS_FAST = $(CPPFLAGS) -DHTTP_PARSER_STRICT=0
 46 | CPPFLAGS_FAST += $(CPPFLAGS_FAST_EXTRA)
 47 | CPPFLAGS_BENCH = $(CPPFLAGS_FAST)
 48 | 
 49 | CFLAGS += -Wall -Wextra -Werror
 50 | CFLAGS_DEBUG = $(CFLAGS) -O0 -g $(CFLAGS_DEBUG_EXTRA)
 51 | CFLAGS_FAST = $(CFLAGS) -O3 $(CFLAGS_FAST_EXTRA)
 52 | CFLAGS_BENCH = $(CFLAGS_FAST) -Wno-unused-parameter
 53 | CFLAGS_LIB = $(CFLAGS_FAST) -fPIC
 54 | 
 55 | LDFLAGS_LIB = $(LDFLAGS) -shared
 56 | 
 57 | INSTALL ?= install
 58 | PREFIX ?= $(DESTDIR)/usr
 59 | LIBDIR = $(PREFIX)/lib
 60 | INCLUDEDIR = $(PREFIX)/include
 61 | 
 62 | ifneq (darwin,$(PLATFORM))
 63 | # TODO(bnoordhuis) The native SunOS linker expects -h rather than -soname...
 64 | LDFLAGS_LIB += -Wl,-soname=$(SONAME)
 65 | endif
 66 | 
 67 | test: test_g test_fast demo
 68 | 	$(HELPER) ./test_g$(BINEXT)
 69 | 	$(HELPER) ./test_fast$(BINEXT)
 70 | 
 71 | test_g: http_parser_g.o test_g.o
 72 | 	$(CC) $(CFLAGS_DEBUG) $(LDFLAGS) http_parser_g.o test_g.o -o $@
 73 | 
 74 | demo: http_parser.o demo.o
 75 | 	$(CC) -g http_parser.o demo.o -o $@
 76 | 
 77 | test_g.o: test.c http_parser.h Makefile
 78 | 	$(CC) $(CPPFLAGS_DEBUG) $(CFLAGS_DEBUG) -c test.c -o $@
 79 | 
 80 | http_parser_g.o: http_parser.c http_parser.h Makefile
 81 | 	$(CC) $(CPPFLAGS_DEBUG) $(CFLAGS_DEBUG) -c http_parser.c -o $@
 82 | 
 83 | test_fast: http_parser.o test.o http_parser.h
 84 | 	$(CC) $(CFLAGS_FAST) $(LDFLAGS) http_parser.o test.o -o $@
 85 | 
 86 | test.o: test.c http_parser.h Makefile
 87 | 	$(CC) $(CPPFLAGS_FAST) $(CFLAGS_FAST) -c test.c -o $@
 88 | 
 89 | demo.o:	demo.c http_parser.h Makefile
 90 | 	$(CC) -c -g demo.c -o $@
 91 | 
 92 | bench: http_parser.o bench.o
 93 | 	$(CC) $(CFLAGS_BENCH) $(LDFLAGS) http_parser.o bench.o -o $@
 94 | 
 95 | bench.o: bench.c http_parser.h Makefile
 96 | 	$(CC) $(CPPFLAGS_BENCH) $(CFLAGS_BENCH) -c bench.c -o $@
 97 | 
 98 | http_parser.o: http_parser.c http_parser.h Makefile
 99 | 	$(CC) $(CPPFLAGS_FAST) $(CFLAGS_FAST) -c http_parser.c
100 | 
101 | test-run-timed: test_fast
102 | 	while(true) do time $(HELPER) ./test_fast$(BINEXT) > /dev/null; done
103 | 
104 | test-valgrind: test_g
105 | 	valgrind ./test_g
106 | 
107 | libhttp_parser.o: http_parser.c http_parser.h Makefile
108 | 	$(CC) $(CPPFLAGS_FAST) $(CFLAGS_LIB) -c http_parser.c -o libhttp_parser.o
109 | 
110 | library: libhttp_parser.o
111 | 	$(CC) $(LDFLAGS_LIB) -o $(SONAME) $<
112 | 
113 | package: http_parser.o
114 | 	$(AR) rcs libhttp_parser.a http_parser.o
115 | 
116 | url_parser: http_parser.o contrib/url_parser.c
117 | 	$(CC) $(CPPFLAGS_FAST) $(CFLAGS_FAST) $^ -o $@
118 | 
119 | url_parser_g: http_parser_g.o contrib/url_parser.c
120 | 	$(CC) $(CPPFLAGS_DEBUG) $(CFLAGS_DEBUG) $^ -o $@
121 | 
122 | parsertrace: http_parser.o contrib/parsertrace.c
123 | 	$(CC) $(CPPFLAGS_FAST) $(CFLAGS_FAST) $^ -o parsertrace$(BINEXT)
124 | 
125 | parsertrace_g: http_parser_g.o contrib/parsertrace.c
126 | 	$(CC) $(CPPFLAGS_DEBUG) $(CFLAGS_DEBUG) $^ -o parsertrace_g$(BINEXT)
127 | 
128 | tags: http_parser.c http_parser.h test.c
129 | 	ctags $^
130 | 
131 | install: library
132 | 	$(INSTALL) -D  http_parser.h $(INCLUDEDIR)/http_parser.h
133 | 	$(INSTALL) -D $(SONAME) $(LIBDIR)/$(SONAME)
134 | 	ln -s $(LIBDIR)/$(SONAME) $(LIBDIR)/libhttp_parser.$(SOEXT)
135 | 
136 | install-strip: library
137 | 	$(INSTALL) -D  http_parser.h $(INCLUDEDIR)/http_parser.h
138 | 	$(INSTALL) -D -s $(SONAME) $(LIBDIR)/$(SONAME)
139 | 	ln -s $(LIBDIR)/$(SONAME) $(LIBDIR)/libhttp_parser.$(SOEXT)
140 | 
# Remove the files created by `install` / `install-strip`.
# The symlink name uses $(SOEXT), matching the name `install` creates
# (libhttp_parser.dylib on darwin, libhttp_parser.so elsewhere).
uninstall:
	rm $(INCLUDEDIR)/http_parser.h
	rm $(LIBDIR)/$(SONAME)
	rm $(LIBDIR)/libhttp_parser.$(SOEXT)
145 | 
# Remove build products, including the `demo` and `bench` binaries and the
# darwin shared library (libhttp_parser.<version>.dylib).
clean:
	rm -f *.o *.a tags test test_fast test_g \
		http_parser.tar libhttp_parser.so.* libhttp_parser.*.dylib \
		url_parser url_parser_g parsertrace parsertrace_g \
		demo bench \
		*.exe *.exe.so
151 | 
152 | contrib/url_parser.c:	http_parser.h
153 | contrib/parsertrace.c:	http_parser.h
154 | 
# `test` and `library` never create a file of their own name, so declare them
# phony; otherwise a stray file called `test` or `library` would mask them.
.PHONY: clean package test test-run test-run-timed test-valgrind library install install-strip uninstall
156 | 


--------------------------------------------------------------------------------
/demo.c:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <stdlib.h>
  3 | #include <string.h>
  4 | #include <assert.h>
  5 | #include <time.h>
  6 | #include <sys/types.h>
  7 | #include <sys/socket.h>
  8 | #include <netinet/in.h>
  9 | #include <netdb.h>
 10 | #include <sys/time.h>
 11 | 
 12 | #include "http_parser.h"
 13 | 
 14 | 
 15 | char content_type_flag = 0;
 16 | char content_length_flag = 0;
 17 | 
 18 | #define BUFSIZZ 1024*8
 19 | 
 20 | #define IMAGE_TYPE_PNG "image/png"
 21 | #define IMAGE_TYPE_PNG_EXT ".png"
 22 | #define IMAGE_TYPE_JPEG "image/jpeg"
 23 | #define IMAGE_TYPE_JPEG_EXT ".jpeg"
 24 | #define IMAGE_TYPE_GIF "image/gif"
 25 | #define IMAGE_TYPE_GIF_EXT ".gif"
 26 | #define IMAGE_TYPE_BMP "image/bmp"
 27 | #define IMAGE_TYPE_BMP_EXT ".bmp"
 28 | 
 29 | #define IMAGE_TYPE_DEFAULT_EXT ".html"
 30 | 
 31 | struct http_content{
 32 | 	char type[32];
 33 | 	unsigned int response_size;
 34 | 	unsigned int content_size;
 35 | 	char *content_start;
 36 | 	char header_checked;
 37 | 	char mem_realloced;
 38 | }http_img ;
 39 | 
/* Parser instance used by the demo; main() allocates it with malloc() and
 * initialises it with http_parser_init(). */
static http_parser *parser;
 41 | 
 42 | int on_message_begin(http_parser* _)
 43 | {
 44 | 	(void)_;
 45 | 	printf("\n***MESSAGE BEGIN***\n\n");
 46 | 	return 0;
 47 | }
 48 | 
 49 | int on_headers_complete(http_parser* _)
 50 | {
 51 | 	(void)_;
 52 | 	printf("\n***HEADERS COMPLETE***\n\n");
 53 | 	return 0;
 54 | }
 55 | 
 56 | int on_message_complete(http_parser* _)
 57 | 	{
 58 | 	(void)_;
 59 | 	printf("\n***MESSAGE COMPLETE***\n\n");
 60 | 	return 0;
 61 | }
 62 | 
 63 | int on_url(http_parser* _, const char* at, size_t length)
 64 | {
 65 | 	(void)_;
 66 | 	printf("Url: %.*s\n", (int)length, at);
 67 | 	return 0;
 68 | }
 69 | int on_header_field(http_parser* _, const char* at, size_t length)
 70 | {
 71 | 	(void)_;
 72 | 	printf("Header field: %.*s\n", (int)length, at);
 73 | 	if(!memcmp("Content-Type", at, length))
 74 | 	{
 75 | 		//printf("Found Content-Type\n");
 76 | 		content_type_flag = 1;
 77 | 	}
 78 | 	if(!memcmp("Content-Length", at, length))
 79 | 	{
 80 | 		//printf("Found Content-Length\n");
 81 | 		content_length_flag = 1;
 82 | 	}
 83 | 
 84 | 	return 0;
 85 | }
 86 | 
 87 | int on_header_value(http_parser* _, const char* at, size_t length)
 88 | {
 89 | 	(void)_;
 90 | 	printf("Header value: %.*s\n", (int)length, at);
 91 | 	if(content_type_flag)
 92 | 	{
 93 | 		memcpy(http_img.type, at, length);
 94 | 		//printf("http_img.type = %s\n", http_img.type);
 95 | 		content_type_flag = 0;
 96 | 	}
 97 | 
 98 | 	if(content_length_flag)
 99 | 	{
100 | 		char value[32];
101 | 		memcpy(value, at, length);
102 | 		//printf("http_img.content_size = %s\n", value);
103 | 		http_img.content_size = atoi(value);
104 | 		content_length_flag = 0;
105 | 	}
106 | 	return 0;
107 | }
108 | 
109 | int on_body(http_parser* _, const char* at, size_t length)
110 | {
111 | 	(void)_;
112 | 	unsigned int z;
113 | 	char *p = at;
114 | 
115 | 	http_img.content_start=at;
116 | //	if(!memcmp("image/png", http_img.type, strlen("image/png")))
117 | //	{
118 | //		printf("Found PNG body!http_img.content_start=%p\n", http_img.content_start);
119 | //	}
120 | 	return 0;
121 | }
122 | 
123 | static http_parser_settings settings_null =
124 | {
125 | 	.on_message_begin = on_message_begin,
126 | 	.on_header_field = on_header_field,
127 | 	.on_header_value = on_header_value,
128 | 	.on_url = on_url,
129 | 	.on_status = 0,
130 | 	.on_body = on_body,
131 | 	.on_headers_complete = on_headers_complete,
132 | 	.on_message_complete = on_message_complete
133 | };
134 | 
135 | 
/* Return the wall-clock time in seconds elapsed since tmstart.
 *
 * tmstart is passed by value, so the caller's timestamp is never updated.
 * The seconds and microseconds are subtracted separately before converting
 * to double, which avoids the precision lost when two large absolute times
 * are subtracted. Returns 0 if gettimeofday() fails.
 */
static double tminterval(struct timeval tmstart)
{
	struct timeval now;

	if (gettimeofday(&now, NULL) != 0)
		return 0;

	return (double)(now.tv_sec - tmstart.tv_sec)
		+ (double)(now.tv_usec - tmstart.tv_usec) * 1e-6;
}
149 | 
150 | /*
151 |  * Demo: connects to `host`, sends the GET request in `buf`, buffers and parses
152 |  * the response, then writes the body to "logo" plus an extension chosen from
153 |  * the Content-Type value. Returns EXIT_SUCCESS only if the body was written.
154 |  */
155 | int main (void)
156 | {
157 | 	struct sockaddr_in address;
158 | 	struct hostent *he;
159 | 	struct in_addr **addr_list;
160 | 	struct timeval tmstart;
161 | 	int client_sock = -1;
162 | 	int status = EXIT_FAILURE;
163 | 	int i, got;
164 | 	/* NOTE(review): the active request below names pic67.nipic.com as Host; confirm this host. */
165 | 	char *host="admin.omsg.cn";
166 | 	short port = 80;
167 | 	char *http_buf = NULL, *grown;
168 | 	size_t http_buf_len = 0;
169 | 	size_t http_buf_cap = BUFSIZZ;
170 | 	size_t body_avail, body_len;
171 | 	char outfile[20] = "logo";
172 | 	char *file_ext = IMAGE_TYPE_DEFAULT_EXT;
173 | 	char *p;
174 | 	FILE *fp;
175 | 	/*PNG*///char *buf = "GET http://admin.omsg.cn/uploadpic/2016121034000012.png HTTP/1.1\r\nHost: admin.omsg.cn\r\nAccept: */*\r\nConnection: Keep-Alive\r\n\r\n";
176 | 	/*JPEG*/char *buf = "GET http://pic67.nipic.com/file/20150515/19533051_112209270000_2.jpg HTTP/1.1\r\nHost: pic67.nipic.com\r\nAccept: */*\r\nConnection: Keep-Alive\r\n\r\n";
177 | 
178 | 	gettimeofday(&tmstart, NULL);
179 | 	memset(&http_img, 0, sizeof(http_img));
180 | 	parser = malloc(sizeof(http_parser));
181 | 	http_buf = malloc(http_buf_cap);
182 | 	if(parser == NULL || http_buf == NULL) {
183 | 		printf("out of memory\n");
184 | 		goto done;
185 | 	}
186 | 
187 | //Connect to server
188 | 	if ((he = gethostbyname(host)) == NULL) {  // get the host info
189 | 		printf("gethostbyname error\n");
190 | 		goto done;
191 | 	}
192 | 	printf("Official name is: %s\n", he->h_name);
193 | 	printf("\t  IP addresses: ");
194 | 	addr_list = (struct in_addr **)he->h_addr_list;
195 | 	for(i = 0; addr_list[i] != NULL; i++) {
196 | 		printf("\t%s \n", inet_ntoa(*addr_list[i]));
197 | 	}
198 | 	if(addr_list[0] == NULL) {
199 | 		printf("error!\n");
200 | 		goto done;
201 | 	}
202 | 
203 | 	client_sock=socket(AF_INET,SOCK_STREAM,0);
204 | 	if(client_sock < 0) {
205 | 		printf("error!\n");
206 | 		goto done;
207 | 	}
208 | 	memset(&address, 0, sizeof(address));
209 | 	address.sin_family=AF_INET;
210 | 	address.sin_port=htons(port);
211 | 	address.sin_addr=*addr_list[0];
212 | 	if(connect(client_sock,(struct sockaddr *)&address,sizeof(address))==-1){
213 | 		printf("error!\n");
214 | 		goto done;
215 | 	}
216 | 
217 | //Parse request (runs the callbacks over the outgoing request text)
218 | 	http_parser_init(parser, HTTP_REQUEST);
219 | 	http_parser_execute(parser, &settings_null, buf, strlen(buf));
220 | 
221 | //Send request
222 | 	if(write(client_sock,buf, strlen(buf))<0){
223 | 		printf("error write\n");
224 | 		goto done;
225 | 	}
226 | 
227 | //Receive response
228 | 	for(;;){
229 | 		if(http_buf_cap - http_buf_len < (size_t)BUFSIZZ)
230 | 		{
231 | 		//Need more memory: double the buffer; the old block stays valid if realloc fails.
232 | 			grown = realloc(http_buf, http_buf_cap * 2);
233 | 			if(grown == NULL) {
234 | 				printf("out of memory\n");
235 | 				goto done;
236 | 			}
237 | 			http_buf = grown;
238 | 			http_buf_cap *= 2;
239 | 			printf("realloc memory size to %d\n", (int)http_buf_cap);
240 | 		}
241 | 		got = recv(client_sock, http_buf + http_buf_len, http_buf_cap - http_buf_len, 0);
242 | 		if(got <= 0)
243 | 			break;
244 | 		http_buf_len += (size_t)got;
245 | 		if(!http_img.header_checked && (http_buf_len>1024))
246 | 		{
247 | 		//Parse the partial response; the body offset plus content size gives the total size.
248 | 			http_img.content_start = NULL;
249 | 			http_parser_init(parser, HTTP_RESPONSE);
250 | 			http_parser_execute(parser, &settings_null, http_buf, http_buf_len);
251 | 			if(http_img.content_start != NULL)
252 | 			{
253 | 				http_img.response_size = http_img.content_start - http_buf + http_img.content_size;
254 | 				printf("response_size=%d content_size=%d\n", http_img.response_size, http_img.content_size);
255 | 				http_img.header_checked =1;
256 | 			}
257 | 		}
258 | 		//Stop once the announced body has arrived; a keep-alive server will not close the socket.
259 | 		if(http_img.header_checked && http_img.content_size > 0
260 | 		   && http_buf_len >= (size_t)http_img.response_size)
261 | 			break;
262 | 	}
263 | 
264 | //Reparse http response, in case realloc changed the http_buf address.
265 | 	http_img.content_start = NULL;
266 | 	http_parser_init(parser, HTTP_RESPONSE);
267 | 	http_parser_execute(parser, &settings_null, http_buf, http_buf_len);
268 | 	if(http_img.content_start == NULL) {
269 | 		printf("no response body\n");
270 | 		goto done;
271 | 	}
272 | 	//Never write past the bytes that were actually received.
273 | 	body_avail = http_buf_len - (size_t)(http_img.content_start - http_buf);
274 | 	body_len = http_img.content_size > 0 ? (size_t)http_img.content_size : 0;
275 | 	if(body_len > body_avail)
276 | 		body_len = body_avail;
277 | 
278 | //Pick the file extension from the media type (the text before any ';').
279 | 	p = strtok(http_img.type, ";");
280 | 	if(p != NULL)
281 | 	{
282 | 		if(!strcmp(p, IMAGE_TYPE_PNG))
283 | 			file_ext = IMAGE_TYPE_PNG_EXT;
284 | 		else if(!strcmp(p, IMAGE_TYPE_JPEG))
285 | 			file_ext = IMAGE_TYPE_JPEG_EXT;
286 | 		else if(!strcmp(p, IMAGE_TYPE_GIF))
287 | 			file_ext = IMAGE_TYPE_GIF_EXT;
288 | 		else if(!strcmp(p, IMAGE_TYPE_BMP))
289 | 			file_ext = IMAGE_TYPE_BMP_EXT;
290 | 	}
291 | 	strncat(outfile, file_ext, sizeof(outfile) - strlen(outfile) - 1);
292 | 	printf("Content-Type=%s\n", outfile);
293 | 
294 | //Write the body to the local file (binary mode).
295 | 	if((fp = fopen(outfile,"wb"))==NULL)
296 | 	{
297 | 		printf("can't open %s\n", outfile);
298 | 		goto done;
299 | 	}
300 | 	if(fwrite(http_img.content_start,sizeof(char),body_len,fp)!=body_len)
301 | 		printf("can't write %s\n", outfile);
302 | 	else
303 | 		status = EXIT_SUCCESS;
304 | 	if(fclose(fp) != 0)
305 | 		status = EXIT_FAILURE;
306 | 
307 | done: /* release the socket and buffers on every exit path */
308 | 	if(client_sock >= 0)
309 | 		close(client_sock);
310 | 	free(parser);
311 | 	free(http_buf);
312 | 	printf("Elapsed %f seconds.\n", tminterval(tmstart));
313 | 	return status;
314 | }
315 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | HTTP Parser
  2 | ===========
  3 | 
  4 | [![Build Status](https://api.travis-ci.org/nodejs/http-parser.svg?branch=master)](https://travis-ci.org/nodejs/http-parser)
  5 | 
  6 | This is a parser for HTTP messages written in C. It parses both requests and
  7 | responses. The parser is designed to be used in high-performance HTTP
  8 | applications. It makes no syscalls or allocations, it does not
  9 | buffer data, and it can be interrupted at any time. Depending on your
 10 | architecture, it only requires about 40 bytes of data per message
 11 | stream (in a web server that is per connection).
 12 | 
 13 | Features:
 14 | 
 15 |   * No dependencies
 16 |   * Handles persistent streams (keep-alive).
 17 |   * Decodes chunked encoding.
 18 |   * Upgrade support
 19 |   * Defends against buffer overflow attacks.
 20 | 
 21 | The parser extracts the following information from HTTP messages:
 22 | 
 23 |   * Header fields and values
 24 |   * Content-Length
 25 |   * Request method
 26 |   * Response status code
 27 |   * Transfer-Encoding
 28 |   * HTTP version
 29 |   * Request URL
 30 |   * Message body
 31 | 
 32 | 
 33 | Usage
 34 | -----
 35 | 
 36 | One `http_parser` object is used per TCP connection. Initialize the struct
 37 | using `http_parser_init()` and set the callbacks. That might look something
 38 | like this for a request parser:
 39 | ```c
 40 | http_parser_settings settings;
 41 | settings.on_url = my_url_callback;
 42 | settings.on_header_field = my_header_field_callback;
 43 | /* ... */
 44 | 
 45 | http_parser *parser = malloc(sizeof(http_parser));
 46 | http_parser_init(parser, HTTP_REQUEST);
 47 | parser->data = my_socket;
 48 | ```
 49 | 
 50 | When data is received on the socket execute the parser and check for errors.
 51 | 
 52 | ```c
 53 | size_t len = 80*1024, nparsed;
 54 | char buf[len];
 55 | ssize_t recved;
 56 | 
 57 | recved = recv(fd, buf, len, 0);
 58 | 
 59 | if (recved < 0) {
 60 |   /* Handle error. */
 61 | }
 62 | 
 63 | /* Start up / continue the parser.
 64 |  * Note we pass recved==0 to signal that EOF has been received.
 65 |  */
 66 | nparsed = http_parser_execute(parser, &settings, buf, recved);
 67 | 
 68 | if (parser->upgrade) {
 69 |   /* handle new protocol */
 70 | } else if (nparsed != recved) {
 71 |   /* Handle error. Usually just close the connection. */
 72 | }
 73 | ```
 74 | 
 75 | HTTP needs to know where the end of the stream is. For example, sometimes
 76 | servers send responses without Content-Length and expect the client to
 77 | consume input (for the body) until EOF. To tell http_parser about EOF, give
 78 | `0` as the fourth parameter to `http_parser_execute()`. Callbacks and errors
 79 | can still be encountered during an EOF, so one must still be prepared
 80 | to receive them.
 81 | 
 82 | Scalar valued message information such as `status_code`, `method`, and the
 83 | HTTP version are stored in the parser structure. This data is only
 84 | temporarily stored in `http_parser` and gets reset on each new message. If
 85 | this information is needed later, copy it out of the structure during the
 86 | `headers_complete` callback.
 87 | 
 88 | The parser decodes the transfer-encoding for both requests and responses
 89 | transparently. That is, a chunked encoding is decoded before being sent to
 90 | the on_body callback.
 91 | 
 92 | 
 93 | The Special Problem of Upgrade
 94 | ------------------------------
 95 | 
 96 | HTTP supports upgrading the connection to a different protocol. An
 97 | increasingly common example of this is the WebSocket protocol which sends
 98 | a request like
 99 | 
100 |         GET /demo HTTP/1.1
101 |         Upgrade: WebSocket
102 |         Connection: Upgrade
103 |         Host: example.com
104 |         Origin: http://example.com
105 |         WebSocket-Protocol: sample
106 | 
107 | followed by non-HTTP data.
108 | 
109 | (See [RFC6455](https://tools.ietf.org/html/rfc6455) for more information on the
110 | WebSocket protocol.)
111 | 
112 | To support this, the parser will treat this as a normal HTTP message without a
113 | body, issuing both on_headers_complete and on_message_complete callbacks. However
114 | http_parser_execute() will stop parsing at the end of the headers and return.
115 | 
116 | The user is expected to check if `parser->upgrade` has been set to 1 after
117 | `http_parser_execute()` returns. Non-HTTP data begins at the buffer supplied
118 | offset by the return value of `http_parser_execute()`.
119 | 
120 | 
121 | Callbacks
122 | ---------
123 | 
124 | During the `http_parser_execute()` call, the callbacks set in
125 | `http_parser_settings` will be executed. The parser maintains state and
126 | never looks behind, so buffering the data is not necessary. If you need to
127 | save certain data for later usage, you can do that from the callbacks.
128 | 
129 | There are two types of callbacks:
130 | 
131 | * notification `typedef int (*http_cb) (http_parser*);`
132 |     Callbacks: on_message_begin, on_headers_complete, on_message_complete.
133 | * data `typedef int (*http_data_cb) (http_parser*, const char *at, size_t length);`
134 |     Callbacks: (requests only) on_url,
135 |                (common) on_header_field, on_header_value, on_body;
136 | 
137 | Callbacks must return 0 on success. Returning a non-zero value indicates
138 | error to the parser, making it exit immediately.
139 | 
140 | For cases where it is necessary to pass local information to/from a callback,
141 | the `http_parser` object's `data` field can be used.
142 | An example of such a case is when using threads to handle a socket connection,
143 | parse a request, and then give a response over that socket. By instantiation
144 | of a thread-local struct containing relevant data (e.g. accepted socket,
145 | allocated memory for callbacks to write into, etc), a parser's callbacks are
146 | able to communicate data between the scope of the thread and the scope of the
147 | callback in a threadsafe manner. This allows http-parser to be used in
148 | multi-threaded contexts.
149 | 
150 | Example:
151 | ```c
152 |  typedef struct {
153 |   socket_t sock;
154 |   void* buffer;
155 |   int buf_len;
156 |  } custom_data_t;
157 | 
158 | 
159 | int my_url_callback(http_parser* parser, const char *at, size_t length) {
160 |   /* access to thread local custom_data_t struct.
161 |   Use it to save parsed data into the thread-local buffer for later
162 |   use, or to communicate over the socket
163 |   */
164 |   parser->data;
165 |   ...
166 |   return 0;
167 | }
168 | 
169 | ...
170 | 
171 | void http_parser_thread(socket_t sock) {
172 |  int nparsed = 0;
173 |  /* allocate memory for user data */
174 |  custom_data_t *my_data = malloc(sizeof(custom_data_t));
175 | 
176 |  /* some information for use by callbacks.
177 |  * achieves thread -> callback information flow */
178 |  my_data->sock = sock;
179 | 
180 |  /* instantiate a thread-local parser */
181 |  http_parser *parser = malloc(sizeof(http_parser));
182 |  http_parser_init(parser, HTTP_REQUEST); /* initialise parser */
183 |  /* this custom data reference is accessible through the reference to the
184 |  parser supplied to callback functions */
185 |  parser->data = my_data;
186 | 
187 |  http_parser_settings settings; /* set up callbacks */
188 |  settings.on_url = my_url_callback;
189 | 
190 |  /* execute parser */
191 |  nparsed = http_parser_execute(parser, &settings, buf, recved);
192 | 
193 |  ...
194 |  /* parsed information copied from callback.
195 |  can now perform action on data copied into thread-local memory from callbacks.
196 |  achieves callback -> thread information flow */
197 |  my_data->buffer;
198 |  ...
199 | }
200 | 
201 | ```
202 | 
203 | In case you parse an HTTP message in chunks (e.g. `read()` the request line
204 | from the socket, parse, read half the headers, parse, etc.) your data callbacks
205 | may be called more than once. Http-parser guarantees that the data pointer is only
206 | valid for the lifetime of the callback. You can also `read()` into a heap-allocated
207 | buffer to avoid copying memory around if this fits your application.
208 | 
209 | Reading headers may be a tricky task if you read/parse headers partially.
210 | Basically, you need to remember whether last header callback was field or value
211 | and apply the following logic:
212 | 
213 |     (on_header_field and on_header_value shortened to on_h_*)
214 |      ------------------------ ------------ --------------------------------------------
215 |     | State (prev. callback) | Callback   | Description/action                         |
216 |      ------------------------ ------------ --------------------------------------------
217 |     | nothing (first call)   | on_h_field | Allocate new buffer and copy callback data |
218 |     |                        |            | into it                                    |
219 |      ------------------------ ------------ --------------------------------------------
220 |     | value                  | on_h_field | New header started.                        |
221 |     |                        |            | Copy current name,value buffers to headers |
222 |     |                        |            | list and allocate new buffer for new name  |
223 |      ------------------------ ------------ --------------------------------------------
224 |     | field                  | on_h_field | Previous name continues. Reallocate name   |
225 |     |                        |            | buffer and append callback data to it      |
226 |      ------------------------ ------------ --------------------------------------------
227 |     | field                  | on_h_value | Value for current header started. Allocate |
228 |     |                        |            | new buffer and copy callback data to it    |
229 |      ------------------------ ------------ --------------------------------------------
230 |     | value                  | on_h_value | Value continues. Reallocate value buffer   |
231 |     |                        |            | and append callback data to it             |
232 |      ------------------------ ------------ --------------------------------------------
233 | 
234 | 
235 | Parsing URLs
236 | ------------
237 | 
238 | A simplistic zero-copy URL parser is provided as `http_parser_parse_url()`.
239 | Users of this library may wish to use it to parse URLs constructed from
240 | consecutive `on_url` callbacks.
241 | 
242 | See examples of reading in headers:
243 | 
244 | * [partial example](http://gist.github.com/155877) in C
245 | * [from http-parser tests](http://github.com/joyent/http-parser/blob/37a0ff8/test.c#L403) in C
246 | * [from Node library](http://github.com/joyent/node/blob/842eaf4/src/http.js#L284) in Javascript
247 | 


--------------------------------------------------------------------------------
/http_parser.h:
--------------------------------------------------------------------------------
  1 | /* Copyright Joyent, Inc. and other Node contributors. All rights reserved.
  2 |  *
  3 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  4 |  * of this software and associated documentation files (the "Software"), to
  5 |  * deal in the Software without restriction, including without limitation the
  6 |  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  7 |  * sell copies of the Software, and to permit persons to whom the Software is
  8 |  * furnished to do so, subject to the following conditions:
  9 |  *
 10 |  * The above copyright notice and this permission notice shall be included in
 11 |  * all copies or substantial portions of the Software.
 12 |  *
 13 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 14 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 15 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 16 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 17 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 18 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 19 |  * IN THE SOFTWARE.
 20 |  */
 21 | #ifndef http_parser_h
 22 | #define http_parser_h
 23 | #ifdef __cplusplus
 24 | extern "C" {
 25 | #endif
 26 | 
 27 | /* Also update SONAME in the Makefile whenever you change these. */
 28 | #define HTTP_PARSER_VERSION_MAJOR 2
 29 | #define HTTP_PARSER_VERSION_MINOR 7
 30 | #define HTTP_PARSER_VERSION_PATCH 1
 31 | 
 32 | #include <sys/types.h>
 33 | #if defined(_WIN32) && !defined(__MINGW32__) && \
 34 |   (!defined(_MSC_VER) || _MSC_VER<1600) && !defined(__WINE__)
 35 | #include <BaseTsd.h>
 36 | #include <stddef.h>
 37 | typedef __int8 int8_t;
 38 | typedef unsigned __int8 uint8_t;
 39 | typedef __int16 int16_t;
 40 | typedef unsigned __int16 uint16_t;
 41 | typedef __int32 int32_t;
 42 | typedef unsigned __int32 uint32_t;
 43 | typedef __int64 int64_t;
 44 | typedef unsigned __int64 uint64_t;
 45 | #else
 46 | #include <stdint.h>
 47 | #endif
 48 | 
 49 | /* Compile with -DHTTP_PARSER_STRICT=0 to make fewer checks, but run
 50 |  * faster
 51 |  */
 52 | #ifndef HTTP_PARSER_STRICT
 53 | # define HTTP_PARSER_STRICT 1
 54 | #endif
 55 | 
 56 | /* Maximum header size allowed. If the macro is not defined
 57 |  * before including this header then the default is used. To
 58 |  * change the maximum header size, define the macro in the build
 59 |  * environment (e.g. -DHTTP_MAX_HEADER_SIZE=<value>). To remove
 60 |  * the effective limit on the size of the header, define the macro
 61 |  * to a very large number (e.g. -DHTTP_MAX_HEADER_SIZE=0x7fffffff)
 62 |  */
 63 | #ifndef HTTP_MAX_HEADER_SIZE
 64 | # define HTTP_MAX_HEADER_SIZE (80*1024)
 65 | #endif
 66 | 
 67 | typedef struct http_parser http_parser;
 68 | typedef struct http_parser_settings http_parser_settings;
 69 | 
 70 | 
 71 | /* Callbacks should return non-zero to indicate an error. The parser will
 72 |  * then halt execution.
 73 |  *
 74 |  * The one exception is on_headers_complete. In a HTTP_RESPONSE parser
 75 |  * returning '1' from on_headers_complete will tell the parser that it
 76 |  * should not expect a body. This is used when receiving a response to a
 77 |  * HEAD request which may contain 'Content-Length' or 'Transfer-Encoding:
 78 |  * chunked' headers that indicate the presence of a body.
 79 |  *
 80 |  * Returning `2` from on_headers_complete will tell the parser that it should
 81 |  * expect neither a body nor any further responses on this connection. This is
 82 |  * useful for handling responses to a CONNECT request which may not contain
 83 |  * `Upgrade` or `Connection: upgrade` headers.
 84 |  *
 85 |  * http_data_cb does not return data chunks. It will be called arbitrarily
 86 |  * many times for each string. E.G. you might get 10 callbacks for "on_url"
 87 |  * each providing just a few characters more data.
 88 |  */
 89 | typedef int (*http_data_cb) (http_parser*, const char *at, size_t length);
 90 | typedef int (*http_cb) (http_parser*);
 91 | 
 92 | 
 93 | /* Status Codes: each row is XX(numeric code, identifier suffix, reason phrase) */
 94 | #define HTTP_STATUS_MAP(XX)                                                 \
 95 |   XX(100, CONTINUE,                        Continue)                        \
 96 |   XX(101, SWITCHING_PROTOCOLS,             Switching Protocols)             \
 97 |   XX(102, PROCESSING,                      Processing)                      \
 98 |   XX(200, OK,                              OK)                              \
 99 |   XX(201, CREATED,                         Created)                         \
100 |   XX(202, ACCEPTED,                        Accepted)                        \
101 |   XX(203, NON_AUTHORITATIVE_INFORMATION,   Non-Authoritative Information)   \
102 |   XX(204, NO_CONTENT,                      No Content)                      \
103 |   XX(205, RESET_CONTENT,                   Reset Content)                   \
104 |   XX(206, PARTIAL_CONTENT,                 Partial Content)                 \
105 |   XX(207, MULTI_STATUS,                    Multi-Status)                    \
106 |   XX(208, ALREADY_REPORTED,                Already Reported)                \
107 |   XX(226, IM_USED,                         IM Used)                         \
108 |   XX(300, MULTIPLE_CHOICES,                Multiple Choices)                \
109 |   XX(301, MOVED_PERMANENTLY,               Moved Permanently)               \
110 |   XX(302, FOUND,                           Found)                           \
111 |   XX(303, SEE_OTHER,                       See Other)                       \
112 |   XX(304, NOT_MODIFIED,                    Not Modified)                    \
113 |   XX(305, USE_PROXY,                       Use Proxy)                       \
114 |   XX(307, TEMPORARY_REDIRECT,              Temporary Redirect)              \
115 |   XX(308, PERMANENT_REDIRECT,              Permanent Redirect)              \
116 |   XX(400, BAD_REQUEST,                     Bad Request)                     \
117 |   XX(401, UNAUTHORIZED,                    Unauthorized)                    \
118 |   XX(402, PAYMENT_REQUIRED,                Payment Required)                \
119 |   XX(403, FORBIDDEN,                       Forbidden)                       \
120 |   XX(404, NOT_FOUND,                       Not Found)                       \
121 |   XX(405, METHOD_NOT_ALLOWED,              Method Not Allowed)              \
122 |   XX(406, NOT_ACCEPTABLE,                  Not Acceptable)                  \
123 |   XX(407, PROXY_AUTHENTICATION_REQUIRED,   Proxy Authentication Required)   \
124 |   XX(408, REQUEST_TIMEOUT,                 Request Timeout)                 \
125 |   XX(409, CONFLICT,                        Conflict)                        \
126 |   XX(410, GONE,                            Gone)                            \
127 |   XX(411, LENGTH_REQUIRED,                 Length Required)                 \
128 |   XX(412, PRECONDITION_FAILED,             Precondition Failed)             \
129 |   XX(413, PAYLOAD_TOO_LARGE,               Payload Too Large)               \
130 |   XX(414, URI_TOO_LONG,                    URI Too Long)                    \
131 |   XX(415, UNSUPPORTED_MEDIA_TYPE,          Unsupported Media Type)          \
132 |   XX(416, RANGE_NOT_SATISFIABLE,           Range Not Satisfiable)           \
133 |   XX(417, EXPECTATION_FAILED,              Expectation Failed)              \
134 |   XX(421, MISDIRECTED_REQUEST,             Misdirected Request)             \
135 |   XX(422, UNPROCESSABLE_ENTITY,            Unprocessable Entity)            \
136 |   XX(423, LOCKED,                          Locked)                          \
137 |   XX(424, FAILED_DEPENDENCY,               Failed Dependency)               \
138 |   XX(426, UPGRADE_REQUIRED,                Upgrade Required)                \
139 |   XX(428, PRECONDITION_REQUIRED,           Precondition Required)           \
140 |   XX(429, TOO_MANY_REQUESTS,               Too Many Requests)               \
141 |   XX(431, REQUEST_HEADER_FIELDS_TOO_LARGE, Request Header Fields Too Large) \
142 |   XX(451, UNAVAILABLE_FOR_LEGAL_REASONS,   Unavailable For Legal Reasons)   \
143 |   XX(500, INTERNAL_SERVER_ERROR,           Internal Server Error)           \
144 |   XX(501, NOT_IMPLEMENTED,                 Not Implemented)                 \
145 |   XX(502, BAD_GATEWAY,                     Bad Gateway)                     \
146 |   XX(503, SERVICE_UNAVAILABLE,             Service Unavailable)             \
147 |   XX(504, GATEWAY_TIMEOUT,                 Gateway Timeout)                 \
148 |   XX(505, HTTP_VERSION_NOT_SUPPORTED,      HTTP Version Not Supported)      \
149 |   XX(506, VARIANT_ALSO_NEGOTIATES,         Variant Also Negotiates)         \
150 |   XX(507, INSUFFICIENT_STORAGE,            Insufficient Storage)            \
151 |   XX(508, LOOP_DETECTED,                   Loop Detected)                   \
152 |   XX(510, NOT_EXTENDED,                    Not Extended)                    \
153 |   XX(511, NETWORK_AUTHENTICATION_REQUIRED, Network Authentication Required) \
154 | 
155 | /* One HTTP_STATUS_<ident> enumerator per HTTP_STATUS_MAP row, valued at the row's code. */
156 | enum http_status {
157 | #define XX(code, ident, phrase) HTTP_STATUS_##ident = code,
158 |   HTTP_STATUS_MAP(XX)
159 | #undef XX
160 | };
161 | 
162 | 
163 | /* Request Methods: each row is XX(enum value, identifier suffix, method text) */
164 | #define HTTP_METHOD_MAP(XX)         \
165 |   XX(0,  DELETE,      DELETE)       \
166 |   XX(1,  GET,         GET)          \
167 |   XX(2,  HEAD,        HEAD)         \
168 |   XX(3,  POST,        POST)         \
169 |   XX(4,  PUT,         PUT)          \
170 |   /* pathological */                \
171 |   XX(5,  CONNECT,     CONNECT)      \
172 |   XX(6,  OPTIONS,     OPTIONS)      \
173 |   XX(7,  TRACE,       TRACE)        \
174 |   /* WebDAV */                      \
175 |   XX(8,  COPY,        COPY)         \
176 |   XX(9,  LOCK,        LOCK)         \
177 |   XX(10, MKCOL,       MKCOL)        \
178 |   XX(11, MOVE,        MOVE)         \
179 |   XX(12, PROPFIND,    PROPFIND)     \
180 |   XX(13, PROPPATCH,   PROPPATCH)    \
181 |   XX(14, SEARCH,      SEARCH)       \
182 |   XX(15, UNLOCK,      UNLOCK)       \
183 |   XX(16, BIND,        BIND)         \
184 |   XX(17, REBIND,      REBIND)       \
185 |   XX(18, UNBIND,      UNBIND)       \
186 |   XX(19, ACL,         ACL)          \
187 |   /* subversion */                  \
188 |   XX(20, REPORT,      REPORT)       \
189 |   XX(21, MKACTIVITY,  MKACTIVITY)   \
190 |   XX(22, CHECKOUT,    CHECKOUT)     \
191 |   XX(23, MERGE,       MERGE)        \
192 |   /* upnp */                        \
193 |   XX(24, MSEARCH,     M-SEARCH)     \
194 |   XX(25, NOTIFY,      NOTIFY)       \
195 |   XX(26, SUBSCRIBE,   SUBSCRIBE)    \
196 |   XX(27, UNSUBSCRIBE, UNSUBSCRIBE)  \
197 |   /* RFC-5789 */                    \
198 |   XX(28, PATCH,       PATCH)        \
199 |   XX(29, PURGE,       PURGE)        \
200 |   /* CalDAV */                      \
201 |   XX(30, MKCALENDAR,  MKCALENDAR)   \
202 |   /* RFC-2068, section 19.6.1.2 */  \
203 |   XX(31, LINK,        LINK)         \
204 |   XX(32, UNLINK,      UNLINK)       \
205 | 
206 | /* One HTTP_<ident> enumerator per HTTP_METHOD_MAP row, valued at the row's number. */
207 | enum http_method {
208 | #define XX(value, ident, text) HTTP_##ident = value,
209 |   HTTP_METHOD_MAP(XX)
210 | #undef XX
211 | };
212 | 
213 | 
214 | enum http_parser_type { HTTP_REQUEST = 0, HTTP_RESPONSE = 1, HTTP_BOTH = 2 };
215 | 
216 | 
217 | /* Flag values for http_parser.flags field */
218 | enum flags {  /* one bit each; all eight fit the 8-bit http_parser.flags field */
219 |   F_CHUNKED               = 0x01,
220 |   F_CONNECTION_KEEP_ALIVE = 0x02,
221 |   F_CONNECTION_CLOSE      = 0x04,
222 |   F_CONNECTION_UPGRADE    = 0x08,
223 |   F_TRAILING              = 0x10,
224 |   F_UPGRADE               = 0x20,
225 |   F_SKIPBODY              = 0x40,
226 |   F_CONTENTLENGTH         = 0x80
227 | };
228 | 
229 | 
230 | /* Map for errno-related constants
231 |  *
232 |  * The provided argument should be a macro that takes 2 arguments: the error
233 |  * name (used as the HPE_ suffix) and its human-readable description string. */
234 | #define HTTP_ERRNO_MAP(XX)                                           \
235 |   /* No error */                                                     \
236 |   XX(OK, "success")                                                  \
237 |                                                                      \
238 |   /* Callback-related errors */                                      \
239 |   XX(CB_message_begin, "the on_message_begin callback failed")       \
240 |   XX(CB_url, "the on_url callback failed")                           \
241 |   XX(CB_header_field, "the on_header_field callback failed")         \
242 |   XX(CB_header_value, "the on_header_value callback failed")         \
243 |   XX(CB_headers_complete, "the on_headers_complete callback failed") \
244 |   XX(CB_body, "the on_body callback failed")                         \
245 |   XX(CB_message_complete, "the on_message_complete callback failed") \
246 |   XX(CB_status, "the on_status callback failed")                     \
247 |   XX(CB_chunk_header, "the on_chunk_header callback failed")         \
248 |   XX(CB_chunk_complete, "the on_chunk_complete callback failed")     \
249 |                                                                      \
250 |   /* Parsing-related errors */                                       \
251 |   XX(INVALID_EOF_STATE, "stream ended at an unexpected time")        \
252 |   XX(HEADER_OVERFLOW,                                                \
253 |      "too many header bytes seen; overflow detected")                \
254 |   XX(CLOSED_CONNECTION,                                              \
255 |      "data received after completed connection: close message")      \
256 |   XX(INVALID_VERSION, "invalid HTTP version")                        \
257 |   XX(INVALID_STATUS, "invalid HTTP status code")                     \
258 |   XX(INVALID_METHOD, "invalid HTTP method")                          \
259 |   XX(INVALID_URL, "invalid URL")                                     \
260 |   XX(INVALID_HOST, "invalid host")                                   \
261 |   XX(INVALID_PORT, "invalid port")                                   \
262 |   XX(INVALID_PATH, "invalid path")                                   \
263 |   XX(INVALID_QUERY_STRING, "invalid query string")                   \
264 |   XX(INVALID_FRAGMENT, "invalid fragment")                           \
265 |   XX(LF_EXPECTED, "LF character expected")                           \
266 |   XX(INVALID_HEADER_TOKEN, "invalid character in header")            \
267 |   XX(INVALID_CONTENT_LENGTH,                                         \
268 |      "invalid character in content-length header")                   \
269 |   XX(UNEXPECTED_CONTENT_LENGTH,                                      \
270 |      "unexpected content-length header")                             \
271 |   XX(INVALID_CHUNK_SIZE,                                             \
272 |      "invalid character in chunk size header")                       \
273 |   XX(INVALID_CONSTANT, "invalid constant string")                    \
274 |   XX(INVALID_INTERNAL_STATE, "encountered unexpected internal state")\
275 |   XX(STRICT, "strict mode assertion failed")                         \
276 |   XX(PAUSED, "parser is paused")                                     \
277 |   XX(UNKNOWN, "an unknown error occurred")
278 | 
279 | 
280 | /* Define one HPE_<name> enumerator per XX(name, description) row of HTTP_ERRNO_MAP above */
281 | #define HTTP_ERRNO_GEN(n, s) HPE_##n, /* keeps the name, ignores the description */
282 | enum http_errno {
283 |   HTTP_ERRNO_MAP(HTTP_ERRNO_GEN) /* enumerators appear in map order */
284 | };
285 | #undef HTTP_ERRNO_GEN
286 | 
287 | 
288 | /* Read the http_errno bit-field of parser p back as an enum http_errno value */
289 | #define HTTP_PARSER_ERRNO(p)            ((enum http_errno) (p)->http_errno)
290 | 
291 | 
292 | struct http_parser { /* parser state carried between http_parser_execute() calls */
293 |   /** PRIVATE **/
294 |   unsigned int type : 2;         /* enum http_parser_type */
295 |   unsigned int flags : 8;        /* F_* values from 'flags' enum; semi-public */
296 |   unsigned int state : 7;        /* enum state from http_parser.c */
297 |   unsigned int header_state : 7; /* enum header_states from http_parser.c */
298 |   unsigned int index : 7;        /* index into current matcher */
299 |   unsigned int lenient_http_headers : 1; /* cached as 'lenient' at the top of http_parser_execute() */
300 | 
301 |   uint32_t nread;          /* # bytes read in various scenarios; COUNT_HEADER_SIZE checks it against HTTP_MAX_HEADER_SIZE */
302 |   uint64_t content_length; /* # bytes in body (0 if no Content-Length header); holds the chunk length during on_chunk_header */
303 | 
304 |   /** READ-ONLY **/
305 |   unsigned short http_major;     /* presumably the parsed HTTP major version -- TODO confirm */
306 |   unsigned short http_minor;     /* presumably the parsed HTTP minor version -- TODO confirm */
307 |   unsigned int status_code : 16; /* responses only */
308 |   unsigned int method : 8;       /* requests only */
309 |   unsigned int http_errno : 7;   /* enum http_errno value; read it with HTTP_PARSER_ERRNO() */
310 | 
311 |   /* 1 = Upgrade header was present and the parser has exited because of that.
312 |    * 0 = No upgrade header present.
313 |    * Should be checked when http_parser_execute() returns in addition to
314 |    * error checking.
315 |    */
316 |   unsigned int upgrade : 1;
317 | 
318 |   /** PUBLIC **/
319 |   void *data; /* A pointer used to hook up the "connection" or "socket" object */
320 | };
321 | 
322 | 
323 | struct http_parser_settings { /* callback table; a NULL member is skipped, a nonzero return is reported as HPE_CB_<name> */
324 |   http_cb      on_message_begin;
325 |   http_data_cb on_url;
326 |   http_data_cb on_status;
327 |   http_data_cb on_header_field;
328 |   http_data_cb on_header_value;
329 |   http_cb      on_headers_complete;
330 |   http_data_cb on_body;
331 |   http_cb      on_message_complete;
332 |   /* When on_chunk_header is called, the current chunk length is stored
333 |    * in parser->content_length.
334 |    */
335 |   http_cb      on_chunk_header;
336 |   http_cb      on_chunk_complete;
337 | };
338 | 
339 | 
340 | enum http_parser_url_fields /* indexes into http_parser_url.field_data[] and bit positions in field_set */
341 |   { UF_SCHEMA           = 0
342 |   , UF_HOST             = 1
343 |   , UF_PORT             = 2
344 |   , UF_PATH             = 3
345 |   , UF_QUERY            = 4
346 |   , UF_FRAGMENT         = 5
347 |   , UF_USERINFO         = 6
348 |   , UF_MAX              = 7 /* number of fields; sizes field_data[] */
349 |   };
350 | 
351 | 
352 | /* Result structure for http_parser_parse_url().
353 |  *
354 |  * Callers should index into field_data[] with UF_* values iff field_set
355 |  * has the relevant (1 << UF_*) bit set. As a courtesy to clients (and
356 |  * because we probably have padding left over), we convert any port to
357 |  * a uint16_t.
358 |  */
359 | struct http_parser_url {
360 |   uint16_t field_set;           /* Bitmask of (1 << UF_*) values */
361 |   uint16_t port;                /* Converted UF_PORT string */
362 | 
363 |   struct {                      /* 16-bit members: values above 65535 cannot be represented */
364 |     uint16_t off;               /* Offset into buffer at which field starts */
365 |     uint16_t len;               /* Length of run in buffer */
366 |   } field_data[UF_MAX];         /* One entry per UF_* field */
367 | };
368 | 
369 | 
370 | /* Returns the library version. Bits 16-23 contain the major version number,
371 |  * bits 8-15 the minor version number and bits 0-7 the patch level.
372 |  * Usage example:
373 |  *
374 |  *   unsigned long version = http_parser_version();
375 |  *   unsigned major = (version >> 16) & 255;
376 |  *   unsigned minor = (version >> 8) & 255;
377 |  *   unsigned patch = version & 255;
378 |  *   printf("http_parser v%u.%u.%u\n", major, minor, patch);
379 |  */
380 | unsigned long http_parser_version(void);
381 | 
382 | void http_parser_init(http_parser *parser, enum http_parser_type type);
383 | 
384 | 
385 | /* Initialize http_parser_settings members to 0
386 |  */
387 | void http_parser_settings_init(http_parser_settings *settings);
388 | 
389 | 
390 | /* Executes the parser. Returns number of parsed bytes. Sets
391 |  * `parser->http_errno` on error. */
392 | size_t http_parser_execute(http_parser *parser,
393 |                            const http_parser_settings *settings,
394 |                            const char *data,
395 |                            size_t len);
396 | 
397 | 
398 | /* If http_should_keep_alive() in the on_headers_complete or
399 |  * on_message_complete callback returns 0, then this should be
400 |  * the last message on the connection.
401 |  * If you are the server, respond with the "Connection: close" header.
402 |  * If you are the client, close the connection.
403 |  */
404 | int http_should_keep_alive(const http_parser *parser);
405 | 
406 | /* Returns a string version of the HTTP method. */
407 | const char *http_method_str(enum http_method m);
408 | 
409 | /* Return a string name of the given error */
410 | const char *http_errno_name(enum http_errno err);
411 | 
412 | /* Return a string description of the given error */
413 | const char *http_errno_description(enum http_errno err);
414 | 
415 | /* Initialize all http_parser_url members to 0 */
416 | void http_parser_url_init(struct http_parser_url *u);
417 | 
418 | /* Parse a URL; return nonzero on failure */
419 | int http_parser_parse_url(const char *buf, size_t buflen,
420 |                           int is_connect,
421 |                           struct http_parser_url *u);
422 | 
423 | /* Pause or un-pause the parser; a nonzero value pauses */
424 | void http_parser_pause(http_parser *parser, int paused);
425 | 
426 | /* Checks if this is the final chunk of the body. */
427 | int http_body_is_final(const http_parser *parser);
428 | 
429 | #ifdef __cplusplus
430 | }
431 | #endif
432 | #endif
433 | 


--------------------------------------------------------------------------------
/http_parser.c:
--------------------------------------------------------------------------------
   1 | /* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
   2 |  *
   3 |  * Additional changes are licensed under the same terms as NGINX and
   4 |  * copyright Joyent, Inc. and other Node contributors. All rights reserved.
   5 |  *
   6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
   7 |  * of this software and associated documentation files (the "Software"), to
   8 |  * deal in the Software without restriction, including without limitation the
   9 |  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  10 |  * sell copies of the Software, and to permit persons to whom the Software is
  11 |  * furnished to do so, subject to the following conditions:
  12 |  *
  13 |  * The above copyright notice and this permission notice shall be included in
  14 |  * all copies or substantial portions of the Software.
  15 |  *
  16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  21 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  22 |  * IN THE SOFTWARE.
  23 |  */
  24 | #include "http_parser.h"
  25 | #include <assert.h>
  26 | #include <stddef.h>
  27 | #include <ctype.h>
  28 | #include <stdlib.h>
  29 | #include <string.h>
  30 | #include <limits.h>
  31 | 
  32 | #ifndef ULLONG_MAX /* fallback for when <limits.h> does not define it */
  33 | # define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */
  34 | #endif
  35 | /* Smaller of a and b; each argument may be evaluated twice */
  36 | #ifndef MIN
  37 | # define MIN(a,b) ((a) < (b) ? (a) : (b))
  38 | #endif
  39 | /* Element count of a; only correct for a true array, not a pointer */
  40 | #ifndef ARRAY_SIZE
  41 | # define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
  42 | #endif
  43 | /* 1 if bit i of byte array a is set (byte i >> 3, bit i & 7), else 0 */
  44 | #ifndef BIT_AT
  45 | # define BIT_AT(a, i)                                                \
  46 |   (!!((unsigned int) (a)[(unsigned int) (i) >> 3] &                  \
  47 |    (1 << ((unsigned int) (i) & 7))))
  48 | #endif
  49 | /* a[i] when i is inside the array bounds, otherwise the fallback v */
  50 | #ifndef ELEM_AT
  51 | # define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? (a)[(i)] : (v))
  52 | #endif
  53 | /* Store error code e in the http_errno field of the in-scope 'parser' */
  54 | #define SET_ERRNO(e)                                                 \
  55 | do {                                                                 \
  56 |   parser->http_errno = (e);                                          \
  57 | } while(0)
  58 | /* The state machine works on the local p_state; RETURN writes it back to parser->state before returning V */
  59 | #define CURRENT_STATE() p_state
  60 | #define UPDATE_STATE(V) p_state = (enum state) (V); /* NOTE: the expansion already ends in ';' */
  61 | #define RETURN(V)                                                    \
  62 | do {                                                                 \
  63 |   parser->state = CURRENT_STATE();                                   \
  64 |   return (V);                                                        \
  65 | } while (0); /* NOTE: the trailing ';' is part of the macro */
  66 | #define REEXECUTE()                                                  \
  67 |   goto reexecute;                                                    \
  68 | 
  69 | /* Branch-prediction hints via __builtin_expect on GNU C; plain pass-through elsewhere */
  70 | #ifdef __GNUC__
  71 | # define LIKELY(X) __builtin_expect(!!(X), 1)
  72 | # define UNLIKELY(X) __builtin_expect(!!(X), 0)
  73 | #else
  74 | # define LIKELY(X) (X)
  75 | # define UNLIKELY(X) (X)
  76 | #endif
  77 | 
  78 | 
  79 | /* Run the notify callback FOR (skipped when NULL); return ER from the enclosing function if it returns nonzero or leaves http_errno set */
  80 | #define CALLBACK_NOTIFY_(FOR, ER)                                    \
  81 | do {                                                                 \
  82 |   assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
  83 |                                                                      \
  84 |   if (LIKELY(settings->on_##FOR)) {                                  \
  85 |     parser->state = CURRENT_STATE();                                 \
  86 |     if (UNLIKELY(0 != settings->on_##FOR(parser))) {                 \
  87 |       SET_ERRNO(HPE_CB_##FOR);                                       \
  88 |     }                                                                \
  89 |     UPDATE_STATE(parser->state);                                     \
  90 |                                                                      \
  91 |     /* We either errored above or got paused; get out */             \
  92 |     if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) {             \
  93 |       return (ER);                                                   \
  94 |     }                                                                \
  95 |   }                                                                  \
  96 | } while (0)
  97 | 
  98 | /* Run the notify callback FOR; on failure report the current byte as consumed (p - data + 1) */
  99 | #define CALLBACK_NOTIFY(FOR)            CALLBACK_NOTIFY_(FOR, p - data + 1)
 100 | 
 101 | /* Run the notify callback FOR; on failure report the current byte as not consumed (p - data) */
 102 | #define CALLBACK_NOTIFY_NOADVANCE(FOR)  CALLBACK_NOTIFY_(FOR, p - data)
 103 | 
 104 | /* If FOR##_mark is set, run data callback FOR on LEN bytes starting at the mark (returning ER on failure), then clear the mark */
 105 | #define CALLBACK_DATA_(FOR, LEN, ER)                                 \
 106 | do {                                                                 \
 107 |   assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
 108 |                                                                      \
 109 |   if (FOR##_mark) {                                                  \
 110 |     if (LIKELY(settings->on_##FOR)) {                                \
 111 |       parser->state = CURRENT_STATE();                               \
 112 |       if (UNLIKELY(0 !=                                              \
 113 |                    settings->on_##FOR(parser, FOR##_mark, (LEN)))) { \
 114 |         SET_ERRNO(HPE_CB_##FOR);                                     \
 115 |       }                                                              \
 116 |       UPDATE_STATE(parser->state);                                   \
 117 |                                                                      \
 118 |       /* We either errored above or got paused; get out */           \
 119 |       if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) {           \
 120 |         return (ER);                                                 \
 121 |       }                                                              \
 122 |     }                                                                \
 123 |     FOR##_mark = NULL;                                               \
 124 |   }                                                                  \
 125 | } while (0)
 126 | 
 127 | /* Run the data callback FOR on the bytes from the mark up to (not including) p, and consume the current byte */
 128 | #define CALLBACK_DATA(FOR)                                           \
 129 |     CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)
 130 | 
 131 | /* Run the data callback FOR on the bytes from the mark up to (not including) p, and don't consume the current byte */
 132 | #define CALLBACK_DATA_NOADVANCE(FOR)                                 \
 133 |     CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)
 134 | 
 135 | /* Set the mark FOR to the current position p; non-destructive if mark is already set */
 136 | #define MARK(FOR)                                                    \
 137 | do {                                                                 \
 138 |   if (!FOR##_mark) {                                                 \
 139 |     FOR##_mark = p;                                                  \
 140 |   }                                                                  \
 141 | } while (0)
 142 | 
 143 | /* Don't allow the total size of the HTTP headers (including the status
 144 |  * line) to exceed HTTP_MAX_HEADER_SIZE.  This check is here to protect
 145 |  * embedders against denial-of-service attacks where the attacker feeds
 146 |  * us a never-ending header that the embedder keeps buffering.
 147 |  *
 148 |  * This check is arguably the responsibility of embedders but we're doing
 149 |  * it on the embedder's behalf because most won't bother and this way we
 150 |  * make the web a little safer.  HTTP_MAX_HEADER_SIZE is still far bigger
 151 |  * than any reasonable request or response so this should never affect
 152 |  * day-to-day operation.
 153 |  * Adds V to parser->nread; above the limit it sets HPE_HEADER_OVERFLOW and jumps to 'error'. */
 154 | #define COUNT_HEADER_SIZE(V)                                         \
 155 | do {                                                                 \
 156 |   parser->nread += (V);                                              \
 157 |   if (UNLIKELY(parser->nread > (HTTP_MAX_HEADER_SIZE))) {            \
 158 |     SET_ERRNO(HPE_HEADER_OVERFLOW);                                  \
 159 |     goto error;                                                      \
 160 |   }                                                                  \
 161 | } while (0)
 162 | 
 163 | /* Header names and values of interest, spelled in lowercase (presumably compared against lowercased input -- TODO confirm) */
 164 | #define PROXY_CONNECTION "proxy-connection"
 165 | #define CONNECTION "connection"
 166 | #define CONTENT_LENGTH "content-length"
 167 | #define TRANSFER_ENCODING "transfer-encoding"
 168 | #define UPGRADE "upgrade"
 169 | #define CHUNKED "chunked"
 170 | #define KEEP_ALIVE "keep-alive"
 171 | #define CLOSE "close"
 172 | 
 173 | /* Method names: one stringified entry per HTTP_METHOD_MAP row, in map order */
 174 | static const char *method_strings[] =
 175 |   {
 176 | #define XX(num, name, string) #string,
 177 |   HTTP_METHOD_MAP(XX)
 178 | #undef XX
 179 |   };
 180 | 
 181 | 
 182 | /* Tokens as defined by rfc 2616. Also lowercases them.
 183 |  *        token       = 1*<any CHAR except CTLs or separators>
 184 |  *     separators     = "(" | ")" | "<" | ">" | "@"
 185 |  *                    | "," | ";" | ":" | "\" | <">
 186 |  *                    | "/" | "[" | "]" | "?" | "="
 187 |  *                    | "{" | "}" | SP | HT
 188 |  * Indexed by byte value; 0 means "not a token". Bytes 128-255 have no initializer and are therefore 0. */
 189 | static const char tokens[256] = {
 190 | /*   0 nul    1 soh    2 stx    3 etx    4 eot    5 enq    6 ack    7 bel  */
 191 |         0,       0,       0,       0,       0,       0,       0,       0,
 192 | /*   8 bs     9 ht    10 nl    11 vt    12 np    13 cr    14 so    15 si   */
 193 |         0,       0,       0,       0,       0,       0,       0,       0,
 194 | /*  16 dle   17 dc1   18 dc2   19 dc3   20 dc4   21 nak   22 syn   23 etb */
 195 |         0,       0,       0,       0,       0,       0,       0,       0,
 196 | /*  24 can   25 em    26 sub   27 esc   28 fs    29 gs    30 rs    31 us  */
 197 |         0,       0,       0,       0,       0,       0,       0,       0,
 198 | /*  32 sp    33  !    34  "    35  #    36  $    37  %    38  &    39  '  */
 199 |         0,      '!',      0,      '#',     '$',     '%',     '&',    '\'',
 200 | /*  40  (    41  )    42  *    43  +    44  ,    45  -    46  .    47  /  */
 201 |         0,       0,      '*',     '+',      0,      '-',     '.',      0,
 202 | /*  48  0    49  1    50  2    51  3    52  4    53  5    54  6    55  7  */
 203 |        '0',     '1',     '2',     '3',     '4',     '5',     '6',     '7',
 204 | /*  56  8    57  9    58  :    59  ;    60  <    61  =    62  >    63  ?  */
 205 |        '8',     '9',      0,       0,       0,       0,       0,       0,
 206 | /*  64  @    65  A    66  B    67  C    68  D    69  E    70  F    71  G  */
 207 |         0,      'a',     'b',     'c',     'd',     'e',     'f',     'g',
 208 | /*  72  H    73  I    74  J    75  K    76  L    77  M    78  N    79  O  */
 209 |        'h',     'i',     'j',     'k',     'l',     'm',     'n',     'o',
 210 | /*  80  P    81  Q    82  R    83  S    84  T    85  U    86  V    87  W  */
 211 |        'p',     'q',     'r',     's',     't',     'u',     'v',     'w',
 212 | /*  88  X    89  Y    90  Z    91  [    92  \    93  ]    94  ^    95  _  */
 213 |        'x',     'y',     'z',      0,       0,       0,      '^',     '_',
 214 | /*  96  `    97  a    98  b    99  c   100  d   101  e   102  f   103  g  */
 215 |        '`',     'a',     'b',     'c',     'd',     'e',     'f',     'g',
 216 | /* 104  h   105  i   106  j   107  k   108  l   109  m   110  n   111  o  */
 217 |        'h',     'i',     'j',     'k',     'l',     'm',     'n',     'o',
 218 | /* 112  p   113  q   114  r   115  s   116  t   117  u   118  v   119  w  */
 219 |        'p',     'q',     'r',     's',     't',     'u',     'v',     'w',
 220 | /* 120  x   121  y   122  z   123  {   124  |   125  }   126  ~   127 del */
 221 |        'x',     'y',     'z',      0,      '|',      0,      '~',       0 };
 222 | 
 223 | /* Value of each byte as a hex digit ('0'-'9', 'a'-'f', 'A'-'F'), or -1 if it is not one. All 256 entries are explicit so bytes 0x80-0xFF are -1, not an implicit 0. */
 224 | static const int8_t unhex[256] =
 225 |   {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 /* 0x00-0x1f */
 226 |   ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1 /* 0x20-0x3f: '0'-'9' */
 227 |   ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 /* 0x40-0x5f: 'A'-'F' */
 228 |   ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 /* 0x60-0x7f: 'a'-'f' */
 229 |   ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 /* 0x80-0x9f */
 230 |   ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 /* 0xa0-0xbf */
 231 |   ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 /* 0xc0-0xdf */
 232 |   ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 /* 0xe0-0xff */
 233 |   };
 234 | 
 235 | /* T(v): v in non-strict builds, 0 in strict builds; used below for tab (9) and form feed (12) */
 236 | #if HTTP_PARSER_STRICT
 237 | # define T(v) 0
 238 | #else
 239 | # define T(v) v
 240 | #endif
 241 | 
 242 | /* Bitmap with one bit per byte value, read through BIT_AT by IS_URL_CHAR; only bytes 0-127 have rows, the remaining bytes are 0 */
 243 | static const uint8_t normal_url_char[32] = {
 244 | /*   0 nul    1 soh    2 stx    3 etx    4 eot    5 enq    6 ack    7 bel  */
 245 |         0    |   0    |   0    |   0    |   0    |   0    |   0    |   0,
 246 | /*   8 bs     9 ht    10 nl    11 vt    12 np    13 cr    14 so    15 si   */
 247 |         0    | T(2)   |   0    |   0    | T(16)  |   0    |   0    |   0,
 248 | /*  16 dle   17 dc1   18 dc2   19 dc3   20 dc4   21 nak   22 syn   23 etb */
 249 |         0    |   0    |   0    |   0    |   0    |   0    |   0    |   0,
 250 | /*  24 can   25 em    26 sub   27 esc   28 fs    29 gs    30 rs    31 us  */
 251 |         0    |   0    |   0    |   0    |   0    |   0    |   0    |   0,
 252 | /*  32 sp    33  !    34  "    35  #    36  $    37  %    38  &    39  '  */
 253 |         0    |   2    |   4    |   0    |   16   |   32   |   64   |  128,
 254 | /*  40  (    41  )    42  *    43  +    44  ,    45  -    46  .    47  /  */
 255 |         1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
 256 | /*  48  0    49  1    50  2    51  3    52  4    53  5    54  6    55  7  */
 257 |         1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
 258 | /*  56  8    57  9    58  :    59  ;    60  <    61  =    62  >    63  ?  */
 259 |         1    |   2    |   4    |   8    |   16   |   32   |   64   |   0,
 260 | /*  64  @    65  A    66  B    67  C    68  D    69  E    70  F    71  G  */
 261 |         1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
 262 | /*  72  H    73  I    74  J    75  K    76  L    77  M    78  N    79  O  */
 263 |         1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
 264 | /*  80  P    81  Q    82  R    83  S    84  T    85  U    86  V    87  W  */
 265 |         1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
 266 | /*  88  X    89  Y    90  Z    91  [    92  \    93  ]    94  ^    95  _  */
 267 |         1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
 268 | /*  96  `    97  a    98  b    99  c   100  d   101  e   102  f   103  g  */
 269 |         1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
 270 | /* 104  h   105  i   106  j   107  k   108  l   109  m   110  n   111  o  */
 271 |         1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
 272 | /* 112  p   113  q   114  r   115  s   116  t   117  u   118  v   119  w  */
 273 |         1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
 274 | /* 120  x   121  y   122  z   123  {   124  |   125  }   126  ~   127 del */
 275 |         1    |   2    |   4    |   8    |   16   |   32   |   64   |   0, };
 276 | 
 277 | #undef T
 278 | /* States of the main parser; http_parser.h describes its 7-bit 'state' bit-field as holding this enum */
 279 | enum state
 280 |   { s_dead = 1 /* important that this is > 0 */
 281 | 
 282 |   , s_start_req_or_res
 283 |   , s_res_or_resp_H
 284 |   , s_start_res
 285 |   , s_res_H
 286 |   , s_res_HT
 287 |   , s_res_HTT
 288 |   , s_res_HTTP
 289 |   , s_res_first_http_major
 290 |   , s_res_http_major
 291 |   , s_res_first_http_minor
 292 |   , s_res_http_minor
 293 |   , s_res_first_status_code
 294 |   , s_res_status_code
 295 |   , s_res_status_start
 296 |   , s_res_status
 297 |   , s_res_line_almost_done
 298 | 
 299 |   , s_start_req
 300 | 
 301 |   , s_req_method
 302 |   , s_req_spaces_before_url
 303 |   , s_req_schema
 304 |   , s_req_schema_slash
 305 |   , s_req_schema_slash_slash
 306 |   , s_req_server_start
 307 |   , s_req_server
 308 |   , s_req_server_with_at
 309 |   , s_req_path
 310 |   , s_req_query_string_start
 311 |   , s_req_query_string
 312 |   , s_req_fragment_start
 313 |   , s_req_fragment
 314 |   , s_req_http_start
 315 |   , s_req_http_H
 316 |   , s_req_http_HT
 317 |   , s_req_http_HTT
 318 |   , s_req_http_HTTP
 319 |   , s_req_first_http_major
 320 |   , s_req_http_major
 321 |   , s_req_first_http_minor
 322 |   , s_req_http_minor
 323 |   , s_req_line_almost_done
 324 | 
 325 |   , s_header_field_start
 326 |   , s_header_field
 327 |   , s_header_value_discard_ws
 328 |   , s_header_value_discard_ws_almost_done
 329 |   , s_header_value_discard_lws
 330 |   , s_header_value_start
 331 |   , s_header_value
 332 |   , s_header_value_lws
 333 | 
 334 |   , s_header_almost_done
 335 | 
 336 |   , s_chunk_size_start
 337 |   , s_chunk_size
 338 |   , s_chunk_parameters
 339 |   , s_chunk_size_almost_done
 340 | 
 341 |   , s_headers_almost_done
 342 |   , s_headers_done
 343 | 
 344 |   /* Important: 's_headers_done' must be the last 'header' state. All
 345 |    * states beyond this must be 'body' states. It is used for overflow
 346 |    * checking. See the PARSING_HEADER() macro.
 347 |    */
 348 | 
 349 |   , s_chunk_data
 350 |   , s_chunk_data_almost_done
 351 |   , s_chunk_data_done
 352 | 
 353 |   , s_body_identity
 354 |   , s_body_identity_eof
 355 | 
 356 |   , s_message_done
 357 |   };
 358 | 
 359 | /* True while 'state' is at or before s_headers_done, i.e. not yet one of the body states; relies on the enum order above */
 360 | #define PARSING_HEADER(state) (state <= s_headers_done)
 361 | 
 362 | /* Header matcher states; presumably what the header_state bit-field of http_parser holds -- TODO confirm */
 363 | enum header_states
 364 |   { h_general = 0
 365 |   , h_C
 366 |   , h_CO
 367 |   , h_CON
 368 | 
 369 |   , h_matching_connection
 370 |   , h_matching_proxy_connection
 371 |   , h_matching_content_length
 372 |   , h_matching_transfer_encoding
 373 |   , h_matching_upgrade
 374 | 
 375 |   , h_connection
 376 |   , h_content_length
 377 |   , h_transfer_encoding
 378 |   , h_upgrade
 379 | 
 380 |   , h_matching_transfer_encoding_chunked
 381 |   , h_matching_connection_token_start
 382 |   , h_matching_connection_keep_alive
 383 |   , h_matching_connection_close
 384 |   , h_matching_connection_upgrade
 385 |   , h_matching_connection_token
 386 | 
 387 |   , h_transfer_encoding_chunked
 388 |   , h_connection_keep_alive
 389 |   , h_connection_close
 390 |   , h_connection_upgrade
 391 |   };
 392 | /* Host-parsing states (userinfo, host, bracketed v6 host, zone, port); presumably used when splitting the authority of a URL -- TODO confirm */
 393 | enum http_host_state
 394 |   {
 395 |     s_http_host_dead = 1
 396 |   , s_http_userinfo_start
 397 |   , s_http_userinfo
 398 |   , s_http_host_start
 399 |   , s_http_host_v6_start
 400 |   , s_http_host
 401 |   , s_http_host_v6
 402 |   , s_http_host_v6_end
 403 |   , s_http_host_v6_zone_start
 404 |   , s_http_host_v6_zone
 405 |   , s_http_host_port_start
 406 |   , s_http_host_port
 407 | };
 408 | 
 409 | /* Character-class macros; TOKEN, IS_URL_CHAR and IS_HOST_CHAR depend on strict mode. Arguments are parenthesized but may be evaluated more than once. */
 410 | #define CR                  '\r'
 411 | #define LF                  '\n'
 412 | #define LOWER(c)            ((unsigned char)((c) | 0x20)) /* sets bit 0x20, so ASCII 'A'-'Z' become 'a'-'z' */
 413 | #define IS_ALPHA(c)         (LOWER(c) >= 'a' && LOWER(c) <= 'z')
 414 | #define IS_NUM(c)           ((c) >= '0' && (c) <= '9')
 415 | #define IS_ALPHANUM(c)      (IS_ALPHA(c) || IS_NUM(c))
 416 | #define IS_HEX(c)           (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
 417 | #define IS_MARK(c)          ((c) == '-' || (c) == '_' || (c) == '.' || \
 418 |   (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
 419 |   (c) == ')')
 420 | #define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
 421 |   (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
 422 |   (c) == '$' || (c) == ',')
 423 | 
 424 | #define STRICT_TOKEN(c)     (tokens[(unsigned char)(c)]) /* lowercased token char, or 0 if c is not a token char */
 425 | 
 426 | #if HTTP_PARSER_STRICT
 427 | #define TOKEN(c)            (tokens[(unsigned char)(c)])
 428 | #define IS_URL_CHAR(c)      (BIT_AT(normal_url_char, (unsigned char)(c)))
 429 | #define IS_HOST_CHAR(c)     (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
 430 | #else
 431 | #define TOKEN(c)            (((c) == ' ') ? ' ' : tokens[(unsigned char)(c)]) /* non-strict: a space also counts */
 432 | #define IS_URL_CHAR(c)                                                         \
 433 |   (BIT_AT(normal_url_char, (unsigned char)(c)) || ((c) & 0x80))
 434 | #define IS_HOST_CHAR(c)                                                        \
 435 |   (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
 436 | #endif
 437 | 
 438 | /**
 439 |  * Verify that a char is CR, LF, HT, or any byte above 31 other than
 440 |  * DEL (127): space, visible US-ASCII, or %x80-FF
 441 |  **/
 442 | #define IS_HEADER_CHAR(ch)                                                     \
 443 |   ((ch) == CR || (ch) == LF || (ch) == 9 || ((unsigned char)(ch) > 31 && (ch) != 127))
 444 | /* State in which a new message starts, chosen by the type of the in-scope 'parser' */
 445 | #define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
 446 | 
 447 | /* STRICT_CHECK(cond): strict builds fail with HPE_STRICT when cond is true, other builds compile it out. NEW_MESSAGE(): next start state; strict builds use s_dead unless http_should_keep_alive() */
 448 | #if HTTP_PARSER_STRICT
 449 | # define STRICT_CHECK(cond)                                          \
 450 | do {                                                                 \
 451 |   if (cond) {                                                        \
 452 |     SET_ERRNO(HPE_STRICT);                                           \
 453 |     goto error;                                                      \
 454 |   }                                                                  \
 455 | } while (0)
 456 | # define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
 457 | #else
 458 | # define STRICT_CHECK(cond)
 459 | # define NEW_MESSAGE() start_state
 460 | #endif
 461 | 
 462 | 
 463 | /* Map errno values to strings for human-readable output: one { name, description } pair per HTTP_ERRNO_MAP row, in map order */
 464 | #define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
 465 | static struct {
 466 |   const char *name;        /* "HPE_" followed by the map row's name */
 467 |   const char *description; /* the map row's description string */
 468 | } http_strerror_tab[] = {
 469 |   HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
 470 | };
 471 | #undef HTTP_STRERROR_GEN
 472 | 
 473 | int http_message_needs_eof(const http_parser *parser);
 474 | 
 475 | /* Our URL parser.
 476 |  *
 477 |  * This is designed to be shared by http_parser_execute() for URL validation,
 478 |  * hence it has a state transition + byte-for-byte interface. In addition, it
 479 |  * is meant to be embedded in http_parser_parse_url(), which does the dirty
 480 |  * work of turning state transitions into URL components for its API.
 481 |  *
 482 |  * Takes the current state s and the next byte ch; returns the next state, or
 483 |  * s_dead if ch is not valid there. Callers should only pass non-space bytes
 484 |  * and detect the URL/non-URL transition themselves (space, CR, LF => s_dead).
 485 |  */
 486 | static enum state
 487 | parse_url_char(enum state s, const char ch)
 488 | {
 489 |   if (ch == ' ' || ch == '\r' || ch == '\n') { /* never accepted, whatever the state */
 490 |     return s_dead;
 491 |   }
 492 | 
 493 | #if HTTP_PARSER_STRICT
 494 |   if (ch == '\t' || ch == '\f') { /* strict builds also reject tab and form feed */
 495 |     return s_dead;
 496 |   }
 497 | #endif
 498 | 
 499 |   switch (s) {
 500 |     case s_req_spaces_before_url:
 501 |       /* Proxied requests are followed by scheme of an absolute URI (alpha).
 502 |        * All methods except CONNECT are followed by '/' or '*'.
 503 |        */
 504 | 
 505 |       if (ch == '/' || ch == '*') {
 506 |         return s_req_path;
 507 |       }
 508 | 
 509 |       if (IS_ALPHA(ch)) {
 510 |         return s_req_schema;
 511 |       }
 512 | 
 513 |       break;
 514 | 
 515 |     case s_req_schema:
 516 |       if (IS_ALPHA(ch)) { /* only letters are accepted in the schema */
 517 |         return s;
 518 |       }
 519 | 
 520 |       if (ch == ':') {
 521 |         return s_req_schema_slash;
 522 |       }
 523 | 
 524 |       break;
 525 | 
 526 |     case s_req_schema_slash: /* expects the first '/' after "schema:" */
 527 |       if (ch == '/') {
 528 |         return s_req_schema_slash_slash;
 529 |       }
 530 | 
 531 |       break;
 532 | 
 533 |     case s_req_schema_slash_slash: /* expects the second '/' */
 534 |       if (ch == '/') {
 535 |         return s_req_server_start;
 536 |       }
 537 | 
 538 |       break;
 539 | 
 540 |     case s_req_server_with_at: /* an '@' was just seen; a second one in a row is rejected */
 541 |       if (ch == '@') {
 542 |         return s_dead;
 543 |       }
 544 | 
 545 |     /* FALLTHROUGH */
 546 |     case s_req_server_start:
 547 |     case s_req_server:
 548 |       if (ch == '/') {
 549 |         return s_req_path;
 550 |       }
 551 | 
 552 |       if (ch == '?') {
 553 |         return s_req_query_string_start;
 554 |       }
 555 | 
 556 |       if (ch == '@') {
 557 |         return s_req_server_with_at;
 558 |       }
 559 | 
 560 |       if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') { /* brackets presumably for IPv6 literals -- confirm */
 561 |         return s_req_server;
 562 |       }
 563 | 
 564 |       break;
 565 | 
 566 |     case s_req_path:
 567 |       if (IS_URL_CHAR(ch)) {
 568 |         return s;
 569 |       }
 570 | 
 571 |       switch (ch) {
 572 |         case '?':
 573 |           return s_req_query_string_start;
 574 | 
 575 |         case '#':
 576 |           return s_req_fragment_start;
 577 |       }
 578 | 
 579 |       break;
 580 | 
 581 |     case s_req_query_string_start:
 582 |     case s_req_query_string:
 583 |       if (IS_URL_CHAR(ch)) {
 584 |         return s_req_query_string;
 585 |       }
 586 | 
 587 |       switch (ch) {
 588 |         case '?':
 589 |           /* allow extra '?' in query string */
 590 |           return s_req_query_string;
 591 | 
 592 |         case '#':
 593 |           return s_req_fragment_start;
 594 |       }
 595 | 
 596 |       break;
 597 | 
 598 |     case s_req_fragment_start:
 599 |       if (IS_URL_CHAR(ch)) {
 600 |         return s_req_fragment;
 601 |       }
 602 | 
 603 |       switch (ch) {
 604 |         case '?':
 605 |           return s_req_fragment;
 606 | 
 607 |         case '#': /* repeated '#' keeps us in s_req_fragment_start */
 608 |           return s;
 609 |       }
 610 | 
 611 |       break;
 612 | 
 613 |     case s_req_fragment:
 614 |       if (IS_URL_CHAR(ch)) {
 615 |         return s;
 616 |       }
 617 | 
 618 |       switch (ch) {
 619 |         case '?':
 620 |         case '#': /* both are accepted inside a fragment */
 621 |           return s;
 622 |       }
 623 | 
 624 |       break;
 625 | 
 626 |     default: /* any state not handled above yields s_dead below */
 627 |       break;
 628 |   }
 629 | 
 630 |   /* We should never fall out of the switch above unless there's an error */
 631 |   return s_dead;
 632 | }
 633 | 
 634 | size_t http_parser_execute (http_parser *parser,
 635 |                             const http_parser_settings *settings,
 636 |                             const char *data,
 637 |                             size_t len)
 638 | {
 639 |   char c, ch;
 640 |   int8_t unhex_val;
 641 |   const char *p = data;
 642 |   const char *header_field_mark = 0;
 643 |   const char *header_value_mark = 0;
 644 |   const char *url_mark = 0;
 645 |   const char *body_mark = 0;
 646 |   const char *status_mark = 0;
 647 |   enum state p_state = (enum state) parser->state;
 648 |   const unsigned int lenient = parser->lenient_http_headers;
 649 | 
 650 |   /* We're in an error state. Don't bother doing anything. */
 651 |   if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
 652 |     return 0;
 653 |   }
 654 | 
 655 |   if (len == 0) {
 656 |     switch (CURRENT_STATE()) {
 657 |       case s_body_identity_eof:
 658 |         /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
 659 |          * we got paused.
 660 |          */
 661 |         CALLBACK_NOTIFY_NOADVANCE(message_complete);
 662 |         return 0;
 663 | 
 664 |       case s_dead:
 665 |       case s_start_req_or_res:
 666 |       case s_start_res:
 667 |       case s_start_req:
 668 |         return 0;
 669 | 
 670 |       default:
 671 |         SET_ERRNO(HPE_INVALID_EOF_STATE);
 672 |         return 1;
 673 |     }
 674 |   }
 675 | 
 676 | 
 677 |   if (CURRENT_STATE() == s_header_field)
 678 |     header_field_mark = data;
 679 |   if (CURRENT_STATE() == s_header_value)
 680 |     header_value_mark = data;
 681 |   switch (CURRENT_STATE()) {
 682 |   case s_req_path:
 683 |   case s_req_schema:
 684 |   case s_req_schema_slash:
 685 |   case s_req_schema_slash_slash:
 686 |   case s_req_server_start:
 687 |   case s_req_server:
 688 |   case s_req_server_with_at:
 689 |   case s_req_query_string_start:
 690 |   case s_req_query_string:
 691 |   case s_req_fragment_start:
 692 |   case s_req_fragment:
 693 |     url_mark = data;
 694 |     break;
 695 |   case s_res_status:
 696 |     status_mark = data;
 697 |     break;
 698 |   default:
 699 |     break;
 700 |   }
 701 | 
 702 |   for (p=data; p != data + len; p++) {
 703 |     ch = *p;
 704 | 
 705 |     if (PARSING_HEADER(CURRENT_STATE()))
 706 |       COUNT_HEADER_SIZE(1);
 707 | 
 708 | reexecute:
 709 |     switch (CURRENT_STATE()) {
 710 | 
 711 |       case s_dead:
 712 |         /* This state is used after a 'Connection: close' message;
 713 |          * the parser will error out if it reads another message.
 714 |          */
 715 |         if (LIKELY(ch == CR || ch == LF))
 716 |           break;
 717 | 
 718 |         SET_ERRNO(HPE_CLOSED_CONNECTION);
 719 |         goto error;
 720 | 
 721 |       case s_start_req_or_res:
 722 |       {
 723 |         if (ch == CR || ch == LF)
 724 |           break;
 725 |         parser->flags = 0;
 726 |         parser->content_length = ULLONG_MAX;
 727 | 
 728 |         if (ch == 'H') {
 729 |           UPDATE_STATE(s_res_or_resp_H);
 730 | 
 731 |           CALLBACK_NOTIFY(message_begin);
 732 |         } else {
 733 |           parser->type = HTTP_REQUEST;
 734 |           UPDATE_STATE(s_start_req);
 735 |           REEXECUTE();
 736 |         }
 737 | 
 738 |         break;
 739 |       }
 740 | 
 741 |       case s_res_or_resp_H:
 742 |         if (ch == 'T') {
 743 |           parser->type = HTTP_RESPONSE;
 744 |           UPDATE_STATE(s_res_HT);
 745 |         } else {
 746 |           if (UNLIKELY(ch != 'E')) {
 747 |             SET_ERRNO(HPE_INVALID_CONSTANT);
 748 |             goto error;
 749 |           }
 750 | 
 751 |           parser->type = HTTP_REQUEST;
 752 |           parser->method = HTTP_HEAD;
 753 |           parser->index = 2;
 754 |           UPDATE_STATE(s_req_method);
 755 |         }
 756 |         break;
 757 | 
 758 |       case s_start_res:
 759 |       {
 760 |         parser->flags = 0;
 761 |         parser->content_length = ULLONG_MAX;
 762 | 
 763 |         switch (ch) {
 764 |           case 'H':
 765 |             UPDATE_STATE(s_res_H);
 766 |             break;
 767 | 
 768 |           case CR:
 769 |           case LF:
 770 |             break;
 771 | 
 772 |           default:
 773 |             SET_ERRNO(HPE_INVALID_CONSTANT);
 774 |             goto error;
 775 |         }
 776 | 
 777 |         CALLBACK_NOTIFY(message_begin);
 778 |         break;
 779 |       }
 780 | 
 781 |       case s_res_H:
 782 |         STRICT_CHECK(ch != 'T');
 783 |         UPDATE_STATE(s_res_HT);
 784 |         break;
 785 | 
 786 |       case s_res_HT:
 787 |         STRICT_CHECK(ch != 'T');
 788 |         UPDATE_STATE(s_res_HTT);
 789 |         break;
 790 | 
 791 |       case s_res_HTT:
 792 |         STRICT_CHECK(ch != 'P');
 793 |         UPDATE_STATE(s_res_HTTP);
 794 |         break;
 795 | 
 796 |       case s_res_HTTP:
 797 |         STRICT_CHECK(ch != '/');
 798 |         UPDATE_STATE(s_res_first_http_major);
 799 |         break;
 800 | 
 801 |       case s_res_first_http_major:
 802 |         if (UNLIKELY(ch < '0' || ch > '9')) {
 803 |           SET_ERRNO(HPE_INVALID_VERSION);
 804 |           goto error;
 805 |         }
 806 | 
 807 |         parser->http_major = ch - '0';
 808 |         UPDATE_STATE(s_res_http_major);
 809 |         break;
 810 | 
 811 |       /* major HTTP version or dot */
 812 |       case s_res_http_major:
 813 |       {
 814 |         if (ch == '.') {
 815 |           UPDATE_STATE(s_res_first_http_minor);
 816 |           break;
 817 |         }
 818 | 
 819 |         if (!IS_NUM(ch)) {
 820 |           SET_ERRNO(HPE_INVALID_VERSION);
 821 |           goto error;
 822 |         }
 823 | 
 824 |         parser->http_major *= 10;
 825 |         parser->http_major += ch - '0';
 826 | 
 827 |         if (UNLIKELY(parser->http_major > 999)) {
 828 |           SET_ERRNO(HPE_INVALID_VERSION);
 829 |           goto error;
 830 |         }
 831 | 
 832 |         break;
 833 |       }
 834 | 
 835 |       /* first digit of minor HTTP version */
 836 |       case s_res_first_http_minor:
 837 |         if (UNLIKELY(!IS_NUM(ch))) {
 838 |           SET_ERRNO(HPE_INVALID_VERSION);
 839 |           goto error;
 840 |         }
 841 | 
 842 |         parser->http_minor = ch - '0';
 843 |         UPDATE_STATE(s_res_http_minor);
 844 |         break;
 845 | 
 846 |       /* minor HTTP version or the space that precedes the status code */
 847 |       case s_res_http_minor:
 848 |       {
 849 |         if (ch == ' ') {
 850 |           UPDATE_STATE(s_res_first_status_code);
 851 |           break;
 852 |         }
 853 | 
 854 |         if (UNLIKELY(!IS_NUM(ch))) {
 855 |           SET_ERRNO(HPE_INVALID_VERSION);
 856 |           goto error;
 857 |         }
 858 | 
 859 |         parser->http_minor *= 10;
 860 |         parser->http_minor += ch - '0';
 861 | 
 862 |         if (UNLIKELY(parser->http_minor > 999)) {
 863 |           SET_ERRNO(HPE_INVALID_VERSION);
 864 |           goto error;
 865 |         }
 866 | 
 867 |         break;
 868 |       }
 869 | 
 870 |       case s_res_first_status_code:
 871 |       {
 872 |         if (!IS_NUM(ch)) {
 873 |           if (ch == ' ') {
 874 |             break;
 875 |           }
 876 | 
 877 |           SET_ERRNO(HPE_INVALID_STATUS);
 878 |           goto error;
 879 |         }
 880 |         parser->status_code = ch - '0';
 881 |         UPDATE_STATE(s_res_status_code);
 882 |         break;
 883 |       }
 884 | 
 885 |       case s_res_status_code:
 886 |       {
 887 |         if (!IS_NUM(ch)) {
 888 |           switch (ch) {
 889 |             case ' ':
 890 |               UPDATE_STATE(s_res_status_start);
 891 |               break;
 892 |             case CR:
 893 |               UPDATE_STATE(s_res_line_almost_done);
 894 |               break;
 895 |             case LF:
 896 |               UPDATE_STATE(s_header_field_start);
 897 |               break;
 898 |             default:
 899 |               SET_ERRNO(HPE_INVALID_STATUS);
 900 |               goto error;
 901 |           }
 902 |           break;
 903 |         }
 904 | 
 905 |         parser->status_code *= 10;
 906 |         parser->status_code += ch - '0';
 907 | 
 908 |         if (UNLIKELY(parser->status_code > 999)) {
 909 |           SET_ERRNO(HPE_INVALID_STATUS);
 910 |           goto error;
 911 |         }
 912 | 
 913 |         break;
 914 |       }
 915 | 
 916 |       case s_res_status_start:
 917 |       {
 918 |         if (ch == CR) {
 919 |           UPDATE_STATE(s_res_line_almost_done);
 920 |           break;
 921 |         }
 922 | 
 923 |         if (ch == LF) {
 924 |           UPDATE_STATE(s_header_field_start);
 925 |           break;
 926 |         }
 927 | 
 928 |         MARK(status);
 929 |         UPDATE_STATE(s_res_status);
 930 |         parser->index = 0;
 931 |         break;
 932 |       }
 933 | 
 934 |       case s_res_status:
 935 |         if (ch == CR) {
 936 |           UPDATE_STATE(s_res_line_almost_done);
 937 |           CALLBACK_DATA(status);
 938 |           break;
 939 |         }
 940 | 
 941 |         if (ch == LF) {
 942 |           UPDATE_STATE(s_header_field_start);
 943 |           CALLBACK_DATA(status);
 944 |           break;
 945 |         }
 946 | 
 947 |         break;
 948 | 
 949 |       case s_res_line_almost_done:
 950 |         STRICT_CHECK(ch != LF);
 951 |         UPDATE_STATE(s_header_field_start);
 952 |         break;
 953 | 
 954 |       case s_start_req:
 955 |       {
 956 |         if (ch == CR || ch == LF)
 957 |           break;
 958 |         parser->flags = 0;
 959 |         parser->content_length = ULLONG_MAX;
 960 | 
 961 |         if (UNLIKELY(!IS_ALPHA(ch))) {
 962 |           SET_ERRNO(HPE_INVALID_METHOD);
 963 |           goto error;
 964 |         }
 965 | 
 966 |         parser->method = (enum http_method) 0;
 967 |         parser->index = 1;
 968 |         switch (ch) {
 969 |           case 'A': parser->method = HTTP_ACL; break;
 970 |           case 'B': parser->method = HTTP_BIND; break;
 971 |           case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
 972 |           case 'D': parser->method = HTTP_DELETE; break;
 973 |           case 'G': parser->method = HTTP_GET; break;
 974 |           case 'H': parser->method = HTTP_HEAD; break;
 975 |           case 'L': parser->method = HTTP_LOCK; /* or LINK */ break;
 976 |           case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH, MKCALENDAR */ break;
 977 |           case 'N': parser->method = HTTP_NOTIFY; break;
 978 |           case 'O': parser->method = HTTP_OPTIONS; break;
 979 |           case 'P': parser->method = HTTP_POST;
 980 |             /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
 981 |             break;
 982 |           case 'R': parser->method = HTTP_REPORT; /* or REBIND */ break;
 983 |           case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH */ break;
 984 |           case 'T': parser->method = HTTP_TRACE; break;
 985 |           case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE, UNBIND, UNLINK */ break;
 986 |           default:
 987 |             SET_ERRNO(HPE_INVALID_METHOD);
 988 |             goto error;
 989 |         }
 990 |         UPDATE_STATE(s_req_method);
 991 | 
 992 |         CALLBACK_NOTIFY(message_begin);
 993 | 
 994 |         break;
 995 |       }
 996 | 
 997 |       case s_req_method:
 998 |       {
 999 |         const char *matcher;
1000 |         if (UNLIKELY(ch == '\0')) {
1001 |           SET_ERRNO(HPE_INVALID_METHOD);
1002 |           goto error;
1003 |         }
1004 | 
1005 |         matcher = method_strings[parser->method];
1006 |         if (ch == ' ' && matcher[parser->index] == '\0') {
1007 |           UPDATE_STATE(s_req_spaces_before_url);
1008 |         } else if (ch == matcher[parser->index]) {
1009 |           ; /* nada */
1010 |         } else if (IS_ALPHA(ch)) {
1011 | 
1012 |           switch (parser->method << 16 | parser->index << 8 | ch) {
1013 | #define XX(meth, pos, ch, new_meth) \
1014 |             case (HTTP_##meth << 16 | pos << 8 | ch): \
1015 |               parser->method = HTTP_##new_meth; break;
1016 | 
1017 |             XX(POST,      1, 'U', PUT)
1018 |             XX(POST,      1, 'A', PATCH)
1019 |             XX(CONNECT,   1, 'H', CHECKOUT)
1020 |             XX(CONNECT,   2, 'P', COPY)
1021 |             XX(MKCOL,     1, 'O', MOVE)
1022 |             XX(MKCOL,     1, 'E', MERGE)
1023 |             XX(MKCOL,     2, 'A', MKACTIVITY)
1024 |             XX(MKCOL,     3, 'A', MKCALENDAR)
1025 |             XX(SUBSCRIBE, 1, 'E', SEARCH)
1026 |             XX(REPORT,    2, 'B', REBIND)
1027 |             XX(POST,      1, 'R', PROPFIND)
1028 |             XX(PROPFIND,  4, 'P', PROPPATCH)
1029 |             XX(PUT,       2, 'R', PURGE)
1030 |             XX(LOCK,      1, 'I', LINK)
1031 |             XX(UNLOCK,    2, 'S', UNSUBSCRIBE)
1032 |             XX(UNLOCK,    2, 'B', UNBIND)
1033 |             XX(UNLOCK,    3, 'I', UNLINK)
1034 | #undef XX
1035 | 
1036 |             default:
1037 |               SET_ERRNO(HPE_INVALID_METHOD);
1038 |               goto error;
1039 |           }
1040 |         } else if (ch == '-' &&
1041 |                    parser->index == 1 &&
1042 |                    parser->method == HTTP_MKCOL) {
1043 |           parser->method = HTTP_MSEARCH;
1044 |         } else {
1045 |           SET_ERRNO(HPE_INVALID_METHOD);
1046 |           goto error;
1047 |         }
1048 | 
1049 |         ++parser->index;
1050 |         break;
1051 |       }
1052 | 
1053 |       case s_req_spaces_before_url:
1054 |       {
1055 |         if (ch == ' ') break;
1056 | 
1057 |         MARK(url);
1058 |         if (parser->method == HTTP_CONNECT) {
1059 |           UPDATE_STATE(s_req_server_start);
1060 |         }
1061 | 
1062 |         UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1063 |         if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1064 |           SET_ERRNO(HPE_INVALID_URL);
1065 |           goto error;
1066 |         }
1067 | 
1068 |         break;
1069 |       }
1070 | 
1071 |       case s_req_schema:
1072 |       case s_req_schema_slash:
1073 |       case s_req_schema_slash_slash:
1074 |       case s_req_server_start:
1075 |       {
1076 |         switch (ch) {
1077 |           /* No whitespace allowed here */
1078 |           case ' ':
1079 |           case CR:
1080 |           case LF:
1081 |             SET_ERRNO(HPE_INVALID_URL);
1082 |             goto error;
1083 |           default:
1084 |             UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1085 |             if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1086 |               SET_ERRNO(HPE_INVALID_URL);
1087 |               goto error;
1088 |             }
1089 |         }
1090 | 
1091 |         break;
1092 |       }
1093 | 
1094 |       case s_req_server:
1095 |       case s_req_server_with_at:
1096 |       case s_req_path:
1097 |       case s_req_query_string_start:
1098 |       case s_req_query_string:
1099 |       case s_req_fragment_start:
1100 |       case s_req_fragment:
1101 |       {
1102 |         switch (ch) {
1103 |           case ' ':
1104 |             UPDATE_STATE(s_req_http_start);
1105 |             CALLBACK_DATA(url);
1106 |             break;
1107 |           case CR:
1108 |           case LF:
1109 |             parser->http_major = 0;
1110 |             parser->http_minor = 9;
1111 |             UPDATE_STATE((ch == CR) ?
1112 |               s_req_line_almost_done :
1113 |               s_header_field_start);
1114 |             CALLBACK_DATA(url);
1115 |             break;
1116 |           default:
1117 |             UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1118 |             if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1119 |               SET_ERRNO(HPE_INVALID_URL);
1120 |               goto error;
1121 |             }
1122 |         }
1123 |         break;
1124 |       }
1125 | 
1126 |       case s_req_http_start:
1127 |         switch (ch) {
1128 |           case 'H':
1129 |             UPDATE_STATE(s_req_http_H);
1130 |             break;
1131 |           case ' ':
1132 |             break;
1133 |           default:
1134 |             SET_ERRNO(HPE_INVALID_CONSTANT);
1135 |             goto error;
1136 |         }
1137 |         break;
1138 | 
1139 |       case s_req_http_H:
1140 |         STRICT_CHECK(ch != 'T');
1141 |         UPDATE_STATE(s_req_http_HT);
1142 |         break;
1143 | 
1144 |       case s_req_http_HT:
1145 |         STRICT_CHECK(ch != 'T');
1146 |         UPDATE_STATE(s_req_http_HTT);
1147 |         break;
1148 | 
1149 |       case s_req_http_HTT:
1150 |         STRICT_CHECK(ch != 'P');
1151 |         UPDATE_STATE(s_req_http_HTTP);
1152 |         break;
1153 | 
1154 |       case s_req_http_HTTP:
1155 |         STRICT_CHECK(ch != '/');
1156 |         UPDATE_STATE(s_req_first_http_major);
1157 |         break;
1158 | 
1159 |       /* first digit of major HTTP version */
1160 |       case s_req_first_http_major:
1161 |         if (UNLIKELY(ch < '1' || ch > '9')) {
1162 |           SET_ERRNO(HPE_INVALID_VERSION);
1163 |           goto error;
1164 |         }
1165 | 
1166 |         parser->http_major = ch - '0';
1167 |         UPDATE_STATE(s_req_http_major);
1168 |         break;
1169 | 
1170 |       /* major HTTP version or dot */
1171 |       case s_req_http_major:
1172 |       {
1173 |         if (ch == '.') {
1174 |           UPDATE_STATE(s_req_first_http_minor);
1175 |           break;
1176 |         }
1177 | 
1178 |         if (UNLIKELY(!IS_NUM(ch))) {
1179 |           SET_ERRNO(HPE_INVALID_VERSION);
1180 |           goto error;
1181 |         }
1182 | 
1183 |         parser->http_major *= 10;
1184 |         parser->http_major += ch - '0';
1185 | 
1186 |         if (UNLIKELY(parser->http_major > 999)) {
1187 |           SET_ERRNO(HPE_INVALID_VERSION);
1188 |           goto error;
1189 |         }
1190 | 
1191 |         break;
1192 |       }
1193 | 
1194 |       /* first digit of minor HTTP version */
1195 |       case s_req_first_http_minor:
1196 |         if (UNLIKELY(!IS_NUM(ch))) {
1197 |           SET_ERRNO(HPE_INVALID_VERSION);
1198 |           goto error;
1199 |         }
1200 | 
1201 |         parser->http_minor = ch - '0';
1202 |         UPDATE_STATE(s_req_http_minor);
1203 |         break;
1204 | 
1205 |       /* minor HTTP version or end of request line */
1206 |       case s_req_http_minor:
1207 |       {
1208 |         if (ch == CR) {
1209 |           UPDATE_STATE(s_req_line_almost_done);
1210 |           break;
1211 |         }
1212 | 
1213 |         if (ch == LF) {
1214 |           UPDATE_STATE(s_header_field_start);
1215 |           break;
1216 |         }
1217 | 
1218 |         /* XXX allow spaces after digit? */
1219 | 
1220 |         if (UNLIKELY(!IS_NUM(ch))) {
1221 |           SET_ERRNO(HPE_INVALID_VERSION);
1222 |           goto error;
1223 |         }
1224 | 
1225 |         parser->http_minor *= 10;
1226 |         parser->http_minor += ch - '0';
1227 | 
1228 |         if (UNLIKELY(parser->http_minor > 999)) {
1229 |           SET_ERRNO(HPE_INVALID_VERSION);
1230 |           goto error;
1231 |         }
1232 | 
1233 |         break;
1234 |       }
1235 | 
1236 |       /* end of request line */
1237 |       case s_req_line_almost_done:
1238 |       {
1239 |         if (UNLIKELY(ch != LF)) {
1240 |           SET_ERRNO(HPE_LF_EXPECTED);
1241 |           goto error;
1242 |         }
1243 | 
1244 |         UPDATE_STATE(s_header_field_start);
1245 |         break;
1246 |       }
1247 | 
1248 |       case s_header_field_start:
1249 |       {
1250 |         if (ch == CR) {
1251 |           UPDATE_STATE(s_headers_almost_done);
1252 |           break;
1253 |         }
1254 | 
1255 |         if (ch == LF) {
1256 |           /* they might be just sending \n instead of \r\n so this would be
1257 |            * the second \n to denote the end of headers*/
1258 |           UPDATE_STATE(s_headers_almost_done);
1259 |           REEXECUTE();
1260 |         }
1261 | 
1262 |         c = TOKEN(ch);
1263 | 
1264 |         if (UNLIKELY(!c)) {
1265 |           SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1266 |           goto error;
1267 |         }
1268 | 
1269 |         MARK(header_field);
1270 | 
1271 |         parser->index = 0;
1272 |         UPDATE_STATE(s_header_field);
1273 | 
1274 |         switch (c) {
1275 |           case 'c':
1276 |             parser->header_state = h_C;
1277 |             break;
1278 | 
1279 |           case 'p':
1280 |             parser->header_state = h_matching_proxy_connection;
1281 |             break;
1282 | 
1283 |           case 't':
1284 |             parser->header_state = h_matching_transfer_encoding;
1285 |             break;
1286 | 
1287 |           case 'u':
1288 |             parser->header_state = h_matching_upgrade;
1289 |             break;
1290 | 
1291 |           default:
1292 |             parser->header_state = h_general;
1293 |             break;
1294 |         }
1295 |         break;
1296 |       }
1297 | 
1298 |       case s_header_field:
1299 |       {
1300 |         const char* start = p;
1301 |         for (; p != data + len; p++) {
1302 |           ch = *p;
1303 |           c = TOKEN(ch);
1304 | 
1305 |           if (!c)
1306 |             break;
1307 | 
1308 |           switch (parser->header_state) {
1309 |             case h_general:
1310 |               break;
1311 | 
1312 |             case h_C:
1313 |               parser->index++;
1314 |               parser->header_state = (c == 'o' ? h_CO : h_general);
1315 |               break;
1316 | 
1317 |             case h_CO:
1318 |               parser->index++;
1319 |               parser->header_state = (c == 'n' ? h_CON : h_general);
1320 |               break;
1321 | 
1322 |             case h_CON:
1323 |               parser->index++;
1324 |               switch (c) {
1325 |                 case 'n':
1326 |                   parser->header_state = h_matching_connection;
1327 |                   break;
1328 |                 case 't':
1329 |                   parser->header_state = h_matching_content_length;
1330 |                   break;
1331 |                 default:
1332 |                   parser->header_state = h_general;
1333 |                   break;
1334 |               }
1335 |               break;
1336 | 
1337 |             /* connection */
1338 | 
1339 |             case h_matching_connection:
1340 |               parser->index++;
1341 |               if (parser->index > sizeof(CONNECTION)-1
1342 |                   || c != CONNECTION[parser->index]) {
1343 |                 parser->header_state = h_general;
1344 |               } else if (parser->index == sizeof(CONNECTION)-2) {
1345 |                 parser->header_state = h_connection;
1346 |               }
1347 |               break;
1348 | 
1349 |             /* proxy-connection */
1350 | 
1351 |             case h_matching_proxy_connection:
1352 |               parser->index++;
1353 |               if (parser->index > sizeof(PROXY_CONNECTION)-1
1354 |                   || c != PROXY_CONNECTION[parser->index]) {
1355 |                 parser->header_state = h_general;
1356 |               } else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
1357 |                 parser->header_state = h_connection;
1358 |               }
1359 |               break;
1360 | 
1361 |             /* content-length */
1362 | 
1363 |             case h_matching_content_length:
1364 |               parser->index++;
1365 |               if (parser->index > sizeof(CONTENT_LENGTH)-1
1366 |                   || c != CONTENT_LENGTH[parser->index]) {
1367 |                 parser->header_state = h_general;
1368 |               } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
1369 |                 parser->header_state = h_content_length;
1370 |               }
1371 |               break;
1372 | 
1373 |             /* transfer-encoding */
1374 | 
1375 |             case h_matching_transfer_encoding:
1376 |               parser->index++;
1377 |               if (parser->index > sizeof(TRANSFER_ENCODING)-1
1378 |                   || c != TRANSFER_ENCODING[parser->index]) {
1379 |                 parser->header_state = h_general;
1380 |               } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
1381 |                 parser->header_state = h_transfer_encoding;
1382 |               }
1383 |               break;
1384 | 
1385 |             /* upgrade */
1386 | 
1387 |             case h_matching_upgrade:
1388 |               parser->index++;
1389 |               if (parser->index > sizeof(UPGRADE)-1
1390 |                   || c != UPGRADE[parser->index]) {
1391 |                 parser->header_state = h_general;
1392 |               } else if (parser->index == sizeof(UPGRADE)-2) {
1393 |                 parser->header_state = h_upgrade;
1394 |               }
1395 |               break;
1396 | 
1397 |             case h_connection:
1398 |             case h_content_length:
1399 |             case h_transfer_encoding:
1400 |             case h_upgrade:
1401 |               if (ch != ' ') parser->header_state = h_general;
1402 |               break;
1403 | 
1404 |             default:
1405 |               assert(0 && "Unknown header_state");
1406 |               break;
1407 |           }
1408 |         }
1409 | 
1410 |         COUNT_HEADER_SIZE(p - start);
1411 | 
1412 |         if (p == data + len) {
1413 |           --p;
1414 |           break;
1415 |         }
1416 | 
1417 |         if (ch == ':') {
1418 |           UPDATE_STATE(s_header_value_discard_ws);
1419 |           CALLBACK_DATA(header_field);
1420 |           break;
1421 |         }
1422 | 
1423 |         SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1424 |         goto error;
1425 |       }
1426 | 
1427 |       case s_header_value_discard_ws:
1428 |         if (ch == ' ' || ch == '\t') break;
1429 | 
1430 |         if (ch == CR) {
1431 |           UPDATE_STATE(s_header_value_discard_ws_almost_done);
1432 |           break;
1433 |         }
1434 | 
1435 |         if (ch == LF) {
1436 |           UPDATE_STATE(s_header_value_discard_lws);
1437 |           break;
1438 |         }
1439 | 
1440 |         /* FALLTHROUGH */
1441 | 
1442 |       case s_header_value_start:
1443 |       {
1444 |         MARK(header_value);
1445 | 
1446 |         UPDATE_STATE(s_header_value);
1447 |         parser->index = 0;
1448 | 
1449 |         c = LOWER(ch);
1450 | 
1451 |         switch (parser->header_state) {
1452 |           case h_upgrade:
1453 |             parser->flags |= F_UPGRADE;
1454 |             parser->header_state = h_general;
1455 |             break;
1456 | 
1457 |           case h_transfer_encoding:
1458 |             /* looking for 'Transfer-Encoding: chunked' */
1459 |             if ('c' == c) {
1460 |               parser->header_state = h_matching_transfer_encoding_chunked;
1461 |             } else {
1462 |               parser->header_state = h_general;
1463 |             }
1464 |             break;
1465 | 
1466 |           case h_content_length:
1467 |             if (UNLIKELY(!IS_NUM(ch))) {
1468 |               SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1469 |               goto error;
1470 |             }
1471 | 
1472 |             if (parser->flags & F_CONTENTLENGTH) {
1473 |               SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1474 |               goto error;
1475 |             }
1476 | 
1477 |             parser->flags |= F_CONTENTLENGTH;
1478 |             parser->content_length = ch - '0';
1479 |             break;
1480 | 
1481 |           case h_connection:
1482 |             /* looking for 'Connection: keep-alive' */
1483 |             if (c == 'k') {
1484 |               parser->header_state = h_matching_connection_keep_alive;
1485 |             /* looking for 'Connection: close' */
1486 |             } else if (c == 'c') {
1487 |               parser->header_state = h_matching_connection_close;
1488 |             } else if (c == 'u') {
1489 |               parser->header_state = h_matching_connection_upgrade;
1490 |             } else {
1491 |               parser->header_state = h_matching_connection_token;
1492 |             }
1493 |             break;
1494 | 
1495 |           /* Multi-value `Connection` header */
1496 |           case h_matching_connection_token_start:
1497 |             break;
1498 | 
1499 |           default:
1500 |             parser->header_state = h_general;
1501 |             break;
1502 |         }
1503 |         break;
1504 |       }
1505 | 
1506 |       case s_header_value:
1507 |       {
1508 |         const char* start = p;
1509 |         enum header_states h_state = (enum header_states) parser->header_state;
1510 |         for (; p != data + len; p++) {
1511 |           ch = *p;
1512 |           if (ch == CR) {
1513 |             UPDATE_STATE(s_header_almost_done);
1514 |             parser->header_state = h_state;
1515 |             CALLBACK_DATA(header_value);
1516 |             break;
1517 |           }
1518 | 
1519 |           if (ch == LF) {
1520 |             UPDATE_STATE(s_header_almost_done);
1521 |             COUNT_HEADER_SIZE(p - start);
1522 |             parser->header_state = h_state;
1523 |             CALLBACK_DATA_NOADVANCE(header_value);
1524 |             REEXECUTE();
1525 |           }
1526 | 
1527 |           if (!lenient && !IS_HEADER_CHAR(ch)) {
1528 |             SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1529 |             goto error;
1530 |           }
1531 | 
1532 |           c = LOWER(ch);
1533 | 
1534 |           switch (h_state) {
1535 |             case h_general:
1536 |             {
1537 |               const char* p_cr;
1538 |               const char* p_lf;
1539 |               size_t limit = data + len - p;
1540 | 
1541 |               limit = MIN(limit, HTTP_MAX_HEADER_SIZE);
1542 | 
1543 |               p_cr = (const char*) memchr(p, CR, limit);
1544 |               p_lf = (const char*) memchr(p, LF, limit);
1545 |               if (p_cr != NULL) {
1546 |                 if (p_lf != NULL && p_cr >= p_lf)
1547 |                   p = p_lf;
1548 |                 else
1549 |                   p = p_cr;
1550 |               } else if (UNLIKELY(p_lf != NULL)) {
1551 |                 p = p_lf;
1552 |               } else {
1553 |                 p = data + len;
1554 |               }
1555 |               --p;
1556 | 
1557 |               break;
1558 |             }
1559 | 
1560 |             case h_connection:
1561 |             case h_transfer_encoding:
1562 |               assert(0 && "Shouldn't get here.");
1563 |               break;
1564 | 
1565 |             case h_content_length:
1566 |             {
1567 |               uint64_t t;
1568 | 
1569 |               if (ch == ' ') break;
1570 | 
1571 |               if (UNLIKELY(!IS_NUM(ch))) {
1572 |                 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1573 |                 parser->header_state = h_state;
1574 |                 goto error;
1575 |               }
1576 | 
1577 |               t = parser->content_length;
1578 |               t *= 10;
1579 |               t += ch - '0';
1580 | 
1581 |               /* Overflow? Test against a conservative limit for simplicity. */
1582 |               if (UNLIKELY((ULLONG_MAX - 10) / 10 < parser->content_length)) {
1583 |                 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1584 |                 parser->header_state = h_state;
1585 |                 goto error;
1586 |               }
1587 | 
1588 |               parser->content_length = t;
1589 |               break;
1590 |             }
1591 | 
1592 |             /* Transfer-Encoding: chunked */
1593 |             case h_matching_transfer_encoding_chunked:
1594 |               parser->index++;
1595 |               if (parser->index > sizeof(CHUNKED)-1
1596 |                   || c != CHUNKED[parser->index]) {
1597 |                 h_state = h_general;
1598 |               } else if (parser->index == sizeof(CHUNKED)-2) {
1599 |                 h_state = h_transfer_encoding_chunked;
1600 |               }
1601 |               break;
1602 | 
1603 |             case h_matching_connection_token_start:
1604 |               /* looking for 'Connection: keep-alive' */
1605 |               if (c == 'k') {
1606 |                 h_state = h_matching_connection_keep_alive;
1607 |               /* looking for 'Connection: close' */
1608 |               } else if (c == 'c') {
1609 |                 h_state = h_matching_connection_close;
1610 |               } else if (c == 'u') {
1611 |                 h_state = h_matching_connection_upgrade;
1612 |               } else if (STRICT_TOKEN(c)) {
1613 |                 h_state = h_matching_connection_token;
1614 |               } else if (c == ' ' || c == '\t') {
1615 |                 /* Skip lws */
1616 |               } else {
1617 |                 h_state = h_general;
1618 |               }
1619 |               break;
1620 | 
1621 |             /* looking for 'Connection: keep-alive' */
1622 |             case h_matching_connection_keep_alive:
1623 |               parser->index++;
1624 |               if (parser->index > sizeof(KEEP_ALIVE)-1
1625 |                   || c != KEEP_ALIVE[parser->index]) {
1626 |                 h_state = h_matching_connection_token;
1627 |               } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1628 |                 h_state = h_connection_keep_alive;
1629 |               }
1630 |               break;
1631 | 
1632 |             /* looking for 'Connection: close' */
1633 |             case h_matching_connection_close:
1634 |               parser->index++;
1635 |               if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1636 |                 h_state = h_matching_connection_token;
1637 |               } else if (parser->index == sizeof(CLOSE)-2) {
1638 |                 h_state = h_connection_close;
1639 |               }
1640 |               break;
1641 | 
1642 |             /* looking for 'Connection: upgrade' */
1643 |             case h_matching_connection_upgrade:
1644 |               parser->index++;
1645 |               if (parser->index > sizeof(UPGRADE) - 1 ||
1646 |                   c != UPGRADE[parser->index]) {
1647 |                 h_state = h_matching_connection_token;
1648 |               } else if (parser->index == sizeof(UPGRADE)-2) {
1649 |                 h_state = h_connection_upgrade;
1650 |               }
1651 |               break;
1652 | 
1653 |             case h_matching_connection_token:
1654 |               if (ch == ',') {
1655 |                 h_state = h_matching_connection_token_start;
1656 |                 parser->index = 0;
1657 |               }
1658 |               break;
1659 | 
1660 |             case h_transfer_encoding_chunked:
1661 |               if (ch != ' ') h_state = h_general;
1662 |               break;
1663 | 
1664 |             case h_connection_keep_alive:
1665 |             case h_connection_close:
1666 |             case h_connection_upgrade:
1667 |               if (ch == ',') {
1668 |                 if (h_state == h_connection_keep_alive) {
1669 |                   parser->flags |= F_CONNECTION_KEEP_ALIVE;
1670 |                 } else if (h_state == h_connection_close) {
1671 |                   parser->flags |= F_CONNECTION_CLOSE;
1672 |                 } else if (h_state == h_connection_upgrade) {
1673 |                   parser->flags |= F_CONNECTION_UPGRADE;
1674 |                 }
1675 |                 h_state = h_matching_connection_token_start;
1676 |                 parser->index = 0;
1677 |               } else if (ch != ' ') {
1678 |                 h_state = h_matching_connection_token;
1679 |               }
1680 |               break;
1681 | 
1682 |             default:
1683 |               UPDATE_STATE(s_header_value);
1684 |               h_state = h_general;
1685 |               break;
1686 |           }
1687 |         }
1688 |         parser->header_state = h_state;
1689 | 
1690 |         COUNT_HEADER_SIZE(p - start);
1691 | 
1692 |         if (p == data + len)
1693 |           --p;
1694 |         break;
1695 |       }
1696 | 
1697 |       case s_header_almost_done:
1698 |       {
1699 |         if (UNLIKELY(ch != LF)) {
1700 |           SET_ERRNO(HPE_LF_EXPECTED);
1701 |           goto error;
1702 |         }
1703 | 
1704 |         UPDATE_STATE(s_header_value_lws);
1705 |         break;
1706 |       }
1707 | 
1708 |       case s_header_value_lws:
1709 |       {
1710 |         if (ch == ' ' || ch == '\t') {
1711 |           UPDATE_STATE(s_header_value_start);
1712 |           REEXECUTE();
1713 |         }
1714 | 
1715 |         /* finished the header */
1716 |         switch (parser->header_state) {
1717 |           case h_connection_keep_alive:
1718 |             parser->flags |= F_CONNECTION_KEEP_ALIVE;
1719 |             break;
1720 |           case h_connection_close:
1721 |             parser->flags |= F_CONNECTION_CLOSE;
1722 |             break;
1723 |           case h_transfer_encoding_chunked:
1724 |             parser->flags |= F_CHUNKED;
1725 |             break;
1726 |           case h_connection_upgrade:
1727 |             parser->flags |= F_CONNECTION_UPGRADE;
1728 |             break;
1729 |           default:
1730 |             break;
1731 |         }
1732 | 
1733 |         UPDATE_STATE(s_header_field_start);
1734 |         REEXECUTE();
1735 |       }
1736 | 
1737 |       case s_header_value_discard_ws_almost_done:
1738 |       {
1739 |         STRICT_CHECK(ch != LF);
1740 |         UPDATE_STATE(s_header_value_discard_lws);
1741 |         break;
1742 |       }
1743 | 
1744 |       case s_header_value_discard_lws:
1745 |       {
1746 |         if (ch == ' ' || ch == '\t') {
1747 |           UPDATE_STATE(s_header_value_discard_ws);
1748 |           break;
1749 |         } else {
1750 |           switch (parser->header_state) {
1751 |             case h_connection_keep_alive:
1752 |               parser->flags |= F_CONNECTION_KEEP_ALIVE;
1753 |               break;
1754 |             case h_connection_close:
1755 |               parser->flags |= F_CONNECTION_CLOSE;
1756 |               break;
1757 |             case h_connection_upgrade:
1758 |               parser->flags |= F_CONNECTION_UPGRADE;
1759 |               break;
1760 |             case h_transfer_encoding_chunked:
1761 |               parser->flags |= F_CHUNKED;
1762 |               break;
1763 |             default:
1764 |               break;
1765 |           }
1766 | 
1767 |           /* header value was empty */
1768 |           MARK(header_value);
1769 |           UPDATE_STATE(s_header_field_start);
1770 |           CALLBACK_DATA_NOADVANCE(header_value);
1771 |           REEXECUTE();
1772 |         }
1773 |       }
1774 | 
1775 |       case s_headers_almost_done:
1776 |       {
1777 |         STRICT_CHECK(ch != LF);
1778 | 
1779 |         if (parser->flags & F_TRAILING) {
1780 |           /* End of a chunked request */
1781 |           UPDATE_STATE(s_message_done);
1782 |           CALLBACK_NOTIFY_NOADVANCE(chunk_complete);
1783 |           REEXECUTE();
1784 |         }
1785 | 
1786 |         /* Cannot use chunked encoding and a content-length header together
1787 |            per the HTTP specification. */
1788 |         if ((parser->flags & F_CHUNKED) &&
1789 |             (parser->flags & F_CONTENTLENGTH)) {
1790 |           SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1791 |           goto error;
1792 |         }
1793 | 
1794 |         UPDATE_STATE(s_headers_done);
1795 | 
1796 |         /* Set this here so that on_headers_complete() callbacks can see it */
1797 |         parser->upgrade =
1798 |           ((parser->flags & (F_UPGRADE | F_CONNECTION_UPGRADE)) ==
1799 |            (F_UPGRADE | F_CONNECTION_UPGRADE) ||
1800 |            parser->method == HTTP_CONNECT);
1801 | 
1802 |         /* Here we call the headers_complete callback. This is somewhat
1803 |          * different than other callbacks because if the user returns 1, we
1804 |          * will interpret that as saying that this message has no body. This
1805 |          * is needed for the annoying case of recieving a response to a HEAD
1806 |          * request.
1807 |          *
1808 |          * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
1809 |          * we have to simulate it by handling a change in errno below.
1810 |          */
1811 |         if (settings->on_headers_complete) {
1812 |           switch (settings->on_headers_complete(parser)) {
1813 |             case 0:
1814 |               break;
1815 | 
1816 |             case 2:
1817 |               parser->upgrade = 1;
1818 | 
1819 |             case 1:
1820 |               parser->flags |= F_SKIPBODY;
1821 |               break;
1822 | 
1823 |             default:
1824 |               SET_ERRNO(HPE_CB_headers_complete);
1825 |               RETURN(p - data); /* Error */
1826 |           }
1827 |         }
1828 | 
1829 |         if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1830 |           RETURN(p - data);
1831 |         }
1832 | 
1833 |         REEXECUTE();
1834 |       }
1835 | 
1836 |       case s_headers_done:
1837 |       {
1838 |         int hasBody;
1839 |         STRICT_CHECK(ch != LF);
1840 | 
1841 |         parser->nread = 0;
1842 | 
1843 |         hasBody = parser->flags & F_CHUNKED ||
1844 |           (parser->content_length > 0 && parser->content_length != ULLONG_MAX);
1845 |         if (parser->upgrade && (parser->method == HTTP_CONNECT ||
1846 |                                 (parser->flags & F_SKIPBODY) || !hasBody)) {
1847 |           /* Exit, the rest of the message is in a different protocol. */
1848 |           UPDATE_STATE(NEW_MESSAGE());
1849 |           CALLBACK_NOTIFY(message_complete);
1850 |           RETURN((p - data) + 1);
1851 |         }
1852 | 
1853 |         if (parser->flags & F_SKIPBODY) {
1854 |           UPDATE_STATE(NEW_MESSAGE());
1855 |           CALLBACK_NOTIFY(message_complete);
1856 |         } else if (parser->flags & F_CHUNKED) {
1857 |           /* chunked encoding - ignore Content-Length header */
1858 |           UPDATE_STATE(s_chunk_size_start);
1859 |         } else {
1860 |           if (parser->content_length == 0) {
1861 |             /* Content-Length header given but zero: Content-Length: 0\r\n */
1862 |             UPDATE_STATE(NEW_MESSAGE());
1863 |             CALLBACK_NOTIFY(message_complete);
1864 |           } else if (parser->content_length != ULLONG_MAX) {
1865 |             /* Content-Length header given and non-zero */
1866 |             UPDATE_STATE(s_body_identity);
1867 |           } else {
1868 |             if (!http_message_needs_eof(parser)) {
1869 |               /* Assume content-length 0 - read the next */
1870 |               UPDATE_STATE(NEW_MESSAGE());
1871 |               CALLBACK_NOTIFY(message_complete);
1872 |             } else {
1873 |               /* Read body until EOF */
1874 |               UPDATE_STATE(s_body_identity_eof);
1875 |             }
1876 |           }
1877 |         }
1878 | 
1879 |         break;
1880 |       }
1881 | 
1882 |       case s_body_identity:
1883 |       {
1884 |         uint64_t to_read = MIN(parser->content_length,
1885 |                                (uint64_t) ((data + len) - p));
1886 | 
1887 |         assert(parser->content_length != 0
1888 |             && parser->content_length != ULLONG_MAX);
1889 | 
1890 |         /* The difference between advancing content_length and p is because
1891 |          * the latter will automaticaly advance on the next loop iteration.
1892 |          * Further, if content_length ends up at 0, we want to see the last
1893 |          * byte again for our message complete callback.
1894 |          */
1895 |         MARK(body);
1896 |         parser->content_length -= to_read;
1897 |         p += to_read - 1;
1898 | 
1899 |         if (parser->content_length == 0) {
1900 |           UPDATE_STATE(s_message_done);
1901 | 
1902 |           /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1903 |            *
1904 |            * The alternative to doing this is to wait for the next byte to
1905 |            * trigger the data callback, just as in every other case. The
1906 |            * problem with this is that this makes it difficult for the test
1907 |            * harness to distinguish between complete-on-EOF and
1908 |            * complete-on-length. It's not clear that this distinction is
1909 |            * important for applications, but let's keep it for now.
1910 |            */
1911 |           CALLBACK_DATA_(body, p - body_mark + 1, p - data);
1912 |           REEXECUTE();
1913 |         }
1914 | 
1915 |         break;
1916 |       }
1917 | 
1918 |       /* read until EOF */
1919 |       case s_body_identity_eof:
1920 |         MARK(body);
1921 |         p = data + len - 1;
1922 | 
1923 |         break;
1924 | 
1925 |       case s_message_done:
1926 |         UPDATE_STATE(NEW_MESSAGE());
1927 |         CALLBACK_NOTIFY(message_complete);
1928 |         if (parser->upgrade) {
1929 |           /* Exit, the rest of the message is in a different protocol. */
1930 |           RETURN((p - data) + 1);
1931 |         }
1932 |         break;
1933 | 
1934 |       case s_chunk_size_start:
1935 |       {
1936 |         assert(parser->nread == 1);
1937 |         assert(parser->flags & F_CHUNKED);
1938 | 
1939 |         unhex_val = unhex[(unsigned char)ch];
1940 |         if (UNLIKELY(unhex_val == -1)) {
1941 |           SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1942 |           goto error;
1943 |         }
1944 | 
1945 |         parser->content_length = unhex_val;
1946 |         UPDATE_STATE(s_chunk_size);
1947 |         break;
1948 |       }
1949 | 
1950 |       case s_chunk_size:
1951 |       {
1952 |         uint64_t t;
1953 | 
1954 |         assert(parser->flags & F_CHUNKED);
1955 | 
1956 |         if (ch == CR) {
1957 |           UPDATE_STATE(s_chunk_size_almost_done);
1958 |           break;
1959 |         }
1960 | 
1961 |         unhex_val = unhex[(unsigned char)ch];
1962 | 
1963 |         if (unhex_val == -1) {
1964 |           if (ch == ';' || ch == ' ') {
1965 |             UPDATE_STATE(s_chunk_parameters);
1966 |             break;
1967 |           }
1968 | 
1969 |           SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1970 |           goto error;
1971 |         }
1972 | 
1973 |         t = parser->content_length;
1974 |         t *= 16;
1975 |         t += unhex_val;
1976 | 
1977 |         /* Overflow? Test against a conservative limit for simplicity. */
1978 |         if (UNLIKELY((ULLONG_MAX - 16) / 16 < parser->content_length)) {
1979 |           SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1980 |           goto error;
1981 |         }
1982 | 
1983 |         parser->content_length = t;
1984 |         break;
1985 |       }
1986 | 
1987 |       case s_chunk_parameters:
1988 |       {
1989 |         assert(parser->flags & F_CHUNKED);
1990 |         /* just ignore this shit. TODO check for overflow */
1991 |         if (ch == CR) {
1992 |           UPDATE_STATE(s_chunk_size_almost_done);
1993 |           break;
1994 |         }
1995 |         break;
1996 |       }
1997 | 
1998 |       case s_chunk_size_almost_done:
1999 |       {
2000 |         assert(parser->flags & F_CHUNKED);
2001 |         STRICT_CHECK(ch != LF);
2002 | 
2003 |         parser->nread = 0;
2004 | 
2005 |         if (parser->content_length == 0) {
2006 |           parser->flags |= F_TRAILING;
2007 |           UPDATE_STATE(s_header_field_start);
2008 |         } else {
2009 |           UPDATE_STATE(s_chunk_data);
2010 |         }
2011 |         CALLBACK_NOTIFY(chunk_header);
2012 |         break;
2013 |       }
2014 | 
2015 |       case s_chunk_data:
2016 |       {
2017 |         uint64_t to_read = MIN(parser->content_length,
2018 |                                (uint64_t) ((data + len) - p));
2019 | 
2020 |         assert(parser->flags & F_CHUNKED);
2021 |         assert(parser->content_length != 0
2022 |             && parser->content_length != ULLONG_MAX);
2023 | 
2024 |         /* See the explanation in s_body_identity for why the content
2025 |          * length and data pointers are managed this way.
2026 |          */
2027 |         MARK(body);
2028 |         parser->content_length -= to_read;
2029 |         p += to_read - 1;
2030 | 
2031 |         if (parser->content_length == 0) {
2032 |           UPDATE_STATE(s_chunk_data_almost_done);
2033 |         }
2034 | 
2035 |         break;
2036 |       }
2037 | 
2038 |       case s_chunk_data_almost_done:
2039 |         assert(parser->flags & F_CHUNKED);
2040 |         assert(parser->content_length == 0);
2041 |         STRICT_CHECK(ch != CR);
2042 |         UPDATE_STATE(s_chunk_data_done);
2043 |         CALLBACK_DATA(body);
2044 |         break;
2045 | 
2046 |       case s_chunk_data_done:
2047 |         assert(parser->flags & F_CHUNKED);
2048 |         STRICT_CHECK(ch != LF);
2049 |         parser->nread = 0;
2050 |         UPDATE_STATE(s_chunk_size_start);
2051 |         CALLBACK_NOTIFY(chunk_complete);
2052 |         break;
2053 | 
2054 |       default:
2055 |         assert(0 && "unhandled state");
2056 |         SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
2057 |         goto error;
2058 |     }
2059 |   }
2060 | 
2061 |   /* Run callbacks for any marks that we have leftover after we ran our of
2062 |    * bytes. There should be at most one of these set, so it's OK to invoke
2063 |    * them in series (unset marks will not result in callbacks).
2064 |    *
2065 |    * We use the NOADVANCE() variety of callbacks here because 'p' has already
2066 |    * overflowed 'data' and this allows us to correct for the off-by-one that
2067 |    * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
2068 |    * value that's in-bounds).
2069 |    */
2070 | 
2071 |   assert(((header_field_mark ? 1 : 0) +
2072 |           (header_value_mark ? 1 : 0) +
2073 |           (url_mark ? 1 : 0)  +
2074 |           (body_mark ? 1 : 0) +
2075 |           (status_mark ? 1 : 0)) <= 1);
2076 | 
2077 |   CALLBACK_DATA_NOADVANCE(header_field);
2078 |   CALLBACK_DATA_NOADVANCE(header_value);
2079 |   CALLBACK_DATA_NOADVANCE(url);
2080 |   CALLBACK_DATA_NOADVANCE(body);
2081 |   CALLBACK_DATA_NOADVANCE(status);
2082 | 
2083 |   RETURN(len);
2084 | 
2085 | error:
2086 |   if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
2087 |     SET_ERRNO(HPE_UNKNOWN);
2088 |   }
2089 | 
2090 |   RETURN(p - data);
2091 | }
2092 | 
2093 | 
2094 | /* Does the parser need to see an EOF to find the end of the message? */
2095 | int
2096 | http_message_needs_eof (const http_parser *parser)
2097 | {
2098 |   if (parser->type == HTTP_REQUEST) {
2099 |     return 0;
2100 |   }
2101 | 
2102 |   /* See RFC 2616 section 4.4 */
2103 |   if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */
2104 |       parser->status_code == 204 ||     /* No Content */
2105 |       parser->status_code == 304 ||     /* Not Modified */
2106 |       parser->flags & F_SKIPBODY) {     /* response to a HEAD request */
2107 |     return 0;
2108 |   }
2109 | 
2110 |   if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
2111 |     return 0;
2112 |   }
2113 | 
2114 |   return 1;
2115 | }
2116 | 
2117 | 
2118 | int
2119 | http_should_keep_alive (const http_parser *parser)
2120 | {
2121 |   if (parser->http_major > 0 && parser->http_minor > 0) {
2122 |     /* HTTP/1.1 */
2123 |     if (parser->flags & F_CONNECTION_CLOSE) {
2124 |       return 0;
2125 |     }
2126 |   } else {
2127 |     /* HTTP/1.0 or earlier */
2128 |     if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {
2129 |       return 0;
2130 |     }
2131 |   }
2132 | 
2133 |   return !http_message_needs_eof(parser);
2134 | }
2135 | 
2136 | 
2137 | const char *
2138 | http_method_str (enum http_method m)
2139 | {
2140 |   return ELEM_AT(method_strings, m, "<unknown>");
2141 | }
2142 | 
2143 | 
2144 | void
2145 | http_parser_init (http_parser *parser, enum http_parser_type t)
2146 | {
2147 |   void *data = parser->data; /* preserve application data */
2148 |   memset(parser, 0, sizeof(*parser));
2149 |   parser->data = data;
2150 |   parser->type = t;
2151 |   parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
2152 |   parser->http_errno = HPE_OK;
2153 | }
2154 | 
2155 | void
2156 | http_parser_settings_init(http_parser_settings *settings)
2157 | {
2158 |   memset(settings, 0, sizeof(*settings));
2159 | }
2160 | 
2161 | const char *
2162 | http_errno_name(enum http_errno err) {
2163 |   assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
2164 |   return http_strerror_tab[err].name;
2165 | }
2166 | 
2167 | const char *
2168 | http_errno_description(enum http_errno err) {
2169 |   assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
2170 |   return http_strerror_tab[err].description;
2171 | }
2172 | 
2173 | static enum http_host_state
2174 | http_parse_host_char(enum http_host_state s, const char ch) {
2175 |   switch(s) {
2176 |     case s_http_userinfo:
2177 |     case s_http_userinfo_start:
2178 |       if (ch == '@') {
2179 |         return s_http_host_start;
2180 |       }
2181 | 
2182 |       if (IS_USERINFO_CHAR(ch)) {
2183 |         return s_http_userinfo;
2184 |       }
2185 |       break;
2186 | 
2187 |     case s_http_host_start:
2188 |       if (ch == '[') {
2189 |         return s_http_host_v6_start;
2190 |       }
2191 | 
2192 |       if (IS_HOST_CHAR(ch)) {
2193 |         return s_http_host;
2194 |       }
2195 | 
2196 |       break;
2197 | 
2198 |     case s_http_host:
2199 |       if (IS_HOST_CHAR(ch)) {
2200 |         return s_http_host;
2201 |       }
2202 | 
2203 |     /* FALLTHROUGH */
2204 |     case s_http_host_v6_end:
2205 |       if (ch == ':') {
2206 |         return s_http_host_port_start;
2207 |       }
2208 | 
2209 |       break;
2210 | 
2211 |     case s_http_host_v6:
2212 |       if (ch == ']') {
2213 |         return s_http_host_v6_end;
2214 |       }
2215 | 
2216 |     /* FALLTHROUGH */
2217 |     case s_http_host_v6_start:
2218 |       if (IS_HEX(ch) || ch == ':' || ch == '.') {
2219 |         return s_http_host_v6;
2220 |       }
2221 | 
2222 |       if (s == s_http_host_v6 && ch == '%') {
2223 |         return s_http_host_v6_zone_start;
2224 |       }
2225 |       break;
2226 | 
2227 |     case s_http_host_v6_zone:
2228 |       if (ch == ']') {
2229 |         return s_http_host_v6_end;
2230 |       }
2231 | 
2232 |     /* FALLTHROUGH */
2233 |     case s_http_host_v6_zone_start:
2234 |       /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */
2235 |       if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' || ch == '-' || ch == '_' ||
2236 |           ch == '~') {
2237 |         return s_http_host_v6_zone;
2238 |       }
2239 |       break;
2240 | 
2241 |     case s_http_host_port:
2242 |     case s_http_host_port_start:
2243 |       if (IS_NUM(ch)) {
2244 |         return s_http_host_port;
2245 |       }
2246 | 
2247 |       break;
2248 | 
2249 |     default:
2250 |       break;
2251 |   }
2252 |   return s_http_host_dead;
2253 | }
2254 | 
2255 | static int
2256 | http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2257 |   enum http_host_state s;
2258 | 
2259 |   const char *p;
2260 |   size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
2261 | 
2262 |   assert(u->field_set & (1 << UF_HOST));
2263 | 
2264 |   u->field_data[UF_HOST].len = 0;
2265 | 
2266 |   s = found_at ? s_http_userinfo_start : s_http_host_start;
2267 | 
2268 |   for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
2269 |     enum http_host_state new_s = http_parse_host_char(s, *p);
2270 | 
2271 |     if (new_s == s_http_host_dead) {
2272 |       return 1;
2273 |     }
2274 | 
2275 |     switch(new_s) {
2276 |       case s_http_host:
2277 |         if (s != s_http_host) {
2278 |           u->field_data[UF_HOST].off = p - buf;
2279 |         }
2280 |         u->field_data[UF_HOST].len++;
2281 |         break;
2282 | 
2283 |       case s_http_host_v6:
2284 |         if (s != s_http_host_v6) {
2285 |           u->field_data[UF_HOST].off = p - buf;
2286 |         }
2287 |         u->field_data[UF_HOST].len++;
2288 |         break;
2289 | 
2290 |       case s_http_host_v6_zone_start:
2291 |       case s_http_host_v6_zone:
2292 |         u->field_data[UF_HOST].len++;
2293 |         break;
2294 | 
2295 |       case s_http_host_port:
2296 |         if (s != s_http_host_port) {
2297 |           u->field_data[UF_PORT].off = p - buf;
2298 |           u->field_data[UF_PORT].len = 0;
2299 |           u->field_set |= (1 << UF_PORT);
2300 |         }
2301 |         u->field_data[UF_PORT].len++;
2302 |         break;
2303 | 
2304 |       case s_http_userinfo:
2305 |         if (s != s_http_userinfo) {
2306 |           u->field_data[UF_USERINFO].off = p - buf ;
2307 |           u->field_data[UF_USERINFO].len = 0;
2308 |           u->field_set |= (1 << UF_USERINFO);
2309 |         }
2310 |         u->field_data[UF_USERINFO].len++;
2311 |         break;
2312 | 
2313 |       default:
2314 |         break;
2315 |     }
2316 |     s = new_s;
2317 |   }
2318 | 
2319 |   /* Make sure we don't end somewhere unexpected */
2320 |   switch (s) {
2321 |     case s_http_host_start:
2322 |     case s_http_host_v6_start:
2323 |     case s_http_host_v6:
2324 |     case s_http_host_v6_zone_start:
2325 |     case s_http_host_v6_zone:
2326 |     case s_http_host_port_start:
2327 |     case s_http_userinfo:
2328 |     case s_http_userinfo_start:
2329 |       return 1;
2330 |     default:
2331 |       break;
2332 |   }
2333 | 
2334 |   return 0;
2335 | }
2336 | 
2337 | void
2338 | http_parser_url_init(struct http_parser_url *u) {
2339 |   memset(u, 0, sizeof(*u));
2340 | }
2341 | 
2342 | int
2343 | http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2344 |                       struct http_parser_url *u)
2345 | {
2346 |   enum state s;
2347 |   const char *p;
2348 |   enum http_parser_url_fields uf, old_uf;
2349 |   int found_at = 0;
2350 | 
2351 |   u->port = u->field_set = 0;
2352 |   s = is_connect ? s_req_server_start : s_req_spaces_before_url;
2353 |   old_uf = UF_MAX;
2354 | 
2355 |   for (p = buf; p < buf + buflen; p++) {
2356 |     s = parse_url_char(s, *p);
2357 | 
2358 |     /* Figure out the next field that we're operating on */
2359 |     switch (s) {
2360 |       case s_dead:
2361 |         return 1;
2362 | 
2363 |       /* Skip delimeters */
2364 |       case s_req_schema_slash:
2365 |       case s_req_schema_slash_slash:
2366 |       case s_req_server_start:
2367 |       case s_req_query_string_start:
2368 |       case s_req_fragment_start:
2369 |         continue;
2370 | 
2371 |       case s_req_schema:
2372 |         uf = UF_SCHEMA;
2373 |         break;
2374 | 
2375 |       case s_req_server_with_at:
2376 |         found_at = 1;
2377 | 
2378 |       /* FALLTROUGH */
2379 |       case s_req_server:
2380 |         uf = UF_HOST;
2381 |         break;
2382 | 
2383 |       case s_req_path:
2384 |         uf = UF_PATH;
2385 |         break;
2386 | 
2387 |       case s_req_query_string:
2388 |         uf = UF_QUERY;
2389 |         break;
2390 | 
2391 |       case s_req_fragment:
2392 |         uf = UF_FRAGMENT;
2393 |         break;
2394 | 
2395 |       default:
2396 |         assert(!"Unexpected state");
2397 |         return 1;
2398 |     }
2399 | 
2400 |     /* Nothing's changed; soldier on */
2401 |     if (uf == old_uf) {
2402 |       u->field_data[uf].len++;
2403 |       continue;
2404 |     }
2405 | 
2406 |     u->field_data[uf].off = p - buf;
2407 |     u->field_data[uf].len = 1;
2408 | 
2409 |     u->field_set |= (1 << uf);
2410 |     old_uf = uf;
2411 |   }
2412 | 
2413 |   /* host must be present if there is a schema */
2414 |   /* parsing http:///toto will fail */
2415 |   if ((u->field_set & (1 << UF_SCHEMA)) &&
2416 |       (u->field_set & (1 << UF_HOST)) == 0) {
2417 |     return 1;
2418 |   }
2419 | 
2420 |   if (u->field_set & (1 << UF_HOST)) {
2421 |     if (http_parse_host(buf, u, found_at) != 0) {
2422 |       return 1;
2423 |     }
2424 |   }
2425 | 
2426 |   /* CONNECT requests can only contain "hostname:port" */
2427 |   if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2428 |     return 1;
2429 |   }
2430 | 
2431 |   if (u->field_set & (1 << UF_PORT)) {
2432 |     /* Don't bother with endp; we've already validated the string */
2433 |     unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);
2434 | 
2435 |     /* Ports have a max value of 2^16 */
2436 |     if (v > 0xffff) {
2437 |       return 1;
2438 |     }
2439 | 
2440 |     u->port = (uint16_t) v;
2441 |   }
2442 | 
2443 |   return 0;
2444 | }
2445 | 
2446 | void
2447 | http_parser_pause(http_parser *parser, int paused) {
2448 |   /* Users should only be pausing/unpausing a parser that is not in an error
2449 |    * state. In non-debug builds, there's not much that we can do about this
2450 |    * other than ignore it.
2451 |    */
2452 |   if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
2453 |       HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
2454 |     SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
2455 |   } else {
2456 |     assert(0 && "Attempting to pause parser in error state");
2457 |   }
2458 | }
2459 | 
2460 | int
2461 | http_body_is_final(const struct http_parser *parser) {
2462 |     return parser->state == s_message_done;
2463 | }
2464 | 
2465 | unsigned long
2466 | http_parser_version(void) {
2467 |   return HTTP_PARSER_VERSION_MAJOR * 0x10000 |
2468 |          HTTP_PARSER_VERSION_MINOR * 0x00100 |
2469 |          HTTP_PARSER_VERSION_PATCH * 0x00001;
2470 | }
2471 | 


--------------------------------------------------------------------------------