├── run.sh ├── .travis.yml ├── .gitignore ├── .mailmap ├── LICENSE-MIT ├── contrib ├── url_parser.c └── parsertrace.c ├── AUTHORS ├── http_parser.gyp ├── bench.c ├── Makefile ├── demo.c ├── README.md ├── http_parser.h └── http_parser.c /run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # make 3 | # make parsertrace 4 | # make url_parser 5 | # make http_parser.o 6 | # gcc -Wall -Wextra -O3 http_parser.o demo.c -o demo -g 7 | # ./demo 8 | 9 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: c 2 | 3 | compiler: 4 | - clang 5 | - gcc 6 | 7 | script: 8 | - "make" 9 | 10 | notifications: 11 | email: false 12 | irc: 13 | - "irc.freenode.net#node-ci" 14 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /out/ 2 | core 3 | tags 4 | *.o 5 | test 6 | test_g 7 | test_fast 8 | bench 9 | url_parser 10 | parsertrace 11 | parsertrace_g 12 | *.mk 13 | *.Makefile 14 | *.so.* 15 | *.exe.* 16 | *.exe 17 | *.a 18 | 19 | 20 | # Visual Studio uglies 21 | *.suo 22 | *.sln 23 | *.vcxproj 24 | *.vcxproj.filters 25 | *.vcxproj.user 26 | *.opensdf 27 | *.ncrunchsolution* 28 | *.sdf 29 | *.vsp 30 | *.psess 31 | -------------------------------------------------------------------------------- /.mailmap: -------------------------------------------------------------------------------- 1 | # update AUTHORS with: 2 | # git log --all --reverse --format='%aN <%aE>' | perl -ne 'BEGIN{print "# Authors ordered by first contribution.\n"} print unless $h{$_}; $h{$_} = 1' > AUTHORS 3 | Ryan Dahl 4 | Salman Haq 5 | Simon Zimmermann 6 | Thomas LE ROUX LE ROUX Thomas 7 | Thomas LE ROUX Thomas LE ROUX 8 | Fedor Indutny 9 | 
-------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | http_parser.c is based on src/http/ngx_http_parse.c from NGINX copyright 2 | Igor Sysoev. 3 | 4 | Additional changes are licensed under the same terms as NGINX and 5 | copyright Joyent, Inc. and other Node contributors. All rights reserved. 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to 9 | deal in the Software without restriction, including without limitation the 10 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 11 | sell copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in 15 | all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 22 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 23 | IN THE SOFTWARE. 
24 | -------------------------------------------------------------------------------- /contrib/url_parser.c: -------------------------------------------------------------------------------- 1 | #include "http_parser.h" 2 | #include 3 | #include 4 | 5 | void 6 | dump_url (const char *url, const struct http_parser_url *u) 7 | { 8 | unsigned int i; 9 | 10 | printf("\tfield_set: 0x%x, port: %u\n", u->field_set, u->port); 11 | for (i = 0; i < UF_MAX; i++) { 12 | if ((u->field_set & (1 << i)) == 0) { 13 | printf("\tfield_data[%u]: unset\n", i); 14 | continue; 15 | } 16 | 17 | printf("\tfield_data[%u]: off: %u, len: %u, part: %.*s\n", 18 | i, 19 | u->field_data[i].off, 20 | u->field_data[i].len, 21 | u->field_data[i].len, 22 | url + u->field_data[i].off); 23 | } 24 | } 25 | 26 | int main(int argc, char ** argv) { 27 | struct http_parser_url u; 28 | int len, connect, result; 29 | 30 | if (argc != 3) { 31 | printf("Syntax : %s connect|get url\n", argv[0]); 32 | return 1; 33 | } 34 | len = strlen(argv[2]); 35 | connect = strcmp("connect", argv[1]) == 0 ? 1 : 0; 36 | printf("Parsing %s, connect %d\n", argv[2], connect); 37 | 38 | http_parser_url_init(&u); 39 | result = http_parser_parse_url(argv[2], len, connect, &u); 40 | if (result != 0) { 41 | printf("Parse error : %d\n", result); 42 | return result; 43 | } 44 | printf("Parse ok, result : \n"); 45 | dump_url(argv[2], &u); 46 | return 0; 47 | } 48 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | # Authors ordered by first contribution. 
2 | Ryan Dahl 3 | Jeremy Hinegardner 4 | Sergey Shepelev 5 | Joe Damato 6 | tomika 7 | Phoenix Sol 8 | Cliff Frey 9 | Ewen Cheslack-Postava 10 | Santiago Gala 11 | Tim Becker 12 | Jeff Terrace 13 | Ben Noordhuis 14 | Nathan Rajlich 15 | Mark Nottingham 16 | Aman Gupta 17 | Tim Becker 18 | Sean Cunningham 19 | Peter Griess 20 | Salman Haq 21 | Cliff Frey 22 | Jon Kolb 23 | Fouad Mardini 24 | Paul Querna 25 | Felix Geisendörfer 26 | koichik 27 | Andre Caron 28 | Ivo Raisr 29 | James McLaughlin 30 | David Gwynne 31 | Thomas LE ROUX 32 | Randy Rizun 33 | Andre Louis Caron 34 | Simon Zimmermann 35 | Erik Dubbelboer 36 | Martell Malone 37 | Bertrand Paquet 38 | BogDan Vatra 39 | Peter Faiman 40 | Corey Richardson 41 | Tóth Tamás 42 | Cam Swords 43 | Chris Dickinson 44 | Uli Köhler 45 | Charlie Somerville 46 | Patrik Stutz 47 | Fedor Indutny 48 | runner 49 | Alexis Campailla 50 | David Wragg 51 | Vinnie Falco 52 | Alex Butum 53 | Rex Feng 54 | Alex Kocharin 55 | Mark Koopman 56 | Helge Heß 57 | Alexis La Goutte 58 | George Miroshnykov 59 | Maciej Małecki 60 | Marc O'Morain 61 | Jeff Pinner 62 | Timothy J Fontaine 63 | Akagi201 64 | Romain Giraud 65 | Jay Satiro 66 | Arne Steen 67 | Kjell Schubert 68 | Olivier Mengué 69 | -------------------------------------------------------------------------------- /http_parser.gyp: -------------------------------------------------------------------------------- 1 | # This file is used with the GYP meta build system. 
2 | # http://code.google.com/p/gyp/ 3 | # To build try this: 4 | # svn co http://gyp.googlecode.com/svn/trunk gyp 5 | # ./gyp/gyp -f make --depth=`pwd` http_parser.gyp 6 | # ./out/Debug/test 7 | { 8 | 'target_defaults': { 9 | 'default_configuration': 'Debug', 10 | 'configurations': { 11 | # TODO: hoist these out and put them somewhere common, because 12 | # RuntimeLibrary MUST MATCH across the entire project 13 | 'Debug': { 14 | 'defines': [ 'DEBUG', '_DEBUG' ], 15 | 'cflags': [ '-Wall', '-Wextra', '-O0', '-g', '-ftrapv' ], 16 | 'msvs_settings': { 17 | 'VCCLCompilerTool': { 18 | 'RuntimeLibrary': 1, # static debug 19 | }, 20 | }, 21 | }, 22 | 'Release': { 23 | 'defines': [ 'NDEBUG' ], 24 | 'cflags': [ '-Wall', '-Wextra', '-O3' ], 25 | 'msvs_settings': { 26 | 'VCCLCompilerTool': { 27 | 'RuntimeLibrary': 0, # static release 28 | }, 29 | }, 30 | } 31 | }, 32 | 'msvs_settings': { 33 | 'VCCLCompilerTool': { 34 | }, 35 | 'VCLibrarianTool': { 36 | }, 37 | 'VCLinkerTool': { 38 | 'GenerateDebugInformation': 'true', 39 | }, 40 | }, 41 | 'conditions': [ 42 | ['OS == "win"', { 43 | 'defines': [ 44 | 'WIN32' 45 | ], 46 | }] 47 | ], 48 | }, 49 | 50 | 'targets': [ 51 | { 52 | 'target_name': 'http_parser', 53 | 'type': 'static_library', 54 | 'include_dirs': [ '.' ], 55 | 'direct_dependent_settings': { 56 | 'defines': [ 'HTTP_PARSER_STRICT=0' ], 57 | 'include_dirs': [ '.' ], 58 | }, 59 | 'defines': [ 'HTTP_PARSER_STRICT=0' ], 60 | 'sources': [ './http_parser.c', ], 61 | 'conditions': [ 62 | ['OS=="win"', { 63 | 'msvs_settings': { 64 | 'VCCLCompilerTool': { 65 | # Compile as C++. http_parser.c is actually C99, but C++ is 66 | # close enough in this case. 67 | 'CompileAs': 2, 68 | }, 69 | }, 70 | }] 71 | ], 72 | }, 73 | 74 | { 75 | 'target_name': 'http_parser_strict', 76 | 'type': 'static_library', 77 | 'include_dirs': [ '.' ], 78 | 'direct_dependent_settings': { 79 | 'defines': [ 'HTTP_PARSER_STRICT=1' ], 80 | 'include_dirs': [ '.' 
], 81 | }, 82 | 'defines': [ 'HTTP_PARSER_STRICT=1' ], 83 | 'sources': [ './http_parser.c', ], 84 | 'conditions': [ 85 | ['OS=="win"', { 86 | 'msvs_settings': { 87 | 'VCCLCompilerTool': { 88 | # Compile as C++. http_parser.c is actually C99, but C++ is 89 | # close enough in this case. 90 | 'CompileAs': 2, 91 | }, 92 | }, 93 | }] 94 | ], 95 | }, 96 | 97 | { 98 | 'target_name': 'test-nonstrict', 99 | 'type': 'executable', 100 | 'dependencies': [ 'http_parser' ], 101 | 'sources': [ 'test.c' ] 102 | }, 103 | 104 | { 105 | 'target_name': 'test-strict', 106 | 'type': 'executable', 107 | 'dependencies': [ 'http_parser_strict' ], 108 | 'sources': [ 'test.c' ] 109 | } 110 | ] 111 | } 112 | -------------------------------------------------------------------------------- /bench.c: -------------------------------------------------------------------------------- 1 | /* Copyright Fedor Indutny. All rights reserved. 2 | * 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy 4 | * of this software and associated documentation files (the "Software"), to 5 | * deal in the Software without restriction, including without limitation the 6 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 7 | * sell copies of the Software, and to permit persons to whom the Software is 8 | * furnished to do so, subject to the following conditions: 9 | * 10 | * The above copyright notice and this permission notice shall be included in 11 | * all copies or substantial portions of the Software. 12 | * 13 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 16 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 19 | * IN THE SOFTWARE. 20 | */ 21 | #include "http_parser.h" 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | static const char data[] = 28 | "POST /joyent/http-parser HTTP/1.1\r\n" 29 | "Host: github.com\r\n" 30 | "DNT: 1\r\n" 31 | "Accept-Encoding: gzip, deflate, sdch\r\n" 32 | "Accept-Language: ru-RU,ru;q=0.8,en-US;q=0.6,en;q=0.4\r\n" 33 | "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) " 34 | "AppleWebKit/537.36 (KHTML, like Gecko) " 35 | "Chrome/39.0.2171.65 Safari/537.36\r\n" 36 | "Accept: text/html,application/xhtml+xml,application/xml;q=0.9," 37 | "image/webp,*/*;q=0.8\r\n" 38 | "Referer: https://github.com/joyent/http-parser\r\n" 39 | "Connection: keep-alive\r\n" 40 | "Transfer-Encoding: chunked\r\n" 41 | "Cache-Control: max-age=0\r\n\r\nb\r\nhello world\r\n0\r\n\r\n"; 42 | static const size_t data_len = sizeof(data) - 1; 43 | 44 | static int on_info(http_parser* p) { 45 | return 0; 46 | } 47 | 48 | 49 | static int on_data(http_parser* p, const char *at, size_t length) { 50 | return 0; 51 | } 52 | 53 | static http_parser_settings settings = { 54 | .on_message_begin = on_info, 55 | .on_headers_complete = on_info, 56 | .on_message_complete = on_info, 57 | .on_header_field = on_data, 58 | .on_header_value = on_data, 59 | .on_url = on_data, 60 | .on_status = on_data, 61 | .on_body = on_data 62 | }; 63 | 64 | int bench(int iter_count, int silent) { 65 | struct http_parser parser; 66 | int i; 67 | int err; 68 | struct timeval start; 69 | struct timeval end; 70 | float rps; 71 | 72 | if (!silent) { 73 | err = gettimeofday(&start, NULL); 74 | assert(err == 0); 75 | } 76 | 77 | for (i = 0; i < iter_count; i++) { 78 | size_t parsed; 79 | http_parser_init(&parser, HTTP_REQUEST); 
80 | 81 | parsed = http_parser_execute(&parser, &settings, data, data_len); 82 | assert(parsed == data_len); 83 | } 84 | 85 | if (!silent) { 86 | err = gettimeofday(&end, NULL); 87 | assert(err == 0); 88 | 89 | fprintf(stdout, "Benchmark result:\n"); 90 | 91 | rps = (float) (end.tv_sec - start.tv_sec) + 92 | (end.tv_usec - start.tv_usec) * 1e-6f; 93 | fprintf(stdout, "Took %f seconds to run\n", rps); 94 | 95 | rps = (float) iter_count / rps; 96 | fprintf(stdout, "%f req/sec\n", rps); 97 | fflush(stdout); 98 | } 99 | 100 | return 0; 101 | } 102 | 103 | int main(int argc, char** argv) { 104 | if (argc == 2 && strcmp(argv[1], "infinite") == 0) { 105 | for (;;) 106 | bench(5000000, 1); 107 | return 0; 108 | } else { 109 | return bench(5000000, 0); 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /contrib/parsertrace.c: -------------------------------------------------------------------------------- 1 | /* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev 2 | * 3 | * Additional changes are licensed under the same terms as NGINX and 4 | * copyright Joyent, Inc. and other Node contributors. All rights reserved. 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to 8 | * deal in the Software without restriction, including without limitation the 9 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 10 | * sell copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 
15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 22 | * IN THE SOFTWARE. 23 | */ 24 | 25 | /* Dump what the parser finds to stdout as it happen */ 26 | 27 | #include "http_parser.h" 28 | #include 29 | #include 30 | #include 31 | 32 | int on_message_begin(http_parser* _) { 33 | (void)_; 34 | printf("\n***MESSAGE BEGIN***\n\n"); 35 | return 0; 36 | } 37 | 38 | int on_headers_complete(http_parser* _) { 39 | (void)_; 40 | printf("\n***HEADERS COMPLETE***\n\n"); 41 | return 0; 42 | } 43 | 44 | int on_message_complete(http_parser* _) { 45 | (void)_; 46 | printf("\n***MESSAGE COMPLETE***\n\n"); 47 | return 0; 48 | } 49 | 50 | int on_url(http_parser* _, const char* at, size_t length) { 51 | (void)_; 52 | printf("Url: %.*s\n", (int)length, at); 53 | return 0; 54 | } 55 | 56 | int on_header_field(http_parser* _, const char* at, size_t length) { 57 | (void)_; 58 | printf("Header field: %.*s\n", (int)length, at); 59 | return 0; 60 | } 61 | 62 | int on_header_value(http_parser* _, const char* at, size_t length) { 63 | (void)_; 64 | printf("Header value: %.*s\n", (int)length, at); 65 | return 0; 66 | } 67 | 68 | int on_body(http_parser* _, const char* at, size_t length) { 69 | (void)_; 70 | printf("Body: %.*s\n", (int)length, at); 71 | return 0; 72 | } 73 | 74 | void usage(const char* name) { 75 | fprintf(stderr, 76 | "Usage: %s $type $filename\n" 77 | " type: -x, where x is one of {r,b,q}\n" 78 | " parses file as a Response, reQuest, or Both\n", 79 | name); 80 | exit(EXIT_FAILURE); 81 | } 82 | 83 | int main(int argc, char* argv[]) { 84 | enum 
http_parser_type file_type; 85 | 86 | if (argc != 3) { 87 | usage(argv[0]); 88 | } 89 | 90 | char* type = argv[1]; 91 | if (type[0] != '-') { 92 | usage(argv[0]); 93 | } 94 | 95 | switch (type[1]) { 96 | /* in the case of "-", type[1] will be NUL */ 97 | case 'r': 98 | file_type = HTTP_RESPONSE; 99 | break; 100 | case 'q': 101 | file_type = HTTP_REQUEST; 102 | break; 103 | case 'b': 104 | file_type = HTTP_BOTH; 105 | break; 106 | default: 107 | usage(argv[0]); 108 | } 109 | 110 | char* filename = argv[2]; 111 | FILE* file = fopen(filename, "r"); 112 | if (file == NULL) { 113 | perror("fopen"); 114 | goto fail; 115 | } 116 | 117 | fseek(file, 0, SEEK_END); 118 | long file_length = ftell(file); 119 | if (file_length == -1) { 120 | perror("ftell"); 121 | goto fail; 122 | } 123 | fseek(file, 0, SEEK_SET); 124 | 125 | char* data = malloc(file_length); 126 | if (fread(data, 1, file_length, file) != (size_t)file_length) { 127 | fprintf(stderr, "couldn't read entire file\n"); 128 | free(data); 129 | goto fail; 130 | } 131 | 132 | http_parser_settings settings; 133 | memset(&settings, 0, sizeof(settings)); 134 | settings.on_message_begin = on_message_begin; 135 | settings.on_url = on_url; 136 | settings.on_header_field = on_header_field; 137 | settings.on_header_value = on_header_value; 138 | settings.on_headers_complete = on_headers_complete; 139 | settings.on_body = on_body; 140 | settings.on_message_complete = on_message_complete; 141 | 142 | http_parser parser; 143 | http_parser_init(&parser, file_type); 144 | size_t nparsed = http_parser_execute(&parser, &settings, data, file_length); 145 | free(data); 146 | 147 | if (nparsed != (size_t)file_length) { 148 | fprintf(stderr, 149 | "Error: %s (%s)\n", 150 | http_errno_description(HTTP_PARSER_ERRNO(&parser)), 151 | http_errno_name(HTTP_PARSER_ERRNO(&parser))); 152 | goto fail; 153 | } 154 | 155 | return EXIT_SUCCESS; 156 | 157 | fail: 158 | fclose(file); 159 | return EXIT_FAILURE; 160 | } 161 | 
# Copyright Joyent, Inc. and other Node contributors. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.

PLATFORM ?= $(shell sh -c 'uname -s | tr "[A-Z]" "[a-z]"')
HELPER ?=
BINEXT ?=
ifeq (darwin,$(PLATFORM))
SONAME ?= libhttp_parser.2.7.1.dylib
SOEXT ?= dylib
else ifeq (wine,$(PLATFORM))
CC = winegcc
BINEXT = .exe.so
HELPER = wine
else
SONAME ?= libhttp_parser.so.2.7.1
SOEXT ?= so
endif

CC?=gcc
AR?=ar

CPPFLAGS ?=
LDFLAGS ?=

CPPFLAGS += -I.
CPPFLAGS_DEBUG = $(CPPFLAGS) -DHTTP_PARSER_STRICT=1
CPPFLAGS_DEBUG += $(CPPFLAGS_DEBUG_EXTRA)
CPPFLAGS_FAST = $(CPPFLAGS) -DHTTP_PARSER_STRICT=0
CPPFLAGS_FAST += $(CPPFLAGS_FAST_EXTRA)
CPPFLAGS_BENCH = $(CPPFLAGS_FAST)

CFLAGS += -Wall -Wextra -Werror
CFLAGS_DEBUG = $(CFLAGS) -O0 -g $(CFLAGS_DEBUG_EXTRA)
CFLAGS_FAST = $(CFLAGS) -O3 $(CFLAGS_FAST_EXTRA)
CFLAGS_BENCH = $(CFLAGS_FAST) -Wno-unused-parameter
CFLAGS_LIB = $(CFLAGS_FAST) -fPIC

LDFLAGS_LIB = $(LDFLAGS) -shared

INSTALL ?= install
PREFIX ?= $(DESTDIR)/usr
LIBDIR = $(PREFIX)/lib
INCLUDEDIR = $(PREFIX)/include

ifneq (darwin,$(PLATFORM))
# TODO(bnoordhuis) The native SunOS linker expects -h rather than -soname...
LDFLAGS_LIB += -Wl,-soname=$(SONAME)
endif

test: test_g test_fast demo
	$(HELPER) ./test_g$(BINEXT)
	$(HELPER) ./test_fast$(BINEXT)

test_g: http_parser_g.o test_g.o
	$(CC) $(CFLAGS_DEBUG) $(LDFLAGS) http_parser_g.o test_g.o -o $@

# The demo is built without the strict warning flags used for the library.
demo: http_parser.o demo.o
	$(CC) -g http_parser.o demo.o -o $@

test_g.o: test.c http_parser.h Makefile
	$(CC) $(CPPFLAGS_DEBUG) $(CFLAGS_DEBUG) -c test.c -o $@

http_parser_g.o: http_parser.c http_parser.h Makefile
	$(CC) $(CPPFLAGS_DEBUG) $(CFLAGS_DEBUG) -c http_parser.c -o $@

test_fast: http_parser.o test.o http_parser.h
	$(CC) $(CFLAGS_FAST) $(LDFLAGS) http_parser.o test.o -o $@

test.o: test.c http_parser.h Makefile
	$(CC) $(CPPFLAGS_FAST) $(CFLAGS_FAST) -c test.c -o $@

demo.o: demo.c http_parser.h Makefile
	$(CC) -c -g demo.c -o $@

bench: http_parser.o bench.o
	$(CC) $(CFLAGS_BENCH) $(LDFLAGS) http_parser.o bench.o -o $@

bench.o: bench.c http_parser.h Makefile
	$(CC) $(CPPFLAGS_BENCH) $(CFLAGS_BENCH) -c bench.c -o $@

http_parser.o: http_parser.c http_parser.h Makefile
	$(CC) $(CPPFLAGS_FAST) $(CFLAGS_FAST) -c http_parser.c

test-run-timed: test_fast
	while(true) do time $(HELPER) ./test_fast$(BINEXT) > /dev/null; done

test-valgrind: test_g
	valgrind ./test_g

libhttp_parser.o: http_parser.c http_parser.h Makefile
	$(CC) $(CPPFLAGS_FAST) $(CFLAGS_LIB) -c http_parser.c -o libhttp_parser.o

library: libhttp_parser.o
	$(CC) $(LDFLAGS_LIB) -o $(SONAME) $<

package: http_parser.o
	$(AR) rcs libhttp_parser.a http_parser.o

url_parser: http_parser.o contrib/url_parser.c
	$(CC) $(CPPFLAGS_FAST) $(CFLAGS_FAST) $^ -o $@

url_parser_g: http_parser_g.o contrib/url_parser.c
	$(CC) $(CPPFLAGS_DEBUG) $(CFLAGS_DEBUG) $^ -o $@

parsertrace: http_parser.o contrib/parsertrace.c
	$(CC) $(CPPFLAGS_FAST) $(CFLAGS_FAST) $^ -o parsertrace$(BINEXT)

parsertrace_g: http_parser_g.o contrib/parsertrace.c
	$(CC) $(CPPFLAGS_DEBUG) $(CFLAGS_DEBUG) $^ -o parsertrace_g$(BINEXT)

tags: http_parser.c http_parser.h test.c
	ctags $^

# -f on the symlink so that reinstalling over an existing tree does not fail.
install: library
	$(INSTALL) -D http_parser.h $(INCLUDEDIR)/http_parser.h
	$(INSTALL) -D $(SONAME) $(LIBDIR)/$(SONAME)
	ln -sf $(LIBDIR)/$(SONAME) $(LIBDIR)/libhttp_parser.$(SOEXT)

install-strip: library
	$(INSTALL) -D http_parser.h $(INCLUDEDIR)/http_parser.h
	$(INSTALL) -D -s $(SONAME) $(LIBDIR)/$(SONAME)
	ln -sf $(LIBDIR)/$(SONAME) $(LIBDIR)/libhttp_parser.$(SOEXT)

# Remove exactly what install created: the symlink carries $(SOEXT), which is
# "dylib" on darwin, not a hard-coded "so".
uninstall:
	rm $(INCLUDEDIR)/http_parser.h
	rm $(LIBDIR)/$(SONAME)
	rm $(LIBDIR)/libhttp_parser.$(SOEXT)

clean:
	rm -f *.o *.a tags test test_fast test_g demo bench \
		http_parser.tar libhttp_parser.so.* libhttp_parser.*.dylib \
		url_parser url_parser_g parsertrace parsertrace_g \
		*.exe *.exe.so

contrib/url_parser.c:	http_parser.h
contrib/parsertrace.c:	http_parser.h

# Every target here that does not produce a file of its own name.
.PHONY: clean package test test-run-timed test-valgrind library install install-strip uninstall
uninstall 156 | -------------------------------------------------------------------------------- /demo.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "http_parser.h" 13 | 14 | 15 | char content_type_flag = 0; 16 | char content_length_flag = 0; 17 | 18 | #define BUFSIZZ 1024*8 19 | 20 | #define IMAGE_TYPE_PNG "image/png" 21 | #define IMAGE_TYPE_PNG_EXT ".png" 22 | #define IMAGE_TYPE_JPEG "image/jpeg" 23 | #define IMAGE_TYPE_JPEG_EXT ".jpeg" 24 | #define IMAGE_TYPE_GIF "image/gif" 25 | #define IMAGE_TYPE_GIF_EXT ".gif" 26 | #define IMAGE_TYPE_BMP "image/bmp" 27 | #define IMAGE_TYPE_BMP_EXT ".bmp" 28 | 29 | #define IMAGE_TYPE_DEFAULT_EXT ".html" 30 | 31 | struct http_content{ 32 | char type[32]; 33 | unsigned int response_size; 34 | unsigned int content_size; 35 | char *content_start; 36 | char header_checked; 37 | char mem_realloced; 38 | }http_img ; 39 | 40 | static http_parser *parser; 41 | 42 | int on_message_begin(http_parser* _) 43 | { 44 | (void)_; 45 | printf("\n***MESSAGE BEGIN***\n\n"); 46 | return 0; 47 | } 48 | 49 | int on_headers_complete(http_parser* _) 50 | { 51 | (void)_; 52 | printf("\n***HEADERS COMPLETE***\n\n"); 53 | return 0; 54 | } 55 | 56 | int on_message_complete(http_parser* _) 57 | { 58 | (void)_; 59 | printf("\n***MESSAGE COMPLETE***\n\n"); 60 | return 0; 61 | } 62 | 63 | int on_url(http_parser* _, const char* at, size_t length) 64 | { 65 | (void)_; 66 | printf("Url: %.*s\n", (int)length, at); 67 | return 0; 68 | } 69 | int on_header_field(http_parser* _, const char* at, size_t length) 70 | { 71 | (void)_; 72 | printf("Header field: %.*s\n", (int)length, at); 73 | if(!memcmp("Content-Type", at, length)) 74 | { 75 | //printf("Found Content-Type\n"); 76 | content_type_flag = 1; 77 | } 78 | if(!memcmp("Content-Length", at, length)) 79 | { 80 | //printf("Found 
Content-Length\n"); 81 | content_length_flag = 1; 82 | } 83 | 84 | return 0; 85 | } 86 | 87 | int on_header_value(http_parser* _, const char* at, size_t length) 88 | { 89 | (void)_; 90 | printf("Header value: %.*s\n", (int)length, at); 91 | if(content_type_flag) 92 | { 93 | memcpy(http_img.type, at, length); 94 | //printf("http_img.type = %s\n", http_img.type); 95 | content_type_flag = 0; 96 | } 97 | 98 | if(content_length_flag) 99 | { 100 | char value[32]; 101 | memcpy(value, at, length); 102 | //printf("http_img.content_size = %s\n", value); 103 | http_img.content_size = atoi(value); 104 | content_length_flag = 0; 105 | } 106 | return 0; 107 | } 108 | 109 | int on_body(http_parser* _, const char* at, size_t length) 110 | { 111 | (void)_; 112 | unsigned int z; 113 | char *p = at; 114 | 115 | http_img.content_start=at; 116 | // if(!memcmp("image/png", http_img.type, strlen("image/png"))) 117 | // { 118 | // printf("Found PNG body!http_img.content_start=%p\n", http_img.content_start); 119 | // } 120 | return 0; 121 | } 122 | 123 | static http_parser_settings settings_null = 124 | { 125 | .on_message_begin = on_message_begin, 126 | .on_header_field = on_header_field, 127 | .on_header_value = on_header_value, 128 | .on_url = on_url, 129 | .on_status = 0, 130 | .on_body = on_body, 131 | .on_headers_complete = on_headers_complete, 132 | .on_message_complete = on_message_complete 133 | }; 134 | 135 | 136 | static double tminterval(struct timeval tmstart) 137 | { 138 | double ret = 0; 139 | struct timeval now; 140 | 141 | gettimeofday(&now, NULL); 142 | 143 | ret = ((now.tv_sec + now.tv_usec * 1e-6) 144 | - (tmstart.tv_sec + tmstart.tv_usec * 1e-6)); 145 | tmstart = now; 146 | 147 | return ret; 148 | } 149 | 150 | int main (void) 151 | { 152 | float start, end; 153 | size_t parsed; 154 | struct sockaddr_in address; 155 | int client_sock; 156 | int len,result; 157 | int n; 158 | char buffer[BUFSIZZ]; 159 | char *host="admin.omsg.cn"; 160 | short port = 80; 161 | struct 
hostent *he; 162 | struct in_addr **addr_list; 163 | int i; 164 | struct timeval tmstart; 165 | char *http_buf, *mbuf; 166 | unsigned int http_buf_len = 0; 167 | char check_http_header=0; 168 | char outfile[20] = "logo"; 169 | char *file_ext; 170 | FILE *fp; 171 | int mbuf_len = 0; 172 | char *p; 173 | 174 | parser = malloc(sizeof(http_parser)); 175 | memset(&http_img, 0, sizeof(http_img)); 176 | http_buf = malloc(BUFSIZZ); 177 | memset(http_buf,0, sizeof(http_buf)); 178 | mbuf = malloc(BUFSIZZ); 179 | memset(mbuf,0, sizeof(mbuf)); 180 | 181 | 182 | //Connect to server 183 | /*PNG*///char *buf = "GET http://admin.omsg.cn/uploadpic/2016121034000012.png HTTP/1.1\r\nHost: admin.omsg.cn\r\nAccept: */*\r\nConnection: Keep-Alive\r\n\r\n"; 184 | /*JPEG*/char *buf = "GET http://pic67.nipic.com/file/20150515/19533051_112209270000_2.jpg HTTP/1.1\r\nHost: pic67.nipic.com\r\nAccept: */*\r\nConnection: Keep-Alive\r\n\r\n"; 185 | 186 | if ((he = gethostbyname(host)) == NULL) { // get the host info 187 | printf("gethostbyname error\n"); 188 | return 1; 189 | } 190 | printf("Official name is: %s\n", he->h_name); 191 | printf(" IP addresses: "); 192 | addr_list = (struct in_addr **)he->h_addr_list; 193 | for(i = 0; addr_list[i] != NULL; i++) { 194 | printf("\t%s \n", inet_ntoa(addr_list[0])); 195 | } 196 | 197 | client_sock=socket(AF_INET,SOCK_STREAM,0); 198 | address.sin_addr.s_addr=inet_addr(inet_ntoa(*addr_list[0])); 199 | address.sin_family=AF_INET; 200 | address.sin_port=htons(port); 201 | len=sizeof(address); 202 | result=connect(client_sock,(struct sockaddr *)&address,len); 203 | if(result==-1){ 204 | printf("error!\n"); 205 | exit(-1); 206 | } 207 | 208 | //Parse request 209 | http_parser_init(parser, HTTP_REQUEST); 210 | parsed = http_parser_execute(parser, &settings_null, buf, strlen(buf)); 211 | 212 | //Send request 213 | n=write(client_sock,buf, strlen(buf)); 214 | if(n<0){ 215 | printf("error write\n"); 216 | } 217 | 218 | //Receive response 219 | do{ 220 | 
if((http_img.response_size>BUFSIZZ) && (!http_img.mem_realloced)) 221 | { 222 | //Need more memory, realloc it. 223 | http_buf = realloc(http_buf,http_img.response_size); 224 | http_img.mem_realloced = 1; 225 | printf("realloc memory size to %d\n", http_img.response_size); 226 | } 227 | memcpy(http_buf+http_buf_len, mbuf, mbuf_len); 228 | http_buf_len += mbuf_len; 229 | mbuf_len=recv(client_sock,mbuf,sizeof(mbuf),0); 230 | if(!http_img.header_checked && (http_buf_len>1024)) 231 | { 232 | //Parse http response 233 | http_parser_init(parser, HTTP_RESPONSE); 234 | parsed = http_parser_execute(parser, &settings_null, http_buf, strlen(http_buf)); 235 | http_img.response_size = http_img.content_start - http_buf + http_img.content_size; 236 | printf("response_size=%d content_size=%d\n", http_img.response_size, http_img.content_size); 237 | http_img.header_checked =1; 238 | } 239 | 240 | }while(mbuf_len>0); 241 | 242 | //Reparse http response, in case realloc change the http_buf address. 243 | http_parser_init(parser, HTTP_RESPONSE); 244 | parsed = http_parser_execute(parser, &settings_null, http_buf, strlen(http_buf)); 245 | 246 | //Checkout PNG body, and write to local file. 
247 | p = strtok(http_img.type, ";"); 248 | if(!strcmp(p, IMAGE_TYPE_PNG)) 249 | file_ext = IMAGE_TYPE_PNG_EXT; 250 | else if(!strcmp(p, IMAGE_TYPE_JPEG)) 251 | file_ext = IMAGE_TYPE_JPEG_EXT; 252 | else if(!strcmp(p, IMAGE_TYPE_GIF)) 253 | file_ext = IMAGE_TYPE_GIF_EXT; 254 | else if(!strcmp(p, IMAGE_TYPE_BMP)) 255 | file_ext = IMAGE_TYPE_BMP_EXT; 256 | else 257 | file_ext = IMAGE_TYPE_DEFAULT_EXT; 258 | strcat(outfile, file_ext); 259 | printf("Content-Type=%s\n", outfile); 260 | if((fp = fopen(outfile,"wra+"))==NULL) 261 | { 262 | printf("can't open abc.txt\n"); 263 | } 264 | if(fp != NULL) 265 | if(fwrite(http_img.content_start,sizeof(char),http_img.content_size,fp)!=http_img.content_size) 266 | printf("can't write %s\n", outfile); 267 | if(fp != NULL) 268 | fclose(fp); 269 | 270 | 271 | #if 0 272 | buf = "GET http://admin.omsg.cn/uploadpic/2016121034000012.png HTTP/1.1\r\nHost: admin.omsg.cn\r\nAccept: */*\r\nConnection: Keep-Alive\r\n\r\n"; 273 | 274 | start = (float)clock()/CLOCKS_PER_SEC; 275 | 276 | http_parser_init(parser, HTTP_REQUEST); 277 | parsed = http_parser_execute(parser, &settings_null, buf, strlen(buf)); 278 | 279 | end = (float)clock()/CLOCKS_PER_SEC; 280 | 281 | 282 | buf="HTTP/1.1 200 OK\r\n" 283 | "Date: Tue, 04 Aug 2009 07:59:32 GMT\r\n" 284 | "Server: Apache\r\n" 285 | "X-Powered-By: Servlet/2.5 JSP/2.1\r\n" 286 | "Content-Type: text/xml; charset=utf-8\r\n" 287 | "Connection: close\r\n" 288 | "\r\n" 289 | "\n" 290 | "\n" 291 | " \n" 292 | " \n" 293 | " SOAP-ENV:Client\n" 294 | " Client Error\n" 295 | " \n" 296 | " \n" 297 | ""; 298 | 299 | http_parser_init(parser, HTTP_RESPONSE); 300 | parsed = http_parser_execute(parser, &settings_null, buf, strlen(buf)); 301 | #endif 302 | 303 | //Free allocated buffer 304 | if(parser) 305 | free(parser); 306 | if(http_buf) 307 | free(http_buf); 308 | if(mbuf) 309 | free(mbuf); 310 | 311 | printf("Elapsed %f seconds.\n", (end - start)); 312 | 313 | return (EXIT_SUCCESS); 314 | } 315 | 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | HTTP Parser 2 | =========== 3 | 4 | [![Build Status](https://api.travis-ci.org/nodejs/http-parser.svg?branch=master)](https://travis-ci.org/nodejs/http-parser) 5 | 6 | This is a parser for HTTP messages written in C. It parses both requests and 7 | responses. The parser is designed to be used in high-performance HTTP 8 | applications. It does not make any syscalls nor allocations, it does not 9 | buffer data, it can be interrupted at any time. Depending on your 10 | architecture, it only requires about 40 bytes of data per message 11 | stream (in a web server that is per connection). 12 | 13 | Features: 14 | 15 | * No dependencies 16 | * Handles persistent streams (keep-alive). 17 | * Decodes chunked encoding. 18 | * Upgrade support 19 | * Defends against buffer overflow attacks. 20 | 21 | The parser extracts the following information from HTTP messages: 22 | 23 | * Header fields and values 24 | * Content-Length 25 | * Request method 26 | * Response status code 27 | * Transfer-Encoding 28 | * HTTP version 29 | * Request URL 30 | * Message body 31 | 32 | 33 | Usage 34 | ----- 35 | 36 | One `http_parser` object is used per TCP connection. Initialize the struct 37 | using `http_parser_init()` and set the callbacks. That might look something 38 | like this for a request parser: 39 | ```c 40 | http_parser_settings settings; 41 | settings.on_url = my_url_callback; 42 | settings.on_header_field = my_header_field_callback; 43 | /* ... */ 44 | 45 | http_parser *parser = malloc(sizeof(http_parser)); 46 | http_parser_init(parser, HTTP_REQUEST); 47 | parser->data = my_socket; 48 | ``` 49 | 50 | When data is received on the socket execute the parser and check for errors. 
51 | 52 | ```c 53 | size_t len = 80*1024, nparsed; 54 | char buf[len]; 55 | ssize_t recved; 56 | 57 | recved = recv(fd, buf, len, 0); 58 | 59 | if (recved < 0) { 60 | /* Handle error. */ 61 | } 62 | 63 | /* Start up / continue the parser. 64 | * Note we pass recved==0 to signal that EOF has been received. 65 | */ 66 | nparsed = http_parser_execute(parser, &settings, buf, recved); 67 | 68 | if (parser->upgrade) { 69 | /* handle new protocol */ 70 | } else if (nparsed != recved) { 71 | /* Handle error. Usually just close the connection. */ 72 | } 73 | ``` 74 | 75 | HTTP needs to know where the end of the stream is. For example, sometimes 76 | servers send responses without Content-Length and expect the client to 77 | consume input (for the body) until EOF. To tell http_parser about EOF, give 78 | `0` as the fourth parameter to `http_parser_execute()`. Callbacks and errors 79 | can still be encountered during an EOF, so one must still be prepared 80 | to receive them. 81 | 82 | Scalar valued message information such as `status_code`, `method`, and the 83 | HTTP version are stored in the parser structure. This data is only 84 | temporarily stored in `http_parser` and gets reset on each new message. If 85 | this information is needed later, copy it out of the structure during the 86 | `headers_complete` callback. 87 | 88 | The parser decodes the transfer-encoding for both requests and responses 89 | transparently. That is, a chunked encoding is decoded before being sent to 90 | the on_body callback. 91 | 92 | 93 | The Special Problem of Upgrade 94 | ------------------------------ 95 | 96 | HTTP supports upgrading the connection to a different protocol. An 97 | increasingly common example of this is the WebSocket protocol which sends 98 | a request like 99 | 100 | GET /demo HTTP/1.1 101 | Upgrade: WebSocket 102 | Connection: Upgrade 103 | Host: example.com 104 | Origin: http://example.com 105 | WebSocket-Protocol: sample 106 | 107 | followed by non-HTTP data. 
108 | 109 | (See [RFC6455](https://tools.ietf.org/html/rfc6455) for more information on the 110 | WebSocket protocol.) 111 | 112 | To support this, the parser will treat this as a normal HTTP message without a 113 | body, issuing both on_headers_complete and on_message_complete callbacks. However, 114 | http_parser_execute() will stop parsing at the end of the headers and return. 115 | 116 | The user is expected to check if `parser->upgrade` has been set to 1 after 117 | `http_parser_execute()` returns. Non-HTTP data begins at the buffer supplied 118 | offset by the return value of `http_parser_execute()`. 119 | 120 | 121 | Callbacks 122 | --------- 123 | 124 | During the `http_parser_execute()` call, the callbacks set in 125 | `http_parser_settings` will be executed. The parser maintains state and 126 | never looks behind, so buffering the data is not necessary. If you need to 127 | save certain data for later usage, you can do that from the callbacks. 128 | 129 | There are two types of callbacks: 130 | 131 | * notification `typedef int (*http_cb) (http_parser*);` 132 | Callbacks: on_message_begin, on_headers_complete, on_message_complete. 133 | * data `typedef int (*http_data_cb) (http_parser*, const char *at, size_t length);` 134 | Callbacks: (requests only) on_url, 135 | (common) on_header_field, on_header_value, on_body; 136 | 137 | Callbacks must return 0 on success. Returning a non-zero value indicates 138 | an error to the parser, making it exit immediately. 139 | 140 | For cases where it is necessary to pass local information to/from a callback, 141 | the `http_parser` object's `data` field can be used. 142 | An example of such a case is when using threads to handle a socket connection, 143 | parse a request, and then give a response over that socket. By instantiation 144 | of a thread-local struct containing relevant data (e.g. 
accepted socket, 145 | allocated memory for callbacks to write into, etc), a parser's callbacks are 146 | able to communicate data between the scope of the thread and the scope of the 147 | callback in a threadsafe manner. This allows http-parser to be used in 148 | multi-threaded contexts. 149 | 150 | Example: 151 | ```c 152 | typedef struct { 153 | socket_t sock; 154 | void* buffer; 155 | int buf_len; 156 | } custom_data_t; 157 | 158 | 159 | int my_url_callback(http_parser* parser, const char *at, size_t length) { 160 | /* access to thread local custom_data_t struct. 161 | Use this access to save parsed data for later use into thread local 162 | buffer, or communicate over socket 163 | */ 164 | parser->data; 165 | ... 166 | return 0; 167 | } 168 | 169 | ... 170 | 171 | void http_parser_thread(socket_t sock) { 172 | int nparsed = 0; 173 | /* allocate memory for user data */ 174 | custom_data_t *my_data = malloc(sizeof(custom_data_t)); 175 | 176 | /* some information for use by callbacks. 177 | * achieves thread -> callback information flow */ 178 | my_data->sock = sock; 179 | 180 | /* instantiate a thread-local parser */ 181 | http_parser *parser = malloc(sizeof(http_parser)); 182 | http_parser_init(parser, HTTP_REQUEST); /* initialise parser */ 183 | /* this custom data reference is accessible through the reference to the 184 | parser supplied to callback functions */ 185 | parser->data = my_data; 186 | 187 | http_parser_settings settings; /* set up callbacks */ 188 | settings.on_url = my_url_callback; 189 | 190 | /* execute parser */ 191 | nparsed = http_parser_execute(parser, &settings, buf, recved); 192 | 193 | ... 194 | /* parsed information copied from callback. 195 | can now perform action on data copied into thread-local memory from callbacks. 196 | achieves callback -> thread information flow */ 197 | my_data->buffer; 198 | ... 199 | } 200 | 201 | ``` 202 | 203 | In case you parse an HTTP message in chunks (i.e. 
`read()` request line 204 | from socket, parse, read half headers, parse, etc) your data callbacks 205 | may be called more than once. Http-parser guarantees that data pointer is only 206 | valid for the lifetime of callback. You can also `read()` into a heap allocated 207 | buffer to avoid copying memory around if this fits your application. 208 | 209 | Reading headers may be a tricky task if you read/parse headers partially. 210 | Basically, you need to remember whether last header callback was field or value 211 | and apply the following logic: 212 | 213 | (on_header_field and on_header_value shortened to on_h_*) 214 | ------------------------ ------------ -------------------------------------------- 215 | | State (prev. callback) | Callback | Description/action | 216 | ------------------------ ------------ -------------------------------------------- 217 | | nothing (first call) | on_h_field | Allocate new buffer and copy callback data | 218 | | | | into it | 219 | ------------------------ ------------ -------------------------------------------- 220 | | value | on_h_field | New header started. | 221 | | | | Copy current name,value buffers to headers | 222 | | | | list and allocate new buffer for new name | 223 | ------------------------ ------------ -------------------------------------------- 224 | | field | on_h_field | Previous name continues. Reallocate name | 225 | | | | buffer and append callback data to it | 226 | ------------------------ ------------ -------------------------------------------- 227 | | field | on_h_value | Value for current header started. Allocate | 228 | | | | new buffer and copy callback data to it | 229 | ------------------------ ------------ -------------------------------------------- 230 | | value | on_h_value | Value continues. 
Reallocate value buffer | 231 | | | | and append callback data to it | 232 | ------------------------ ------------ -------------------------------------------- 233 | 234 | 235 | Parsing URLs 236 | ------------ 237 | 238 | A simplistic zero-copy URL parser is provided as `http_parser_parse_url()`. 239 | Users of this library may wish to use it to parse URLs constructed from 240 | consecutive `on_url` callbacks. 241 | 242 | See examples of reading in headers: 243 | 244 | * [partial example](http://gist.github.com/155877) in C 245 | * [from http-parser tests](http://github.com/joyent/http-parser/blob/37a0ff8/test.c#L403) in C 246 | * [from Node library](http://github.com/joyent/node/blob/842eaf4/src/http.js#L284) in Javascript 247 | -------------------------------------------------------------------------------- /http_parser.h: -------------------------------------------------------------------------------- 1 | /* Copyright Joyent, Inc. and other Node contributors. All rights reserved. 2 | * 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy 4 | * of this software and associated documentation files (the "Software"), to 5 | * deal in the Software without restriction, including without limitation the 6 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 7 | * sell copies of the Software, and to permit persons to whom the Software is 8 | * furnished to do so, subject to the following conditions: 9 | * 10 | * The above copyright notice and this permission notice shall be included in 11 | * all copies or substantial portions of the Software. 12 | * 13 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 16 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 19 | * IN THE SOFTWARE. 20 | */ 21 | #ifndef http_parser_h 22 | #define http_parser_h 23 | #ifdef __cplusplus 24 | extern "C" { 25 | #endif 26 | 27 | /* Also update SONAME in the Makefile whenever you change these. */ 28 | #define HTTP_PARSER_VERSION_MAJOR 2 29 | #define HTTP_PARSER_VERSION_MINOR 7 30 | #define HTTP_PARSER_VERSION_PATCH 1 31 | 32 | #include 33 | #if defined(_WIN32) && !defined(__MINGW32__) && \ 34 | (!defined(_MSC_VER) || _MSC_VER<1600) && !defined(__WINE__) 35 | #include 36 | #include 37 | typedef __int8 int8_t; 38 | typedef unsigned __int8 uint8_t; 39 | typedef __int16 int16_t; 40 | typedef unsigned __int16 uint16_t; 41 | typedef __int32 int32_t; 42 | typedef unsigned __int32 uint32_t; 43 | typedef __int64 int64_t; 44 | typedef unsigned __int64 uint64_t; 45 | #else 46 | #include 47 | #endif 48 | 49 | /* Compile with -DHTTP_PARSER_STRICT=0 to make fewer checks, but run 50 | * faster 51 | */ 52 | #ifndef HTTP_PARSER_STRICT 53 | # define HTTP_PARSER_STRICT 1 54 | #endif 55 | 56 | /* Maximum header size allowed. If the macro is not defined 57 | * before including this header then the default is used. To 58 | * change the maximum header size, define the macro in the build 59 | * environment (e.g. -DHTTP_MAX_HEADER_SIZE=). To remove 60 | * the effective limit on the size of the header, define the macro 61 | * to a very large number (e.g. -DHTTP_MAX_HEADER_SIZE=0x7fffffff) 62 | */ 63 | #ifndef HTTP_MAX_HEADER_SIZE 64 | # define HTTP_MAX_HEADER_SIZE (80*1024) 65 | #endif 66 | 67 | typedef struct http_parser http_parser; 68 | typedef struct http_parser_settings http_parser_settings; 69 | 70 | 71 | /* Callbacks should return non-zero to indicate an error. The parser will 72 | * then halt execution. 
73 | * 74 | * The one exception is on_headers_complete. In a HTTP_RESPONSE parser 75 | * returning '1' from on_headers_complete will tell the parser that it 76 | * should not expect a body. This is used when receiving a response to a 77 | * HEAD request which may contain 'Content-Length' or 'Transfer-Encoding: 78 | * chunked' headers that indicate the presence of a body. 79 | * 80 | * Returning `2` from on_headers_complete will tell the parser that it should 81 | * expect neither a body nor any further responses on this connection. This is 82 | * useful for handling responses to a CONNECT request which may not contain 83 | * `Upgrade` or `Connection: upgrade` headers. 84 | * 85 | * http_data_cb does not return data chunks. It will be called arbitrarily 86 | * many times for each string. E.G. you might get 10 callbacks for "on_url" 87 | * each providing just a few characters more data. 88 | */ 89 | typedef int (*http_data_cb) (http_parser*, const char *at, size_t length); 90 | typedef int (*http_cb) (http_parser*); 91 | 92 | 93 | /* Status Codes */ 94 | #define HTTP_STATUS_MAP(XX) \ 95 | XX(100, CONTINUE, Continue) \ 96 | XX(101, SWITCHING_PROTOCOLS, Switching Protocols) \ 97 | XX(102, PROCESSING, Processing) \ 98 | XX(200, OK, OK) \ 99 | XX(201, CREATED, Created) \ 100 | XX(202, ACCEPTED, Accepted) \ 101 | XX(203, NON_AUTHORITATIVE_INFORMATION, Non-Authoritative Information) \ 102 | XX(204, NO_CONTENT, No Content) \ 103 | XX(205, RESET_CONTENT, Reset Content) \ 104 | XX(206, PARTIAL_CONTENT, Partial Content) \ 105 | XX(207, MULTI_STATUS, Multi-Status) \ 106 | XX(208, ALREADY_REPORTED, Already Reported) \ 107 | XX(226, IM_USED, IM Used) \ 108 | XX(300, MULTIPLE_CHOICES, Multiple Choices) \ 109 | XX(301, MOVED_PERMANENTLY, Moved Permanently) \ 110 | XX(302, FOUND, Found) \ 111 | XX(303, SEE_OTHER, See Other) \ 112 | XX(304, NOT_MODIFIED, Not Modified) \ 113 | XX(305, USE_PROXY, Use Proxy) \ 114 | XX(307, TEMPORARY_REDIRECT, Temporary Redirect) \ 115 | XX(308, 
PERMANENT_REDIRECT, Permanent Redirect) \ 116 | XX(400, BAD_REQUEST, Bad Request) \ 117 | XX(401, UNAUTHORIZED, Unauthorized) \ 118 | XX(402, PAYMENT_REQUIRED, Payment Required) \ 119 | XX(403, FORBIDDEN, Forbidden) \ 120 | XX(404, NOT_FOUND, Not Found) \ 121 | XX(405, METHOD_NOT_ALLOWED, Method Not Allowed) \ 122 | XX(406, NOT_ACCEPTABLE, Not Acceptable) \ 123 | XX(407, PROXY_AUTHENTICATION_REQUIRED, Proxy Authentication Required) \ 124 | XX(408, REQUEST_TIMEOUT, Request Timeout) \ 125 | XX(409, CONFLICT, Conflict) \ 126 | XX(410, GONE, Gone) \ 127 | XX(411, LENGTH_REQUIRED, Length Required) \ 128 | XX(412, PRECONDITION_FAILED, Precondition Failed) \ 129 | XX(413, PAYLOAD_TOO_LARGE, Payload Too Large) \ 130 | XX(414, URI_TOO_LONG, URI Too Long) \ 131 | XX(415, UNSUPPORTED_MEDIA_TYPE, Unsupported Media Type) \ 132 | XX(416, RANGE_NOT_SATISFIABLE, Range Not Satisfiable) \ 133 | XX(417, EXPECTATION_FAILED, Expectation Failed) \ 134 | XX(421, MISDIRECTED_REQUEST, Misdirected Request) \ 135 | XX(422, UNPROCESSABLE_ENTITY, Unprocessable Entity) \ 136 | XX(423, LOCKED, Locked) \ 137 | XX(424, FAILED_DEPENDENCY, Failed Dependency) \ 138 | XX(426, UPGRADE_REQUIRED, Upgrade Required) \ 139 | XX(428, PRECONDITION_REQUIRED, Precondition Required) \ 140 | XX(429, TOO_MANY_REQUESTS, Too Many Requests) \ 141 | XX(431, REQUEST_HEADER_FIELDS_TOO_LARGE, Request Header Fields Too Large) \ 142 | XX(451, UNAVAILABLE_FOR_LEGAL_REASONS, Unavailable For Legal Reasons) \ 143 | XX(500, INTERNAL_SERVER_ERROR, Internal Server Error) \ 144 | XX(501, NOT_IMPLEMENTED, Not Implemented) \ 145 | XX(502, BAD_GATEWAY, Bad Gateway) \ 146 | XX(503, SERVICE_UNAVAILABLE, Service Unavailable) \ 147 | XX(504, GATEWAY_TIMEOUT, Gateway Timeout) \ 148 | XX(505, HTTP_VERSION_NOT_SUPPORTED, HTTP Version Not Supported) \ 149 | XX(506, VARIANT_ALSO_NEGOTIATES, Variant Also Negotiates) \ 150 | XX(507, INSUFFICIENT_STORAGE, Insufficient Storage) \ 151 | XX(508, LOOP_DETECTED, Loop Detected) \ 152 | XX(510, 
NOT_EXTENDED, Not Extended) \ 153 | XX(511, NETWORK_AUTHENTICATION_REQUIRED, Network Authentication Required) \ 154 | 155 | enum http_status 156 | { 157 | #define XX(num, name, string) HTTP_STATUS_##name = num, 158 | HTTP_STATUS_MAP(XX) 159 | #undef XX 160 | }; 161 | 162 | 163 | /* Request Methods */ 164 | #define HTTP_METHOD_MAP(XX) \ 165 | XX(0, DELETE, DELETE) \ 166 | XX(1, GET, GET) \ 167 | XX(2, HEAD, HEAD) \ 168 | XX(3, POST, POST) \ 169 | XX(4, PUT, PUT) \ 170 | /* pathological */ \ 171 | XX(5, CONNECT, CONNECT) \ 172 | XX(6, OPTIONS, OPTIONS) \ 173 | XX(7, TRACE, TRACE) \ 174 | /* WebDAV */ \ 175 | XX(8, COPY, COPY) \ 176 | XX(9, LOCK, LOCK) \ 177 | XX(10, MKCOL, MKCOL) \ 178 | XX(11, MOVE, MOVE) \ 179 | XX(12, PROPFIND, PROPFIND) \ 180 | XX(13, PROPPATCH, PROPPATCH) \ 181 | XX(14, SEARCH, SEARCH) \ 182 | XX(15, UNLOCK, UNLOCK) \ 183 | XX(16, BIND, BIND) \ 184 | XX(17, REBIND, REBIND) \ 185 | XX(18, UNBIND, UNBIND) \ 186 | XX(19, ACL, ACL) \ 187 | /* subversion */ \ 188 | XX(20, REPORT, REPORT) \ 189 | XX(21, MKACTIVITY, MKACTIVITY) \ 190 | XX(22, CHECKOUT, CHECKOUT) \ 191 | XX(23, MERGE, MERGE) \ 192 | /* upnp */ \ 193 | XX(24, MSEARCH, M-SEARCH) \ 194 | XX(25, NOTIFY, NOTIFY) \ 195 | XX(26, SUBSCRIBE, SUBSCRIBE) \ 196 | XX(27, UNSUBSCRIBE, UNSUBSCRIBE) \ 197 | /* RFC-5789 */ \ 198 | XX(28, PATCH, PATCH) \ 199 | XX(29, PURGE, PURGE) \ 200 | /* CalDAV */ \ 201 | XX(30, MKCALENDAR, MKCALENDAR) \ 202 | /* RFC-2068, section 19.6.1.2 */ \ 203 | XX(31, LINK, LINK) \ 204 | XX(32, UNLINK, UNLINK) \ 205 | 206 | enum http_method 207 | { 208 | #define XX(num, name, string) HTTP_##name = num, 209 | HTTP_METHOD_MAP(XX) 210 | #undef XX 211 | }; 212 | 213 | 214 | enum http_parser_type { HTTP_REQUEST, HTTP_RESPONSE, HTTP_BOTH }; 215 | 216 | 217 | /* Flag values for http_parser.flags field */ 218 | enum flags 219 | { F_CHUNKED = 1 << 0 220 | , F_CONNECTION_KEEP_ALIVE = 1 << 1 221 | , F_CONNECTION_CLOSE = 1 << 2 222 | , F_CONNECTION_UPGRADE = 1 << 3 223 | , F_TRAILING = 1 
<< 4 224 | , F_UPGRADE = 1 << 5 225 | , F_SKIPBODY = 1 << 6 226 | , F_CONTENTLENGTH = 1 << 7 227 | }; 228 | 229 | 230 | /* Map for errno-related constants 231 | * 232 | * The provided argument should be a macro that takes 2 arguments. 233 | */ 234 | #define HTTP_ERRNO_MAP(XX) \ 235 | /* No error */ \ 236 | XX(OK, "success") \ 237 | \ 238 | /* Callback-related errors */ \ 239 | XX(CB_message_begin, "the on_message_begin callback failed") \ 240 | XX(CB_url, "the on_url callback failed") \ 241 | XX(CB_header_field, "the on_header_field callback failed") \ 242 | XX(CB_header_value, "the on_header_value callback failed") \ 243 | XX(CB_headers_complete, "the on_headers_complete callback failed") \ 244 | XX(CB_body, "the on_body callback failed") \ 245 | XX(CB_message_complete, "the on_message_complete callback failed") \ 246 | XX(CB_status, "the on_status callback failed") \ 247 | XX(CB_chunk_header, "the on_chunk_header callback failed") \ 248 | XX(CB_chunk_complete, "the on_chunk_complete callback failed") \ 249 | \ 250 | /* Parsing-related errors */ \ 251 | XX(INVALID_EOF_STATE, "stream ended at an unexpected time") \ 252 | XX(HEADER_OVERFLOW, \ 253 | "too many header bytes seen; overflow detected") \ 254 | XX(CLOSED_CONNECTION, \ 255 | "data received after completed connection: close message") \ 256 | XX(INVALID_VERSION, "invalid HTTP version") \ 257 | XX(INVALID_STATUS, "invalid HTTP status code") \ 258 | XX(INVALID_METHOD, "invalid HTTP method") \ 259 | XX(INVALID_URL, "invalid URL") \ 260 | XX(INVALID_HOST, "invalid host") \ 261 | XX(INVALID_PORT, "invalid port") \ 262 | XX(INVALID_PATH, "invalid path") \ 263 | XX(INVALID_QUERY_STRING, "invalid query string") \ 264 | XX(INVALID_FRAGMENT, "invalid fragment") \ 265 | XX(LF_EXPECTED, "LF character expected") \ 266 | XX(INVALID_HEADER_TOKEN, "invalid character in header") \ 267 | XX(INVALID_CONTENT_LENGTH, \ 268 | "invalid character in content-length header") \ 269 | XX(UNEXPECTED_CONTENT_LENGTH, \ 270 | "unexpected 
content-length header") \ 271 | XX(INVALID_CHUNK_SIZE, \ 272 | "invalid character in chunk size header") \ 273 | XX(INVALID_CONSTANT, "invalid constant string") \ 274 | XX(INVALID_INTERNAL_STATE, "encountered unexpected internal state")\ 275 | XX(STRICT, "strict mode assertion failed") \ 276 | XX(PAUSED, "parser is paused") \ 277 | XX(UNKNOWN, "an unknown error occurred") 278 | 279 | 280 | /* Define HPE_* values for each errno value above */ 281 | #define HTTP_ERRNO_GEN(n, s) HPE_##n, 282 | enum http_errno { 283 | HTTP_ERRNO_MAP(HTTP_ERRNO_GEN) 284 | }; 285 | #undef HTTP_ERRNO_GEN 286 | 287 | 288 | /* Get an http_errno value from an http_parser */ 289 | #define HTTP_PARSER_ERRNO(p) ((enum http_errno) (p)->http_errno) 290 | 291 | 292 | struct http_parser { 293 | /** PRIVATE **/ 294 | unsigned int type : 2; /* enum http_parser_type */ 295 | unsigned int flags : 8; /* F_* values from 'flags' enum; semi-public */ 296 | unsigned int state : 7; /* enum state from http_parser.c */ 297 | unsigned int header_state : 7; /* enum header_state from http_parser.c */ 298 | unsigned int index : 7; /* index into current matcher */ 299 | unsigned int lenient_http_headers : 1; 300 | 301 | uint32_t nread; /* # bytes read in various scenarios */ 302 | uint64_t content_length; /* # bytes in body (0 if no Content-Length header) */ 303 | 304 | /** READ-ONLY **/ 305 | unsigned short http_major; 306 | unsigned short http_minor; 307 | unsigned int status_code : 16; /* responses only */ 308 | unsigned int method : 8; /* requests only */ 309 | unsigned int http_errno : 7; 310 | 311 | /* 1 = Upgrade header was present and the parser has exited because of that. 312 | * 0 = No upgrade header present. 313 | * Should be checked when http_parser_execute() returns in addition to 314 | * error checking. 
315 | */ 316 | unsigned int upgrade : 1; 317 | 318 | /** PUBLIC **/ 319 | void *data; /* A pointer to get hook to the "connection" or "socket" object */ 320 | }; 321 | 322 | 323 | struct http_parser_settings { 324 | http_cb on_message_begin; 325 | http_data_cb on_url; 326 | http_data_cb on_status; 327 | http_data_cb on_header_field; 328 | http_data_cb on_header_value; 329 | http_cb on_headers_complete; 330 | http_data_cb on_body; 331 | http_cb on_message_complete; 332 | /* When on_chunk_header is called, the current chunk length is stored 333 | * in parser->content_length. 334 | */ 335 | http_cb on_chunk_header; 336 | http_cb on_chunk_complete; 337 | }; 338 | 339 | 340 | enum http_parser_url_fields 341 | { UF_SCHEMA = 0 342 | , UF_HOST = 1 343 | , UF_PORT = 2 344 | , UF_PATH = 3 345 | , UF_QUERY = 4 346 | , UF_FRAGMENT = 5 347 | , UF_USERINFO = 6 348 | , UF_MAX = 7 349 | }; 350 | 351 | 352 | /* Result structure for http_parser_parse_url(). 353 | * 354 | * Callers should index into field_data[] with UF_* values iff field_set 355 | * has the relevant (1 << UF_*) bit set. As a courtesy to clients (and 356 | * because we probably have padding left over), we convert any port to 357 | * a uint16_t. 358 | */ 359 | struct http_parser_url { 360 | uint16_t field_set; /* Bitmask of (1 << UF_*) values */ 361 | uint16_t port; /* Converted UF_PORT string */ 362 | 363 | struct { 364 | uint16_t off; /* Offset into buffer in which field starts */ 365 | uint16_t len; /* Length of run in buffer */ 366 | } field_data[UF_MAX]; 367 | }; 368 | 369 | 370 | /* Returns the library version. Bits 16-23 contain the major version number, 371 | * bits 8-15 the minor version number and bits 0-7 the patch level. 
372 | * Usage example: 373 | * 374 | * unsigned long version = http_parser_version(); 375 | * unsigned major = (version >> 16) & 255; 376 | * unsigned minor = (version >> 8) & 255; 377 | * unsigned patch = version & 255; 378 | * printf("http_parser v%u.%u.%u\n", major, minor, patch); 379 | */ 380 | unsigned long http_parser_version(void); 381 | 382 | void http_parser_init(http_parser *parser, enum http_parser_type type); 383 | 384 | 385 | /* Initialize http_parser_settings members to 0 386 | */ 387 | void http_parser_settings_init(http_parser_settings *settings); 388 | 389 | 390 | /* Executes the parser. Returns number of parsed bytes. Sets 391 | * `parser->http_errno` on error. */ 392 | size_t http_parser_execute(http_parser *parser, 393 | const http_parser_settings *settings, 394 | const char *data, 395 | size_t len); 396 | 397 | 398 | /* If http_should_keep_alive() in the on_headers_complete or 399 | * on_message_complete callback returns 0, then this should be 400 | * the last message on the connection. 401 | * If you are the server, respond with the "Connection: close" header. 402 | * If you are the client, close the connection. 403 | */ 404 | int http_should_keep_alive(const http_parser *parser); 405 | 406 | /* Returns a string version of the HTTP method. 
*/ 407 | const char *http_method_str(enum http_method m); 408 | 409 | /* Return a string name of the given error */ 410 | const char *http_errno_name(enum http_errno err); 411 | 412 | /* Return a string description of the given error */ 413 | const char *http_errno_description(enum http_errno err); 414 | 415 | /* Initialize all http_parser_url members to 0 */ 416 | void http_parser_url_init(struct http_parser_url *u); 417 | 418 | /* Parse a URL; return nonzero on failure */ 419 | int http_parser_parse_url(const char *buf, size_t buflen, 420 | int is_connect, 421 | struct http_parser_url *u); 422 | 423 | /* Pause or un-pause the parser; a nonzero value pauses */ 424 | void http_parser_pause(http_parser *parser, int paused); 425 | 426 | /* Checks if this is the final chunk of the body. */ 427 | int http_body_is_final(const http_parser *parser); 428 | 429 | #ifdef __cplusplus 430 | } 431 | #endif 432 | #endif 433 | -------------------------------------------------------------------------------- /http_parser.c: -------------------------------------------------------------------------------- 1 | /* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev 2 | * 3 | * Additional changes are licensed under the same terms as NGINX and 4 | * copyright Joyent, Inc. and other Node contributors. All rights reserved. 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to 8 | * deal in the Software without restriction, including without limitation the 9 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 10 | * sell copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in 14 | * all copies or substantial portions of the Software. 
15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 22 | * IN THE SOFTWARE. 23 | */ 24 | #include "http_parser.h" 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | #ifndef ULLONG_MAX 33 | # define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */ 34 | #endif 35 | 36 | #ifndef MIN 37 | # define MIN(a,b) ((a) < (b) ? (a) : (b)) 38 | #endif 39 | 40 | #ifndef ARRAY_SIZE 41 | # define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0])) 42 | #endif 43 | 44 | #ifndef BIT_AT 45 | # define BIT_AT(a, i) \ 46 | (!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \ 47 | (1 << ((unsigned int) (i) & 7)))) 48 | #endif 49 | 50 | #ifndef ELEM_AT 51 | # define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? 
(a)[(i)] : (v)) 52 | #endif 53 | 54 | #define SET_ERRNO(e) \ 55 | do { \ 56 | parser->http_errno = (e); \ 57 | } while(0) 58 | 59 | #define CURRENT_STATE() p_state 60 | #define UPDATE_STATE(V) p_state = (enum state) (V); 61 | #define RETURN(V) \ 62 | do { \ 63 | parser->state = CURRENT_STATE(); \ 64 | return (V); \ 65 | } while (0); 66 | #define REEXECUTE() \ 67 | goto reexecute; \ 68 | 69 | 70 | #ifdef __GNUC__ 71 | # define LIKELY(X) __builtin_expect(!!(X), 1) 72 | # define UNLIKELY(X) __builtin_expect(!!(X), 0) 73 | #else 74 | # define LIKELY(X) (X) 75 | # define UNLIKELY(X) (X) 76 | #endif 77 | 78 | 79 | /* Run the notify callback FOR, returning ER if it fails */ 80 | #define CALLBACK_NOTIFY_(FOR, ER) \ 81 | do { \ 82 | assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \ 83 | \ 84 | if (LIKELY(settings->on_##FOR)) { \ 85 | parser->state = CURRENT_STATE(); \ 86 | if (UNLIKELY(0 != settings->on_##FOR(parser))) { \ 87 | SET_ERRNO(HPE_CB_##FOR); \ 88 | } \ 89 | UPDATE_STATE(parser->state); \ 90 | \ 91 | /* We either errored above or got paused; get out */ \ 92 | if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \ 93 | return (ER); \ 94 | } \ 95 | } \ 96 | } while (0) 97 | 98 | /* Run the notify callback FOR and consume the current byte */ 99 | #define CALLBACK_NOTIFY(FOR) CALLBACK_NOTIFY_(FOR, p - data + 1) 100 | 101 | /* Run the notify callback FOR and don't consume the current byte */ 102 | #define CALLBACK_NOTIFY_NOADVANCE(FOR) CALLBACK_NOTIFY_(FOR, p - data) 103 | 104 | /* Run data callback FOR with LEN bytes, returning ER if it fails */ 105 | #define CALLBACK_DATA_(FOR, LEN, ER) \ 106 | do { \ 107 | assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \ 108 | \ 109 | if (FOR##_mark) { \ 110 | if (LIKELY(settings->on_##FOR)) { \ 111 | parser->state = CURRENT_STATE(); \ 112 | if (UNLIKELY(0 != \ 113 | settings->on_##FOR(parser, FOR##_mark, (LEN)))) { \ 114 | SET_ERRNO(HPE_CB_##FOR); \ 115 | } \ 116 | UPDATE_STATE(parser->state); \ 117 | \ 118 | /* We either errored above 
or got paused; get out */ \ 119 | if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \ 120 | return (ER); \ 121 | } \ 122 | } \ 123 | FOR##_mark = NULL; \ 124 | } \ 125 | } while (0) 126 | 127 | /* Run the data callback FOR and consume the current byte */ 128 | #define CALLBACK_DATA(FOR) \ 129 | CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1) 130 | 131 | /* Run the data callback FOR and don't consume the current byte */ 132 | #define CALLBACK_DATA_NOADVANCE(FOR) \ 133 | CALLBACK_DATA_(FOR, p - FOR##_mark, p - data) 134 | 135 | /* Set the mark FOR; non-destructive if mark is already set */ 136 | #define MARK(FOR) \ 137 | do { \ 138 | if (!FOR##_mark) { \ 139 | FOR##_mark = p; \ 140 | } \ 141 | } while (0) 142 | 143 | /* Don't allow the total size of the HTTP headers (including the status 144 | * line) to exceed HTTP_MAX_HEADER_SIZE. This check is here to protect 145 | * embedders against denial-of-service attacks where the attacker feeds 146 | * us a never-ending header that the embedder keeps buffering. 147 | * 148 | * This check is arguably the responsibility of embedders but we're doing 149 | * it on the embedder's behalf because most won't bother and this way we 150 | * make the web a little safer. HTTP_MAX_HEADER_SIZE is still far bigger 151 | * than any reasonable request or response so this should never affect 152 | * day-to-day operation. 
/* Don't allow the total size of the HTTP headers (including the status
 * line) to exceed HTTP_MAX_HEADER_SIZE.  This check is here to protect
 * embedders against denial-of-service attacks where the attacker feeds
 * us a never-ending header that the embedder keeps buffering.
 *
 * This check is arguably the responsibility of embedders but we're doing
 * it on the embedder's behalf because most won't bother and this way we
 * make the web a little safer.  HTTP_MAX_HEADER_SIZE is still far bigger
 * than any reasonable request or response so this should never affect
 * day-to-day operation.
 *
 * Usage: adds V to parser->nread. When the limit is exceeded the macro sets
 * HPE_HEADER_OVERFLOW and jumps to the `error` label, so it can only be used
 * inside a function that defines that label.
 */
#define COUNT_HEADER_SIZE(V)                                         \
do {                                                                 \
  parser->nread += (V);                                              \
  if (UNLIKELY(parser->nread > (HTTP_MAX_HEADER_SIZE))) {            \
    SET_ERRNO(HPE_HEADER_OVERFLOW);                                  \
    goto error;                                                      \
  }                                                                  \
} while (0)


/* Lower-case spellings of the header names and values the parser cares
 * about. The header-field matcher compares the lower-cased input against
 * these byte by byte and uses sizeof(NAME) as the length bound.
 * CHUNKED, KEEP_ALIVE and CLOSE are value tokens; presumably they are matched
 * the same way in the header-value states -- confirm against that code.
 */
#define PROXY_CONNECTION "proxy-connection"
#define CONNECTION "connection"
#define CONTENT_LENGTH "content-length"
#define TRANSFER_ENCODING "transfer-encoding"
#define UPGRADE "upgrade"
#define CHUNKED "chunked"
#define KEEP_ALIVE "keep-alive"
#define CLOSE "close"


/* One string per entry of HTTP_METHOD_MAP, in map order (the third field of
 * each entry, stringified). The request-method matcher indexes this array
 * with parser->method, which assumes the map lists methods in enum-value
 * order starting at 0 -- TODO confirm in http_parser.h.
 */
static const char *method_strings[] =
  {
#define XX(num, name, string) #string,
  HTTP_METHOD_MAP(XX)
#undef XX
  };
/* Tokens as defined by rfc 2616. Also lowercases them.
 *        token       = 1*<any CHAR except CTLs or separators>
 *     separators     = "(" | ")" | "<" | ">" | "@"
 *                    | "," | ";" | ":" | "\" | <">
 *                    | "/" | "[" | "]" | "?" | "="
 *                    | "{" | "}" | SP | HT
 *
 * tokens[b] is 0 when byte b may not appear in a token, otherwise it is b
 * folded to lower case. Only codes 0..127 are listed; the remaining entries
 * are zero-initialized, i.e. "not a token character".
 */
static const char tokens[256] = {
/*   0 nul    1 soh    2 stx    3 etx    4 eot    5 enq    6 ack    7 bel  */
        0,       0,       0,       0,       0,       0,       0,       0,
/*   8 bs     9 ht    10 nl    11 vt    12 np    13 cr    14 so    15 si   */
        0,       0,       0,       0,       0,       0,       0,       0,
/*  16 dle   17 dc1   18 dc2   19 dc3   20 dc4   21 nak   22 syn   23 etb */
        0,       0,       0,       0,       0,       0,       0,       0,
/*  24 can   25 em    26 sub   27 esc   28 fs    29 gs    30 rs    31 us  */
        0,       0,       0,       0,       0,       0,       0,       0,
/*  32 sp    33  !    34  "    35  #    36  $    37  %    38  &    39  '  */
        0,      '!',      0,      '#',     '$',     '%',     '&',    '\'',
/*  40  (    41  )    42  *    43  +    44  ,    45  -    46  .    47  /  */
        0,       0,      '*',     '+',      0,      '-',     '.',      0,
/*  48  0    49  1    50  2    51  3    52  4    53  5    54  6    55  7  */
       '0',     '1',     '2',     '3',     '4',     '5',     '6',     '7',
/*  56  8    57  9    58  :    59  ;    60  <    61  =    62  >    63  ?  */
       '8',     '9',      0,       0,       0,       0,       0,       0,
/*  64  @    65  A    66  B    67  C    68  D    69  E    70  F    71  G  */
        0,      'a',     'b',     'c',     'd',     'e',     'f',     'g',
/*  72  H    73  I    74  J    75  K    76  L    77  M    78  N    79  O  */
       'h',     'i',     'j',     'k',     'l',     'm',     'n',     'o',
/*  80  P    81  Q    82  R    83  S    84  T    85  U    86  V    87  W  */
       'p',     'q',     'r',     's',     't',     'u',     'v',     'w',
/*  88  X    89  Y    90  Z    91  [    92  \    93  ]    94  ^    95  _  */
       'x',     'y',     'z',      0,       0,       0,      '^',     '_',
/*  96  `    97  a    98  b    99  c   100  d   101  e   102  f   103  g  */
       '`',     'a',     'b',     'c',     'd',     'e',     'f',     'g',
/* 104  h   105  i   106  j   107  k   108  l   109  m   110  n   111  o  */
       'h',     'i',     'j',     'k',     'l',     'm',     'n',     'o',
/* 112  p   113  q   114  r   115  s   116  t   117  u   118  v   119  w  */
       'p',     'q',     'r',     's',     't',     'u',     'v',     'w',
/* 120  x   121  y   122  z   123  {   124  |   125  }   126  ~   127 del */
       'x',     'y',     'z',      0,      '|',      0,      '~',       0 };


/* Value of a hexadecimal digit, indexed by byte; -1 means "not a hex digit".
 *
 * All 256 entries are spelled out. The table used to stop after 128
 * initializers, which left bytes 0x80..0xFF zero-initialized and therefore
 * indistinguishable from the digit '0'.
 */
static const int8_t unhex[256] =
  {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  /* 0x80 .. 0xFF: never hex digits */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  };


/* T(v) keeps bit v only in non-strict builds; in strict builds it is 0.
 * It is applied below to HT (9) and form feed (12).
 */
#if HTTP_PARSER_STRICT
# define T(v) 0
#else
# define T(v) v
#endif


/* Bitmap of bytes accepted as ordinary URL characters.
 * Each array element covers eight consecutive character codes; within an
 * element the code with (code & 7) == k owns bit (1 << k). A set bit means
 * "allowed". Only codes 0..127 are listed, the remaining elements are zero.
 * Cleared in the listed range: the control characters (except HT/FF in
 * non-strict builds), SP, '#', '?' and DEL.
 */
static const uint8_t normal_url_char[32] = {
/*   0 nul    1 soh    2 stx    3 etx    4 eot    5 enq    6 ack    7 bel  */
        0    |   0    |   0    |   0    |   0    |   0    |   0    |   0,
/*   8 bs     9 ht    10 nl    11 vt    12 np    13 cr    14 so    15 si   */
        0    | T(2)   |   0    |   0    | T(16)  |   0    |   0    |   0,
/*  16 dle   17 dc1   18 dc2   19 dc3   20 dc4   21 nak   22 syn   23 etb */
        0    |   0    |   0    |   0    |   0    |   0    |   0    |   0,
/*  24 can   25 em    26 sub   27 esc   28 fs    29 gs    30 rs    31 us  */
        0    |   0    |   0    |   0    |   0    |   0    |   0    |   0,
/*  32 sp    33  !    34  "    35  #    36  $    37  %    38  &    39  '  */
        0    |   2    |   4    |   0    |   16   |   32   |   64   |  128,
/*  40  (    41  )    42  *    43  +    44  ,    45  -    46  .    47  /  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  48  0    49  1    50  2    51  3    52  4    53  5    54  6    55  7  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  56  8    57  9    58  :    59  ;    60  <    61  =    62  >    63  ?  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |   0,
/*  64  @    65  A    66  B    67  C    68  D    69  E    70  F    71  G  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  72  H    73  I    74  J    75  K    76  L    77  M    78  N    79  O  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  80  P    81  Q    82  R    83  S    84  T    85  U    86  V    87  W  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  88  X    89  Y    90  Z    91  [    92  \    93  ]    94  ^    95  _  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/*  96  `    97  a    98  b    99  c   100  d   101  e   102  f   103  g  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/* 104  h   105  i   106  j   107  k   108  l   109  m   110  n   111  o  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/* 112  p   113  q   114  r   115  s   116  t   117  u   118  v   119  w  */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
/* 120  x   121  y   122  z   123  {   124  |   125  }   126  ~   127 del */
        1    |   2    |   4    |   8    |   16   |   32   |   64   |   0, };

#undef T
/* States of the main message parser.
 * The numeric order matters: PARSING_HEADER() below relies on every state up
 * to and including s_headers_done sorting before the body states.
 */
enum state
  { s_dead = 1 /* important that this is > 0 */

  /* start line of a response ("HTTP/x.y <code> <reason>") */
  , s_start_req_or_res
  , s_res_or_resp_H
  , s_start_res
  , s_res_H
  , s_res_HT
  , s_res_HTT
  , s_res_HTTP
  , s_res_first_http_major
  , s_res_http_major
  , s_res_first_http_minor
  , s_res_http_minor
  , s_res_first_status_code
  , s_res_status_code
  , s_res_status_start
  , s_res_status
  , s_res_line_almost_done

  , s_start_req

  /* request line: method, URL, HTTP version */
  , s_req_method
  , s_req_spaces_before_url
  , s_req_schema
  , s_req_schema_slash
  , s_req_schema_slash_slash
  , s_req_server_start
  , s_req_server
  , s_req_server_with_at
  , s_req_path
  , s_req_query_string_start
  , s_req_query_string
  , s_req_fragment_start
  , s_req_fragment
  , s_req_http_start
  , s_req_http_H
  , s_req_http_HT
  , s_req_http_HTT
  , s_req_http_HTTP
  , s_req_first_http_major
  , s_req_http_major
  , s_req_first_http_minor
  , s_req_http_minor
  , s_req_line_almost_done

  /* header lines */
  , s_header_field_start
  , s_header_field
  , s_header_value_discard_ws
  , s_header_value_discard_ws_almost_done
  , s_header_value_discard_lws
  , s_header_value_start
  , s_header_value
  , s_header_value_lws

  , s_header_almost_done

  , s_chunk_size_start
  , s_chunk_size
  , s_chunk_parameters
  , s_chunk_size_almost_done

  , s_headers_almost_done
  , s_headers_done

  /* Important: 's_headers_done' must be the last 'header' state. All
   * states beyond this must be 'body' states. It is used for overflow
   * checking. See the PARSING_HEADER() macro.
   */

  , s_chunk_data
  , s_chunk_data_almost_done
  , s_chunk_data_done

  , s_body_identity
  , s_body_identity_eof

  , s_message_done
  };


/* True while `state` is one of the states up to s_headers_done, i.e. while
 * bytes still count against the header size limit. The argument is
 * parenthesized so that any expression can be passed safely.
 */
#define PARSING_HEADER(state) ((state) <= s_headers_done)


/* Sub-state kept in parser->header_state while a header line is parsed.
 * The h_matching_* values are intermediate steps of recognising a header
 * name or value; the plain h_* values record a completed match.
 */
enum header_states
  { h_general = 0
  , h_C
  , h_CO
  , h_CON

  , h_matching_connection
  , h_matching_proxy_connection
  , h_matching_content_length
  , h_matching_transfer_encoding
  , h_matching_upgrade

  , h_connection
  , h_content_length
  , h_transfer_encoding
  , h_upgrade

  , h_matching_transfer_encoding_chunked
  , h_matching_connection_token_start
  , h_matching_connection_keep_alive
  , h_matching_connection_close
  , h_matching_connection_upgrade
  , h_matching_connection_token

  , h_transfer_encoding_chunked
  , h_connection_keep_alive
  , h_connection_close
  , h_connection_upgrade
  };

/* States for parsing the authority part of a URL.
 * NOTE(review): the consumer of this enum is outside this excerpt.
 */
enum http_host_state
  {
    s_http_host_dead = 1
  , s_http_userinfo_start
  , s_http_userinfo
  , s_http_host_start
  , s_http_host_v6_start
  , s_http_host
  , s_http_host_v6
  , s_http_host_v6_end
  , s_http_host_v6_zone_start
  , s_http_host_v6_zone
  , s_http_host_port_start
  , s_http_host_port
};
/* Macros for character classes; depends on strict-mode.
 *
 * Every macro parenthesizes its argument, so any expression may be passed.
 * Arguments are still evaluated more than once: pass side-effect-free
 * expressions only.
 */
#define CR                  '\r'
#define LF                  '\n'
/* Fold an ASCII letter to lower case by setting bit 0x20. */
#define LOWER(c)            ((unsigned char)((c) | 0x20))
#define IS_ALPHA(c)         (LOWER(c) >= 'a' && LOWER(c) <= 'z')
#define IS_NUM(c)           ((c) >= '0' && (c) <= '9')
#define IS_ALPHANUM(c)      (IS_ALPHA(c) || IS_NUM(c))
#define IS_HEX(c)           (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
#define IS_MARK(c)          ((c) == '-' || (c) == '_' || (c) == '.' || \
  (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
  (c) == ')')
#define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
  (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
  (c) == '$' || (c) == ',')

/* Token lookup without the non-strict SP exception made by TOKEN(). */
#define STRICT_TOKEN(c)     (tokens[(unsigned char)(c)])

#if HTTP_PARSER_STRICT
#define TOKEN(c)            (tokens[(unsigned char)(c)])
#define IS_URL_CHAR(c)      (BIT_AT(normal_url_char, (unsigned char)(c)))
#define IS_HOST_CHAR(c)     (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
#else
/* Non-strict builds additionally accept SP inside tokens, bytes with the
 * high bit set inside URLs, and '_' inside host names.
 */
#define TOKEN(c)            (((c) == ' ') ? ' ' : tokens[(unsigned char)(c)])
#define IS_URL_CHAR(c)                                                         \
  (BIT_AT(normal_url_char, (unsigned char)(c)) || ((c) & 0x80))
#define IS_HOST_CHAR(c)                                                        \
  (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
#endif

/**
 * Verify that a char is a valid visible (printable) US-ASCII
 * character or %x80-FF. CR, LF and HT (9) are accepted as well; DEL (127)
 * and the remaining control characters are rejected.
 **/
#define IS_HEADER_CHAR(ch)                                                     \
  ((ch) == CR || (ch) == LF || (ch) == 9 ||                                    \
   ((unsigned char)(ch) > 31 && (ch) != 127))

/* First state of a new message, chosen by parser->type. Requires a variable
 * named `parser` in scope.
 */
#define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
/* STRICT_CHECK(cond): in strict builds, a true `cond` sets HPE_STRICT and
 * jumps to the `error` label of the enclosing function. In non-strict builds
 * the macro expands to nothing, so the condition is not even evaluated.
 *
 * NEW_MESSAGE(): state to continue with after a message. Strict builds go to
 * s_dead unless http_should_keep_alive(parser) is true; non-strict builds
 * always restart at start_state.
 */
#if HTTP_PARSER_STRICT
# define STRICT_CHECK(cond)                                          \
do {                                                                 \
  if (cond) {                                                        \
    SET_ERRNO(HPE_STRICT);                                           \
    goto error;                                                      \
  }                                                                  \
} while (0)
# define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
#else
# define STRICT_CHECK(cond)
# define NEW_MESSAGE() start_state
#endif


/* Map errno values to strings for human-readable output.
 * One { name, description } pair is generated per HTTP_ERRNO_MAP entry; the
 * name is the entry's identifier prefixed with "HPE_".
 * NOTE(review): presumably indexed by the errno value -- confirm that
 * HTTP_ERRNO_MAP is declared in enum order.
 */
#define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
static struct {
  const char *name;
  const char *description;
} http_strerror_tab[] = {
  HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
};
#undef HTTP_STRERROR_GEN

/* Forward declaration; the definition is not part of this section. */
int http_message_needs_eof(const http_parser *parser);
503 | */ 504 | 505 | if (ch == '/' || ch == '*') { 506 | return s_req_path; 507 | } 508 | 509 | if (IS_ALPHA(ch)) { 510 | return s_req_schema; 511 | } 512 | 513 | break; 514 | 515 | case s_req_schema: 516 | if (IS_ALPHA(ch)) { 517 | return s; 518 | } 519 | 520 | if (ch == ':') { 521 | return s_req_schema_slash; 522 | } 523 | 524 | break; 525 | 526 | case s_req_schema_slash: 527 | if (ch == '/') { 528 | return s_req_schema_slash_slash; 529 | } 530 | 531 | break; 532 | 533 | case s_req_schema_slash_slash: 534 | if (ch == '/') { 535 | return s_req_server_start; 536 | } 537 | 538 | break; 539 | 540 | case s_req_server_with_at: 541 | if (ch == '@') { 542 | return s_dead; 543 | } 544 | 545 | /* FALLTHROUGH */ 546 | case s_req_server_start: 547 | case s_req_server: 548 | if (ch == '/') { 549 | return s_req_path; 550 | } 551 | 552 | if (ch == '?') { 553 | return s_req_query_string_start; 554 | } 555 | 556 | if (ch == '@') { 557 | return s_req_server_with_at; 558 | } 559 | 560 | if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') { 561 | return s_req_server; 562 | } 563 | 564 | break; 565 | 566 | case s_req_path: 567 | if (IS_URL_CHAR(ch)) { 568 | return s; 569 | } 570 | 571 | switch (ch) { 572 | case '?': 573 | return s_req_query_string_start; 574 | 575 | case '#': 576 | return s_req_fragment_start; 577 | } 578 | 579 | break; 580 | 581 | case s_req_query_string_start: 582 | case s_req_query_string: 583 | if (IS_URL_CHAR(ch)) { 584 | return s_req_query_string; 585 | } 586 | 587 | switch (ch) { 588 | case '?': 589 | /* allow extra '?' 
in query string */ 590 | return s_req_query_string; 591 | 592 | case '#': 593 | return s_req_fragment_start; 594 | } 595 | 596 | break; 597 | 598 | case s_req_fragment_start: 599 | if (IS_URL_CHAR(ch)) { 600 | return s_req_fragment; 601 | } 602 | 603 | switch (ch) { 604 | case '?': 605 | return s_req_fragment; 606 | 607 | case '#': 608 | return s; 609 | } 610 | 611 | break; 612 | 613 | case s_req_fragment: 614 | if (IS_URL_CHAR(ch)) { 615 | return s; 616 | } 617 | 618 | switch (ch) { 619 | case '?': 620 | case '#': 621 | return s; 622 | } 623 | 624 | break; 625 | 626 | default: 627 | break; 628 | } 629 | 630 | /* We should never fall out of the switch above unless there's an error */ 631 | return s_dead; 632 | } 633 | 634 | size_t http_parser_execute (http_parser *parser, 635 | const http_parser_settings *settings, 636 | const char *data, 637 | size_t len) 638 | { 639 | char c, ch; 640 | int8_t unhex_val; 641 | const char *p = data; 642 | const char *header_field_mark = 0; 643 | const char *header_value_mark = 0; 644 | const char *url_mark = 0; 645 | const char *body_mark = 0; 646 | const char *status_mark = 0; 647 | enum state p_state = (enum state) parser->state; 648 | const unsigned int lenient = parser->lenient_http_headers; 649 | 650 | /* We're in an error state. Don't bother doing anything. */ 651 | if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { 652 | return 0; 653 | } 654 | 655 | if (len == 0) { 656 | switch (CURRENT_STATE()) { 657 | case s_body_identity_eof: 658 | /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if 659 | * we got paused. 
660 | */ 661 | CALLBACK_NOTIFY_NOADVANCE(message_complete); 662 | return 0; 663 | 664 | case s_dead: 665 | case s_start_req_or_res: 666 | case s_start_res: 667 | case s_start_req: 668 | return 0; 669 | 670 | default: 671 | SET_ERRNO(HPE_INVALID_EOF_STATE); 672 | return 1; 673 | } 674 | } 675 | 676 | 677 | if (CURRENT_STATE() == s_header_field) 678 | header_field_mark = data; 679 | if (CURRENT_STATE() == s_header_value) 680 | header_value_mark = data; 681 | switch (CURRENT_STATE()) { 682 | case s_req_path: 683 | case s_req_schema: 684 | case s_req_schema_slash: 685 | case s_req_schema_slash_slash: 686 | case s_req_server_start: 687 | case s_req_server: 688 | case s_req_server_with_at: 689 | case s_req_query_string_start: 690 | case s_req_query_string: 691 | case s_req_fragment_start: 692 | case s_req_fragment: 693 | url_mark = data; 694 | break; 695 | case s_res_status: 696 | status_mark = data; 697 | break; 698 | default: 699 | break; 700 | } 701 | 702 | for (p=data; p != data + len; p++) { 703 | ch = *p; 704 | 705 | if (PARSING_HEADER(CURRENT_STATE())) 706 | COUNT_HEADER_SIZE(1); 707 | 708 | reexecute: 709 | switch (CURRENT_STATE()) { 710 | 711 | case s_dead: 712 | /* this state is used after a 'Connection: close' message 713 | * the parser will error out if it reads another message 714 | */ 715 | if (LIKELY(ch == CR || ch == LF)) 716 | break; 717 | 718 | SET_ERRNO(HPE_CLOSED_CONNECTION); 719 | goto error; 720 | 721 | case s_start_req_or_res: 722 | { 723 | if (ch == CR || ch == LF) 724 | break; 725 | parser->flags = 0; 726 | parser->content_length = ULLONG_MAX; 727 | 728 | if (ch == 'H') { 729 | UPDATE_STATE(s_res_or_resp_H); 730 | 731 | CALLBACK_NOTIFY(message_begin); 732 | } else { 733 | parser->type = HTTP_REQUEST; 734 | UPDATE_STATE(s_start_req); 735 | REEXECUTE(); 736 | } 737 | 738 | break; 739 | } 740 | 741 | case s_res_or_resp_H: 742 | if (ch == 'T') { 743 | parser->type = HTTP_RESPONSE; 744 | UPDATE_STATE(s_res_HT); 745 | } else { 746 | if (UNLIKELY(ch != 
'E')) { 747 | SET_ERRNO(HPE_INVALID_CONSTANT); 748 | goto error; 749 | } 750 | 751 | parser->type = HTTP_REQUEST; 752 | parser->method = HTTP_HEAD; 753 | parser->index = 2; 754 | UPDATE_STATE(s_req_method); 755 | } 756 | break; 757 | 758 | case s_start_res: 759 | { 760 | parser->flags = 0; 761 | parser->content_length = ULLONG_MAX; 762 | 763 | switch (ch) { 764 | case 'H': 765 | UPDATE_STATE(s_res_H); 766 | break; 767 | 768 | case CR: 769 | case LF: 770 | break; 771 | 772 | default: 773 | SET_ERRNO(HPE_INVALID_CONSTANT); 774 | goto error; 775 | } 776 | 777 | CALLBACK_NOTIFY(message_begin); 778 | break; 779 | } 780 | 781 | case s_res_H: 782 | STRICT_CHECK(ch != 'T'); 783 | UPDATE_STATE(s_res_HT); 784 | break; 785 | 786 | case s_res_HT: 787 | STRICT_CHECK(ch != 'T'); 788 | UPDATE_STATE(s_res_HTT); 789 | break; 790 | 791 | case s_res_HTT: 792 | STRICT_CHECK(ch != 'P'); 793 | UPDATE_STATE(s_res_HTTP); 794 | break; 795 | 796 | case s_res_HTTP: 797 | STRICT_CHECK(ch != '/'); 798 | UPDATE_STATE(s_res_first_http_major); 799 | break; 800 | 801 | case s_res_first_http_major: 802 | if (UNLIKELY(ch < '0' || ch > '9')) { 803 | SET_ERRNO(HPE_INVALID_VERSION); 804 | goto error; 805 | } 806 | 807 | parser->http_major = ch - '0'; 808 | UPDATE_STATE(s_res_http_major); 809 | break; 810 | 811 | /* major HTTP version or dot */ 812 | case s_res_http_major: 813 | { 814 | if (ch == '.') { 815 | UPDATE_STATE(s_res_first_http_minor); 816 | break; 817 | } 818 | 819 | if (!IS_NUM(ch)) { 820 | SET_ERRNO(HPE_INVALID_VERSION); 821 | goto error; 822 | } 823 | 824 | parser->http_major *= 10; 825 | parser->http_major += ch - '0'; 826 | 827 | if (UNLIKELY(parser->http_major > 999)) { 828 | SET_ERRNO(HPE_INVALID_VERSION); 829 | goto error; 830 | } 831 | 832 | break; 833 | } 834 | 835 | /* first digit of minor HTTP version */ 836 | case s_res_first_http_minor: 837 | if (UNLIKELY(!IS_NUM(ch))) { 838 | SET_ERRNO(HPE_INVALID_VERSION); 839 | goto error; 840 | } 841 | 842 | parser->http_minor = ch - '0'; 
843 | UPDATE_STATE(s_res_http_minor); 844 | break; 845 | 846 | /* minor HTTP version or end of request line */ 847 | case s_res_http_minor: 848 | { 849 | if (ch == ' ') { 850 | UPDATE_STATE(s_res_first_status_code); 851 | break; 852 | } 853 | 854 | if (UNLIKELY(!IS_NUM(ch))) { 855 | SET_ERRNO(HPE_INVALID_VERSION); 856 | goto error; 857 | } 858 | 859 | parser->http_minor *= 10; 860 | parser->http_minor += ch - '0'; 861 | 862 | if (UNLIKELY(parser->http_minor > 999)) { 863 | SET_ERRNO(HPE_INVALID_VERSION); 864 | goto error; 865 | } 866 | 867 | break; 868 | } 869 | 870 | case s_res_first_status_code: 871 | { 872 | if (!IS_NUM(ch)) { 873 | if (ch == ' ') { 874 | break; 875 | } 876 | 877 | SET_ERRNO(HPE_INVALID_STATUS); 878 | goto error; 879 | } 880 | parser->status_code = ch - '0'; 881 | UPDATE_STATE(s_res_status_code); 882 | break; 883 | } 884 | 885 | case s_res_status_code: 886 | { 887 | if (!IS_NUM(ch)) { 888 | switch (ch) { 889 | case ' ': 890 | UPDATE_STATE(s_res_status_start); 891 | break; 892 | case CR: 893 | UPDATE_STATE(s_res_line_almost_done); 894 | break; 895 | case LF: 896 | UPDATE_STATE(s_header_field_start); 897 | break; 898 | default: 899 | SET_ERRNO(HPE_INVALID_STATUS); 900 | goto error; 901 | } 902 | break; 903 | } 904 | 905 | parser->status_code *= 10; 906 | parser->status_code += ch - '0'; 907 | 908 | if (UNLIKELY(parser->status_code > 999)) { 909 | SET_ERRNO(HPE_INVALID_STATUS); 910 | goto error; 911 | } 912 | 913 | break; 914 | } 915 | 916 | case s_res_status_start: 917 | { 918 | if (ch == CR) { 919 | UPDATE_STATE(s_res_line_almost_done); 920 | break; 921 | } 922 | 923 | if (ch == LF) { 924 | UPDATE_STATE(s_header_field_start); 925 | break; 926 | } 927 | 928 | MARK(status); 929 | UPDATE_STATE(s_res_status); 930 | parser->index = 0; 931 | break; 932 | } 933 | 934 | case s_res_status: 935 | if (ch == CR) { 936 | UPDATE_STATE(s_res_line_almost_done); 937 | CALLBACK_DATA(status); 938 | break; 939 | } 940 | 941 | if (ch == LF) { 942 | 
UPDATE_STATE(s_header_field_start); 943 | CALLBACK_DATA(status); 944 | break; 945 | } 946 | 947 | break; 948 | 949 | case s_res_line_almost_done: 950 | STRICT_CHECK(ch != LF); 951 | UPDATE_STATE(s_header_field_start); 952 | break; 953 | 954 | case s_start_req: 955 | { 956 | if (ch == CR || ch == LF) 957 | break; 958 | parser->flags = 0; 959 | parser->content_length = ULLONG_MAX; 960 | 961 | if (UNLIKELY(!IS_ALPHA(ch))) { 962 | SET_ERRNO(HPE_INVALID_METHOD); 963 | goto error; 964 | } 965 | 966 | parser->method = (enum http_method) 0; 967 | parser->index = 1; 968 | switch (ch) { 969 | case 'A': parser->method = HTTP_ACL; break; 970 | case 'B': parser->method = HTTP_BIND; break; 971 | case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break; 972 | case 'D': parser->method = HTTP_DELETE; break; 973 | case 'G': parser->method = HTTP_GET; break; 974 | case 'H': parser->method = HTTP_HEAD; break; 975 | case 'L': parser->method = HTTP_LOCK; /* or LINK */ break; 976 | case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH, MKCALENDAR */ break; 977 | case 'N': parser->method = HTTP_NOTIFY; break; 978 | case 'O': parser->method = HTTP_OPTIONS; break; 979 | case 'P': parser->method = HTTP_POST; 980 | /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */ 981 | break; 982 | case 'R': parser->method = HTTP_REPORT; /* or REBIND */ break; 983 | case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH */ break; 984 | case 'T': parser->method = HTTP_TRACE; break; 985 | case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE, UNBIND, UNLINK */ break; 986 | default: 987 | SET_ERRNO(HPE_INVALID_METHOD); 988 | goto error; 989 | } 990 | UPDATE_STATE(s_req_method); 991 | 992 | CALLBACK_NOTIFY(message_begin); 993 | 994 | break; 995 | } 996 | 997 | case s_req_method: 998 | { 999 | const char *matcher; 1000 | if (UNLIKELY(ch == '\0')) { 1001 | SET_ERRNO(HPE_INVALID_METHOD); 1002 | goto error; 1003 | } 1004 | 1005 | matcher = method_strings[parser->method]; 1006 | 
if (ch == ' ' && matcher[parser->index] == '\0') { 1007 | UPDATE_STATE(s_req_spaces_before_url); 1008 | } else if (ch == matcher[parser->index]) { 1009 | ; /* nada */ 1010 | } else if (IS_ALPHA(ch)) { 1011 | 1012 | switch (parser->method << 16 | parser->index << 8 | ch) { 1013 | #define XX(meth, pos, ch, new_meth) \ 1014 | case (HTTP_##meth << 16 | pos << 8 | ch): \ 1015 | parser->method = HTTP_##new_meth; break; 1016 | 1017 | XX(POST, 1, 'U', PUT) 1018 | XX(POST, 1, 'A', PATCH) 1019 | XX(CONNECT, 1, 'H', CHECKOUT) 1020 | XX(CONNECT, 2, 'P', COPY) 1021 | XX(MKCOL, 1, 'O', MOVE) 1022 | XX(MKCOL, 1, 'E', MERGE) 1023 | XX(MKCOL, 2, 'A', MKACTIVITY) 1024 | XX(MKCOL, 3, 'A', MKCALENDAR) 1025 | XX(SUBSCRIBE, 1, 'E', SEARCH) 1026 | XX(REPORT, 2, 'B', REBIND) 1027 | XX(POST, 1, 'R', PROPFIND) 1028 | XX(PROPFIND, 4, 'P', PROPPATCH) 1029 | XX(PUT, 2, 'R', PURGE) 1030 | XX(LOCK, 1, 'I', LINK) 1031 | XX(UNLOCK, 2, 'S', UNSUBSCRIBE) 1032 | XX(UNLOCK, 2, 'B', UNBIND) 1033 | XX(UNLOCK, 3, 'I', UNLINK) 1034 | #undef XX 1035 | 1036 | default: 1037 | SET_ERRNO(HPE_INVALID_METHOD); 1038 | goto error; 1039 | } 1040 | } else if (ch == '-' && 1041 | parser->index == 1 && 1042 | parser->method == HTTP_MKCOL) { 1043 | parser->method = HTTP_MSEARCH; 1044 | } else { 1045 | SET_ERRNO(HPE_INVALID_METHOD); 1046 | goto error; 1047 | } 1048 | 1049 | ++parser->index; 1050 | break; 1051 | } 1052 | 1053 | case s_req_spaces_before_url: 1054 | { 1055 | if (ch == ' ') break; 1056 | 1057 | MARK(url); 1058 | if (parser->method == HTTP_CONNECT) { 1059 | UPDATE_STATE(s_req_server_start); 1060 | } 1061 | 1062 | UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch)); 1063 | if (UNLIKELY(CURRENT_STATE() == s_dead)) { 1064 | SET_ERRNO(HPE_INVALID_URL); 1065 | goto error; 1066 | } 1067 | 1068 | break; 1069 | } 1070 | 1071 | case s_req_schema: 1072 | case s_req_schema_slash: 1073 | case s_req_schema_slash_slash: 1074 | case s_req_server_start: 1075 | { 1076 | switch (ch) { 1077 | /* No whitespace allowed here */ 
1078 | case ' ': 1079 | case CR: 1080 | case LF: 1081 | SET_ERRNO(HPE_INVALID_URL); 1082 | goto error; 1083 | default: 1084 | UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch)); 1085 | if (UNLIKELY(CURRENT_STATE() == s_dead)) { 1086 | SET_ERRNO(HPE_INVALID_URL); 1087 | goto error; 1088 | } 1089 | } 1090 | 1091 | break; 1092 | } 1093 | 1094 | case s_req_server: 1095 | case s_req_server_with_at: 1096 | case s_req_path: 1097 | case s_req_query_string_start: 1098 | case s_req_query_string: 1099 | case s_req_fragment_start: 1100 | case s_req_fragment: 1101 | { 1102 | switch (ch) { 1103 | case ' ': 1104 | UPDATE_STATE(s_req_http_start); 1105 | CALLBACK_DATA(url); 1106 | break; 1107 | case CR: 1108 | case LF: 1109 | parser->http_major = 0; 1110 | parser->http_minor = 9; 1111 | UPDATE_STATE((ch == CR) ? 1112 | s_req_line_almost_done : 1113 | s_header_field_start); 1114 | CALLBACK_DATA(url); 1115 | break; 1116 | default: 1117 | UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch)); 1118 | if (UNLIKELY(CURRENT_STATE() == s_dead)) { 1119 | SET_ERRNO(HPE_INVALID_URL); 1120 | goto error; 1121 | } 1122 | } 1123 | break; 1124 | } 1125 | 1126 | case s_req_http_start: 1127 | switch (ch) { 1128 | case 'H': 1129 | UPDATE_STATE(s_req_http_H); 1130 | break; 1131 | case ' ': 1132 | break; 1133 | default: 1134 | SET_ERRNO(HPE_INVALID_CONSTANT); 1135 | goto error; 1136 | } 1137 | break; 1138 | 1139 | case s_req_http_H: 1140 | STRICT_CHECK(ch != 'T'); 1141 | UPDATE_STATE(s_req_http_HT); 1142 | break; 1143 | 1144 | case s_req_http_HT: 1145 | STRICT_CHECK(ch != 'T'); 1146 | UPDATE_STATE(s_req_http_HTT); 1147 | break; 1148 | 1149 | case s_req_http_HTT: 1150 | STRICT_CHECK(ch != 'P'); 1151 | UPDATE_STATE(s_req_http_HTTP); 1152 | break; 1153 | 1154 | case s_req_http_HTTP: 1155 | STRICT_CHECK(ch != '/'); 1156 | UPDATE_STATE(s_req_first_http_major); 1157 | break; 1158 | 1159 | /* first digit of major HTTP version */ 1160 | case s_req_first_http_major: 1161 | if (UNLIKELY(ch < '1' || ch > '9')) { 1162 
| SET_ERRNO(HPE_INVALID_VERSION); 1163 | goto error; 1164 | } 1165 | 1166 | parser->http_major = ch - '0'; 1167 | UPDATE_STATE(s_req_http_major); 1168 | break; 1169 | 1170 | /* major HTTP version or dot */ 1171 | case s_req_http_major: 1172 | { 1173 | if (ch == '.') { 1174 | UPDATE_STATE(s_req_first_http_minor); 1175 | break; 1176 | } 1177 | 1178 | if (UNLIKELY(!IS_NUM(ch))) { 1179 | SET_ERRNO(HPE_INVALID_VERSION); 1180 | goto error; 1181 | } 1182 | 1183 | parser->http_major *= 10; 1184 | parser->http_major += ch - '0'; 1185 | 1186 | if (UNLIKELY(parser->http_major > 999)) { 1187 | SET_ERRNO(HPE_INVALID_VERSION); 1188 | goto error; 1189 | } 1190 | 1191 | break; 1192 | } 1193 | 1194 | /* first digit of minor HTTP version */ 1195 | case s_req_first_http_minor: 1196 | if (UNLIKELY(!IS_NUM(ch))) { 1197 | SET_ERRNO(HPE_INVALID_VERSION); 1198 | goto error; 1199 | } 1200 | 1201 | parser->http_minor = ch - '0'; 1202 | UPDATE_STATE(s_req_http_minor); 1203 | break; 1204 | 1205 | /* minor HTTP version or end of request line */ 1206 | case s_req_http_minor: 1207 | { 1208 | if (ch == CR) { 1209 | UPDATE_STATE(s_req_line_almost_done); 1210 | break; 1211 | } 1212 | 1213 | if (ch == LF) { 1214 | UPDATE_STATE(s_header_field_start); 1215 | break; 1216 | } 1217 | 1218 | /* XXX allow spaces after digit? 
*/ 1219 | 1220 | if (UNLIKELY(!IS_NUM(ch))) { 1221 | SET_ERRNO(HPE_INVALID_VERSION); 1222 | goto error; 1223 | } 1224 | 1225 | parser->http_minor *= 10; 1226 | parser->http_minor += ch - '0'; 1227 | 1228 | if (UNLIKELY(parser->http_minor > 999)) { 1229 | SET_ERRNO(HPE_INVALID_VERSION); 1230 | goto error; 1231 | } 1232 | 1233 | break; 1234 | } 1235 | 1236 | /* end of request line */ 1237 | case s_req_line_almost_done: 1238 | { 1239 | if (UNLIKELY(ch != LF)) { 1240 | SET_ERRNO(HPE_LF_EXPECTED); 1241 | goto error; 1242 | } 1243 | 1244 | UPDATE_STATE(s_header_field_start); 1245 | break; 1246 | } 1247 | 1248 | case s_header_field_start: 1249 | { 1250 | if (ch == CR) { 1251 | UPDATE_STATE(s_headers_almost_done); 1252 | break; 1253 | } 1254 | 1255 | if (ch == LF) { 1256 | /* they might be just sending \n instead of \r\n so this would be 1257 | * the second \n to denote the end of headers*/ 1258 | UPDATE_STATE(s_headers_almost_done); 1259 | REEXECUTE(); 1260 | } 1261 | 1262 | c = TOKEN(ch); 1263 | 1264 | if (UNLIKELY(!c)) { 1265 | SET_ERRNO(HPE_INVALID_HEADER_TOKEN); 1266 | goto error; 1267 | } 1268 | 1269 | MARK(header_field); 1270 | 1271 | parser->index = 0; 1272 | UPDATE_STATE(s_header_field); 1273 | 1274 | switch (c) { 1275 | case 'c': 1276 | parser->header_state = h_C; 1277 | break; 1278 | 1279 | case 'p': 1280 | parser->header_state = h_matching_proxy_connection; 1281 | break; 1282 | 1283 | case 't': 1284 | parser->header_state = h_matching_transfer_encoding; 1285 | break; 1286 | 1287 | case 'u': 1288 | parser->header_state = h_matching_upgrade; 1289 | break; 1290 | 1291 | default: 1292 | parser->header_state = h_general; 1293 | break; 1294 | } 1295 | break; 1296 | } 1297 | 1298 | case s_header_field: 1299 | { 1300 | const char* start = p; 1301 | for (; p != data + len; p++) { 1302 | ch = *p; 1303 | c = TOKEN(ch); 1304 | 1305 | if (!c) 1306 | break; 1307 | 1308 | switch (parser->header_state) { 1309 | case h_general: 1310 | break; 1311 | 1312 | case h_C: 1313 | 
parser->index++; 1314 | parser->header_state = (c == 'o' ? h_CO : h_general); 1315 | break; 1316 | 1317 | case h_CO: 1318 | parser->index++; 1319 | parser->header_state = (c == 'n' ? h_CON : h_general); 1320 | break; 1321 | 1322 | case h_CON: 1323 | parser->index++; 1324 | switch (c) { 1325 | case 'n': 1326 | parser->header_state = h_matching_connection; 1327 | break; 1328 | case 't': 1329 | parser->header_state = h_matching_content_length; 1330 | break; 1331 | default: 1332 | parser->header_state = h_general; 1333 | break; 1334 | } 1335 | break; 1336 | 1337 | /* connection */ 1338 | 1339 | case h_matching_connection: 1340 | parser->index++; 1341 | if (parser->index > sizeof(CONNECTION)-1 1342 | || c != CONNECTION[parser->index]) { 1343 | parser->header_state = h_general; 1344 | } else if (parser->index == sizeof(CONNECTION)-2) { 1345 | parser->header_state = h_connection; 1346 | } 1347 | break; 1348 | 1349 | /* proxy-connection */ 1350 | 1351 | case h_matching_proxy_connection: 1352 | parser->index++; 1353 | if (parser->index > sizeof(PROXY_CONNECTION)-1 1354 | || c != PROXY_CONNECTION[parser->index]) { 1355 | parser->header_state = h_general; 1356 | } else if (parser->index == sizeof(PROXY_CONNECTION)-2) { 1357 | parser->header_state = h_connection; 1358 | } 1359 | break; 1360 | 1361 | /* content-length */ 1362 | 1363 | case h_matching_content_length: 1364 | parser->index++; 1365 | if (parser->index > sizeof(CONTENT_LENGTH)-1 1366 | || c != CONTENT_LENGTH[parser->index]) { 1367 | parser->header_state = h_general; 1368 | } else if (parser->index == sizeof(CONTENT_LENGTH)-2) { 1369 | parser->header_state = h_content_length; 1370 | } 1371 | break; 1372 | 1373 | /* transfer-encoding */ 1374 | 1375 | case h_matching_transfer_encoding: 1376 | parser->index++; 1377 | if (parser->index > sizeof(TRANSFER_ENCODING)-1 1378 | || c != TRANSFER_ENCODING[parser->index]) { 1379 | parser->header_state = h_general; 1380 | } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) { 
1381 | parser->header_state = h_transfer_encoding; 1382 | } 1383 | break; 1384 | 1385 | /* upgrade */ 1386 | 1387 | case h_matching_upgrade: 1388 | parser->index++; 1389 | if (parser->index > sizeof(UPGRADE)-1 1390 | || c != UPGRADE[parser->index]) { 1391 | parser->header_state = h_general; 1392 | } else if (parser->index == sizeof(UPGRADE)-2) { 1393 | parser->header_state = h_upgrade; 1394 | } 1395 | break; 1396 | 1397 | case h_connection: 1398 | case h_content_length: 1399 | case h_transfer_encoding: 1400 | case h_upgrade: 1401 | if (ch != ' ') parser->header_state = h_general; 1402 | break; 1403 | 1404 | default: 1405 | assert(0 && "Unknown header_state"); 1406 | break; 1407 | } 1408 | } 1409 | 1410 | COUNT_HEADER_SIZE(p - start); 1411 | 1412 | if (p == data + len) { 1413 | --p; 1414 | break; 1415 | } 1416 | 1417 | if (ch == ':') { 1418 | UPDATE_STATE(s_header_value_discard_ws); 1419 | CALLBACK_DATA(header_field); 1420 | break; 1421 | } 1422 | 1423 | SET_ERRNO(HPE_INVALID_HEADER_TOKEN); 1424 | goto error; 1425 | } 1426 | 1427 | case s_header_value_discard_ws: 1428 | if (ch == ' ' || ch == '\t') break; 1429 | 1430 | if (ch == CR) { 1431 | UPDATE_STATE(s_header_value_discard_ws_almost_done); 1432 | break; 1433 | } 1434 | 1435 | if (ch == LF) { 1436 | UPDATE_STATE(s_header_value_discard_lws); 1437 | break; 1438 | } 1439 | 1440 | /* FALLTHROUGH */ 1441 | 1442 | case s_header_value_start: 1443 | { 1444 | MARK(header_value); 1445 | 1446 | UPDATE_STATE(s_header_value); 1447 | parser->index = 0; 1448 | 1449 | c = LOWER(ch); 1450 | 1451 | switch (parser->header_state) { 1452 | case h_upgrade: 1453 | parser->flags |= F_UPGRADE; 1454 | parser->header_state = h_general; 1455 | break; 1456 | 1457 | case h_transfer_encoding: 1458 | /* looking for 'Transfer-Encoding: chunked' */ 1459 | if ('c' == c) { 1460 | parser->header_state = h_matching_transfer_encoding_chunked; 1461 | } else { 1462 | parser->header_state = h_general; 1463 | } 1464 | break; 1465 | 1466 | case 
h_content_length: 1467 | if (UNLIKELY(!IS_NUM(ch))) { 1468 | SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); 1469 | goto error; 1470 | } 1471 | 1472 | if (parser->flags & F_CONTENTLENGTH) { 1473 | SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH); 1474 | goto error; 1475 | } 1476 | 1477 | parser->flags |= F_CONTENTLENGTH; 1478 | parser->content_length = ch - '0'; 1479 | break; 1480 | 1481 | case h_connection: 1482 | /* looking for 'Connection: keep-alive' */ 1483 | if (c == 'k') { 1484 | parser->header_state = h_matching_connection_keep_alive; 1485 | /* looking for 'Connection: close' */ 1486 | } else if (c == 'c') { 1487 | parser->header_state = h_matching_connection_close; 1488 | } else if (c == 'u') { 1489 | parser->header_state = h_matching_connection_upgrade; 1490 | } else { 1491 | parser->header_state = h_matching_connection_token; 1492 | } 1493 | break; 1494 | 1495 | /* Multi-value `Connection` header */ 1496 | case h_matching_connection_token_start: 1497 | break; 1498 | 1499 | default: 1500 | parser->header_state = h_general; 1501 | break; 1502 | } 1503 | break; 1504 | } 1505 | 1506 | case s_header_value: 1507 | { 1508 | const char* start = p; 1509 | enum header_states h_state = (enum header_states) parser->header_state; 1510 | for (; p != data + len; p++) { 1511 | ch = *p; 1512 | if (ch == CR) { 1513 | UPDATE_STATE(s_header_almost_done); 1514 | parser->header_state = h_state; 1515 | CALLBACK_DATA(header_value); 1516 | break; 1517 | } 1518 | 1519 | if (ch == LF) { 1520 | UPDATE_STATE(s_header_almost_done); 1521 | COUNT_HEADER_SIZE(p - start); 1522 | parser->header_state = h_state; 1523 | CALLBACK_DATA_NOADVANCE(header_value); 1524 | REEXECUTE(); 1525 | } 1526 | 1527 | if (!lenient && !IS_HEADER_CHAR(ch)) { 1528 | SET_ERRNO(HPE_INVALID_HEADER_TOKEN); 1529 | goto error; 1530 | } 1531 | 1532 | c = LOWER(ch); 1533 | 1534 | switch (h_state) { 1535 | case h_general: 1536 | { 1537 | const char* p_cr; 1538 | const char* p_lf; 1539 | size_t limit = data + len - p; 1540 | 1541 | limit 
= MIN(limit, HTTP_MAX_HEADER_SIZE); 1542 | 1543 | p_cr = (const char*) memchr(p, CR, limit); 1544 | p_lf = (const char*) memchr(p, LF, limit); 1545 | if (p_cr != NULL) { 1546 | if (p_lf != NULL && p_cr >= p_lf) 1547 | p = p_lf; 1548 | else 1549 | p = p_cr; 1550 | } else if (UNLIKELY(p_lf != NULL)) { 1551 | p = p_lf; 1552 | } else { 1553 | p = data + len; 1554 | } 1555 | --p; 1556 | 1557 | break; 1558 | } 1559 | 1560 | case h_connection: 1561 | case h_transfer_encoding: 1562 | assert(0 && "Shouldn't get here."); 1563 | break; 1564 | 1565 | case h_content_length: 1566 | { 1567 | uint64_t t; 1568 | 1569 | if (ch == ' ') break; 1570 | 1571 | if (UNLIKELY(!IS_NUM(ch))) { 1572 | SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); 1573 | parser->header_state = h_state; 1574 | goto error; 1575 | } 1576 | 1577 | t = parser->content_length; 1578 | t *= 10; 1579 | t += ch - '0'; 1580 | 1581 | /* Overflow? Test against a conservative limit for simplicity. */ 1582 | if (UNLIKELY((ULLONG_MAX - 10) / 10 < parser->content_length)) { 1583 | SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); 1584 | parser->header_state = h_state; 1585 | goto error; 1586 | } 1587 | 1588 | parser->content_length = t; 1589 | break; 1590 | } 1591 | 1592 | /* Transfer-Encoding: chunked */ 1593 | case h_matching_transfer_encoding_chunked: 1594 | parser->index++; 1595 | if (parser->index > sizeof(CHUNKED)-1 1596 | || c != CHUNKED[parser->index]) { 1597 | h_state = h_general; 1598 | } else if (parser->index == sizeof(CHUNKED)-2) { 1599 | h_state = h_transfer_encoding_chunked; 1600 | } 1601 | break; 1602 | 1603 | case h_matching_connection_token_start: 1604 | /* looking for 'Connection: keep-alive' */ 1605 | if (c == 'k') { 1606 | h_state = h_matching_connection_keep_alive; 1607 | /* looking for 'Connection: close' */ 1608 | } else if (c == 'c') { 1609 | h_state = h_matching_connection_close; 1610 | } else if (c == 'u') { 1611 | h_state = h_matching_connection_upgrade; 1612 | } else if (STRICT_TOKEN(c)) { 1613 | h_state = 
h_matching_connection_token; 1614 | } else if (c == ' ' || c == '\t') { 1615 | /* Skip lws */ 1616 | } else { 1617 | h_state = h_general; 1618 | } 1619 | break; 1620 | 1621 | /* looking for 'Connection: keep-alive' */ 1622 | case h_matching_connection_keep_alive: 1623 | parser->index++; 1624 | if (parser->index > sizeof(KEEP_ALIVE)-1 1625 | || c != KEEP_ALIVE[parser->index]) { 1626 | h_state = h_matching_connection_token; 1627 | } else if (parser->index == sizeof(KEEP_ALIVE)-2) { 1628 | h_state = h_connection_keep_alive; 1629 | } 1630 | break; 1631 | 1632 | /* looking for 'Connection: close' */ 1633 | case h_matching_connection_close: 1634 | parser->index++; 1635 | if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) { 1636 | h_state = h_matching_connection_token; 1637 | } else if (parser->index == sizeof(CLOSE)-2) { 1638 | h_state = h_connection_close; 1639 | } 1640 | break; 1641 | 1642 | /* looking for 'Connection: upgrade' */ 1643 | case h_matching_connection_upgrade: 1644 | parser->index++; 1645 | if (parser->index > sizeof(UPGRADE) - 1 || 1646 | c != UPGRADE[parser->index]) { 1647 | h_state = h_matching_connection_token; 1648 | } else if (parser->index == sizeof(UPGRADE)-2) { 1649 | h_state = h_connection_upgrade; 1650 | } 1651 | break; 1652 | 1653 | case h_matching_connection_token: 1654 | if (ch == ',') { 1655 | h_state = h_matching_connection_token_start; 1656 | parser->index = 0; 1657 | } 1658 | break; 1659 | 1660 | case h_transfer_encoding_chunked: 1661 | if (ch != ' ') h_state = h_general; 1662 | break; 1663 | 1664 | case h_connection_keep_alive: 1665 | case h_connection_close: 1666 | case h_connection_upgrade: 1667 | if (ch == ',') { 1668 | if (h_state == h_connection_keep_alive) { 1669 | parser->flags |= F_CONNECTION_KEEP_ALIVE; 1670 | } else if (h_state == h_connection_close) { 1671 | parser->flags |= F_CONNECTION_CLOSE; 1672 | } else if (h_state == h_connection_upgrade) { 1673 | parser->flags |= F_CONNECTION_UPGRADE; 1674 | } 1675 | 
h_state = h_matching_connection_token_start; 1676 | parser->index = 0; 1677 | } else if (ch != ' ') { 1678 | h_state = h_matching_connection_token; 1679 | } 1680 | break; 1681 | 1682 | default: 1683 | UPDATE_STATE(s_header_value); 1684 | h_state = h_general; 1685 | break; 1686 | } 1687 | } 1688 | parser->header_state = h_state; 1689 | 1690 | COUNT_HEADER_SIZE(p - start); 1691 | 1692 | if (p == data + len) 1693 | --p; 1694 | break; 1695 | } 1696 | 1697 | case s_header_almost_done: 1698 | { 1699 | if (UNLIKELY(ch != LF)) { 1700 | SET_ERRNO(HPE_LF_EXPECTED); 1701 | goto error; 1702 | } 1703 | 1704 | UPDATE_STATE(s_header_value_lws); 1705 | break; 1706 | } 1707 | 1708 | case s_header_value_lws: 1709 | { 1710 | if (ch == ' ' || ch == '\t') { 1711 | UPDATE_STATE(s_header_value_start); 1712 | REEXECUTE(); 1713 | } 1714 | 1715 | /* finished the header */ 1716 | switch (parser->header_state) { 1717 | case h_connection_keep_alive: 1718 | parser->flags |= F_CONNECTION_KEEP_ALIVE; 1719 | break; 1720 | case h_connection_close: 1721 | parser->flags |= F_CONNECTION_CLOSE; 1722 | break; 1723 | case h_transfer_encoding_chunked: 1724 | parser->flags |= F_CHUNKED; 1725 | break; 1726 | case h_connection_upgrade: 1727 | parser->flags |= F_CONNECTION_UPGRADE; 1728 | break; 1729 | default: 1730 | break; 1731 | } 1732 | 1733 | UPDATE_STATE(s_header_field_start); 1734 | REEXECUTE(); 1735 | } 1736 | 1737 | case s_header_value_discard_ws_almost_done: 1738 | { 1739 | STRICT_CHECK(ch != LF); 1740 | UPDATE_STATE(s_header_value_discard_lws); 1741 | break; 1742 | } 1743 | 1744 | case s_header_value_discard_lws: 1745 | { 1746 | if (ch == ' ' || ch == '\t') { 1747 | UPDATE_STATE(s_header_value_discard_ws); 1748 | break; 1749 | } else { 1750 | switch (parser->header_state) { 1751 | case h_connection_keep_alive: 1752 | parser->flags |= F_CONNECTION_KEEP_ALIVE; 1753 | break; 1754 | case h_connection_close: 1755 | parser->flags |= F_CONNECTION_CLOSE; 1756 | break; 1757 | case h_connection_upgrade: 1758 
| parser->flags |= F_CONNECTION_UPGRADE; 1759 | break; 1760 | case h_transfer_encoding_chunked: 1761 | parser->flags |= F_CHUNKED; 1762 | break; 1763 | default: 1764 | break; 1765 | } 1766 | 1767 | /* header value was empty */ 1768 | MARK(header_value); 1769 | UPDATE_STATE(s_header_field_start); 1770 | CALLBACK_DATA_NOADVANCE(header_value); 1771 | REEXECUTE(); 1772 | } 1773 | } 1774 | 1775 | case s_headers_almost_done: 1776 | { 1777 | STRICT_CHECK(ch != LF); 1778 | 1779 | if (parser->flags & F_TRAILING) { 1780 | /* End of a chunked request */ 1781 | UPDATE_STATE(s_message_done); 1782 | CALLBACK_NOTIFY_NOADVANCE(chunk_complete); 1783 | REEXECUTE(); 1784 | } 1785 | 1786 | /* Cannot use chunked encoding and a content-length header together 1787 | per the HTTP specification. */ 1788 | if ((parser->flags & F_CHUNKED) && 1789 | (parser->flags & F_CONTENTLENGTH)) { 1790 | SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH); 1791 | goto error; 1792 | } 1793 | 1794 | UPDATE_STATE(s_headers_done); 1795 | 1796 | /* Set this here so that on_headers_complete() callbacks can see it */ 1797 | parser->upgrade = 1798 | ((parser->flags & (F_UPGRADE | F_CONNECTION_UPGRADE)) == 1799 | (F_UPGRADE | F_CONNECTION_UPGRADE) || 1800 | parser->method == HTTP_CONNECT); 1801 | 1802 | /* Here we call the headers_complete callback. This is somewhat 1803 | * different than other callbacks because if the user returns 1, we 1804 | * will interpret that as saying that this message has no body. This 1805 | * is needed for the annoying case of recieving a response to a HEAD 1806 | * request. 1807 | * 1808 | * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so 1809 | * we have to simulate it by handling a change in errno below. 
1810 | */ 1811 | if (settings->on_headers_complete) { 1812 | switch (settings->on_headers_complete(parser)) { 1813 | case 0: 1814 | break; 1815 | 1816 | case 2: 1817 | parser->upgrade = 1; 1818 | 1819 | case 1: 1820 | parser->flags |= F_SKIPBODY; 1821 | break; 1822 | 1823 | default: 1824 | SET_ERRNO(HPE_CB_headers_complete); 1825 | RETURN(p - data); /* Error */ 1826 | } 1827 | } 1828 | 1829 | if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { 1830 | RETURN(p - data); 1831 | } 1832 | 1833 | REEXECUTE(); 1834 | } 1835 | 1836 | case s_headers_done: 1837 | { 1838 | int hasBody; 1839 | STRICT_CHECK(ch != LF); 1840 | 1841 | parser->nread = 0; 1842 | 1843 | hasBody = parser->flags & F_CHUNKED || 1844 | (parser->content_length > 0 && parser->content_length != ULLONG_MAX); 1845 | if (parser->upgrade && (parser->method == HTTP_CONNECT || 1846 | (parser->flags & F_SKIPBODY) || !hasBody)) { 1847 | /* Exit, the rest of the message is in a different protocol. */ 1848 | UPDATE_STATE(NEW_MESSAGE()); 1849 | CALLBACK_NOTIFY(message_complete); 1850 | RETURN((p - data) + 1); 1851 | } 1852 | 1853 | if (parser->flags & F_SKIPBODY) { 1854 | UPDATE_STATE(NEW_MESSAGE()); 1855 | CALLBACK_NOTIFY(message_complete); 1856 | } else if (parser->flags & F_CHUNKED) { 1857 | /* chunked encoding - ignore Content-Length header */ 1858 | UPDATE_STATE(s_chunk_size_start); 1859 | } else { 1860 | if (parser->content_length == 0) { 1861 | /* Content-Length header given but zero: Content-Length: 0\r\n */ 1862 | UPDATE_STATE(NEW_MESSAGE()); 1863 | CALLBACK_NOTIFY(message_complete); 1864 | } else if (parser->content_length != ULLONG_MAX) { 1865 | /* Content-Length header given and non-zero */ 1866 | UPDATE_STATE(s_body_identity); 1867 | } else { 1868 | if (!http_message_needs_eof(parser)) { 1869 | /* Assume content-length 0 - read the next */ 1870 | UPDATE_STATE(NEW_MESSAGE()); 1871 | CALLBACK_NOTIFY(message_complete); 1872 | } else { 1873 | /* Read body until EOF */ 1874 | UPDATE_STATE(s_body_identity_eof); 1875 | } 
1876 | } 1877 | } 1878 | 1879 | break; 1880 | } 1881 | 1882 | case s_body_identity: 1883 | { 1884 | uint64_t to_read = MIN(parser->content_length, 1885 | (uint64_t) ((data + len) - p)); 1886 | 1887 | assert(parser->content_length != 0 1888 | && parser->content_length != ULLONG_MAX); 1889 | 1890 | /* The difference between advancing content_length and p is because 1891 | * the latter will automaticaly advance on the next loop iteration. 1892 | * Further, if content_length ends up at 0, we want to see the last 1893 | * byte again for our message complete callback. 1894 | */ 1895 | MARK(body); 1896 | parser->content_length -= to_read; 1897 | p += to_read - 1; 1898 | 1899 | if (parser->content_length == 0) { 1900 | UPDATE_STATE(s_message_done); 1901 | 1902 | /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte. 1903 | * 1904 | * The alternative to doing this is to wait for the next byte to 1905 | * trigger the data callback, just as in every other case. The 1906 | * problem with this is that this makes it difficult for the test 1907 | * harness to distinguish between complete-on-EOF and 1908 | * complete-on-length. It's not clear that this distinction is 1909 | * important for applications, but let's keep it for now. 1910 | */ 1911 | CALLBACK_DATA_(body, p - body_mark + 1, p - data); 1912 | REEXECUTE(); 1913 | } 1914 | 1915 | break; 1916 | } 1917 | 1918 | /* read until EOF */ 1919 | case s_body_identity_eof: 1920 | MARK(body); 1921 | p = data + len - 1; 1922 | 1923 | break; 1924 | 1925 | case s_message_done: 1926 | UPDATE_STATE(NEW_MESSAGE()); 1927 | CALLBACK_NOTIFY(message_complete); 1928 | if (parser->upgrade) { 1929 | /* Exit, the rest of the message is in a different protocol. 
*/ 1930 | RETURN((p - data) + 1); 1931 | } 1932 | break; 1933 | 1934 | case s_chunk_size_start: 1935 | { 1936 | assert(parser->nread == 1); 1937 | assert(parser->flags & F_CHUNKED); 1938 | 1939 | unhex_val = unhex[(unsigned char)ch]; 1940 | if (UNLIKELY(unhex_val == -1)) { 1941 | SET_ERRNO(HPE_INVALID_CHUNK_SIZE); 1942 | goto error; 1943 | } 1944 | 1945 | parser->content_length = unhex_val; 1946 | UPDATE_STATE(s_chunk_size); 1947 | break; 1948 | } 1949 | 1950 | case s_chunk_size: 1951 | { 1952 | uint64_t t; 1953 | 1954 | assert(parser->flags & F_CHUNKED); 1955 | 1956 | if (ch == CR) { 1957 | UPDATE_STATE(s_chunk_size_almost_done); 1958 | break; 1959 | } 1960 | 1961 | unhex_val = unhex[(unsigned char)ch]; 1962 | 1963 | if (unhex_val == -1) { 1964 | if (ch == ';' || ch == ' ') { 1965 | UPDATE_STATE(s_chunk_parameters); 1966 | break; 1967 | } 1968 | 1969 | SET_ERRNO(HPE_INVALID_CHUNK_SIZE); 1970 | goto error; 1971 | } 1972 | 1973 | t = parser->content_length; 1974 | t *= 16; 1975 | t += unhex_val; 1976 | 1977 | /* Overflow? Test against a conservative limit for simplicity. */ 1978 | if (UNLIKELY((ULLONG_MAX - 16) / 16 < parser->content_length)) { 1979 | SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); 1980 | goto error; 1981 | } 1982 | 1983 | parser->content_length = t; 1984 | break; 1985 | } 1986 | 1987 | case s_chunk_parameters: 1988 | { 1989 | assert(parser->flags & F_CHUNKED); 1990 | /* just ignore this shit. 
TODO check for overflow */ 1991 | if (ch == CR) { 1992 | UPDATE_STATE(s_chunk_size_almost_done); 1993 | break; 1994 | } 1995 | break; 1996 | } 1997 | 1998 | case s_chunk_size_almost_done: 1999 | { 2000 | assert(parser->flags & F_CHUNKED); 2001 | STRICT_CHECK(ch != LF); 2002 | 2003 | parser->nread = 0; 2004 | 2005 | if (parser->content_length == 0) { 2006 | parser->flags |= F_TRAILING; 2007 | UPDATE_STATE(s_header_field_start); 2008 | } else { 2009 | UPDATE_STATE(s_chunk_data); 2010 | } 2011 | CALLBACK_NOTIFY(chunk_header); 2012 | break; 2013 | } 2014 | 2015 | case s_chunk_data: 2016 | { 2017 | uint64_t to_read = MIN(parser->content_length, 2018 | (uint64_t) ((data + len) - p)); 2019 | 2020 | assert(parser->flags & F_CHUNKED); 2021 | assert(parser->content_length != 0 2022 | && parser->content_length != ULLONG_MAX); 2023 | 2024 | /* See the explanation in s_body_identity for why the content 2025 | * length and data pointers are managed this way. 2026 | */ 2027 | MARK(body); 2028 | parser->content_length -= to_read; 2029 | p += to_read - 1; 2030 | 2031 | if (parser->content_length == 0) { 2032 | UPDATE_STATE(s_chunk_data_almost_done); 2033 | } 2034 | 2035 | break; 2036 | } 2037 | 2038 | case s_chunk_data_almost_done: 2039 | assert(parser->flags & F_CHUNKED); 2040 | assert(parser->content_length == 0); 2041 | STRICT_CHECK(ch != CR); 2042 | UPDATE_STATE(s_chunk_data_done); 2043 | CALLBACK_DATA(body); 2044 | break; 2045 | 2046 | case s_chunk_data_done: 2047 | assert(parser->flags & F_CHUNKED); 2048 | STRICT_CHECK(ch != LF); 2049 | parser->nread = 0; 2050 | UPDATE_STATE(s_chunk_size_start); 2051 | CALLBACK_NOTIFY(chunk_complete); 2052 | break; 2053 | 2054 | default: 2055 | assert(0 && "unhandled state"); 2056 | SET_ERRNO(HPE_INVALID_INTERNAL_STATE); 2057 | goto error; 2058 | } 2059 | } 2060 | 2061 | /* Run callbacks for any marks that we have leftover after we ran our of 2062 | * bytes. 
There should be at most one of these set, so it's OK to invoke 2063 | * them in series (unset marks will not result in callbacks). 2064 | * 2065 | * We use the NOADVANCE() variety of callbacks here because 'p' has already 2066 | * overflowed 'data' and this allows us to correct for the off-by-one that 2067 | * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p' 2068 | * value that's in-bounds). 2069 | */ 2070 | 2071 | assert(((header_field_mark ? 1 : 0) + 2072 | (header_value_mark ? 1 : 0) + 2073 | (url_mark ? 1 : 0) + 2074 | (body_mark ? 1 : 0) + 2075 | (status_mark ? 1 : 0)) <= 1); 2076 | 2077 | CALLBACK_DATA_NOADVANCE(header_field); 2078 | CALLBACK_DATA_NOADVANCE(header_value); 2079 | CALLBACK_DATA_NOADVANCE(url); 2080 | CALLBACK_DATA_NOADVANCE(body); 2081 | CALLBACK_DATA_NOADVANCE(status); 2082 | 2083 | RETURN(len); 2084 | 2085 | error: 2086 | if (HTTP_PARSER_ERRNO(parser) == HPE_OK) { 2087 | SET_ERRNO(HPE_UNKNOWN); 2088 | } 2089 | 2090 | RETURN(p - data); 2091 | } 2092 | 2093 | 2094 | /* Does the parser need to see an EOF to find the end of the message? */ 2095 | int 2096 | http_message_needs_eof (const http_parser *parser) 2097 | { 2098 | if (parser->type == HTTP_REQUEST) { 2099 | return 0; 2100 | } 2101 | 2102 | /* See RFC 2616 section 4.4 */ 2103 | if (parser->status_code / 100 == 1 || /* 1xx e.g. 
Continue */ 2104 | parser->status_code == 204 || /* No Content */ 2105 | parser->status_code == 304 || /* Not Modified */ 2106 | parser->flags & F_SKIPBODY) { /* response to a HEAD request */ 2107 | return 0; 2108 | } 2109 | 2110 | if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) { 2111 | return 0; 2112 | } 2113 | 2114 | return 1; 2115 | } 2116 | 2117 | 2118 | int 2119 | http_should_keep_alive (const http_parser *parser) 2120 | { 2121 | if (parser->http_major > 0 && parser->http_minor > 0) { 2122 | /* HTTP/1.1 */ 2123 | if (parser->flags & F_CONNECTION_CLOSE) { 2124 | return 0; 2125 | } 2126 | } else { 2127 | /* HTTP/1.0 or earlier */ 2128 | if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) { 2129 | return 0; 2130 | } 2131 | } 2132 | 2133 | return !http_message_needs_eof(parser); 2134 | } 2135 | 2136 | 2137 | const char * 2138 | http_method_str (enum http_method m) 2139 | { 2140 | return ELEM_AT(method_strings, m, ""); 2141 | } 2142 | 2143 | 2144 | void 2145 | http_parser_init (http_parser *parser, enum http_parser_type t) 2146 | { 2147 | void *data = parser->data; /* preserve application data */ 2148 | memset(parser, 0, sizeof(*parser)); 2149 | parser->data = data; 2150 | parser->type = t; 2151 | parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? 
s_start_res : s_start_req_or_res)); 2152 | parser->http_errno = HPE_OK; 2153 | } 2154 | 2155 | void 2156 | http_parser_settings_init(http_parser_settings *settings) 2157 | { 2158 | memset(settings, 0, sizeof(*settings)); 2159 | } 2160 | 2161 | const char * 2162 | http_errno_name(enum http_errno err) { 2163 | assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab)); 2164 | return http_strerror_tab[err].name; 2165 | } 2166 | 2167 | const char * 2168 | http_errno_description(enum http_errno err) { 2169 | assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab)); 2170 | return http_strerror_tab[err].description; 2171 | } 2172 | 2173 | static enum http_host_state 2174 | http_parse_host_char(enum http_host_state s, const char ch) { 2175 | switch(s) { 2176 | case s_http_userinfo: 2177 | case s_http_userinfo_start: 2178 | if (ch == '@') { 2179 | return s_http_host_start; 2180 | } 2181 | 2182 | if (IS_USERINFO_CHAR(ch)) { 2183 | return s_http_userinfo; 2184 | } 2185 | break; 2186 | 2187 | case s_http_host_start: 2188 | if (ch == '[') { 2189 | return s_http_host_v6_start; 2190 | } 2191 | 2192 | if (IS_HOST_CHAR(ch)) { 2193 | return s_http_host; 2194 | } 2195 | 2196 | break; 2197 | 2198 | case s_http_host: 2199 | if (IS_HOST_CHAR(ch)) { 2200 | return s_http_host; 2201 | } 2202 | 2203 | /* FALLTHROUGH */ 2204 | case s_http_host_v6_end: 2205 | if (ch == ':') { 2206 | return s_http_host_port_start; 2207 | } 2208 | 2209 | break; 2210 | 2211 | case s_http_host_v6: 2212 | if (ch == ']') { 2213 | return s_http_host_v6_end; 2214 | } 2215 | 2216 | /* FALLTHROUGH */ 2217 | case s_http_host_v6_start: 2218 | if (IS_HEX(ch) || ch == ':' || ch == '.') { 2219 | return s_http_host_v6; 2220 | } 2221 | 2222 | if (s == s_http_host_v6 && ch == '%') { 2223 | return s_http_host_v6_zone_start; 2224 | } 2225 | break; 2226 | 2227 | case s_http_host_v6_zone: 2228 | if (ch == ']') { 2229 | return s_http_host_v6_end; 2230 | } 2231 | 2232 | /* FALLTHROUGH */ 2233 | case s_http_host_v6_zone_start: 2234 | /* 
RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */ 2235 | if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' || ch == '-' || ch == '_' || 2236 | ch == '~') { 2237 | return s_http_host_v6_zone; 2238 | } 2239 | break; 2240 | 2241 | case s_http_host_port: 2242 | case s_http_host_port_start: 2243 | if (IS_NUM(ch)) { 2244 | return s_http_host_port; 2245 | } 2246 | 2247 | break; 2248 | 2249 | default: 2250 | break; 2251 | } 2252 | return s_http_host_dead; 2253 | } 2254 | 2255 | static int 2256 | http_parse_host(const char * buf, struct http_parser_url *u, int found_at) { 2257 | enum http_host_state s; 2258 | 2259 | const char *p; 2260 | size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len; 2261 | 2262 | assert(u->field_set & (1 << UF_HOST)); 2263 | 2264 | u->field_data[UF_HOST].len = 0; 2265 | 2266 | s = found_at ? s_http_userinfo_start : s_http_host_start; 2267 | 2268 | for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) { 2269 | enum http_host_state new_s = http_parse_host_char(s, *p); 2270 | 2271 | if (new_s == s_http_host_dead) { 2272 | return 1; 2273 | } 2274 | 2275 | switch(new_s) { 2276 | case s_http_host: 2277 | if (s != s_http_host) { 2278 | u->field_data[UF_HOST].off = p - buf; 2279 | } 2280 | u->field_data[UF_HOST].len++; 2281 | break; 2282 | 2283 | case s_http_host_v6: 2284 | if (s != s_http_host_v6) { 2285 | u->field_data[UF_HOST].off = p - buf; 2286 | } 2287 | u->field_data[UF_HOST].len++; 2288 | break; 2289 | 2290 | case s_http_host_v6_zone_start: 2291 | case s_http_host_v6_zone: 2292 | u->field_data[UF_HOST].len++; 2293 | break; 2294 | 2295 | case s_http_host_port: 2296 | if (s != s_http_host_port) { 2297 | u->field_data[UF_PORT].off = p - buf; 2298 | u->field_data[UF_PORT].len = 0; 2299 | u->field_set |= (1 << UF_PORT); 2300 | } 2301 | u->field_data[UF_PORT].len++; 2302 | break; 2303 | 2304 | case s_http_userinfo: 2305 | if (s != s_http_userinfo) { 2306 | u->field_data[UF_USERINFO].off = p - buf ; 2307 | 
u->field_data[UF_USERINFO].len = 0; 2308 | u->field_set |= (1 << UF_USERINFO); 2309 | } 2310 | u->field_data[UF_USERINFO].len++; 2311 | break; 2312 | 2313 | default: 2314 | break; 2315 | } 2316 | s = new_s; 2317 | } 2318 | 2319 | /* Make sure we don't end somewhere unexpected */ 2320 | switch (s) { 2321 | case s_http_host_start: 2322 | case s_http_host_v6_start: 2323 | case s_http_host_v6: 2324 | case s_http_host_v6_zone_start: 2325 | case s_http_host_v6_zone: 2326 | case s_http_host_port_start: 2327 | case s_http_userinfo: 2328 | case s_http_userinfo_start: 2329 | return 1; 2330 | default: 2331 | break; 2332 | } 2333 | 2334 | return 0; 2335 | } 2336 | 2337 | void 2338 | http_parser_url_init(struct http_parser_url *u) { 2339 | memset(u, 0, sizeof(*u)); 2340 | } 2341 | 2342 | int 2343 | http_parser_parse_url(const char *buf, size_t buflen, int is_connect, 2344 | struct http_parser_url *u) 2345 | { 2346 | enum state s; 2347 | const char *p; 2348 | enum http_parser_url_fields uf, old_uf; 2349 | int found_at = 0; 2350 | 2351 | u->port = u->field_set = 0; 2352 | s = is_connect ? 
s_req_server_start : s_req_spaces_before_url; 2353 | old_uf = UF_MAX; 2354 | 2355 | for (p = buf; p < buf + buflen; p++) { 2356 | s = parse_url_char(s, *p); 2357 | 2358 | /* Figure out the next field that we're operating on */ 2359 | switch (s) { 2360 | case s_dead: 2361 | return 1; 2362 | 2363 | /* Skip delimeters */ 2364 | case s_req_schema_slash: 2365 | case s_req_schema_slash_slash: 2366 | case s_req_server_start: 2367 | case s_req_query_string_start: 2368 | case s_req_fragment_start: 2369 | continue; 2370 | 2371 | case s_req_schema: 2372 | uf = UF_SCHEMA; 2373 | break; 2374 | 2375 | case s_req_server_with_at: 2376 | found_at = 1; 2377 | 2378 | /* FALLTROUGH */ 2379 | case s_req_server: 2380 | uf = UF_HOST; 2381 | break; 2382 | 2383 | case s_req_path: 2384 | uf = UF_PATH; 2385 | break; 2386 | 2387 | case s_req_query_string: 2388 | uf = UF_QUERY; 2389 | break; 2390 | 2391 | case s_req_fragment: 2392 | uf = UF_FRAGMENT; 2393 | break; 2394 | 2395 | default: 2396 | assert(!"Unexpected state"); 2397 | return 1; 2398 | } 2399 | 2400 | /* Nothing's changed; soldier on */ 2401 | if (uf == old_uf) { 2402 | u->field_data[uf].len++; 2403 | continue; 2404 | } 2405 | 2406 | u->field_data[uf].off = p - buf; 2407 | u->field_data[uf].len = 1; 2408 | 2409 | u->field_set |= (1 << uf); 2410 | old_uf = uf; 2411 | } 2412 | 2413 | /* host must be present if there is a schema */ 2414 | /* parsing http:///toto will fail */ 2415 | if ((u->field_set & (1 << UF_SCHEMA)) && 2416 | (u->field_set & (1 << UF_HOST)) == 0) { 2417 | return 1; 2418 | } 2419 | 2420 | if (u->field_set & (1 << UF_HOST)) { 2421 | if (http_parse_host(buf, u, found_at) != 0) { 2422 | return 1; 2423 | } 2424 | } 2425 | 2426 | /* CONNECT requests can only contain "hostname:port" */ 2427 | if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) { 2428 | return 1; 2429 | } 2430 | 2431 | if (u->field_set & (1 << UF_PORT)) { 2432 | /* Don't bother with endp; we've already validated the string */ 2433 | unsigned 
long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10); 2434 | 2435 | /* Ports have a max value of 2^16 */ 2436 | if (v > 0xffff) { 2437 | return 1; 2438 | } 2439 | 2440 | u->port = (uint16_t) v; 2441 | } 2442 | 2443 | return 0; 2444 | } 2445 | 2446 | void 2447 | http_parser_pause(http_parser *parser, int paused) { 2448 | /* Users should only be pausing/unpausing a parser that is not in an error 2449 | * state. In non-debug builds, there's not much that we can do about this 2450 | * other than ignore it. 2451 | */ 2452 | if (HTTP_PARSER_ERRNO(parser) == HPE_OK || 2453 | HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) { 2454 | SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK); 2455 | } else { 2456 | assert(0 && "Attempting to pause parser in error state"); 2457 | } 2458 | } 2459 | 2460 | int 2461 | http_body_is_final(const struct http_parser *parser) { 2462 | return parser->state == s_message_done; 2463 | } 2464 | 2465 | unsigned long 2466 | http_parser_version(void) { 2467 | return HTTP_PARSER_VERSION_MAJOR * 0x10000 | 2468 | HTTP_PARSER_VERSION_MINOR * 0x00100 | 2469 | HTTP_PARSER_VERSION_PATCH * 0x00001; 2470 | } 2471 | --------------------------------------------------------------------------------