├── .gitignore ├── .mailmap ├── .travis.yml ├── AUTHORS ├── LICENSE-MIT ├── Makefile ├── README.md ├── bench.c ├── contrib ├── parsertrace.c └── url_parser.c ├── fuzzers ├── fuzz_parser.c └── fuzz_url.c ├── http_parser.c ├── http_parser.gyp ├── http_parser.h └── test.c /.gitignore: -------------------------------------------------------------------------------- 1 | /out/ 2 | core 3 | tags 4 | *.o 5 | test 6 | test_g 7 | test_fast 8 | bench 9 | url_parser 10 | parsertrace 11 | parsertrace_g 12 | *.mk 13 | *.Makefile 14 | *.so.* 15 | *.exe.* 16 | *.exe 17 | *.a 18 | 19 | 20 | # Visual Studio uglies 21 | *.suo 22 | *.sln 23 | *.vcxproj 24 | *.vcxproj.filters 25 | *.vcxproj.user 26 | *.opensdf 27 | *.ncrunchsolution* 28 | *.sdf 29 | *.vsp 30 | *.psess 31 | -------------------------------------------------------------------------------- /.mailmap: -------------------------------------------------------------------------------- 1 | # update AUTHORS with: 2 | # git log --all --reverse --format='%aN <%aE>' | perl -ne 'BEGIN{print "# Authors ordered by first contribution.\n"} print unless $h{$_}; $h{$_} = 1' > AUTHORS 3 | Ryan Dahl 4 | Salman Haq 5 | Simon Zimmermann 6 | Thomas LE ROUX LE ROUX Thomas 7 | Thomas LE ROUX Thomas LE ROUX 8 | Fedor Indutny 9 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: c 2 | 3 | compiler: 4 | - clang 5 | - gcc 6 | 7 | script: 8 | - "make" 9 | 10 | notifications: 11 | email: false 12 | irc: 13 | - "irc.freenode.net#node-ci" 14 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | # Authors ordered by first contribution. 
2 | Ryan Dahl 3 | Jeremy Hinegardner 4 | Sergey Shepelev 5 | Joe Damato 6 | tomika 7 | Phoenix Sol 8 | Cliff Frey 9 | Ewen Cheslack-Postava 10 | Santiago Gala 11 | Tim Becker 12 | Jeff Terrace 13 | Ben Noordhuis 14 | Nathan Rajlich 15 | Mark Nottingham 16 | Aman Gupta 17 | Tim Becker 18 | Sean Cunningham 19 | Peter Griess 20 | Salman Haq 21 | Cliff Frey 22 | Jon Kolb 23 | Fouad Mardini 24 | Paul Querna 25 | Felix Geisendörfer 26 | koichik 27 | Andre Caron 28 | Ivo Raisr 29 | James McLaughlin 30 | David Gwynne 31 | Thomas LE ROUX 32 | Randy Rizun 33 | Andre Louis Caron 34 | Simon Zimmermann 35 | Erik Dubbelboer 36 | Martell Malone 37 | Bertrand Paquet 38 | BogDan Vatra 39 | Peter Faiman 40 | Corey Richardson 41 | Tóth Tamás 42 | Cam Swords 43 | Chris Dickinson 44 | Uli Köhler 45 | Charlie Somerville 46 | Patrik Stutz 47 | Fedor Indutny 48 | runner 49 | Alexis Campailla 50 | David Wragg 51 | Vinnie Falco 52 | Alex Butum 53 | Rex Feng 54 | Alex Kocharin 55 | Mark Koopman 56 | Helge Heß 57 | Alexis La Goutte 58 | George Miroshnykov 59 | Maciej Małecki 60 | Marc O'Morain 61 | Jeff Pinner 62 | Timothy J Fontaine 63 | Akagi201 64 | Romain Giraud 65 | Jay Satiro 66 | Arne Steen 67 | Kjell Schubert 68 | Olivier Mengué 69 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Copyright Joyent, Inc. and other Node contributors. 
2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to 5 | deal in the Software without restriction, including without limitation the 6 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 7 | sell copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 19 | IN THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Copyright Joyent, Inc. and other Node contributors. All rights reserved. 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy 4 | # of this software and associated documentation files (the "Software"), to 5 | # deal in the Software without restriction, including without limitation the 6 | # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 7 | # sell copies of the Software, and to permit persons to whom the Software is 8 | # furnished to do so, subject to the following conditions: 9 | # 10 | # The above copyright notice and this permission notice shall be included in 11 | # all copies or substantial portions of the Software. 
12 | # 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 19 | # IN THE SOFTWARE. 20 | 21 | PLATFORM ?= $(shell sh -c 'uname -s | tr "[A-Z]" "[a-z]"') 22 | HELPER ?= 23 | BINEXT ?= 24 | SOLIBNAME = libhttp_parser 25 | SOMAJOR = 2 26 | SOMINOR = 9 27 | SOREV = 4 28 | ifeq (darwin,$(PLATFORM)) 29 | SOEXT ?= dylib 30 | SONAME ?= $(SOLIBNAME).$(SOMAJOR).$(SOMINOR).$(SOEXT) 31 | LIBNAME ?= $(SOLIBNAME).$(SOMAJOR).$(SOMINOR).$(SOREV).$(SOEXT) 32 | else ifeq (wine,$(PLATFORM)) 33 | CC = winegcc 34 | BINEXT = .exe.so 35 | HELPER = wine 36 | else 37 | SOEXT ?= so 38 | SONAME ?= $(SOLIBNAME).$(SOEXT).$(SOMAJOR).$(SOMINOR) 39 | LIBNAME ?= $(SOLIBNAME).$(SOEXT).$(SOMAJOR).$(SOMINOR).$(SOREV) 40 | endif 41 | 42 | CC?=gcc 43 | AR?=ar 44 | 45 | CPPFLAGS ?= 46 | LDFLAGS ?= 47 | 48 | CPPFLAGS += -I. 
49 | CPPFLAGS_DEBUG = $(CPPFLAGS) -DHTTP_PARSER_STRICT=1 50 | CPPFLAGS_DEBUG += $(CPPFLAGS_DEBUG_EXTRA) 51 | CPPFLAGS_FAST = $(CPPFLAGS) -DHTTP_PARSER_STRICT=0 52 | CPPFLAGS_FAST += $(CPPFLAGS_FAST_EXTRA) 53 | CPPFLAGS_BENCH = $(CPPFLAGS_FAST) 54 | 55 | CFLAGS += -Wall -Wextra -Werror 56 | CFLAGS_DEBUG = $(CFLAGS) -O0 -g $(CFLAGS_DEBUG_EXTRA) 57 | CFLAGS_FAST = $(CFLAGS) -O3 $(CFLAGS_FAST_EXTRA) 58 | CFLAGS_BENCH = $(CFLAGS_FAST) -Wno-unused-parameter 59 | CFLAGS_LIB = $(CFLAGS_FAST) -fPIC 60 | 61 | LDFLAGS_LIB = $(LDFLAGS) -shared 62 | 63 | INSTALL ?= install 64 | PREFIX ?= /usr/local 65 | LIBDIR = $(PREFIX)/lib 66 | INCLUDEDIR = $(PREFIX)/include 67 | 68 | ifeq (darwin,$(PLATFORM)) 69 | LDFLAGS_LIB += -Wl,-install_name,$(LIBDIR)/$(SONAME) 70 | else 71 | # TODO(bnoordhuis) The native SunOS linker expects -h rather than -soname... 72 | LDFLAGS_LIB += -Wl,-soname=$(SONAME) 73 | endif 74 | 75 | test: test_g test_fast 76 | $(HELPER) ./test_g$(BINEXT) 77 | $(HELPER) ./test_fast$(BINEXT) 78 | 79 | test_g: http_parser_g.o test_g.o 80 | $(CC) $(CFLAGS_DEBUG) $(LDFLAGS) http_parser_g.o test_g.o -o $@ 81 | 82 | test_g.o: test.c http_parser.h Makefile 83 | $(CC) $(CPPFLAGS_DEBUG) $(CFLAGS_DEBUG) -c test.c -o $@ 84 | 85 | http_parser_g.o: http_parser.c http_parser.h Makefile 86 | $(CC) $(CPPFLAGS_DEBUG) $(CFLAGS_DEBUG) -c http_parser.c -o $@ 87 | 88 | test_fast: http_parser.o test.o http_parser.h 89 | $(CC) $(CFLAGS_FAST) $(LDFLAGS) http_parser.o test.o -o $@ 90 | 91 | test.o: test.c http_parser.h Makefile 92 | $(CC) $(CPPFLAGS_FAST) $(CFLAGS_FAST) -c test.c -o $@ 93 | 94 | bench: http_parser.o bench.o 95 | $(CC) $(CFLAGS_BENCH) $(LDFLAGS) http_parser.o bench.o -o $@ 96 | 97 | bench.o: bench.c http_parser.h Makefile 98 | $(CC) $(CPPFLAGS_BENCH) $(CFLAGS_BENCH) -c bench.c -o $@ 99 | 100 | http_parser.o: http_parser.c http_parser.h Makefile 101 | $(CC) $(CPPFLAGS_FAST) $(CFLAGS_FAST) -c http_parser.c 102 | 103 | test-run-timed: test_fast 104 | while(true) do time $(HELPER) 
./test_fast$(BINEXT) > /dev/null; done 105 | 106 | test-valgrind: test_g 107 | valgrind ./test_g 108 | 109 | libhttp_parser.o: http_parser.c http_parser.h Makefile 110 | $(CC) $(CPPFLAGS_FAST) $(CFLAGS_LIB) -c http_parser.c -o libhttp_parser.o 111 | 112 | library: libhttp_parser.o 113 | $(CC) $(LDFLAGS_LIB) -o $(LIBNAME) $< 114 | 115 | package: http_parser.o 116 | $(AR) rcs libhttp_parser.a http_parser.o 117 | 118 | url_parser: http_parser.o contrib/url_parser.c 119 | $(CC) $(CPPFLAGS_FAST) $(CFLAGS_FAST) $^ -o $@ 120 | 121 | url_parser_g: http_parser_g.o contrib/url_parser.c 122 | $(CC) $(CPPFLAGS_DEBUG) $(CFLAGS_DEBUG) $^ -o $@ 123 | 124 | parsertrace: http_parser.o contrib/parsertrace.c 125 | $(CC) $(CPPFLAGS_FAST) $(CFLAGS_FAST) $^ -o parsertrace$(BINEXT) 126 | 127 | parsertrace_g: http_parser_g.o contrib/parsertrace.c 128 | $(CC) $(CPPFLAGS_DEBUG) $(CFLAGS_DEBUG) $^ -o parsertrace_g$(BINEXT) 129 | 130 | tags: http_parser.c http_parser.h test.c 131 | ctags $^ 132 | 133 | install: library 134 | $(INSTALL) -D http_parser.h $(DESTDIR)$(INCLUDEDIR)/http_parser.h 135 | $(INSTALL) -D $(LIBNAME) $(DESTDIR)$(LIBDIR)/$(LIBNAME) 136 | ln -sf $(LIBNAME) $(DESTDIR)$(LIBDIR)/$(SONAME) 137 | ln -sf $(LIBNAME) $(DESTDIR)$(LIBDIR)/$(SOLIBNAME).$(SOEXT) 138 | 139 | install-strip: library 140 | $(INSTALL) -D http_parser.h $(DESTDIR)$(INCLUDEDIR)/http_parser.h 141 | $(INSTALL) -D -s $(LIBNAME) $(DESTDIR)$(LIBDIR)/$(LIBNAME) 142 | ln -sf $(LIBNAME) $(DESTDIR)$(LIBDIR)/$(SONAME) 143 | ln -sf $(LIBNAME) $(DESTDIR)$(LIBDIR)/$(SOLIBNAME).$(SOEXT) 144 | 145 | uninstall: 146 | rm $(DESTDIR)$(INCLUDEDIR)/http_parser.h 147 | rm $(DESTDIR)$(LIBDIR)/$(SOLIBNAME).$(SOEXT) 148 | rm $(DESTDIR)$(LIBDIR)/$(SONAME) 149 | rm $(DESTDIR)$(LIBDIR)/$(LIBNAME) 150 | 151 | clean: 152 | rm -f *.o *.a tags test test_fast test_g \ 153 | http_parser.tar libhttp_parser.so.* \ 154 | url_parser url_parser_g parsertrace parsertrace_g \ 155 | *.exe *.exe.so 156 | 157 | contrib/url_parser.c: http_parser.h 158 | 
contrib/parsertrace.c: http_parser.h 159 | 160 | .PHONY: clean package test-run test-run-timed test-valgrind install install-strip uninstall 161 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | HTTP Parser 2 | =========== 3 | 4 | http-parser is [**not** actively maintained](https://github.com/nodejs/http-parser/issues/522). 5 | New projects and projects looking to migrate should consider [llhttp](https://github.com/nodejs/llhttp). 6 | 7 | [![Build Status](https://api.travis-ci.org/nodejs/http-parser.svg?branch=master)](https://travis-ci.org/nodejs/http-parser) 8 | 9 | This is a parser for HTTP messages written in C. It parses both requests and 10 | responses. The parser is designed to be used in performance HTTP 11 | applications. It does not make any syscalls nor allocations, it does not 12 | buffer data, it can be interrupted at anytime. Depending on your 13 | architecture, it only requires about 40 bytes of data per message 14 | stream (in a web server that is per connection). 15 | 16 | Features: 17 | 18 | * No dependencies 19 | * Handles persistent streams (keep-alive). 20 | * Decodes chunked encoding. 21 | * Upgrade support 22 | * Defends against buffer overflow attacks. 23 | 24 | The parser extracts the following information from HTTP messages: 25 | 26 | * Header fields and values 27 | * Content-Length 28 | * Request method 29 | * Response status code 30 | * Transfer-Encoding 31 | * HTTP version 32 | * Request URL 33 | * Message body 34 | 35 | 36 | Usage 37 | ----- 38 | 39 | One `http_parser` object is used per TCP connection. Initialize the struct 40 | using `http_parser_init()` and set the callbacks. That might look something 41 | like this for a request parser: 42 | ```c 43 | http_parser_settings settings; 44 | settings.on_url = my_url_callback; 45 | settings.on_header_field = my_header_field_callback; 46 | /* ... 
*/ 47 | 48 | http_parser *parser = malloc(sizeof(http_parser)); 49 | http_parser_init(parser, HTTP_REQUEST); 50 | parser->data = my_socket; 51 | ``` 52 | 53 | When data is received on the socket execute the parser and check for errors. 54 | 55 | ```c 56 | size_t len = 80*1024, nparsed; 57 | char buf[len]; 58 | ssize_t recved; 59 | 60 | recved = recv(fd, buf, len, 0); 61 | 62 | if (recved < 0) { 63 | /* Handle error. */ 64 | } 65 | 66 | /* Start up / continue the parser. 67 | * Note we pass recved==0 to signal that EOF has been received. 68 | */ 69 | nparsed = http_parser_execute(parser, &settings, buf, recved); 70 | 71 | if (parser->upgrade) { 72 | /* handle new protocol */ 73 | } else if (nparsed != recved) { 74 | /* Handle error. Usually just close the connection. */ 75 | } 76 | ``` 77 | 78 | `http_parser` needs to know where the end of the stream is. For example, sometimes 79 | servers send responses without Content-Length and expect the client to 80 | consume input (for the body) until EOF. To tell `http_parser` about EOF, give 81 | `0` as the fourth parameter to `http_parser_execute()`. Callbacks and errors 82 | can still be encountered during an EOF, so one must still be prepared 83 | to receive them. 84 | 85 | Scalar valued message information such as `status_code`, `method`, and the 86 | HTTP version are stored in the parser structure. This data is only 87 | temporarily stored in `http_parser` and gets reset on each new message. If 88 | this information is needed later, copy it out of the structure during the 89 | `headers_complete` callback. 90 | 91 | The parser decodes the transfer-encoding for both requests and responses 92 | transparently. That is, a chunked encoding is decoded before being sent to 93 | the on_body callback. 94 | 95 | 96 | The Special Problem of Upgrade 97 | ------------------------------ 98 | 99 | `http_parser` supports upgrading the connection to a different protocol.
An 100 | increasingly common example of this is the WebSocket protocol which sends 101 | a request like 102 | 103 | GET /demo HTTP/1.1 104 | Upgrade: WebSocket 105 | Connection: Upgrade 106 | Host: example.com 107 | Origin: http://example.com 108 | WebSocket-Protocol: sample 109 | 110 | followed by non-HTTP data. 111 | 112 | (See [RFC6455](https://tools.ietf.org/html/rfc6455) for more information on the 113 | WebSocket protocol.) 114 | 115 | To support this, the parser will treat this as a normal HTTP message without a 116 | body, issuing both on_headers_complete and on_message_complete callbacks. However, 117 | http_parser_execute() will stop parsing at the end of the headers and return. 118 | 119 | The user is expected to check if `parser->upgrade` has been set to 1 after 120 | `http_parser_execute()` returns. Non-HTTP data begins at the buffer supplied 121 | offset by the return value of `http_parser_execute()`. 122 | 123 | 124 | Callbacks 125 | --------- 126 | 127 | During the `http_parser_execute()` call, the callbacks set in 128 | `http_parser_settings` will be executed. The parser maintains state and 129 | never looks behind, so buffering the data is not necessary. If you need to 130 | save certain data for later usage, you can do that from the callbacks. 131 | 132 | There are two types of callbacks: 133 | 134 | * notification `typedef int (*http_cb) (http_parser*);` 135 | Callbacks: on_message_begin, on_headers_complete, on_message_complete. 136 | * data `typedef int (*http_data_cb) (http_parser*, const char *at, size_t length);` 137 | Callbacks: (requests only) on_url, 138 | (common) on_header_field, on_header_value, on_body; 139 | 140 | Callbacks must return 0 on success. Returning a non-zero value indicates an 141 | error to the parser, making it exit immediately. 142 | 143 | For cases where it is necessary to pass local information to/from a callback, 144 | the `http_parser` object's `data` field can be used.
145 | An example of such a case is when using threads to handle a socket connection, 146 | parse a request, and then give a response over that socket. By instantiation 147 | of a thread-local struct containing relevant data (e.g. accepted socket, 148 | allocated memory for callbacks to write into, etc), a parser's callbacks are 149 | able to communicate data between the scope of the thread and the scope of the 150 | callback in a threadsafe manner. This allows `http_parser` to be used in 151 | multi-threaded contexts. 152 | 153 | Example: 154 | ```c 155 | typedef struct { 156 | socket_t sock; 157 | void* buffer; 158 | int buf_len; 159 | } custom_data_t; 160 | 161 | 162 | int my_url_callback(http_parser* parser, const char *at, size_t length) { 163 | /* access to thread local custom_data_t struct. 164 | Use this access to save parsed data for later use in a thread-local 165 | buffer, or communicate over socket 166 | */ 167 | parser->data; 168 | ... 169 | return 0; 170 | } 171 | 172 | ... 173 | 174 | void http_parser_thread(socket_t sock) { 175 | int nparsed = 0; 176 | /* allocate memory for user data */ 177 | custom_data_t *my_data = malloc(sizeof(custom_data_t)); 178 | 179 | /* some information for use by callbacks. 180 | * achieves thread -> callback information flow */ 181 | my_data->sock = sock; 182 | 183 | /* instantiate a thread-local parser */ 184 | http_parser *parser = malloc(sizeof(http_parser)); 185 | http_parser_init(parser, HTTP_REQUEST); /* initialise parser */ 186 | /* this custom data reference is accessible through the reference to the 187 | parser supplied to callback functions */ 188 | parser->data = my_data; 189 | 190 | http_parser_settings settings; /* set up callbacks */ 191 | settings.on_url = my_url_callback; 192 | 193 | /* execute parser */ 194 | nparsed = http_parser_execute(parser, &settings, buf, recved); 195 | 196 | ... 197 | /* parsed information copied from callback.
198 | can now perform action on data copied into thread-local memory from callbacks. 199 | achieves callback -> thread information flow */ 200 | my_data->buffer; 201 | ... 202 | } 203 | 204 | ``` 205 | 206 | In case you parse an HTTP message in chunks (i.e. `read()` request line 207 | from socket, parse, read half headers, parse, etc) your data callbacks 208 | may be called more than once. `http_parser` guarantees that the data pointer is only 209 | valid for the lifetime of the callback. You can also `read()` into a heap allocated 210 | buffer to avoid copying memory around if this fits your application. 211 | 212 | Reading headers may be a tricky task if you read/parse headers partially. 213 | Basically, you need to remember whether the last header callback was field or value 214 | and apply the following logic: 215 | 216 | (on_header_field and on_header_value shortened to on_h_*) 217 | ------------------------ ------------ -------------------------------------------- 218 | | State (prev. callback) | Callback | Description/action | 219 | ------------------------ ------------ -------------------------------------------- 220 | | nothing (first call) | on_h_field | Allocate new buffer and copy callback data | 221 | | | | into it | 222 | ------------------------ ------------ -------------------------------------------- 223 | | value | on_h_field | New header started. | 224 | | | | Copy current name,value buffers to headers | 225 | | | | list and allocate new buffer for new name | 226 | ------------------------ ------------ -------------------------------------------- 227 | | field | on_h_field | Previous name continues. Reallocate name | 228 | | | | buffer and append callback data to it | 229 | ------------------------ ------------ -------------------------------------------- 230 | | field | on_h_value | Value for current header started.
Allocate | 231 | | | | new buffer and copy callback data to it | 232 | ------------------------ ------------ -------------------------------------------- 233 | | value | on_h_value | Value continues. Reallocate value buffer | 234 | | | | and append callback data to it | 235 | ------------------------ ------------ -------------------------------------------- 236 | 237 | 238 | Parsing URLs 239 | ------------ 240 | 241 | A simplistic zero-copy URL parser is provided as `http_parser_parse_url()`. 242 | Users of this library may wish to use it to parse URLs constructed from 243 | consecutive `on_url` callbacks. 244 | 245 | See examples of reading in headers: 246 | 247 | * [partial example](http://gist.github.com/155877) in C 248 | * [from http-parser tests](http://github.com/joyent/http-parser/blob/37a0ff8/test.c#L403) in C 249 | * [from Node library](http://github.com/joyent/node/blob/842eaf4/src/http.js#L284) in Javascript 250 | -------------------------------------------------------------------------------- /bench.c: -------------------------------------------------------------------------------- 1 | /* Copyright Fedor Indutny. All rights reserved. 2 | * 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy 4 | * of this software and associated documentation files (the "Software"), to 5 | * deal in the Software without restriction, including without limitation the 6 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 7 | * sell copies of the Software, and to permit persons to whom the Software is 8 | * furnished to do so, subject to the following conditions: 9 | * 10 | * The above copyright notice and this permission notice shall be included in 11 | * all copies or substantial portions of the Software. 
12 | * 13 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 19 | * IN THE SOFTWARE. 20 | */ 21 | #include "http_parser.h" 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | 28 | /* 8 gb */ 29 | static const int64_t kBytes = 8LL << 30; 30 | 31 | static const char data[] = 32 | "POST /joyent/http-parser HTTP/1.1\r\n" 33 | "Host: github.com\r\n" 34 | "DNT: 1\r\n" 35 | "Accept-Encoding: gzip, deflate, sdch\r\n" 36 | "Accept-Language: ru-RU,ru;q=0.8,en-US;q=0.6,en;q=0.4\r\n" 37 | "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) " 38 | "AppleWebKit/537.36 (KHTML, like Gecko) " 39 | "Chrome/39.0.2171.65 Safari/537.36\r\n" 40 | "Accept: text/html,application/xhtml+xml,application/xml;q=0.9," 41 | "image/webp,*/*;q=0.8\r\n" 42 | "Referer: https://github.com/joyent/http-parser\r\n" 43 | "Connection: keep-alive\r\n" 44 | "Transfer-Encoding: chunked\r\n" 45 | "Cache-Control: max-age=0\r\n\r\nb\r\nhello world\r\n0\r\n"; 46 | static const size_t data_len = sizeof(data) - 1; 47 | 48 | static int on_info(http_parser* p) { 49 | return 0; 50 | } 51 | 52 | 53 | static int on_data(http_parser* p, const char *at, size_t length) { 54 | return 0; 55 | } 56 | 57 | static http_parser_settings settings = { 58 | .on_message_begin = on_info, 59 | .on_headers_complete = on_info, 60 | .on_message_complete = on_info, 61 | .on_header_field = on_data, 62 | .on_header_value = on_data, 63 | .on_url = on_data, 64 | .on_status = on_data, 65 | .on_body = on_data 66 | }; 67 | 68 | int bench(int iter_count, int silent) { 69 | struct http_parser 
parser; 70 | int i; 71 | int err; 72 | struct timeval start; 73 | struct timeval end; 74 | 75 | if (!silent) { 76 | err = gettimeofday(&start, NULL); 77 | assert(err == 0); 78 | } 79 | 80 | fprintf(stderr, "req_len=%d\n", (int) data_len); 81 | for (i = 0; i < iter_count; i++) { 82 | size_t parsed; 83 | http_parser_init(&parser, HTTP_REQUEST); 84 | 85 | parsed = http_parser_execute(&parser, &settings, data, data_len); 86 | assert(parsed == data_len); 87 | } 88 | 89 | if (!silent) { 90 | double elapsed; 91 | double bw; 92 | double total; 93 | 94 | err = gettimeofday(&end, NULL); 95 | assert(err == 0); 96 | 97 | fprintf(stdout, "Benchmark result:\n"); 98 | 99 | elapsed = (double) (end.tv_sec - start.tv_sec) + 100 | (end.tv_usec - start.tv_usec) * 1e-6f; 101 | 102 | total = (double) iter_count * data_len; 103 | bw = (double) total / elapsed; 104 | 105 | fprintf(stdout, "%.2f mb | %.2f mb/s | %.2f req/sec | %.2f s\n", 106 | (double) total / (1024 * 1024), 107 | bw / (1024 * 1024), 108 | (double) iter_count / elapsed, 109 | elapsed); 110 | 111 | fflush(stdout); 112 | } 113 | 114 | return 0; 115 | } 116 | 117 | int main(int argc, char** argv) { 118 | int64_t iterations; 119 | 120 | iterations = kBytes / (int64_t) data_len; 121 | if (argc == 2 && strcmp(argv[1], "infinite") == 0) { 122 | for (;;) 123 | bench(iterations, 1); 124 | return 0; 125 | } else { 126 | return bench(iterations, 0); 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /contrib/parsertrace.c: -------------------------------------------------------------------------------- 1 | /* Copyright Joyent, Inc. and other Node contributors. 
2 | * 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy 4 | * of this software and associated documentation files (the "Software"), to 5 | * deal in the Software without restriction, including without limitation the 6 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 7 | * sell copies of the Software, and to permit persons to whom the Software is 8 | * furnished to do so, subject to the following conditions: 9 | * 10 | * The above copyright notice and this permission notice shall be included in 11 | * all copies or substantial portions of the Software. 12 | * 13 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 19 | * IN THE SOFTWARE. 
20 | */ 21 | 22 | /* Dump what the parser finds to stdout as it happen */ 23 | 24 | #include "http_parser.h" 25 | #include 26 | #include 27 | #include 28 | 29 | int on_message_begin(http_parser* _) { 30 | (void)_; 31 | printf("\n***MESSAGE BEGIN***\n\n"); 32 | return 0; 33 | } 34 | 35 | int on_headers_complete(http_parser* _) { 36 | (void)_; 37 | printf("\n***HEADERS COMPLETE***\n\n"); 38 | return 0; 39 | } 40 | 41 | int on_message_complete(http_parser* _) { 42 | (void)_; 43 | printf("\n***MESSAGE COMPLETE***\n\n"); 44 | return 0; 45 | } 46 | 47 | int on_url(http_parser* _, const char* at, size_t length) { 48 | (void)_; 49 | printf("Url: %.*s\n", (int)length, at); 50 | return 0; 51 | } 52 | 53 | int on_header_field(http_parser* _, const char* at, size_t length) { 54 | (void)_; 55 | printf("Header field: %.*s\n", (int)length, at); 56 | return 0; 57 | } 58 | 59 | int on_header_value(http_parser* _, const char* at, size_t length) { 60 | (void)_; 61 | printf("Header value: %.*s\n", (int)length, at); 62 | return 0; 63 | } 64 | 65 | int on_body(http_parser* _, const char* at, size_t length) { 66 | (void)_; 67 | printf("Body: %.*s\n", (int)length, at); 68 | return 0; 69 | } 70 | 71 | void usage(const char* name) { 72 | fprintf(stderr, 73 | "Usage: %s $type $filename\n" 74 | " type: -x, where x is one of {r,b,q}\n" 75 | " parses file as a Response, reQuest, or Both\n", 76 | name); 77 | exit(EXIT_FAILURE); 78 | } 79 | 80 | int main(int argc, char* argv[]) { 81 | enum http_parser_type file_type; 82 | 83 | if (argc != 3) { 84 | usage(argv[0]); 85 | } 86 | 87 | char* type = argv[1]; 88 | if (type[0] != '-') { 89 | usage(argv[0]); 90 | } 91 | 92 | switch (type[1]) { 93 | /* in the case of "-", type[1] will be NUL */ 94 | case 'r': 95 | file_type = HTTP_RESPONSE; 96 | break; 97 | case 'q': 98 | file_type = HTTP_REQUEST; 99 | break; 100 | case 'b': 101 | file_type = HTTP_BOTH; 102 | break; 103 | default: 104 | usage(argv[0]); 105 | } 106 | 107 | char* filename = argv[2]; 108 | 
FILE* file = fopen(filename, "r"); 109 | if (file == NULL) { 110 | perror("fopen"); 111 | goto fail; 112 | } 113 | 114 | fseek(file, 0, SEEK_END); 115 | long file_length = ftell(file); 116 | if (file_length == -1) { 117 | perror("ftell"); 118 | goto fail; 119 | } 120 | fseek(file, 0, SEEK_SET); 121 | 122 | char* data = malloc(file_length); 123 | if (fread(data, 1, file_length, file) != (size_t)file_length) { 124 | fprintf(stderr, "couldn't read entire file\n"); 125 | free(data); 126 | goto fail; 127 | } 128 | 129 | http_parser_settings settings; 130 | memset(&settings, 0, sizeof(settings)); 131 | settings.on_message_begin = on_message_begin; 132 | settings.on_url = on_url; 133 | settings.on_header_field = on_header_field; 134 | settings.on_header_value = on_header_value; 135 | settings.on_headers_complete = on_headers_complete; 136 | settings.on_body = on_body; 137 | settings.on_message_complete = on_message_complete; 138 | 139 | http_parser parser; 140 | http_parser_init(&parser, file_type); 141 | size_t nparsed = http_parser_execute(&parser, &settings, data, file_length); 142 | free(data); 143 | 144 | if (nparsed != (size_t)file_length) { 145 | fprintf(stderr, 146 | "Error: %s (%s)\n", 147 | http_errno_description(HTTP_PARSER_ERRNO(&parser)), 148 | http_errno_name(HTTP_PARSER_ERRNO(&parser))); 149 | goto fail; 150 | } 151 | 152 | return EXIT_SUCCESS; 153 | 154 | fail: 155 | fclose(file); 156 | return EXIT_FAILURE; 157 | } 158 | -------------------------------------------------------------------------------- /contrib/url_parser.c: -------------------------------------------------------------------------------- 1 | #include "http_parser.h" 2 | #include 3 | #include 4 | 5 | void 6 | dump_url (const char *url, const struct http_parser_url *u) 7 | { 8 | unsigned int i; 9 | 10 | printf("\tfield_set: 0x%x, port: %u\n", u->field_set, u->port); 11 | for (i = 0; i < UF_MAX; i++) { 12 | if ((u->field_set & (1 << i)) == 0) { 13 | printf("\tfield_data[%u]: unset\n", i); 14 | 
continue; 15 | } 16 | 17 | printf("\tfield_data[%u]: off: %u, len: %u, part: %.*s\n", 18 | i, 19 | u->field_data[i].off, 20 | u->field_data[i].len, 21 | u->field_data[i].len, 22 | url + u->field_data[i].off); 23 | } 24 | } 25 | 26 | int main(int argc, char ** argv) { 27 | struct http_parser_url u; 28 | int len, connect, result; 29 | 30 | if (argc != 3) { 31 | printf("Syntax : %s connect|get url\n", argv[0]); 32 | return 1; 33 | } 34 | len = strlen(argv[2]); 35 | connect = strcmp("connect", argv[1]) == 0 ? 1 : 0; 36 | printf("Parsing %s, connect %d\n", argv[2], connect); 37 | 38 | http_parser_url_init(&u); 39 | result = http_parser_parse_url(argv[2], len, connect, &u); 40 | if (result != 0) { 41 | printf("Parse error : %d\n", result); 42 | return result; 43 | } 44 | printf("Parse ok, result : \n"); 45 | dump_url(argv[2], &u); 46 | return 0; 47 | } 48 | -------------------------------------------------------------------------------- /fuzzers/fuzz_parser.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "http_parser.h" 5 | 6 | int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) 7 | { 8 | static const http_parser_settings settings_null = { 9 | .on_message_begin = 0 10 | , .on_header_field = 0 11 | ,.on_header_value = 0 12 | ,.on_url = 0 13 | ,.on_status = 0 14 | ,.on_body = 0 15 | ,.on_headers_complete = 0 16 | ,.on_message_complete = 0 17 | ,.on_chunk_header = 0 18 | ,.on_chunk_complete = 0 19 | }; 20 | 21 | http_parser parser; 22 | http_parser_init(&parser, HTTP_BOTH); 23 | http_parser_execute(&parser, &settings_null, (char*)data, size); 24 | 25 | return 0; 26 | } 27 | -------------------------------------------------------------------------------- /fuzzers/fuzz_url.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "http_parser.h" 5 | 6 | int LLVMFuzzerTestOneInput(const uint8_t *data, 
size_t size) 7 | { 8 | struct http_parser_url u; 9 | http_parser_url_init(&u); 10 | http_parser_parse_url((char*)data, size, 0, &u); 11 | http_parser_parse_url((char*)data, size, 1, &u); 12 | 13 | return 0; 14 | } 15 | -------------------------------------------------------------------------------- /http_parser.c: -------------------------------------------------------------------------------- 1 | /* Copyright Joyent, Inc. and other Node contributors. 2 | * 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy 4 | * of this software and associated documentation files (the "Software"), to 5 | * deal in the Software without restriction, including without limitation the 6 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 7 | * sell copies of the Software, and to permit persons to whom the Software is 8 | * furnished to do so, subject to the following conditions: 9 | * 10 | * The above copyright notice and this permission notice shall be included in 11 | * all copies or substantial portions of the Software. 12 | * 13 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 19 | * IN THE SOFTWARE. 20 | */ 21 | #include "http_parser.h" 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | 28 | static uint32_t max_header_size = HTTP_MAX_HEADER_SIZE; 29 | 30 | #ifndef ULLONG_MAX 31 | # define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */ 32 | #endif 33 | 34 | #ifndef MIN 35 | # define MIN(a,b) ((a) < (b) ? 
(a) : (b)) 36 | #endif 37 | 38 | #ifndef ARRAY_SIZE 39 | # define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0])) 40 | #endif 41 | 42 | #ifndef BIT_AT 43 | # define BIT_AT(a, i) \ 44 | (!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \ 45 | (1 << ((unsigned int) (i) & 7)))) 46 | #endif 47 | 48 | #ifndef ELEM_AT 49 | # define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? (a)[(i)] : (v)) 50 | #endif 51 | 52 | #define SET_ERRNO(e) \ 53 | do { \ 54 | parser->nread = nread; \ 55 | parser->http_errno = (e); \ 56 | } while(0) 57 | 58 | #define CURRENT_STATE() p_state 59 | #define UPDATE_STATE(V) p_state = (enum state) (V); 60 | #define RETURN(V) \ 61 | do { \ 62 | parser->nread = nread; \ 63 | parser->state = CURRENT_STATE(); \ 64 | return (V); \ 65 | } while (0); 66 | #define REEXECUTE() \ 67 | goto reexecute; \ 68 | 69 | 70 | #ifdef __GNUC__ 71 | # define LIKELY(X) __builtin_expect(!!(X), 1) 72 | # define UNLIKELY(X) __builtin_expect(!!(X), 0) 73 | #else 74 | # define LIKELY(X) (X) 75 | # define UNLIKELY(X) (X) 76 | #endif 77 | 78 | 79 | /* Run the notify callback FOR, returning ER if it fails */ 80 | #define CALLBACK_NOTIFY_(FOR, ER) \ 81 | do { \ 82 | assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \ 83 | \ 84 | if (LIKELY(settings->on_##FOR)) { \ 85 | parser->state = CURRENT_STATE(); \ 86 | if (UNLIKELY(0 != settings->on_##FOR(parser))) { \ 87 | SET_ERRNO(HPE_CB_##FOR); \ 88 | } \ 89 | UPDATE_STATE(parser->state); \ 90 | \ 91 | /* We either errored above or got paused; get out */ \ 92 | if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \ 93 | return (ER); \ 94 | } \ 95 | } \ 96 | } while (0) 97 | 98 | /* Run the notify callback FOR and consume the current byte */ 99 | #define CALLBACK_NOTIFY(FOR) CALLBACK_NOTIFY_(FOR, p - data + 1) 100 | 101 | /* Run the notify callback FOR and don't consume the current byte */ 102 | #define CALLBACK_NOTIFY_NOADVANCE(FOR) CALLBACK_NOTIFY_(FOR, p - data) 103 | 104 | /* Run data callback FOR with LEN bytes, returning ER if it 
fails */ 105 | #define CALLBACK_DATA_(FOR, LEN, ER) \ 106 | do { \ 107 | assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \ 108 | \ 109 | if (FOR##_mark) { \ 110 | if (LIKELY(settings->on_##FOR)) { \ 111 | parser->state = CURRENT_STATE(); \ 112 | if (UNLIKELY(0 != \ 113 | settings->on_##FOR(parser, FOR##_mark, (LEN)))) { \ 114 | SET_ERRNO(HPE_CB_##FOR); \ 115 | } \ 116 | UPDATE_STATE(parser->state); \ 117 | \ 118 | /* We either errored above or got paused; get out */ \ 119 | if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \ 120 | return (ER); \ 121 | } \ 122 | } \ 123 | FOR##_mark = NULL; \ 124 | } \ 125 | } while (0) 126 | 127 | /* Run the data callback FOR and consume the current byte */ 128 | #define CALLBACK_DATA(FOR) \ 129 | CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1) 130 | 131 | /* Run the data callback FOR and don't consume the current byte */ 132 | #define CALLBACK_DATA_NOADVANCE(FOR) \ 133 | CALLBACK_DATA_(FOR, p - FOR##_mark, p - data) 134 | 135 | /* Set the mark FOR; non-destructive if mark is already set */ 136 | #define MARK(FOR) \ 137 | do { \ 138 | if (!FOR##_mark) { \ 139 | FOR##_mark = p; \ 140 | } \ 141 | } while (0) 142 | 143 | /* Don't allow the total size of the HTTP headers (including the status 144 | * line) to exceed max_header_size. This check is here to protect 145 | * embedders against denial-of-service attacks where the attacker feeds 146 | * us a never-ending header that the embedder keeps buffering. 147 | * 148 | * This check is arguably the responsibility of embedders but we're doing 149 | * it on the embedder's behalf because most won't bother and this way we 150 | * make the web a little safer. max_header_size is still far bigger 151 | * than any reasonable request or response so this should never affect 152 | * day-to-day operation. 
153 | */ 154 | #define COUNT_HEADER_SIZE(V) \ 155 | do { \ 156 | nread += (uint32_t)(V); \ 157 | if (UNLIKELY(nread > max_header_size)) { \ 158 | SET_ERRNO(HPE_HEADER_OVERFLOW); \ 159 | goto error; \ 160 | } \ 161 | } while (0) 162 | 163 | 164 | #define PROXY_CONNECTION "proxy-connection" 165 | #define CONNECTION "connection" 166 | #define CONTENT_LENGTH "content-length" 167 | #define TRANSFER_ENCODING "transfer-encoding" 168 | #define UPGRADE "upgrade" 169 | #define CHUNKED "chunked" 170 | #define KEEP_ALIVE "keep-alive" 171 | #define CLOSE "close" 172 | 173 | 174 | static const char *method_strings[] = 175 | { 176 | #define XX(num, name, string) #string, 177 | HTTP_METHOD_MAP(XX) 178 | #undef XX 179 | }; 180 | 181 | 182 | /* Tokens as defined by rfc 2616. Also lowercases them. 183 | * token = 1* 184 | * separators = "(" | ")" | "<" | ">" | "@" 185 | * | "," | ";" | ":" | "\" | <"> 186 | * | "/" | "[" | "]" | "?" | "=" 187 | * | "{" | "}" | SP | HT 188 | */ 189 | static const char tokens[256] = { 190 | /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */ 191 | 0, 0, 0, 0, 0, 0, 0, 0, 192 | /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */ 193 | 0, 0, 0, 0, 0, 0, 0, 0, 194 | /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */ 195 | 0, 0, 0, 0, 0, 0, 0, 0, 196 | /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */ 197 | 0, 0, 0, 0, 0, 0, 0, 0, 198 | /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */ 199 | ' ', '!', 0, '#', '$', '%', '&', '\'', 200 | /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */ 201 | 0, 0, '*', '+', 0, '-', '.', 0, 202 | /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */ 203 | '0', '1', '2', '3', '4', '5', '6', '7', 204 | /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? 
/* Maps a byte value to the hexadecimal digit it denotes: '0'-'9' -> 0..9,
 * 'A'-'F' and 'a'-'f' -> 10..15, and every other byte -> -1.
 *
 * All 256 entries are spelled out on purpose. With only the first 128
 * initialisers present, C zero-fills the rest of the array, so every byte
 * >= 0x80 would look like the valid digit 0 instead of the -1 sentinel.
 * NOTE(review): the consumers of this table are outside this excerpt;
 * presumably they index it with `(unsigned char)ch` -- confirm.
 */
static const int8_t unhex[256] =
  {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x00 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x10 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x20 */
  , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1   /* 0x30: '0'-'9' */
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x40: 'A'-'F' */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x50 */
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x60: 'a'-'f' */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x70 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x80 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x90 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xA0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xB0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xC0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xD0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xE0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xF0 */
  };
/* States of the main parser state machine. http_parser_execute() loads the
 * current value from parser->state and dispatches on it for each input byte.
 *
 * The numeric order of the enumerators is significant: PARSING_HEADER()
 * compares against s_headers_done, so enumerators must not be reordered.
 */
enum state
  { s_dead = 1 /* important that this is > 0 */
               /* In http_parser_execute() this state accepts only CR/LF and
                * reports HPE_CLOSED_CONNECTION for anything else;
                * parse_url_char() also returns it to reject a URL byte. */

  /* Type not yet known: the first byte decides between request and
   * response ('H' may start either "HTTP/" or "HEAD"). */
  , s_start_req_or_res
  , s_res_or_resp_H

  /* Response status line: "HTTP/" major "." minor SP status-code reason. */
  , s_start_res
  , s_res_H
  , s_res_HT
  , s_res_HTT
  , s_res_HTTP
  , s_res_http_major
  , s_res_http_dot
  , s_res_http_minor
  , s_res_http_end
  , s_res_first_status_code
  , s_res_status_code
  , s_res_status_start
  , s_res_status
  , s_res_line_almost_done

  /* Request line: method, URL, then protocol version. */
  , s_start_req

  , s_req_method
  , s_req_spaces_before_url
  /* URL states; transitions between them are computed by parse_url_char(). */
  , s_req_schema
  , s_req_schema_slash
  , s_req_schema_slash_slash
  , s_req_server_start
  , s_req_server
  , s_req_server_with_at
  , s_req_path
  , s_req_query_string_start
  , s_req_query_string
  , s_req_fragment_start
  , s_req_fragment
  /* Protocol token after the URL: "HTTP", or "ICE" (the _I/_IC states),
   * which is only entered when the method is SOURCE. */
  , s_req_http_start
  , s_req_http_H
  , s_req_http_HT
  , s_req_http_HTT
  , s_req_http_HTTP
  , s_req_http_I
  , s_req_http_IC
  , s_req_http_major
  , s_req_http_dot
  , s_req_http_minor
  , s_req_http_end
  , s_req_line_almost_done

  /* Header lines (field name, then value). */
  , s_header_field_start
  , s_header_field
  , s_header_value_discard_ws
  , s_header_value_discard_ws_almost_done
  , s_header_value_discard_lws
  , s_header_value_start
  , s_header_value
  , s_header_value_lws

  , s_header_almost_done

  /* NOTE(review): presumably the chunk-size line of a chunked body; these
   * sit before s_headers_done, so PARSING_HEADER() is true for them and
   * their bytes count toward the header size limit -- confirm intent. */
  , s_chunk_size_start
  , s_chunk_size
  , s_chunk_parameters
  , s_chunk_size_almost_done

  , s_headers_almost_done
  , s_headers_done

  /* Important: 's_headers_done' must be the last 'header' state. All
   * states beyond this must be 'body' states. It is used for overflow
   * checking. See the PARSING_HEADER() macro.
   */

  , s_chunk_data
  , s_chunk_data_almost_done
  , s_chunk_data_done

  , s_body_identity
  , s_body_identity_eof /* with zero-length input, http_parser_execute()
                         * fires on_message_complete from this state */

  , s_message_done
  };
/* Macros for character classes; depends on strict-mode.
 *
 * Every macro parenthesises its argument and its whole expansion, so that
 * compound arguments (e.g. a conditional expression) are classified as a
 * unit. Arguments may still be evaluated more than once: do not pass
 * expressions with side effects.
 */
#define CR                  '\r'
#define LF                  '\n'
/* ASCII lower-casing by setting bit 0x20; only meaningful for letters. */
#define LOWER(c)            ((unsigned char)((c) | 0x20))
#define IS_ALPHA(c)         (LOWER(c) >= 'a' && LOWER(c) <= 'z')
#define IS_NUM(c)           ((c) >= '0' && (c) <= '9')
#define IS_ALPHANUM(c)      (IS_ALPHA(c) || IS_NUM(c))
#define IS_HEX(c)           (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
#define IS_MARK(c)          ((c) == '-' || (c) == '_' || (c) == '.' || \
  (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
  (c) == ')')
#define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
  (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
  (c) == '$' || (c) == ',')

/* Token lookup that rejects SP even though the tokens[] table accepts it. */
#define STRICT_TOKEN(c)     (((c) == ' ') ? 0 : tokens[(unsigned char)(c)])

#if HTTP_PARSER_STRICT
#define TOKEN(c)            STRICT_TOKEN(c)
#define IS_URL_CHAR(c)      (BIT_AT(normal_url_char, (unsigned char)(c)))
#define IS_HOST_CHAR(c)     (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
#else
#define TOKEN(c)            (tokens[(unsigned char)(c)])
/* Non-strict mode additionally accepts any byte with the high bit set. */
#define IS_URL_CHAR(c)                                                         \
  (BIT_AT(normal_url_char, (unsigned char)(c)) || ((c) & 0x80))
#define IS_HOST_CHAR(c)                                                        \
  (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
#endif

/**
 * Verify that a char is a valid visible (printable) US-ASCII
 * character or %x80-FF. CR, LF and HT (9) are accepted as well.
 **/
#define IS_HEADER_CHAR(ch)                                                     \
  ((ch) == CR || (ch) == LF || (ch) == 9 ||                                    \
   ((unsigned char)(ch) > 31 && (ch) != 127))
492 | */ 493 | static enum state 494 | parse_url_char(enum state s, const char ch) 495 | { 496 | if (ch == ' ' || ch == '\r' || ch == '\n') { 497 | return s_dead; 498 | } 499 | 500 | #if HTTP_PARSER_STRICT 501 | if (ch == '\t' || ch == '\f') { 502 | return s_dead; 503 | } 504 | #endif 505 | 506 | switch (s) { 507 | case s_req_spaces_before_url: 508 | /* Proxied requests are followed by scheme of an absolute URI (alpha). 509 | * All methods except CONNECT are followed by '/' or '*'. 510 | */ 511 | 512 | if (ch == '/' || ch == '*') { 513 | return s_req_path; 514 | } 515 | 516 | if (IS_ALPHA(ch)) { 517 | return s_req_schema; 518 | } 519 | 520 | break; 521 | 522 | case s_req_schema: 523 | if (IS_ALPHA(ch)) { 524 | return s; 525 | } 526 | 527 | if (ch == ':') { 528 | return s_req_schema_slash; 529 | } 530 | 531 | break; 532 | 533 | case s_req_schema_slash: 534 | if (ch == '/') { 535 | return s_req_schema_slash_slash; 536 | } 537 | 538 | break; 539 | 540 | case s_req_schema_slash_slash: 541 | if (ch == '/') { 542 | return s_req_server_start; 543 | } 544 | 545 | break; 546 | 547 | case s_req_server_with_at: 548 | if (ch == '@') { 549 | return s_dead; 550 | } 551 | 552 | /* fall through */ 553 | case s_req_server_start: 554 | case s_req_server: 555 | if (ch == '/') { 556 | return s_req_path; 557 | } 558 | 559 | if (ch == '?') { 560 | return s_req_query_string_start; 561 | } 562 | 563 | if (ch == '@') { 564 | return s_req_server_with_at; 565 | } 566 | 567 | if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') { 568 | return s_req_server; 569 | } 570 | 571 | break; 572 | 573 | case s_req_path: 574 | if (IS_URL_CHAR(ch)) { 575 | return s; 576 | } 577 | 578 | switch (ch) { 579 | case '?': 580 | return s_req_query_string_start; 581 | 582 | case '#': 583 | return s_req_fragment_start; 584 | } 585 | 586 | break; 587 | 588 | case s_req_query_string_start: 589 | case s_req_query_string: 590 | if (IS_URL_CHAR(ch)) { 591 | return s_req_query_string; 592 | } 593 | 594 | switch (ch) { 
595 | case '?': 596 | /* allow extra '?' in query string */ 597 | return s_req_query_string; 598 | 599 | case '#': 600 | return s_req_fragment_start; 601 | } 602 | 603 | break; 604 | 605 | case s_req_fragment_start: 606 | if (IS_URL_CHAR(ch)) { 607 | return s_req_fragment; 608 | } 609 | 610 | switch (ch) { 611 | case '?': 612 | return s_req_fragment; 613 | 614 | case '#': 615 | return s; 616 | } 617 | 618 | break; 619 | 620 | case s_req_fragment: 621 | if (IS_URL_CHAR(ch)) { 622 | return s; 623 | } 624 | 625 | switch (ch) { 626 | case '?': 627 | case '#': 628 | return s; 629 | } 630 | 631 | break; 632 | 633 | default: 634 | break; 635 | } 636 | 637 | /* We should never fall out of the switch above unless there's an error */ 638 | return s_dead; 639 | } 640 | 641 | size_t http_parser_execute (http_parser *parser, 642 | const http_parser_settings *settings, 643 | const char *data, 644 | size_t len) 645 | { 646 | char c, ch; 647 | int8_t unhex_val; 648 | const char *p = data; 649 | const char *header_field_mark = 0; 650 | const char *header_value_mark = 0; 651 | const char *url_mark = 0; 652 | const char *body_mark = 0; 653 | const char *status_mark = 0; 654 | enum state p_state = (enum state) parser->state; 655 | const unsigned int lenient = parser->lenient_http_headers; 656 | const unsigned int allow_chunked_length = parser->allow_chunked_length; 657 | 658 | uint32_t nread = parser->nread; 659 | 660 | /* We're in an error state. Don't bother doing anything. */ 661 | if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { 662 | return 0; 663 | } 664 | 665 | if (len == 0) { 666 | switch (CURRENT_STATE()) { 667 | case s_body_identity_eof: 668 | /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if 669 | * we got paused. 
670 | */ 671 | CALLBACK_NOTIFY_NOADVANCE(message_complete); 672 | return 0; 673 | 674 | case s_dead: 675 | case s_start_req_or_res: 676 | case s_start_res: 677 | case s_start_req: 678 | return 0; 679 | 680 | default: 681 | SET_ERRNO(HPE_INVALID_EOF_STATE); 682 | return 1; 683 | } 684 | } 685 | 686 | 687 | if (CURRENT_STATE() == s_header_field) 688 | header_field_mark = data; 689 | if (CURRENT_STATE() == s_header_value) 690 | header_value_mark = data; 691 | switch (CURRENT_STATE()) { 692 | case s_req_path: 693 | case s_req_schema: 694 | case s_req_schema_slash: 695 | case s_req_schema_slash_slash: 696 | case s_req_server_start: 697 | case s_req_server: 698 | case s_req_server_with_at: 699 | case s_req_query_string_start: 700 | case s_req_query_string: 701 | case s_req_fragment_start: 702 | case s_req_fragment: 703 | url_mark = data; 704 | break; 705 | case s_res_status: 706 | status_mark = data; 707 | break; 708 | default: 709 | break; 710 | } 711 | 712 | for (p=data; p != data + len; p++) { 713 | ch = *p; 714 | 715 | if (PARSING_HEADER(CURRENT_STATE())) 716 | COUNT_HEADER_SIZE(1); 717 | 718 | reexecute: 719 | switch (CURRENT_STATE()) { 720 | 721 | case s_dead: 722 | /* this state is used after a 'Connection: close' message 723 | * the parser will error out if it reads another message 724 | */ 725 | if (LIKELY(ch == CR || ch == LF)) 726 | break; 727 | 728 | SET_ERRNO(HPE_CLOSED_CONNECTION); 729 | goto error; 730 | 731 | case s_start_req_or_res: 732 | { 733 | if (ch == CR || ch == LF) 734 | break; 735 | parser->flags = 0; 736 | parser->uses_transfer_encoding = 0; 737 | parser->content_length = ULLONG_MAX; 738 | 739 | if (ch == 'H') { 740 | UPDATE_STATE(s_res_or_resp_H); 741 | 742 | CALLBACK_NOTIFY(message_begin); 743 | } else { 744 | parser->type = HTTP_REQUEST; 745 | UPDATE_STATE(s_start_req); 746 | REEXECUTE(); 747 | } 748 | 749 | break; 750 | } 751 | 752 | case s_res_or_resp_H: 753 | if (ch == 'T') { 754 | parser->type = HTTP_RESPONSE; 755 | 
UPDATE_STATE(s_res_HT); 756 | } else { 757 | if (UNLIKELY(ch != 'E')) { 758 | SET_ERRNO(HPE_INVALID_CONSTANT); 759 | goto error; 760 | } 761 | 762 | parser->type = HTTP_REQUEST; 763 | parser->method = HTTP_HEAD; 764 | parser->index = 2; 765 | UPDATE_STATE(s_req_method); 766 | } 767 | break; 768 | 769 | case s_start_res: 770 | { 771 | if (ch == CR || ch == LF) 772 | break; 773 | parser->flags = 0; 774 | parser->uses_transfer_encoding = 0; 775 | parser->content_length = ULLONG_MAX; 776 | 777 | if (ch == 'H') { 778 | UPDATE_STATE(s_res_H); 779 | } else { 780 | SET_ERRNO(HPE_INVALID_CONSTANT); 781 | goto error; 782 | } 783 | 784 | CALLBACK_NOTIFY(message_begin); 785 | break; 786 | } 787 | 788 | case s_res_H: 789 | STRICT_CHECK(ch != 'T'); 790 | UPDATE_STATE(s_res_HT); 791 | break; 792 | 793 | case s_res_HT: 794 | STRICT_CHECK(ch != 'T'); 795 | UPDATE_STATE(s_res_HTT); 796 | break; 797 | 798 | case s_res_HTT: 799 | STRICT_CHECK(ch != 'P'); 800 | UPDATE_STATE(s_res_HTTP); 801 | break; 802 | 803 | case s_res_HTTP: 804 | STRICT_CHECK(ch != '/'); 805 | UPDATE_STATE(s_res_http_major); 806 | break; 807 | 808 | case s_res_http_major: 809 | if (UNLIKELY(!IS_NUM(ch))) { 810 | SET_ERRNO(HPE_INVALID_VERSION); 811 | goto error; 812 | } 813 | 814 | parser->http_major = ch - '0'; 815 | UPDATE_STATE(s_res_http_dot); 816 | break; 817 | 818 | case s_res_http_dot: 819 | { 820 | if (UNLIKELY(ch != '.')) { 821 | SET_ERRNO(HPE_INVALID_VERSION); 822 | goto error; 823 | } 824 | 825 | UPDATE_STATE(s_res_http_minor); 826 | break; 827 | } 828 | 829 | case s_res_http_minor: 830 | if (UNLIKELY(!IS_NUM(ch))) { 831 | SET_ERRNO(HPE_INVALID_VERSION); 832 | goto error; 833 | } 834 | 835 | parser->http_minor = ch - '0'; 836 | UPDATE_STATE(s_res_http_end); 837 | break; 838 | 839 | case s_res_http_end: 840 | { 841 | if (UNLIKELY(ch != ' ')) { 842 | SET_ERRNO(HPE_INVALID_VERSION); 843 | goto error; 844 | } 845 | 846 | UPDATE_STATE(s_res_first_status_code); 847 | break; 848 | } 849 | 850 | case 
s_res_first_status_code: 851 | { 852 | if (!IS_NUM(ch)) { 853 | if (ch == ' ') { 854 | break; 855 | } 856 | 857 | SET_ERRNO(HPE_INVALID_STATUS); 858 | goto error; 859 | } 860 | parser->status_code = ch - '0'; 861 | UPDATE_STATE(s_res_status_code); 862 | break; 863 | } 864 | 865 | case s_res_status_code: 866 | { 867 | if (!IS_NUM(ch)) { 868 | switch (ch) { 869 | case ' ': 870 | UPDATE_STATE(s_res_status_start); 871 | break; 872 | case CR: 873 | case LF: 874 | UPDATE_STATE(s_res_status_start); 875 | REEXECUTE(); 876 | break; 877 | default: 878 | SET_ERRNO(HPE_INVALID_STATUS); 879 | goto error; 880 | } 881 | break; 882 | } 883 | 884 | parser->status_code *= 10; 885 | parser->status_code += ch - '0'; 886 | 887 | if (UNLIKELY(parser->status_code > 999)) { 888 | SET_ERRNO(HPE_INVALID_STATUS); 889 | goto error; 890 | } 891 | 892 | break; 893 | } 894 | 895 | case s_res_status_start: 896 | { 897 | MARK(status); 898 | UPDATE_STATE(s_res_status); 899 | parser->index = 0; 900 | 901 | if (ch == CR || ch == LF) 902 | REEXECUTE(); 903 | 904 | break; 905 | } 906 | 907 | case s_res_status: 908 | if (ch == CR) { 909 | UPDATE_STATE(s_res_line_almost_done); 910 | CALLBACK_DATA(status); 911 | break; 912 | } 913 | 914 | if (ch == LF) { 915 | UPDATE_STATE(s_header_field_start); 916 | CALLBACK_DATA(status); 917 | break; 918 | } 919 | 920 | break; 921 | 922 | case s_res_line_almost_done: 923 | STRICT_CHECK(ch != LF); 924 | UPDATE_STATE(s_header_field_start); 925 | break; 926 | 927 | case s_start_req: 928 | { 929 | if (ch == CR || ch == LF) 930 | break; 931 | parser->flags = 0; 932 | parser->uses_transfer_encoding = 0; 933 | parser->content_length = ULLONG_MAX; 934 | 935 | if (UNLIKELY(!IS_ALPHA(ch))) { 936 | SET_ERRNO(HPE_INVALID_METHOD); 937 | goto error; 938 | } 939 | 940 | parser->method = (enum http_method) 0; 941 | parser->index = 1; 942 | switch (ch) { 943 | case 'A': parser->method = HTTP_ACL; break; 944 | case 'B': parser->method = HTTP_BIND; break; 945 | case 'C': parser->method = 
HTTP_CONNECT; /* or COPY, CHECKOUT */ break; 946 | case 'D': parser->method = HTTP_DELETE; break; 947 | case 'G': parser->method = HTTP_GET; break; 948 | case 'H': parser->method = HTTP_HEAD; break; 949 | case 'L': parser->method = HTTP_LOCK; /* or LINK */ break; 950 | case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH, MKCALENDAR */ break; 951 | case 'N': parser->method = HTTP_NOTIFY; break; 952 | case 'O': parser->method = HTTP_OPTIONS; break; 953 | case 'P': parser->method = HTTP_POST; 954 | /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */ 955 | break; 956 | case 'R': parser->method = HTTP_REPORT; /* or REBIND */ break; 957 | case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH, SOURCE */ break; 958 | case 'T': parser->method = HTTP_TRACE; break; 959 | case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE, UNBIND, UNLINK */ break; 960 | default: 961 | SET_ERRNO(HPE_INVALID_METHOD); 962 | goto error; 963 | } 964 | UPDATE_STATE(s_req_method); 965 | 966 | CALLBACK_NOTIFY(message_begin); 967 | 968 | break; 969 | } 970 | 971 | case s_req_method: 972 | { 973 | const char *matcher; 974 | if (UNLIKELY(ch == '\0')) { 975 | SET_ERRNO(HPE_INVALID_METHOD); 976 | goto error; 977 | } 978 | 979 | matcher = method_strings[parser->method]; 980 | if (ch == ' ' && matcher[parser->index] == '\0') { 981 | UPDATE_STATE(s_req_spaces_before_url); 982 | } else if (ch == matcher[parser->index]) { 983 | ; /* nada */ 984 | } else if ((ch >= 'A' && ch <= 'Z') || ch == '-') { 985 | 986 | switch (parser->method << 16 | parser->index << 8 | ch) { 987 | #define XX(meth, pos, ch, new_meth) \ 988 | case (HTTP_##meth << 16 | pos << 8 | ch): \ 989 | parser->method = HTTP_##new_meth; break; 990 | 991 | XX(POST, 1, 'U', PUT) 992 | XX(POST, 1, 'A', PATCH) 993 | XX(POST, 1, 'R', PROPFIND) 994 | XX(PUT, 2, 'R', PURGE) 995 | XX(CONNECT, 1, 'H', CHECKOUT) 996 | XX(CONNECT, 2, 'P', COPY) 997 | XX(MKCOL, 1, 'O', MOVE) 998 | XX(MKCOL, 1, 'E', MERGE) 999 | XX(MKCOL, 1, '-', 
MSEARCH) 1000 | XX(MKCOL, 2, 'A', MKACTIVITY) 1001 | XX(MKCOL, 3, 'A', MKCALENDAR) 1002 | XX(SUBSCRIBE, 1, 'E', SEARCH) 1003 | XX(SUBSCRIBE, 1, 'O', SOURCE) 1004 | XX(REPORT, 2, 'B', REBIND) 1005 | XX(PROPFIND, 4, 'P', PROPPATCH) 1006 | XX(LOCK, 1, 'I', LINK) 1007 | XX(UNLOCK, 2, 'S', UNSUBSCRIBE) 1008 | XX(UNLOCK, 2, 'B', UNBIND) 1009 | XX(UNLOCK, 3, 'I', UNLINK) 1010 | #undef XX 1011 | default: 1012 | SET_ERRNO(HPE_INVALID_METHOD); 1013 | goto error; 1014 | } 1015 | } else { 1016 | SET_ERRNO(HPE_INVALID_METHOD); 1017 | goto error; 1018 | } 1019 | 1020 | ++parser->index; 1021 | break; 1022 | } 1023 | 1024 | case s_req_spaces_before_url: 1025 | { 1026 | if (ch == ' ') break; 1027 | 1028 | MARK(url); 1029 | if (parser->method == HTTP_CONNECT) { 1030 | UPDATE_STATE(s_req_server_start); 1031 | } 1032 | 1033 | UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch)); 1034 | if (UNLIKELY(CURRENT_STATE() == s_dead)) { 1035 | SET_ERRNO(HPE_INVALID_URL); 1036 | goto error; 1037 | } 1038 | 1039 | break; 1040 | } 1041 | 1042 | case s_req_schema: 1043 | case s_req_schema_slash: 1044 | case s_req_schema_slash_slash: 1045 | case s_req_server_start: 1046 | { 1047 | switch (ch) { 1048 | /* No whitespace allowed here */ 1049 | case ' ': 1050 | case CR: 1051 | case LF: 1052 | SET_ERRNO(HPE_INVALID_URL); 1053 | goto error; 1054 | default: 1055 | UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch)); 1056 | if (UNLIKELY(CURRENT_STATE() == s_dead)) { 1057 | SET_ERRNO(HPE_INVALID_URL); 1058 | goto error; 1059 | } 1060 | } 1061 | 1062 | break; 1063 | } 1064 | 1065 | case s_req_server: 1066 | case s_req_server_with_at: 1067 | case s_req_path: 1068 | case s_req_query_string_start: 1069 | case s_req_query_string: 1070 | case s_req_fragment_start: 1071 | case s_req_fragment: 1072 | { 1073 | switch (ch) { 1074 | case ' ': 1075 | UPDATE_STATE(s_req_http_start); 1076 | CALLBACK_DATA(url); 1077 | break; 1078 | case CR: 1079 | case LF: 1080 | parser->http_major = 0; 1081 | parser->http_minor = 9; 1082 | 
UPDATE_STATE((ch == CR) ? 1083 | s_req_line_almost_done : 1084 | s_header_field_start); 1085 | CALLBACK_DATA(url); 1086 | break; 1087 | default: 1088 | UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch)); 1089 | if (UNLIKELY(CURRENT_STATE() == s_dead)) { 1090 | SET_ERRNO(HPE_INVALID_URL); 1091 | goto error; 1092 | } 1093 | } 1094 | break; 1095 | } 1096 | 1097 | case s_req_http_start: 1098 | switch (ch) { 1099 | case ' ': 1100 | break; 1101 | case 'H': 1102 | UPDATE_STATE(s_req_http_H); 1103 | break; 1104 | case 'I': 1105 | if (parser->method == HTTP_SOURCE) { 1106 | UPDATE_STATE(s_req_http_I); 1107 | break; 1108 | } 1109 | /* fall through */ 1110 | default: 1111 | SET_ERRNO(HPE_INVALID_CONSTANT); 1112 | goto error; 1113 | } 1114 | break; 1115 | 1116 | case s_req_http_H: 1117 | STRICT_CHECK(ch != 'T'); 1118 | UPDATE_STATE(s_req_http_HT); 1119 | break; 1120 | 1121 | case s_req_http_HT: 1122 | STRICT_CHECK(ch != 'T'); 1123 | UPDATE_STATE(s_req_http_HTT); 1124 | break; 1125 | 1126 | case s_req_http_HTT: 1127 | STRICT_CHECK(ch != 'P'); 1128 | UPDATE_STATE(s_req_http_HTTP); 1129 | break; 1130 | 1131 | case s_req_http_I: 1132 | STRICT_CHECK(ch != 'C'); 1133 | UPDATE_STATE(s_req_http_IC); 1134 | break; 1135 | 1136 | case s_req_http_IC: 1137 | STRICT_CHECK(ch != 'E'); 1138 | UPDATE_STATE(s_req_http_HTTP); /* Treat "ICE" as "HTTP". 
*/ 1139 | break; 1140 | 1141 | case s_req_http_HTTP: 1142 | STRICT_CHECK(ch != '/'); 1143 | UPDATE_STATE(s_req_http_major); 1144 | break; 1145 | 1146 | case s_req_http_major: 1147 | if (UNLIKELY(!IS_NUM(ch))) { 1148 | SET_ERRNO(HPE_INVALID_VERSION); 1149 | goto error; 1150 | } 1151 | 1152 | parser->http_major = ch - '0'; 1153 | UPDATE_STATE(s_req_http_dot); 1154 | break; 1155 | 1156 | case s_req_http_dot: 1157 | { 1158 | if (UNLIKELY(ch != '.')) { 1159 | SET_ERRNO(HPE_INVALID_VERSION); 1160 | goto error; 1161 | } 1162 | 1163 | UPDATE_STATE(s_req_http_minor); 1164 | break; 1165 | } 1166 | 1167 | case s_req_http_minor: 1168 | if (UNLIKELY(!IS_NUM(ch))) { 1169 | SET_ERRNO(HPE_INVALID_VERSION); 1170 | goto error; 1171 | } 1172 | 1173 | parser->http_minor = ch - '0'; 1174 | UPDATE_STATE(s_req_http_end); 1175 | break; 1176 | 1177 | case s_req_http_end: 1178 | { 1179 | if (ch == CR) { 1180 | UPDATE_STATE(s_req_line_almost_done); 1181 | break; 1182 | } 1183 | 1184 | if (ch == LF) { 1185 | UPDATE_STATE(s_header_field_start); 1186 | break; 1187 | } 1188 | 1189 | SET_ERRNO(HPE_INVALID_VERSION); 1190 | goto error; 1191 | break; 1192 | } 1193 | 1194 | /* end of request line */ 1195 | case s_req_line_almost_done: 1196 | { 1197 | if (UNLIKELY(ch != LF)) { 1198 | SET_ERRNO(HPE_LF_EXPECTED); 1199 | goto error; 1200 | } 1201 | 1202 | UPDATE_STATE(s_header_field_start); 1203 | break; 1204 | } 1205 | 1206 | case s_header_field_start: 1207 | { 1208 | if (ch == CR) { 1209 | UPDATE_STATE(s_headers_almost_done); 1210 | break; 1211 | } 1212 | 1213 | if (ch == LF) { 1214 | /* they might be just sending \n instead of \r\n so this would be 1215 | * the second \n to denote the end of headers*/ 1216 | UPDATE_STATE(s_headers_almost_done); 1217 | REEXECUTE(); 1218 | } 1219 | 1220 | c = TOKEN(ch); 1221 | 1222 | if (UNLIKELY(!c)) { 1223 | SET_ERRNO(HPE_INVALID_HEADER_TOKEN); 1224 | goto error; 1225 | } 1226 | 1227 | MARK(header_field); 1228 | 1229 | parser->index = 0; 1230 | 
UPDATE_STATE(s_header_field); 1231 | 1232 | switch (c) { 1233 | case 'c': 1234 | parser->header_state = h_C; 1235 | break; 1236 | 1237 | case 'p': 1238 | parser->header_state = h_matching_proxy_connection; 1239 | break; 1240 | 1241 | case 't': 1242 | parser->header_state = h_matching_transfer_encoding; 1243 | break; 1244 | 1245 | case 'u': 1246 | parser->header_state = h_matching_upgrade; 1247 | break; 1248 | 1249 | default: 1250 | parser->header_state = h_general; 1251 | break; 1252 | } 1253 | break; 1254 | } 1255 | 1256 | case s_header_field: 1257 | { 1258 | const char* start = p; 1259 | for (; p != data + len; p++) { 1260 | ch = *p; 1261 | c = TOKEN(ch); 1262 | 1263 | if (!c) 1264 | break; 1265 | 1266 | switch (parser->header_state) { 1267 | case h_general: { 1268 | size_t left = data + len - p; 1269 | const char* pe = p + MIN(left, max_header_size); 1270 | while (p+1 < pe && TOKEN(p[1])) { 1271 | p++; 1272 | } 1273 | break; 1274 | } 1275 | 1276 | case h_C: 1277 | parser->index++; 1278 | parser->header_state = (c == 'o' ? h_CO : h_general); 1279 | break; 1280 | 1281 | case h_CO: 1282 | parser->index++; 1283 | parser->header_state = (c == 'n' ? 
h_CON : h_general); 1284 | break; 1285 | 1286 | case h_CON: 1287 | parser->index++; 1288 | switch (c) { 1289 | case 'n': 1290 | parser->header_state = h_matching_connection; 1291 | break; 1292 | case 't': 1293 | parser->header_state = h_matching_content_length; 1294 | break; 1295 | default: 1296 | parser->header_state = h_general; 1297 | break; 1298 | } 1299 | break; 1300 | 1301 | /* connection */ 1302 | 1303 | case h_matching_connection: 1304 | parser->index++; 1305 | if (parser->index > sizeof(CONNECTION)-1 1306 | || c != CONNECTION[parser->index]) { 1307 | parser->header_state = h_general; 1308 | } else if (parser->index == sizeof(CONNECTION)-2) { 1309 | parser->header_state = h_connection; 1310 | } 1311 | break; 1312 | 1313 | /* proxy-connection */ 1314 | 1315 | case h_matching_proxy_connection: 1316 | parser->index++; 1317 | if (parser->index > sizeof(PROXY_CONNECTION)-1 1318 | || c != PROXY_CONNECTION[parser->index]) { 1319 | parser->header_state = h_general; 1320 | } else if (parser->index == sizeof(PROXY_CONNECTION)-2) { 1321 | parser->header_state = h_connection; 1322 | } 1323 | break; 1324 | 1325 | /* content-length */ 1326 | 1327 | case h_matching_content_length: 1328 | parser->index++; 1329 | if (parser->index > sizeof(CONTENT_LENGTH)-1 1330 | || c != CONTENT_LENGTH[parser->index]) { 1331 | parser->header_state = h_general; 1332 | } else if (parser->index == sizeof(CONTENT_LENGTH)-2) { 1333 | parser->header_state = h_content_length; 1334 | } 1335 | break; 1336 | 1337 | /* transfer-encoding */ 1338 | 1339 | case h_matching_transfer_encoding: 1340 | parser->index++; 1341 | if (parser->index > sizeof(TRANSFER_ENCODING)-1 1342 | || c != TRANSFER_ENCODING[parser->index]) { 1343 | parser->header_state = h_general; 1344 | } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) { 1345 | parser->header_state = h_transfer_encoding; 1346 | parser->uses_transfer_encoding = 1; 1347 | } 1348 | break; 1349 | 1350 | /* upgrade */ 1351 | 1352 | case h_matching_upgrade: 
1353 | parser->index++; 1354 | if (parser->index > sizeof(UPGRADE)-1 1355 | || c != UPGRADE[parser->index]) { 1356 | parser->header_state = h_general; 1357 | } else if (parser->index == sizeof(UPGRADE)-2) { 1358 | parser->header_state = h_upgrade; 1359 | } 1360 | break; 1361 | 1362 | case h_connection: 1363 | case h_content_length: 1364 | case h_transfer_encoding: 1365 | case h_upgrade: 1366 | if (ch != ' ') parser->header_state = h_general; 1367 | break; 1368 | 1369 | default: 1370 | assert(0 && "Unknown header_state"); 1371 | break; 1372 | } 1373 | } 1374 | 1375 | if (p == data + len) { 1376 | --p; 1377 | COUNT_HEADER_SIZE(p - start); 1378 | break; 1379 | } 1380 | 1381 | COUNT_HEADER_SIZE(p - start); 1382 | 1383 | if (ch == ':') { 1384 | UPDATE_STATE(s_header_value_discard_ws); 1385 | CALLBACK_DATA(header_field); 1386 | break; 1387 | } 1388 | 1389 | SET_ERRNO(HPE_INVALID_HEADER_TOKEN); 1390 | goto error; 1391 | } 1392 | 1393 | case s_header_value_discard_ws: 1394 | if (ch == ' ' || ch == '\t') break; 1395 | 1396 | if (ch == CR) { 1397 | UPDATE_STATE(s_header_value_discard_ws_almost_done); 1398 | break; 1399 | } 1400 | 1401 | if (ch == LF) { 1402 | UPDATE_STATE(s_header_value_discard_lws); 1403 | break; 1404 | } 1405 | 1406 | /* fall through */ 1407 | 1408 | case s_header_value_start: 1409 | { 1410 | MARK(header_value); 1411 | 1412 | UPDATE_STATE(s_header_value); 1413 | parser->index = 0; 1414 | 1415 | c = LOWER(ch); 1416 | 1417 | switch (parser->header_state) { 1418 | case h_upgrade: 1419 | parser->flags |= F_UPGRADE; 1420 | parser->header_state = h_general; 1421 | break; 1422 | 1423 | case h_transfer_encoding: 1424 | /* looking for 'Transfer-Encoding: chunked' */ 1425 | if ('c' == c) { 1426 | parser->header_state = h_matching_transfer_encoding_chunked; 1427 | } else { 1428 | parser->header_state = h_matching_transfer_encoding_token; 1429 | } 1430 | break; 1431 | 1432 | /* Multi-value `Transfer-Encoding` header */ 1433 | case 
h_matching_transfer_encoding_token_start: 1434 | break; 1435 | 1436 | case h_content_length: 1437 | if (UNLIKELY(!IS_NUM(ch))) { 1438 | SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); 1439 | goto error; 1440 | } 1441 | 1442 | if (parser->flags & F_CONTENTLENGTH) { 1443 | SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH); 1444 | goto error; 1445 | } 1446 | 1447 | parser->flags |= F_CONTENTLENGTH; 1448 | parser->content_length = ch - '0'; 1449 | parser->header_state = h_content_length_num; 1450 | break; 1451 | 1452 | /* when obsolete line folding is encountered for content length 1453 | * continue to the s_header_value state */ 1454 | case h_content_length_ws: 1455 | break; 1456 | 1457 | case h_connection: 1458 | /* looking for 'Connection: keep-alive' */ 1459 | if (c == 'k') { 1460 | parser->header_state = h_matching_connection_keep_alive; 1461 | /* looking for 'Connection: close' */ 1462 | } else if (c == 'c') { 1463 | parser->header_state = h_matching_connection_close; 1464 | } else if (c == 'u') { 1465 | parser->header_state = h_matching_connection_upgrade; 1466 | } else { 1467 | parser->header_state = h_matching_connection_token; 1468 | } 1469 | break; 1470 | 1471 | /* Multi-value `Connection` header */ 1472 | case h_matching_connection_token_start: 1473 | break; 1474 | 1475 | default: 1476 | parser->header_state = h_general; 1477 | break; 1478 | } 1479 | break; 1480 | } 1481 | 1482 | case s_header_value: 1483 | { 1484 | const char* start = p; 1485 | enum header_states h_state = (enum header_states) parser->header_state; 1486 | for (; p != data + len; p++) { 1487 | ch = *p; 1488 | if (ch == CR) { 1489 | UPDATE_STATE(s_header_almost_done); 1490 | parser->header_state = h_state; 1491 | CALLBACK_DATA(header_value); 1492 | break; 1493 | } 1494 | 1495 | if (ch == LF) { 1496 | UPDATE_STATE(s_header_almost_done); 1497 | COUNT_HEADER_SIZE(p - start); 1498 | parser->header_state = h_state; 1499 | CALLBACK_DATA_NOADVANCE(header_value); 1500 | REEXECUTE(); 1501 | } 1502 | 1503 | if 
(!lenient && !IS_HEADER_CHAR(ch)) { 1504 | SET_ERRNO(HPE_INVALID_HEADER_TOKEN); 1505 | goto error; 1506 | } 1507 | 1508 | c = LOWER(ch); 1509 | 1510 | switch (h_state) { 1511 | case h_general: 1512 | { 1513 | size_t left = data + len - p; 1514 | const char* pe = p + MIN(left, max_header_size); 1515 | 1516 | for (; p != pe; p++) { 1517 | ch = *p; 1518 | if (ch == CR || ch == LF) { 1519 | --p; 1520 | break; 1521 | } 1522 | if (!lenient && !IS_HEADER_CHAR(ch)) { 1523 | SET_ERRNO(HPE_INVALID_HEADER_TOKEN); 1524 | goto error; 1525 | } 1526 | } 1527 | if (p == data + len) 1528 | --p; 1529 | break; 1530 | } 1531 | 1532 | case h_connection: 1533 | case h_transfer_encoding: 1534 | assert(0 && "Shouldn't get here."); 1535 | break; 1536 | 1537 | case h_content_length: 1538 | if (ch == ' ') break; 1539 | h_state = h_content_length_num; 1540 | /* fall through */ 1541 | 1542 | case h_content_length_num: 1543 | { 1544 | uint64_t t; 1545 | 1546 | if (ch == ' ') { 1547 | h_state = h_content_length_ws; 1548 | break; 1549 | } 1550 | 1551 | if (UNLIKELY(!IS_NUM(ch))) { 1552 | SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); 1553 | parser->header_state = h_state; 1554 | goto error; 1555 | } 1556 | 1557 | t = parser->content_length; 1558 | t *= 10; 1559 | t += ch - '0'; 1560 | 1561 | /* Overflow? Test against a conservative limit for simplicity. 
*/ 1562 | if (UNLIKELY((ULLONG_MAX - 10) / 10 < parser->content_length)) { 1563 | SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); 1564 | parser->header_state = h_state; 1565 | goto error; 1566 | } 1567 | 1568 | parser->content_length = t; 1569 | break; 1570 | } 1571 | 1572 | case h_content_length_ws: 1573 | if (ch == ' ') break; 1574 | SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); 1575 | parser->header_state = h_state; 1576 | goto error; 1577 | 1578 | /* Transfer-Encoding: chunked */ 1579 | case h_matching_transfer_encoding_token_start: 1580 | /* looking for 'Transfer-Encoding: chunked' */ 1581 | if ('c' == c) { 1582 | h_state = h_matching_transfer_encoding_chunked; 1583 | } else if (STRICT_TOKEN(c)) { 1584 | /* TODO(indutny): similar code below does this, but why? 1585 | * At the very least it seems to be inconsistent given that 1586 | * h_matching_transfer_encoding_token does not check for 1587 | * `STRICT_TOKEN` 1588 | */ 1589 | h_state = h_matching_transfer_encoding_token; 1590 | } else if (c == ' ' || c == '\t') { 1591 | /* Skip lws */ 1592 | } else { 1593 | h_state = h_general; 1594 | } 1595 | break; 1596 | 1597 | case h_matching_transfer_encoding_chunked: 1598 | parser->index++; 1599 | if (parser->index > sizeof(CHUNKED)-1 1600 | || c != CHUNKED[parser->index]) { 1601 | h_state = h_matching_transfer_encoding_token; 1602 | } else if (parser->index == sizeof(CHUNKED)-2) { 1603 | h_state = h_transfer_encoding_chunked; 1604 | } 1605 | break; 1606 | 1607 | case h_matching_transfer_encoding_token: 1608 | if (ch == ',') { 1609 | h_state = h_matching_transfer_encoding_token_start; 1610 | parser->index = 0; 1611 | } 1612 | break; 1613 | 1614 | case h_matching_connection_token_start: 1615 | /* looking for 'Connection: keep-alive' */ 1616 | if (c == 'k') { 1617 | h_state = h_matching_connection_keep_alive; 1618 | /* looking for 'Connection: close' */ 1619 | } else if (c == 'c') { 1620 | h_state = h_matching_connection_close; 1621 | } else if (c == 'u') { 1622 | h_state = 
h_matching_connection_upgrade; 1623 | } else if (STRICT_TOKEN(c)) { 1624 | h_state = h_matching_connection_token; 1625 | } else if (c == ' ' || c == '\t') { 1626 | /* Skip lws */ 1627 | } else { 1628 | h_state = h_general; 1629 | } 1630 | break; 1631 | 1632 | /* looking for 'Connection: keep-alive' */ 1633 | case h_matching_connection_keep_alive: 1634 | parser->index++; 1635 | if (parser->index > sizeof(KEEP_ALIVE)-1 1636 | || c != KEEP_ALIVE[parser->index]) { 1637 | h_state = h_matching_connection_token; 1638 | } else if (parser->index == sizeof(KEEP_ALIVE)-2) { 1639 | h_state = h_connection_keep_alive; 1640 | } 1641 | break; 1642 | 1643 | /* looking for 'Connection: close' */ 1644 | case h_matching_connection_close: 1645 | parser->index++; 1646 | if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) { 1647 | h_state = h_matching_connection_token; 1648 | } else if (parser->index == sizeof(CLOSE)-2) { 1649 | h_state = h_connection_close; 1650 | } 1651 | break; 1652 | 1653 | /* looking for 'Connection: upgrade' */ 1654 | case h_matching_connection_upgrade: 1655 | parser->index++; 1656 | if (parser->index > sizeof(UPGRADE) - 1 || 1657 | c != UPGRADE[parser->index]) { 1658 | h_state = h_matching_connection_token; 1659 | } else if (parser->index == sizeof(UPGRADE)-2) { 1660 | h_state = h_connection_upgrade; 1661 | } 1662 | break; 1663 | 1664 | case h_matching_connection_token: 1665 | if (ch == ',') { 1666 | h_state = h_matching_connection_token_start; 1667 | parser->index = 0; 1668 | } 1669 | break; 1670 | 1671 | case h_transfer_encoding_chunked: 1672 | if (ch != ' ') h_state = h_matching_transfer_encoding_token; 1673 | break; 1674 | 1675 | case h_connection_keep_alive: 1676 | case h_connection_close: 1677 | case h_connection_upgrade: 1678 | if (ch == ',') { 1679 | if (h_state == h_connection_keep_alive) { 1680 | parser->flags |= F_CONNECTION_KEEP_ALIVE; 1681 | } else if (h_state == h_connection_close) { 1682 | parser->flags |= F_CONNECTION_CLOSE; 1683 | } 
else if (h_state == h_connection_upgrade) { 1684 | parser->flags |= F_CONNECTION_UPGRADE; 1685 | } 1686 | h_state = h_matching_connection_token_start; 1687 | parser->index = 0; 1688 | } else if (ch != ' ') { 1689 | h_state = h_matching_connection_token; 1690 | } 1691 | break; 1692 | 1693 | default: 1694 | UPDATE_STATE(s_header_value); 1695 | h_state = h_general; 1696 | break; 1697 | } 1698 | } 1699 | parser->header_state = h_state; 1700 | 1701 | if (p == data + len) 1702 | --p; 1703 | 1704 | COUNT_HEADER_SIZE(p - start); 1705 | break; 1706 | } 1707 | 1708 | case s_header_almost_done: 1709 | { 1710 | if (UNLIKELY(ch != LF)) { 1711 | SET_ERRNO(HPE_LF_EXPECTED); 1712 | goto error; 1713 | } 1714 | 1715 | UPDATE_STATE(s_header_value_lws); 1716 | break; 1717 | } 1718 | 1719 | case s_header_value_lws: 1720 | { 1721 | if (ch == ' ' || ch == '\t') { 1722 | if (parser->header_state == h_content_length_num) { 1723 | /* treat obsolete line folding as space */ 1724 | parser->header_state = h_content_length_ws; 1725 | } 1726 | UPDATE_STATE(s_header_value_start); 1727 | REEXECUTE(); 1728 | } 1729 | 1730 | /* finished the header */ 1731 | switch (parser->header_state) { 1732 | case h_connection_keep_alive: 1733 | parser->flags |= F_CONNECTION_KEEP_ALIVE; 1734 | break; 1735 | case h_connection_close: 1736 | parser->flags |= F_CONNECTION_CLOSE; 1737 | break; 1738 | case h_transfer_encoding_chunked: 1739 | parser->flags |= F_CHUNKED; 1740 | break; 1741 | case h_connection_upgrade: 1742 | parser->flags |= F_CONNECTION_UPGRADE; 1743 | break; 1744 | default: 1745 | break; 1746 | } 1747 | 1748 | UPDATE_STATE(s_header_field_start); 1749 | REEXECUTE(); 1750 | } 1751 | 1752 | case s_header_value_discard_ws_almost_done: 1753 | { 1754 | STRICT_CHECK(ch != LF); 1755 | UPDATE_STATE(s_header_value_discard_lws); 1756 | break; 1757 | } 1758 | 1759 | case s_header_value_discard_lws: 1760 | { 1761 | if (ch == ' ' || ch == '\t') { 1762 | UPDATE_STATE(s_header_value_discard_ws); 1763 | break; 1764 | } 
else { 1765 | switch (parser->header_state) { 1766 | case h_connection_keep_alive: 1767 | parser->flags |= F_CONNECTION_KEEP_ALIVE; 1768 | break; 1769 | case h_connection_close: 1770 | parser->flags |= F_CONNECTION_CLOSE; 1771 | break; 1772 | case h_connection_upgrade: 1773 | parser->flags |= F_CONNECTION_UPGRADE; 1774 | break; 1775 | case h_transfer_encoding_chunked: 1776 | parser->flags |= F_CHUNKED; 1777 | break; 1778 | case h_content_length: 1779 | /* do not allow empty content length */ 1780 | SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); 1781 | goto error; 1782 | break; 1783 | default: 1784 | break; 1785 | } 1786 | 1787 | /* header value was empty */ 1788 | MARK(header_value); 1789 | UPDATE_STATE(s_header_field_start); 1790 | CALLBACK_DATA_NOADVANCE(header_value); 1791 | REEXECUTE(); 1792 | } 1793 | } 1794 | 1795 | case s_headers_almost_done: 1796 | { 1797 | STRICT_CHECK(ch != LF); 1798 | 1799 | if (parser->flags & F_TRAILING) { 1800 | /* End of a chunked request */ 1801 | UPDATE_STATE(s_message_done); 1802 | CALLBACK_NOTIFY_NOADVANCE(chunk_complete); 1803 | REEXECUTE(); 1804 | } 1805 | 1806 | /* Cannot use transfer-encoding and a content-length header together 1807 | per the HTTP specification. 
(RFC 7230 Section 3.3.3) */ 1808 | if ((parser->uses_transfer_encoding == 1) && 1809 | (parser->flags & F_CONTENTLENGTH)) { 1810 | /* Allow it for lenient parsing as long as `Transfer-Encoding` is 1811 | * not `chunked` or allow_length_with_encoding is set 1812 | */ 1813 | if (parser->flags & F_CHUNKED) { 1814 | if (!allow_chunked_length) { 1815 | SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH); 1816 | goto error; 1817 | } 1818 | } else if (!lenient) { 1819 | SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH); 1820 | goto error; 1821 | } 1822 | } 1823 | 1824 | UPDATE_STATE(s_headers_done); 1825 | 1826 | /* Set this here so that on_headers_complete() callbacks can see it */ 1827 | if ((parser->flags & F_UPGRADE) && 1828 | (parser->flags & F_CONNECTION_UPGRADE)) { 1829 | /* For responses, "Upgrade: foo" and "Connection: upgrade" are 1830 | * mandatory only when it is a 101 Switching Protocols response, 1831 | * otherwise it is purely informational, to announce support. 1832 | */ 1833 | parser->upgrade = 1834 | (parser->type == HTTP_REQUEST || parser->status_code == 101); 1835 | } else { 1836 | parser->upgrade = (parser->method == HTTP_CONNECT); 1837 | } 1838 | 1839 | /* Here we call the headers_complete callback. This is somewhat 1840 | * different than other callbacks because if the user returns 1, we 1841 | * will interpret that as saying that this message has no body. This 1842 | * is needed for the annoying case of recieving a response to a HEAD 1843 | * request. 1844 | * 1845 | * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so 1846 | * we have to simulate it by handling a change in errno below. 
1847 | */ 1848 | if (settings->on_headers_complete) { 1849 | switch (settings->on_headers_complete(parser)) { 1850 | case 0: 1851 | break; 1852 | 1853 | case 2: 1854 | parser->upgrade = 1; 1855 | 1856 | /* fall through */ 1857 | case 1: 1858 | parser->flags |= F_SKIPBODY; 1859 | break; 1860 | 1861 | default: 1862 | SET_ERRNO(HPE_CB_headers_complete); 1863 | RETURN(p - data); /* Error */ 1864 | } 1865 | } 1866 | 1867 | if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { 1868 | RETURN(p - data); 1869 | } 1870 | 1871 | REEXECUTE(); 1872 | } 1873 | 1874 | case s_headers_done: 1875 | { 1876 | int hasBody; 1877 | STRICT_CHECK(ch != LF); 1878 | 1879 | parser->nread = 0; 1880 | nread = 0; 1881 | 1882 | hasBody = parser->flags & F_CHUNKED || 1883 | (parser->content_length > 0 && parser->content_length != ULLONG_MAX); 1884 | if (parser->upgrade && (parser->method == HTTP_CONNECT || 1885 | (parser->flags & F_SKIPBODY) || !hasBody)) { 1886 | /* Exit, the rest of the message is in a different protocol. */ 1887 | UPDATE_STATE(NEW_MESSAGE()); 1888 | CALLBACK_NOTIFY(message_complete); 1889 | RETURN((p - data) + 1); 1890 | } 1891 | 1892 | if (parser->flags & F_SKIPBODY) { 1893 | UPDATE_STATE(NEW_MESSAGE()); 1894 | CALLBACK_NOTIFY(message_complete); 1895 | } else if (parser->flags & F_CHUNKED) { 1896 | /* chunked encoding - ignore Content-Length header, 1897 | * prepare for a chunk */ 1898 | UPDATE_STATE(s_chunk_size_start); 1899 | } else if (parser->uses_transfer_encoding == 1) { 1900 | if (parser->type == HTTP_REQUEST && !lenient) { 1901 | /* RFC 7230 3.3.3 */ 1902 | 1903 | /* If a Transfer-Encoding header field 1904 | * is present in a request and the chunked transfer coding is not 1905 | * the final encoding, the message body length cannot be determined 1906 | * reliably; the server MUST respond with the 400 (Bad Request) 1907 | * status code and then close the connection. 
1908 | */ 1909 | SET_ERRNO(HPE_INVALID_TRANSFER_ENCODING); 1910 | RETURN(p - data); /* Error */ 1911 | } else { 1912 | /* RFC 7230 3.3.3 */ 1913 | 1914 | /* If a Transfer-Encoding header field is present in a response and 1915 | * the chunked transfer coding is not the final encoding, the 1916 | * message body length is determined by reading the connection until 1917 | * it is closed by the server. 1918 | */ 1919 | UPDATE_STATE(s_body_identity_eof); 1920 | } 1921 | } else { 1922 | if (parser->content_length == 0) { 1923 | /* Content-Length header given but zero: Content-Length: 0\r\n */ 1924 | UPDATE_STATE(NEW_MESSAGE()); 1925 | CALLBACK_NOTIFY(message_complete); 1926 | } else if (parser->content_length != ULLONG_MAX) { 1927 | /* Content-Length header given and non-zero */ 1928 | UPDATE_STATE(s_body_identity); 1929 | } else { 1930 | if (!http_message_needs_eof(parser)) { 1931 | /* Assume content-length 0 - read the next */ 1932 | UPDATE_STATE(NEW_MESSAGE()); 1933 | CALLBACK_NOTIFY(message_complete); 1934 | } else { 1935 | /* Read body until EOF */ 1936 | UPDATE_STATE(s_body_identity_eof); 1937 | } 1938 | } 1939 | } 1940 | 1941 | break; 1942 | } 1943 | 1944 | case s_body_identity: 1945 | { 1946 | uint64_t to_read = MIN(parser->content_length, 1947 | (uint64_t) ((data + len) - p)); 1948 | 1949 | assert(parser->content_length != 0 1950 | && parser->content_length != ULLONG_MAX); 1951 | 1952 | /* The difference between advancing content_length and p is because 1953 | * the latter will automaticaly advance on the next loop iteration. 1954 | * Further, if content_length ends up at 0, we want to see the last 1955 | * byte again for our message complete callback. 1956 | */ 1957 | MARK(body); 1958 | parser->content_length -= to_read; 1959 | p += to_read - 1; 1960 | 1961 | if (parser->content_length == 0) { 1962 | UPDATE_STATE(s_message_done); 1963 | 1964 | /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte. 
1965 | * 1966 | * The alternative to doing this is to wait for the next byte to 1967 | * trigger the data callback, just as in every other case. The 1968 | * problem with this is that this makes it difficult for the test 1969 | * harness to distinguish between complete-on-EOF and 1970 | * complete-on-length. It's not clear that this distinction is 1971 | * important for applications, but let's keep it for now. 1972 | */ 1973 | CALLBACK_DATA_(body, p - body_mark + 1, p - data); 1974 | REEXECUTE(); 1975 | } 1976 | 1977 | break; 1978 | } 1979 | 1980 | /* read until EOF */ 1981 | case s_body_identity_eof: 1982 | MARK(body); 1983 | p = data + len - 1; 1984 | 1985 | break; 1986 | 1987 | case s_message_done: 1988 | UPDATE_STATE(NEW_MESSAGE()); 1989 | CALLBACK_NOTIFY(message_complete); 1990 | if (parser->upgrade) { 1991 | /* Exit, the rest of the message is in a different protocol. */ 1992 | RETURN((p - data) + 1); 1993 | } 1994 | break; 1995 | 1996 | case s_chunk_size_start: 1997 | { 1998 | assert(nread == 1); 1999 | assert(parser->flags & F_CHUNKED); 2000 | 2001 | unhex_val = unhex[(unsigned char)ch]; 2002 | if (UNLIKELY(unhex_val == -1)) { 2003 | SET_ERRNO(HPE_INVALID_CHUNK_SIZE); 2004 | goto error; 2005 | } 2006 | 2007 | parser->content_length = unhex_val; 2008 | UPDATE_STATE(s_chunk_size); 2009 | break; 2010 | } 2011 | 2012 | case s_chunk_size: 2013 | { 2014 | uint64_t t; 2015 | 2016 | assert(parser->flags & F_CHUNKED); 2017 | 2018 | if (ch == CR) { 2019 | UPDATE_STATE(s_chunk_size_almost_done); 2020 | break; 2021 | } 2022 | 2023 | unhex_val = unhex[(unsigned char)ch]; 2024 | 2025 | if (unhex_val == -1) { 2026 | if (ch == ';' || ch == ' ') { 2027 | UPDATE_STATE(s_chunk_parameters); 2028 | break; 2029 | } 2030 | 2031 | SET_ERRNO(HPE_INVALID_CHUNK_SIZE); 2032 | goto error; 2033 | } 2034 | 2035 | t = parser->content_length; 2036 | t *= 16; 2037 | t += unhex_val; 2038 | 2039 | /* Overflow? Test against a conservative limit for simplicity. 
*/ 2040 | if (UNLIKELY((ULLONG_MAX - 16) / 16 < parser->content_length)) { 2041 | SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); 2042 | goto error; 2043 | } 2044 | 2045 | parser->content_length = t; 2046 | break; 2047 | } 2048 | 2049 | case s_chunk_parameters: 2050 | { 2051 | assert(parser->flags & F_CHUNKED); 2052 | /* just ignore this shit. TODO check for overflow */ 2053 | if (ch == CR) { 2054 | UPDATE_STATE(s_chunk_size_almost_done); 2055 | break; 2056 | } 2057 | break; 2058 | } 2059 | 2060 | case s_chunk_size_almost_done: 2061 | { 2062 | assert(parser->flags & F_CHUNKED); 2063 | STRICT_CHECK(ch != LF); 2064 | 2065 | parser->nread = 0; 2066 | nread = 0; 2067 | 2068 | if (parser->content_length == 0) { 2069 | parser->flags |= F_TRAILING; 2070 | UPDATE_STATE(s_header_field_start); 2071 | } else { 2072 | UPDATE_STATE(s_chunk_data); 2073 | } 2074 | CALLBACK_NOTIFY(chunk_header); 2075 | break; 2076 | } 2077 | 2078 | case s_chunk_data: 2079 | { 2080 | uint64_t to_read = MIN(parser->content_length, 2081 | (uint64_t) ((data + len) - p)); 2082 | 2083 | assert(parser->flags & F_CHUNKED); 2084 | assert(parser->content_length != 0 2085 | && parser->content_length != ULLONG_MAX); 2086 | 2087 | /* See the explanation in s_body_identity for why the content 2088 | * length and data pointers are managed this way. 
2089 | */ 2090 | MARK(body); 2091 | parser->content_length -= to_read; 2092 | p += to_read - 1; 2093 | 2094 | if (parser->content_length == 0) { 2095 | UPDATE_STATE(s_chunk_data_almost_done); 2096 | } 2097 | 2098 | break; 2099 | } 2100 | 2101 | case s_chunk_data_almost_done: 2102 | assert(parser->flags & F_CHUNKED); 2103 | assert(parser->content_length == 0); 2104 | STRICT_CHECK(ch != CR); 2105 | UPDATE_STATE(s_chunk_data_done); 2106 | CALLBACK_DATA(body); 2107 | break; 2108 | 2109 | case s_chunk_data_done: 2110 | assert(parser->flags & F_CHUNKED); 2111 | STRICT_CHECK(ch != LF); 2112 | parser->nread = 0; 2113 | nread = 0; 2114 | UPDATE_STATE(s_chunk_size_start); 2115 | CALLBACK_NOTIFY(chunk_complete); 2116 | break; 2117 | 2118 | default: 2119 | assert(0 && "unhandled state"); 2120 | SET_ERRNO(HPE_INVALID_INTERNAL_STATE); 2121 | goto error; 2122 | } 2123 | } 2124 | 2125 | /* Run callbacks for any marks that we have leftover after we ran out of 2126 | * bytes. There should be at most one of these set, so it's OK to invoke 2127 | * them in series (unset marks will not result in callbacks). 2128 | * 2129 | * We use the NOADVANCE() variety of callbacks here because 'p' has already 2130 | * overflowed 'data' and this allows us to correct for the off-by-one that 2131 | * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p' 2132 | * value that's in-bounds). 2133 | */ 2134 | 2135 | assert(((header_field_mark ? 1 : 0) + 2136 | (header_value_mark ? 1 : 0) + 2137 | (url_mark ? 1 : 0) + 2138 | (body_mark ? 1 : 0) + 2139 | (status_mark ? 
1 : 0)) <= 1); 2140 | 2141 | CALLBACK_DATA_NOADVANCE(header_field); 2142 | CALLBACK_DATA_NOADVANCE(header_value); 2143 | CALLBACK_DATA_NOADVANCE(url); 2144 | CALLBACK_DATA_NOADVANCE(body); 2145 | CALLBACK_DATA_NOADVANCE(status); 2146 | 2147 | RETURN(len); 2148 | 2149 | error: 2150 | if (HTTP_PARSER_ERRNO(parser) == HPE_OK) { 2151 | SET_ERRNO(HPE_UNKNOWN); 2152 | } 2153 | 2154 | RETURN(p - data); 2155 | } 2156 | 2157 | 2158 | /* Does the parser need to see an EOF to find the end of the message? */ 2159 | int 2160 | http_message_needs_eof (const http_parser *parser) 2161 | { 2162 | if (parser->type == HTTP_REQUEST) { 2163 | return 0; 2164 | } 2165 | 2166 | /* See RFC 2616 section 4.4 */ 2167 | if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */ 2168 | parser->status_code == 204 || /* No Content */ 2169 | parser->status_code == 304 || /* Not Modified */ 2170 | parser->flags & F_SKIPBODY) { /* response to a HEAD request */ 2171 | return 0; 2172 | } 2173 | 2174 | /* RFC 7230 3.3.3, see `s_headers_almost_done` */ 2175 | if ((parser->uses_transfer_encoding == 1) && 2176 | (parser->flags & F_CHUNKED) == 0) { 2177 | return 1; 2178 | } 2179 | 2180 | if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) { 2181 | return 0; 2182 | } 2183 | 2184 | return 1; 2185 | } 2186 | 2187 | 2188 | int 2189 | http_should_keep_alive (const http_parser *parser) 2190 | { 2191 | if (parser->http_major > 0 && parser->http_minor > 0) { 2192 | /* HTTP/1.1 */ 2193 | if (parser->flags & F_CONNECTION_CLOSE) { 2194 | return 0; 2195 | } 2196 | } else { 2197 | /* HTTP/1.0 or earlier */ 2198 | if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) { 2199 | return 0; 2200 | } 2201 | } 2202 | 2203 | return !http_message_needs_eof(parser); 2204 | } 2205 | 2206 | 2207 | const char * 2208 | http_method_str (enum http_method m) 2209 | { 2210 | return ELEM_AT(method_strings, m, ""); 2211 | } 2212 | 2213 | const char * 2214 | http_status_str (enum http_status s) 2215 | { 2216 | switch (s) { 
2217 | #define XX(num, name, string) case HTTP_STATUS_##name: return #string; 2218 | HTTP_STATUS_MAP(XX) 2219 | #undef XX 2220 | default: return ""; 2221 | } 2222 | } 2223 | 2224 | void 2225 | http_parser_init (http_parser *parser, enum http_parser_type t) 2226 | { 2227 | void *data = parser->data; /* preserve application data */ 2228 | memset(parser, 0, sizeof(*parser)); 2229 | parser->data = data; 2230 | parser->type = t; 2231 | parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res)); 2232 | parser->http_errno = HPE_OK; 2233 | } 2234 | 2235 | void 2236 | http_parser_settings_init(http_parser_settings *settings) 2237 | { 2238 | memset(settings, 0, sizeof(*settings)); 2239 | } 2240 | 2241 | const char * 2242 | http_errno_name(enum http_errno err) { 2243 | assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab)); 2244 | return http_strerror_tab[err].name; 2245 | } 2246 | 2247 | const char * 2248 | http_errno_description(enum http_errno err) { 2249 | assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab)); 2250 | return http_strerror_tab[err].description; 2251 | } 2252 | 2253 | static enum http_host_state 2254 | http_parse_host_char(enum http_host_state s, const char ch) { 2255 | switch(s) { 2256 | case s_http_userinfo: 2257 | case s_http_userinfo_start: 2258 | if (ch == '@') { 2259 | return s_http_host_start; 2260 | } 2261 | 2262 | if (IS_USERINFO_CHAR(ch)) { 2263 | return s_http_userinfo; 2264 | } 2265 | break; 2266 | 2267 | case s_http_host_start: 2268 | if (ch == '[') { 2269 | return s_http_host_v6_start; 2270 | } 2271 | 2272 | if (IS_HOST_CHAR(ch)) { 2273 | return s_http_host; 2274 | } 2275 | 2276 | break; 2277 | 2278 | case s_http_host: 2279 | if (IS_HOST_CHAR(ch)) { 2280 | return s_http_host; 2281 | } 2282 | 2283 | /* fall through */ 2284 | case s_http_host_v6_end: 2285 | if (ch == ':') { 2286 | return s_http_host_port_start; 2287 | } 2288 | 2289 | break; 2290 | 2291 | case s_http_host_v6: 2292 | if (ch == ']') 
{ 2293 | return s_http_host_v6_end; 2294 | } 2295 | 2296 | /* fall through */ 2297 | case s_http_host_v6_start: 2298 | if (IS_HEX(ch) || ch == ':' || ch == '.') { 2299 | return s_http_host_v6; 2300 | } 2301 | 2302 | if (s == s_http_host_v6 && ch == '%') { 2303 | return s_http_host_v6_zone_start; 2304 | } 2305 | break; 2306 | 2307 | case s_http_host_v6_zone: 2308 | if (ch == ']') { 2309 | return s_http_host_v6_end; 2310 | } 2311 | 2312 | /* fall through */ 2313 | case s_http_host_v6_zone_start: 2314 | /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */ 2315 | if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' || ch == '-' || ch == '_' || 2316 | ch == '~') { 2317 | return s_http_host_v6_zone; 2318 | } 2319 | break; 2320 | 2321 | case s_http_host_port: 2322 | case s_http_host_port_start: 2323 | if (IS_NUM(ch)) { 2324 | return s_http_host_port; 2325 | } 2326 | 2327 | break; 2328 | 2329 | default: 2330 | break; 2331 | } 2332 | return s_http_host_dead; 2333 | } 2334 | 2335 | static int 2336 | http_parse_host(const char * buf, struct http_parser_url *u, int found_at) { 2337 | enum http_host_state s; 2338 | 2339 | const char *p; 2340 | size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len; 2341 | 2342 | assert(u->field_set & (1 << UF_HOST)); 2343 | 2344 | u->field_data[UF_HOST].len = 0; 2345 | 2346 | s = found_at ? 
s_http_userinfo_start : s_http_host_start; 2347 | 2348 | for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) { 2349 | enum http_host_state new_s = http_parse_host_char(s, *p); 2350 | 2351 | if (new_s == s_http_host_dead) { 2352 | return 1; 2353 | } 2354 | 2355 | switch(new_s) { 2356 | case s_http_host: 2357 | if (s != s_http_host) { 2358 | u->field_data[UF_HOST].off = (uint16_t)(p - buf); 2359 | } 2360 | u->field_data[UF_HOST].len++; 2361 | break; 2362 | 2363 | case s_http_host_v6: 2364 | if (s != s_http_host_v6) { 2365 | u->field_data[UF_HOST].off = (uint16_t)(p - buf); 2366 | } 2367 | u->field_data[UF_HOST].len++; 2368 | break; 2369 | 2370 | case s_http_host_v6_zone_start: 2371 | case s_http_host_v6_zone: 2372 | u->field_data[UF_HOST].len++; 2373 | break; 2374 | 2375 | case s_http_host_port: 2376 | if (s != s_http_host_port) { 2377 | u->field_data[UF_PORT].off = (uint16_t)(p - buf); 2378 | u->field_data[UF_PORT].len = 0; 2379 | u->field_set |= (1 << UF_PORT); 2380 | } 2381 | u->field_data[UF_PORT].len++; 2382 | break; 2383 | 2384 | case s_http_userinfo: 2385 | if (s != s_http_userinfo) { 2386 | u->field_data[UF_USERINFO].off = (uint16_t)(p - buf); 2387 | u->field_data[UF_USERINFO].len = 0; 2388 | u->field_set |= (1 << UF_USERINFO); 2389 | } 2390 | u->field_data[UF_USERINFO].len++; 2391 | break; 2392 | 2393 | default: 2394 | break; 2395 | } 2396 | s = new_s; 2397 | } 2398 | 2399 | /* Make sure we don't end somewhere unexpected */ 2400 | switch (s) { 2401 | case s_http_host_start: 2402 | case s_http_host_v6_start: 2403 | case s_http_host_v6: 2404 | case s_http_host_v6_zone_start: 2405 | case s_http_host_v6_zone: 2406 | case s_http_host_port_start: 2407 | case s_http_userinfo: 2408 | case s_http_userinfo_start: 2409 | return 1; 2410 | default: 2411 | break; 2412 | } 2413 | 2414 | return 0; 2415 | } 2416 | 2417 | void 2418 | http_parser_url_init(struct http_parser_url *u) { 2419 | memset(u, 0, sizeof(*u)); 2420 | } 2421 | 2422 | int 2423 | 
http_parser_parse_url(const char *buf, size_t buflen, int is_connect, 2424 | struct http_parser_url *u) 2425 | { 2426 | enum state s; 2427 | const char *p; 2428 | enum http_parser_url_fields uf, old_uf; 2429 | int found_at = 0; 2430 | 2431 | if (buflen == 0) { 2432 | return 1; 2433 | } 2434 | 2435 | u->port = u->field_set = 0; 2436 | s = is_connect ? s_req_server_start : s_req_spaces_before_url; 2437 | old_uf = UF_MAX; 2438 | 2439 | for (p = buf; p < buf + buflen; p++) { 2440 | s = parse_url_char(s, *p); 2441 | 2442 | /* Figure out the next field that we're operating on */ 2443 | switch (s) { 2444 | case s_dead: 2445 | return 1; 2446 | 2447 | /* Skip delimeters */ 2448 | case s_req_schema_slash: 2449 | case s_req_schema_slash_slash: 2450 | case s_req_server_start: 2451 | case s_req_query_string_start: 2452 | case s_req_fragment_start: 2453 | continue; 2454 | 2455 | case s_req_schema: 2456 | uf = UF_SCHEMA; 2457 | break; 2458 | 2459 | case s_req_server_with_at: 2460 | found_at = 1; 2461 | 2462 | /* fall through */ 2463 | case s_req_server: 2464 | uf = UF_HOST; 2465 | break; 2466 | 2467 | case s_req_path: 2468 | uf = UF_PATH; 2469 | break; 2470 | 2471 | case s_req_query_string: 2472 | uf = UF_QUERY; 2473 | break; 2474 | 2475 | case s_req_fragment: 2476 | uf = UF_FRAGMENT; 2477 | break; 2478 | 2479 | default: 2480 | assert(!"Unexpected state"); 2481 | return 1; 2482 | } 2483 | 2484 | /* Nothing's changed; soldier on */ 2485 | if (uf == old_uf) { 2486 | u->field_data[uf].len++; 2487 | continue; 2488 | } 2489 | 2490 | u->field_data[uf].off = (uint16_t)(p - buf); 2491 | u->field_data[uf].len = 1; 2492 | 2493 | u->field_set |= (1 << uf); 2494 | old_uf = uf; 2495 | } 2496 | 2497 | /* host must be present if there is a schema */ 2498 | /* parsing http:///toto will fail */ 2499 | if ((u->field_set & (1 << UF_SCHEMA)) && 2500 | (u->field_set & (1 << UF_HOST)) == 0) { 2501 | return 1; 2502 | } 2503 | 2504 | if (u->field_set & (1 << UF_HOST)) { 2505 | if (http_parse_host(buf, 
u, found_at) != 0) { 2506 | return 1; 2507 | } 2508 | } 2509 | 2510 | /* CONNECT requests can only contain "hostname:port" */ 2511 | if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) { 2512 | return 1; 2513 | } 2514 | 2515 | if (u->field_set & (1 << UF_PORT)) { 2516 | uint16_t off; 2517 | uint16_t len; 2518 | const char* p; 2519 | const char* end; 2520 | unsigned long v; 2521 | 2522 | off = u->field_data[UF_PORT].off; 2523 | len = u->field_data[UF_PORT].len; 2524 | end = buf + off + len; 2525 | 2526 | /* NOTE: The characters are already validated and are in the [0-9] range */ 2527 | assert((size_t) (off + len) <= buflen && "Port number overflow"); 2528 | v = 0; 2529 | for (p = buf + off; p < end; p++) { 2530 | v *= 10; 2531 | v += *p - '0'; 2532 | 2533 | /* Ports have a max value of 2^16 */ 2534 | if (v > 0xffff) { 2535 | return 1; 2536 | } 2537 | } 2538 | 2539 | u->port = (uint16_t) v; 2540 | } 2541 | 2542 | return 0; 2543 | } 2544 | 2545 | void 2546 | http_parser_pause(http_parser *parser, int paused) { 2547 | /* Users should only be pausing/unpausing a parser that is not in an error 2548 | * state. In non-debug builds, there's not much that we can do about this 2549 | * other than ignore it. 2550 | */ 2551 | if (HTTP_PARSER_ERRNO(parser) == HPE_OK || 2552 | HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) { 2553 | uint32_t nread = parser->nread; /* used by the SET_ERRNO macro */ 2554 | SET_ERRNO((paused) ? 
HPE_PAUSED : HPE_OK); 2555 | } else { 2556 | assert(0 && "Attempting to pause parser in error state"); 2557 | } 2558 | } 2559 | 2560 | int 2561 | http_body_is_final(const struct http_parser *parser) { 2562 | return parser->state == s_message_done; 2563 | } 2564 | 2565 | unsigned long 2566 | http_parser_version(void) { 2567 | return HTTP_PARSER_VERSION_MAJOR * 0x10000 | 2568 | HTTP_PARSER_VERSION_MINOR * 0x00100 | 2569 | HTTP_PARSER_VERSION_PATCH * 0x00001; 2570 | } 2571 | 2572 | void 2573 | http_parser_set_max_header_size(uint32_t size) { 2574 | max_header_size = size; 2575 | } 2576 | -------------------------------------------------------------------------------- /http_parser.gyp: -------------------------------------------------------------------------------- 1 | # This file is used with the GYP meta build system. 2 | # http://code.google.com/p/gyp/ 3 | # To build try this: 4 | # svn co http://gyp.googlecode.com/svn/trunk gyp 5 | # ./gyp/gyp -f make --depth=`pwd` http_parser.gyp 6 | # ./out/Debug/test 7 | { 8 | 'target_defaults': { 9 | 'default_configuration': 'Debug', 10 | 'configurations': { 11 | # TODO: hoist these out and put them somewhere common, because 12 | # RuntimeLibrary MUST MATCH across the entire project 13 | 'Debug': { 14 | 'defines': [ 'DEBUG', '_DEBUG' ], 15 | 'cflags': [ '-Wall', '-Wextra', '-O0', '-g', '-ftrapv' ], 16 | 'msvs_settings': { 17 | 'VCCLCompilerTool': { 18 | 'RuntimeLibrary': 1, # static debug 19 | }, 20 | }, 21 | }, 22 | 'Release': { 23 | 'defines': [ 'NDEBUG' ], 24 | 'cflags': [ '-Wall', '-Wextra', '-O3' ], 25 | 'msvs_settings': { 26 | 'VCCLCompilerTool': { 27 | 'RuntimeLibrary': 0, # static release 28 | }, 29 | }, 30 | } 31 | }, 32 | 'msvs_settings': { 33 | 'VCCLCompilerTool': { 34 | }, 35 | 'VCLibrarianTool': { 36 | }, 37 | 'VCLinkerTool': { 38 | 'GenerateDebugInformation': 'true', 39 | }, 40 | }, 41 | 'conditions': [ 42 | ['OS == "win"', { 43 | 'defines': [ 44 | 'WIN32' 45 | ], 46 | }] 47 | ], 48 | }, 49 | 50 | 'targets': [ 51 
| { 52 | 'target_name': 'http_parser', 53 | 'type': 'static_library', 54 | 'include_dirs': [ '.' ], 55 | 'direct_dependent_settings': { 56 | 'defines': [ 'HTTP_PARSER_STRICT=0' ], 57 | 'include_dirs': [ '.' ], 58 | }, 59 | 'defines': [ 'HTTP_PARSER_STRICT=0' ], 60 | 'sources': [ './http_parser.c', ], 61 | 'conditions': [ 62 | ['OS=="win"', { 63 | 'msvs_settings': { 64 | 'VCCLCompilerTool': { 65 | # Compile as C++. http_parser.c is actually C99, but C++ is 66 | # close enough in this case. 67 | 'CompileAs': 2, 68 | }, 69 | }, 70 | }] 71 | ], 72 | }, 73 | 74 | { 75 | 'target_name': 'http_parser_strict', 76 | 'type': 'static_library', 77 | 'include_dirs': [ '.' ], 78 | 'direct_dependent_settings': { 79 | 'defines': [ 'HTTP_PARSER_STRICT=1' ], 80 | 'include_dirs': [ '.' ], 81 | }, 82 | 'defines': [ 'HTTP_PARSER_STRICT=1' ], 83 | 'sources': [ './http_parser.c', ], 84 | 'conditions': [ 85 | ['OS=="win"', { 86 | 'msvs_settings': { 87 | 'VCCLCompilerTool': { 88 | # Compile as C++. http_parser.c is actually C99, but C++ is 89 | # close enough in this case. 90 | 'CompileAs': 2, 91 | }, 92 | }, 93 | }] 94 | ], 95 | }, 96 | 97 | { 98 | 'target_name': 'test-nonstrict', 99 | 'type': 'executable', 100 | 'dependencies': [ 'http_parser' ], 101 | 'sources': [ 'test.c' ] 102 | }, 103 | 104 | { 105 | 'target_name': 'test-strict', 106 | 'type': 'executable', 107 | 'dependencies': [ 'http_parser_strict' ], 108 | 'sources': [ 'test.c' ] 109 | } 110 | ] 111 | } 112 | -------------------------------------------------------------------------------- /http_parser.h: -------------------------------------------------------------------------------- 1 | /* Copyright Joyent, Inc. and other Node contributors. All rights reserved. 
2 | * 3 | * Permission is hereby granted, free of charge, to any person obtaining a copy 4 | * of this software and associated documentation files (the "Software"), to 5 | * deal in the Software without restriction, including without limitation the 6 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 7 | * sell copies of the Software, and to permit persons to whom the Software is 8 | * furnished to do so, subject to the following conditions: 9 | * 10 | * The above copyright notice and this permission notice shall be included in 11 | * all copies or substantial portions of the Software. 12 | * 13 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 19 | * IN THE SOFTWARE. 20 | */ 21 | #ifndef http_parser_h 22 | #define http_parser_h 23 | #ifdef __cplusplus 24 | extern "C" { 25 | #endif 26 | 27 | /* Also update SONAME in the Makefile whenever you change these. 
*/ 28 | #define HTTP_PARSER_VERSION_MAJOR 2 29 | #define HTTP_PARSER_VERSION_MINOR 9 30 | #define HTTP_PARSER_VERSION_PATCH 4 31 | 32 | #include <stddef.h> 33 | #if defined(_WIN32) && !defined(__MINGW32__) && \ 34 | (!defined(_MSC_VER) || _MSC_VER<1600) && !defined(__WINE__) 35 | #include <BaseTsd.h> 36 | typedef __int8 int8_t; 37 | typedef unsigned __int8 uint8_t; 38 | typedef __int16 int16_t; 39 | typedef unsigned __int16 uint16_t; 40 | typedef __int32 int32_t; 41 | typedef unsigned __int32 uint32_t; 42 | typedef __int64 int64_t; 43 | typedef unsigned __int64 uint64_t; 44 | #elif (defined(__sun) || defined(__sun__)) && defined(__SunOS_5_9) 45 | #include <sys/inttypes.h> 46 | #else 47 | #include <stdint.h> 48 | #endif 49 | 50 | /* Compile with -DHTTP_PARSER_STRICT=0 to make fewer checks, but run 51 | * faster 52 | */ 53 | #ifndef HTTP_PARSER_STRICT 54 | # define HTTP_PARSER_STRICT 1 55 | #endif 56 | 57 | /* Maximum header size allowed. If the macro is not defined 58 | * before including this header then the default is used. To 59 | * change the maximum header size, define the macro in the build 60 | * environment (e.g. -DHTTP_MAX_HEADER_SIZE=<value>). To remove 61 | * the effective limit on the size of the header, define the macro 62 | * to a very large number (e.g. -DHTTP_MAX_HEADER_SIZE=0x7fffffff) 63 | */ 64 | #ifndef HTTP_MAX_HEADER_SIZE 65 | # define HTTP_MAX_HEADER_SIZE (80*1024) 66 | #endif 67 | 68 | typedef struct http_parser http_parser; 69 | typedef struct http_parser_settings http_parser_settings; 70 | 71 | 72 | /* Callbacks should return non-zero to indicate an error. The parser will 73 | * then halt execution. 74 | * 75 | * The one exception is on_headers_complete. In a HTTP_RESPONSE parser 76 | * returning '1' from on_headers_complete will tell the parser that it 77 | * should not expect a body. This is used when receiving a response to a 78 | * HEAD request which may contain 'Content-Length' or 'Transfer-Encoding: 79 | * chunked' headers that indicate the presence of a body. 
80 | * 81 | * Returning `2` from on_headers_complete will tell the parser that it should 82 | * expect neither a body nor any further responses on this connection. This is 83 | * useful for handling responses to a CONNECT request which may not contain 84 | * `Upgrade` or `Connection: upgrade` headers. 85 | * 86 | * http_data_cb does not return data chunks. It will be called arbitrarily 87 | * many times for each string. E.G. you might get 10 callbacks for "on_url" 88 | * each providing just a few characters more data. 89 | */ 90 | typedef int (*http_data_cb) (http_parser*, const char *at, size_t length); 91 | typedef int (*http_cb) (http_parser*); 92 | 93 | 94 | /* Status Codes */ 95 | #define HTTP_STATUS_MAP(XX) \ 96 | XX(100, CONTINUE, Continue) \ 97 | XX(101, SWITCHING_PROTOCOLS, Switching Protocols) \ 98 | XX(102, PROCESSING, Processing) \ 99 | XX(200, OK, OK) \ 100 | XX(201, CREATED, Created) \ 101 | XX(202, ACCEPTED, Accepted) \ 102 | XX(203, NON_AUTHORITATIVE_INFORMATION, Non-Authoritative Information) \ 103 | XX(204, NO_CONTENT, No Content) \ 104 | XX(205, RESET_CONTENT, Reset Content) \ 105 | XX(206, PARTIAL_CONTENT, Partial Content) \ 106 | XX(207, MULTI_STATUS, Multi-Status) \ 107 | XX(208, ALREADY_REPORTED, Already Reported) \ 108 | XX(226, IM_USED, IM Used) \ 109 | XX(300, MULTIPLE_CHOICES, Multiple Choices) \ 110 | XX(301, MOVED_PERMANENTLY, Moved Permanently) \ 111 | XX(302, FOUND, Found) \ 112 | XX(303, SEE_OTHER, See Other) \ 113 | XX(304, NOT_MODIFIED, Not Modified) \ 114 | XX(305, USE_PROXY, Use Proxy) \ 115 | XX(307, TEMPORARY_REDIRECT, Temporary Redirect) \ 116 | XX(308, PERMANENT_REDIRECT, Permanent Redirect) \ 117 | XX(400, BAD_REQUEST, Bad Request) \ 118 | XX(401, UNAUTHORIZED, Unauthorized) \ 119 | XX(402, PAYMENT_REQUIRED, Payment Required) \ 120 | XX(403, FORBIDDEN, Forbidden) \ 121 | XX(404, NOT_FOUND, Not Found) \ 122 | XX(405, METHOD_NOT_ALLOWED, Method Not Allowed) \ 123 | XX(406, NOT_ACCEPTABLE, Not Acceptable) \ 124 | XX(407, 
PROXY_AUTHENTICATION_REQUIRED, Proxy Authentication Required) \ 125 | XX(408, REQUEST_TIMEOUT, Request Timeout) \ 126 | XX(409, CONFLICT, Conflict) \ 127 | XX(410, GONE, Gone) \ 128 | XX(411, LENGTH_REQUIRED, Length Required) \ 129 | XX(412, PRECONDITION_FAILED, Precondition Failed) \ 130 | XX(413, PAYLOAD_TOO_LARGE, Payload Too Large) \ 131 | XX(414, URI_TOO_LONG, URI Too Long) \ 132 | XX(415, UNSUPPORTED_MEDIA_TYPE, Unsupported Media Type) \ 133 | XX(416, RANGE_NOT_SATISFIABLE, Range Not Satisfiable) \ 134 | XX(417, EXPECTATION_FAILED, Expectation Failed) \ 135 | XX(421, MISDIRECTED_REQUEST, Misdirected Request) \ 136 | XX(422, UNPROCESSABLE_ENTITY, Unprocessable Entity) \ 137 | XX(423, LOCKED, Locked) \ 138 | XX(424, FAILED_DEPENDENCY, Failed Dependency) \ 139 | XX(426, UPGRADE_REQUIRED, Upgrade Required) \ 140 | XX(428, PRECONDITION_REQUIRED, Precondition Required) \ 141 | XX(429, TOO_MANY_REQUESTS, Too Many Requests) \ 142 | XX(431, REQUEST_HEADER_FIELDS_TOO_LARGE, Request Header Fields Too Large) \ 143 | XX(451, UNAVAILABLE_FOR_LEGAL_REASONS, Unavailable For Legal Reasons) \ 144 | XX(500, INTERNAL_SERVER_ERROR, Internal Server Error) \ 145 | XX(501, NOT_IMPLEMENTED, Not Implemented) \ 146 | XX(502, BAD_GATEWAY, Bad Gateway) \ 147 | XX(503, SERVICE_UNAVAILABLE, Service Unavailable) \ 148 | XX(504, GATEWAY_TIMEOUT, Gateway Timeout) \ 149 | XX(505, HTTP_VERSION_NOT_SUPPORTED, HTTP Version Not Supported) \ 150 | XX(506, VARIANT_ALSO_NEGOTIATES, Variant Also Negotiates) \ 151 | XX(507, INSUFFICIENT_STORAGE, Insufficient Storage) \ 152 | XX(508, LOOP_DETECTED, Loop Detected) \ 153 | XX(510, NOT_EXTENDED, Not Extended) \ 154 | XX(511, NETWORK_AUTHENTICATION_REQUIRED, Network Authentication Required) \ 155 | 156 | enum http_status 157 | { 158 | #define XX(num, name, string) HTTP_STATUS_##name = num, 159 | HTTP_STATUS_MAP(XX) 160 | #undef XX 161 | }; 162 | 163 | 164 | /* Request Methods */ 165 | #define HTTP_METHOD_MAP(XX) \ 166 | XX(0, DELETE, DELETE) \ 167 | XX(1, 
GET, GET) \ 168 | XX(2, HEAD, HEAD) \ 169 | XX(3, POST, POST) \ 170 | XX(4, PUT, PUT) \ 171 | /* pathological */ \ 172 | XX(5, CONNECT, CONNECT) \ 173 | XX(6, OPTIONS, OPTIONS) \ 174 | XX(7, TRACE, TRACE) \ 175 | /* WebDAV */ \ 176 | XX(8, COPY, COPY) \ 177 | XX(9, LOCK, LOCK) \ 178 | XX(10, MKCOL, MKCOL) \ 179 | XX(11, MOVE, MOVE) \ 180 | XX(12, PROPFIND, PROPFIND) \ 181 | XX(13, PROPPATCH, PROPPATCH) \ 182 | XX(14, SEARCH, SEARCH) \ 183 | XX(15, UNLOCK, UNLOCK) \ 184 | XX(16, BIND, BIND) \ 185 | XX(17, REBIND, REBIND) \ 186 | XX(18, UNBIND, UNBIND) \ 187 | XX(19, ACL, ACL) \ 188 | /* subversion */ \ 189 | XX(20, REPORT, REPORT) \ 190 | XX(21, MKACTIVITY, MKACTIVITY) \ 191 | XX(22, CHECKOUT, CHECKOUT) \ 192 | XX(23, MERGE, MERGE) \ 193 | /* upnp */ \ 194 | XX(24, MSEARCH, M-SEARCH) \ 195 | XX(25, NOTIFY, NOTIFY) \ 196 | XX(26, SUBSCRIBE, SUBSCRIBE) \ 197 | XX(27, UNSUBSCRIBE, UNSUBSCRIBE) \ 198 | /* RFC-5789 */ \ 199 | XX(28, PATCH, PATCH) \ 200 | XX(29, PURGE, PURGE) \ 201 | /* CalDAV */ \ 202 | XX(30, MKCALENDAR, MKCALENDAR) \ 203 | /* RFC-2068, section 19.6.1.2 */ \ 204 | XX(31, LINK, LINK) \ 205 | XX(32, UNLINK, UNLINK) \ 206 | /* icecast */ \ 207 | XX(33, SOURCE, SOURCE) \ 208 | 209 | enum http_method 210 | { 211 | #define XX(num, name, string) HTTP_##name = num, 212 | HTTP_METHOD_MAP(XX) 213 | #undef XX 214 | }; 215 | 216 | 217 | enum http_parser_type { HTTP_REQUEST, HTTP_RESPONSE, HTTP_BOTH }; 218 | 219 | 220 | /* Flag values for http_parser.flags field */ 221 | enum flags 222 | { F_CHUNKED = 1 << 0 223 | , F_CONNECTION_KEEP_ALIVE = 1 << 1 224 | , F_CONNECTION_CLOSE = 1 << 2 225 | , F_CONNECTION_UPGRADE = 1 << 3 226 | , F_TRAILING = 1 << 4 227 | , F_UPGRADE = 1 << 5 228 | , F_SKIPBODY = 1 << 6 229 | , F_CONTENTLENGTH = 1 << 7 230 | }; 231 | 232 | 233 | /* Map for errno-related constants 234 | * 235 | * The provided argument should be a macro that takes 2 arguments. 
236 | */ 237 | #define HTTP_ERRNO_MAP(XX) \ 238 | /* No error */ \ 239 | XX(OK, "success") \ 240 | \ 241 | /* Callback-related errors */ \ 242 | XX(CB_message_begin, "the on_message_begin callback failed") \ 243 | XX(CB_url, "the on_url callback failed") \ 244 | XX(CB_header_field, "the on_header_field callback failed") \ 245 | XX(CB_header_value, "the on_header_value callback failed") \ 246 | XX(CB_headers_complete, "the on_headers_complete callback failed") \ 247 | XX(CB_body, "the on_body callback failed") \ 248 | XX(CB_message_complete, "the on_message_complete callback failed") \ 249 | XX(CB_status, "the on_status callback failed") \ 250 | XX(CB_chunk_header, "the on_chunk_header callback failed") \ 251 | XX(CB_chunk_complete, "the on_chunk_complete callback failed") \ 252 | \ 253 | /* Parsing-related errors */ \ 254 | XX(INVALID_EOF_STATE, "stream ended at an unexpected time") \ 255 | XX(HEADER_OVERFLOW, \ 256 | "too many header bytes seen; overflow detected") \ 257 | XX(CLOSED_CONNECTION, \ 258 | "data received after completed connection: close message") \ 259 | XX(INVALID_VERSION, "invalid HTTP version") \ 260 | XX(INVALID_STATUS, "invalid HTTP status code") \ 261 | XX(INVALID_METHOD, "invalid HTTP method") \ 262 | XX(INVALID_URL, "invalid URL") \ 263 | XX(INVALID_HOST, "invalid host") \ 264 | XX(INVALID_PORT, "invalid port") \ 265 | XX(INVALID_PATH, "invalid path") \ 266 | XX(INVALID_QUERY_STRING, "invalid query string") \ 267 | XX(INVALID_FRAGMENT, "invalid fragment") \ 268 | XX(LF_EXPECTED, "LF character expected") \ 269 | XX(INVALID_HEADER_TOKEN, "invalid character in header") \ 270 | XX(INVALID_CONTENT_LENGTH, \ 271 | "invalid character in content-length header") \ 272 | XX(UNEXPECTED_CONTENT_LENGTH, \ 273 | "unexpected content-length header") \ 274 | XX(INVALID_CHUNK_SIZE, \ 275 | "invalid character in chunk size header") \ 276 | XX(INVALID_CONSTANT, "invalid constant string") \ 277 | XX(INVALID_INTERNAL_STATE, "encountered unexpected internal 
state")\ 278 | XX(STRICT, "strict mode assertion failed") \ 279 | XX(PAUSED, "parser is paused") \ 280 | XX(UNKNOWN, "an unknown error occurred") \ 281 | XX(INVALID_TRANSFER_ENCODING, \ 282 | "request has invalid transfer-encoding") \ 283 | 284 | 285 | /* Define HPE_* values for each errno value above */ 286 | #define HTTP_ERRNO_GEN(n, s) HPE_##n, 287 | enum http_errno { 288 | HTTP_ERRNO_MAP(HTTP_ERRNO_GEN) 289 | }; 290 | #undef HTTP_ERRNO_GEN 291 | 292 | 293 | /* Get an http_errno value from an http_parser */ 294 | #define HTTP_PARSER_ERRNO(p) ((enum http_errno) (p)->http_errno) 295 | 296 | 297 | struct http_parser { 298 | /** PRIVATE **/ 299 | unsigned int type : 2; /* enum http_parser_type */ 300 | unsigned int flags : 8; /* F_* values from 'flags' enum; semi-public */ 301 | unsigned int state : 7; /* enum state from http_parser.c */ 302 | unsigned int header_state : 7; /* enum header_state from http_parser.c */ 303 | unsigned int index : 5; /* index into current matcher */ 304 | unsigned int uses_transfer_encoding : 1; /* Transfer-Encoding header is present */ 305 | unsigned int allow_chunked_length : 1; /* Allow headers with both 306 | * `Content-Length` and 307 | * `Transfer-Encoding: chunked` set */ 308 | unsigned int lenient_http_headers : 1; 309 | 310 | uint32_t nread; /* # bytes read in various scenarios */ 311 | uint64_t content_length; /* # bytes in body. `(uint64_t) -1` (all bits one) 312 | * if no Content-Length header. 313 | */ 314 | 315 | /** READ-ONLY **/ 316 | unsigned short http_major; 317 | unsigned short http_minor; 318 | unsigned int status_code : 16; /* responses only */ 319 | unsigned int method : 8; /* requests only */ 320 | unsigned int http_errno : 7; 321 | 322 | /* 1 = Upgrade header was present and the parser has exited because of that. 323 | * 0 = No upgrade header present. 324 | * Should be checked when http_parser_execute() returns in addition to 325 | * error checking. 
326 | */ 327 | unsigned int upgrade : 1; 328 | 329 | /** PUBLIC **/ 330 | void *data; /* A pointer to get hook to the "connection" or "socket" object */ 331 | }; 332 | 333 | 334 | struct http_parser_settings { 335 | http_cb on_message_begin; 336 | http_data_cb on_url; 337 | http_data_cb on_status; 338 | http_data_cb on_header_field; 339 | http_data_cb on_header_value; 340 | http_cb on_headers_complete; 341 | http_data_cb on_body; 342 | http_cb on_message_complete; 343 | /* When on_chunk_header is called, the current chunk length is stored 344 | * in parser->content_length. 345 | */ 346 | http_cb on_chunk_header; 347 | http_cb on_chunk_complete; 348 | }; 349 | 350 | 351 | enum http_parser_url_fields 352 | { UF_SCHEMA = 0 353 | , UF_HOST = 1 354 | , UF_PORT = 2 355 | , UF_PATH = 3 356 | , UF_QUERY = 4 357 | , UF_FRAGMENT = 5 358 | , UF_USERINFO = 6 359 | , UF_MAX = 7 360 | }; 361 | 362 | 363 | /* Result structure for http_parser_parse_url(). 364 | * 365 | * Callers should index into field_data[] with UF_* values iff field_set 366 | * has the relevant (1 << UF_*) bit set. As a courtesy to clients (and 367 | * because we probably have padding left over), we convert any port to 368 | * a uint16_t. 369 | */ 370 | struct http_parser_url { 371 | uint16_t field_set; /* Bitmask of (1 << UF_*) values */ 372 | uint16_t port; /* Converted UF_PORT string */ 373 | 374 | struct { 375 | uint16_t off; /* Offset into buffer in which field starts */ 376 | uint16_t len; /* Length of run in buffer */ 377 | } field_data[UF_MAX]; 378 | }; 379 | 380 | 381 | /* Returns the library version. Bits 16-23 contain the major version number, 382 | * bits 8-15 the minor version number and bits 0-7 the patch level. 
383 | * Usage example: 384 | * 385 | * unsigned long version = http_parser_version(); 386 | * unsigned major = (version >> 16) & 255; 387 | * unsigned minor = (version >> 8) & 255; 388 | * unsigned patch = version & 255; 389 | * printf("http_parser v%u.%u.%u\n", major, minor, patch); 390 | */ 391 | unsigned long http_parser_version(void); 392 | 393 | void http_parser_init(http_parser *parser, enum http_parser_type type); 394 | 395 | 396 | /* Initialize http_parser_settings members to 0 397 | */ 398 | void http_parser_settings_init(http_parser_settings *settings); 399 | 400 | 401 | /* Executes the parser. Returns number of parsed bytes. Sets 402 | * `parser->http_errno` on error. */ 403 | size_t http_parser_execute(http_parser *parser, 404 | const http_parser_settings *settings, 405 | const char *data, 406 | size_t len); 407 | 408 | 409 | /* If http_should_keep_alive() in the on_headers_complete or 410 | * on_message_complete callback returns 0, then this should be 411 | * the last message on the connection. 412 | * If you are the server, respond with the "Connection: close" header. 413 | * If you are the client, close the connection. 414 | */ 415 | int http_should_keep_alive(const http_parser *parser); 416 | 417 | /* Returns a string version of the HTTP method. */ 418 | const char *http_method_str(enum http_method m); 419 | 420 | /* Returns a string version of the HTTP status code. 
*/ 421 | const char *http_status_str(enum http_status s); 422 | 423 | /* Return a string name of the given error */ 424 | const char *http_errno_name(enum http_errno err); 425 | 426 | /* Return a string description of the given error */ 427 | const char *http_errno_description(enum http_errno err); 428 | 429 | /* Initialize all http_parser_url members to 0 */ 430 | void http_parser_url_init(struct http_parser_url *u); 431 | 432 | /* Parse a URL; return nonzero on failure */ 433 | int http_parser_parse_url(const char *buf, size_t buflen, 434 | int is_connect, 435 | struct http_parser_url *u); 436 | 437 | /* Pause or un-pause the parser; a nonzero value pauses */ 438 | void http_parser_pause(http_parser *parser, int paused); 439 | 440 | /* Checks if this is the final chunk of the body. */ 441 | int http_body_is_final(const http_parser *parser); 442 | 443 | /* Change the maximum header size provided at compile time. */ 444 | void http_parser_set_max_header_size(uint32_t size); 445 | 446 | #ifdef __cplusplus 447 | } 448 | #endif 449 | #endif 450 | --------------------------------------------------------------------------------