├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── README.md ├── benchmark └── gbench.cpp ├── include └── rapidhttp │ ├── cmake_config.h │ ├── cmake_config.h.in │ ├── constants.h │ ├── document.h │ ├── document.hpp │ ├── error_code.h │ ├── layer.hpp │ ├── rapidhttp.h │ ├── stringref.h │ └── util.h ├── install.sh ├── scripts ├── extract_http_parser.sh └── extract_pico.sh ├── test ├── parse_request.cpp └── parse_response.cpp ├── tutorial ├── parse.cpp └── serialize.cpp └── uninstall.sh /.gitignore: -------------------------------------------------------------------------------- 1 | build/* 2 | 3 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "http-parser"] 2 | path = third_party/http-parser 3 | url = https://github.com/nodejs/http-parser.git 4 | [submodule "picohttpparser"] 5 | path = third_party/picohttpparser 6 | url = https://github.com/h2o/picohttpparser.git 7 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8) 2 | 3 | ################################################################################### 4 | project(RapidHttpTest) 5 | 6 | if (CMAKE_BUILD_TYPE) 7 | else() 8 | #set(CMAKE_BUILD_TYPE DEBUG) 9 | set(CMAKE_BUILD_TYPE RELEASE) 10 | endif() 11 | 12 | set(CMAKE_CXX_FLAGS "-std=c++11 -g -Wall") 13 | 14 | option(WITH_PROFILE "link benchmark with profiler" OFF) 15 | option(USE_PICO "based picohttpparser" OFF) 16 | message("------------ Options -------------") 17 | message(" CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}") 18 | message(" CMAKE_CXX_FLAGS_FINAL: ${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE}}") 19 | message(" WITH_PROFILE: ${WITH_PROFILE}") 20 | 21 | if (USE_PICO) 22 | message(" USE_PICO: ON") 23 | set(USE_PICO 1) 24 | execute_process(COMMAND 
${PROJECT_SOURCE_DIR}/scripts/extract_pico.sh "${PROJECT_SOURCE_DIR}") 25 | else() 26 | message(" USE_PICO: OFF") 27 | set(USE_PICO 0) 28 | execute_process(COMMAND ${PROJECT_SOURCE_DIR}/scripts/extract_http_parser.sh "${PROJECT_SOURCE_DIR}") 29 | endif() 30 | message("----------------------------------") 31 | configure_file(${PROJECT_SOURCE_DIR}/include/rapidhttp/cmake_config.h.in ${PROJECT_SOURCE_DIR}/include/rapidhttp/cmake_config.h) 32 | 33 | include_directories("${PROJECT_SOURCE_DIR}/include") 34 | 35 | aux_source_directory(${PROJECT_SOURCE_DIR}/test TEST_SRC_LIST) 36 | add_executable(unittest ${TEST_SRC_LIST}) 37 | target_link_libraries(unittest -lgtest -lgtest_main -pthread) 38 | 39 | add_executable(tutorial_parse ${PROJECT_SOURCE_DIR}/tutorial/parse.cpp) 40 | add_executable(tutorial_serialize ${PROJECT_SOURCE_DIR}/tutorial/serialize.cpp) 41 | 42 | aux_source_directory(${PROJECT_SOURCE_DIR}/benchmark BM_SRC_LIST) 43 | add_executable(benchmark ${BM_SRC_LIST}) 44 | if (WITH_PROFILE) 45 | message("link benchmark with profile") 46 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DPROFILE=1") 47 | target_link_libraries(benchmark -lprofiler -lunwind) 48 | endif() 49 | target_link_libraries(benchmark -lbenchmark -ltcmalloc_minimal -pthread) 50 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # rapidhttp 2 | -------------------------------------------------------------------------------- /benchmark/gbench.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #if PROFILE 5 | #include 6 | #endif 7 | 8 | static std::string c_http_request_0 = 9 | "GET /uri/abc HTTP/1.1\r\n" 10 | "\r\n"; 11 | 12 | static std::string c_http_request_1 = 13 | "GET /uri/abc HTTP/1.1\r\n" 14 | "Accept: XAccept\r\n" 15 | "\r\n"; 16 | 17 | static std::string c_http_request_2 = 18 | "GET /uri/abc 
HTTP/1.1\r\n" 19 | "Accept: XAccept\r\n" 20 | "Host: domain.com\r\n" 21 | "\r\n"; 22 | 23 | static std::string c_http_request = 24 | "GET /uri/abc HTTP/1.1\r\n" 25 | "Accept: XAccept\r\n" 26 | "Host: domain.com\r\n" 27 | "Connection: Keep-Alive\r\n" 28 | "\r\n"; 29 | 30 | static std::string c_http_response = 31 | "HTTP/1.1 200 OK\r\n" 32 | "Accept: XAccept\r\n" 33 | "Host: domain.com\r\n" 34 | "Content-Length: 3\r\n" 35 | "\r\nabc"; 36 | 37 | static std::string c_big_request = 38 | "POST /joyent/http-parser HTTP/1.1\r\n" 39 | "Host: github.com\r\n" 40 | "DNT: 1\r\n" 41 | "Accept-Encoding: gzip, deflate, sdch\r\n" 42 | "Accept-Language: ru-RU,ru;q=0.8,en-US;q=0.6,en;q=0.4\r\n" 43 | "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) " 44 | "AppleWebKit/537.36 (KHTML, like Gecko) " 45 | "Chrome/39.0.2171.65 Safari/537.36\r\n" 46 | "Accept: text/html,application/xhtml+xml,application/xml;q=0.9," 47 | "image/webp,*/*;q=0.8\r\n" 48 | "Referer: https://github.com/joyent/http-parser\r\n" 49 | "Connection: keep-alive\r\n" 50 | "Transfer-Encoding: chunked\r\n" 51 | "Cache-Control: max-age=0\r\n\r\nb\r\nhello world\r\n0\r\n\r\n"; 52 | 53 | template void BM_ParseRequest_0_field(benchmark::State& state) 54 | { 55 | while (state.KeepRunning()) { 56 | for (int x = 0; x < state.range(0); ++x) { 57 | DocType doc(rapidhttp::Request); 58 | size_t bytes = doc.PartailParse(c_http_request_0); 59 | (void)bytes; 60 | // printf("parse bytes: %d. error:%s\n", (int)bytes, doc.ParseError().message().c_str()); 61 | } 62 | } 63 | } 64 | 65 | template void BM_ParseRequest_1_field(benchmark::State& state) 66 | { 67 | while (state.KeepRunning()) { 68 | for (int x = 0; x < state.range(0); ++x) { 69 | DocType doc(rapidhttp::Request); 70 | size_t bytes = doc.PartailParse(c_http_request_1); 71 | (void)bytes; 72 | // printf("parse bytes: %d. 
error:%s\n", (int)bytes, doc.ParseError().message().c_str()); 73 | } 74 | } 75 | } 76 | 77 | template void BM_ParseRequest_2_field(benchmark::State& state) 78 | { 79 | while (state.KeepRunning()) { 80 | for (int x = 0; x < state.range(0); ++x) { 81 | DocType doc(rapidhttp::Request); 82 | size_t bytes = doc.PartailParse(c_http_request_2); 83 | (void)bytes; 84 | // printf("parse bytes: %d. error:%s\n", (int)bytes, doc.ParseError().message().c_str()); 85 | } 86 | } 87 | } 88 | 89 | template void BM_ParseRequest_3_field(benchmark::State& state) 90 | { 91 | while (state.KeepRunning()) { 92 | for (int x = 0; x < state.range(0); ++x) { 93 | DocType doc(rapidhttp::Request); 94 | size_t bytes = doc.PartailParse(c_http_request.c_str(), c_http_request.size()); 95 | (void)bytes; 96 | // printf("parse bytes: %d. error:%s\n", (int)bytes, doc.ParseError().message().c_str()); 97 | } 98 | } 99 | } 100 | 101 | template void BM_ParseRequest_big(benchmark::State& state) 102 | { 103 | while (state.KeepRunning()) { 104 | for (int x = 0; x < state.range(0); ++x) { 105 | DocType doc(rapidhttp::Request); 106 | size_t bytes = doc.PartailParse(c_big_request); 107 | (void)bytes; 108 | // printf("parse bytes: %d. error:%s\n", (int)bytes, doc.ParseError().message().c_str()); 109 | } 110 | } 111 | } 112 | 113 | template void BM_ParseResponse(benchmark::State& state) 114 | { 115 | while (state.KeepRunning()) { 116 | for (int x = 0; x < state.range(0); ++x) { 117 | DocType doc(rapidhttp::Response); 118 | size_t bytes = doc.PartailParse(c_http_response.c_str(), c_http_response.size()); 119 | (void)bytes; 120 | // printf("parse bytes: %d. 
error:%s\n", (int)bytes, doc.ParseError().message().c_str()); 121 | } 122 | } 123 | } 124 | 125 | template 126 | DocType& GetDoc() 127 | { 128 | static DocType doc(rapidhttp::Response); 129 | return doc; 130 | } 131 | 132 | template void BM_PartialParseResponse(benchmark::State& state) 133 | { 134 | while (state.KeepRunning()) { 135 | for (int x = 0; x < state.range(0); ++x) { 136 | auto & doc = GetDoc(); 137 | size_t bytes = doc.PartailParse(c_http_response.c_str(), c_http_response.size() / 2); 138 | bytes += doc.PartailParse(c_http_response.c_str() + bytes, c_http_response.size() - bytes); 139 | // printf("parse bytes: %d. error:%s done:%d\n", (int)bytes, doc.ParseError().message().c_str(), doc.ParseDone()); 140 | } 141 | } 142 | } 143 | 144 | template void BM_Serialize(benchmark::State& state) 145 | { 146 | while (state.KeepRunning()) { 147 | for (int x = 0; x < state.range(0); ++x) { 148 | auto & doc = GetDoc(); 149 | char buf[128] = {}; 150 | bool b = doc.Serialize(buf, sizeof(buf)); 151 | (void)b; 152 | // printf("response:\n%s\nByteSize:%d\n", buf, (int)doc.ByteSize()); 153 | // exit(0); 154 | // printf("serialize ok: %d\n", b); 155 | } 156 | } 157 | } 158 | 159 | template void BM_CopyTo(benchmark::State& state) 160 | { 161 | while (state.KeepRunning()) { 162 | for (int x = 0; x < state.range(0); ++x) { 163 | auto & src = GetDoc(); 164 | auto & dst = GetDoc(); 165 | src.CopyTo(dst); 166 | } 167 | } 168 | } 169 | 170 | BENCHMARK_TEMPLATE(BM_ParseRequest_0_field, rapidhttp::HttpDocument)->Arg(1); 171 | BENCHMARK_TEMPLATE(BM_ParseRequest_1_field, rapidhttp::HttpDocument)->Arg(1); 172 | BENCHMARK_TEMPLATE(BM_ParseRequest_2_field, rapidhttp::HttpDocument)->Arg(1); 173 | BENCHMARK_TEMPLATE(BM_ParseRequest_3_field, rapidhttp::HttpDocument)->Arg(1); 174 | BENCHMARK_TEMPLATE(BM_ParseRequest_big, rapidhttp::HttpDocument)->Arg(1); 175 | BENCHMARK_TEMPLATE(BM_ParseResponse, rapidhttp::HttpDocument)->Arg(1); 176 | BENCHMARK_TEMPLATE(BM_PartialParseResponse, 
rapidhttp::HttpDocument)->Arg(1); 177 | BENCHMARK_TEMPLATE(BM_Serialize, rapidhttp::HttpDocument)->Arg(1); 178 | 179 | BENCHMARK_TEMPLATE(BM_ParseRequest_0_field, rapidhttp::HttpDocumentRef)->Arg(1); 180 | BENCHMARK_TEMPLATE(BM_ParseRequest_1_field, rapidhttp::HttpDocumentRef)->Arg(1); 181 | BENCHMARK_TEMPLATE(BM_ParseRequest_2_field, rapidhttp::HttpDocumentRef)->Arg(1); 182 | BENCHMARK_TEMPLATE(BM_ParseRequest_3_field, rapidhttp::HttpDocumentRef)->Arg(1); 183 | BENCHMARK_TEMPLATE(BM_ParseRequest_big, rapidhttp::HttpDocumentRef)->Arg(1); 184 | BENCHMARK_TEMPLATE(BM_ParseResponse, rapidhttp::HttpDocumentRef)->Arg(1); 185 | BENCHMARK_TEMPLATE(BM_PartialParseResponse, rapidhttp::HttpDocumentRef)->Arg(1); 186 | BENCHMARK_TEMPLATE(BM_Serialize, rapidhttp::HttpDocumentRef)->Arg(1); 187 | 188 | BENCHMARK_TEMPLATE(BM_CopyTo, rapidhttp::HttpDocumentRef, rapidhttp::HttpDocument)->Arg(1); 189 | BENCHMARK_TEMPLATE(BM_CopyTo, rapidhttp::HttpDocumentRef, rapidhttp::HttpDocumentRef)->Arg(1); 190 | BENCHMARK_TEMPLATE(BM_CopyTo, rapidhttp::HttpDocument, rapidhttp::HttpDocumentRef)->Arg(1); 191 | BENCHMARK_TEMPLATE(BM_CopyTo, rapidhttp::HttpDocument, rapidhttp::HttpDocument)->Arg(1); 192 | 193 | int main(int argc, char** argv) { 194 | ::benchmark::Initialize(&argc, argv); 195 | #if PROFILE 196 | ProfilerStart("bench.prof"); 197 | #endif 198 | ::benchmark::RunSpecifiedBenchmarks(); 199 | #if PROFILE 200 | ProfilerStop(); 201 | #endif 202 | } 203 | -------------------------------------------------------------------------------- /include/rapidhttp/cmake_config.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #define USE_PICO 0 4 | -------------------------------------------------------------------------------- /include/rapidhttp/cmake_config.h.in: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #define USE_PICO ${USE_PICO} 4 | 
-------------------------------------------------------------------------------- /include/rapidhttp/constants.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace rapidhttp { 6 | 7 | // CRLF 8 | static const std::string c_crlf = "\r\n"; 9 | 10 | // HTTP头结束符 11 | static const std::string c_header_end = "\r\n\r\n"; 12 | 13 | // 头部域分隔符 14 | static const char c_field_split = ':'; 15 | 16 | } //namespace rapidhttp 17 | -------------------------------------------------------------------------------- /include/rapidhttp/document.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "cmake_config.h" 12 | 13 | namespace rapidhttp { 14 | 15 | enum DocumentType 16 | { 17 | Request, 18 | Response, 19 | }; 20 | 21 | // Http Header document class. 22 | template 23 | class THttpDocument 24 | { 25 | public: 26 | typedef StringT string_t; 27 | 28 | explicit THttpDocument(DocumentType type); 29 | THttpDocument(THttpDocument const& other) = delete; 30 | THttpDocument(THttpDocument && other) = delete; 31 | THttpDocument& operator=(THttpDocument const& other) = delete; 32 | THttpDocument& operator=(THttpDocument && other) = delete; 33 | 34 | template 35 | void CopyTo(THttpDocument & clone) const; 36 | 37 | /// ------------------- parse/generate --------------------- 38 | /// 流式解析 39 | // @buf_ref: 外部传入的缓冲区首地址 40 | // @len: 缓冲区长度 41 | // @returns:返回已成功解析到的数据长度 42 | inline size_t PartailParse(const char* buf_ref, size_t len); 43 | inline size_t PartailParse(std::string const& buf); 44 | 45 | /// 解析eof 46 | // 解析Response时, 断开链接时要调用这个接口, 因为有些response协议需要读取到 47 | // 网络链接断开为止. 
48 | inline bool PartailParseEof(); 49 | 50 | /// 是否解析成功 51 | inline bool ParseDone(); 52 | 53 | /// 重置解析流状态 54 | // 同时清除解析流状态和已解析成功的数据状态 55 | inline void Reset(); 56 | 57 | /// 返回解析错误码 58 | inline std::error_code ParseError(); 59 | 60 | /// 是否全部初始化完成, Serialize之前会做这个校验 61 | inline bool IsInitialized() const; 62 | 63 | /// Serialize后的数据长度 64 | inline size_t ByteSize() const; 65 | 66 | /// 序列化 67 | inline bool Serialize(char *buf, size_t len); 68 | inline std::string SerializeAsString(); 69 | /// -------------------------------------------------------- 70 | 71 | /// ------------------- fields get/set --------------------- 72 | inline string_t const& GetMethod(); 73 | inline void SetMethod(const char* m); 74 | inline void SetMethod(std::string const& m); 75 | 76 | inline string_t const& GetUri(); 77 | inline void SetUri(const char* m); 78 | inline void SetUri(std::string const& m); 79 | 80 | inline string_t const& GetStatus(); 81 | inline void SetStatus(const char* m); 82 | inline void SetStatus(std::string const& m); 83 | 84 | inline int GetStatusCode(); 85 | inline void SetStatusCode(int code); 86 | 87 | inline int GetMajor(); 88 | inline void SetMajor(int v); 89 | 90 | inline int GetMinor(); 91 | inline void SetMinor(int v); 92 | 93 | inline string_t const& GetField(std::string const& k); 94 | inline void SetField(std::string const& k, const char* m); 95 | inline void SetField(std::string const& k, std::string const& m); 96 | 97 | inline string_t const& GetBody(); 98 | inline void SetBody(const char* m); 99 | inline void SetBody(std::string const& m); 100 | /// -------------------------------------------------------- 101 | 102 | inline bool IsRequest() const { return type_ == Request; } 103 | inline bool IsResponse() const { return type_ == Response; } 104 | 105 | private: 106 | inline bool CheckMethod() const; 107 | inline bool CheckUri() const; 108 | inline bool CheckStatusCode() const; 109 | inline bool CheckStatus() const; 110 | inline bool CheckVersion() 
const; 111 | 112 | #if USE_PICO 113 | #else 114 | // http-parser 115 | private: 116 | static inline int sOnHeadersComplete(http_parser *parser); 117 | static inline int sOnMessageComplete(http_parser *parser); 118 | static inline int sOnUrl(http_parser *parser, const char *at, size_t length); 119 | static inline int sOnStatus(http_parser *parser, const char *at, size_t length); 120 | static inline int sOnHeaderField(http_parser *parser, const char *at, size_t length); 121 | static inline int sOnHeaderValue(http_parser *parser, const char *at, size_t length); 122 | static inline int sOnBody(http_parser *parser, const char *at, size_t length); 123 | 124 | inline int OnHeadersComplete(http_parser *parser); 125 | inline int OnMessageComplete(http_parser *parser); 126 | inline int OnUrl(http_parser *parser, const char *at, size_t length); 127 | inline int OnStatus(http_parser *parser, const char *at, size_t length); 128 | inline int OnHeaderField(http_parser *parser, const char *at, size_t length); 129 | inline int OnHeaderValue(http_parser *parser, const char *at, size_t length); 130 | inline int OnBody(http_parser *parser, const char *at, size_t length); 131 | #endif 132 | 133 | private: 134 | DocumentType type_; // 类型 135 | 136 | bool parse_done_ = false; 137 | std::error_code ec_; // 解析错状态 138 | 139 | #if USE_PICO 140 | #else 141 | struct http_parser parser_; 142 | struct http_parser_settings settings_; 143 | #endif 144 | 145 | int kv_state_ = 0; 146 | string_t callback_header_key_cache_; 147 | string_t callback_header_value_cache_; 148 | 149 | // 默认版本号: HTTP/1.1 150 | uint32_t major_ = 1; 151 | uint32_t minor_ = 1; 152 | 153 | string_t request_method_; 154 | string_t request_uri_; 155 | 156 | uint32_t response_status_code_ = 0; 157 | string_t response_status_; 158 | 159 | std::vector> header_fields_; 160 | 161 | string_t body_; 162 | 163 | template 164 | friend class THttpDocument; 165 | }; 166 | 167 | } //namespace rapidhttp 168 | 169 | #include "document.hpp" 170 
| -------------------------------------------------------------------------------- /include/rapidhttp/document.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "document.h" 3 | #include 4 | #include 5 | #include 6 | 7 | namespace rapidhttp { 8 | 9 | template 10 | inline THttpDocument::THttpDocument(DocumentType type) 11 | : type_(type) 12 | { 13 | Reset(); 14 | #if USE_PICO 15 | #else 16 | memset(&settings_, 0, sizeof(settings_)); 17 | settings_.on_headers_complete = sOnHeadersComplete; 18 | settings_.on_message_complete = sOnMessageComplete; 19 | settings_.on_url = sOnUrl; 20 | settings_.on_status = sOnStatus; 21 | settings_.on_header_field = sOnHeaderField; 22 | settings_.on_header_value = sOnHeaderValue; 23 | settings_.on_body = sOnBody; 24 | #endif 25 | } 26 | 27 | template 28 | template 29 | void THttpDocument::CopyTo(THttpDocument & clone) const 30 | { 31 | #define _COPY_TO(param) \ 32 | clone.param = this->param 33 | 34 | _COPY_TO(type_); 35 | _COPY_TO(parse_done_); 36 | _COPY_TO(ec_); 37 | _COPY_TO(parser_); 38 | clone.parser_.data = &clone; 39 | _COPY_TO(kv_state_); 40 | _COPY_TO(callback_header_key_cache_); 41 | _COPY_TO(callback_header_value_cache_); 42 | _COPY_TO(major_); 43 | _COPY_TO(minor_); 44 | _COPY_TO(request_method_); 45 | _COPY_TO(request_uri_); 46 | _COPY_TO(response_status_code_); 47 | _COPY_TO(response_status_); 48 | _COPY_TO(body_); 49 | 50 | clone.header_fields_.clear(); 51 | clone.header_fields_.reserve(this->header_fields_.size()); 52 | for (auto const& kv : this->header_fields_) 53 | { 54 | clone.header_fields_.emplace_back(std::pair( 55 | (OStringT)kv.first, (OStringT)kv.second)); 56 | } 57 | 58 | #undef _COPY_TO 59 | } 60 | 61 | /// ------------------- parse/generate --------------------- 62 | /// 流式解析 63 | // @buf_ref: 外部传入的缓冲区首地址, 再调用Storage前必须保证缓冲区有效且不变. 64 | // @len: 缓冲区长度 65 | // @returns:解析完成返回error_code=0, 解析一半返回error_code=1, 解析失败返回其他错误码. 
66 | template 67 | inline size_t THttpDocument::PartailParse(std::string const& buf) 68 | { 69 | return PartailParse(buf.c_str(), buf.size()); 70 | } 71 | 72 | #if USE_PICO 73 | #else 74 | template 75 | inline size_t THttpDocument::PartailParse(const char* buf_ref, size_t len) 76 | { 77 | if (ParseDone() || ParseError()) 78 | Reset(); 79 | 80 | size_t parsed = http_parser_execute(&parser_, &settings_, buf_ref, len); 81 | if (parser_.http_errno) { 82 | // TODO: support pause 83 | ec_ = MakeParseErrorCode(parser_.http_errno); 84 | } 85 | return parsed; 86 | } 87 | template 88 | inline bool THttpDocument::PartailParseEof() 89 | { 90 | if (ParseDone() || ParseError()) 91 | return false; 92 | 93 | PartailParse("", 0); 94 | return ParseDone(); 95 | } 96 | template 97 | inline bool THttpDocument::ParseDone() 98 | { 99 | return parse_done_; 100 | } 101 | 102 | template 103 | inline int THttpDocument::sOnHeadersComplete(http_parser *parser) 104 | { 105 | return ((THttpDocument*)parser->data)->OnHeadersComplete(parser); 106 | } 107 | template 108 | inline int THttpDocument::sOnMessageComplete(http_parser *parser) 109 | { 110 | return ((THttpDocument*)parser->data)->OnMessageComplete(parser); 111 | } 112 | template 113 | inline int THttpDocument::sOnUrl(http_parser *parser, const char *at, size_t length) 114 | { 115 | return ((THttpDocument*)parser->data)->OnUrl(parser, at, length); 116 | } 117 | template 118 | inline int THttpDocument::sOnStatus(http_parser *parser, const char *at, size_t length) 119 | { 120 | return ((THttpDocument*)parser->data)->OnStatus(parser, at, length); 121 | } 122 | template 123 | inline int THttpDocument::sOnHeaderField(http_parser *parser, const char *at, size_t length) 124 | { 125 | return ((THttpDocument*)parser->data)->OnHeaderField(parser, at, length); 126 | } 127 | template 128 | inline int THttpDocument::sOnHeaderValue(http_parser *parser, const char *at, size_t length) 129 | { 130 | return 
((THttpDocument*)parser->data)->OnHeaderValue(parser, at, length); 131 | } 132 | template 133 | inline int THttpDocument::sOnBody(http_parser *parser, const char *at, size_t length) 134 | { 135 | return ((THttpDocument*)parser->data)->OnBody(parser, at, length); 136 | } 137 | 138 | template 139 | inline int THttpDocument::OnHeadersComplete(http_parser *parser) 140 | { 141 | if (IsRequest()) 142 | request_method_ = http_method_str((http_method)parser->method); 143 | else 144 | response_status_code_ = parser->status_code; 145 | major_ = parser->http_major; 146 | minor_ = parser->http_minor; 147 | if (kv_state_ == 1) { 148 | header_fields_.emplace_back(std::move(callback_header_key_cache_), 149 | std::move(callback_header_value_cache_)); 150 | kv_state_ = 0; 151 | } 152 | return 0; 153 | } 154 | template 155 | inline int THttpDocument::OnMessageComplete(http_parser *parser) 156 | { 157 | parse_done_ = true; 158 | return 0; 159 | } 160 | template 161 | inline int THttpDocument::OnUrl(http_parser *parser, const char *at, size_t length) 162 | { 163 | request_uri_.append(at, length); 164 | return 0; 165 | } 166 | template 167 | inline int THttpDocument::OnStatus(http_parser *parser, const char *at, size_t length) 168 | { 169 | response_status_.append(at, length); 170 | return 0; 171 | } 172 | template 173 | inline int THttpDocument::OnHeaderField(http_parser *parser, const char *at, size_t length) 174 | { 175 | if (kv_state_ == 1) { 176 | header_fields_.emplace_back(std::move(callback_header_key_cache_), 177 | std::move(callback_header_value_cache_)); 178 | kv_state_ = 0; 179 | } 180 | 181 | callback_header_key_cache_.append(at, length); 182 | return 0; 183 | } 184 | template 185 | inline int THttpDocument::OnHeaderValue(http_parser *parser, const char *at, size_t length) 186 | { 187 | kv_state_ = 1; 188 | callback_header_value_cache_.append(at, length); 189 | return 0; 190 | } 191 | template 192 | inline int THttpDocument::OnBody(http_parser *parser, const char *at, 
size_t length) 193 | { 194 | body_.append(at, length); 195 | return 0; 196 | } 197 | #endif 198 | 199 | template 200 | inline void THttpDocument::Reset() 201 | { 202 | #if USE_PICO 203 | #else 204 | http_parser_init(&parser_, IsRequest() ? HTTP_REQUEST : HTTP_RESPONSE); 205 | parser_.data = this; 206 | #endif 207 | 208 | parse_done_ = false; 209 | ec_ = std::error_code(); 210 | kv_state_ = 0; 211 | callback_header_key_cache_.clear(); 212 | callback_header_value_cache_.clear(); 213 | major_ = 1; 214 | minor_ = 1; 215 | request_method_.clear(); 216 | request_uri_.clear(); 217 | response_status_code_ = 0; 218 | response_status_.clear(); 219 | header_fields_.clear(); 220 | body_.clear(); 221 | } 222 | 223 | // 返回解析错误码 224 | template 225 | inline std::error_code THttpDocument::ParseError() 226 | { 227 | return ec_; 228 | } 229 | 230 | template 231 | inline bool THttpDocument::IsInitialized() const 232 | { 233 | if (IsRequest()) 234 | return CheckMethod() && CheckUri() && CheckVersion(); 235 | else 236 | return CheckVersion() && CheckStatusCode() && CheckStatus(); 237 | } 238 | 239 | template 240 | inline size_t THttpDocument::ByteSize() const 241 | { 242 | if (!IsInitialized()) return 0; 243 | 244 | size_t bytes = 0; 245 | if (IsRequest()) { 246 | bytes += request_method_.size() + 1; // GET\s 247 | bytes += request_uri_.size() + 1; // /uri\s 248 | bytes += 10; // HTTP/1.1CRLF 249 | } else { 250 | bytes += 9; // HTTP/1.1\s 251 | bytes += UIntegerByteSize(response_status_code_) + 1; // 200\s 252 | bytes += response_status_.size() + 2; // okCRLF 253 | } 254 | for (auto const& kv : header_fields_) { 255 | bytes += kv.first.size() + 2 + kv.second.size() + 2; 256 | } 257 | bytes += 2; 258 | bytes += body_.size(); 259 | return bytes; 260 | } 261 | 262 | template 263 | inline bool THttpDocument::Serialize(char *buf, size_t len) 264 | { 265 | size_t bytes = ByteSize(); 266 | if (!bytes || len < bytes) return false; 267 | #define _WRITE_STRING(ss) \ 268 | do {\ 269 | memcpy(buf, 
ss.c_str(), ss.size()); \ 270 | buf += ss.size(); \ 271 | } while(0); 272 | 273 | #define _WRITE_C_STR(c_str, length) \ 274 | do {\ 275 | memcpy(buf, c_str, length); \ 276 | buf += length; \ 277 | } while(0); 278 | 279 | #define _WRITE_CRLF() \ 280 | *buf++ = '\r';\ 281 | *buf++ = '\n' 282 | 283 | char *ori = buf; 284 | if (IsRequest()) { 285 | _WRITE_STRING(request_method_); 286 | *buf++ = ' '; 287 | _WRITE_STRING(request_uri_); 288 | _WRITE_C_STR(" HTTP/", 6); 289 | *buf++ = major_ + '0'; 290 | *buf++ = '.'; 291 | *buf++ = minor_ + '0'; 292 | } else { 293 | _WRITE_C_STR("HTTP/", 5); 294 | *buf++ = major_ + '0'; 295 | *buf++ = '.'; 296 | *buf++ = minor_ + '0'; 297 | *buf++ = ' '; 298 | *buf++ = (response_status_code_ / 100) + '0'; 299 | *buf++ = (response_status_code_ % 100) / 10 + '0'; 300 | *buf++ = (response_status_code_ % 10) + '0'; 301 | *buf++ = ' '; 302 | _WRITE_STRING(response_status_); 303 | } 304 | _WRITE_CRLF(); 305 | for (auto const& kv : header_fields_) { 306 | _WRITE_STRING(kv.first); 307 | *buf++ = ':'; 308 | *buf++ = ' '; 309 | _WRITE_STRING(kv.second); 310 | _WRITE_CRLF(); 311 | } 312 | _WRITE_CRLF(); 313 | _WRITE_STRING(body_); 314 | size_t length = buf - ori; 315 | (void)length; 316 | return true; 317 | #undef _WRITE_CRLF 318 | #undef _WRITE_C_STR 319 | #undef _WRITE_STRING 320 | } 321 | template 322 | inline std::string THttpDocument::SerializeAsString() 323 | { 324 | std::string s; 325 | size_t bytes = ByteSize(); 326 | if (!bytes) return ""; 327 | s.resize(bytes); 328 | if (!Serialize(&s[0], bytes)) return ""; 329 | return s; 330 | } 331 | template 332 | inline bool THttpDocument::CheckMethod() const 333 | { 334 | return !request_method_.empty(); 335 | } 336 | template 337 | inline bool THttpDocument::CheckUri() const 338 | { 339 | return !request_uri_.empty() && request_uri_[0] == '/'; 340 | } 341 | template 342 | inline bool THttpDocument::CheckStatusCode() const 343 | { 344 | return response_status_code_ >= 100 && response_status_code_ < 
1000; 345 | } 346 | template 347 | inline bool THttpDocument::CheckStatus() const 348 | { 349 | return !response_status_.empty(); 350 | } 351 | template 352 | inline bool THttpDocument::CheckVersion() const 353 | { 354 | return major_ >= 0 && major_ <= 9 && minor_ >= 0 && minor_ <= 9; 355 | } 356 | /// -------------------------------------------------------- 357 | 358 | /// ------------------- fields get/set --------------------- 359 | template 360 | inline StringT const& THttpDocument::GetMethod() 361 | { 362 | return request_method_; 363 | } 364 | 365 | template 366 | inline void THttpDocument::SetMethod(const char* m) 367 | { 368 | request_method_ = m; 369 | } 370 | template 371 | inline void THttpDocument::SetMethod(std::string const& m) 372 | { 373 | request_method_ = m; 374 | } 375 | template 376 | inline StringT const& THttpDocument::GetUri() 377 | { 378 | return request_uri_; 379 | } 380 | template 381 | inline void THttpDocument::SetUri(const char* m) 382 | { 383 | request_uri_ = m; 384 | } 385 | template 386 | inline void THttpDocument::SetUri(std::string const& m) 387 | { 388 | request_uri_ = m; 389 | } 390 | template 391 | inline StringT const& THttpDocument::GetStatus() 392 | { 393 | return response_status_; 394 | } 395 | template 396 | inline void THttpDocument::SetStatus(const char* m) 397 | { 398 | response_status_ = m; 399 | } 400 | template 401 | inline void THttpDocument::SetStatus(std::string const& m) 402 | { 403 | response_status_ = m; 404 | } 405 | template 406 | inline int THttpDocument::GetStatusCode() 407 | { 408 | return response_status_code_; 409 | } 410 | template 411 | inline void THttpDocument::SetStatusCode(int code) 412 | { 413 | response_status_code_ = code; 414 | } 415 | template 416 | inline int THttpDocument::GetMajor() 417 | { 418 | return major_; 419 | } 420 | template 421 | inline void THttpDocument::SetMajor(int v) 422 | { 423 | major_ = v; 424 | } 425 | template 426 | inline int THttpDocument::GetMinor() 427 | { 428 | 
/// Status codes produced by RapidHttp itself (as opposed to the codes of the
/// underlying HTTP parser, which use a separate category).
enum class eErrorCode
{
    success = 0,
    parse_progress = 1,
    parse_error = 2,
};

/// std::error_category that turns eErrorCode values into readable text.
class ErrorCategory : public std::error_category
{
public:
    /// Name of this category.
    const char* name() const noexcept override
    {
        return "RapidHttp Error";
    }

    /// Returns the description of `code`.
    ///
    /// Every eErrorCode enumerator has its own text; any other integer yields
    /// "unknown error" rather than throwing.
    std::string message(int code) const override
    {
        switch (static_cast<eErrorCode>(code)) {
            case eErrorCode::success:
                return "success";

            case eErrorCode::parse_progress:
                // Not a failure, so it must not be reported as an unknown error.
                return "parse in progress";

            case eErrorCode::parse_error:
                return "parse error";
        }

        // Reached only for integers that are not eErrorCode enumerators.
        return "unknown error";
    }
};
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to 6 | * deal in the Software without restriction, including without limitation the 7 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 8 | * sell copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 20 | * IN THE SOFTWARE. 21 | */ 22 | #ifndef http_parser_h 23 | #define http_parser_h 24 | #ifdef __cplusplus 25 | namespace rapidhttp { 26 | #endif 27 | 28 | /* Also update SONAME in the Makefile whenever you change these. 
*/ 29 | #define HTTP_PARSER_VERSION_MAJOR 2 30 | #define HTTP_PARSER_VERSION_MINOR 7 31 | #define HTTP_PARSER_VERSION_PATCH 1 32 | 33 | #include 34 | #if defined(_WIN32) && !defined(__MINGW32__) && \ 35 | (!defined(_MSC_VER) || _MSC_VER<1600) && !defined(__WINE__) 36 | #include 37 | #include 38 | typedef __int8 int8_t; 39 | typedef unsigned __int8 uint8_t; 40 | typedef __int16 int16_t; 41 | typedef unsigned __int16 uint16_t; 42 | typedef __int32 int32_t; 43 | typedef unsigned __int32 uint32_t; 44 | typedef __int64 int64_t; 45 | typedef unsigned __int64 uint64_t; 46 | #else 47 | #include 48 | #endif 49 | 50 | /* Compile with -DHTTP_PARSER_STRICT=0 to make fewer checks, but run 51 | * faster 52 | */ 53 | #ifndef HTTP_PARSER_STRICT 54 | # define HTTP_PARSER_STRICT 1 55 | #endif 56 | 57 | /* Maximum header size allowed. If the macro is not defined 58 | * before including this header then the default is used. To 59 | * change the maximum header size, define the macro in the build 60 | * environment (e.g. -DHTTP_MAX_HEADER_SIZE=<value>). To remove 61 | * the effective limit on the size of the header, define the macro 62 | * to a very large number (e.g. -DHTTP_MAX_HEADER_SIZE=0x7fffffff) 63 | */ 64 | #ifndef HTTP_MAX_HEADER_SIZE 65 | # define HTTP_MAX_HEADER_SIZE (80*1024) 66 | #endif 67 | 68 | typedef struct http_parser http_parser; 69 | typedef struct http_parser_settings http_parser_settings; 70 | 71 | 72 | /* Callbacks should return non-zero to indicate an error. The parser will 73 | * then halt execution. 74 | * 75 | * The one exception is on_headers_complete. In a HTTP_RESPONSE parser 76 | * returning '1' from on_headers_complete will tell the parser that it 77 | * should not expect a body. This is used when receiving a response to a 78 | * HEAD request which may contain 'Content-Length' or 'Transfer-Encoding: 79 | * chunked' headers that indicate the presence of a body.
80 | * 81 | * Returning `2` from on_headers_complete will tell the parser that it should 82 | * expect neither a body nor any further responses on this connection. This is 83 | * useful for handling responses to a CONNECT request which may not contain 84 | * `Upgrade` or `Connection: upgrade` headers. 85 | * 86 | * http_data_cb does not return data chunks. It will be called arbitrarily 87 | * many times for each string. E.g. you might get 10 callbacks for "on_url" 88 | * each providing just a few characters more data. 89 | */ 90 | typedef int (*http_data_cb) (http_parser*, const char *at, size_t length); 91 | typedef int (*http_cb) (http_parser*); 92 | 93 | 94 | /* Request Methods */ 95 | #define HTTP_METHOD_MAP(XX) \ 96 | XX(0, DELETE, DELETE) \ 97 | XX(1, GET, GET) \ 98 | XX(2, HEAD, HEAD) \ 99 | XX(3, POST, POST) \ 100 | XX(4, PUT, PUT) \ 101 | /* pathological */ \ 102 | XX(5, CONNECT, CONNECT) \ 103 | XX(6, OPTIONS, OPTIONS) \ 104 | XX(7, TRACE, TRACE) \ 105 | /* WebDAV */ \ 106 | XX(8, COPY, COPY) \ 107 | XX(9, LOCK, LOCK) \ 108 | XX(10, MKCOL, MKCOL) \ 109 | XX(11, MOVE, MOVE) \ 110 | XX(12, PROPFIND, PROPFIND) \ 111 | XX(13, PROPPATCH, PROPPATCH) \ 112 | XX(14, SEARCH, SEARCH) \ 113 | XX(15, UNLOCK, UNLOCK) \ 114 | XX(16, BIND, BIND) \ 115 | XX(17, REBIND, REBIND) \ 116 | XX(18, UNBIND, UNBIND) \ 117 | XX(19, ACL, ACL) \ 118 | /* subversion */ \ 119 | XX(20, REPORT, REPORT) \ 120 | XX(21, MKACTIVITY, MKACTIVITY) \ 121 | XX(22, CHECKOUT, CHECKOUT) \ 122 | XX(23, MERGE, MERGE) \ 123 | /* upnp */ \ 124 | XX(24, MSEARCH, M-SEARCH) \ 125 | XX(25, NOTIFY, NOTIFY) \ 126 | XX(26, SUBSCRIBE, SUBSCRIBE) \ 127 | XX(27, UNSUBSCRIBE, UNSUBSCRIBE) \ 128 | /* RFC-5789 */ \ 129 | XX(28, PATCH, PATCH) \ 130 | XX(29, PURGE, PURGE) \ 131 | /* CalDAV */ \ 132 | XX(30, MKCALENDAR, MKCALENDAR) \ 133 | /* RFC-2068, section 19.6.1.2 */ \ 134 | XX(31, LINK, LINK) \ 135 | XX(32, UNLINK, UNLINK) \ 136 | 137 | enum http_method 138 | { 139 | #define XX(num, name, string) HTTP_##name = 
/* Bit masks stored in the http_parser.flags field.
 *
 * Each enumerator is one distinct bit, so masks can be OR-ed together;
 * the eight values occupy bits 0 through 7.
 */
enum flags {
  F_CHUNKED               = 0x01,  /* bit 0 */
  F_CONNECTION_KEEP_ALIVE = 0x02,  /* bit 1 */
  F_CONNECTION_CLOSE      = 0x04,  /* bit 2 */
  F_CONNECTION_UPGRADE    = 0x08,  /* bit 3 */
  F_TRAILING              = 0x10,  /* bit 4 */
  F_UPGRADE               = 0x20,  /* bit 5 */
  F_SKIPBODY              = 0x40,  /* bit 6 */
  F_CONTENTLENGTH         = 0x80   /* bit 7 */
};
XX(INVALID_PATH, "invalid path") \ 194 | XX(INVALID_QUERY_STRING, "invalid query string") \ 195 | XX(INVALID_FRAGMENT, "invalid fragment") \ 196 | XX(LF_EXPECTED, "LF character expected") \ 197 | XX(INVALID_HEADER_TOKEN, "invalid character in header") \ 198 | XX(INVALID_CONTENT_LENGTH, \ 199 | "invalid character in content-length header") \ 200 | XX(UNEXPECTED_CONTENT_LENGTH, \ 201 | "unexpected content-length header") \ 202 | XX(INVALID_CHUNK_SIZE, \ 203 | "invalid character in chunk size header") \ 204 | XX(INVALID_CONSTANT, "invalid constant string") \ 205 | XX(INVALID_INTERNAL_STATE, "encountered unexpected internal state")\ 206 | XX(STRICT, "strict mode assertion failed") \ 207 | XX(PAUSED, "parser is paused") \ 208 | XX(UNKNOWN, "an unknown error occurred") 209 | 210 | 211 | /* Define HPE_* values for each errno value above */ 212 | #define HTTP_ERRNO_GEN(n, s) HPE_##n, 213 | enum http_errno { 214 | HTTP_ERRNO_MAP(HTTP_ERRNO_GEN) 215 | }; 216 | #undef HTTP_ERRNO_GEN 217 | 218 | 219 | /* Get an http_errno value from an http_parser */ 220 | #define HTTP_PARSER_ERRNO(p) ((enum http_errno) (p)->http_errno) 221 | 222 | 223 | struct http_parser { 224 | /** PRIVATE **/ 225 | unsigned int type : 2; /* enum http_parser_type */ 226 | unsigned int flags : 8; /* F_* values from 'flags' enum; semi-public */ 227 | unsigned int state : 7; /* enum state from http_parser.c */ 228 | unsigned int header_state : 7; /* enum header_state from http_parser.c */ 229 | unsigned int index : 7; /* index into current matcher */ 230 | unsigned int lenient_http_headers : 1; 231 | 232 | uint32_t nread; /* # bytes read in various scenarios */ 233 | uint64_t content_length; /* # bytes in body (0 if no Content-Length header) */ 234 | 235 | /** READ-ONLY **/ 236 | unsigned short http_major; 237 | unsigned short http_minor; 238 | unsigned int status_code : 16; /* responses only */ 239 | unsigned int method : 8; /* requests only */ 240 | unsigned int http_errno : 7; 241 | 242 | /* 1 = Upgrade 
/* Indices of the URL components; numbering starts at 0 and is consecutive.
 * UF_MAX is not a component: it is the number of components.
 */
enum http_parser_url_fields {
  UF_SCHEMA = 0,
  UF_HOST,      /* 1 */
  UF_PORT,      /* 2 */
  UF_PATH,      /* 3 */
  UF_QUERY,     /* 4 */
  UF_FRAGMENT,  /* 5 */
  UF_USERINFO,  /* 6 */
  UF_MAX        /* 7 */
};
303 | * Usage example: 304 | * 305 | * unsigned long version = http_parser_version(); 306 | * unsigned major = (version >> 16) & 255; 307 | * unsigned minor = (version >> 8) & 255; 308 | * unsigned patch = version & 255; 309 | * printf("http_parser v%u.%u.%u\n", major, minor, patch); 310 | */ 311 | inline unsigned long http_parser_version(void); 312 | 313 | inline void http_parser_init(http_parser *parser, enum http_parser_type type); 314 | 315 | 316 | /* Initialize http_parser_settings members to 0 317 | */ 318 | inline void http_parser_settings_init(http_parser_settings *settings); 319 | 320 | 321 | /* Executes the parser. Returns number of parsed bytes. Sets 322 | * `parser->http_errno` on error. */ 323 | inline size_t http_parser_execute(http_parser *parser, 324 | const http_parser_settings *settings, 325 | const char *data, 326 | size_t len); 327 | 328 | 329 | /* If http_should_keep_alive() in the on_headers_complete or 330 | * on_message_complete callback returns 0, then this should be 331 | * the last message on the connection. 332 | * If you are the server, respond with the "Connection: close" header. 333 | * If you are the client, close the connection. 334 | */ 335 | inline int http_should_keep_alive(const http_parser *parser); 336 | 337 | /* Returns a string version of the HTTP method. 
*/ 338 | inline const char *http_method_str(enum http_method m); 339 | 340 | /* Return a string name of the given error */ 341 | inline const char *http_errno_name(enum http_errno err); 342 | 343 | /* Return a string description of the given error */ 344 | inline const char *http_errno_description(enum http_errno err); 345 | 346 | /* Initialize all http_parser_url members to 0 */ 347 | inline void http_parser_url_init(struct http_parser_url *u); 348 | 349 | /* Parse a URL; return nonzero on failure */ 350 | inline int http_parser_parse_url(const char *buf, size_t buflen, 351 | int is_connect, 352 | struct http_parser_url *u); 353 | 354 | /* Pause or un-pause the parser; a nonzero value pauses */ 355 | inline void http_parser_pause(http_parser *parser, int paused); 356 | 357 | /* Checks if this is the final chunk of the body. */ 358 | inline int http_body_is_final(const http_parser *parser); 359 | 360 | #ifdef __cplusplus 361 | } 362 | #endif 363 | #endif 364 | /* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev 365 | * 366 | * Additional changes are licensed under the same terms as NGINX and 367 | * copyright Joyent, Inc. and other Node contributors. All rights reserved. 368 | * 369 | * Permission is hereby granted, free of charge, to any person obtaining a copy 370 | * of this software and associated documentation files (the "Software"), to 371 | * deal in the Software without restriction, including without limitation the 372 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 373 | * sell copies of the Software, and to permit persons to whom the Software is 374 | * furnished to do so, subject to the following conditions: 375 | * 376 | * The above copyright notice and this permission notice shall be included in 377 | * all copies or substantial portions of the Software. 
378 | * 379 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 380 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 381 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 382 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 383 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 384 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 385 | * IN THE SOFTWARE. 386 | */ 387 | 388 | #include 389 | #include 390 | #include 391 | #include 392 | #include 393 | #include 394 | namespace rapidhttp { 395 | 396 | #ifndef ULLONG_MAX 397 | # define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */ 398 | #endif 399 | 400 | #ifndef MIN 401 | # define MIN(a,b) ((a) < (b) ? (a) : (b)) 402 | #endif 403 | 404 | #ifndef ARRAY_SIZE 405 | # define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0])) 406 | #endif 407 | 408 | #ifndef BIT_AT 409 | # define BIT_AT(a, i) \ 410 | (!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \ 411 | (1 << ((unsigned int) (i) & 7)))) 412 | #endif 413 | 414 | #ifndef ELEM_AT 415 | # define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? 
(a)[(i)] : (v)) 416 | #endif 417 | 418 | #define SET_ERRNO(e) \ 419 | do { \ 420 | parser->http_errno = (e); \ 421 | } while(0) 422 | 423 | #define CURRENT_STATE() p_state 424 | #define UPDATE_STATE(V) p_state = (enum state) (V); 425 | #define RETURN(V) \ 426 | do { \ 427 | parser->state = CURRENT_STATE(); \ 428 | return (V); \ 429 | } while (0); 430 | #define REEXECUTE() \ 431 | goto reexecute; \ 432 | 433 | 434 | #ifdef __GNUC__ 435 | # define LIKELY(X) __builtin_expect(!!(X), 1) 436 | # define UNLIKELY(X) __builtin_expect(!!(X), 0) 437 | #else 438 | # define LIKELY(X) (X) 439 | # define UNLIKELY(X) (X) 440 | #endif 441 | 442 | 443 | /* Run the notify callback FOR, returning ER if it fails */ 444 | #define CALLBACK_NOTIFY_(FOR, ER) \ 445 | do { \ 446 | assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \ 447 | \ 448 | if (LIKELY(settings->on_##FOR)) { \ 449 | parser->state = CURRENT_STATE(); \ 450 | if (UNLIKELY(0 != settings->on_##FOR(parser))) { \ 451 | SET_ERRNO(HPE_CB_##FOR); \ 452 | } \ 453 | UPDATE_STATE(parser->state); \ 454 | \ 455 | /* We either errored above or got paused; get out */ \ 456 | if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \ 457 | return (ER); \ 458 | } \ 459 | } \ 460 | } while (0) 461 | 462 | /* Run the notify callback FOR and consume the current byte */ 463 | #define CALLBACK_NOTIFY(FOR) CALLBACK_NOTIFY_(FOR, p - data + 1) 464 | 465 | /* Run the notify callback FOR and don't consume the current byte */ 466 | #define CALLBACK_NOTIFY_NOADVANCE(FOR) CALLBACK_NOTIFY_(FOR, p - data) 467 | 468 | /* Run data callback FOR with LEN bytes, returning ER if it fails */ 469 | #define CALLBACK_DATA_(FOR, LEN, ER) \ 470 | do { \ 471 | assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \ 472 | \ 473 | if (FOR##_mark) { \ 474 | if (LIKELY(settings->on_##FOR)) { \ 475 | parser->state = CURRENT_STATE(); \ 476 | if (UNLIKELY(0 != \ 477 | settings->on_##FOR(parser, FOR##_mark, (LEN)))) { \ 478 | SET_ERRNO(HPE_CB_##FOR); \ 479 | } \ 480 | 
UPDATE_STATE(parser->state); \ 481 | \ 482 | /* We either errored above or got paused; get out */ \ 483 | if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \ 484 | return (ER); \ 485 | } \ 486 | } \ 487 | FOR##_mark = NULL; \ 488 | } \ 489 | } while (0) 490 | 491 | /* Run the data callback FOR and consume the current byte */ 492 | #define CALLBACK_DATA(FOR) \ 493 | CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1) 494 | 495 | /* Run the data callback FOR and don't consume the current byte */ 496 | #define CALLBACK_DATA_NOADVANCE(FOR) \ 497 | CALLBACK_DATA_(FOR, p - FOR##_mark, p - data) 498 | 499 | /* Set the mark FOR; non-destructive if mark is already set */ 500 | #define MARK(FOR) \ 501 | do { \ 502 | if (!FOR##_mark) { \ 503 | FOR##_mark = p; \ 504 | } \ 505 | } while (0) 506 | 507 | /* Don't allow the total size of the HTTP headers (including the status 508 | * line) to exceed HTTP_MAX_HEADER_SIZE. This check is here to protect 509 | * embedders against denial-of-service attacks where the attacker feeds 510 | * us a never-ending header that the embedder keeps buffering. 511 | * 512 | * This check is arguably the responsibility of embedders but we're doing 513 | * it on the embedder's behalf because most won't bother and this way we 514 | * make the web a little safer. HTTP_MAX_HEADER_SIZE is still far bigger 515 | * than any reasonable request or response so this should never affect 516 | * day-to-day operation. 
517 | */ 518 | #define COUNT_HEADER_SIZE(V) \ 519 | do { \ 520 | parser->nread += (V); \ 521 | if (UNLIKELY(parser->nread > (HTTP_MAX_HEADER_SIZE))) { \ 522 | SET_ERRNO(HPE_HEADER_OVERFLOW); \ 523 | goto error; \ 524 | } \ 525 | } while (0) 526 | 527 | 528 | #define PROXY_CONNECTION "proxy-connection" 529 | #define CONNECTION "connection" 530 | #define CONTENT_LENGTH "content-length" 531 | #define TRANSFER_ENCODING "transfer-encoding" 532 | #define UPGRADE "upgrade" 533 | #define CHUNKED "chunked" 534 | #define KEEP_ALIVE "keep-alive" 535 | #define CLOSE "close" 536 | 537 | 538 | static const char *method_strings[] = 539 | { 540 | #define XX(num, name, string) #string, 541 | HTTP_METHOD_MAP(XX) 542 | #undef XX 543 | }; 544 | 545 | 546 | /* Tokens as defined by rfc 2616. Also lowercases them. 547 | * token = 1* 548 | * separators = "(" | ")" | "<" | ">" | "@" 549 | * | "," | ";" | ":" | "\" | <"> 550 | * | "/" | "[" | "]" | "?" | "=" 551 | * | "{" | "}" | SP | HT 552 | */ 553 | static const char tokens[256] = { 554 | /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */ 555 | 0, 0, 0, 0, 0, 0, 0, 0, 556 | /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */ 557 | 0, 0, 0, 0, 0, 0, 0, 0, 558 | /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */ 559 | 0, 0, 0, 0, 0, 0, 0, 0, 560 | /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */ 561 | 0, 0, 0, 0, 0, 0, 0, 0, 562 | /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */ 563 | 0, '!', 0, '#', '$', '%', '&', '\'', 564 | /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */ 565 | 0, 0, '*', '+', 0, '-', '.', 0, 566 | /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */ 567 | '0', '1', '2', '3', '4', '5', '6', '7', 568 | /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? 
/* Maps a byte to the value of the hexadecimal digit it represents
 * ('0'-'9' -> 0..9, 'A'-'F' and 'a'-'f' -> 10..15), or to -1 when the byte
 * is not a hexadecimal digit.
 *
 * All 256 entries are written out on purpose: an entry left out of the
 * initializer would be zero-initialized and therefore be read as the valid
 * digit 0 instead of "not a digit". That matters for bytes 0x80..0xFF.
 */
static const int8_t unhex[256] =
  {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x00 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x10 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x20 */
  , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1   /* 0x30: '0'-'9' */
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x40: 'A'-'F' */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x50 */
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x60: 'a'-'f' */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x70 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x80 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x90 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xA0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xB0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xC0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xD0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xE0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xF0 */
  };
34 " 35 # 36 $ 37 % 38 & 39 ' */ 617 | 0 | 2 | 4 | 0 | 16 | 32 | 64 | 128, 618 | /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */ 619 | 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, 620 | /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */ 621 | 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, 622 | /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */ 623 | 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, 624 | /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */ 625 | 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, 626 | /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */ 627 | 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, 628 | /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */ 629 | 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, 630 | /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */ 631 | 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, 632 | /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */ 633 | 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, 634 | /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */ 635 | 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, 636 | /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */ 637 | 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, 638 | /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */ 639 | 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, }; 640 | 641 | #undef T 642 | 643 | enum state 644 | { s_dead = 1 /* important that this is > 0 */ 645 | 646 | , s_start_req_or_res 647 | , s_res_or_resp_H 648 | , s_start_res 649 | , s_res_H 650 | , s_res_HT 651 | , s_res_HTT 652 | , s_res_HTTP 653 | , s_res_first_http_major 654 | , s_res_http_major 655 | , s_res_first_http_minor 656 | , s_res_http_minor 657 | , s_res_first_status_code 658 | , s_res_status_code 659 | , s_res_status_start 660 | , s_res_status 661 | , s_res_line_almost_done 662 | 663 | , s_start_req 664 | 665 | , s_req_method 666 | , s_req_spaces_before_url 667 | , s_req_schema 668 | , s_req_schema_slash 669 | , s_req_schema_slash_slash 670 | , s_req_server_start 671 | , s_req_server 672 | , s_req_server_with_at 673 | , s_req_path 674 | , s_req_query_string_start 675 | , s_req_query_string 676 | , s_req_fragment_start 677 | , 
s_req_fragment 678 | , s_req_http_start 679 | , s_req_http_H 680 | , s_req_http_HT 681 | , s_req_http_HTT 682 | , s_req_http_HTTP 683 | , s_req_first_http_major 684 | , s_req_http_major 685 | , s_req_first_http_minor 686 | , s_req_http_minor 687 | , s_req_line_almost_done 688 | 689 | , s_header_field_start 690 | , s_header_field 691 | , s_header_value_discard_ws 692 | , s_header_value_discard_ws_almost_done 693 | , s_header_value_discard_lws 694 | , s_header_value_start 695 | , s_header_value 696 | , s_header_value_lws 697 | 698 | , s_header_almost_done 699 | 700 | , s_chunk_size_start 701 | , s_chunk_size 702 | , s_chunk_parameters 703 | , s_chunk_size_almost_done 704 | 705 | , s_headers_almost_done 706 | , s_headers_done 707 | 708 | /* Important: 's_headers_done' must be the last 'header' state. All 709 | * states beyond this must be 'body' states. It is used for overflow 710 | * checking. See the PARSING_HEADER() macro. 711 | */ 712 | 713 | , s_chunk_data 714 | , s_chunk_data_almost_done 715 | , s_chunk_data_done 716 | 717 | , s_body_identity 718 | , s_body_identity_eof 719 | 720 | , s_message_done 721 | }; 722 | 723 | 724 | #define PARSING_HEADER(state) (state <= s_headers_done) 725 | 726 | 727 | enum header_states 728 | { h_general = 0 729 | , h_C 730 | , h_CO 731 | , h_CON 732 | 733 | , h_matching_connection 734 | , h_matching_proxy_connection 735 | , h_matching_content_length 736 | , h_matching_transfer_encoding 737 | , h_matching_upgrade 738 | 739 | , h_connection 740 | , h_content_length 741 | , h_transfer_encoding 742 | , h_upgrade 743 | 744 | , h_matching_transfer_encoding_chunked 745 | , h_matching_connection_token_start 746 | , h_matching_connection_keep_alive 747 | , h_matching_connection_close 748 | , h_matching_connection_upgrade 749 | , h_matching_connection_token 750 | 751 | , h_transfer_encoding_chunked 752 | , h_connection_keep_alive 753 | , h_connection_close 754 | , h_connection_upgrade 755 | }; 756 | 757 | enum http_host_state 758 | { 759 | 
s_http_host_dead = 1 760 | , s_http_userinfo_start 761 | , s_http_userinfo 762 | , s_http_host_start 763 | , s_http_host_v6_start 764 | , s_http_host 765 | , s_http_host_v6 766 | , s_http_host_v6_end 767 | , s_http_host_v6_zone_start 768 | , s_http_host_v6_zone 769 | , s_http_host_port_start 770 | , s_http_host_port 771 | }; 772 | 773 | /* Macros for character classes; depends on strict-mode */ 774 | #define CR '\r' 775 | #define LF '\n' 776 | #define LOWER(c) (unsigned char)(c | 0x20) 777 | #define IS_ALPHA(c) (LOWER(c) >= 'a' && LOWER(c) <= 'z') 778 | #define IS_NUM(c) ((c) >= '0' && (c) <= '9') 779 | #define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c)) 780 | #define IS_HEX(c) (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f')) 781 | #define IS_MARK(c) ((c) == '-' || (c) == '_' || (c) == '.' || \ 782 | (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \ 783 | (c) == ')') 784 | #define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \ 785 | (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \ 786 | (c) == '$' || (c) == ',') 787 | 788 | #define STRICT_TOKEN(c) (tokens[(unsigned char)c]) 789 | 790 | #if HTTP_PARSER_STRICT 791 | #define TOKEN(c) (tokens[(unsigned char)c]) 792 | #define IS_URL_CHAR(c) (BIT_AT(normal_url_char, (unsigned char)c)) 793 | #define IS_HOST_CHAR(c) (IS_ALPHANUM(c) || (c) == '.' || (c) == '-') 794 | #else 795 | #define TOKEN(c) ((c == ' ') ? ' ' : tokens[(unsigned char)c]) 796 | #define IS_URL_CHAR(c) \ 797 | (BIT_AT(normal_url_char, (unsigned char)c) || ((c) & 0x80)) 798 | #define IS_HOST_CHAR(c) \ 799 | (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_') 800 | #endif 801 | 802 | /** 803 | * Verify that a char is a valid visible (printable) US-ASCII 804 | * character or %x80-FF 805 | **/ 806 | #define IS_HEADER_CHAR(ch) \ 807 | (ch == CR || ch == LF || ch == 9 || ((unsigned char)ch > 31 && ch != 127)) 808 | 809 | #define start_state (parser->type == HTTP_REQUEST ? 
s_start_req : s_start_res) 810 | 811 | 812 | #if HTTP_PARSER_STRICT 813 | # define STRICT_CHECK(cond) \ 814 | do { \ 815 | if (cond) { \ 816 | SET_ERRNO(HPE_STRICT); \ 817 | goto error; \ 818 | } \ 819 | } while (0) 820 | # define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead) 821 | #else 822 | # define STRICT_CHECK(cond) 823 | # define NEW_MESSAGE() start_state 824 | #endif 825 | 826 | 827 | /* Map errno values to strings for human-readable output */ 828 | #define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s }, 829 | static struct { 830 | const char *name; 831 | const char *description; 832 | } http_strerror_tab[] = { 833 | HTTP_ERRNO_MAP(HTTP_STRERROR_GEN) 834 | }; 835 | #undef HTTP_STRERROR_GEN 836 | 837 | int http_message_needs_eof(const http_parser *parser); 838 | 839 | /* Our URL parser. 840 | * 841 | * This is designed to be shared by http_parser_execute() for URL validation, 842 | * hence it has a state transition + byte-for-byte interface. In addition, it 843 | * is meant to be embedded in http_parser_parse_url(), which does the dirty 844 | * work of turning state transitions URL components for its API. 845 | * 846 | * This function should only be invoked with non-space characters. It is 847 | * assumed that the caller cares about (and can detect) the transition between 848 | * URL and non-URL states by looking for these. 849 | */ 850 | inline static enum state 851 | parse_url_char(enum state s, const char ch) 852 | { 853 | if (ch == ' ' || ch == '\r' || ch == '\n') { 854 | return s_dead; 855 | } 856 | 857 | #if HTTP_PARSER_STRICT 858 | if (ch == '\t' || ch == '\f') { 859 | return s_dead; 860 | } 861 | #endif 862 | 863 | switch (s) { 864 | case s_req_spaces_before_url: 865 | /* Proxied requests are followed by scheme of an absolute URI (alpha). 866 | * All methods except CONNECT are followed by '/' or '*'. 
867 | */ 868 | 869 | if (ch == '/' || ch == '*') { 870 | return s_req_path; 871 | } 872 | 873 | if (IS_ALPHA(ch)) { 874 | return s_req_schema; 875 | } 876 | 877 | break; 878 | 879 | case s_req_schema: 880 | if (IS_ALPHA(ch)) { 881 | return s; 882 | } 883 | 884 | if (ch == ':') { 885 | return s_req_schema_slash; 886 | } 887 | 888 | break; 889 | 890 | case s_req_schema_slash: 891 | if (ch == '/') { 892 | return s_req_schema_slash_slash; 893 | } 894 | 895 | break; 896 | 897 | case s_req_schema_slash_slash: 898 | if (ch == '/') { 899 | return s_req_server_start; 900 | } 901 | 902 | break; 903 | 904 | case s_req_server_with_at: 905 | if (ch == '@') { 906 | return s_dead; 907 | } 908 | 909 | /* FALLTHROUGH */ 910 | case s_req_server_start: 911 | case s_req_server: 912 | if (ch == '/') { 913 | return s_req_path; 914 | } 915 | 916 | if (ch == '?') { 917 | return s_req_query_string_start; 918 | } 919 | 920 | if (ch == '@') { 921 | return s_req_server_with_at; 922 | } 923 | 924 | if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') { 925 | return s_req_server; 926 | } 927 | 928 | break; 929 | 930 | case s_req_path: 931 | if (IS_URL_CHAR(ch)) { 932 | return s; 933 | } 934 | 935 | switch (ch) { 936 | case '?': 937 | return s_req_query_string_start; 938 | 939 | case '#': 940 | return s_req_fragment_start; 941 | } 942 | 943 | break; 944 | 945 | case s_req_query_string_start: 946 | case s_req_query_string: 947 | if (IS_URL_CHAR(ch)) { 948 | return s_req_query_string; 949 | } 950 | 951 | switch (ch) { 952 | case '?': 953 | /* allow extra '?' 
in query string */ 954 | return s_req_query_string; 955 | 956 | case '#': 957 | return s_req_fragment_start; 958 | } 959 | 960 | break; 961 | 962 | case s_req_fragment_start: 963 | if (IS_URL_CHAR(ch)) { 964 | return s_req_fragment; 965 | } 966 | 967 | switch (ch) { 968 | case '?': 969 | return s_req_fragment; 970 | 971 | case '#': 972 | return s; 973 | } 974 | 975 | break; 976 | 977 | case s_req_fragment: 978 | if (IS_URL_CHAR(ch)) { 979 | return s; 980 | } 981 | 982 | switch (ch) { 983 | case '?': 984 | case '#': 985 | return s; 986 | } 987 | 988 | break; 989 | 990 | default: 991 | break; 992 | } 993 | 994 | /* We should never fall out of the switch above unless there's an error */ 995 | return s_dead; 996 | } 997 | 998 | inline size_t http_parser_execute (http_parser *parser, 999 | const http_parser_settings *settings, 1000 | const char *data, 1001 | size_t len) 1002 | { 1003 | char c, ch; 1004 | int8_t unhex_val; 1005 | const char *p = data; 1006 | const char *header_field_mark = 0; 1007 | const char *header_value_mark = 0; 1008 | const char *url_mark = 0; 1009 | const char *body_mark = 0; 1010 | const char *status_mark = 0; 1011 | enum state p_state = (enum state) parser->state; 1012 | const unsigned int lenient = parser->lenient_http_headers; 1013 | 1014 | /* We're in an error state. Don't bother doing anything. */ 1015 | if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { 1016 | return 0; 1017 | } 1018 | 1019 | if (len == 0) { 1020 | switch (CURRENT_STATE()) { 1021 | case s_body_identity_eof: 1022 | /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if 1023 | * we got paused. 
1024 | */ 1025 | CALLBACK_NOTIFY_NOADVANCE(message_complete); 1026 | return 0; 1027 | 1028 | case s_dead: 1029 | case s_start_req_or_res: 1030 | case s_start_res: 1031 | case s_start_req: 1032 | return 0; 1033 | 1034 | default: 1035 | SET_ERRNO(HPE_INVALID_EOF_STATE); 1036 | return 1; 1037 | } 1038 | } 1039 | 1040 | 1041 | if (CURRENT_STATE() == s_header_field) 1042 | header_field_mark = data; 1043 | if (CURRENT_STATE() == s_header_value) 1044 | header_value_mark = data; 1045 | switch (CURRENT_STATE()) { 1046 | case s_req_path: 1047 | case s_req_schema: 1048 | case s_req_schema_slash: 1049 | case s_req_schema_slash_slash: 1050 | case s_req_server_start: 1051 | case s_req_server: 1052 | case s_req_server_with_at: 1053 | case s_req_query_string_start: 1054 | case s_req_query_string: 1055 | case s_req_fragment_start: 1056 | case s_req_fragment: 1057 | url_mark = data; 1058 | break; 1059 | case s_res_status: 1060 | status_mark = data; 1061 | break; 1062 | default: 1063 | break; 1064 | } 1065 | 1066 | for (p=data; p != data + len; p++) { 1067 | ch = *p; 1068 | 1069 | if (PARSING_HEADER(CURRENT_STATE())) 1070 | COUNT_HEADER_SIZE(1); 1071 | 1072 | reexecute: 1073 | switch (CURRENT_STATE()) { 1074 | 1075 | case s_dead: 1076 | /* this state is used after a 'Connection: close' message 1077 | * the parser will error out if it reads another message 1078 | */ 1079 | if (LIKELY(ch == CR || ch == LF)) 1080 | break; 1081 | 1082 | SET_ERRNO(HPE_CLOSED_CONNECTION); 1083 | goto error; 1084 | 1085 | case s_start_req_or_res: 1086 | { 1087 | if (ch == CR || ch == LF) 1088 | break; 1089 | parser->flags = 0; 1090 | parser->content_length = ULLONG_MAX; 1091 | 1092 | if (ch == 'H') { 1093 | UPDATE_STATE(s_res_or_resp_H); 1094 | 1095 | CALLBACK_NOTIFY(message_begin); 1096 | } else { 1097 | parser->type = HTTP_REQUEST; 1098 | UPDATE_STATE(s_start_req); 1099 | REEXECUTE(); 1100 | } 1101 | 1102 | break; 1103 | } 1104 | 1105 | case s_res_or_resp_H: 1106 | if (ch == 'T') { 1107 | parser->type = 
HTTP_RESPONSE; 1108 | UPDATE_STATE(s_res_HT); 1109 | } else { 1110 | if (UNLIKELY(ch != 'E')) { 1111 | SET_ERRNO(HPE_INVALID_CONSTANT); 1112 | goto error; 1113 | } 1114 | 1115 | parser->type = HTTP_REQUEST; 1116 | parser->method = HTTP_HEAD; 1117 | parser->index = 2; 1118 | UPDATE_STATE(s_req_method); 1119 | } 1120 | break; 1121 | 1122 | case s_start_res: 1123 | { 1124 | parser->flags = 0; 1125 | parser->content_length = ULLONG_MAX; 1126 | 1127 | switch (ch) { 1128 | case 'H': 1129 | UPDATE_STATE(s_res_H); 1130 | break; 1131 | 1132 | case CR: 1133 | case LF: 1134 | break; 1135 | 1136 | default: 1137 | SET_ERRNO(HPE_INVALID_CONSTANT); 1138 | goto error; 1139 | } 1140 | 1141 | CALLBACK_NOTIFY(message_begin); 1142 | break; 1143 | } 1144 | 1145 | case s_res_H: 1146 | STRICT_CHECK(ch != 'T'); 1147 | UPDATE_STATE(s_res_HT); 1148 | break; 1149 | 1150 | case s_res_HT: 1151 | STRICT_CHECK(ch != 'T'); 1152 | UPDATE_STATE(s_res_HTT); 1153 | break; 1154 | 1155 | case s_res_HTT: 1156 | STRICT_CHECK(ch != 'P'); 1157 | UPDATE_STATE(s_res_HTTP); 1158 | break; 1159 | 1160 | case s_res_HTTP: 1161 | STRICT_CHECK(ch != '/'); 1162 | UPDATE_STATE(s_res_first_http_major); 1163 | break; 1164 | 1165 | case s_res_first_http_major: 1166 | if (UNLIKELY(ch < '0' || ch > '9')) { 1167 | SET_ERRNO(HPE_INVALID_VERSION); 1168 | goto error; 1169 | } 1170 | 1171 | parser->http_major = ch - '0'; 1172 | UPDATE_STATE(s_res_http_major); 1173 | break; 1174 | 1175 | /* major HTTP version or dot */ 1176 | case s_res_http_major: 1177 | { 1178 | if (ch == '.') { 1179 | UPDATE_STATE(s_res_first_http_minor); 1180 | break; 1181 | } 1182 | 1183 | if (!IS_NUM(ch)) { 1184 | SET_ERRNO(HPE_INVALID_VERSION); 1185 | goto error; 1186 | } 1187 | 1188 | parser->http_major *= 10; 1189 | parser->http_major += ch - '0'; 1190 | 1191 | if (UNLIKELY(parser->http_major > 999)) { 1192 | SET_ERRNO(HPE_INVALID_VERSION); 1193 | goto error; 1194 | } 1195 | 1196 | break; 1197 | } 1198 | 1199 | /* first digit of minor HTTP version */ 
1200 | case s_res_first_http_minor: 1201 | if (UNLIKELY(!IS_NUM(ch))) { 1202 | SET_ERRNO(HPE_INVALID_VERSION); 1203 | goto error; 1204 | } 1205 | 1206 | parser->http_minor = ch - '0'; 1207 | UPDATE_STATE(s_res_http_minor); 1208 | break; 1209 | 1210 | /* minor HTTP version or end of request line */ 1211 | case s_res_http_minor: 1212 | { 1213 | if (ch == ' ') { 1214 | UPDATE_STATE(s_res_first_status_code); 1215 | break; 1216 | } 1217 | 1218 | if (UNLIKELY(!IS_NUM(ch))) { 1219 | SET_ERRNO(HPE_INVALID_VERSION); 1220 | goto error; 1221 | } 1222 | 1223 | parser->http_minor *= 10; 1224 | parser->http_minor += ch - '0'; 1225 | 1226 | if (UNLIKELY(parser->http_minor > 999)) { 1227 | SET_ERRNO(HPE_INVALID_VERSION); 1228 | goto error; 1229 | } 1230 | 1231 | break; 1232 | } 1233 | 1234 | case s_res_first_status_code: 1235 | { 1236 | if (!IS_NUM(ch)) { 1237 | if (ch == ' ') { 1238 | break; 1239 | } 1240 | 1241 | SET_ERRNO(HPE_INVALID_STATUS); 1242 | goto error; 1243 | } 1244 | parser->status_code = ch - '0'; 1245 | UPDATE_STATE(s_res_status_code); 1246 | break; 1247 | } 1248 | 1249 | case s_res_status_code: 1250 | { 1251 | if (!IS_NUM(ch)) { 1252 | switch (ch) { 1253 | case ' ': 1254 | UPDATE_STATE(s_res_status_start); 1255 | break; 1256 | case CR: 1257 | UPDATE_STATE(s_res_line_almost_done); 1258 | break; 1259 | case LF: 1260 | UPDATE_STATE(s_header_field_start); 1261 | break; 1262 | default: 1263 | SET_ERRNO(HPE_INVALID_STATUS); 1264 | goto error; 1265 | } 1266 | break; 1267 | } 1268 | 1269 | parser->status_code *= 10; 1270 | parser->status_code += ch - '0'; 1271 | 1272 | if (UNLIKELY(parser->status_code > 999)) { 1273 | SET_ERRNO(HPE_INVALID_STATUS); 1274 | goto error; 1275 | } 1276 | 1277 | break; 1278 | } 1279 | 1280 | case s_res_status_start: 1281 | { 1282 | if (ch == CR) { 1283 | UPDATE_STATE(s_res_line_almost_done); 1284 | break; 1285 | } 1286 | 1287 | if (ch == LF) { 1288 | UPDATE_STATE(s_header_field_start); 1289 | break; 1290 | } 1291 | 1292 | MARK(status); 1293 | 
UPDATE_STATE(s_res_status); 1294 | parser->index = 0; 1295 | break; 1296 | } 1297 | 1298 | case s_res_status: 1299 | if (ch == CR) { 1300 | UPDATE_STATE(s_res_line_almost_done); 1301 | CALLBACK_DATA(status); 1302 | break; 1303 | } 1304 | 1305 | if (ch == LF) { 1306 | UPDATE_STATE(s_header_field_start); 1307 | CALLBACK_DATA(status); 1308 | break; 1309 | } 1310 | 1311 | break; 1312 | 1313 | case s_res_line_almost_done: 1314 | STRICT_CHECK(ch != LF); 1315 | UPDATE_STATE(s_header_field_start); 1316 | break; 1317 | 1318 | case s_start_req: 1319 | { 1320 | if (ch == CR || ch == LF) 1321 | break; 1322 | parser->flags = 0; 1323 | parser->content_length = ULLONG_MAX; 1324 | 1325 | if (UNLIKELY(!IS_ALPHA(ch))) { 1326 | SET_ERRNO(HPE_INVALID_METHOD); 1327 | goto error; 1328 | } 1329 | 1330 | parser->method = (enum http_method) 0; 1331 | parser->index = 1; 1332 | switch (ch) { 1333 | case 'A': parser->method = HTTP_ACL; break; 1334 | case 'B': parser->method = HTTP_BIND; break; 1335 | case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break; 1336 | case 'D': parser->method = HTTP_DELETE; break; 1337 | case 'G': parser->method = HTTP_GET; break; 1338 | case 'H': parser->method = HTTP_HEAD; break; 1339 | case 'L': parser->method = HTTP_LOCK; /* or LINK */ break; 1340 | case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH, MKCALENDAR */ break; 1341 | case 'N': parser->method = HTTP_NOTIFY; break; 1342 | case 'O': parser->method = HTTP_OPTIONS; break; 1343 | case 'P': parser->method = HTTP_POST; 1344 | /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */ 1345 | break; 1346 | case 'R': parser->method = HTTP_REPORT; /* or REBIND */ break; 1347 | case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH */ break; 1348 | case 'T': parser->method = HTTP_TRACE; break; 1349 | case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE, UNBIND, UNLINK */ break; 1350 | default: 1351 | SET_ERRNO(HPE_INVALID_METHOD); 1352 | goto error; 1353 | } 1354 | 
UPDATE_STATE(s_req_method); 1355 | 1356 | CALLBACK_NOTIFY(message_begin); 1357 | 1358 | break; 1359 | } 1360 | 1361 | case s_req_method: 1362 | { 1363 | const char *matcher; 1364 | if (UNLIKELY(ch == '\0')) { 1365 | SET_ERRNO(HPE_INVALID_METHOD); 1366 | goto error; 1367 | } 1368 | 1369 | matcher = method_strings[parser->method]; 1370 | if (ch == ' ' && matcher[parser->index] == '\0') { 1371 | UPDATE_STATE(s_req_spaces_before_url); 1372 | } else if (ch == matcher[parser->index]) { 1373 | ; /* nada */ 1374 | } else if (IS_ALPHA(ch)) { 1375 | 1376 | switch (parser->method << 16 | parser->index << 8 | ch) { 1377 | #define XX(meth, pos, ch, new_meth) \ 1378 | case (HTTP_##meth << 16 | pos << 8 | ch): \ 1379 | parser->method = HTTP_##new_meth; break; 1380 | 1381 | XX(POST, 1, 'U', PUT) 1382 | XX(POST, 1, 'A', PATCH) 1383 | XX(CONNECT, 1, 'H', CHECKOUT) 1384 | XX(CONNECT, 2, 'P', COPY) 1385 | XX(MKCOL, 1, 'O', MOVE) 1386 | XX(MKCOL, 1, 'E', MERGE) 1387 | XX(MKCOL, 2, 'A', MKACTIVITY) 1388 | XX(MKCOL, 3, 'A', MKCALENDAR) 1389 | XX(SUBSCRIBE, 1, 'E', SEARCH) 1390 | XX(REPORT, 2, 'B', REBIND) 1391 | XX(POST, 1, 'R', PROPFIND) 1392 | XX(PROPFIND, 4, 'P', PROPPATCH) 1393 | XX(PUT, 2, 'R', PURGE) 1394 | XX(LOCK, 1, 'I', LINK) 1395 | XX(UNLOCK, 2, 'S', UNSUBSCRIBE) 1396 | XX(UNLOCK, 2, 'B', UNBIND) 1397 | XX(UNLOCK, 3, 'I', UNLINK) 1398 | #undef XX 1399 | 1400 | default: 1401 | SET_ERRNO(HPE_INVALID_METHOD); 1402 | goto error; 1403 | } 1404 | } else if (ch == '-' && 1405 | parser->index == 1 && 1406 | parser->method == HTTP_MKCOL) { 1407 | parser->method = HTTP_MSEARCH; 1408 | } else { 1409 | SET_ERRNO(HPE_INVALID_METHOD); 1410 | goto error; 1411 | } 1412 | 1413 | ++parser->index; 1414 | break; 1415 | } 1416 | 1417 | case s_req_spaces_before_url: 1418 | { 1419 | if (ch == ' ') break; 1420 | 1421 | MARK(url); 1422 | if (parser->method == HTTP_CONNECT) { 1423 | UPDATE_STATE(s_req_server_start); 1424 | } 1425 | 1426 | UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch)); 1427 | if 
(UNLIKELY(CURRENT_STATE() == s_dead)) { 1428 | SET_ERRNO(HPE_INVALID_URL); 1429 | goto error; 1430 | } 1431 | 1432 | break; 1433 | } 1434 | 1435 | case s_req_schema: 1436 | case s_req_schema_slash: 1437 | case s_req_schema_slash_slash: 1438 | case s_req_server_start: 1439 | { 1440 | switch (ch) { 1441 | /* No whitespace allowed here */ 1442 | case ' ': 1443 | case CR: 1444 | case LF: 1445 | SET_ERRNO(HPE_INVALID_URL); 1446 | goto error; 1447 | default: 1448 | UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch)); 1449 | if (UNLIKELY(CURRENT_STATE() == s_dead)) { 1450 | SET_ERRNO(HPE_INVALID_URL); 1451 | goto error; 1452 | } 1453 | } 1454 | 1455 | break; 1456 | } 1457 | 1458 | case s_req_server: 1459 | case s_req_server_with_at: 1460 | case s_req_path: 1461 | case s_req_query_string_start: 1462 | case s_req_query_string: 1463 | case s_req_fragment_start: 1464 | case s_req_fragment: 1465 | { 1466 | switch (ch) { 1467 | case ' ': 1468 | UPDATE_STATE(s_req_http_start); 1469 | CALLBACK_DATA(url); 1470 | break; 1471 | case CR: 1472 | case LF: 1473 | parser->http_major = 0; 1474 | parser->http_minor = 9; 1475 | UPDATE_STATE((ch == CR) ? 
1476 | s_req_line_almost_done : 1477 | s_header_field_start); 1478 | CALLBACK_DATA(url); 1479 | break; 1480 | default: 1481 | UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch)); 1482 | if (UNLIKELY(CURRENT_STATE() == s_dead)) { 1483 | SET_ERRNO(HPE_INVALID_URL); 1484 | goto error; 1485 | } 1486 | } 1487 | break; 1488 | } 1489 | 1490 | case s_req_http_start: 1491 | switch (ch) { 1492 | case 'H': 1493 | UPDATE_STATE(s_req_http_H); 1494 | break; 1495 | case ' ': 1496 | break; 1497 | default: 1498 | SET_ERRNO(HPE_INVALID_CONSTANT); 1499 | goto error; 1500 | } 1501 | break; 1502 | 1503 | case s_req_http_H: 1504 | STRICT_CHECK(ch != 'T'); 1505 | UPDATE_STATE(s_req_http_HT); 1506 | break; 1507 | 1508 | case s_req_http_HT: 1509 | STRICT_CHECK(ch != 'T'); 1510 | UPDATE_STATE(s_req_http_HTT); 1511 | break; 1512 | 1513 | case s_req_http_HTT: 1514 | STRICT_CHECK(ch != 'P'); 1515 | UPDATE_STATE(s_req_http_HTTP); 1516 | break; 1517 | 1518 | case s_req_http_HTTP: 1519 | STRICT_CHECK(ch != '/'); 1520 | UPDATE_STATE(s_req_first_http_major); 1521 | break; 1522 | 1523 | /* first digit of major HTTP version */ 1524 | case s_req_first_http_major: 1525 | if (UNLIKELY(ch < '1' || ch > '9')) { 1526 | SET_ERRNO(HPE_INVALID_VERSION); 1527 | goto error; 1528 | } 1529 | 1530 | parser->http_major = ch - '0'; 1531 | UPDATE_STATE(s_req_http_major); 1532 | break; 1533 | 1534 | /* major HTTP version or dot */ 1535 | case s_req_http_major: 1536 | { 1537 | if (ch == '.') { 1538 | UPDATE_STATE(s_req_first_http_minor); 1539 | break; 1540 | } 1541 | 1542 | if (UNLIKELY(!IS_NUM(ch))) { 1543 | SET_ERRNO(HPE_INVALID_VERSION); 1544 | goto error; 1545 | } 1546 | 1547 | parser->http_major *= 10; 1548 | parser->http_major += ch - '0'; 1549 | 1550 | if (UNLIKELY(parser->http_major > 999)) { 1551 | SET_ERRNO(HPE_INVALID_VERSION); 1552 | goto error; 1553 | } 1554 | 1555 | break; 1556 | } 1557 | 1558 | /* first digit of minor HTTP version */ 1559 | case s_req_first_http_minor: 1560 | if (UNLIKELY(!IS_NUM(ch))) { 
1561 | SET_ERRNO(HPE_INVALID_VERSION); 1562 | goto error; 1563 | } 1564 | 1565 | parser->http_minor = ch - '0'; 1566 | UPDATE_STATE(s_req_http_minor); 1567 | break; 1568 | 1569 | /* minor HTTP version or end of request line */ 1570 | case s_req_http_minor: 1571 | { 1572 | if (ch == CR) { 1573 | UPDATE_STATE(s_req_line_almost_done); 1574 | break; 1575 | } 1576 | 1577 | if (ch == LF) { 1578 | UPDATE_STATE(s_header_field_start); 1579 | break; 1580 | } 1581 | 1582 | /* XXX allow spaces after digit? */ 1583 | 1584 | if (UNLIKELY(!IS_NUM(ch))) { 1585 | SET_ERRNO(HPE_INVALID_VERSION); 1586 | goto error; 1587 | } 1588 | 1589 | parser->http_minor *= 10; 1590 | parser->http_minor += ch - '0'; 1591 | 1592 | if (UNLIKELY(parser->http_minor > 999)) { 1593 | SET_ERRNO(HPE_INVALID_VERSION); 1594 | goto error; 1595 | } 1596 | 1597 | break; 1598 | } 1599 | 1600 | /* end of request line */ 1601 | case s_req_line_almost_done: 1602 | { 1603 | if (UNLIKELY(ch != LF)) { 1604 | SET_ERRNO(HPE_LF_EXPECTED); 1605 | goto error; 1606 | } 1607 | 1608 | UPDATE_STATE(s_header_field_start); 1609 | break; 1610 | } 1611 | 1612 | case s_header_field_start: 1613 | { 1614 | if (ch == CR) { 1615 | UPDATE_STATE(s_headers_almost_done); 1616 | break; 1617 | } 1618 | 1619 | if (ch == LF) { 1620 | /* they might be just sending \n instead of \r\n so this would be 1621 | * the second \n to denote the end of headers*/ 1622 | UPDATE_STATE(s_headers_almost_done); 1623 | REEXECUTE(); 1624 | } 1625 | 1626 | c = TOKEN(ch); 1627 | 1628 | if (UNLIKELY(!c)) { 1629 | SET_ERRNO(HPE_INVALID_HEADER_TOKEN); 1630 | goto error; 1631 | } 1632 | 1633 | MARK(header_field); 1634 | 1635 | parser->index = 0; 1636 | UPDATE_STATE(s_header_field); 1637 | 1638 | switch (c) { 1639 | case 'c': 1640 | parser->header_state = h_C; 1641 | break; 1642 | 1643 | case 'p': 1644 | parser->header_state = h_matching_proxy_connection; 1645 | break; 1646 | 1647 | case 't': 1648 | parser->header_state = h_matching_transfer_encoding; 1649 | break; 
1650 | 1651 | case 'u': 1652 | parser->header_state = h_matching_upgrade; 1653 | break; 1654 | 1655 | default: 1656 | parser->header_state = h_general; 1657 | break; 1658 | } 1659 | break; 1660 | } 1661 | 1662 | case s_header_field: 1663 | { 1664 | const char* start = p; 1665 | for (; p != data + len; p++) { 1666 | ch = *p; 1667 | c = TOKEN(ch); 1668 | 1669 | if (!c) 1670 | break; 1671 | 1672 | switch (parser->header_state) { 1673 | case h_general: 1674 | break; 1675 | 1676 | case h_C: 1677 | parser->index++; 1678 | parser->header_state = (c == 'o' ? h_CO : h_general); 1679 | break; 1680 | 1681 | case h_CO: 1682 | parser->index++; 1683 | parser->header_state = (c == 'n' ? h_CON : h_general); 1684 | break; 1685 | 1686 | case h_CON: 1687 | parser->index++; 1688 | switch (c) { 1689 | case 'n': 1690 | parser->header_state = h_matching_connection; 1691 | break; 1692 | case 't': 1693 | parser->header_state = h_matching_content_length; 1694 | break; 1695 | default: 1696 | parser->header_state = h_general; 1697 | break; 1698 | } 1699 | break; 1700 | 1701 | /* connection */ 1702 | 1703 | case h_matching_connection: 1704 | parser->index++; 1705 | if (parser->index > sizeof(CONNECTION)-1 1706 | || c != CONNECTION[parser->index]) { 1707 | parser->header_state = h_general; 1708 | } else if (parser->index == sizeof(CONNECTION)-2) { 1709 | parser->header_state = h_connection; 1710 | } 1711 | break; 1712 | 1713 | /* proxy-connection */ 1714 | 1715 | case h_matching_proxy_connection: 1716 | parser->index++; 1717 | if (parser->index > sizeof(PROXY_CONNECTION)-1 1718 | || c != PROXY_CONNECTION[parser->index]) { 1719 | parser->header_state = h_general; 1720 | } else if (parser->index == sizeof(PROXY_CONNECTION)-2) { 1721 | parser->header_state = h_connection; 1722 | } 1723 | break; 1724 | 1725 | /* content-length */ 1726 | 1727 | case h_matching_content_length: 1728 | parser->index++; 1729 | if (parser->index > sizeof(CONTENT_LENGTH)-1 1730 | || c != CONTENT_LENGTH[parser->index]) { 
1731 | parser->header_state = h_general; 1732 | } else if (parser->index == sizeof(CONTENT_LENGTH)-2) { 1733 | parser->header_state = h_content_length; 1734 | } 1735 | break; 1736 | 1737 | /* transfer-encoding */ 1738 | 1739 | case h_matching_transfer_encoding: 1740 | parser->index++; 1741 | if (parser->index > sizeof(TRANSFER_ENCODING)-1 1742 | || c != TRANSFER_ENCODING[parser->index]) { 1743 | parser->header_state = h_general; 1744 | } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) { 1745 | parser->header_state = h_transfer_encoding; 1746 | } 1747 | break; 1748 | 1749 | /* upgrade */ 1750 | 1751 | case h_matching_upgrade: 1752 | parser->index++; 1753 | if (parser->index > sizeof(UPGRADE)-1 1754 | || c != UPGRADE[parser->index]) { 1755 | parser->header_state = h_general; 1756 | } else if (parser->index == sizeof(UPGRADE)-2) { 1757 | parser->header_state = h_upgrade; 1758 | } 1759 | break; 1760 | 1761 | case h_connection: 1762 | case h_content_length: 1763 | case h_transfer_encoding: 1764 | case h_upgrade: 1765 | if (ch != ' ') parser->header_state = h_general; 1766 | break; 1767 | 1768 | default: 1769 | assert(0 && "Unknown header_state"); 1770 | break; 1771 | } 1772 | } 1773 | 1774 | COUNT_HEADER_SIZE(p - start); 1775 | 1776 | if (p == data + len) { 1777 | --p; 1778 | break; 1779 | } 1780 | 1781 | if (ch == ':') { 1782 | UPDATE_STATE(s_header_value_discard_ws); 1783 | CALLBACK_DATA(header_field); 1784 | break; 1785 | } 1786 | 1787 | SET_ERRNO(HPE_INVALID_HEADER_TOKEN); 1788 | goto error; 1789 | } 1790 | 1791 | case s_header_value_discard_ws: 1792 | if (ch == ' ' || ch == '\t') break; 1793 | 1794 | if (ch == CR) { 1795 | UPDATE_STATE(s_header_value_discard_ws_almost_done); 1796 | break; 1797 | } 1798 | 1799 | if (ch == LF) { 1800 | UPDATE_STATE(s_header_value_discard_lws); 1801 | break; 1802 | } 1803 | 1804 | /* FALLTHROUGH */ 1805 | 1806 | case s_header_value_start: 1807 | { 1808 | MARK(header_value); 1809 | 1810 | UPDATE_STATE(s_header_value); 1811 | 
parser->index = 0; 1812 | 1813 | c = LOWER(ch); 1814 | 1815 | switch (parser->header_state) { 1816 | case h_upgrade: 1817 | parser->flags |= F_UPGRADE; 1818 | parser->header_state = h_general; 1819 | break; 1820 | 1821 | case h_transfer_encoding: 1822 | /* looking for 'Transfer-Encoding: chunked' */ 1823 | if ('c' == c) { 1824 | parser->header_state = h_matching_transfer_encoding_chunked; 1825 | } else { 1826 | parser->header_state = h_general; 1827 | } 1828 | break; 1829 | 1830 | case h_content_length: 1831 | if (UNLIKELY(!IS_NUM(ch))) { 1832 | SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); 1833 | goto error; 1834 | } 1835 | 1836 | if (parser->flags & F_CONTENTLENGTH) { 1837 | SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH); 1838 | goto error; 1839 | } 1840 | 1841 | parser->flags |= F_CONTENTLENGTH; 1842 | parser->content_length = ch - '0'; 1843 | break; 1844 | 1845 | case h_connection: 1846 | /* looking for 'Connection: keep-alive' */ 1847 | if (c == 'k') { 1848 | parser->header_state = h_matching_connection_keep_alive; 1849 | /* looking for 'Connection: close' */ 1850 | } else if (c == 'c') { 1851 | parser->header_state = h_matching_connection_close; 1852 | } else if (c == 'u') { 1853 | parser->header_state = h_matching_connection_upgrade; 1854 | } else { 1855 | parser->header_state = h_matching_connection_token; 1856 | } 1857 | break; 1858 | 1859 | /* Multi-value `Connection` header */ 1860 | case h_matching_connection_token_start: 1861 | break; 1862 | 1863 | default: 1864 | parser->header_state = h_general; 1865 | break; 1866 | } 1867 | break; 1868 | } 1869 | 1870 | case s_header_value: 1871 | { 1872 | const char* start = p; 1873 | enum header_states h_state = (enum header_states) parser->header_state; 1874 | for (; p != data + len; p++) { 1875 | ch = *p; 1876 | if (ch == CR) { 1877 | UPDATE_STATE(s_header_almost_done); 1878 | parser->header_state = h_state; 1879 | CALLBACK_DATA(header_value); 1880 | break; 1881 | } 1882 | 1883 | if (ch == LF) { 1884 | 
UPDATE_STATE(s_header_almost_done); 1885 | COUNT_HEADER_SIZE(p - start); 1886 | parser->header_state = h_state; 1887 | CALLBACK_DATA_NOADVANCE(header_value); 1888 | REEXECUTE(); 1889 | } 1890 | 1891 | if (!lenient && !IS_HEADER_CHAR(ch)) { 1892 | SET_ERRNO(HPE_INVALID_HEADER_TOKEN); 1893 | goto error; 1894 | } 1895 | 1896 | c = LOWER(ch); 1897 | 1898 | switch (h_state) { 1899 | case h_general: 1900 | { 1901 | const char* p_cr; 1902 | const char* p_lf; 1903 | size_t limit = data + len - p; 1904 | 1905 | limit = MIN(limit, HTTP_MAX_HEADER_SIZE); 1906 | 1907 | p_cr = (const char*) memchr(p, CR, limit); 1908 | p_lf = (const char*) memchr(p, LF, limit); 1909 | if (p_cr != NULL) { 1910 | if (p_lf != NULL && p_cr >= p_lf) 1911 | p = p_lf; 1912 | else 1913 | p = p_cr; 1914 | } else if (UNLIKELY(p_lf != NULL)) { 1915 | p = p_lf; 1916 | } else { 1917 | p = data + len; 1918 | } 1919 | --p; 1920 | 1921 | break; 1922 | } 1923 | 1924 | case h_connection: 1925 | case h_transfer_encoding: 1926 | assert(0 && "Shouldn't get here."); 1927 | break; 1928 | 1929 | case h_content_length: 1930 | { 1931 | uint64_t t; 1932 | 1933 | if (ch == ' ') break; 1934 | 1935 | if (UNLIKELY(!IS_NUM(ch))) { 1936 | SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); 1937 | parser->header_state = h_state; 1938 | goto error; 1939 | } 1940 | 1941 | t = parser->content_length; 1942 | t *= 10; 1943 | t += ch - '0'; 1944 | 1945 | /* Overflow? Test against a conservative limit for simplicity. 
*/ 1946 | if (UNLIKELY((ULLONG_MAX - 10) / 10 < parser->content_length)) { 1947 | SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); 1948 | parser->header_state = h_state; 1949 | goto error; 1950 | } 1951 | 1952 | parser->content_length = t; 1953 | break; 1954 | } 1955 | 1956 | /* Transfer-Encoding: chunked */ 1957 | case h_matching_transfer_encoding_chunked: 1958 | parser->index++; 1959 | if (parser->index > sizeof(CHUNKED)-1 1960 | || c != CHUNKED[parser->index]) { 1961 | h_state = h_general; 1962 | } else if (parser->index == sizeof(CHUNKED)-2) { 1963 | h_state = h_transfer_encoding_chunked; 1964 | } 1965 | break; 1966 | 1967 | case h_matching_connection_token_start: 1968 | /* looking for 'Connection: keep-alive' */ 1969 | if (c == 'k') { 1970 | h_state = h_matching_connection_keep_alive; 1971 | /* looking for 'Connection: close' */ 1972 | } else if (c == 'c') { 1973 | h_state = h_matching_connection_close; 1974 | } else if (c == 'u') { 1975 | h_state = h_matching_connection_upgrade; 1976 | } else if (STRICT_TOKEN(c)) { 1977 | h_state = h_matching_connection_token; 1978 | } else if (c == ' ' || c == '\t') { 1979 | /* Skip lws */ 1980 | } else { 1981 | h_state = h_general; 1982 | } 1983 | break; 1984 | 1985 | /* looking for 'Connection: keep-alive' */ 1986 | case h_matching_connection_keep_alive: 1987 | parser->index++; 1988 | if (parser->index > sizeof(KEEP_ALIVE)-1 1989 | || c != KEEP_ALIVE[parser->index]) { 1990 | h_state = h_matching_connection_token; 1991 | } else if (parser->index == sizeof(KEEP_ALIVE)-2) { 1992 | h_state = h_connection_keep_alive; 1993 | } 1994 | break; 1995 | 1996 | /* looking for 'Connection: close' */ 1997 | case h_matching_connection_close: 1998 | parser->index++; 1999 | if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) { 2000 | h_state = h_matching_connection_token; 2001 | } else if (parser->index == sizeof(CLOSE)-2) { 2002 | h_state = h_connection_close; 2003 | } 2004 | break; 2005 | 2006 | /* looking for 'Connection: upgrade' */ 
2007 | case h_matching_connection_upgrade: 2008 | parser->index++; 2009 | if (parser->index > sizeof(UPGRADE) - 1 || 2010 | c != UPGRADE[parser->index]) { 2011 | h_state = h_matching_connection_token; 2012 | } else if (parser->index == sizeof(UPGRADE)-2) { 2013 | h_state = h_connection_upgrade; 2014 | } 2015 | break; 2016 | 2017 | case h_matching_connection_token: 2018 | if (ch == ',') { 2019 | h_state = h_matching_connection_token_start; 2020 | parser->index = 0; 2021 | } 2022 | break; 2023 | 2024 | case h_transfer_encoding_chunked: 2025 | if (ch != ' ') h_state = h_general; 2026 | break; 2027 | 2028 | case h_connection_keep_alive: 2029 | case h_connection_close: 2030 | case h_connection_upgrade: 2031 | if (ch == ',') { 2032 | if (h_state == h_connection_keep_alive) { 2033 | parser->flags |= F_CONNECTION_KEEP_ALIVE; 2034 | } else if (h_state == h_connection_close) { 2035 | parser->flags |= F_CONNECTION_CLOSE; 2036 | } else if (h_state == h_connection_upgrade) { 2037 | parser->flags |= F_CONNECTION_UPGRADE; 2038 | } 2039 | h_state = h_matching_connection_token_start; 2040 | parser->index = 0; 2041 | } else if (ch != ' ') { 2042 | h_state = h_matching_connection_token; 2043 | } 2044 | break; 2045 | 2046 | default: 2047 | UPDATE_STATE(s_header_value); 2048 | h_state = h_general; 2049 | break; 2050 | } 2051 | } 2052 | parser->header_state = h_state; 2053 | 2054 | COUNT_HEADER_SIZE(p - start); 2055 | 2056 | if (p == data + len) 2057 | --p; 2058 | break; 2059 | } 2060 | 2061 | case s_header_almost_done: 2062 | { 2063 | if (UNLIKELY(ch != LF)) { 2064 | SET_ERRNO(HPE_LF_EXPECTED); 2065 | goto error; 2066 | } 2067 | 2068 | UPDATE_STATE(s_header_value_lws); 2069 | break; 2070 | } 2071 | 2072 | case s_header_value_lws: 2073 | { 2074 | if (ch == ' ' || ch == '\t') { 2075 | UPDATE_STATE(s_header_value_start); 2076 | REEXECUTE(); 2077 | } 2078 | 2079 | /* finished the header */ 2080 | switch (parser->header_state) { 2081 | case h_connection_keep_alive: 2082 | parser->flags |= 
F_CONNECTION_KEEP_ALIVE; 2083 | break; 2084 | case h_connection_close: 2085 | parser->flags |= F_CONNECTION_CLOSE; 2086 | break; 2087 | case h_transfer_encoding_chunked: 2088 | parser->flags |= F_CHUNKED; 2089 | break; 2090 | case h_connection_upgrade: 2091 | parser->flags |= F_CONNECTION_UPGRADE; 2092 | break; 2093 | default: 2094 | break; 2095 | } 2096 | 2097 | UPDATE_STATE(s_header_field_start); 2098 | REEXECUTE(); 2099 | } 2100 | 2101 | case s_header_value_discard_ws_almost_done: 2102 | { 2103 | STRICT_CHECK(ch != LF); 2104 | UPDATE_STATE(s_header_value_discard_lws); 2105 | break; 2106 | } 2107 | 2108 | case s_header_value_discard_lws: 2109 | { 2110 | if (ch == ' ' || ch == '\t') { 2111 | UPDATE_STATE(s_header_value_discard_ws); 2112 | break; 2113 | } else { 2114 | switch (parser->header_state) { 2115 | case h_connection_keep_alive: 2116 | parser->flags |= F_CONNECTION_KEEP_ALIVE; 2117 | break; 2118 | case h_connection_close: 2119 | parser->flags |= F_CONNECTION_CLOSE; 2120 | break; 2121 | case h_connection_upgrade: 2122 | parser->flags |= F_CONNECTION_UPGRADE; 2123 | break; 2124 | case h_transfer_encoding_chunked: 2125 | parser->flags |= F_CHUNKED; 2126 | break; 2127 | default: 2128 | break; 2129 | } 2130 | 2131 | /* header value was empty */ 2132 | MARK(header_value); 2133 | UPDATE_STATE(s_header_field_start); 2134 | CALLBACK_DATA_NOADVANCE(header_value); 2135 | REEXECUTE(); 2136 | } 2137 | } 2138 | 2139 | case s_headers_almost_done: 2140 | { 2141 | STRICT_CHECK(ch != LF); 2142 | 2143 | if (parser->flags & F_TRAILING) { 2144 | /* End of a chunked request */ 2145 | UPDATE_STATE(s_message_done); 2146 | CALLBACK_NOTIFY_NOADVANCE(chunk_complete); 2147 | REEXECUTE(); 2148 | } 2149 | 2150 | /* Cannot use chunked encoding and a content-length header together 2151 | per the HTTP specification. 
*/ 2152 | if ((parser->flags & F_CHUNKED) && 2153 | (parser->flags & F_CONTENTLENGTH)) { 2154 | SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH); 2155 | goto error; 2156 | } 2157 | 2158 | UPDATE_STATE(s_headers_done); 2159 | 2160 | /* Set this here so that on_headers_complete() callbacks can see it */ 2161 | parser->upgrade = 2162 | ((parser->flags & (F_UPGRADE | F_CONNECTION_UPGRADE)) == 2163 | (F_UPGRADE | F_CONNECTION_UPGRADE) || 2164 | parser->method == HTTP_CONNECT); 2165 | 2166 | /* Here we call the headers_complete callback. This is somewhat 2167 | * different than other callbacks because if the user returns 1, we 2168 | * will interpret that as saying that this message has no body. This 2169 | * is needed for the annoying case of receiving a response to a HEAD 2170 | * request. 2171 | * 2172 | * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so 2173 | * we have to simulate it by handling a change in errno below. 2174 | */ 2175 | if (settings->on_headers_complete) { 2176 | switch (settings->on_headers_complete(parser)) { 2177 | case 0: 2178 | break; 2179 | 2180 | case 2: 2181 | parser->upgrade = 1; 2182 | 2183 | case 1: 2184 | parser->flags |= F_SKIPBODY; 2185 | break; 2186 | 2187 | default: 2188 | SET_ERRNO(HPE_CB_headers_complete); 2189 | RETURN(p - data); /* Error */ 2190 | } 2191 | } 2192 | 2193 | if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { 2194 | RETURN(p - data); 2195 | } 2196 | 2197 | REEXECUTE(); 2198 | } 2199 | 2200 | case s_headers_done: 2201 | { 2202 | int hasBody; 2203 | STRICT_CHECK(ch != LF); 2204 | 2205 | parser->nread = 0; 2206 | 2207 | hasBody = parser->flags & F_CHUNKED || 2208 | (parser->content_length > 0 && parser->content_length != ULLONG_MAX); 2209 | if (parser->upgrade && (parser->method == HTTP_CONNECT || 2210 | (parser->flags & F_SKIPBODY) || !hasBody)) { 2211 | /* Exit, the rest of the message is in a different protocol. 
*/ 2212 | UPDATE_STATE(NEW_MESSAGE()); 2213 | CALLBACK_NOTIFY(message_complete); 2214 | RETURN((p - data) + 1); 2215 | } 2216 | 2217 | if (parser->flags & F_SKIPBODY) { 2218 | UPDATE_STATE(NEW_MESSAGE()); 2219 | CALLBACK_NOTIFY(message_complete); 2220 | } else if (parser->flags & F_CHUNKED) { 2221 | /* chunked encoding - ignore Content-Length header */ 2222 | UPDATE_STATE(s_chunk_size_start); 2223 | } else { 2224 | if (parser->content_length == 0) { 2225 | /* Content-Length header given but zero: Content-Length: 0\r\n */ 2226 | UPDATE_STATE(NEW_MESSAGE()); 2227 | CALLBACK_NOTIFY(message_complete); 2228 | } else if (parser->content_length != ULLONG_MAX) { 2229 | /* Content-Length header given and non-zero */ 2230 | UPDATE_STATE(s_body_identity); 2231 | } else { 2232 | if (!http_message_needs_eof(parser)) { 2233 | /* Assume content-length 0 - read the next */ 2234 | UPDATE_STATE(NEW_MESSAGE()); 2235 | CALLBACK_NOTIFY(message_complete); 2236 | } else { 2237 | /* Read body until EOF */ 2238 | UPDATE_STATE(s_body_identity_eof); 2239 | } 2240 | } 2241 | } 2242 | 2243 | break; 2244 | } 2245 | 2246 | case s_body_identity: 2247 | { 2248 | uint64_t to_read = MIN(parser->content_length, 2249 | (uint64_t) ((data + len) - p)); 2250 | 2251 | assert(parser->content_length != 0 2252 | && parser->content_length != ULLONG_MAX); 2253 | 2254 | /* The difference between advancing content_length and p is because 2255 | * the latter will automatically advance on the next loop iteration. 2256 | * Further, if content_length ends up at 0, we want to see the last 2257 | * byte again for our message complete callback. 2258 | */ 2259 | MARK(body); 2260 | parser->content_length -= to_read; 2261 | p += to_read - 1; 2262 | 2263 | if (parser->content_length == 0) { 2264 | UPDATE_STATE(s_message_done); 2265 | 2266 | /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte. 
2267 | * 2268 | * The alternative to doing this is to wait for the next byte to 2269 | * trigger the data callback, just as in every other case. The 2270 | * problem with this is that this makes it difficult for the test 2271 | * harness to distinguish between complete-on-EOF and 2272 | * complete-on-length. It's not clear that this distinction is 2273 | * important for applications, but let's keep it for now. 2274 | */ 2275 | CALLBACK_DATA_(body, p - body_mark + 1, p - data); 2276 | REEXECUTE(); 2277 | } 2278 | 2279 | break; 2280 | } 2281 | 2282 | /* read until EOF */ 2283 | case s_body_identity_eof: 2284 | MARK(body); 2285 | p = data + len - 1; 2286 | 2287 | break; 2288 | 2289 | case s_message_done: 2290 | UPDATE_STATE(NEW_MESSAGE()); 2291 | CALLBACK_NOTIFY(message_complete); 2292 | if (parser->upgrade) { 2293 | /* Exit, the rest of the message is in a different protocol. */ 2294 | RETURN((p - data) + 1); 2295 | } 2296 | break; 2297 | 2298 | case s_chunk_size_start: 2299 | { 2300 | assert(parser->nread == 1); 2301 | assert(parser->flags & F_CHUNKED); 2302 | 2303 | unhex_val = unhex[(unsigned char)ch]; 2304 | if (UNLIKELY(unhex_val == -1)) { 2305 | SET_ERRNO(HPE_INVALID_CHUNK_SIZE); 2306 | goto error; 2307 | } 2308 | 2309 | parser->content_length = unhex_val; 2310 | UPDATE_STATE(s_chunk_size); 2311 | break; 2312 | } 2313 | 2314 | case s_chunk_size: 2315 | { 2316 | uint64_t t; 2317 | 2318 | assert(parser->flags & F_CHUNKED); 2319 | 2320 | if (ch == CR) { 2321 | UPDATE_STATE(s_chunk_size_almost_done); 2322 | break; 2323 | } 2324 | 2325 | unhex_val = unhex[(unsigned char)ch]; 2326 | 2327 | if (unhex_val == -1) { 2328 | if (ch == ';' || ch == ' ') { 2329 | UPDATE_STATE(s_chunk_parameters); 2330 | break; 2331 | } 2332 | 2333 | SET_ERRNO(HPE_INVALID_CHUNK_SIZE); 2334 | goto error; 2335 | } 2336 | 2337 | t = parser->content_length; 2338 | t *= 16; 2339 | t += unhex_val; 2340 | 2341 | /* Overflow? Test against a conservative limit for simplicity. 
*/ 2342 | if (UNLIKELY((ULLONG_MAX - 16) / 16 < parser->content_length)) { 2343 | SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); 2344 | goto error; 2345 | } 2346 | 2347 | parser->content_length = t; 2348 | break; 2349 | } 2350 | 2351 | case s_chunk_parameters: 2352 | { 2353 | assert(parser->flags & F_CHUNKED); 2354 | /* just ignore this shit. TODO check for overflow */ 2355 | if (ch == CR) { 2356 | UPDATE_STATE(s_chunk_size_almost_done); 2357 | break; 2358 | } 2359 | break; 2360 | } 2361 | 2362 | case s_chunk_size_almost_done: 2363 | { 2364 | assert(parser->flags & F_CHUNKED); 2365 | STRICT_CHECK(ch != LF); 2366 | 2367 | parser->nread = 0; 2368 | 2369 | if (parser->content_length == 0) { 2370 | parser->flags |= F_TRAILING; 2371 | UPDATE_STATE(s_header_field_start); 2372 | } else { 2373 | UPDATE_STATE(s_chunk_data); 2374 | } 2375 | CALLBACK_NOTIFY(chunk_header); 2376 | break; 2377 | } 2378 | 2379 | case s_chunk_data: 2380 | { 2381 | uint64_t to_read = MIN(parser->content_length, 2382 | (uint64_t) ((data + len) - p)); 2383 | 2384 | assert(parser->flags & F_CHUNKED); 2385 | assert(parser->content_length != 0 2386 | && parser->content_length != ULLONG_MAX); 2387 | 2388 | /* See the explanation in s_body_identity for why the content 2389 | * length and data pointers are managed this way. 
2390 | */ 2391 | MARK(body); 2392 | parser->content_length -= to_read; 2393 | p += to_read - 1; 2394 | 2395 | if (parser->content_length == 0) { 2396 | UPDATE_STATE(s_chunk_data_almost_done); 2397 | } 2398 | 2399 | break; 2400 | } 2401 | 2402 | case s_chunk_data_almost_done: 2403 | assert(parser->flags & F_CHUNKED); 2404 | assert(parser->content_length == 0); 2405 | STRICT_CHECK(ch != CR); 2406 | UPDATE_STATE(s_chunk_data_done); 2407 | CALLBACK_DATA(body); 2408 | break; 2409 | 2410 | case s_chunk_data_done: 2411 | assert(parser->flags & F_CHUNKED); 2412 | STRICT_CHECK(ch != LF); 2413 | parser->nread = 0; 2414 | UPDATE_STATE(s_chunk_size_start); 2415 | CALLBACK_NOTIFY(chunk_complete); 2416 | break; 2417 | 2418 | default: 2419 | assert(0 && "unhandled state"); 2420 | SET_ERRNO(HPE_INVALID_INTERNAL_STATE); 2421 | goto error; 2422 | } 2423 | } 2424 | 2425 | /* Run callbacks for any marks that we have leftover after we ran our of 2426 | * bytes. There should be at most one of these set, so it's OK to invoke 2427 | * them in series (unset marks will not result in callbacks). 2428 | * 2429 | * We use the NOADVANCE() variety of callbacks here because 'p' has already 2430 | * overflowed 'data' and this allows us to correct for the off-by-one that 2431 | * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p' 2432 | * value that's in-bounds). 2433 | */ 2434 | 2435 | assert(((header_field_mark ? 1 : 0) + 2436 | (header_value_mark ? 1 : 0) + 2437 | (url_mark ? 1 : 0) + 2438 | (body_mark ? 1 : 0) + 2439 | (status_mark ? 
1 : 0)) <= 1); 2440 | 2441 | CALLBACK_DATA_NOADVANCE(header_field); 2442 | CALLBACK_DATA_NOADVANCE(header_value); 2443 | CALLBACK_DATA_NOADVANCE(url); 2444 | CALLBACK_DATA_NOADVANCE(body); 2445 | CALLBACK_DATA_NOADVANCE(status); 2446 | 2447 | RETURN(len); 2448 | 2449 | error: 2450 | if (HTTP_PARSER_ERRNO(parser) == HPE_OK) { 2451 | SET_ERRNO(HPE_UNKNOWN); 2452 | } 2453 | 2454 | RETURN(p - data); 2455 | } 2456 | 2457 | 2458 | /* Does the parser need to see an EOF to find the end of the message? */ 2459 | inline int 2460 | http_message_needs_eof (const http_parser *parser) 2461 | { 2462 | if (parser->type == HTTP_REQUEST) { 2463 | return 0; 2464 | } 2465 | 2466 | /* See RFC 2616 section 4.4 */ 2467 | if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */ 2468 | parser->status_code == 204 || /* No Content */ 2469 | parser->status_code == 304 || /* Not Modified */ 2470 | parser->flags & F_SKIPBODY) { /* response to a HEAD request */ 2471 | return 0; 2472 | } 2473 | 2474 | if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) { 2475 | return 0; 2476 | } 2477 | 2478 | return 1; 2479 | } 2480 | 2481 | 2482 | inline int 2483 | http_should_keep_alive (const http_parser *parser) 2484 | { 2485 | if (parser->http_major > 0 && parser->http_minor > 0) { 2486 | /* HTTP/1.1 */ 2487 | if (parser->flags & F_CONNECTION_CLOSE) { 2488 | return 0; 2489 | } 2490 | } else { 2491 | /* HTTP/1.0 or earlier */ 2492 | if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) { 2493 | return 0; 2494 | } 2495 | } 2496 | 2497 | return !http_message_needs_eof(parser); 2498 | } 2499 | 2500 | 2501 | inline const char * 2502 | http_method_str (enum http_method m) 2503 | { 2504 | return ELEM_AT(method_strings, m, ""); 2505 | } 2506 | 2507 | 2508 | inline void 2509 | http_parser_init (http_parser *parser, enum http_parser_type t) 2510 | { 2511 | void *data = parser->data; /* preserve application data */ 2512 | memset(parser, 0, sizeof(*parser)); 2513 | parser->data = data; 2514 | 
parser->type = t; 2515 | parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res)); 2516 | parser->http_errno = HPE_OK; 2517 | } 2518 | 2519 | inline void 2520 | http_parser_settings_init(http_parser_settings *settings) 2521 | { 2522 | memset(settings, 0, sizeof(*settings)); 2523 | } 2524 | 2525 | inline const char * 2526 | http_errno_name(enum http_errno err) { 2527 | assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab)); 2528 | return http_strerror_tab[err].name; 2529 | } 2530 | 2531 | inline const char * 2532 | http_errno_description(enum http_errno err) { 2533 | assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab)); 2534 | return http_strerror_tab[err].description; 2535 | } 2536 | 2537 | inline static enum http_host_state 2538 | http_parse_host_char(enum http_host_state s, const char ch) { 2539 | switch(s) { 2540 | case s_http_userinfo: 2541 | case s_http_userinfo_start: 2542 | if (ch == '@') { 2543 | return s_http_host_start; 2544 | } 2545 | 2546 | if (IS_USERINFO_CHAR(ch)) { 2547 | return s_http_userinfo; 2548 | } 2549 | break; 2550 | 2551 | case s_http_host_start: 2552 | if (ch == '[') { 2553 | return s_http_host_v6_start; 2554 | } 2555 | 2556 | if (IS_HOST_CHAR(ch)) { 2557 | return s_http_host; 2558 | } 2559 | 2560 | break; 2561 | 2562 | case s_http_host: 2563 | if (IS_HOST_CHAR(ch)) { 2564 | return s_http_host; 2565 | } 2566 | 2567 | /* FALLTHROUGH */ 2568 | case s_http_host_v6_end: 2569 | if (ch == ':') { 2570 | return s_http_host_port_start; 2571 | } 2572 | 2573 | break; 2574 | 2575 | case s_http_host_v6: 2576 | if (ch == ']') { 2577 | return s_http_host_v6_end; 2578 | } 2579 | 2580 | /* FALLTHROUGH */ 2581 | case s_http_host_v6_start: 2582 | if (IS_HEX(ch) || ch == ':' || ch == '.') { 2583 | return s_http_host_v6; 2584 | } 2585 | 2586 | if (s == s_http_host_v6 && ch == '%') { 2587 | return s_http_host_v6_zone_start; 2588 | } 2589 | break; 2590 | 2591 | case s_http_host_v6_zone: 2592 | if (ch == ']') { 
2593 | return s_http_host_v6_end; 2594 | } 2595 | 2596 | /* FALLTHROUGH */ 2597 | case s_http_host_v6_zone_start: 2598 | /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */ 2599 | if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' || ch == '-' || ch == '_' || 2600 | ch == '~') { 2601 | return s_http_host_v6_zone; 2602 | } 2603 | break; 2604 | 2605 | case s_http_host_port: 2606 | case s_http_host_port_start: 2607 | if (IS_NUM(ch)) { 2608 | return s_http_host_port; 2609 | } 2610 | 2611 | break; 2612 | 2613 | default: 2614 | break; 2615 | } 2616 | return s_http_host_dead; 2617 | } 2618 | 2619 | inline static int 2620 | http_parse_host(const char * buf, struct http_parser_url *u, int found_at) { 2621 | enum http_host_state s; 2622 | 2623 | const char *p; 2624 | size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len; 2625 | 2626 | assert(u->field_set & (1 << UF_HOST)); 2627 | 2628 | u->field_data[UF_HOST].len = 0; 2629 | 2630 | s = found_at ? s_http_userinfo_start : s_http_host_start; 2631 | 2632 | for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) { 2633 | enum http_host_state new_s = http_parse_host_char(s, *p); 2634 | 2635 | if (new_s == s_http_host_dead) { 2636 | return 1; 2637 | } 2638 | 2639 | switch(new_s) { 2640 | case s_http_host: 2641 | if (s != s_http_host) { 2642 | u->field_data[UF_HOST].off = p - buf; 2643 | } 2644 | u->field_data[UF_HOST].len++; 2645 | break; 2646 | 2647 | case s_http_host_v6: 2648 | if (s != s_http_host_v6) { 2649 | u->field_data[UF_HOST].off = p - buf; 2650 | } 2651 | u->field_data[UF_HOST].len++; 2652 | break; 2653 | 2654 | case s_http_host_v6_zone_start: 2655 | case s_http_host_v6_zone: 2656 | u->field_data[UF_HOST].len++; 2657 | break; 2658 | 2659 | case s_http_host_port: 2660 | if (s != s_http_host_port) { 2661 | u->field_data[UF_PORT].off = p - buf; 2662 | u->field_data[UF_PORT].len = 0; 2663 | u->field_set |= (1 << UF_PORT); 2664 | } 2665 | u->field_data[UF_PORT].len++; 2666 | break; 2667 | 
2668 | case s_http_userinfo: 2669 | if (s != s_http_userinfo) { 2670 | u->field_data[UF_USERINFO].off = p - buf ; 2671 | u->field_data[UF_USERINFO].len = 0; 2672 | u->field_set |= (1 << UF_USERINFO); 2673 | } 2674 | u->field_data[UF_USERINFO].len++; 2675 | break; 2676 | 2677 | default: 2678 | break; 2679 | } 2680 | s = new_s; 2681 | } 2682 | 2683 | /* Make sure we don't end somewhere unexpected */ 2684 | switch (s) { 2685 | case s_http_host_start: 2686 | case s_http_host_v6_start: 2687 | case s_http_host_v6: 2688 | case s_http_host_v6_zone_start: 2689 | case s_http_host_v6_zone: 2690 | case s_http_host_port_start: 2691 | case s_http_userinfo: 2692 | case s_http_userinfo_start: 2693 | return 1; 2694 | default: 2695 | break; 2696 | } 2697 | 2698 | return 0; 2699 | } 2700 | 2701 | inline void 2702 | http_parser_url_init(struct http_parser_url *u) { 2703 | memset(u, 0, sizeof(*u)); 2704 | } 2705 | 2706 | inline int 2707 | http_parser_parse_url(const char *buf, size_t buflen, int is_connect, 2708 | struct http_parser_url *u) 2709 | { 2710 | enum state s; 2711 | const char *p; 2712 | enum http_parser_url_fields uf, old_uf; 2713 | int found_at = 0; 2714 | 2715 | u->port = u->field_set = 0; 2716 | s = is_connect ? 
s_req_server_start : s_req_spaces_before_url; 2717 | old_uf = UF_MAX; 2718 | 2719 | for (p = buf; p < buf + buflen; p++) { 2720 | s = parse_url_char(s, *p); 2721 | 2722 | /* Figure out the next field that we're operating on */ 2723 | switch (s) { 2724 | case s_dead: 2725 | return 1; 2726 | 2727 | /* Skip delimeters */ 2728 | case s_req_schema_slash: 2729 | case s_req_schema_slash_slash: 2730 | case s_req_server_start: 2731 | case s_req_query_string_start: 2732 | case s_req_fragment_start: 2733 | continue; 2734 | 2735 | case s_req_schema: 2736 | uf = UF_SCHEMA; 2737 | break; 2738 | 2739 | case s_req_server_with_at: 2740 | found_at = 1; 2741 | 2742 | /* FALLTROUGH */ 2743 | case s_req_server: 2744 | uf = UF_HOST; 2745 | break; 2746 | 2747 | case s_req_path: 2748 | uf = UF_PATH; 2749 | break; 2750 | 2751 | case s_req_query_string: 2752 | uf = UF_QUERY; 2753 | break; 2754 | 2755 | case s_req_fragment: 2756 | uf = UF_FRAGMENT; 2757 | break; 2758 | 2759 | default: 2760 | assert(!"Unexpected state"); 2761 | return 1; 2762 | } 2763 | 2764 | /* Nothing's changed; soldier on */ 2765 | if (uf == old_uf) { 2766 | u->field_data[uf].len++; 2767 | continue; 2768 | } 2769 | 2770 | u->field_data[uf].off = p - buf; 2771 | u->field_data[uf].len = 1; 2772 | 2773 | u->field_set |= (1 << uf); 2774 | old_uf = uf; 2775 | } 2776 | 2777 | /* host must be present if there is a schema */ 2778 | /* parsing http:///toto will fail */ 2779 | if ((u->field_set & (1 << UF_SCHEMA)) && 2780 | (u->field_set & (1 << UF_HOST)) == 0) { 2781 | return 1; 2782 | } 2783 | 2784 | if (u->field_set & (1 << UF_HOST)) { 2785 | if (http_parse_host(buf, u, found_at) != 0) { 2786 | return 1; 2787 | } 2788 | } 2789 | 2790 | /* CONNECT requests can only contain "hostname:port" */ 2791 | if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) { 2792 | return 1; 2793 | } 2794 | 2795 | if (u->field_set & (1 << UF_PORT)) { 2796 | /* Don't bother with endp; we've already validated the string */ 2797 | unsigned 
namespace rapidhttp {

/// A length-delimited string that either borrows a caller-owned buffer
/// (a "view") or owns a private malloc()ed copy of its bytes.
///
/// Every constructor and assignment from raw text produces a view: the
/// referenced buffer must outlive the StringRef, and the bytes are not
/// required to be NUL-terminated. SetOwner(), or an append() that cannot
/// extend the view in place, switches the object to owning mode; owned
/// bytes are released by clear(), by re-assignment and by the destructor.
/// The length is stored in a 31-bit field.
class StringRef
{
public:
    /// Empty, non-owning reference.
    StringRef()
        : owner_(false), len_(0), str_("")
    {}

    /// View of the `len` bytes starting at `str`.
    StringRef(const char* str, uint32_t len)
        : owner_(false), len_(len), str_(str)
    {}

    /// An owning source is deep-copied; a view is copied as another view of
    /// the same bytes.
    StringRef(StringRef const& other)
        : owner_(false), len_(0), str_("")
    {
        CopyFrom(other);
    }

    StringRef& operator=(StringRef const& other)
    {
        if (this == &other) return *this;

        Release();
        CopyFrom(other);
        return *this;
    }

    /// Takes over the bytes (and ownership, if any); `other` becomes empty.
    StringRef(StringRef && other) noexcept
        : owner_(false), len_(0), str_("")
    {
        StealFrom(other);
    }

    StringRef& operator=(StringRef && other) noexcept
    {
        if (this == &other) return *this;

        Release();
        StealFrom(other);
        return *this;
    }

    /// View of the current contents of `s`; `s` must outlive this object.
    explicit StringRef(std::string const& s)
        : owner_(false), len_(s.size()), str_(s.c_str())
    {}

    ~StringRef()
    {
        Release();
    }

    /// Pointer to the first byte. NOT guaranteed to be NUL-terminated:
    /// always pair it with size().
    const char* c_str() const
    {
        return str_;
    }

    size_t size() const
    {
        return len_;
    }

    bool empty() const
    {
        return !size();
    }

    /// Releases owned bytes and resets to the empty, non-owning state.
    void clear()
    {
        Release();

        str_ = "";
        len_ = 0;
        owner_ = false;
    }

    operator std::string() const
    {
        return std::string(str_, len_);
    }

    /// Re-points this object at the contents of `s` (as a view).
    void SetString(std::string const& s)
    {
        Release();

        str_ = s.c_str();
        len_ = s.size();
        owner_ = false;
    }

    /// Converts a non-empty view into an owning copy, so that the original
    /// buffer may go away. No effect on empty or already-owning objects.
    void SetOwner()
    {
        if (!owner_ && len_) {
            char* buf = Checked(malloc(len_));
            memcpy(buf, str_, len_);
            str_ = buf;
            owner_ = true;
        }
    }

    void append(const char* first, size_t length)
    {
        append(first, first + length);
    }

    /// Appends the bytes in [first, last).
    ///
    /// No copy is made when this object is empty, or when it is a view and
    /// the new bytes start exactly where the view ends; otherwise the
    /// result is stored in an owned buffer. The range must not point into
    /// this object's own owned buffer, because realloc() may move it.
    void append(const char* first, const char* last)
    {
        if (first >= last) return ;

        const size_t extra = last - first;
        if (!len_) {
            str_ = first;
            len_ = extra;
        } else if (!owner_ && str_ + len_ == first) {
            // Contiguous with the current view: just widen it.
            len_ += extra;
        } else {
            const size_t new_len = len_ + extra;
            char* buf = nullptr;
            if (owner_) {
                buf = Checked(realloc(const_cast<char*>(str_), new_len));
            } else {
                buf = Checked(malloc(new_len));
                memcpy(buf, str_, len_);
            }

            memcpy(buf + len_, first, extra);
            str_ = buf;
            len_ = new_len;
            owner_ = true;
        }
    }

    /// ------------- string assign operator ---------------
public:
    /// Becomes a view of the NUL-terminated string `cstr`.
    StringRef& operator=(const char* cstr)
    {
        assert(cstr);
        clear();
        str_ = cstr;
        len_ = strlen(str_);
        return *this;
    }

    StringRef& operator=(std::string const& s)
    {
        SetString(s);
        return *this;
    }

    char const& operator[](int index) const
    {
        assert(index >= 0 && index < len_);
        return str_[index];
    }

    /// ------------- string equal-compare operator ---------------
public:
    friend bool operator==(StringRef const& lhs, StringRef const& rhs)
    {
        if (lhs.size() != rhs.size()) return false;
        if (lhs.c_str() == rhs.c_str()) return true;
        return memcmp(lhs.c_str(), rhs.c_str(), lhs.size()) == 0;
    }
    friend bool operator!=(StringRef const& lhs, StringRef const& rhs)
    {
        return !(lhs == rhs);
    }
    friend bool operator==(StringRef const& lhs, std::string const& rhs)
    {
        if (lhs.size() != rhs.size()) return false;
        return memcmp(lhs.c_str(), rhs.c_str(), lhs.size()) == 0;
    }
    friend bool operator!=(StringRef const& lhs, std::string const& rhs)
    {
        return !(lhs == rhs);
    }
    friend bool operator==(std::string const& lhs, StringRef const& rhs)
    {
        return rhs == lhs;
    }
    friend bool operator!=(std::string const& lhs, StringRef const& rhs)
    {
        return !(lhs == rhs);
    }
    friend bool operator==(StringRef const& lhs, const char* rhs)
    {
        assert(rhs);
        // A zero-length ref has no first byte to look at: it equals exactly
        // the empty C string, whatever buffer it happens to point into.
        if (lhs.empty()) return *rhs == '\0';
        // Cheap rejection before paying for strlen().
        if (*lhs.c_str() != *rhs) return false;
        size_t len = strlen(rhs);
        if (lhs.size() != len) return false;
        return memcmp(lhs.c_str(), rhs, len) == 0;
    }
    friend bool operator!=(StringRef const& lhs, const char* rhs)
    {
        assert(rhs);
        return !(lhs == rhs);
    }
    friend bool operator==(const char* lhs, StringRef const& rhs)
    {
        assert(lhs);
        return rhs == lhs;
    }
    friend bool operator!=(const char* lhs, StringRef const& rhs)
    {
        assert(lhs);
        return !(lhs == rhs);
    }
    /// -----------------------------------------------------

private:
    /// Passes an allocation result through, aborting on failure so that a
    /// null pointer never reaches memcpy().
    static char* Checked(void* mem)
    {
        if (!mem) abort();
        return static_cast<char*>(mem);
    }

    /// Frees the bytes if they are owned. Fields are left untouched; the
    /// caller overwrites them next.
    void Release()
    {
        if (owner_)
            free(const_cast<char*>(str_));
    }

    /// Copy step shared by the copy constructor and copy assignment.
    /// Expects any previously owned bytes to have been released already.
    void CopyFrom(StringRef const& other)
    {
        const bool deep = other.owner_ && other.len_;
        if (deep) {
            char* buf = Checked(malloc(other.len_));
            memcpy(buf, other.str_, other.len_);
            str_ = buf;
        } else {
            str_ = other.str_;
        }
        len_ = other.len_;
        // Only claim ownership of bytes that were actually duplicated.
        owner_ = deep;
    }

    /// Move step shared by the move constructor and move assignment.
    void StealFrom(StringRef& other) noexcept
    {
        str_ = other.str_;
        len_ = other.len_;
        owner_ = other.owner_;

        other.owner_ = false;
        other.len_ = 0;
        other.str_ = "";
    }

    bool owner_ : 1;
    uint32_t len_ : 31;
    const char* str_;
};

} //namespace rapidhttp
31 | } 32 | 33 | inline const char* SkipSpaces(const char* pos, const char* last) 34 | { 35 | for (; pos < last && *pos == ' '; ++pos) 36 | ; 37 | return pos; 38 | } 39 | 40 | inline const char* FindSpaces(const char* pos, const char* last) 41 | { 42 | for (; pos < last && *pos != ' '; ++pos) 43 | ; 44 | if (pos == last) return nullptr; 45 | return *pos == ' ' ? pos : nullptr; 46 | } 47 | inline const char* FindCRLF(const char* pos, const char* last, std::error_code & ec) 48 | { 49 | for (; pos < last - 1; ++pos) { 50 | if (*pos == '\r') { 51 | if (*(pos + 1) == '\n') { 52 | return pos; 53 | } else { 54 | ec = MakeErrorCode(eErrorCode::parse_error); 55 | return nullptr; 56 | } 57 | } else if (*pos == '\n') { 58 | ec = MakeErrorCode(eErrorCode::parse_error); 59 | return nullptr; 60 | } 61 | } 62 | 63 | return nullptr; 64 | } 65 | 66 | } //namespace rapidhttp 67 | -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | 5 | git submodule update --init 6 | mkdir build -p 7 | cd build 8 | cmake .. 
#!/bin/sh
#
# Builds include/rapidhttp/layer.hpp from the http-parser sources under
# third_party/http-parser: http_parser.h and http_parser.c are concatenated
# into one header, placed in namespace rapidhttp, and their function
# declarations/definitions are prefixed with `inline`.
#
# Usage: extract_http_parser.sh <project-source-dir>
#
# Requires GNU sed (`sed -i`, `\s`).

# Stop at the first failing command instead of emitting a half-built header.
set -e

if [ -z "$1" ]; then
    echo "usage: $0 <project-source-dir>" >&2
    exit 1
fi

src_dir="$1/third_party/http-parser"
dest="$1/include/rapidhttp/layer.hpp"

# Header part: the extern "C" wrapper becomes the rapidhttp namespace.
echo "#pragma once" > "$dest"
cat "$src_dir/http_parser.h" >> "$dest"
sed -i 's/extern\ "C"/namespace rapidhttp/g' "$dest"

# Split http_parser.c after its last #include line: everything up to that
# line is appended as-is, everything after it goes inside the namespace.
last_include=$(grep -n '^#include' "$src_dir/http_parser.c" | tail -n 1 | cut -d: -f1)
if [ -z "$last_include" ]; then
    echo "$0: no #include line found in $src_dir/http_parser.c" >&2
    exit 1
fi
tail_start=$((last_include + 1))
head -n "$last_include" "$src_dir/http_parser.c" >> "$dest"

echo "namespace rapidhttp {" >> "$dest"
tail -n "+$tail_start" "$src_dir/http_parser.c" >> "$dest"
echo "} //namespace rapidhttp" >> "$dest"

# add inline key-word
# (presumably so the header can be included from several translation units
# without multiple-definition errors). One sed pass applies every rule in
# order to each line; the final rule drops the now self-referential
# #include "http_parser.h".
sed -i \
    -e 's/^unsigned long http_parser_version(void);/inline &/g' \
    -e 's/^void http_parser_init(.*);/inline &/g' \
    -e 's/^void http_parser_settings_init(.*);/inline &/g' \
    -e 's/^size_t http_parser_execute\s*(.*/inline &/g' \
    -e 's/^int http_should_keep_alive(.*);/inline &/g' \
    -e 's/^const char \*http_method_str(.*);/inline &/g' \
    -e 's/^const char \*http_errno_name(.*);/inline &/g' \
    -e 's/^const char \*http_errno_description(.*);/inline &/g' \
    -e 's/^void http_parser_url_init(.*);/inline &/g' \
    -e 's/^int http_parser_parse_url(.*/inline &/g' \
    -e 's/^void http_parser_pause(.*);/inline &/g' \
    -e 's/^int http_body_is_final(.*);/inline &/g' \
    -e 's/^unsigned long$/inline &/g' \
    -e 's/^static enum state$/inline &/g' \
    -e 's/^static enum http_host_state$/inline &/g' \
    -e 's/^int$/inline &/g' \
    -e 's/^static int$/inline &/g' \
    -e 's/^void$/inline &/g' \
    -e 's/^const char \*$/inline &/g' \
    -e 's/^\#include "http_parser.h"//g' \
    "$dest"

echo "create http-parser $dest"
| "\r\n"; 14 | 15 | static std::string c_http_request_2 = 16 | "POST /uri/abc HTTP/1.1\r\n" 17 | "Accept: XAccept\r\n" 18 | "Host: domain.com\r\n" 19 | "User-Agent: gtest.proxy\r\n" 20 | "Content-Length: 3\r\n" 21 | "\r\nabc"; 22 | 23 | // 错误的协议头 24 | static std::string c_http_request_err_1 = 25 | "POST/uri/abc HTTP/1.1\r\n" 26 | "Accept: XAccept\r\n" 27 | "Host: domain.com\r\n" 28 | "User-Agent: gtest.proxy\r\n" 29 | "\r\n"; 30 | 31 | // 兼容HTTP0.9的协议头 32 | static std::string c_http_request_http_0_9 = 33 | "POST /uri/abcHTTP/1.1\r\n" 34 | "Accept: XAccept\r\n" 35 | "Host: domain.com\r\n" 36 | "User-Agent: gtest.proxy\r\n" 37 | "\r\n"; 38 | 39 | // 一部分协议头, 缺少一个\r\n 40 | static std::string c_http_request_err_3 = 41 | "POST /uri/abc HTTP/1.1\r\n" 42 | "Accept: XAccept\r\n" 43 | "Host: domain.com\r\n" 44 | "User-Agent: gtest.proxy\r\n"; 45 | 46 | 47 | template 48 | void test_parse_request() 49 | { 50 | DocType doc(rapidhttp::Request); 51 | size_t bytes = doc.PartailParse(c_http_request); 52 | EXPECT_EQ(bytes, c_http_request.size()); 53 | EXPECT_TRUE(!doc.ParseError()); 54 | 55 | EXPECT_EQ(doc.GetMethod(), "GET"); 56 | EXPECT_EQ(doc.GetUri(), "/uri/abc"); 57 | EXPECT_EQ(doc.GetMajor(), 1); 58 | EXPECT_EQ(doc.GetMinor(), 1); 59 | EXPECT_EQ(doc.GetField("Accept"), "XAccept"); 60 | EXPECT_EQ(doc.GetField("Host"), "domain.com"); 61 | EXPECT_EQ(doc.GetField("Connection"), "Keep-Alive"); 62 | EXPECT_EQ(doc.GetField("User-Agent"), ""); 63 | 64 | for (int i = 0; i < 10; ++i) { 65 | size_t bytes = doc.PartailParse(c_http_request.c_str(), c_http_request.size()); 66 | EXPECT_EQ(bytes, c_http_request.size()); 67 | EXPECT_TRUE(!doc.ParseError()); 68 | } 69 | 70 | EXPECT_EQ(doc.GetMethod(), "GET"); 71 | EXPECT_EQ(doc.GetUri(), "/uri/abc"); 72 | EXPECT_EQ(doc.GetMajor(), 1); 73 | EXPECT_EQ(doc.GetMinor(), 1); 74 | EXPECT_EQ(doc.GetField("Accept"), "XAccept"); 75 | EXPECT_EQ(doc.GetField("Host"), "domain.com"); 76 | EXPECT_EQ(doc.GetField("Connection"), "Keep-Alive"); 77 | 
EXPECT_EQ(doc.GetField("User-Agent"), ""); 78 | 79 | bytes = doc.PartailParse(c_http_request_2); 80 | EXPECT_EQ(bytes, c_http_request_2.size()); 81 | EXPECT_TRUE(!doc.ParseError()); 82 | EXPECT_EQ(doc.GetMethod(), "POST"); 83 | EXPECT_EQ(doc.GetUri(), "/uri/abc"); 84 | EXPECT_EQ(doc.GetMajor(), 1); 85 | EXPECT_EQ(doc.GetMinor(), 1); 86 | EXPECT_EQ(doc.GetField("Accept"), "XAccept"); 87 | EXPECT_EQ(doc.GetField("Host"), "domain.com"); 88 | EXPECT_EQ(doc.GetField("Connection"), ""); 89 | EXPECT_EQ(doc.GetField("User-Agent"), "gtest.proxy"); 90 | EXPECT_EQ(doc.GetBody(), "abc"); 91 | 92 | bytes = doc.PartailParse(c_http_request_err_1); 93 | EXPECT_TRUE(doc.ParseError()); 94 | 95 | bytes = doc.PartailParse(c_http_request_http_0_9); 96 | EXPECT_FALSE(doc.ParseError()); 97 | EXPECT_TRUE(doc.ParseDone()); 98 | EXPECT_EQ(doc.GetMajor(), 0); 99 | EXPECT_EQ(doc.GetMinor(), 9); 100 | 101 | // partail parse logic 102 | cout << "parse partail" << endl; 103 | bytes = doc.PartailParse(c_http_request_err_3); 104 | EXPECT_EQ(bytes, c_http_request_err_3.size()); 105 | EXPECT_FALSE(doc.ParseError()); 106 | EXPECT_FALSE(doc.ParseDone()); 107 | 108 | bytes = doc.PartailParse("\r\n"); 109 | EXPECT_EQ(bytes, 2); 110 | EXPECT_FALSE(doc.ParseError()); 111 | 112 | for (size_t pos = 0; pos < c_http_request.size(); ++pos) 113 | { 114 | // cout << "parse split by " << pos << endl; 115 | std::string fp = c_http_request.substr(0, pos); 116 | size_t bytes = doc.PartailParse(fp); 117 | // EXPECT_EQ(bytes, pos); 118 | // EXPECT_EQ(doc.ParseError().value(), 1); 119 | EXPECT_FALSE(doc.ParseDone()); 120 | 121 | std::string sp = c_http_request.substr(bytes); 122 | bytes += doc.PartailParse(sp); 123 | EXPECT_EQ(bytes, c_http_request.size()); 124 | EXPECT_TRUE(!doc.ParseError()); 125 | EXPECT_TRUE(doc.ParseDone()); 126 | 127 | EXPECT_EQ(doc.GetMethod(), "GET"); 128 | EXPECT_EQ(doc.GetUri(), "/uri/abc"); 129 | EXPECT_EQ(doc.GetMajor(), 1); 130 | EXPECT_EQ(doc.GetMinor(), 1); 131 | 
EXPECT_EQ(doc.GetField("Accept"), "XAccept"); 132 | EXPECT_EQ(doc.GetField("Host"), "domain.com"); 133 | EXPECT_EQ(doc.GetField("Connection"), "Keep-Alive"); 134 | EXPECT_EQ(doc.GetField("User-Agent"), ""); 135 | } 136 | 137 | char buf[256] = {}; 138 | bool b = doc.Serialize(buf, sizeof(buf)); 139 | EXPECT_TRUE(b); 140 | bytes = doc.ByteSize(); 141 | EXPECT_EQ(bytes, c_http_request.size()); 142 | EXPECT_EQ(c_http_request, buf); 143 | 144 | bytes = doc.PartailParse(c_http_request_2); 145 | EXPECT_EQ(bytes, c_http_request_2.size()); 146 | EXPECT_TRUE(doc.ParseDone()); 147 | b = doc.Serialize(buf, sizeof(buf)); 148 | EXPECT_TRUE(b); 149 | bytes = doc.ByteSize(); 150 | EXPECT_EQ(bytes, c_http_request_2.size()); 151 | EXPECT_EQ(c_http_request_2, buf); 152 | } 153 | 154 | void copyto_request() 155 | { 156 | std::string s = c_http_request_2; 157 | 158 | rapidhttp::HttpDocumentRef doc(rapidhttp::Request); 159 | size_t bytes = doc.PartailParse(s); 160 | EXPECT_EQ(bytes, s.size()); 161 | EXPECT_TRUE(!doc.ParseError()); 162 | 163 | #define _CHECK_DOC(doc) \ 164 | EXPECT_EQ(doc.GetMethod(), "POST"); \ 165 | EXPECT_EQ(doc.GetUri(), "/uri/abc"); \ 166 | EXPECT_EQ(doc.GetMajor(), 1); \ 167 | EXPECT_EQ(doc.GetMinor(), 1); \ 168 | EXPECT_EQ(doc.GetField("Accept"), "XAccept"); \ 169 | EXPECT_EQ(doc.GetField("Host"), "domain.com"); \ 170 | EXPECT_EQ(doc.GetField("Connection"), ""); \ 171 | EXPECT_EQ(doc.GetField("User-Agent"), "gtest.proxy"); \ 172 | EXPECT_EQ(doc.GetBody(), "abc") 173 | 174 | _CHECK_DOC(doc); 175 | 176 | rapidhttp::HttpDocumentRef doc2(rapidhttp::Request); 177 | doc.CopyTo(doc2); 178 | _CHECK_DOC(doc2); 179 | 180 | rapidhttp::HttpDocument doc3(rapidhttp::Request); 181 | doc2.CopyTo(doc3); 182 | _CHECK_DOC(doc3); 183 | 184 | rapidhttp::HttpDocument doc4(rapidhttp::Request); 185 | doc2.CopyTo(doc4); 186 | _CHECK_DOC(doc4); 187 | doc3.CopyTo(doc4); 188 | _CHECK_DOC(doc4); 189 | 190 | s = "xx"; 191 | _CHECK_DOC(doc3); 192 | _CHECK_DOC(doc4); 193 | } 194 | 195 | 
TEST(parse, request) 196 | { 197 | test_parse_request<rapidhttp::HttpDocument>(); 198 | test_parse_request<rapidhttp::HttpDocumentRef>(); 199 | copyto_request(); 200 | } 201 | -------------------------------------------------------------------------------- /test/parse_response.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | using namespace std; 6 | using namespace rapidhttp; 7 | 8 | static std::string c_http_response = 9 | "HTTP/1.1 200 OK\r\n" 10 | "Accept: XAccept\r\n" 11 | "Host: domain.com\r\n" 12 | "Connection: Keep-Alive\r\n" 13 | "Content-Length: 3\r\n" 14 | "\r\nxyz"; 15 | 16 | static std::string c_http_response_2 = 17 | "HTTP/1.1 404 Not Found\r\n" 18 | "Accept: XAccept\r\n" 19 | "Host: domain.com\r\n" 20 | "User-Agent: gtest.proxy\r\n" 21 | "\r\n"; 22 | 23 | // Malformed status line: no space between the version and the status code 24 | static std::string c_http_response_err_1 = 25 | "HTTP/1.1200 OK\r\n" 26 | "Accept: XAccept\r\n" 27 | "Host: domain.com\r\n" 28 | "User-Agent: gtest.proxy\r\n" 29 | "\r\n"; 30 | 31 | // Malformed status line: no space between the status code and the reason phrase 32 | static std::string c_http_response_err_2 = 33 | "HTTP/1.1 200OK\r\n" 34 | "Accept: XAccept\r\n" 35 | "Host: domain.com\r\n" 36 | "User-Agent: gtest.proxy\r\n" 37 | "\r\n"; 38 | 39 | // Incomplete header block: the final blank line (CRLF) is missing 40 | static std::string c_http_response_err_3 = 41 | "HTTP/1.1 200 OK\r\n" 42 | "Accept: XAccept\r\n" 43 | "Host: domain.com\r\n" 44 | "Content-Length: 0\r\n" 45 | "User-Agent: gtest.proxy\r\n"; 46 | 47 | // Parses complete, EOF-terminated, malformed and split responses with DocType and checks the parsed values. 48 | template <typename DocType> 49 | void test_parse_response() 50 | { 51 | DocType doc(rapidhttp::Response); 52 | size_t bytes = doc.PartailParse(c_http_response); 53 | EXPECT_EQ(bytes, c_http_response.size()); 54 | EXPECT_TRUE(!doc.ParseError()); 55 | EXPECT_TRUE(doc.ParseDone()); 56 | 57 | EXPECT_EQ(doc.GetStatus(), "OK"); 58 | EXPECT_EQ(doc.GetStatusCode(), 200); 59 | EXPECT_EQ(doc.GetMajor(), 1); 60 | EXPECT_EQ(doc.GetMinor(), 1); 61 | EXPECT_EQ(doc.GetField("Accept"), "XAccept"); 62 | EXPECT_EQ(doc.GetField("Host"), "domain.com"); 63 | 
EXPECT_EQ(doc.GetField("Connection"), "Keep-Alive"); 64 | EXPECT_EQ(doc.GetField("User-Agent"), ""); 65 | EXPECT_EQ(doc.GetBody(), "xyz"); 66 | /* Re-parse the same complete response ten times on the same document; every pass must succeed. */ 67 | for (int i = 0; i < 10; ++i) { 68 | size_t bytes = doc.PartailParse(c_http_response.c_str(), c_http_response.size()); 69 | EXPECT_EQ(bytes, c_http_response.size()); 70 | EXPECT_TRUE(!doc.ParseError()); 71 | EXPECT_TRUE(doc.ParseDone()); 72 | } 73 | /* The parsed values are unchanged after the repeated parses. */ 74 | EXPECT_EQ(doc.GetStatus(), "OK"); 75 | EXPECT_EQ(doc.GetStatusCode(), 200); 76 | EXPECT_EQ(doc.GetMajor(), 1); 77 | EXPECT_EQ(doc.GetMinor(), 1); 78 | EXPECT_EQ(doc.GetField("Accept"), "XAccept"); 79 | EXPECT_EQ(doc.GetField("Host"), "domain.com"); 80 | EXPECT_EQ(doc.GetField("Connection"), "Keep-Alive"); 81 | EXPECT_EQ(doc.GetField("User-Agent"), ""); 82 | /* c_http_response_2 carries no Content-Length: ParseDone() is expected to stay false until PartailParseEof() is called. */ 83 | bytes = doc.PartailParse(c_http_response_2); 84 | EXPECT_EQ(bytes, c_http_response_2.size()); 85 | EXPECT_TRUE(!doc.ParseError()); 86 | EXPECT_TRUE(!doc.ParseDone()); 87 | EXPECT_TRUE(doc.PartailParseEof()); 88 | EXPECT_TRUE(!doc.ParseError()); 89 | EXPECT_TRUE(doc.ParseDone()); 90 | 91 | EXPECT_EQ(doc.GetStatus(), "Not Found"); 92 | EXPECT_EQ(doc.GetStatusCode(), 404); 93 | EXPECT_EQ(doc.GetMajor(), 1); 94 | EXPECT_EQ(doc.GetMinor(), 1); 95 | EXPECT_EQ(doc.GetField("Accept"), "XAccept"); 96 | EXPECT_EQ(doc.GetField("Host"), "domain.com"); 97 | EXPECT_EQ(doc.GetField("Connection"), ""); 98 | EXPECT_EQ(doc.GetField("User-Agent"), "gtest.proxy"); 99 | /* Both malformed status lines must be reported through ParseError(). */ 100 | bytes = doc.PartailParse(c_http_response_err_1); 101 | EXPECT_TRUE(doc.ParseError()); 102 | 103 | bytes = doc.PartailParse(c_http_response_err_2); 104 | EXPECT_TRUE(doc.ParseError()); 105 | 106 | // partial parse logic: the header lines first, the terminating blank line afterwards 107 | cout << "parse partail" << endl; 108 | bytes = doc.PartailParse(c_http_response_err_3); 109 | EXPECT_EQ(bytes, c_http_response_err_3.size()); 110 | EXPECT_TRUE(!doc.ParseError()); 111 | EXPECT_TRUE(!doc.ParseDone()); 112 | 113 | bytes = doc.PartailParse("\r\n"); 114 | EXPECT_EQ(bytes, 2); 115 | EXPECT_TRUE(!doc.ParseError()); 116 | 
EXPECT_TRUE(doc.ParseDone()); 117 | /* Split the response at every offset: parse the first pos bytes, then feed the rest starting at the consumed byte count. */ 118 | for (size_t pos = 0; pos < c_http_response.size(); ++pos) 119 | { 120 | // cout << "parse response split by " << pos << endl; 121 | // cout << "first partail: " << c_http_response.substr(0, pos) << endl << endl; 122 | std::string fp = c_http_response.substr(0, pos); 123 | size_t bytes = doc.PartailParse(fp); 124 | // EXPECT_EQ(bytes, pos); 125 | // EXPECT_EQ(doc.ParseError().value(), 1); 126 | EXPECT_FALSE(doc.ParseDone()); 127 | /* Resume from the consumed byte count returned above. */ 128 | std::string sp = c_http_response.substr(bytes); 129 | bytes += doc.PartailParse(sp); 130 | EXPECT_EQ(bytes, c_http_response.size()); 131 | EXPECT_TRUE(!doc.ParseError()); 132 | EXPECT_TRUE(doc.ParseDone()); 133 | 134 | EXPECT_EQ(doc.GetStatus(), "OK"); 135 | EXPECT_EQ(doc.GetStatusCode(), 200); 136 | EXPECT_EQ(doc.GetMajor(), 1); 137 | EXPECT_EQ(doc.GetMinor(), 1); 138 | EXPECT_EQ(doc.GetField("Accept"), "XAccept"); 139 | EXPECT_EQ(doc.GetField("Host"), "domain.com"); 140 | EXPECT_EQ(doc.GetField("Connection"), "Keep-Alive"); 141 | EXPECT_EQ(doc.GetField("User-Agent"), ""); 142 | } 143 | /* Serializing the parsed document must reproduce c_http_response. */ 144 | char buf[256] = {}; 145 | bool b = doc.Serialize(buf, sizeof(buf)); 146 | EXPECT_TRUE(b); 147 | bytes = doc.ByteSize(); 148 | EXPECT_EQ(bytes, c_http_response.size()); 149 | EXPECT_EQ(c_http_response, buf); 150 | } 151 | /* Parses c_http_response through an HttpDocumentRef, then checks that CopyTo() carries the parsed values into other HttpDocumentRef and HttpDocument objects. */ 152 | void copyto_response() 153 | { 154 | std::string s = c_http_response; 155 | 156 | rapidhttp::HttpDocumentRef doc(rapidhttp::Response); 157 | size_t bytes = doc.PartailParse(s); 158 | EXPECT_EQ(bytes, s.size()); 159 | EXPECT_TRUE(!doc.ParseError()); 160 | /* Assertions applied to every document in this test. */ 161 | #define _CHECK_DOC(doc) \ 162 | EXPECT_EQ(doc.GetStatus(), "OK"); \ 163 | EXPECT_EQ(doc.GetStatusCode(), 200); \ 164 | EXPECT_EQ(doc.GetMajor(), 1); \ 165 | EXPECT_EQ(doc.GetMinor(), 1); \ 166 | EXPECT_EQ(doc.GetField("Accept"), "XAccept"); \ 167 | EXPECT_EQ(doc.GetField("Host"), "domain.com"); \ 168 | EXPECT_EQ(doc.GetField("Connection"), "Keep-Alive"); \ 169 | EXPECT_EQ(doc.GetField("User-Agent"), ""); \ 170 | 
EXPECT_EQ(doc.GetBody(), "xyz") 171 | 172 | _CHECK_DOC(doc); 173 | 174 | rapidhttp::HttpDocumentRef doc2(rapidhttp::Response); 175 | doc.CopyTo(doc2); 176 | _CHECK_DOC(doc2); 177 | 178 | rapidhttp::HttpDocument doc3(rapidhttp::Response); 179 | doc2.CopyTo(doc3); 180 | _CHECK_DOC(doc3); 181 | 182 | rapidhttp::HttpDocument doc4(rapidhttp::Response); 183 | doc2.CopyTo(doc4); 184 | _CHECK_DOC(doc4); 185 | doc3.CopyTo(doc4); 186 | _CHECK_DOC(doc4); 187 | 188 | s = "xx"; 189 | _CHECK_DOC(doc3); 190 | _CHECK_DOC(doc4); 191 | } 192 | 193 | TEST(parse, response) 194 | { 195 | test_parse_response(); 196 | test_parse_response(); 197 | copyto_response(); 198 | } 199 | -------------------------------------------------------------------------------- /tutorial/parse.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | using namespace std; 5 | 6 | void parse() 7 | { 8 | static std::string c_http_request = 9 | "POST /uri/abc HTTP/1.1\r\n" 10 | "Accept: XAccept\r\n" 11 | "Host: domain.com\r\n" 12 | "User-Agent: gtest.proxy\r\n" 13 | "Content-Length: 3\r\n" 14 | "\r\nabc"; 15 | 16 | // 1.定义一个Document对象 17 | rapidhttp::HttpDocument doc(rapidhttp::Request); 18 | 19 | // 2.调用PartailParse解析数据流, 接口参数是 20 | // std::string 21 | // 或 (const char*, size_t) 22 | doc.PartailParse(c_http_request); 23 | 24 | // 3.判断解析是否出错 25 | if (doc.ParseError()) { 26 | // 打印错误描述信息 27 | cout << "parse error:" << doc.ParseError().message() << endl; 28 | return ; 29 | } 30 | 31 | // 4.判断解析是否完成 32 | if (doc.ParseDone()) { 33 | cout << "parse not done." 
<< endl; 34 | } 35 | } 36 | 37 | void partail_parse() 38 | { 39 | static std::string c_http_request = 40 | "POST /uri/abc HTTP/1.1\r\n" 41 | "Accept: XAccept\r\n" 42 | "Host: domain.com\r\n" 43 | "User-Agent: gtest.proxy\r\n" 44 | "Content-Length: 3\r\n" 45 | "\r\nabc"; 46 | 47 | // 1.定义一个Document对象 48 | rapidhttp::HttpDocument doc(rapidhttp::Request); 49 | 50 | // 2.调用PartailParse解析数据流的一部分 51 | int pos = 27; 52 | size_t bytes = doc.PartailParse(c_http_request.c_str(), pos); 53 | 54 | // 3.判断解析是否出错 55 | if (doc.ParseError()) { 56 | // 打印错误描述信息 57 | cout << "parse error:" << doc.ParseError().message() << endl; 58 | return ; 59 | } 60 | 61 | // 4.判断解析是否完成 62 | if (!doc.ParseDone()) { 63 | // 未完成, 继续解析剩余部分 64 | bytes += doc.PartailParse(c_http_request.c_str() + pos, c_http_request.size() - pos); 65 | } 66 | 67 | // 4.判断解析是否完成 68 | if (doc.ParseDone()) { 69 | cout << "parse not done." << endl; 70 | } 71 | } 72 | 73 | /// 一种更加快速的解析方法: HttpDocumentRef 74 | void fast_parse() 75 | { 76 | std::string c_http_request = 77 | "POST /uri/abc HTTP/1.1\r\n" 78 | "Accept: XAccept\r\n" 79 | "Host: domain.com\r\n" 80 | "User-Agent: gtest.proxy\r\n" 81 | "Content-Length: 3\r\n" 82 | "\r\nabc"; 83 | 84 | // 1.定义一个HttpDocumentRef对象 85 | rapidhttp::HttpDocumentRef doc(rapidhttp::Request); 86 | 87 | // 2.调用PartailParse解析数据流, 接口参数是 88 | // std::string 89 | // 或 (const char*, size_t) 90 | doc.PartailParse(c_http_request); 91 | 92 | // 3.判断解析是否出错 93 | if (doc.ParseError()) { 94 | // 打印错误描述信息 95 | cout << "parse error:" << doc.ParseError().message() << endl; 96 | return ; 97 | } 98 | 99 | // 4.判断解析是否完成 100 | if (doc.ParseDone()) { 101 | cout << "parse not done." << endl; 102 | } 103 | 104 | // 5.使用HttpDocumentRef时, 不保存数据内容, 只是对数据流的引用. 105 | // 所以当数据流失效前, 如果要保存解析结果, 需要Copy到HttpDocument对象中. 
106 | rapidhttp::HttpDocument storage_doc(rapidhttp::Request); 107 | doc.CopyTo(storage_doc); 108 | 109 | // 6.让缓冲区失效 110 | c_http_request = ""; 111 | 112 | // 7.此时doc已经不能用了, storage_doc还有效 113 | cout << "Valid DOM: " << storage_doc.GetMethod() << endl; 114 | } 115 | 116 | int main() 117 | { 118 | parse(); 119 | partail_parse(); 120 | fast_parse(); 121 | return 0; 122 | } 123 | -------------------------------------------------------------------------------- /tutorial/serialize.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | using namespace std; 5 | 6 | void serialize() 7 | { 8 | // 1.定义一个Document对象 9 | rapidhttp::HttpDocument doc(rapidhttp::Response); 10 | 11 | // 2.设置status/code 12 | doc.SetStatusCode(200); 13 | doc.SetStatus("OK"); 14 | 15 | // 3.设置版本号, 不设置时默认是HTTP/1.1 16 | doc.SetMajor(1); 17 | doc.SetMinor(1); 18 | 19 | // 4.设置域 20 | doc.SetField("Server", "rapidhttp"); 21 | doc.SetField("Connection", "close"); 22 | doc.SetField("Content-Length", "12"); 23 | 24 | // 5.设置body.(二进制body使用std::string设置) 25 | doc.SetBody("hello world!"); 26 | 27 | // 6.获取序列化长度 28 | size_t bytes = doc.ByteSize(); 29 | if (!bytes) { 30 | // 长度返回0表示有些字段没有正确初始化, 不允许序列化 31 | cout << "serialize error" << endl; 32 | } 33 | char *buf = new char[bytes]; 34 | 35 | // 7.调用Serialize 36 | bool b = doc.Serialize(buf, bytes); 37 | 38 | // 8.判断序列化是否成功 39 | if (!b) { 40 | cout << "serialize error" << endl; 41 | } else { 42 | cout << "serialize output:\n" << std::string(buf, bytes) << endl; 43 | } 44 | 45 | delete buf; 46 | 47 | // 9.不在乎性能时, 也可以直接序列化成std::string, 不必关心长度. 
48 | std::string output = doc.SerializeAsString(); 49 | if (output.empty()) { 50 | // 返回空串表示有字段没有正确初始化, 不允许序列化 51 | cout << "serialize error" << endl; 52 | } 53 | cout << "serialize output:\n" << output << endl; 54 | } 55 | 56 | int main() 57 | { 58 | serialize(); 59 | return 0; 60 | } 61 | -------------------------------------------------------------------------------- /uninstall.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | 5 | sudo rm /usr/local/include/rapidhttp -rf 6 | --------------------------------------------------------------------------------