├── .gitignore ├── Makefile ├── README.md ├── multipart_parser.c └── multipart_parser.h /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CFLAGS?=-std=c89 -ansi -pedantic -O4 -Wall -fPIC 2 | 3 | default: multipart_parser.o 4 | 5 | multipart_parser.o: multipart_parser.c multipart_parser.h 6 | 7 | solib: multipart_parser.o 8 | $(CC) -shared -Wl,-soname,libmultipart.so -o libmultipart.so multipart_parser.o 9 | 10 | clean: 11 | rm -f *.o *.so 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Multipart form data parser 2 | 3 | ### Features 4 | * No dependencies 5 | * Works with chunks of data - no need to buffer the whole request 6 | * Almost no internal buffering. Buffer size doesn't exceed the size of the boundary (~60-70 bytes) 7 | 8 | Tested as part of [Cosmonaut](https://github.com/iafonov/cosmonaut) HTTP server. 9 | 10 | Implementation based on [node-formidable](https://github.com/felixge/node-formidable) by [Felix Geisendörfer](https://github.com/felixge). 11 | 12 | Inspired by [http-parser](https://github.com/joyent/http-parser) by [Ryan Dahl](https://github.com/ry). 13 | 14 | ### Usage (C) 15 | This parser library works with several callbacks, which the user may set up at application initialization time. 16 | 17 | ```c 18 | multipart_parser_settings callbacks; 19 | 20 | memset(&callbacks, 0, sizeof(multipart_parser_settings)); 21 | 22 | callbacks.on_header_field = read_header_name; 23 | callbacks.on_header_value = read_header_value; 24 | ``` 25 | 26 | These functions must match the signatures defined in the multipart-parser header file. 
For this simple example, we'll just use two of the available callbacks to print all headers the library finds in multipart messages. 27 | 28 | Returning a value other than 0 from the callbacks will abort message processing. 29 | 30 | ```c 31 | int read_header_name(multipart_parser* p, const char *at, size_t length) 32 | { 33 | printf("%.*s: ", (int)length, at); 34 | return 0; 35 | } 36 | 37 | int read_header_value(multipart_parser* p, const char *at, size_t length) 38 | { 39 | printf("%.*s\n", (int)length, at); 40 | return 0; 41 | } 42 | ``` 43 | 44 | When a message arrives, callers must parse the multipart boundary from the **Content-Type** header (see the [RFC](http://tools.ietf.org/html/rfc2387#section-5.1) for more information and examples), and then execute the parser. 45 | 46 | ```c 47 | multipart_parser* parser = multipart_parser_init(boundary, &callbacks); 48 | multipart_parser_execute(parser, body, length); 49 | multipart_parser_free(parser); 50 | ``` 51 | 52 | ### Usage (C++) 53 | In C++, when the callbacks are static member functions it may be helpful to pass the instantiated multipart consumer along as context. The following (abbreviated) class called `MultipartConsumer` shows how to pass `this` to callback functions in order to access non-static member data. 
54 | 55 | ```cpp 56 | class MultipartConsumer 57 | { 58 | public: 59 | MultipartConsumer(const std::string& boundary) : m_headers(0) 60 | { 61 | memset(&m_callbacks, 0, sizeof(multipart_parser_settings)); 62 | m_callbacks.on_header_field = ReadHeaderName; 63 | m_callbacks.on_header_value = ReadHeaderValue; 64 | 65 | m_parser = multipart_parser_init(boundary.c_str(), &m_callbacks); 66 | multipart_parser_set_data(m_parser, this); 67 | } 68 | 69 | ~MultipartConsumer() 70 | { 71 | multipart_parser_free(m_parser); 72 | } 73 | 74 | int CountHeaders(const std::string& body) 75 | { 76 | multipart_parser_execute(m_parser, body.c_str(), body.size()); 77 | return m_headers; 78 | } 79 | 80 | private: 81 | static int ReadHeaderName(multipart_parser* p, const char *at, size_t length) 82 | { 83 | MultipartConsumer* me = (MultipartConsumer*)multipart_parser_get_data(p); 84 | me->m_headers++; return 0; 85 | } 86 | 87 | multipart_parser* m_parser; 88 | multipart_parser_settings m_callbacks; 89 | int m_headers; 90 | }; 91 | ``` 92 | 93 | ### Contributors 94 | * [Daniel T. Wagner](http://www.danieltwagner.de/) 95 | * [James McLaughlin](http://udp.github.com/) 96 | * [Jay Miller](http://www.cryptofreak.org) 97 | 98 | © 2012 [Igor Afonov](http://iafonov.github.com) 99 | -------------------------------------------------------------------------------- /multipart_parser.c: -------------------------------------------------------------------------------- 1 | /* Based on node-formidable by Felix Geisendörfer 2 | * Igor Afonov - afonov@gmail.com - 2012 3 | * MIT License - http://www.opensource.org/licenses/mit-license.php 4 | */ 5 | 6 | #include "multipart_parser.h" 7 | 8 | #include <stdio.h> 9 | #include <stdarg.h> 10 | #include <string.h> 11 | 12 | static void multipart_log(const char * format, ...) 
13 | { 14 | #ifdef DEBUG_MULTIPART 15 | va_list args; 16 | va_start(args, format); 17 | 18 | fprintf(stderr, "[HTTP_MULTIPART_PARSER] %s:%d: ", __FILE__, __LINE__); 19 | vfprintf(stderr, format, args); 20 | fprintf(stderr, "\n"); 21 | #endif 22 | } 23 | 24 | #define NOTIFY_CB(FOR) \ 25 | do { \ 26 | if (p->settings->on_##FOR) { \ 27 | if (p->settings->on_##FOR(p) != 0) { \ 28 | return i; \ 29 | } \ 30 | } \ 31 | } while (0) 32 | 33 | #define EMIT_DATA_CB(FOR, ptr, len) \ 34 | do { \ 35 | if (p->settings->on_##FOR) { \ 36 | if (p->settings->on_##FOR(p, ptr, len) != 0) { \ 37 | return i; \ 38 | } \ 39 | } \ 40 | } while (0) 41 | 42 | 43 | #define LF 10 44 | #define CR 13 45 | 46 | struct multipart_parser { 47 | void * data; 48 | 49 | size_t index; 50 | size_t boundary_length; 51 | 52 | unsigned char state; 53 | 54 | const multipart_parser_settings* settings; 55 | 56 | char* lookbehind; 57 | char multipart_boundary[1]; 58 | }; 59 | 60 | enum state { 61 | s_uninitialized = 1, 62 | s_start, 63 | s_start_boundary, 64 | s_header_field_start, 65 | s_header_field, 66 | s_headers_almost_done, 67 | s_header_value_start, 68 | s_header_value, 69 | s_header_value_almost_done, 70 | s_part_data_start, 71 | s_part_data, 72 | s_part_data_almost_boundary, 73 | s_part_data_boundary, 74 | s_part_data_almost_end, 75 | s_part_data_end, 76 | s_part_data_final_hyphen, 77 | s_end 78 | }; 79 | 80 | multipart_parser* multipart_parser_init 81 | (const char *boundary, const multipart_parser_settings* settings) { 82 | 83 | multipart_parser* p = malloc(sizeof(multipart_parser) + 84 | strlen(boundary) + 85 | strlen(boundary) + 9); 86 | 87 | strcpy(p->multipart_boundary, boundary); 88 | p->boundary_length = strlen(boundary); 89 | 90 | p->lookbehind = (p->multipart_boundary + p->boundary_length + 1); 91 | 92 | p->index = 0; 93 | p->state = s_start; 94 | p->settings = settings; 95 | 96 | return p; 97 | } 98 | 99 | void multipart_parser_free(multipart_parser* p) { 100 | free(p); 101 | } 102 | 103 | 
void multipart_parser_set_data(multipart_parser *p, void *data) { 104 | p->data = data; 105 | } 106 | 107 | void *multipart_parser_get_data(multipart_parser *p) { 108 | return p->data; 109 | } 110 | 111 | size_t multipart_parser_execute(multipart_parser* p, const char *buf, size_t len) { 112 | size_t i = 0; 113 | size_t mark = 0; 114 | char c, cl; 115 | int is_last = 0; 116 | 117 | while(i < len) { 118 | c = buf[i]; 119 | is_last = (i == (len - 1)); 120 | switch (p->state) { 121 | case s_start: 122 | multipart_log("s_start"); 123 | p->index = 0; 124 | p->state = s_start_boundary; 125 | 126 | /* fallthrough */ 127 | case s_start_boundary: 128 | multipart_log("s_start_boundary"); 129 | if (p->index == p->boundary_length) { 130 | if (c != CR) { 131 | return i; 132 | } 133 | p->index++; 134 | break; 135 | } else if (p->index == (p->boundary_length + 1)) { 136 | if (c != LF) { 137 | return i; 138 | } 139 | p->index = 0; 140 | NOTIFY_CB(part_data_begin); 141 | p->state = s_header_field_start; 142 | break; 143 | } 144 | if (c != p->multipart_boundary[p->index]) { 145 | return i; 146 | } 147 | p->index++; 148 | break; 149 | 150 | case s_header_field_start: 151 | multipart_log("s_header_field_start"); 152 | mark = i; 153 | p->state = s_header_field; 154 | 155 | /* fallthrough */ 156 | case s_header_field: 157 | multipart_log("s_header_field"); 158 | if (c == CR) { 159 | p->state = s_headers_almost_done; 160 | break; 161 | } 162 | 163 | if (c == ':') { 164 | EMIT_DATA_CB(header_field, buf + mark, i - mark); 165 | p->state = s_header_value_start; 166 | break; 167 | } 168 | 169 | cl = tolower(c); 170 | if ((c != '-') && (cl < 'a' || cl > 'z')) { 171 | multipart_log("invalid character in header name"); 172 | return i; 173 | } 174 | if (is_last) 175 | EMIT_DATA_CB(header_field, buf + mark, (i - mark) + 1); 176 | break; 177 | 178 | case s_headers_almost_done: 179 | multipart_log("s_headers_almost_done"); 180 | if (c != LF) { 181 | return i; 182 | } 183 | 184 | p->state = 
s_part_data_start; 185 | break; 186 | 187 | case s_header_value_start: 188 | multipart_log("s_header_value_start"); 189 | if (c == ' ') { 190 | break; 191 | } 192 | 193 | mark = i; 194 | p->state = s_header_value; 195 | 196 | /* fallthrough */ 197 | case s_header_value: 198 | multipart_log("s_header_value"); 199 | if (c == CR) { 200 | EMIT_DATA_CB(header_value, buf + mark, i - mark); 201 | p->state = s_header_value_almost_done; 202 | break; 203 | } 204 | if (is_last) 205 | EMIT_DATA_CB(header_value, buf + mark, (i - mark) + 1); 206 | break; 207 | 208 | case s_header_value_almost_done: 209 | multipart_log("s_header_value_almost_done"); 210 | if (c != LF) { 211 | return i; 212 | } 213 | p->state = s_header_field_start; 214 | break; 215 | 216 | case s_part_data_start: 217 | multipart_log("s_part_data_start"); 218 | NOTIFY_CB(headers_complete); 219 | mark = i; 220 | p->state = s_part_data; 221 | 222 | /* fallthrough */ 223 | case s_part_data: 224 | multipart_log("s_part_data"); 225 | if (c == CR) { 226 | EMIT_DATA_CB(part_data, buf + mark, i - mark); 227 | mark = i; 228 | p->state = s_part_data_almost_boundary; 229 | p->lookbehind[0] = CR; 230 | break; 231 | } 232 | if (is_last) 233 | EMIT_DATA_CB(part_data, buf + mark, (i - mark) + 1); 234 | break; 235 | 236 | case s_part_data_almost_boundary: 237 | multipart_log("s_part_data_almost_boundary"); 238 | if (c == LF) { 239 | p->state = s_part_data_boundary; 240 | p->lookbehind[1] = LF; 241 | p->index = 0; 242 | break; 243 | } 244 | EMIT_DATA_CB(part_data, p->lookbehind, 1); 245 | p->state = s_part_data; 246 | mark = i --; 247 | break; 248 | 249 | case s_part_data_boundary: 250 | multipart_log("s_part_data_boundary"); 251 | if (p->multipart_boundary[p->index] != c) { 252 | EMIT_DATA_CB(part_data, p->lookbehind, 2 + p->index); 253 | p->state = s_part_data; 254 | mark = i --; 255 | break; 256 | } 257 | p->lookbehind[2 + p->index] = c; 258 | if ((++ p->index) == p->boundary_length) { 259 | NOTIFY_CB(part_data_end); 260 | 
p->state = s_part_data_almost_end; 261 | } 262 | break; 263 | 264 | case s_part_data_almost_end: 265 | multipart_log("s_part_data_almost_end"); 266 | if (c == '-') { 267 | p->state = s_part_data_final_hyphen; 268 | break; 269 | } 270 | if (c == CR) { 271 | p->state = s_part_data_end; 272 | break; 273 | } 274 | return i; 275 | 276 | case s_part_data_final_hyphen: 277 | multipart_log("s_part_data_final_hyphen"); 278 | if (c == '-') { 279 | NOTIFY_CB(body_end); 280 | p->state = s_end; 281 | break; 282 | } 283 | return i; 284 | 285 | case s_part_data_end: 286 | multipart_log("s_part_data_end"); 287 | if (c == LF) { 288 | p->state = s_header_field_start; 289 | NOTIFY_CB(part_data_begin); 290 | break; 291 | } 292 | return i; 293 | 294 | case s_end: 295 | multipart_log("s_end: %02X", (int) c); 296 | break; 297 | 298 | default: 299 | multipart_log("Multipart parser unrecoverable error"); 300 | return 0; 301 | } 302 | ++ i; 303 | } 304 | 305 | return len; 306 | } 307 | -------------------------------------------------------------------------------- /multipart_parser.h: -------------------------------------------------------------------------------- 1 | /* Based on node-formidable by Felix Geisendörfer 2 | * Igor Afonov - afonov@gmail.com - 2012 3 | * MIT License - http://www.opensource.org/licenses/mit-license.php 4 | */ 5 | #ifndef _multipart_parser_h 6 | #define _multipart_parser_h 7 | 8 | #ifdef __cplusplus 9 | extern "C" 10 | { 11 | #endif 12 | 13 | #include 14 | #include 15 | 16 | typedef struct multipart_parser multipart_parser; 17 | typedef struct multipart_parser_settings multipart_parser_settings; 18 | typedef struct multipart_parser_state multipart_parser_state; 19 | 20 | typedef int (*multipart_data_cb) (multipart_parser*, const char *at, size_t length); 21 | typedef int (*multipart_notify_cb) (multipart_parser*); 22 | 23 | struct multipart_parser_settings { 24 | multipart_data_cb on_header_field; 25 | multipart_data_cb on_header_value; 26 | multipart_data_cb 
on_part_data; 27 | 28 | multipart_notify_cb on_part_data_begin; 29 | multipart_notify_cb on_headers_complete; 30 | multipart_notify_cb on_part_data_end; 31 | multipart_notify_cb on_body_end; 32 | }; 33 | 34 | multipart_parser* multipart_parser_init 35 | (const char *boundary, const multipart_parser_settings* settings); 36 | 37 | void multipart_parser_free(multipart_parser* p); 38 | 39 | size_t multipart_parser_execute(multipart_parser* p, const char *buf, size_t len); 40 | 41 | void multipart_parser_set_data(multipart_parser* p, void* data); 42 | void * multipart_parser_get_data(multipart_parser* p); 43 | 44 | #ifdef __cplusplus 45 | } /* extern "C" */ 46 | #endif 47 | 48 | #endif 49 | --------------------------------------------------------------------------------