├── .gitignore ├── AUTHORS ├── Makefile ├── LICENSE ├── websocket_parser.h ├── README.md └── websocket_parser.c /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.loT 3 | *.lo -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | Ivan Shalganov -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CFLAGS?=-std=gnu99 -pedantic -O4 -Wall -fPIC 2 | 3 | default: websocket_parser.o 4 | 5 | websocket_parser.o: websocket_parser.c websocket_parser.h 6 | 7 | solib: websocket_parser.o 8 | $(CC) -shared -Wl,-soname,libwebsocket_parser.so -o libwebsocket_parser.so websocket_parser.o 9 | 10 | alib: websocket_parser.o 11 | ar rcu libwebsocket_parser.a $< 12 | ranlib libwebsocket_parser.a 13 | 14 | clean: 15 | rm -f *.o *.so *.a 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2019, PHP ION project 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. 
Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#ifndef WEBSOCKET_PARSER_H
#define WEBSOCKET_PARSER_H

#ifdef __cplusplus
extern "C" {
#endif


#include <stddef.h> /* size_t */
#include <sys/types.h>
#if defined(_WIN32) && !defined(__MINGW32__) && \
  (!defined(_MSC_VER) || _MSC_VER<1600) && !defined(__WINE__)
#include <BaseTsd.h>
typedef __int8 int8_t;
typedef unsigned __int8 uint8_t;
typedef __int16 int16_t;
typedef unsigned __int16 uint16_t;
typedef __int32 int32_t;
typedef unsigned __int32 uint32_t;
typedef __int64 int64_t;
typedef unsigned __int64 uint64_t;
#else
#include <stdint.h>
#endif

/* GUID used by the WebSocket opening handshake (RFC 6455). */
#define WEBSOCKET_UUID   "258EAFA5-E914-47DA-95CA-C5AB0DC85B11"

typedef struct websocket_parser websocket_parser;
typedef struct websocket_parser_settings websocket_parser_settings;

/*
 * Frame flags. The low four bits hold the opcode (see WS_OP_MASK); the
 * remaining bits are independent marks that may be OR-ed with an opcode.
 */
typedef enum websocket_flags {
    // opcodes
    WS_OP_CONTINUE = 0x0,
    WS_OP_TEXT     = 0x1,
    WS_OP_BINARY   = 0x2,
    WS_OP_CLOSE    = 0x8,
    WS_OP_PING     = 0x9,
    WS_OP_PONG     = 0xA,

    // marks
    WS_FINAL_FRAME = 0x10,
    WS_HAS_MASK    = 0x20,
} websocket_flags;

#define WS_OP_MASK 0xF            /* selects the opcode bits of websocket_flags */
#define WS_FIN     WS_FINAL_FRAME /* short alias */

/*
 * Callback signatures. A callback that returns a non-zero value makes
 * websocket_parser_execute() stop and return early.
 */
typedef int (*websocket_data_cb) (websocket_parser*, const char * at, size_t length);
typedef int (*websocket_cb) (websocket_parser*);

struct websocket_parser {
    uint32_t        state;       /* internal state of the state machine */
    websocket_flags flags;       /* opcode | WS_FIN | WS_HAS_MASK of the current frame */

    char            mask[4];     /* masking key of the current frame (valid if WS_HAS_MASK) */
    uint8_t         mask_offset; /* position inside mask[] where the next decode continues */

    size_t   length;             /* payload length announced by the frame header */
    size_t   require;            /* bytes still needed to finish the current field or body */
    size_t   offset;             /* payload bytes of the current frame delivered by earlier execute calls */

    void * data;                 /* application pointer; never touched by the parser */
};

struct websocket_parser_settings {
    websocket_cb      on_frame_header; /* header (and mask, if any) fully parsed */
    websocket_data_cb on_frame_body;   /* a chunk of (still masked) payload is available */
    websocket_cb      on_frame_end;    /* the whole payload of the frame has been delivered */
};

/* Reset the parser to its initial state; the `data` member is kept as is. */
void websocket_parser_init(websocket_parser *parser);

/* Clear all callbacks of `settings`. */
void websocket_parser_settings_init(websocket_parser_settings *settings);

/*
 * Feed `len` bytes at `data` to the parser. Returns `len` when everything was
 * consumed, or a smaller value when a callback returned non-zero.
 */
size_t websocket_parser_execute(
    websocket_parser * parser,
    const websocket_parser_settings *settings,
    const char * data,
    size_t len
);

// Apply XOR mask (see https://tools.ietf.org/html/rfc6455#section-5.3) and store mask's offset
void websocket_parser_decode(char * dst, const char * src, size_t len, websocket_parser * parser);

// Apply XOR mask (see https://tools.ietf.org/html/rfc6455#section-5.3) and return mask's offset
uint8_t websocket_decode(char * dst, const char * src, size_t len, const char mask[4], uint8_t mask_offset);
#define websocket_encode(dst, src, len, mask, mask_offset) websocket_decode(dst, src, len, mask, mask_offset)

// Calculate frame size using flags and data length
size_t websocket_calc_frame_size(websocket_flags flags, size_t data_len);

/*
 * Serialize a frame (header, optional mask, payload) into `frame`, which must
 * hold at least websocket_calc_frame_size(flags, data_len) bytes. Returns the
 * number of bytes written. If WS_HAS_MASK is set and `mask` is NULL, the four
 * bytes already stored at the mask position of `frame` are used as the mask.
 */
size_t websocket_build_frame(char * frame, websocket_flags flags, const char mask[4], const char * data, size_t data_len);

/* Accessors; the argument is parenthesized so any pointer expression (e.g. &parser) works. */
#define websocket_parser_get_opcode(p) ((p)->flags & WS_OP_MASK)
#define websocket_parser_has_mask(p)   ((p)->flags & WS_HAS_MASK)
#define websocket_parser_has_final(p)  ((p)->flags & WS_FIN)

#ifdef __cplusplus
}
#endif
#endif //WEBSOCKET_PARSER_H
5 | 6 | Table of Contents 7 | ----------------- 8 | 9 | * [Features](#features) 10 | * [Status](#status) 11 | * [Usage](#usage) 12 | * [Frame builder](#frame-builder) 13 | * [UUID](#uuid) 14 | * [Frame example](#frame-example) 15 | 16 | Features 17 | -------- 18 | 19 | * Fast parsing and building of websocket messages 20 | * No dependencies 21 | * No internal buffering 22 | * No need to buffer the whole frame — works with chunks of data 23 | * No syscalls 24 | * No allocations 25 | * It can be interrupted at any time 26 | 27 | Tested as part of the [PHP-ION](https://github.com/php-ion/php-ion) extension. 28 | 29 | Inspired by [http-parser](https://github.com/joyent/http-parser) by [Ryan Dahl](https://github.com/ry) 30 | and [multipart-parser](https://github.com/iafonov/multipart-parser-c) by [Igor Afonov](https://github.com/iafonov). 31 | 32 | Status 33 | ------ 34 | 35 | Production ready. 36 | 37 | Usage 38 | ----- 39 | 40 | Use [http-parser](https://github.com/joyent/http-parser) for parsing headers. This library parses only websocket frames. 41 | 42 | This parser library works with several callbacks, which the user may set up at application initialization time. 43 | 44 | ```c 45 | websocket_parser_settings settings; 46 | 47 | websocket_parser_settings_init(&settings); 48 | 49 | settings.on_frame_header = websocket_frame_header; 50 | settings.on_frame_body = websocket_frame_body; 51 | settings.on_frame_end = websocket_frame_end; 52 | ``` 53 | 54 | These functions must match the signatures defined in the websocket-parser header file. 55 | 56 | Returning a value other than 0 from the callbacks will abort message processing. 57 | 58 | One websocket_parser object is used per TCP connection. 
Initialize the `websocket_parser` struct using `websocket_parser_init()` and set callbacks: 59 | 60 | ```c 61 | websocket_parser_settings settings; 62 | 63 | websocket_parser_settings_init(&settings); 64 | 65 | settings.on_frame_header = websocket_frame_header; 66 | settings.on_frame_body = websocket_frame_body; 67 | settings.on_frame_end = websocket_frame_end; 68 | 69 | parser = malloc(sizeof(websocket_parser)); 70 | websocket_parser_init(parser); 71 | // Attention! Set your data pointer after calling websocket_parser_init 72 | parser->data = my_frame_struct; 73 | ``` 74 | 75 | Basically, the callbacks look like this: 76 | 77 | ```c 78 | int websocket_frame_header(websocket_parser * parser) { 79 | parser->data->opcode = parser->flags & WS_OP_MASK; // gets opcode 80 | parser->data->is_final = parser->flags & WS_FIN; // checks whether this is the final frame 81 | if(parser->length) { 82 | parser->data->body = malloc(parser->length); // allocate memory for frame body, if body exists 83 | } 84 | return 0; 85 | } 86 | 87 | int ion_websocket_frame_body(websocket_parser * parser, const char *at, size_t size) { 88 | if(parser->flags & WS_HAS_MASK) { 89 | // if the frame has a mask, we have to copy and decode the data via the websocket_parser_decode function 90 | websocket_parser_decode(&parser->data->body[parser->offset], at, length, parser); 91 | } else { 92 | memcpy(&parser->data->body[parser->offset], at, length); 93 | } 94 | return 0; 95 | } 96 | 97 | int websocket_frame_end(websocket_parser * parser) { 98 | my_app_push_frame(parser->data); // use parsed frame 99 | } 100 | ``` 101 | 102 | When data is received, execute the parser and check for errors. 103 | 104 | ```c 105 | size_t nread; 106 | // .. init settings and parser ... 107 | 108 | nread = websocket_parser_execute(parser, &settings, data, data_len); 109 | if(nread != data_len) { 110 | // a callback returned a value other than 0 111 | } 112 | 113 | // ... 
114 | free(parser); 115 | ``` 116 | 117 | Frame builder 118 | ------------- 119 | 120 | To calculate how many bytes to allocate for a frame, use the `websocket_calc_frame_size` function: 121 | 122 | ```c 123 | size_t frame_len = websocket_calc_frame_size(WS_OP_TEXT | WS_FINAL_FRAME | WS_HAS_MASK, data_len); 124 | char * frame = malloc(sizeof(char) * frame_len); 125 | ``` 126 | 127 | After that you can build a frame 128 | 129 | ```c 130 | websocket_build_frame(frame, WS_OP_TEXT | WS_FINAL_FRAME | WS_HAS_MASK, mask, data, data_len); 131 | ``` 132 | 133 | and send binary string to the socket 134 | 135 | ```c 136 | write(sock, frame, frame_len); 137 | ``` 138 | 139 | UUID 140 | ---- 141 | 142 | Macros WEBSOCKET_UUID contains unique ID for handshake 143 | 144 | ```c 145 | #define WEBSOCKET_UUID "258EAFA5-E914-47DA-95CA-C5AB0DC85B11" 146 | ``` 147 | 148 | Frame example 149 | ------------- 150 | 151 | There is binary websocket frame example: 152 | 153 | * Raw frame: `\x81\x8Amask\x0B\x13\x12\x06\x08\x41\x17\x0A\x19\x00` 154 | * Has mask: yes 155 | * Mask: `mask` 156 | * Payload: `frame data` 157 | * Fin: yes 158 | * Opcode: `WS_OP_TEXT` 159 | -------------------------------------------------------------------------------- /websocket_parser.c: -------------------------------------------------------------------------------- 1 | #include "websocket_parser.h" 2 | #include 3 | #include 4 | 5 | #ifdef assert 6 | # define assertFalse(msg) assert(0 && msg) 7 | #else 8 | # define assertFalse(msg) 9 | #endif 10 | 11 | #define SET_STATE(V) parser->state = V 12 | #define HAS_DATA() (p < end ) 13 | #define CC (*p) 14 | #define GET_NPARSED() ( (p == end) ? 
len : (p - data) ) 15 | 16 | #define NOTIFY_CB(FOR) \ 17 | do { \ 18 | if (settings->on_##FOR) { \ 19 | if (settings->on_##FOR(parser) != 0) { \ 20 | return GET_NPARSED(); \ 21 | } \ 22 | } \ 23 | } while (0) 24 | 25 | #define EMIT_DATA_CB(FOR, ptr, len) \ 26 | do { \ 27 | if (settings->on_##FOR) { \ 28 | if (settings->on_##FOR(parser, ptr, len) != 0) { \ 29 | return GET_NPARSED(); \ 30 | } \ 31 | } \ 32 | } while (0) 33 | 34 | enum state { 35 | s_start, 36 | s_head, 37 | s_length, 38 | s_mask, 39 | s_body, 40 | }; 41 | 42 | void websocket_parser_init(websocket_parser * parser) { 43 | void *data = parser->data; /* preserve application data */ 44 | memset(parser, 0, sizeof(*parser)); 45 | parser->data = data; 46 | parser->state = s_start; 47 | } 48 | 49 | void websocket_parser_settings_init(websocket_parser_settings *settings) { 50 | memset(settings, 0, sizeof(*settings)); 51 | } 52 | 53 | size_t websocket_parser_execute(websocket_parser *parser, const websocket_parser_settings *settings, const char *data, size_t len) { 54 | const char * p; 55 | const char * end = data + len; 56 | size_t frame_offset = 0; 57 | 58 | for(p = data; p != end; p++) { 59 | switch(parser->state) { 60 | case s_start: 61 | parser->offset = 0; 62 | parser->length = 0; 63 | parser->mask_offset = 0; 64 | parser->flags = (websocket_flags) (CC & WS_OP_MASK); 65 | if(CC & (1<<7)) { 66 | parser->flags |= WS_FIN; 67 | } 68 | SET_STATE(s_head); 69 | 70 | frame_offset++; 71 | break; 72 | case s_head: 73 | parser->length = (size_t)CC & 0x7F; 74 | if(CC & 0x80) { 75 | parser->flags |= WS_HAS_MASK; 76 | } 77 | if(parser->length >= 126) { 78 | if(parser->length == 127) { 79 | parser->require = 8; 80 | } else { 81 | parser->require = 2; 82 | } 83 | parser->length = 0; 84 | SET_STATE(s_length); 85 | } else if (parser->flags & WS_HAS_MASK) { 86 | SET_STATE(s_mask); 87 | parser->require = 4; 88 | } else if (parser->length) { 89 | SET_STATE(s_body); 90 | parser->require = parser->length; 91 | 
NOTIFY_CB(frame_header); 92 | } else { 93 | SET_STATE(s_start); 94 | NOTIFY_CB(frame_header); 95 | NOTIFY_CB(frame_end); 96 | } 97 | 98 | frame_offset++; 99 | break; 100 | case s_length: 101 | while(HAS_DATA() && parser->require) { 102 | parser->length <<= 8; 103 | parser->length |= (unsigned char)CC; 104 | parser->require--; 105 | frame_offset++; 106 | p++; 107 | } 108 | p--; 109 | if(!parser->require) { 110 | if (parser->flags & WS_HAS_MASK) { 111 | SET_STATE(s_mask); 112 | parser->require = 4; 113 | } else if (parser->length) { 114 | SET_STATE(s_body); 115 | parser->require = parser->length; 116 | NOTIFY_CB(frame_header); 117 | } else { 118 | SET_STATE(s_start); 119 | NOTIFY_CB(frame_header); 120 | NOTIFY_CB(frame_end); 121 | } 122 | } 123 | break; 124 | case s_mask: 125 | while(HAS_DATA() && parser->require) { 126 | parser->mask[4 - parser->require--] = CC; 127 | frame_offset++; 128 | p++; 129 | } 130 | p--; 131 | if(!parser->require) { 132 | if(parser->length) { 133 | SET_STATE(s_body); 134 | parser->require = parser->length; 135 | NOTIFY_CB(frame_header); 136 | } else { 137 | SET_STATE(s_start); 138 | NOTIFY_CB(frame_header); 139 | NOTIFY_CB(frame_end); 140 | } 141 | } 142 | break; 143 | case s_body: 144 | if(parser->require) { 145 | if(p + parser->require <= end) { 146 | EMIT_DATA_CB(frame_body, p, parser->require); 147 | p += parser->require; 148 | parser->require = 0; 149 | frame_offset = p - data; 150 | } else { 151 | EMIT_DATA_CB(frame_body, p, end - p); 152 | parser->require -= end - p; 153 | p = end; 154 | parser->offset += p - data - frame_offset; 155 | frame_offset = 0; 156 | } 157 | 158 | p--; 159 | } 160 | if(!parser->require) { 161 | NOTIFY_CB(frame_end); 162 | SET_STATE(s_start); 163 | } 164 | break; 165 | default: 166 | assertFalse("Unreachable case"); 167 | } 168 | } 169 | 170 | return GET_NPARSED(); 171 | } 172 | 173 | void websocket_parser_decode(char * dst, const char * src, size_t len, websocket_parser * parser) { 174 | size_t i = 0; 175 | 
for(; i < len; i++) { 176 | dst[i] = src[i] ^ parser->mask[(i + parser->mask_offset) % 4]; 177 | } 178 | 179 | parser->mask_offset = (uint8_t) ((i + parser->mask_offset) % 4); 180 | } 181 | 182 | uint8_t websocket_decode(char * dst, const char * src, size_t len, const char mask[4], uint8_t mask_offset) { 183 | size_t i = 0; 184 | for(; i < len; i++) { 185 | dst[i] = src[i] ^ mask[(i + mask_offset) % 4]; 186 | } 187 | 188 | return (uint8_t) ((i + mask_offset) % 4); 189 | } 190 | 191 | size_t websocket_calc_frame_size(websocket_flags flags, size_t data_len) { 192 | size_t size = data_len + 2; // body + 2 bytes of head 193 | if(data_len >= 126) { 194 | if(data_len > 0xFFFF) { 195 | size += 8; 196 | } else { 197 | size += 2; 198 | } 199 | } 200 | if(flags & WS_HAS_MASK) { 201 | size += 4; 202 | } 203 | 204 | return size; 205 | } 206 | 207 | size_t websocket_build_frame(char * frame, websocket_flags flags, const char mask[4], const char * data, size_t data_len) { 208 | size_t body_offset = 0; 209 | frame[0] = 0; 210 | frame[1] = 0; 211 | if(flags & WS_FIN) { 212 | frame[0] = (char) (1 << 7); 213 | } 214 | frame[0] |= flags & WS_OP_MASK; 215 | if(flags & WS_HAS_MASK) { 216 | frame[1] = (char) (1 << 7); 217 | } 218 | if(data_len < 126) { 219 | frame[1] |= data_len; 220 | body_offset = 2; 221 | } else if(data_len <= 0xFFFF) { 222 | frame[1] |= 126; 223 | frame[2] = (char) (data_len >> 8); 224 | frame[3] = (char) (data_len & 0xFF); 225 | body_offset = 4; 226 | } else { 227 | frame[1] |= 127; 228 | frame[2] = (char) ((data_len >> 56) & 0xFF); 229 | frame[3] = (char) ((data_len >> 48) & 0xFF); 230 | frame[4] = (char) ((data_len >> 40) & 0xFF); 231 | frame[5] = (char) ((data_len >> 32) & 0xFF); 232 | frame[6] = (char) ((data_len >> 24) & 0xFF); 233 | frame[7] = (char) ((data_len >> 16) & 0xFF); 234 | frame[8] = (char) ((data_len >> 8) & 0xFF); 235 | frame[9] = (char) ((data_len) & 0xFF); 236 | body_offset = 10; 237 | } 238 | if(flags & WS_HAS_MASK) { 239 | if(mask != NULL) { 
240 | memcpy(&frame[body_offset], mask, 4); 241 | } 242 | websocket_decode(&frame[body_offset + 4], data, data_len, &frame[body_offset], 0); 243 | body_offset += 4; 244 | } else { 245 | memcpy(&frame[body_offset], data, data_len); 246 | } 247 | 248 | return body_offset + data_len; 249 | } --------------------------------------------------------------------------------