├── tests ├── urls ├── test_proxy.sh └── test_khash.c ├── .travis.yml ├── Makefile ├── src ├── mpool.h ├── picohttpparser.h ├── mpool.c ├── khash.h ├── picohttpparser.c └── webproxy.c └── README.md /tests/urls: -------------------------------------------------------------------------------- 1 | http://news.baidu.com 2 | http://www.ict.ac.cn 3 | http://www.newsmth.net 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | 3 | os: linux 4 | language: c 5 | compiler: gcc 6 | 7 | script: make 8 | 9 | notifications: 10 | email: 11 | on_success: never 12 | on_failure: change 13 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CC=gcc 2 | CFLAGS=-Os -W -Wall -Wpointer-arith -Wno-unused-parameter -Werror 3 | SRC=src 4 | DEPS=$(wildcard $(SRC)/*.h) 5 | BINDIR=objs 6 | OBJDIR=$(BINDIR)/$(SRC) 7 | TARGET=$(BINDIR)/webproxy 8 | OBJS=$(patsubst %.c,$(BINDIR)/%.o,$(wildcard $(SRC)/*.c)) 9 | VPATH=$(SRC) 10 | 11 | all: $(OBJDIR) $(TARGET) 12 | 13 | debug: CFLAGS+= -O0 -g 14 | debug: $(OBJDIR) $(TARGET) 15 | 16 | $(TARGET): $(OBJS) 17 | gcc -o $@ $^ $(CFLAGS) 18 | 19 | $(OBJDIR)/%.o: %.c $(DEPS) 20 | $(CC) -c -o $@ $< $(CFLAGS) 21 | 22 | $(OBJDIR): 23 | mkdir -p $(OBJDIR) 24 | 25 | clean: 26 | rm -rf $(BINDIR) 27 | -------------------------------------------------------------------------------- /tests/test_proxy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | http_load_dir=http_load-09Mar2016 4 | package=$http_load_dir.tar.gz 5 | 6 | _install() 7 | { 8 | wget http://www.acme.com/software/http_load/$package 9 | tar -xvf $package 10 | cd $http_load_dir 11 | make 12 | cd .. 
13 | } 14 | 15 | _test() 16 | { 17 | $http_load_dir/http_load -proxy 127.0.0.1:3128 -rate 5 -seconds 10 -parallel 5 ./urls 18 | } 19 | 20 | _main() 21 | { 22 | if [ "$1" = "install" ]; then 23 | _install 24 | elif [ "$1" = "test" ]; then 25 | _test 26 | else 27 | _help $0 28 | exit 1 29 | fi 30 | exit 0 31 | } 32 | 33 | _main $1 34 | -------------------------------------------------------------------------------- /tests/test_khash.c: -------------------------------------------------------------------------------- 1 | #include "src/khash.h" 2 | KHASH_MAP_INIT_INT(m32, char) // instantiate structs and methods 3 | int main() { 4 | int i; 5 | int absent, is_missing; 6 | khint_t k; 7 | khash_t(m32) *h = kh_init(m32); // allocate a hash table 8 | 9 | for(i=1;i<10000; i++){ 10 | k = kh_put(m32, h, i, &absent); // insert a key to the hash table 11 | if (!absent){ 12 | printf("absent= %u\n", absent); 13 | kh_del(m32, h, k); 14 | } 15 | kh_value(h, k) = 10; // set the value 16 | } 17 | k = kh_get(m32, h, 10); // query the hash table 18 | is_missing = (k == kh_end(h)); // test if the key is present 19 | k = kh_get(m32, h, 5); 20 | kh_del(m32, h, k); // remove a key-value pair 21 | for (k = kh_begin(h); k != kh_end(h); ++k) // traverse 22 | if (kh_exist(h, k)) // test if a bucket contains data 23 | kh_value(h, k) = 1; 24 | kh_destroy(m32, h); // deallocate the hash table 25 | return 0; 26 | } 27 | -------------------------------------------------------------------------------- /src/mpool.h: -------------------------------------------------------------------------------- 1 | #ifndef MPOOL_H 2 | #define MPOOL_H 3 | 4 | /* Turn on debugging traces */ 5 | #ifndef MPOOL_DEBUG 6 | #define MPOOL_DEBUG 0 7 | #endif 8 | 9 | /* Allow overriding malloc functions. 
*/ 10 | #ifndef MPOOL_MALLOC 11 | #define MPOOL_MALLOC(sz) malloc(sz) 12 | #define MPOOL_REALLOC(p, sz) realloc(p, sz) 13 | #define MPOOL_FREE(p, sz) free(p) 14 | #endif 15 | 16 | typedef struct { 17 | int ct; /* actual pool count */ 18 | int pal; /* pool array length (2^x ceil of ct) */ 19 | int min_pool; /* minimum pool size */ 20 | int max_pool; /* maximum pool size */ 21 | int pg_sz; /* page size, typically 4096 */ 22 | void **ps; /* pools */ 23 | int *sizes; /* chunk size for each pool */ 24 | void *hs[1]; /* heads for pools' free lists */ 25 | } mpool; 26 | 27 | /* Initialize a memory pool for allocations between 2^min2 and 2^max2, 28 | * inclusive. (Larger allocations will be directly allocated and freed 29 | * via mmap / munmap.) */ 30 | mpool *mpool_init(int min2, int max2); 31 | 32 | /* Allocate SZ bytes. */ 33 | void *mpool_alloc(mpool *mp, int sz); 34 | 35 | /* mmap a new memory pool of TOTAL_SZ bytes, then build an internal 36 | * freelist of SZ-byte cells, with the head at (result)[0]. */ 37 | void **mpool_new_pool(unsigned int sz, unsigned int total_sz); 38 | 39 | /* Return pointer P (SZ bytes in size) to the appropriate pool. */ 40 | void mpool_repool(mpool *mp, void *p, int sz); 41 | 42 | /* Resize P from OLD_SZ to NEW_SZ, copying content. */ 43 | void *mpool_realloc(mpool *mp, void *p, int old_sz, int new_sz); 44 | 45 | /* Free the pool. */ 46 | void mpool_free(mpool *mp); 47 | 48 | #endif 49 | -------------------------------------------------------------------------------- /src/picohttpparser.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase, 3 | * Shigeo Mitsunari 4 | * 5 | * The software is licensed under either the MIT License (below) or the Perl 6 | * license. 
7 | * 8 | * Permission is hereby granted, free of charge, to any person obtaining a copy 9 | * of this software and associated documentation files (the "Software"), to 10 | * deal in the Software without restriction, including without limitation the 11 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 12 | * sell copies of the Software, and to permit persons to whom the Software is 13 | * furnished to do so, subject to the following conditions: 14 | * 15 | * The above copyright notice and this permission notice shall be included in 16 | * all copies or substantial portions of the Software. 17 | * 18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 24 | * IN THE SOFTWARE. 
25 | */ 26 | 27 | #ifndef picohttpparser_h 28 | #define picohttpparser_h 29 | 30 | #include 31 | 32 | #ifdef _MSC_VER 33 | #define ssize_t intptr_t 34 | #endif 35 | 36 | /* $Id$ */ 37 | 38 | #ifdef __cplusplus 39 | extern "C" { 40 | #endif 41 | 42 | /* contains name and value of a header (name == NULL if is a continuing line 43 | * of a multiline header */ 44 | struct phr_header { 45 | const char *name; 46 | size_t name_len; 47 | const char *value; 48 | size_t value_len; 49 | }; 50 | 51 | /* returns number of bytes consumed if successful, -2 if request is partial, 52 | * -1 if failed */ 53 | int phr_parse_request(const char *buf, size_t len, const char **method, size_t *method_len, const char **path, size_t *path_len, 54 | int *minor_version, struct phr_header *headers, size_t *num_headers, size_t last_len); 55 | 56 | /* ditto */ 57 | int phr_parse_response(const char *_buf, size_t len, int *minor_version, int *status, const char **msg, size_t *msg_len, 58 | struct phr_header *headers, size_t *num_headers, size_t last_len); 59 | 60 | /* ditto */ 61 | int phr_parse_headers(const char *buf, size_t len, struct phr_header *headers, size_t *num_headers, size_t last_len); 62 | 63 | /* should be zero-filled before start */ 64 | struct phr_chunked_decoder { 65 | size_t bytes_left_in_chunk; /* number of bytes left in current chunk */ 66 | char consume_trailer; /* if trailing headers should be consumed */ 67 | char _hex_count; 68 | char _state; 69 | }; 70 | 71 | /* the function rewrites the buffer given as (buf, bufsz) removing the chunked- 72 | * encoding headers. When the function returns without an error, bufsz is 73 | * updated to the length of the decoded data available. Applications should 74 | * repeatedly call the function while it returns -2 (incomplete) every time 75 | * supplying newly arrived data. 
If the end of the chunked-encoded data is 76 | * found, the function returns a non-negative number indicating the number of 77 | * octets left undecoded at the tail of the supplied buffer. Returns -1 on 78 | * error. 79 | */ 80 | ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *bufsz); 81 | 82 | /* returns if the chunked decoder is in middle of chunked data */ 83 | int phr_decode_chunked_is_in_data(struct phr_chunked_decoder *decoder); 84 | 85 | #ifdef __cplusplus 86 | } 87 | #endif 88 | 89 | #endif 90 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # webproxy [![Build Status](https://travis-ci.org/onestraw/simple-web-proxy.svg?branch=master)](https://travis-ci.org/onestraw/simple-web-proxy) 2 | 3 | A simple http proxy at the client side. 4 | 5 | 6 | # workflow of web proxy 7 | 8 | +--------+ 9 | | | 10 | | DNS | 11 | | SERVER | 12 | | | 13 | +--------+ 14 | ^ 15 | | 16 | (3) resolve the host name 17 | | 18 | v 19 | +--------+ +-------+ +--------+ 20 | | | -- (1)TCP connection ----> | | | | 21 | | | -- (2)http request ------> | | --- (4)TCP connection ---> | | 22 | | | | | --- (5)http request -----> | | 23 | | CLIENT | | PROXY | | SERVER | 24 | | | <- (7)http response ------ | | <-- (6)http response ----- | | 25 | | | ... | | ... | | 26 | | | | | | | 27 | | | -- (n)close connection --> | | - (n+1)close connection -> | | 28 | +--------+ +-------+ +--------+ 29 | 30 | 31 | # key point 32 | 33 | * proxy plays two roles: server and client 34 | * we need to know how client interacts with server and every possible exceptional state 35 | 36 | * client send TCP SYN to server, pending in SYN queue, waiting for server accept.
37 | - from the view of programmer, epoll 38 | - connect() return 0 39 | - connect() return -1 && errno == EINPROGRESS 40 | - if the async connect() finish, the sockfd will become writable (POLLOUT is set), check the error with getsockopt(sockfd, SOL_SOCKET, SO_ERROR,...) 41 | 42 | - **errno in this phase:** 43 | - ECONNREFUSED: remote port is not open, or server connection reaches maximum [listen(fd, backlog)] 44 | - ETIMEDOUT: connection timed out 45 | - EINPROGRESS: non-blocking connect request is pended to SYN queue 46 | 47 | 48 | * the connection is established after server received ACK 49 | - accept(listenfd, ) return success 50 | - listenfd become readable (POLLIN is set) 51 | 52 | - **errno in this phase:** 53 | - EAGAIN or EWOULDBLOCK (when fd is set to nonblocking): no pending connection 54 | - EBADF: listenfd is not an open file descriptor 55 | 56 | 57 | * client send/write data 58 | * client recv/read data 59 | - remove the session if the peer close the connection 60 | 61 | - **errno in this phase:** 62 | - EAGAIN or EWOULDBLOCK: The file descriptor fd refers to a socket and has been marked nonblocking (O_NONBLOCK), and the write/read would block. 63 | - EBADF: sockfd is not a valid open file descriptor. 64 | - ECONNRESET: Connection reset by peer. 65 | - EPIPE: The local end has been shut down on a connection oriented socket. 66 | 67 | - **todo:** check http Content-Length or Transfer-Encoding 68 | 69 | 70 | * client close the connection 71 | * server close the connection 72 | - call close() after read/write() finish 73 | - read() should return 0 on receipt of a FIN from the peer 74 | - when write() returns EPIPE, it also raises the SIGPIPE signal - you never see the EPIPE error unless you handle or ignore the signal 75 | - [how to tell if peer close the connection](https://goo.gl/Mi9sgD) 76 | 77 | 78 | # close vs shutdown 79 | 80 | close() will prevent any more reads and writes to the socket and free it.
81 | 82 | int shutdown(int sockfd, int how); 83 | 84 | The shutdown() call causes all or part of a full-duplex connection on the socket associated with sockfd to be shut down. If how is SHUT_RD, further receptions will be disallowed. If how is SHUT_WR, further transmissions will be disallowed. If how is SHUT_RDWR, further receptions and transmissions will be disallowed. 85 | But shutdown() doesn't free the socket descriptor. 86 | 87 | 88 | # read/write vs recv/send 89 | 90 | read() is equivalent to recv() with a flags parameter of 0. Other values for the flags parameter change the behaviour of recv(). Similarly, write() is equivalent to send() with flags == 0. 91 | 92 | MSG_DONTWAIT (since Linux 2.2) 93 | Enables nonblocking operation; if the operation would block, 94 | the call fails with the error EAGAIN or EWOULDBLOCK. 95 | [reference](http://man7.org/linux/man-pages/man2/recv.2.html) 96 | 97 | 98 | # dependencies 99 | - http parser: https://github.com/h2o/picohttpparser 100 | - hash table : https://github.com/attractivechaos/klib 101 | - memory pool: https://github.com/silentbicycle/mpool 102 | 103 | # reference 104 | - http://www.cs.princeton.edu/courses/archive/spr08/cos461/web_proxy.html 105 | - https://www.w3.org/Protocols/ 106 | - [Beej's Guide to Network Programming](http://beej.us/guide/bgnet/output/html/singlepage/bgnet.html) 107 | - [ssl proxy](https://github.com/libevent/libevent/blob/master/sample/le-proxy.c) 108 | - [how to use epoll](http://man7.org/linux/man-pages/man7/epoll.7.html) 109 | -------------------------------------------------------------------------------- /src/mpool.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 Scott Vokes 3 | * 4 | * Permission to use, copy, modify, and/or distribute this software for any 5 | * purpose with or without fee is hereby granted, provided that the above 6 | * copyright notice and this permission notice appear in all copies.
7 | * 8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15 | */ 16 | 17 | /* 18 | * A memory pool allocator, designed for systems that need to 19 | * allocate/free pointers in amortized O(1) time. Memory is allocated a 20 | * page at a time, then added to a set of pools of equally sized 21 | * regions. A free list for each size is maintained in the unused 22 | * regions. When a pointer is repooled, it is linked back into the 23 | * pool with the given size's free list. 24 | * 25 | * Note that repooling with the wrong size leads to subtle/ugly memory 26 | * clobbering bugs. Turning on memory use logging via MPOOL_DEBUG 27 | * can help pin down the location of most such errors. 28 | * 29 | * Allocations larger than the page size are allocated whole via 30 | * mmap, and those larger than mp->max_pool (configurable) are 31 | * freed immediately via munmap; no free list is used. 32 | */ 33 | 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | 42 | #include "mpool.h" 43 | 44 | #define DBG MPOOL_DEBUG 45 | 46 | static void *get_mmap(long sz) { 47 | void *p = mmap(0, sz, PROT_READ|PROT_WRITE, 48 | MAP_PRIVATE|MAP_ANON, -1, 0); 49 | if (p == MAP_FAILED) return NULL; 50 | return p; 51 | } 52 | 53 | /* Optimized base-2 integer ceiling, from _Hacker's Delight_ 54 | * by Henry S. Warren, pg. 48. Called 'clp2' there. 
*/ 55 | static unsigned int iceil2(unsigned int x) { 56 | x = x - 1; 57 | x = x | (x >> 1); 58 | x = x | (x >> 2); 59 | x = x | (x >> 4); 60 | x = x | (x >> 8); 61 | x = x | (x >> 16); 62 | return x + 1; 63 | } 64 | 65 | /* mmap a new memory pool of TOTAL_SZ bytes, then build an internal 66 | * freelist of SZ-byte cells, with the head at (result)[0]. 67 | * Returns NULL on error. */ 68 | void **mpool_new_pool(unsigned int sz, unsigned int total_sz) { 69 | void *p = get_mmap(sz > total_sz ? sz : total_sz); 70 | int i, o=0, lim; /* o=offset */ 71 | int **pool; 72 | void *last = NULL; 73 | if (p == NULL) return NULL; 74 | pool = (int **)p;; 75 | assert(pool); 76 | assert(sz > sizeof(void *)); 77 | 78 | lim = (total_sz/sz); 79 | if (DBG) fprintf(stderr, 80 | "mpool_new_pool sz: %d lim: %d => %d %p\n", 81 | sz, lim, lim * sz, p); 82 | for (i=0; i 1) fprintf(stderr, "%d (%d / 0x%04x) -> %p = %p\n", 88 | i, o, o, &pool[o], pool[o]); 89 | } 90 | pool[o] = NULL; 91 | return p; 92 | } 93 | 94 | /* Add a new pool, resizing the pool array if necessary. */ 95 | static int add_pool(mpool *mp, void *p, int sz) { 96 | void **nps, *nsizes; /* new pools, new sizes */ 97 | assert(p); 98 | assert(sz > 0); 99 | if (DBG) fprintf(stderr, "mpool_add_pool (%d / %d) @ %p, sz %d\n", 100 | mp->ct, mp->pal, p, sz); 101 | if (mp->ct == mp->pal) { 102 | mp->pal *= 2; /* ram will exhaust before overflow... */ 103 | nps = MPOOL_REALLOC(mp->ps, mp->pal * sizeof(void **)); 104 | nsizes = MPOOL_REALLOC(mp->sizes, mp->pal * sizeof(int *)); 105 | if (nps == NULL || nsizes == NULL) return -1; 106 | mp->sizes = nsizes; 107 | mp->ps = nps; 108 | } 109 | 110 | mp->ps[mp->ct] = p; 111 | mp->sizes[mp->ct] = sz; 112 | mp->ct++; 113 | return 0; 114 | } 115 | 116 | /* Initialize a memory pool set, with pools in sizes 117 | * 2^min2 to 2^max2. Returns NULL on error. 
*/ 118 | mpool *mpool_init(int min2, int max2) { 119 | int palen; /* length of pool array */ 120 | int ct = ct = max2 - min2 + 1; /* pool array count */ 121 | long pgsz = sysconf(_SC_PAGESIZE); 122 | mpool *mp; 123 | void *pools; 124 | int *sizes; 125 | 126 | palen = iceil2(ct); 127 | if (DBG) fprintf(stderr, "mpool_init for cells %d - %d bytes\n", 128 | 1 << min2, 1 << max2); 129 | 130 | assert(ct > 0); 131 | mp = MPOOL_MALLOC(sizeof(mpool) + (ct-1)*sizeof(void *)); 132 | pools = MPOOL_MALLOC(palen*sizeof(void **)); 133 | sizes = MPOOL_MALLOC(palen*sizeof(int)); 134 | if (mp == NULL || pools == NULL || sizes == NULL) return NULL; 135 | mp->ct = ct; 136 | mp->ps = pools; 137 | mp->pal = palen; 138 | mp->pg_sz = pgsz; 139 | mp->sizes = sizes; 140 | mp->min_pool = 1 << min2; 141 | mp->max_pool = 1 << max2; 142 | bzero(sizes, palen * sizeof(int)); 143 | bzero(pools, palen * sizeof(void *)); 144 | bzero(mp->hs, ct * sizeof(void *)); 145 | 146 | return mp; 147 | } 148 | 149 | /* Free a memory pool set. */ 150 | void mpool_free(mpool *mp) { 151 | long i, sz, pgsz = mp->pg_sz; 152 | assert(mp); 153 | if (DBG) fprintf(stderr, "%d/%d pools, freeing...\n", mp->ct, mp->pal); 154 | for (i=0; ict; i++) { 155 | void *p = mp->ps[i]; 156 | if (p) { 157 | sz = mp->sizes[i]; 158 | assert(sz > 0); 159 | sz = sz >= pgsz ? sz : pgsz; 160 | if (DBG) fprintf(stderr, "mpool_free %ld, sz %ld (%p)\n", i, sz, mp->ps[i]); 161 | if (munmap(mp->ps[i], sz) == -1) { 162 | fprintf(stderr, "munmap error while unmapping %lu bytes at %p\n", 163 | sz, mp->ps[i]); 164 | } 165 | } 166 | } 167 | MPOOL_FREE(mp->ps, mp->ct * sizeof(*ps)); 168 | MPOOL_FREE(mp, sizeof(*mp)); 169 | } 170 | 171 | /* Allocate memory out of the relevant memory pool. 172 | * If larger than max_pool, just mmap it. If pool is full, mmap a new one and 173 | * link it to the end of the current one. Returns NULL on error. 
*/
void *mpool_alloc(mpool *mp, int sz) {
    void **cur, **np;           /* current head, new pool */
    int i, p, szceil = 0;
    assert(mp);
    if (sz < 0) return NULL;    /* a negative size is never a valid request */
    if (sz >= mp->max_pool) {
        cur = get_mmap(sz);     /* just mmap it */
        if (cur == NULL) return NULL;
        if (DBG) fprintf(stderr,
            "mpool_alloc mmap %d bytes @ %p\n", sz, (void *)cur);
        return cur;
    }

    /* Pick the first cell size strictly greater than sz; i is its list.
     * sz < max_pool here, so p never exceeds max_pool. */
    for (i=0, p=mp->min_pool; ; i++, p*=2) {
        if (p > sz) { szceil = p; break; }
    }
    assert(szceil > 0);
    cur = mp->hs[i];            /* get current head */
    if (cur == NULL) {          /* lazily allocate & init pool */
        void **pool = mpool_new_pool(szceil, mp->pg_sz);
        if (pool == NULL) return NULL;
        mp->ps[i] = pool;
        mp->hs[i] = &pool[0];
        mp->sizes[i] = szceil;
        cur = mp->hs[i];
    }
    assert(cur);

    if (*cur == NULL) {         /* if at end, attach to a new page */
        if (DBG) fprintf(stderr,
            "mpool_alloc adding pool w/ cell size %d\n", szceil);
        np = mpool_new_pool(szceil, mp->pg_sz);
        if (np == NULL) return NULL;
        /* Record the page before linking it in. If it cannot be recorded,
         * mpool_free would never unmap it, so release it right away and
         * leave the free list untouched. */
        if (add_pool(mp, np, szceil) < 0) {
            munmap(np, szceil > mp->pg_sz ? szceil : mp->pg_sz);
            return NULL;
        }
        *cur = &np[0];
        assert(*cur);
    }

    assert(*cur > (void *)4096);
    if (DBG) fprintf(stderr,
        "mpool_alloc pool %d bytes @ %p (list %d, szceil %d )\n",
        sz, (void*) cur, i, szceil);

    mp->hs[i] = *cur;           /* set head to next head */
    return cur;
}

/* Push an individual pointer P back on the freelist for
 * the pool with size SZ cells.
 * if SZ is > the max pool size, just munmap it.
*/ 223 | void mpool_repool(mpool *mp, void *p, int sz) { 224 | int i=0, szceil, max_pool = mp->max_pool; 225 | void **ip; 226 | 227 | if (sz > max_pool) { 228 | if (DBG) fprintf(stderr, "mpool_repool munmap sz %d @ %p\n", sz, p); 229 | if (munmap(p, sz) == -1) { 230 | fprintf(stderr, "munmap error while unmapping %d bytes at %p\n", 231 | sz, p); 232 | } 233 | return; 234 | } 235 | 236 | szceil = iceil2(sz); 237 | szceil = szceil > mp->min_pool ? szceil : mp->min_pool; 238 | 239 | ip = (void **)p; 240 | *ip = mp->hs[i]; 241 | assert(ip); 242 | mp->hs[i] = ip; 243 | if (DBG) fprintf(stderr, 244 | "mpool_repool list %d, %d bytes (ceil %d): %p\n", 245 | i, sz, szceil, ip); 246 | } 247 | 248 | /* Reallocate data, growing or shrinking and copying the contents. 249 | * Returns NULL on reallocation error. */ 250 | void *mpool_realloc(mpool *mp, void *p, int old_sz, int new_sz) { 251 | void *r = mpool_alloc(mp, new_sz); 252 | if (r == NULL) return NULL; 253 | memcpy(r, p, old_sz); 254 | mpool_repool(mp, p, old_sz); 255 | return r; 256 | } 257 | -------------------------------------------------------------------------------- /src/khash.h: -------------------------------------------------------------------------------- 1 | /* The MIT License 2 | 3 | Copyright (c) 2008, 2009, 2011 by Attractive Chaos 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 
15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 20 | BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 21 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 22 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | SOFTWARE. 24 | */ 25 | 26 | #ifndef __AC_KHASH_H 27 | #define __AC_KHASH_H 28 | 29 | /*! 30 | @header 31 | 32 | Generic hash table library. 33 | */ 34 | 35 | #define AC_VERSION_KHASH_H "0.2.8" 36 | 37 | #include 38 | #include 39 | #include 40 | 41 | /* compiler specific configuration */ 42 | 43 | #if UINT_MAX == 0xffffffffu 44 | typedef unsigned int khint32_t; 45 | #elif ULONG_MAX == 0xffffffffu 46 | typedef unsigned long khint32_t; 47 | #endif 48 | 49 | #if ULONG_MAX == ULLONG_MAX 50 | typedef unsigned long khint64_t; 51 | #else 52 | typedef unsigned long long khint64_t; 53 | #endif 54 | 55 | #ifndef kh_inline 56 | #ifdef _MSC_VER 57 | #define kh_inline __inline 58 | #else 59 | #define kh_inline inline 60 | #endif 61 | #endif /* kh_inline */ 62 | 63 | #ifndef klib_unused 64 | #if (defined __clang__ && __clang_major__ >= 3) || (defined __GNUC__ && __GNUC__ >= 3) 65 | #define klib_unused __attribute__ ((__unused__)) 66 | #else 67 | #define klib_unused 68 | #endif 69 | #endif /* klib_unused */ 70 | 71 | typedef khint32_t khint_t; 72 | typedef khint_t khiter_t; 73 | 74 | #define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2) 75 | #define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1) 76 | #define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3) 77 | #define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1))) 78 | #define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1))) 79 | #define __ac_set_isboth_false(flag, i) 
(flag[i>>4]&=~(3ul<<((i&0xfU)<<1))) 80 | #define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1)) 81 | 82 | #define __ac_fsize(m) ((m) < 16? 1 : (m)>>4) 83 | 84 | #ifndef kroundup32 85 | #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) 86 | #endif 87 | 88 | #ifndef kcalloc 89 | #define kcalloc(N,Z) calloc(N,Z) 90 | #endif 91 | #ifndef kmalloc 92 | #define kmalloc(Z) malloc(Z) 93 | #endif 94 | #ifndef krealloc 95 | #define krealloc(P,Z) realloc(P,Z) 96 | #endif 97 | #ifndef kfree 98 | #define kfree(P) free(P) 99 | #endif 100 | 101 | static const double __ac_HASH_UPPER = 0.77; 102 | 103 | #define __KHASH_TYPE(name, khkey_t, khval_t) \ 104 | typedef struct kh_##name##_s { \ 105 | khint_t n_buckets, size, n_occupied, upper_bound; \ 106 | khint32_t *flags; \ 107 | khkey_t *keys; \ 108 | khval_t *vals; \ 109 | } kh_##name##_t; 110 | 111 | #define __KHASH_PROTOTYPES(name, khkey_t, khval_t) \ 112 | extern kh_##name##_t *kh_init_##name(void); \ 113 | extern void kh_destroy_##name(kh_##name##_t *h); \ 114 | extern void kh_clear_##name(kh_##name##_t *h); \ 115 | extern khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key); \ 116 | extern int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \ 117 | extern khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); \ 118 | extern void kh_del_##name(kh_##name##_t *h, khint_t x); 119 | 120 | #define __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ 121 | SCOPE kh_##name##_t *kh_init_##name(void) { \ 122 | return (kh_##name##_t*)kcalloc(1, sizeof(kh_##name##_t)); \ 123 | } \ 124 | SCOPE void kh_destroy_##name(kh_##name##_t *h) \ 125 | { \ 126 | if (h) { \ 127 | kfree((void *)h->keys); kfree(h->flags); \ 128 | kfree((void *)h->vals); \ 129 | kfree(h); \ 130 | } \ 131 | } \ 132 | SCOPE void kh_clear_##name(kh_##name##_t *h) \ 133 | { \ 134 | if (h && h->flags) { \ 135 | memset(h->flags, 0xaa, 
__ac_fsize(h->n_buckets) * sizeof(khint32_t)); \ 136 | h->size = h->n_occupied = 0; \ 137 | } \ 138 | } \ 139 | SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \ 140 | { \ 141 | if (h->n_buckets) { \ 142 | khint_t k, i, last, mask, step = 0; \ 143 | mask = h->n_buckets - 1; \ 144 | k = __hash_func(key); i = k & mask; \ 145 | last = i; \ 146 | while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ 147 | i = (i + (++step)) & mask; \ 148 | if (i == last) return h->n_buckets; \ 149 | } \ 150 | return __ac_iseither(h->flags, i)? h->n_buckets : i; \ 151 | } else return 0; \ 152 | } \ 153 | SCOPE int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \ 154 | { /* This function uses 0.25*n_buckets bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */ \ 155 | khint32_t *new_flags = 0; \ 156 | khint_t j = 1; \ 157 | { \ 158 | kroundup32(new_n_buckets); \ 159 | if (new_n_buckets < 4) new_n_buckets = 4; \ 160 | if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; /* requested size is too small */ \ 161 | else { /* hash table size to be changed (shrink or expand); rehash */ \ 162 | new_flags = (khint32_t*)kmalloc(__ac_fsize(new_n_buckets) * sizeof(khint32_t)); \ 163 | if (!new_flags) return -1; \ 164 | memset(new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \ 165 | if (h->n_buckets < new_n_buckets) { /* expand */ \ 166 | khkey_t *new_keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \ 167 | if (!new_keys) { kfree(new_flags); return -1; } \ 168 | h->keys = new_keys; \ 169 | if (kh_is_map) { \ 170 | khval_t *new_vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \ 171 | if (!new_vals) { kfree(new_flags); return -1; } \ 172 | h->vals = new_vals; \ 173 | } \ 174 | } /* otherwise shrink */ \ 175 | } \ 176 | } \ 177 | if (j) { /* rehashing is needed */ \ 178 | for (j = 0; j != h->n_buckets; ++j) { \ 179 | if 
(__ac_iseither(h->flags, j) == 0) { \ 180 | khkey_t key = h->keys[j]; \ 181 | khval_t val; \ 182 | khint_t new_mask; \ 183 | new_mask = new_n_buckets - 1; \ 184 | if (kh_is_map) val = h->vals[j]; \ 185 | __ac_set_isdel_true(h->flags, j); \ 186 | while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \ 187 | khint_t k, i, step = 0; \ 188 | k = __hash_func(key); \ 189 | i = k & new_mask; \ 190 | while (!__ac_isempty(new_flags, i)) i = (i + (++step)) & new_mask; \ 191 | __ac_set_isempty_false(new_flags, i); \ 192 | if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { /* kick out the existing element */ \ 193 | { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \ 194 | if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \ 195 | __ac_set_isdel_true(h->flags, i); /* mark it as deleted in the old hash table */ \ 196 | } else { /* write the element and jump out of the loop */ \ 197 | h->keys[i] = key; \ 198 | if (kh_is_map) h->vals[i] = val; \ 199 | break; \ 200 | } \ 201 | } \ 202 | } \ 203 | } \ 204 | if (h->n_buckets > new_n_buckets) { /* shrink the hash table */ \ 205 | h->keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \ 206 | if (kh_is_map) h->vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \ 207 | } \ 208 | kfree(h->flags); /* free the working space */ \ 209 | h->flags = new_flags; \ 210 | h->n_buckets = new_n_buckets; \ 211 | h->n_occupied = h->size; \ 212 | h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \ 213 | } \ 214 | return 0; \ 215 | } \ 216 | SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \ 217 | { \ 218 | khint_t x; \ 219 | if (h->n_occupied >= h->upper_bound) { /* update the hash table */ \ 220 | if (h->n_buckets > (h->size<<1)) { \ 221 | if (kh_resize_##name(h, h->n_buckets - 1) < 0) { /* clear "deleted" elements */ \ 222 | *ret = -1; return h->n_buckets; \ 223 | } \ 224 | } else if (kh_resize_##name(h, 
h->n_buckets + 1) < 0) { /* expand the hash table */ \ 225 | *ret = -1; return h->n_buckets; \ 226 | } \ 227 | } /* TODO: to implement automatically shrinking; resize() already support shrinking */ \ 228 | { \ 229 | khint_t k, i, site, last, mask = h->n_buckets - 1, step = 0; \ 230 | x = site = h->n_buckets; k = __hash_func(key); i = k & mask; \ 231 | if (__ac_isempty(h->flags, i)) x = i; /* for speed up */ \ 232 | else { \ 233 | last = i; \ 234 | while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ 235 | if (__ac_isdel(h->flags, i)) site = i; \ 236 | i = (i + (++step)) & mask; \ 237 | if (i == last) { x = site; break; } \ 238 | } \ 239 | if (x == h->n_buckets) { \ 240 | if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \ 241 | else x = i; \ 242 | } \ 243 | } \ 244 | } \ 245 | if (__ac_isempty(h->flags, x)) { /* not present at all */ \ 246 | h->keys[x] = key; \ 247 | __ac_set_isboth_false(h->flags, x); \ 248 | ++h->size; ++h->n_occupied; \ 249 | *ret = 1; \ 250 | } else if (__ac_isdel(h->flags, x)) { /* deleted */ \ 251 | h->keys[x] = key; \ 252 | __ac_set_isboth_false(h->flags, x); \ 253 | ++h->size; \ 254 | *ret = 2; \ 255 | } else *ret = 0; /* Don't touch h->keys[x] if present and not deleted */ \ 256 | return x; \ 257 | } \ 258 | SCOPE void kh_del_##name(kh_##name##_t *h, khint_t x) \ 259 | { \ 260 | if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \ 261 | __ac_set_isdel_true(h->flags, x); \ 262 | --h->size; \ 263 | } \ 264 | } 265 | 266 | #define KHASH_DECLARE(name, khkey_t, khval_t) \ 267 | __KHASH_TYPE(name, khkey_t, khval_t) \ 268 | __KHASH_PROTOTYPES(name, khkey_t, khval_t) 269 | 270 | #define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ 271 | __KHASH_TYPE(name, khkey_t, khval_t) \ 272 | __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) 273 | 274 | #define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, 
__hash_equal) \ 275 | KHASH_INIT2(name, static kh_inline klib_unused, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) 276 | 277 | /* --- BEGIN OF HASH FUNCTIONS --- */ 278 | 279 | /*! @function 280 | @abstract Integer hash function 281 | @param key The integer [khint32_t] 282 | @return The hash value [khint_t] 283 | */ 284 | #define kh_int_hash_func(key) (khint32_t)(key) 285 | /*! @function 286 | @abstract Integer comparison function 287 | */ 288 | #define kh_int_hash_equal(a, b) ((a) == (b)) 289 | /*! @function 290 | @abstract 64-bit integer hash function 291 | @param key The integer [khint64_t] 292 | @return The hash value [khint_t] 293 | */ 294 | #define kh_int64_hash_func(key) (khint32_t)((key)>>33^(key)^(key)<<11) 295 | /*! @function 296 | @abstract 64-bit integer comparison function 297 | */ 298 | #define kh_int64_hash_equal(a, b) ((a) == (b)) 299 | /*! @function 300 | @abstract const char* hash function 301 | @param s Pointer to a null terminated string 302 | @return The hash value 303 | */ 304 | static kh_inline khint_t __ac_X31_hash_string(const char *s) 305 | { 306 | khint_t h = (khint_t)*s; 307 | if (h) for (++s ; *s; ++s) h = (h << 5) - h + (khint_t)*s; 308 | return h; 309 | } 310 | /*! @function 311 | @abstract Another interface to const char* hash function 312 | @param key Pointer to a null terminated string [const char*] 313 | @return The hash value [khint_t] 314 | */ 315 | #define kh_str_hash_func(key) __ac_X31_hash_string(key) 316 | /*! 
@function 317 | @abstract Const char* comparison function 318 | */ 319 | #define kh_str_hash_equal(a, b) (strcmp(a, b) == 0) 320 | 321 | static kh_inline khint_t __ac_Wang_hash(khint_t key) 322 | { 323 | key += ~(key << 15); 324 | key ^= (key >> 10); 325 | key += (key << 3); 326 | key ^= (key >> 6); 327 | key += ~(key << 11); 328 | key ^= (key >> 16); 329 | return key; 330 | } 331 | #define kh_int_hash_func2(key) __ac_Wang_hash((khint_t)key) 332 | 333 | /* --- END OF HASH FUNCTIONS --- */ 334 | 335 | /* Other convenient macros... */ 336 | 337 | /*! 338 | @abstract Type of the hash table. 339 | @param name Name of the hash table [symbol] 340 | */ 341 | #define khash_t(name) kh_##name##_t 342 | 343 | /*! @function 344 | @abstract Initiate a hash table. 345 | @param name Name of the hash table [symbol] 346 | @return Pointer to the hash table [khash_t(name)*] 347 | */ 348 | #define kh_init(name) kh_init_##name() 349 | 350 | /*! @function 351 | @abstract Destroy a hash table. 352 | @param name Name of the hash table [symbol] 353 | @param h Pointer to the hash table [khash_t(name)*] 354 | */ 355 | #define kh_destroy(name, h) kh_destroy_##name(h) 356 | 357 | /*! @function 358 | @abstract Reset a hash table without deallocating memory. 359 | @param name Name of the hash table [symbol] 360 | @param h Pointer to the hash table [khash_t(name)*] 361 | */ 362 | #define kh_clear(name, h) kh_clear_##name(h) 363 | 364 | /*! @function 365 | @abstract Resize a hash table. 366 | @param name Name of the hash table [symbol] 367 | @param h Pointer to the hash table [khash_t(name)*] 368 | @param s New size [khint_t] 369 | */ 370 | #define kh_resize(name, h, s) kh_resize_##name(h, s) 371 | 372 | /*! @function 373 | @abstract Insert a key to the hash table. 
@param  name  Name of the hash table [symbol]
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  k     Key [type of keys]
  @param  r     Extra return code: -1 if the operation failed;
                0 if the key is present in the hash table;
                1 if the bucket is empty (never used); 2 if the element in
                the bucket has been deleted [int*]
  @return       Iterator to the inserted element [khint_t]; when r is -1 the
                returned iterator equals kh_end(h)
 */
#define kh_put(name, h, k, r) kh_put_##name(h, k, r)

/*! @function
  @abstract     Retrieve a key from the hash table.
  @param  name  Name of the hash table [symbol]
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  k     Key [type of keys]
  @return       Iterator to the found element, or kh_end(h) if the element is absent [khint_t]
 */
#define kh_get(name, h, k) kh_get_##name(h, k)

/*! @function
  @abstract     Remove a key from the hash table.
  @param  name  Name of the hash table [symbol]
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  k     Iterator to the element to be deleted [khint_t]
  @discussion   Passing kh_end(h), or an iterator whose bucket holds no
                element, leaves the table unchanged.
 */
#define kh_del(name, h, k) kh_del_##name(h, k)

/*! @function
  @abstract     Test whether a bucket contains data.
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  x     Iterator to the bucket [khint_t]
  @return       1 if containing data; 0 otherwise [int]
 */
#define kh_exist(h, x) (!__ac_iseither((h)->flags, (x)))

/*! @function
  @abstract     Get key given an iterator
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  x     Iterator to the bucket [khint_t]
  @return       Key [type of keys]
 */
#define kh_key(h, x) ((h)->keys[x])

/*! @function
  @abstract     Get value given an iterator
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  x     Iterator to the bucket [khint_t]
  @return       Value [type of values]
  @discussion   For hash sets, calling this results in segfault.
                The expansion is an lvalue, so it can also be assigned to.
 */
#define kh_val(h, x) ((h)->vals[x])

/*! @function
  @abstract     Alias of kh_val()
 */
#define kh_value(h, x) ((h)->vals[x])

/*! @function
  @abstract     Get the start iterator
  @param  h     Pointer to the hash table [khash_t(name)*]
  @return       The start iterator [khint_t]
 */
#define kh_begin(h) (khint_t)(0)

/*! @function
  @abstract     Get the end iterator
  @param  h     Pointer to the hash table [khash_t(name)*]
  @return       The end iterator [khint_t]
 */
#define kh_end(h) ((h)->n_buckets)

/*! @function
  @abstract     Get the number of elements in the hash table
  @param  h     Pointer to the hash table [khash_t(name)*]
  @return       Number of elements in the hash table [khint_t]
 */
#define kh_size(h) ((h)->size)

/*! @function
  @abstract     Get the number of buckets in the hash table
  @param  h     Pointer to the hash table [khash_t(name)*]
  @return       Number of buckets in the hash table [khint_t]
 */
#define kh_n_buckets(h) ((h)->n_buckets)

/*! @function
  @abstract     Iterate over the entries in the hash table
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  kvar  Variable to which key will be assigned
  @param  vvar  Variable to which value will be assigned
  @param  code  Block of code to execute
  @discussion   Buckets without data are skipped. `code` runs inside the body
                of a for loop, so `continue` and `break` in it act on that loop.
 */
#define kh_foreach(h, kvar, vvar, code) { khint_t __i; \
    for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \
        if (!kh_exist(h,__i)) continue; \
        (kvar) = kh_key(h,__i); \
        (vvar) = kh_val(h,__i); \
        code; \
    } }

/*!
@function
  @abstract     Iterate over the values in the hash table
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  vvar  Variable to which value will be assigned
  @param  code  Block of code to execute
  @discussion   Buckets without data are skipped. `code` runs inside the body
                of a for loop, so `continue` and `break` in it act on that loop.
 */
#define kh_foreach_value(h, vvar, code) { khint_t __i; \
    for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \
        if (!kh_exist(h,__i)) continue; \
        (vvar) = kh_val(h,__i); \
        code; \
    } }

/* More convenient interfaces */

/*! @function
  @abstract     Instantiate a hash set containing integer keys
  @param  name  Name of the hash table [symbol]
 */
#define KHASH_SET_INIT_INT(name) \
    KHASH_INIT(name, khint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal)

/*! @function
  @abstract     Instantiate a hash map containing integer keys
  @param  name  Name of the hash table [symbol]
  @param  khval_t  Type of values [type]
 */
#define KHASH_MAP_INIT_INT(name, khval_t) \
    KHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)

/*! @function
  @abstract     Instantiate a hash set containing 64-bit integer keys
  @param  name  Name of the hash table [symbol]
 */
#define KHASH_SET_INIT_INT64(name) \
    KHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal)

/*! @function
  @abstract     Instantiate a hash map containing 64-bit integer keys
  @param  name  Name of the hash table [symbol]
  @param  khval_t  Type of values [type]
 */
#define KHASH_MAP_INIT_INT64(name, khval_t) \
    KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal)

/* Key type used by the string-keyed tables below. */
typedef const char *kh_cstr_t;
/*! @function
  @abstract     Instantiate a hash set containing const char* keys
  @param  name  Name of the hash table [symbol]
 */
#define KHASH_SET_INIT_STR(name) \
    KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal)

/*! @function
  @abstract     Instantiate a hash map containing const char* keys
  @param  name  Name of the hash table [symbol]
  @param  khval_t  Type of values [type]
 */
#define KHASH_MAP_INIT_STR(name, khval_t) \
    KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal)

#endif /* __AC_KHASH_H */

--------------------------------------------------------------------------------
/src/picohttpparser.c:
--------------------------------------------------------------------------------
/*
 * Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase,
 *                         Shigeo Mitsunari
 *
 * The software is licensed under either the MIT License (below) or the Perl
 * license.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE 21 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 24 | * IN THE SOFTWARE. 25 | */ 26 | 27 | #include 28 | #include 29 | #include 30 | #ifdef __SSE4_2__ 31 | #ifdef _MSC_VER 32 | #include 33 | #else 34 | #include 35 | #endif 36 | #endif 37 | #include "picohttpparser.h" 38 | 39 | /* $Id$ */ 40 | 41 | #if __GNUC__ >= 3 42 | #define likely(x) __builtin_expect(!!(x), 1) 43 | #define unlikely(x) __builtin_expect(!!(x), 0) 44 | #else 45 | #define likely(x) (x) 46 | #define unlikely(x) (x) 47 | #endif 48 | 49 | #ifdef _MSC_VER 50 | #define ALIGNED(n) _declspec(align(n)) 51 | #else 52 | #define ALIGNED(n) __attribute__((aligned(n))) 53 | #endif 54 | 55 | #define IS_PRINTABLE_ASCII(c) ((unsigned char)(c)-040u < 0137u) 56 | 57 | #define CHECK_EOF() \ 58 | if (buf == buf_end) { \ 59 | *ret = -2; \ 60 | return NULL; \ 61 | } 62 | 63 | #define EXPECT_CHAR(ch) \ 64 | CHECK_EOF(); \ 65 | if (*buf++ != ch) { \ 66 | *ret = -1; \ 67 | return NULL; \ 68 | } 69 | 70 | #define ADVANCE_TOKEN(tok, toklen) \ 71 | do { \ 72 | const char *tok_start = buf; \ 73 | static const char ALIGNED(16) ranges2[] = "\000\040\177\177"; \ 74 | int found2; \ 75 | buf = findchar_fast(buf, buf_end, ranges2, sizeof(ranges2) - 1, &found2); \ 76 | if (!found2) { \ 77 | CHECK_EOF(); \ 78 | } \ 79 | while (1) { \ 80 | if (*buf == ' ') { \ 81 | break; \ 82 | } else if (unlikely(!IS_PRINTABLE_ASCII(*buf))) { \ 83 | if ((unsigned char)*buf < '\040' || *buf == '\177') { \ 84 | *ret = -1; \ 85 | return NULL; \ 86 | } \ 87 | } \ 88 | ++buf; \ 89 | CHECK_EOF(); \ 90 | } \ 91 | tok = tok_start; \ 92 | toklen = buf - tok_start; \ 93 | } while (0) 94 | 95 | static const char *token_char_map = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" 96 | 
"\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0" 97 | "\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1" 98 | "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0" 99 | "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" 100 | "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" 101 | "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" 102 | "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; 103 | 104 | static const char *findchar_fast(const char *buf, const char *buf_end, const char *ranges, size_t ranges_size, int *found) 105 | { 106 | *found = 0; 107 | #if __SSE4_2__ 108 | if (likely(buf_end - buf >= 16)) { 109 | __m128i ranges16 = _mm_loadu_si128((const __m128i *)ranges); 110 | 111 | size_t left = (buf_end - buf) & ~15; 112 | do { 113 | __m128i b16 = _mm_loadu_si128((void *)buf); 114 | int r = _mm_cmpestri(ranges16, ranges_size, b16, 16, _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS); 115 | if (unlikely(r != 16)) { 116 | buf += r; 117 | *found = 1; 118 | break; 119 | } 120 | buf += 16; 121 | left -= 16; 122 | } while (likely(left != 0)); 123 | } 124 | #else 125 | /* suppress unused parameter warning */ 126 | (void)buf_end; 127 | (void)ranges; 128 | (void)ranges_size; 129 | #endif 130 | return buf; 131 | } 132 | 133 | static const char *get_token_to_eol(const char *buf, const char *buf_end, const char **token, size_t *token_len, int *ret) 134 | { 135 | const char *token_start = buf; 136 | 137 | #ifdef __SSE4_2__ 138 | static const char ranges1[] = "\0\010" 139 | /* allow HT */ 140 | "\012\037" 141 | /* allow SP and up to but not including DEL */ 142 | "\177\177" 143 | /* allow chars w. 
MSB set */ 144 | ; 145 | int found; 146 | buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found); 147 | if (found) 148 | goto FOUND_CTL; 149 | #else 150 | /* find non-printable char within the next 8 bytes, this is the hottest code; manually inlined */ 151 | while (likely(buf_end - buf >= 8)) { 152 | #define DOIT() \ 153 | do { \ 154 | if (unlikely(!IS_PRINTABLE_ASCII(*buf))) \ 155 | goto NonPrintable; \ 156 | ++buf; \ 157 | } while (0) 158 | DOIT(); 159 | DOIT(); 160 | DOIT(); 161 | DOIT(); 162 | DOIT(); 163 | DOIT(); 164 | DOIT(); 165 | DOIT(); 166 | #undef DOIT 167 | continue; 168 | NonPrintable: 169 | if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) { 170 | goto FOUND_CTL; 171 | } 172 | ++buf; 173 | } 174 | #endif 175 | for (;; ++buf) { 176 | CHECK_EOF(); 177 | if (unlikely(!IS_PRINTABLE_ASCII(*buf))) { 178 | if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) { 179 | goto FOUND_CTL; 180 | } 181 | } 182 | } 183 | FOUND_CTL: 184 | if (likely(*buf == '\015')) { 185 | ++buf; 186 | EXPECT_CHAR('\012'); 187 | *token_len = buf - 2 - token_start; 188 | } else if (*buf == '\012') { 189 | *token_len = buf - token_start; 190 | ++buf; 191 | } else { 192 | *ret = -1; 193 | return NULL; 194 | } 195 | *token = token_start; 196 | 197 | return buf; 198 | } 199 | 200 | static const char *is_complete(const char *buf, const char *buf_end, size_t last_len, int *ret) 201 | { 202 | int ret_cnt = 0; 203 | buf = last_len < 3 ? 
buf : buf + last_len - 3; 204 | 205 | while (1) { 206 | CHECK_EOF(); 207 | if (*buf == '\015') { 208 | ++buf; 209 | CHECK_EOF(); 210 | EXPECT_CHAR('\012'); 211 | ++ret_cnt; 212 | } else if (*buf == '\012') { 213 | ++buf; 214 | ++ret_cnt; 215 | } else { 216 | ++buf; 217 | ret_cnt = 0; 218 | } 219 | if (ret_cnt == 2) { 220 | return buf; 221 | } 222 | } 223 | 224 | *ret = -2; 225 | return NULL; 226 | } 227 | 228 | /* *_buf is always within [buf, buf_end) upon success */ 229 | static const char *parse_int(const char *buf, const char *buf_end, int *value, int *ret) 230 | { 231 | int v; 232 | CHECK_EOF(); 233 | if (!('0' <= *buf && *buf <= '9')) { 234 | *ret = -1; 235 | return NULL; 236 | } 237 | v = 0; 238 | for (;; ++buf) { 239 | CHECK_EOF(); 240 | if ('0' <= *buf && *buf <= '9') { 241 | v = v * 10 + *buf - '0'; 242 | } else { 243 | break; 244 | } 245 | } 246 | 247 | *value = v; 248 | return buf; 249 | } 250 | 251 | /* returned pointer is always within [buf, buf_end), or null */ 252 | static const char *parse_http_version(const char *buf, const char *buf_end, int *minor_version, int *ret) 253 | { 254 | EXPECT_CHAR('H'); 255 | EXPECT_CHAR('T'); 256 | EXPECT_CHAR('T'); 257 | EXPECT_CHAR('P'); 258 | EXPECT_CHAR('/'); 259 | EXPECT_CHAR('1'); 260 | EXPECT_CHAR('.'); 261 | return parse_int(buf, buf_end, minor_version, ret); 262 | } 263 | 264 | static const char *parse_headers(const char *buf, const char *buf_end, struct phr_header *headers, size_t *num_headers, 265 | size_t max_headers, int *ret) 266 | { 267 | for (;; ++*num_headers) { 268 | CHECK_EOF(); 269 | if (*buf == '\015') { 270 | ++buf; 271 | EXPECT_CHAR('\012'); 272 | break; 273 | } else if (*buf == '\012') { 274 | ++buf; 275 | break; 276 | } 277 | if (*num_headers == max_headers) { 278 | *ret = -1; 279 | return NULL; 280 | } 281 | if (!(*num_headers != 0 && (*buf == ' ' || *buf == '\t'))) { 282 | /* parsing name, but do not discard SP before colon, see 283 | * 
http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */ 284 | headers[*num_headers].name = buf; 285 | static const char ranges1[] __attribute__((aligned(16))) = "\x00 " /* control chars and up to SP */ 286 | "\"\"" /* 0x22 */ 287 | "()" /* 0x28,0x29 */ 288 | ",," /* 0x2c */ 289 | "//" /* 0x2f */ 290 | ":@" /* 0x3a-0x40 */ 291 | "[]" /* 0x5b-0x5d */ 292 | "{\377"; /* 0x7b-0xff */ 293 | int found; 294 | buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found); 295 | if (!found) { 296 | CHECK_EOF(); 297 | } 298 | while (1) { 299 | if (*buf == ':') { 300 | break; 301 | } else if (!token_char_map[(unsigned char)*buf]) { 302 | *ret = -1; 303 | return NULL; 304 | } 305 | ++buf; 306 | CHECK_EOF(); 307 | } 308 | if ((headers[*num_headers].name_len = buf - headers[*num_headers].name) == 0) { 309 | *ret = -1; 310 | return NULL; 311 | } 312 | ++buf; 313 | for (;; ++buf) { 314 | CHECK_EOF(); 315 | if (!(*buf == ' ' || *buf == '\t')) { 316 | break; 317 | } 318 | } 319 | } else { 320 | headers[*num_headers].name = NULL; 321 | headers[*num_headers].name_len = 0; 322 | } 323 | if ((buf = get_token_to_eol(buf, buf_end, &headers[*num_headers].value, &headers[*num_headers].value_len, ret)) == NULL) { 324 | return NULL; 325 | } 326 | } 327 | return buf; 328 | } 329 | 330 | static const char *parse_request(const char *buf, const char *buf_end, const char **method, size_t *method_len, const char **path, 331 | size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers, 332 | size_t max_headers, int *ret) 333 | { 334 | /* skip first empty line (some clients add CRLF after POST content) */ 335 | CHECK_EOF(); 336 | if (*buf == '\015') { 337 | ++buf; 338 | EXPECT_CHAR('\012'); 339 | } else if (*buf == '\012') { 340 | ++buf; 341 | } 342 | 343 | /* parse request line */ 344 | ADVANCE_TOKEN(*method, *method_len); 345 | ++buf; 346 | ADVANCE_TOKEN(*path, *path_len); 347 | ++buf; 348 | if ((buf = parse_http_version(buf, buf_end, minor_version, 
ret)) == NULL) { 349 | return NULL; 350 | } 351 | if (*buf == '\015') { 352 | ++buf; 353 | EXPECT_CHAR('\012'); 354 | } else if (*buf == '\012') { 355 | ++buf; 356 | } else { 357 | *ret = -1; 358 | return NULL; 359 | } 360 | 361 | return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret); 362 | } 363 | 364 | int phr_parse_request(const char *buf_start, size_t len, const char **method, size_t *method_len, const char **path, 365 | size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers, size_t last_len) 366 | { 367 | const char *buf = buf_start, *buf_end = buf_start + len; 368 | size_t max_headers = *num_headers; 369 | int r; 370 | 371 | *method = NULL; 372 | *method_len = 0; 373 | *path = NULL; 374 | *path_len = 0; 375 | *minor_version = -1; 376 | *num_headers = 0; 377 | 378 | /* if last_len != 0, check if the request is complete (a fast countermeasure 379 | againt slowloris */ 380 | if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) { 381 | return r; 382 | } 383 | 384 | if ((buf = parse_request(buf, buf_end, method, method_len, path, path_len, minor_version, headers, num_headers, max_headers, 385 | &r)) == NULL) { 386 | return r; 387 | } 388 | 389 | return (int)(buf - buf_start); 390 | } 391 | 392 | static const char *parse_response(const char *buf, const char *buf_end, int *minor_version, int *status, const char **msg, 393 | size_t *msg_len, struct phr_header *headers, size_t *num_headers, size_t max_headers, int *ret) 394 | { 395 | /* parse "HTTP/1.x" */ 396 | if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) { 397 | return NULL; 398 | } 399 | /* skip space */ 400 | if (*buf++ != ' ') { 401 | *ret = -1; 402 | return NULL; 403 | } 404 | /* parse status code */ 405 | if ((buf = parse_int(buf, buf_end, status, ret)) == NULL) { 406 | return NULL; 407 | } 408 | /* skip space */ 409 | if (*buf++ != ' ') { 410 | *ret = -1; 411 | return NULL; 412 | } 413 | /* get message */ 414 | if 
((buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret)) == NULL) { 415 | return NULL; 416 | } 417 | 418 | return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret); 419 | } 420 | 421 | int phr_parse_response(const char *buf_start, size_t len, int *minor_version, int *status, const char **msg, size_t *msg_len, 422 | struct phr_header *headers, size_t *num_headers, size_t last_len) 423 | { 424 | const char *buf = buf_start, *buf_end = buf + len; 425 | size_t max_headers = *num_headers; 426 | int r; 427 | 428 | *minor_version = -1; 429 | *status = 0; 430 | *msg = NULL; 431 | *msg_len = 0; 432 | *num_headers = 0; 433 | 434 | /* if last_len != 0, check if the response is complete (a fast countermeasure 435 | against slowloris */ 436 | if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) { 437 | return r; 438 | } 439 | 440 | if ((buf = parse_response(buf, buf_end, minor_version, status, msg, msg_len, headers, num_headers, max_headers, &r)) == NULL) { 441 | return r; 442 | } 443 | 444 | return (int)(buf - buf_start); 445 | } 446 | 447 | int phr_parse_headers(const char *buf_start, size_t len, struct phr_header *headers, size_t *num_headers, size_t last_len) 448 | { 449 | const char *buf = buf_start, *buf_end = buf + len; 450 | size_t max_headers = *num_headers; 451 | int r; 452 | 453 | *num_headers = 0; 454 | 455 | /* if last_len != 0, check if the response is complete (a fast countermeasure 456 | against slowloris */ 457 | if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) { 458 | return r; 459 | } 460 | 461 | if ((buf = parse_headers(buf, buf_end, headers, num_headers, max_headers, &r)) == NULL) { 462 | return r; 463 | } 464 | 465 | return (int)(buf - buf_start); 466 | } 467 | 468 | enum { 469 | CHUNKED_IN_CHUNK_SIZE, 470 | CHUNKED_IN_CHUNK_EXT, 471 | CHUNKED_IN_CHUNK_DATA, 472 | CHUNKED_IN_CHUNK_CRLF, 473 | CHUNKED_IN_TRAILERS_LINE_HEAD, 474 | CHUNKED_IN_TRAILERS_LINE_MIDDLE 475 | }; 476 | 477 | static int 
decode_hex(int ch) 478 | { 479 | if ('0' <= ch && ch <= '9') { 480 | return ch - '0'; 481 | } else if ('A' <= ch && ch <= 'F') { 482 | return ch - 'A' + 0xa; 483 | } else if ('a' <= ch && ch <= 'f') { 484 | return ch - 'a' + 0xa; 485 | } else { 486 | return -1; 487 | } 488 | } 489 | 490 | ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *_bufsz) 491 | { 492 | size_t dst = 0, src = 0, bufsz = *_bufsz; 493 | ssize_t ret = -2; /* incomplete */ 494 | 495 | while (1) { 496 | switch (decoder->_state) { 497 | case CHUNKED_IN_CHUNK_SIZE: 498 | for (;; ++src) { 499 | int v; 500 | if (src == bufsz) 501 | goto Exit; 502 | if ((v = decode_hex(buf[src])) == -1) { 503 | if (decoder->_hex_count == 0) { 504 | ret = -1; 505 | goto Exit; 506 | } 507 | break; 508 | } 509 | if (decoder->_hex_count == sizeof(size_t) * 2) { 510 | ret = -1; 511 | goto Exit; 512 | } 513 | decoder->bytes_left_in_chunk = decoder->bytes_left_in_chunk * 16 + v; 514 | ++decoder->_hex_count; 515 | } 516 | decoder->_hex_count = 0; 517 | decoder->_state = CHUNKED_IN_CHUNK_EXT; 518 | /* fallthru */ 519 | case CHUNKED_IN_CHUNK_EXT: 520 | /* RFC 7230 A.2 "Line folding in chunk extensions is disallowed" */ 521 | for (;; ++src) { 522 | if (src == bufsz) 523 | goto Exit; 524 | if (buf[src] == '\012') 525 | break; 526 | } 527 | ++src; 528 | if (decoder->bytes_left_in_chunk == 0) { 529 | if (decoder->consume_trailer) { 530 | decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD; 531 | break; 532 | } else { 533 | goto Complete; 534 | } 535 | } 536 | decoder->_state = CHUNKED_IN_CHUNK_DATA; 537 | /* fallthru */ 538 | case CHUNKED_IN_CHUNK_DATA: { 539 | size_t avail = bufsz - src; 540 | if (avail < decoder->bytes_left_in_chunk) { 541 | if (dst != src) 542 | memmove(buf + dst, buf + src, avail); 543 | src += avail; 544 | dst += avail; 545 | decoder->bytes_left_in_chunk -= avail; 546 | goto Exit; 547 | } 548 | if (dst != src) 549 | memmove(buf + dst, buf + src, decoder->bytes_left_in_chunk); 550 | 
src += decoder->bytes_left_in_chunk; 551 | dst += decoder->bytes_left_in_chunk; 552 | decoder->bytes_left_in_chunk = 0; 553 | decoder->_state = CHUNKED_IN_CHUNK_CRLF; 554 | } 555 | /* fallthru */ 556 | case CHUNKED_IN_CHUNK_CRLF: 557 | for (;; ++src) { 558 | if (src == bufsz) 559 | goto Exit; 560 | if (buf[src] != '\015') 561 | break; 562 | } 563 | if (buf[src] != '\012') { 564 | ret = -1; 565 | goto Exit; 566 | } 567 | ++src; 568 | decoder->_state = CHUNKED_IN_CHUNK_SIZE; 569 | break; 570 | case CHUNKED_IN_TRAILERS_LINE_HEAD: 571 | for (;; ++src) { 572 | if (src == bufsz) 573 | goto Exit; 574 | if (buf[src] != '\015') 575 | break; 576 | } 577 | if (buf[src++] == '\012') 578 | goto Complete; 579 | decoder->_state = CHUNKED_IN_TRAILERS_LINE_MIDDLE; 580 | /* fallthru */ 581 | case CHUNKED_IN_TRAILERS_LINE_MIDDLE: 582 | for (;; ++src) { 583 | if (src == bufsz) 584 | goto Exit; 585 | if (buf[src] == '\012') 586 | break; 587 | } 588 | ++src; 589 | decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD; 590 | break; 591 | default: 592 | assert(!"decoder is corrupt"); 593 | } 594 | } 595 | 596 | Complete: 597 | ret = bufsz - src; 598 | Exit: 599 | if (dst != src) 600 | memmove(buf + dst, buf + src, bufsz - src); 601 | *_bufsz = dst; 602 | return ret; 603 | } 604 | 605 | int phr_decode_chunked_is_in_data(struct phr_chunked_decoder *decoder) 606 | { 607 | return decoder->_state == CHUNKED_IN_CHUNK_DATA; 608 | } 609 | 610 | #undef CHECK_EOF 611 | #undef EXPECT_CHAR 612 | #undef ADVANCE_TOKEN 613 | -------------------------------------------------------------------------------- /src/webproxy.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) onestraw 3 | */ 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include "picohttpparser.h" 21 | #include 
"mpool.h" 22 | #include "khash.h" 23 | 24 | #define VERSION "beta-0.1" 25 | #define DEFAULT_ADDR "127.0.0.1" 26 | #define DEFAULT_PORT 3128 27 | #define MAX_CONN 4096 28 | #define MAX_EVENTS 4096 29 | #define BUF_SIZE 1024*10 /* 10k */ 30 | #define POOL_MAX 26 /* 2^26 = 64M */ 31 | #define HOST_MAX_LENGTH 128 32 | #define PAYLOAD_MAX_LENGTH 128 33 | #define DUMP_INTERVAL 30 /* seconds */ 34 | #define FORMAT "%-24s%-24s%-s\n" 35 | #define HEADER_FORMAT "%-16s"FORMAT 36 | #define ITEM_FORMAT "%-16u"FORMAT 37 | #define LOG_TIME_FORMAT "%4d/%02d/%02d %02d:%02d:%02d" 38 | #define LOG_TIME_STR_LEN sizeof("1970/09/28 12:00:00") 39 | #define LOG_DEBUG 1 40 | #define LOG_INFO 2 41 | #define EPOLL_PROXY_CLIENT 1 42 | #define EPOLL_PROXY_SERVER 2 43 | #define TO_CLIENT 1 44 | #define TO_SERVER 2 45 | 46 | #define MIN(a, b) ((a) < (b) ? (a):(b)) 47 | 48 | static void log_core(const char *prompt, const char *format, ...); 49 | #define log_debug(...) \ 50 | if (proxy.log_level <= LOG_DEBUG) log_core("debug", __VA_ARGS__) 51 | #define log_info(...) 
\ 52 | if (proxy.log_level <= LOG_INFO) log_core("info", __VA_ARGS__) 53 | 54 | #define log_error(s) fprintf(stderr, "%s [+error] %s:%d %s -- %s %s\n",\ 55 | cached_log_time, __FILE__, __LINE__, __FUNCTION__, s, strerror(errno)) 56 | 57 | typedef struct conn { 58 | struct sockaddr_in local; 59 | struct sockaddr_in remote; 60 | int sock; 61 | } conn_t; 62 | 63 | typedef struct session { 64 | conn_t conn_pc; /* proxy-client */ 65 | conn_t conn_ps; /* proxy-server */ 66 | char host[HOST_MAX_LENGTH]; 67 | char *request; 68 | ssize_t request_len; 69 | } session_t; 70 | 71 | typedef struct _epoll_data{ 72 | int fd; 73 | int flag; 74 | session_t *session; 75 | } epoll_data; 76 | 77 | KHASH_MAP_INIT_INT(32, session_t *); 78 | #define hash_t khash_t(32) 79 | 80 | typedef struct { 81 | int epfd; 82 | mpool *pool; 83 | hash_t *h; /* keep established socket */ 84 | hash_t *hc; /* keep connecting socket */ 85 | int (*put)(hash_t *, session_t *); 86 | session_t* (*get)(hash_t *, int); 87 | int (*del)(hash_t *, int); 88 | session_t* (*create)(mpool *, int); 89 | int (*close)(session_t *, int); 90 | void (*dump)(hash_t *, const char *); 91 | } session_manager_t; 92 | 93 | 94 | typedef struct { 95 | char addr[32]; 96 | unsigned int port; 97 | int log_level; 98 | int is_dump; 99 | } proxy_t; 100 | 101 | 102 | void proxy_run(void); 103 | void proxy_init(void); 104 | void proxy_exit(void); 105 | int handle_accept_event(int); 106 | int handle_epollhup_event(int); 107 | int handle_epollin_event(epoll_data *); 108 | int handle_epollout_event(epoll_data *); 109 | int parse_http_request(session_t *, char *, size_t); 110 | int cache_http_request(mpool *, session_t *, char *, ssize_t); 111 | int sec_send(session_t *, int, char *, size_t); 112 | int put_session(hash_t *, session_t *); 113 | session_t *get_session(hash_t *, int); 114 | int del_session(hash_t *, int); 115 | session_t *create_session(mpool *, int); 116 | int close_session(session_t *, int); 117 | void dump_one_session(session_t 
*); 118 | void dump_session(hash_t *, const char *); 119 | static int open_listening_socket(const char *, unsigned int, int); 120 | static void epoll_ctl_add(int, epoll_data *, uint32_t); 121 | static void set_sockaddr(struct sockaddr_in *, const char *, unsigned int); 122 | static int set_nonblocking(int); 123 | static int new_connection(session_t *, int); 124 | static int close_socket(int, int); 125 | static int parse_host_field(char *, char *, unsigned int *); 126 | static char *sockaddr_to_str(struct sockaddr_in *); 127 | static void INT_handler(int); 128 | static void timer_handler(void); 129 | void print_hex_ascii_line(const u_char *, int, int); 130 | void print_payload(const u_char *, int); 131 | 132 | 133 | static char *not_found_response = 134 | "HTTP/1.1 404 Not Found\n" 135 | "Content-type: text/html\n" "\n" 136 | "\n" 137 | " \n" 138 | "

Not Found

\n" 139 | "

The requested URL was not found on this server.

\n" 140 | " \n" "\n"; 141 | 142 | static char cached_log_time[LOG_TIME_STR_LEN]; 143 | static proxy_t proxy; 144 | 145 | static session_manager_t sm = { 146 | .put = put_session, 147 | .get = get_session, 148 | .del = del_session, 149 | .dump = dump_session, 150 | .create = create_session, 151 | .close = close_session, 152 | }; 153 | 154 | 155 | void show_help(char *cmd) 156 | { 157 | fprintf(stderr, "Usage: %s [-?hvVd] [-s address] [-p port]\n" 158 | "Options:\n" 159 | " -?, -h :print this help\n" 160 | " -v :print version number\n" 161 | " -V :verbose mode, dump session table\n" 162 | " -d :enable debug mode\n" 163 | " -s :proxy listening address\n" 164 | " -p :proxy listening port\n" 165 | , cmd); 166 | } 167 | 168 | 169 | void get_options(int argc, char *argv[]) 170 | { 171 | int i; 172 | u_char *p; 173 | 174 | for (i = 1; i < argc; i++) { 175 | p = (u_char *) argv[i]; 176 | 177 | if (*p++ != '-') { 178 | fprintf(stderr, "invalid options \"%s\"\n", argv[i]); 179 | exit(1); 180 | } 181 | 182 | while (*p) { 183 | switch (*p++) { 184 | case '?': 185 | case 'h': 186 | show_help(argv[0]); 187 | exit(0); 188 | break; 189 | case 'v': 190 | fprintf(stderr, "%s %s\n", argv[0], VERSION); 191 | exit(0); 192 | break; 193 | case 'V': 194 | proxy.is_dump = 1; 195 | break; 196 | case 'd': 197 | proxy.log_level = LOG_DEBUG; 198 | break; 199 | case 's': 200 | if (*p && strlen((char *) p) < 32) { 201 | strcpy(proxy.addr, (char *) p); 202 | } else if (argv[++i] && strlen(argv[i]) < 32) { 203 | strcpy(proxy.addr, argv[i]); 204 | } else { 205 | fprintf(stderr, "invalid option: %s\n", argv[i]); 206 | exit(0); 207 | } 208 | break; 209 | case 'p': 210 | if (*p) { 211 | proxy.port = atoi((char *) p); 212 | } else if (argv[++i]) { 213 | proxy.port = atoi(argv[i]); 214 | } else { 215 | fprintf(stderr, "invalid option: %s\n", argv[i]); 216 | exit(0); 217 | } 218 | break; 219 | default: 220 | break; 221 | } 222 | } 223 | } 224 | } 225 | 226 | 227 | int main(int argc, char *argv[]) 228 | { 
229 | proxy_init(); 230 | get_options(argc, argv); 231 | 232 | if (signal(SIGINT, SIG_IGN) != SIG_IGN) { 233 | signal(SIGINT, INT_handler); 234 | } 235 | signal(SIGPIPE, SIG_IGN); 236 | 237 | proxy_run(); 238 | return 0; 239 | } 240 | 241 | 242 | void proxy_init(void) 243 | { 244 | strcpy(proxy.addr, DEFAULT_ADDR); 245 | proxy.port = DEFAULT_PORT; 246 | proxy.log_level = LOG_INFO; 247 | proxy.is_dump = 0; 248 | 249 | sm.epfd = epoll_create(1); 250 | sm.pool = mpool_init(10, POOL_MAX); 251 | sm.h = kh_init(32); 252 | sm.hc = kh_init(32); 253 | } 254 | 255 | 256 | void proxy_run(void) 257 | { 258 | int i; 259 | int nfds; 260 | int listen_sock; 261 | epoll_data ed; 262 | epoll_data *edp; 263 | struct epoll_event events[MAX_EVENTS]; 264 | 265 | listen_sock = open_listening_socket(proxy.addr, proxy.port, MAX_CONN); 266 | if (listen_sock == -1) { 267 | exit(1); 268 | } 269 | ed.fd = listen_sock; 270 | epoll_ctl_add(sm.epfd, &ed, EPOLLIN | EPOLLET); 271 | 272 | for (;;) { 273 | nfds = epoll_wait(sm.epfd, events, MAX_EVENTS, -1); 274 | 275 | /* hook function, such as dump session */ 276 | timer_handler(); 277 | 278 | log_debug("epoll_wait return: %d\n", nfds); 279 | if (nfds == -1) { 280 | log_error("epoll_wait()"); 281 | continue; 282 | } 283 | 284 | for (i = 0; i < nfds; i++) { 285 | edp = events[i].data.ptr; 286 | 287 | if (edp->fd == listen_sock) { 288 | handle_accept_event(listen_sock); 289 | continue; 290 | } 291 | 292 | if (events[i].events & EPOLLIN) { 293 | handle_epollin_event(edp); 294 | } 295 | 296 | if (events[i].events & EPOLLOUT) { 297 | handle_epollout_event(edp); 298 | } 299 | 300 | if (events[i].events & EPOLLERR) { 301 | if (errno != EAGAIN && errno != EWOULDBLOCK) { 302 | log_error("EPOLLERR:"); 303 | } 304 | } 305 | 306 | /* check if the connection is closing */ 307 | if (events[i].events & (EPOLLRDHUP | EPOLLHUP)) { 308 | handle_epollhup_event(edp->fd); 309 | } 310 | } 311 | } 312 | } 313 | 314 | 315 | void proxy_exit(void) 316 | { 317 | 
mpool_free(sm.pool); 318 | kh_destroy(32, sm.h); 319 | kh_destroy(32, sm.hc); 320 | } 321 | 322 | 323 | /* 324 | * accept new connection from client 325 | */ 326 | int handle_accept_event(int listen_sock) 327 | { 328 | int conn_sock; 329 | socklen_t len; 330 | epoll_data *ed; 331 | struct sockaddr_in cli_addr; 332 | 333 | /* one or more incoming connection */ 334 | while (1) { 335 | len = sizeof(struct sockaddr_in); 336 | conn_sock = accept(listen_sock, (struct sockaddr *)&cli_addr, &len); 337 | if (conn_sock == -1) { 338 | if (errno == EAGAIN || errno == EWOULDBLOCK) { 339 | break; 340 | } 341 | log_error("accept()"); 342 | return -1; 343 | } 344 | 345 | set_nonblocking(conn_sock); 346 | 347 | ed = (epoll_data *) mpool_alloc(sm.pool, sizeof(epoll_data)); 348 | ed->fd = conn_sock; 349 | ed->flag = EPOLL_PROXY_CLIENT; 350 | ed->session = NULL; 351 | epoll_ctl_add(sm.epfd, ed, EPOLLIN | EPOLLOUT | 352 | EPOLLET | EPOLLRDHUP | EPOLLHUP); 353 | 354 | log_info("accept connection from %s\n", sockaddr_to_str(&cli_addr)); 355 | } 356 | 357 | return 0; 358 | } 359 | 360 | 361 | /* 362 | * close tcp connection, unregister session and epoll 363 | */ 364 | int handle_epollhup_event(int fd) 365 | { 366 | log_debug("enter %s\n", __FUNCTION__); 367 | 368 | session_t *session = sm.get(sm.h, fd); 369 | if (session != NULL) { 370 | sm.close(session, sm.epfd); 371 | sm.del(sm.h, session->conn_ps.sock); 372 | } else { 373 | log_debug("do not find any session in cache \n"); 374 | //close_socket(fd, sm.epfd); 375 | } 376 | return 0; 377 | } 378 | 379 | 380 | /* 381 | * receive http request from client and http response from server 382 | */ 383 | int handle_epollin_event(epoll_data *edp) 384 | { 385 | int ret; 386 | char buf[BUF_SIZE]; 387 | ssize_t nread; 388 | ssize_t bytes_read; 389 | session_t *session; 390 | 391 | log_debug("enter %s\n", __FUNCTION__); 392 | 393 | bzero(buf, sizeof(buf)); 394 | bytes_read = 0; 395 | 396 | while ((nread = recv(edp->fd, buf + bytes_read, BUF_SIZE - 
bytes_read, MSG_DONTWAIT)) > 0) { 397 | bytes_read += nread; 398 | } 399 | 400 | if (nread == -1) { 401 | if (errno != EAGAIN && errno != EWOULDBLOCK) { 402 | log_error("recv()"); 403 | /* EBADF, EPIPE, ECONNRESET */ 404 | if ((session = edp->session)) { 405 | sm.close(session, sm.epfd); 406 | sm.del(sm.h, session->conn_ps.sock); 407 | } 408 | } 409 | } 410 | 411 | /* 412 | * the fd is proxy-server, forward the buf to client 413 | */ 414 | if (edp->flag == EPOLL_PROXY_SERVER) { 415 | session = sm.get(sm.h, edp->fd); 416 | if (session) { 417 | ret = sec_send(session, TO_CLIENT, buf, bytes_read); 418 | if (ret < 0) { 419 | return -1; 420 | } 421 | } else { 422 | log_error("unexpected error"); 423 | } 424 | } else if (edp->flag == EPOLL_PROXY_CLIENT) { 425 | /* 426 | * check if there exists session for the client 427 | * maybe keepalive, reuse the old connection 428 | */ 429 | if (edp->session) { 430 | return sec_send(edp->session, TO_SERVER, buf, bytes_read); 431 | } 432 | 433 | session = sm.create(sm.pool, edp->fd); 434 | if (session == NULL) { 435 | goto proxy_request_error; 436 | } 437 | 438 | if (parse_http_request(session, buf, bytes_read) < 0) { 439 | goto proxy_request_error; 440 | } 441 | 442 | if ((ret = new_connection(session, sm.epfd)) < 0) { 443 | goto proxy_request_error; 444 | } else if (ret == 1) { 445 | /* connection is still in progress */ 446 | cache_http_request(sm.pool, session, buf, bytes_read); 447 | sm.put(sm.hc, session); 448 | } else { 449 | /* connection is ready */ 450 | sm.put(sm.h, session); 451 | ret = sec_send(session, TO_SERVER, buf, bytes_read); 452 | if (ret < 0) { 453 | goto proxy_request_error; 454 | } 455 | } 456 | } 457 | 458 | return 0; 459 | 460 | proxy_request_error: 461 | log_error("proxy_request_error"); 462 | print_payload((u_char *) buf, MIN(bytes_read, PAYLOAD_MAX_LENGTH)); 463 | ret = write(edp->fd, not_found_response, strlen(not_found_response)); 464 | if (session) { 465 | sm.close(session, sm.epfd); 466 | sm.del(sm.h, 
session->conn_ps.sock); 467 | } 468 | return -1; 469 | } 470 | 471 | 472 | /* 473 | * mainly handle these async connect event 474 | */ 475 | int handle_epollout_event(epoll_data *edp) 476 | { 477 | session_t *s; 478 | 479 | log_debug("enter %s\n", __FUNCTION__); 480 | 481 | s = sm.get(sm.hc, edp->fd); 482 | if (edp->flag == EPOLL_PROXY_SERVER && s != NULL) { 483 | sm.put(sm.h, s); 484 | sm.del(sm.hc, edp->fd); 485 | if (sec_send(s, TO_SERVER, s->request, s->request_len) < 0) { 486 | log_error("sec_send"); 487 | print_payload((u_char *) s->request, 488 | MIN(s->request_len, PAYLOAD_MAX_LENGTH)); 489 | return sec_send(s, TO_CLIENT, not_found_response, 490 | strlen(not_found_response)); 491 | } 492 | } else { 493 | log_debug("%s: get_session fail\n", __FUNCTION__); 494 | } 495 | return 0; 496 | } 497 | 498 | 499 | int parse_http_request(session_t *session, char *buf, size_t buflen) 500 | { 501 | int pret, minor_version; 502 | size_t prevbuflen = 0, method_len, path_len, num_headers, i; 503 | const char *method, *path; 504 | struct phr_header headers[100]; 505 | 506 | num_headers = sizeof(headers) / sizeof(headers[0]); 507 | pret = phr_parse_request(buf, buflen, &method, &method_len, &path, 508 | &path_len, &minor_version, headers, &num_headers, prevbuflen); 509 | if (pret <= 0) { 510 | log_debug("parse request fail: %d\n", pret); 511 | return pret; 512 | } 513 | for (i = 0; i != num_headers; ++i) { 514 | if (strncasecmp(headers[i].name, "host", 4) == 0) { 515 | strncpy(session->host, headers[i].value, 516 | (int)headers[i].value_len); 517 | return 1; 518 | } 519 | } 520 | return pret; 521 | } 522 | 523 | 524 | int cache_http_request(mpool *pool, session_t *s, 525 | char *request, ssize_t request_len) 526 | { 527 | char *buf; 528 | 529 | buf = (char *) mpool_alloc(pool, request_len); 530 | if (buf == NULL) { 531 | log_error("mpool_alloc() error\n"); 532 | return -1; 533 | } 534 | 535 | memcpy(buf, request, request_len); 536 | s->request = buf; 537 | s->request_len = 
request_len; 538 | 539 | return 0; 540 | } 541 | 542 | 543 | int sec_send(session_t *s, int direction, char *buf, size_t len) 544 | { 545 | int sk; 546 | ssize_t nw; 547 | ssize_t bytes_send; 548 | 549 | if (direction != TO_CLIENT && direction != TO_SERVER) { 550 | log_error("unexpected direction"); 551 | return -1; 552 | } 553 | 554 | if (direction == TO_CLIENT) { 555 | sk = s->conn_pc.sock; 556 | } else { 557 | sk = s->conn_ps.sock; 558 | } 559 | 560 | bytes_send = 0; 561 | 562 | while ((nw = send(sk, buf + bytes_send, len - bytes_send, MSG_NOSIGNAL)) > 0) { 563 | bytes_send += nw; 564 | } 565 | 566 | if (nw == -1 && errno != EAGAIN) { 567 | log_error("send()"); 568 | dump_one_session(s); 569 | print_payload((u_char *) buf, MIN(len, PAYLOAD_MAX_LENGTH)); 570 | /* EBADF, EPIPE, ECONNRESET */ 571 | sm.close(s, sm.epfd); 572 | sm.del(sm.h, s->conn_ps.sock); 573 | 574 | return -1; 575 | } 576 | 577 | return 0; 578 | } 579 | 580 | 581 | static int open_listening_socket(const char *addr, unsigned int port, int max_conn) 582 | { 583 | int sock; 584 | struct sockaddr_in srv_addr; 585 | 586 | sock = socket(AF_INET, SOCK_STREAM, 0); 587 | if (sock == -1) { 588 | log_error("scoket()"); 589 | return -1; 590 | } 591 | 592 | if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, 593 | &(int) {1}, sizeof(int)) < 0) 594 | { 595 | log_error("setsockopt(SO_REUSEADDR)"); 596 | return -1; 597 | } 598 | 599 | set_sockaddr(&srv_addr, addr, port); 600 | if (bind(sock, (struct sockaddr *)&srv_addr, sizeof(srv_addr)) < 0) { 601 | log_error("bind()"); 602 | return -1; 603 | } 604 | 605 | if (set_nonblocking(sock) < 0) { 606 | log_error("set_nonblocking()"); 607 | return -1; 608 | } 609 | 610 | if (listen(sock, max_conn) < 0) { 611 | log_error("listen()"); 612 | return -1; 613 | } 614 | 615 | log_info("listen on %s:%d \n", addr, port); 616 | 617 | return sock; 618 | } 619 | 620 | 621 | /* 622 | * register events of fd to epfd 623 | */ 624 | static void epoll_ctl_add(int epfd, epoll_data *data, 
uint32_t events) 625 | { 626 | struct epoll_event ev; 627 | ev.events = events | EPOLLERR; 628 | ev.data.ptr = data; 629 | if (epoll_ctl(epfd, EPOLL_CTL_ADD, data->fd, &ev) == -1) { 630 | log_error("epoll_ctl()"); 631 | exit(1); 632 | } 633 | } 634 | 635 | 636 | static void set_sockaddr(struct sockaddr_in *addr, const char *ipaddr, 637 | unsigned int port) 638 | { 639 | bzero((char *)addr, sizeof(struct sockaddr_in)); 640 | addr->sin_family = AF_INET; 641 | inet_aton(ipaddr, &addr->sin_addr); 642 | addr->sin_port = htons(port); 643 | } 644 | 645 | 646 | static int set_nonblocking(int sockfd) 647 | { 648 | if (fcntl(sockfd, F_SETFL, 649 | fcntl(sockfd, F_GETFL, 0) | O_NONBLOCK) == -1) 650 | { 651 | return -1; 652 | } 653 | return 0; 654 | } 655 | 656 | 657 | /* 658 | * create new connection between proxy and remote server 659 | * return value: 660 | * -1: connect error 661 | * 0: connect success 662 | * 1: connect attempt is in progress 663 | */ 664 | static int new_connection(session_t *session, int epfd) 665 | { 666 | int connect_flag; 667 | int sockfd; 668 | char ip[16]; 669 | socklen_t len; 670 | epoll_data *ed; 671 | unsigned int port; 672 | struct sockaddr_in srv_addr; 673 | 674 | sockfd = socket(AF_INET, SOCK_STREAM, 0); 675 | if (sockfd == -1) { 676 | log_error("socket()"); 677 | goto error; 678 | } 679 | 680 | if (setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, 681 | &(int) {1}, sizeof(int)) < 0) 682 | { 683 | log_error("setsockopt(SO_REUSEADDR)"); 684 | goto error; 685 | } 686 | 687 | if (setsockopt(sockfd, IPPROTO_TCP, TCP_NODELAY, 688 | &(int) {1}, sizeof(int)) < 0) 689 | { 690 | log_error("setsockopt(TCP_NODELAY)"); 691 | goto error; 692 | } 693 | 694 | if (set_nonblocking(sockfd) < 0) { 695 | log_error("set_nonblocking()"); 696 | goto error; 697 | } 698 | 699 | if (parse_host_field(session->host, ip, &port) < 0) { 700 | goto error; 701 | } 702 | 703 | set_sockaddr(&srv_addr, ip, port); 704 | connect_flag = connect(sockfd, (struct sockaddr *)&srv_addr, 
sizeof(srv_addr)); 705 | if (connect_flag < 0 && errno != EINPROGRESS) { 706 | log_error("connect()"); 707 | goto error; 708 | } 709 | 710 | session->conn_ps.sock = sockfd; 711 | session->conn_ps.remote = srv_addr; 712 | len = sizeof(struct sockaddr_in); 713 | if (getsockname(sockfd, (struct sockaddr *)&session->conn_ps.local, &len) 714 | == -1) { 715 | log_error("getsockname()"); 716 | goto error; 717 | } 718 | 719 | ed = (epoll_data *) mpool_alloc(sm.pool, sizeof(epoll_data)); 720 | ed->fd = sockfd; 721 | ed->flag = EPOLL_PROXY_SERVER; 722 | ed->session = session; 723 | epoll_ctl_add(epfd, ed, EPOLLIN | EPOLLOUT | 724 | EPOLLET | EPOLLRDHUP | EPOLLHUP); 725 | 726 | log_debug("client: %s === server: %s\n", 727 | sockaddr_to_str(&session->conn_pc.remote), session->host); 728 | 729 | return (errno == EINPROGRESS ? 1 : 0); 730 | 731 | error: 732 | close(sockfd); 733 | return -1; 734 | } 735 | 736 | 737 | static int close_socket(int sockfd, int epfd) 738 | { 739 | int ret = epoll_ctl(epfd, EPOLL_CTL_DEL, sockfd, NULL); 740 | return (ret == 0 ? 
close(sockfd) : -1); 741 | } 742 | 743 | 744 | /* 745 | * put session_t into hash table with conn_ps.sock as key 746 | */ 747 | int put_session(hash_t *h, session_t *s) 748 | { 749 | int ret; 750 | khint_t k; 751 | 752 | k = kh_put(32, h, s->conn_ps.sock, &ret); 753 | if (ret == -1) { 754 | log_info("kh_put error, sockfd=%d, ret=%d\n", 755 | s->conn_ps.sock, ret); 756 | return -1; 757 | } 758 | if (ret == 0 && k < kh_end(h)) { 759 | log_info("key conflict, following two line are new and old:\n"); 760 | dump_one_session(s); 761 | dump_one_session((session_t *) kh_val(h, k)); 762 | //return -1; 763 | } 764 | kh_val(h, k) = s; 765 | return 0; 766 | } 767 | 768 | 769 | /* 770 | * retrieve session_t by key 771 | */ 772 | session_t *get_session(hash_t *h, int fd) 773 | { 774 | khint_t k; 775 | k = kh_get(32, h, fd); 776 | if (k < kh_end(h)) { 777 | return (session_t *) kh_val(h, k); 778 | } 779 | return NULL; 780 | } 781 | 782 | 783 | int del_session(hash_t *h, int fd) 784 | { 785 | khint_t k; 786 | k = kh_get(32, h, fd); 787 | if (k < kh_end(h)) { 788 | kh_del(32, h, k); 789 | return 0; 790 | } 791 | return -1; 792 | } 793 | 794 | 795 | session_t *create_session(mpool *pool, int sockfd) 796 | { 797 | session_t *s; 798 | socklen_t len = sizeof(struct sockaddr_in); 799 | 800 | s = (session_t *) mpool_alloc(pool, sizeof(session_t)); 801 | if (s == NULL) { 802 | log_debug("%s: mpool_alloc() error\n", __FUNCTION__); 803 | return NULL; 804 | } 805 | 806 | bzero((u_char *)s, sizeof(session_t)); 807 | s->conn_pc.sock = sockfd; 808 | 809 | if (getsockname(sockfd, (struct sockaddr *)&s->conn_pc.local, &len) 810 | == -1) { 811 | log_error("getsockname()"); 812 | return NULL; 813 | } 814 | 815 | if (getpeername(sockfd, (struct sockaddr *)&s->conn_pc.remote, &len) 816 | == -1) { 817 | log_error("getpeername()"); 818 | return NULL; 819 | } 820 | 821 | return s; 822 | } 823 | 824 | 825 | int close_session(session_t *s, int epfd) 826 | { 827 | char buf[32]; 828 | 829 | if (s == NULL) { 
830 | return 0; 831 | } 832 | 833 | strcpy(buf, sockaddr_to_str(&s->conn_pc.remote)); 834 | log_debug("closing %-24s%-24s%-24s\n", buf, 835 | sockaddr_to_str(&s->conn_ps.local), s->host); 836 | close_socket(s->conn_pc.sock, epfd); 837 | close_socket(s->conn_ps.sock, epfd); 838 | 839 | return 0; 840 | } 841 | 842 | 843 | void dump_one_session(session_t *s) 844 | { 845 | char buf[32]; 846 | 847 | strcpy(buf, sockaddr_to_str(&s->conn_pc.remote)); 848 | printf(ITEM_FORMAT, s->conn_ps.sock, buf, 849 | sockaddr_to_str(&s->conn_ps.local), s->host); 850 | } 851 | 852 | 853 | void dump_session(hash_t *table, const char *table_name) 854 | { 855 | khint_t k; 856 | 857 | if (kh_size(table) == 0){ 858 | log_info("%s session table is empty\n", table_name); 859 | return; 860 | } 861 | 862 | printf("+------------------------dump_session" 863 | " %s size:%d--------------------+\n", table_name, kh_size(table)); 864 | printf(HEADER_FORMAT, "key", "client", "proxy", "server"); 865 | 866 | for (k = kh_begin(table); k != kh_end(table); k++) { 867 | if (kh_exist(table, k)) { 868 | dump_one_session((session_t *) kh_val(table, k)); 869 | } 870 | } 871 | } 872 | 873 | 874 | /* 875 | * parse host field in http request header to get ip and port 876 | */ 877 | static int parse_host_field(char *hostname, char *ip, unsigned int *port) 878 | { 879 | int i; 880 | char *p; 881 | struct hostent *he; 882 | struct in_addr **addr_list; 883 | 884 | for (p = hostname; *p && *p != ':'; p++) ; 885 | 886 | if (*p != 0) { 887 | *port = atoi(p); 888 | *p = 0; 889 | } else { 890 | *port = 80; 891 | } 892 | 893 | if ((he = gethostbyname(hostname)) == NULL) { 894 | log_debug("gethostbyname(%s)", hostname); 895 | return -1; 896 | } 897 | 898 | addr_list = (struct in_addr **)he->h_addr_list; 899 | for (i = 0; addr_list[i] != NULL; i++) { 900 | inet_ntop(AF_INET, (char *)&(*addr_list[i]), ip, 16); 901 | return 0; 902 | } 903 | 904 | return 1; 905 | } 906 | 907 | 908 | static char *sockaddr_to_str(struct sockaddr_in 
*addr) 909 | { 910 | char buf[16]; 911 | static char addr_s[32]; 912 | 913 | inet_ntop(AF_INET, (char *)&(addr->sin_addr), 914 | buf, sizeof(struct sockaddr_in)); 915 | memset(addr_s, 0, sizeof(addr_s)); 916 | snprintf(addr_s, 32, "%s:%d", buf, ntohs(addr->sin_port)); 917 | return addr_s; 918 | } 919 | 920 | 921 | static void log_core(const char *prompt, const char *format, ...) 922 | { 923 | va_list argList; 924 | va_start(argList, format); 925 | printf("%s [+%s] ", cached_log_time, prompt); 926 | vprintf(format, argList); 927 | va_end(argList); 928 | } 929 | 930 | 931 | /* 932 | * handle Ctrl-C signal 933 | */ 934 | static void INT_handler(int sig) 935 | { 936 | char c; 937 | signal(sig, SIG_IGN); 938 | printf("Ouch, did you hit Ctrl-C?\n" 939 | "Do you really want to quit [y/n]?"); 940 | c = getchar(); 941 | if (c == 'y' || c == 'Y') { 942 | proxy_exit(); 943 | exit(0); 944 | } else { 945 | signal(SIGINT, INT_handler); 946 | } 947 | } 948 | 949 | 950 | static void timer_handler(void) 951 | { 952 | static time_t last_time; 953 | time_t now = time(NULL); 954 | struct tm *tm; 955 | 956 | tm = localtime(&now); 957 | sprintf(cached_log_time, LOG_TIME_FORMAT, 958 | tm->tm_year + 1900, tm->tm_mon, tm->tm_mday, 959 | tm->tm_hour, tm->tm_min, tm->tm_sec); 960 | 961 | if (proxy.is_dump == 1 && difftime(now, last_time) >= DUMP_INTERVAL) { 962 | last_time = now; 963 | sm.dump(sm.h, "established"); 964 | sm.dump(sm.hc, "connecting"); 965 | } 966 | } 967 | 968 | 969 | /* 970 | * print data in rows of 16 bytes: offset hex ascii 971 | * 972 | * 00000 47 45 54 20 2f 20 48 54 54 50 2f 31 2e 31 0d 0a GET / HTTP/1.1.. 
973 | * 974 | * refer to: http://www.tcpdump.org/sniffex.c 975 | */ 976 | void print_hex_ascii_line(const u_char *payload, int len, int offset) 977 | { 978 | 979 | int i; 980 | int gap; 981 | const u_char *ch; 982 | 983 | /* offset */ 984 | printf("%05d ", offset); 985 | 986 | /* hex */ 987 | ch = payload; 988 | for (i = 0; i < len; i++) { 989 | printf("%02x ", *ch); 990 | ch++; 991 | /* print extra space after 8th byte for visual aid */ 992 | if (i == 7) 993 | printf(" "); 994 | } 995 | /* print space to handle line less than 8 bytes */ 996 | if (len < 8) 997 | printf(" "); 998 | 999 | /* fill hex gap with spaces if not full line */ 1000 | if (len < 16) { 1001 | gap = 16 - len; 1002 | for (i = 0; i < gap; i++) { 1003 | printf(" "); 1004 | } 1005 | } 1006 | printf(" "); 1007 | 1008 | /* ascii (if printable) */ 1009 | ch = payload; 1010 | for (i = 0; i < len; i++) { 1011 | if (isprint(*ch)) 1012 | printf("%c", *ch); 1013 | else 1014 | printf("."); 1015 | ch++; 1016 | } 1017 | 1018 | printf("\n"); 1019 | 1020 | return; 1021 | } 1022 | 1023 | 1024 | /* 1025 | * print packet payload data (avoid printing binary data) 1026 | * 1027 | * refer to: http://www.tcpdump.org/sniffex.c 1028 | */ 1029 | void print_payload(const u_char *payload, int len) 1030 | { 1031 | 1032 | int len_rem = len; 1033 | int line_width = 16; /* number of bytes per line */ 1034 | int line_len; 1035 | int offset = 0; /* zero-based offset counter */ 1036 | const u_char *ch = payload; 1037 | 1038 | if (len <= 0) 1039 | return; 1040 | 1041 | /* data fits on one line */ 1042 | if (len <= line_width) { 1043 | print_hex_ascii_line(ch, len, offset); 1044 | return; 1045 | } 1046 | 1047 | /* data spans multiple lines */ 1048 | for (;;) { 1049 | /* compute current line length */ 1050 | line_len = line_width % len_rem; 1051 | /* print line */ 1052 | print_hex_ascii_line(ch, line_len, offset); 1053 | /* compute total remaining */ 1054 | len_rem = len_rem - line_len; 1055 | /* shift pointer to remaining bytes to 
print */ 1056 | ch = ch + line_len; 1057 | /* add offset */ 1058 | offset = offset + line_width; 1059 | /* check if we have line width chars or less */ 1060 | if (len_rem <= line_width) { 1061 | /* print last line and get out */ 1062 | print_hex_ascii_line(ch, len_rem, offset); 1063 | break; 1064 | } 1065 | } 1066 | 1067 | return; 1068 | } 1069 | --------------------------------------------------------------------------------