├── .gitignore ├── .travis.yml ├── ChangeLog ├── Dockerfile ├── LICENSE ├── Makefile.am ├── NOTICE ├── README.md ├── conf ├── nutcracker.leaf.yml ├── nutcracker.rediscluster.yml ├── nutcracker.root.yml └── nutcracker.yml ├── configure.ac ├── contrib ├── .gitignore ├── LuaJIT-2.0.3.tar.gz ├── LuaJIT-2.0.3 │ └── .gitignore ├── Makefile.am ├── yaml-0.1.4.tar.gz └── yaml-0.1.4 │ └── .gitignore ├── entrypoint.sh ├── m4 └── .gitignore ├── man └── nutcracker.8 ├── notes ├── c-styleguide.txt ├── debug.txt ├── kqueue.pdf ├── memcache.txt ├── recommendation.md ├── redis.md ├── rediscluster.md └── socket.txt ├── nutcracker.yml ├── scripts ├── benchmark-mget.py ├── multi_get.sh ├── nutcracker.init ├── nutcracker.init.debian ├── nutcracker.spec ├── pipelined_read.sh ├── pipelined_write.sh ├── populate_memcached.sh ├── redis-check.py └── redis-check.sh ├── sources.list └── src ├── Makefile.am ├── event ├── Makefile.am ├── nc_epoll.c ├── nc_event.h ├── nc_evport.c └── nc_kqueue.c ├── hashkit ├── Makefile.am ├── nc_crc16.c ├── nc_crc32.c ├── nc_fnv.c ├── nc_hashkit.h ├── nc_hsieh.c ├── nc_jenkins.c ├── nc_ketama.c ├── nc_md5.c ├── nc_modula.c ├── nc_murmur.c ├── nc_one_at_a_time.c └── nc_random.c ├── lua ├── idcmap.lua ├── logic_idcmap.lua ├── pool.lua ├── redis.lua ├── replica_set.lua └── server.lua ├── nc.c ├── nc_array.c ├── nc_array.h ├── nc_assoc.c ├── nc_assoc.h ├── nc_client.c ├── nc_client.h ├── nc_conf.c ├── nc_conf.h ├── nc_connection.c ├── nc_connection.h ├── nc_core.c ├── nc_core.h ├── nc_ipwhitelist.c ├── nc_ipwhitelist.h ├── nc_log.c ├── nc_log.h ├── nc_mbuf.c ├── nc_mbuf.h ├── nc_message.c ├── nc_message.h ├── nc_proxy.c ├── nc_proxy.h ├── nc_queue.h ├── nc_rbtree.c ├── nc_rbtree.h ├── nc_request.c ├── nc_response.c ├── nc_script.c ├── nc_script.h ├── nc_server.c ├── nc_server.h ├── nc_signal.c ├── nc_signal.h ├── nc_stats.c ├── nc_stats.h ├── nc_string.c ├── nc_string.h ├── nc_util.c ├── nc_util.h └── proto ├── Makefile.am ├── nc_memcache.c ├── nc_proto.h └── 
nc_redis.c /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | # pyc 3 | *.pyc 4 | 5 | # Compiled Object files 6 | *.lo 7 | *.o 8 | 9 | # Compiled Dynamic libraries 10 | *.so 11 | 12 | # Compiled Static libraries 13 | *.la 14 | *.a 15 | 16 | # Compiled misc 17 | *.dep 18 | *.gcda 19 | *.gcno 20 | *.gcov 21 | 22 | # Packages 23 | *.tar.gz 24 | *.tar.bz2 25 | 26 | # Logs 27 | *.log 28 | 29 | # Temporary 30 | *.swp 31 | *.~ 32 | *.project 33 | *.cproject 34 | 35 | # Core and executable 36 | core* 37 | nutcracker 38 | 39 | # Autotools 40 | .deps 41 | .libs 42 | 43 | # extracted 44 | !/contrib/yaml-0.1.4.tar.gz 45 | !/contrib/LuaJIT-2.0.3.tar.gz 46 | 47 | # Merge tmp file 48 | *.orig 49 | 50 | /aclocal.m4 51 | /autom4te.cache 52 | /stamp-h1 53 | /autoscan.log 54 | /libtool 55 | 56 | /config/config.guess 57 | /config/config.sub 58 | /config/depcomp 59 | /config/install-sh 60 | /config/ltmain.sh 61 | /config/missing 62 | /config 63 | 64 | /config.h 65 | /config.h.in 66 | /config.h.in~ 67 | /config.log 68 | /config.status 69 | /configure.scan 70 | /configure 71 | 72 | Makefile 73 | Makefile.in 74 | 75 | /output 76 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: c 2 | script: CFLAGS="-ggdb3 -O0" autoreconf -fvi && ./configure --enable-debug=log && make && sudo make install 3 | 4 | -------------------------------------------------------------------------------- /ChangeLog: -------------------------------------------------------------------------------- 1 | 2014-18-10 idning 2 | 3 | * twemproxy: version 0.4.0 release 4 | mget improve (idning) 5 | many new commands supported: LEX, PFADD, PFMERGE, SORT, PING, QUIT, SCAN... 
(mattrobenolt, areina, idning) 6 | handle max open file limit(allenlz) 7 | add notice-log and use ms time in log(idning) 8 | fix bug in string_compare (andyqzb) 9 | fix deadlock in sighandler (idning) 10 | 11 | 2013-20-12 Manju Rajashekhar 12 | * twemproxy: version 0.3.0 release 13 | SRANDMEMBER support for the optional count argument (mkhq) 14 | Handle case where server responds while the request is still being sent (jdi-tagged) 15 | event ports (solaris/smartos) support 16 | add timestamp when the server was ejected 17 | support for set ex/px/nx/xx for redis 2.6.12 and up (ypocat) 18 | kqueue (bsd) support (ferenyx) 19 | fix parsing redis response to accept integer reply (charsyam) 20 | 21 | 2013-23-04 Manju Rajashekhar 22 | * twemproxy: version 0.2.4 release 23 | redis keys must be less than mbuf_data_size() in length (fifsky) 24 | Adds support for DUMP/RESTORE commands in Redis (remotezygote) 25 | Use of the weight value in the modula distribution (mezzatto) 26 | Add support to unix socket connections to servers (mezzatto) 27 | only check for duplicate server name and not 'host:port:weight' when 'name' is configured 28 | crc16 hash support added (mezzatto) 29 | 30 | 2013-31-01 Manju Rajashekhar 31 | * twemproxy: version 0.2.3 release 32 | RPOPLPUSH, SDIFF, SDIFFSTORE, SINTER, SINTERSTORE, SMOVE, SUNION, SUNIONSTORE, ZINTERSTORE, and ZUNIONSTORE support (dcartoon) 33 | EVAL and EVALSHA support (ferenyx) 34 | exit 1 if configuration file is invalid (cofyc) 35 | return non-zero exit status when nutcracker cannot start for some reason 36 | use server names in stats (charsyam) 37 | Fix failure to resolve long FQDN name resolve (conmame) 38 | add support for hash tags 39 | 40 | 2012-18-10 Manju Rajashekhar 41 | 42 | * twemproxy: version 0.2.2 release 43 | fix the off-by-one error when calculating redis key length 44 | 45 | 2012-12-10 Manju Rajashekhar 46 | 47 | * twemproxy: version 0.2.1 release 48 | don't use buf in conf_add_server 49 | allow an optional instance 
name for consistent hashing (charsyam) 50 | add --stats-addr=S option 51 | add stats-bind-any -a option (charsyam) 52 | 53 | 2012-12-03 Manju Rajashekhar 54 | 55 | * twemproxy: version 0.2.0 release 56 | add -D or --describe-stats command-line argument to print stats description 57 | redis support in twemproxy 58 | setup pre/post splitcopy and pre/post coalesce handlers in msg struct 59 | memcache pre_splitcopy, post_splitcopy, pre_coalesce and post_coalesce handlers 60 | every fragment of a msg vector keeps track of the first/last fragment, number of fragments and fragment owner 61 | set up msg parser handler for memcache connections 62 | refactor parsing code and create header file nc_proto.h 63 | stats_listen should use st->addr as the listening address string 64 | delete stats tracking memcache requests and responses; stats module no longer tracks protocol related stats 65 | 66 | 2012-10-27 Manju Rajashekhar 67 | 68 | * twemproxy: version 0.1.20 release 69 | on msg_repair, msg->pos should point to nbuf->pos and not nbuf->last 70 | refactor memcache parsing code into proto directory 71 | add redis option to configuration file 72 | fix macro definition strXcmp error for big endian 73 | fix log_hexdump and loga_hexdump 74 | 75 | 2012-07-31 Manju Rajashekhar 76 | 77 | * twemproxy: version 0.1.19 release 78 | close server connection on a stray response (yashh, bmatheny) 79 | 80 | 2012-06-19 Manju Rajashekhar 81 | 82 | * twemproxy: version 0.1.18 release 83 | command line option to set mbuf chunk size 84 | 85 | 2012-05-09 Manju Rajashekhar 86 | 87 | * twemproxy: version 0.1.17 release 88 | use _exit(0) instead of exit(0) when daemonizing 89 | use loga instead of log_stderr in nc_stacktrace 90 | 91 | 2012-02-09 Manju Rajashekhar 92 | 93 | * twemproxy: version 0.1.16 release 94 | twemproxy (aka nutcracker) is a fast and lightweight proxy for memcached protocol. 
95 | 96 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:14.04 2 | 3 | MAINTAINER YanMing, yanming02@baidu.com 4 | 5 | COPY . /opt/proxy/src/ 6 | COPY sources.list /etc/apt/sources.list 7 | 8 | RUN apt-get update && \ 9 | apt-get install -y --no-install-recommends libc6-dev gcc make autoconf automake libtool&& \ 10 | rm -rf /var/lib/apt/lists/* && \ 11 | cd /opt/proxy/src && \ 12 | autoreconf -fvi && ./configure --enable-debug=full && make && \ 13 | mkdir -p /opt/proxy/bin && mkdir -p /opt/proxy/conf && mkdir -p /opt/proxy/log && \ 14 | cp /opt/proxy/src/src/nutcracker /opt/proxy/bin/ && \ 15 | cp /opt/proxy/src/src/lua /opt/proxy/bin/ -r 16 | 17 | COPY entrypoint.sh . 18 | COPY nutcracker.yml /opt/proxy/conf/ 19 | 20 | ENTRYPOINT [ "./entrypoint.sh" ] 21 | -------------------------------------------------------------------------------- /Makefile.am: -------------------------------------------------------------------------------- 1 | MAINTAINERCLEANFILES = Makefile.in aclocal.m4 configure config.h.in config.h.in~ stamp-h.in 2 | 3 | ACLOCAL_AMFLAGS = -I m4 4 | 5 | SUBDIRS = contrib src 6 | 7 | dist_man_MANS = man/nutcracker.8 8 | 9 | EXTRA_DIST = README.md NOTICE LICENSE ChangeLog conf scripts notes 10 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | twemproxy is a fast and lightweight proxy for memcached protocol 2 | Copyright (C) 2012 Twitter, Inc. 3 | 4 | Portions of twemproxy were inspired from nginx: http://nginx.org/ 5 | 6 | The implementation of generic array (nc_array.[ch]) and red black tree 7 | (nc_rbtree.[ch]) also comes from nginx-0.8.55. 
8 | 9 | /* 10 | * Copyright (C) 2002-2010 Igor Sysoev 11 | * 12 | * Redistribution and use in source and binary forms, with or without 13 | * modification, are permitted provided that the following conditions 14 | * are met: 15 | * 1. Redistributions of source code must retain the above copyright 16 | * notice, this list of conditions and the following disclaimer. 17 | * 2. Redistributions in binary form must reproduce the above copyright 18 | * notice, this list of conditions and the following disclaimer in the 19 | * documentation and/or other materials provided with the distribution. 20 | * 21 | * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND 22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 | * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE 25 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 | * SUCH DAMAGE. 32 | */ 33 | 34 | The generic queue implementation comes from BSD 35 | 36 | /* 37 | * Copyright (c) 1991, 1993 38 | * The Regents of the University of California. All rights reserved. 39 | * 40 | * Redistribution and use in source and binary forms, with or without 41 | * modification, are permitted provided that the following conditions 42 | * are met: 43 | * 1. Redistributions of source code must retain the above copyright 44 | * notice, this list of conditions and the following disclaimer. 45 | * 2. 
Redistributions in binary form must reproduce the above copyright 46 | * notice, this list of conditions and the following disclaimer in the 47 | * documentation and/or other materials provided with the distribution. 48 | * 3. All advertising materials mentioning features or use of this software 49 | * must display the following acknowledgement: 50 | * This product includes software developed by the University of 51 | * California, Berkeley and its contributors. 52 | * 4. Neither the name of the University nor the names of its contributors 53 | * may be used to endorse or promote products derived from this software 54 | * without specific prior written permission. 55 | * 56 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 57 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 58 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 59 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 60 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 61 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 62 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 63 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 64 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 65 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 66 | * SUCH DAMAGE. 67 | */ 68 | 69 | The implementation of consistent hashing and individual hash algorithms were 70 | borrowed from libmemcached. 71 | 72 | Copyright (c) 2011, Data Differential (http://datadifferential.com/) 73 | Copyright (c) 2007-2010, TangentOrg (Brian Aker) 74 | All rights reserved. 
75 | 76 | Redistribution and use in source and binary forms, with or without 77 | modification, are permitted provided that the following conditions are 78 | met: 79 | 80 | * Redistributions of source code must retain the above copyright 81 | notice, this list of conditions and the following disclaimer. 82 | 83 | * Redistributions in binary form must reproduce the above 84 | copyright notice, this list of conditions and the following disclaimer 85 | in the documentation and/or other materials provided with the 86 | distribution. 87 | 88 | * Neither the name of TangentOrg nor the names of its 89 | contributors may be used to endorse or promote products derived from 90 | this software without specific prior written permission. 91 | 92 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 93 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 94 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 95 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 96 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 97 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 98 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 99 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 100 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 101 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 102 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
103 | 104 | The source also includes libyaml (yaml-0.1.4) in contrib/ directory 105 | 106 | Copyright (c) 2006 Kirill Simonov 107 | 108 | Permission is hereby granted, free of charge, to any person obtaining a copy of 109 | this software and associated documentation files (the "Software"), to deal in 110 | the Software without restriction, including without limitation the rights to 111 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 112 | of the Software, and to permit persons to whom the Software is furnished to do 113 | so, subject to the following conditions: 114 | 115 | The above copyright notice and this permission notice shall be included in all 116 | copies or substantial portions of the Software. 117 | 118 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 119 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 120 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 121 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 122 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 123 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 124 | SOFTWARE. 
125 | -------------------------------------------------------------------------------- /conf/nutcracker.leaf.yml: -------------------------------------------------------------------------------- 1 | leaf: 2 | listen: 127.0.0.1:22121 3 | hash: fnv1a_64 4 | distribution: ketama 5 | auto_eject_hosts: true 6 | server_retry_timeout: 2000 7 | server_failure_limit: 1 8 | servers: 9 | - 127.0.0.1:11212:1 10 | - 127.0.0.1:11213:1 11 | -------------------------------------------------------------------------------- /conf/nutcracker.rediscluster.yml: -------------------------------------------------------------------------------- 1 | leaf: 2 | listen: 127.0.0.1:2212 3 | hash: crc16 4 | distribution: random 5 | auto_eject_hosts: true 6 | server_retry_timeout: 2000 7 | server_failure_limit: 1 8 | preconnect: true 9 | redis: true 10 | rediscluster: true 11 | zone: tc 12 | servers: 13 | - 127.0.0.1:7000:1 14 | - 127.0.0.1:7001:1 15 | -------------------------------------------------------------------------------- /conf/nutcracker.root.yml: -------------------------------------------------------------------------------- 1 | root: 2 | listen: 127.0.0.1:22120 3 | hash: fnv1a_64 4 | distribution: ketama 5 | preconnect: true 6 | auto_eject_hosts: false 7 | servers: 8 | - 127.0.0.1:22121:1 9 | -------------------------------------------------------------------------------- /conf/nutcracker.yml: -------------------------------------------------------------------------------- 1 | alpha: 2 | listen: 127.0.0.1:22121 3 | hash: fnv1a_64 4 | distribution: ketama 5 | auto_eject_hosts: true 6 | redis: true 7 | server_retry_timeout: 2000 8 | server_failure_limit: 1 9 | servers: 10 | - 127.0.0.1:6379:1 11 | 12 | beta: 13 | listen: 127.0.0.1:22122 14 | hash: fnv1a_64 15 | hash_tag: "{}" 16 | distribution: ketama 17 | auto_eject_hosts: false 18 | timeout: 400 19 | redis: true 20 | servers: 21 | - 127.0.0.1:6380:1 server1 22 | - 127.0.0.1:6381:1 server2 23 | - 127.0.0.1:6382:1 server3 24 | - 
127.0.0.1:6383:1 server4 25 | 26 | gamma: 27 | listen: 127.0.0.1:22123 28 | hash: fnv1a_64 29 | distribution: ketama 30 | timeout: 400 31 | backlog: 1024 32 | preconnect: true 33 | auto_eject_hosts: true 34 | server_retry_timeout: 2000 35 | server_failure_limit: 3 36 | servers: 37 | - 127.0.0.1:11212:1 38 | - 127.0.0.1:11213:1 39 | 40 | delta: 41 | listen: 127.0.0.1:22124 42 | hash: fnv1a_64 43 | distribution: ketama 44 | timeout: 100 45 | auto_eject_hosts: true 46 | server_retry_timeout: 2000 47 | server_failure_limit: 1 48 | servers: 49 | - 127.0.0.1:11214:1 50 | - 127.0.0.1:11215:1 51 | - 127.0.0.1:11216:1 52 | - 127.0.0.1:11217:1 53 | - 127.0.0.1:11218:1 54 | - 127.0.0.1:11219:1 55 | - 127.0.0.1:11220:1 56 | - 127.0.0.1:11221:1 57 | - 127.0.0.1:11222:1 58 | - 127.0.0.1:11223:1 59 | 60 | omega: 61 | listen: /tmp/gamma 62 | hash: hsieh 63 | distribution: ketama 64 | auto_eject_hosts: false 65 | servers: 66 | - 127.0.0.1:11214:100000 67 | - 127.0.0.1:11215:1 68 | -------------------------------------------------------------------------------- /configure.ac: -------------------------------------------------------------------------------- 1 | # Define the package version numbers and the bug reporting address 2 | m4_define([NC_MAJOR], 0) 3 | m4_define([NC_MINOR], 4) 4 | m4_define([NC_PATCH], 0) 5 | m4_define([NC_DATE], 2016.11.22) 6 | m4_define([NC_BUGS], [manj@cs.stanford.edu]) 7 | 8 | # Initialize autoconf 9 | AC_PREREQ([2.64]) 10 | AC_INIT([nutcracker], [NC_MAJOR.NC_MINOR.NC_PATCH], [NC_BUGS]) 11 | AC_CONFIG_SRCDIR([src/nc.c]) 12 | AC_CONFIG_AUX_DIR([config]) 13 | AC_CONFIG_HEADERS([config.h:config.h.in]) 14 | AC_CONFIG_MACRO_DIR([m4]) 15 | 16 | # Initialize automake 17 | AM_INIT_AUTOMAKE([1.9 foreign]) 18 | 19 | # Define macro variables for the package version numbers 20 | AC_DEFINE(NC_VERSION_MAJOR, NC_MAJOR, [Define the major version number]) 21 | AC_DEFINE(NC_VERSION_MINOR, NC_MINOR, [Define the minor version number]) 22 | AC_DEFINE(NC_VERSION_PATCH, NC_PATCH, 
[Define the patch Vversion number]) 23 | AC_DEFINE(NC_VERSION_DATE, NC_DATE, [Define the date compiler]) 24 | AC_DEFINE(NC_VERSION_STRING, "NC_MAJOR.NC_MINOR.NC_PATCH.NC_DATE", [Define the version string]) 25 | 26 | # Checks for language 27 | AC_LANG([C]) 28 | 29 | # Checks for programs 30 | AC_PROG_AWK 31 | AC_PROG_CC 32 | AC_PROG_CPP 33 | AC_PROG_CXX 34 | AC_PROG_INSTALL 35 | AC_PROG_LN_S 36 | AC_PROG_MAKE_SET 37 | AC_PROG_RANLIB 38 | AC_PROG_LIBTOOL 39 | 40 | # Checks for typedefs, structures, and compiler characteristics 41 | AC_C_INLINE 42 | AC_TYPE_INT8_T 43 | AC_TYPE_INT16_T 44 | AC_TYPE_INT32_T 45 | AC_TYPE_INT64_T 46 | AC_TYPE_INTMAX_T 47 | AC_TYPE_INTPTR_T 48 | AC_TYPE_UINT8_T 49 | AC_TYPE_UINT16_T 50 | AC_TYPE_UINT32_T 51 | AC_TYPE_UINT64_T 52 | AC_TYPE_UINTMAX_T 53 | AC_TYPE_UINTPTR_T 54 | AC_TYPE_OFF_T 55 | AC_TYPE_PID_T 56 | AC_TYPE_SIZE_T 57 | AC_TYPE_SSIZE_T 58 | 59 | AC_C_BIGENDIAN( 60 | [], 61 | [AC_DEFINE(HAVE_LITTLE_ENDIAN, 1, [Define to 1 if machine is little endian])], 62 | [AC_MSG_ERROR([endianess of this machine is unknown])], 63 | [AC_MSG_ERROR([universial endianess not supported])] 64 | ) 65 | 66 | # Checks for header files 67 | AC_HEADER_STDBOOL 68 | AC_CHECK_HEADERS([fcntl.h float.h limits.h stddef.h stdlib.h string.h unistd.h]) 69 | AC_CHECK_HEADERS([inttypes.h stdint.h]) 70 | AC_CHECK_HEADERS([sys/ioctl.h sys/time.h sys/uio.h]) 71 | AC_CHECK_HEADERS([sys/socket.h sys/un.h netinet/in.h arpa/inet.h netdb.h]) 72 | AC_CHECK_HEADERS([execinfo.h], 73 | [AC_DEFINE(HAVE_BACKTRACE, [1], [Define to 1 if backtrace is supported])], []) 74 | AC_CHECK_HEADERS([sys/epoll.h], [], []) 75 | AC_CHECK_HEADERS([sys/event.h], [], []) 76 | 77 | # Checks for libraries 78 | AC_CHECK_LIB([m], [pow]) 79 | AC_CHECK_LIB([pthread], [pthread_create]) 80 | 81 | # Checks for library functions 82 | AC_FUNC_FORK 83 | AC_FUNC_MALLOC 84 | AC_FUNC_REALLOC 85 | AC_CHECK_FUNCS([dup2 gethostname gettimeofday strerror]) 86 | AC_CHECK_FUNCS([socket]) 87 | AC_CHECK_FUNCS([memchr 
memmove memset]) 88 | AC_CHECK_FUNCS([strchr strndup strtoul]) 89 | 90 | AC_CACHE_CHECK([if epoll works], [ac_cv_epoll_works], 91 | AC_TRY_RUN([ 92 | #include <stdio.h> 93 | #include <stdlib.h> 94 | #include <sys/epoll.h> 95 | int 96 | main(int argc, char **argv) 97 | { 98 | int fd; 99 | 100 | fd = epoll_create(256); 101 | if (fd < 0) { 102 | perror("epoll_create:"); 103 | exit(1); 104 | } 105 | exit(0); 106 | } 107 | ], [ac_cv_epoll_works=yes], [ac_cv_epoll_works=no])) 108 | AS_IF([test "x$ac_cv_epoll_works" = "xyes"], 109 | [AC_DEFINE([HAVE_EPOLL], [1], [Define to 1 if epoll is supported])], []) 110 | 111 | AC_CACHE_CHECK([if kqueue works], [ac_cv_kqueue_works], 112 | AC_TRY_RUN([ 113 | #include <stdio.h> 114 | #include <stdlib.h> 115 | #include <sys/types.h> 116 | #include <sys/event.h> 117 | #include <sys/time.h> 118 | int 119 | main(int argc, char **argv) 120 | { 121 | int fd; 122 | 123 | fd = kqueue(); 124 | if (fd < 0) { 125 | perror("kqueue:"); 126 | exit(1); 127 | } 128 | exit(0); 129 | } 130 | ], [ac_cv_kqueue_works=yes], [ac_cv_kqueue_works=no])) 131 | AS_IF([test "x$ac_cv_kqueue_works" = "xyes"], 132 | [AC_DEFINE([HAVE_KQUEUE], [1], [Define to 1 if kqueue is supported])], []) 133 | 134 | AC_CACHE_CHECK([if event ports works], [ac_cv_evports_works], 135 | AC_TRY_RUN([ 136 | #include <stdio.h> 137 | #include <stdlib.h> 138 | #include <port.h> 139 | int 140 | main(int argc, char **argv) 141 | { 142 | int fd; 143 | 144 | fd = port_create(); 145 | if (fd < 0) { 146 | perror("port_create:"); 147 | exit(1); 148 | } 149 | exit(0); 150 | } 151 | ], [ac_cv_evports_works=yes], [ac_cv_evports_works=no])) 152 | AS_IF([test "x$ac_cv_evports_works" = "xyes"], 153 | [AC_DEFINE([HAVE_EVENT_PORTS], [1], [Define to 1 if event ports is supported])], []) 154 | 155 | AS_IF([test "x$ac_cv_epoll_works" = "xno" && 156 | test "x$ac_cv_kqueue_works" = "xno" && 157 | test "x$ac_cv_evports_works" = "xno"], 158 | [AC_MSG_ERROR([either epoll or kqueue or event ports support is required])], []) 159 | 160 | AM_CONDITIONAL([OS_LINUX], [test "x$ac_cv_epoll_works" = "xyes"]) 161 | AM_CONDITIONAL([OS_BSD], [test 
"x$ac_cv_kqueue_works" = "xyes"]) 162 | AM_CONDITIONAL([OS_SOLARIS], [test "x$ac_cv_evports_works" = "xyes"]) 163 | AM_CONDITIONAL([OS_FREEBSD], [test "$(uname -v | cut -c 1-10)" = "FreeBSD 10"]) 164 | 165 | # Package options 166 | AC_MSG_CHECKING([whether to enable debug logs and asserts]) 167 | AC_ARG_ENABLE([debug], 168 | [AS_HELP_STRING( 169 | [--enable-debug=@<:@full|yes|log|no@:>@], 170 | [enable debug logs and asserts @<:@default=no@:>@]) 171 | ], 172 | [], 173 | [enable_debug=no]) 174 | AS_CASE([x$enable_debug], 175 | [xfull], [AC_DEFINE([HAVE_ASSERT_PANIC], [1], 176 | [Define to 1 if panic on an assert is enabled]) 177 | AC_DEFINE([HAVE_DEBUG_LOG], [1], [Define to 1 if debug log is enabled]) 178 | ], 179 | [xyes], [AC_DEFINE([HAVE_ASSERT_LOG], [1], 180 | [Define to 1 if log on an assert is enabled]) 181 | AC_DEFINE([HAVE_DEBUG_LOG], [1], [Define to 1 if debug log is enabled]) 182 | ], 183 | [xlog], [AC_DEFINE([HAVE_DEBUG_LOG], [1], [Define to 1 if debug log is enabled])], 184 | [xno], [], 185 | [AC_MSG_FAILURE([invalid value ${enable_debug} for --enable-debug])]) 186 | AC_MSG_RESULT($enable_debug) 187 | 188 | AC_MSG_CHECKING([whether to disable stats]) 189 | AC_ARG_ENABLE([stats], 190 | [AS_HELP_STRING( 191 | [--disable-stats], 192 | [disable stats]) 193 | ], 194 | [AS_IF([test "x$enableval" = "xno"], [disable_stats=yes], [disable_stats=no])], 195 | [disable_stats=no]) 196 | AS_IF([test "x$disable_stats" = xyes], 197 | [], 198 | [AC_DEFINE([HAVE_STATS], [1], [Define to 1 if stats is not disabled])]) 199 | AC_MSG_RESULT($disable_stats) 200 | 201 | # Untar the yaml-0.1.4 in contrib/ before config.status is rerun 202 | AC_CONFIG_COMMANDS_PRE([tar xvfz contrib/yaml-0.1.4.tar.gz -C contrib]) 203 | 204 | # Call yaml-0.1.4 ./configure recursively 205 | AC_CONFIG_SUBDIRS([contrib/yaml-0.1.4]) 206 | 207 | # Untar the LuaJIT-2.0.3 in contrib/ before config.status is rerun 208 | AC_CONFIG_COMMANDS_PRE([tar xvfz contrib/LuaJIT-2.0.3.tar.gz -C contrib]) 209 | 210 | # Define Makefiles 211 | AC_CONFIG_FILES([Makefile 212 | 
contrib/Makefile 213 | src/Makefile 214 | src/hashkit/Makefile 215 | src/proto/Makefile 216 | src/event/Makefile]) 217 | 218 | # Generate the "configure" script 219 | AC_OUTPUT 220 | -------------------------------------------------------------------------------- /contrib/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /contrib/LuaJIT-2.0.3.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ksarch-saas/r3proxy/4ba7caeb14e3a9f1a1795b2ed6638a6ec8a3c57e/contrib/LuaJIT-2.0.3.tar.gz -------------------------------------------------------------------------------- /contrib/LuaJIT-2.0.3/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | *.[oa] 3 | *.so 4 | *.obj 5 | *.lib 6 | *.exp 7 | *.dll 8 | *.exe 9 | *.manifest 10 | *.dmp 11 | *.swp 12 | .tags 13 | -------------------------------------------------------------------------------- /contrib/Makefile.am: -------------------------------------------------------------------------------- 1 | SUBDIRS = yaml-0.1.4 LuaJIT-2.0.3 2 | 3 | EXTRA_DIST = yaml-0.1.4.tar.gz 4 | EXTRA_DIST += LuaJIT-2.0.3.tar.gz 5 | -------------------------------------------------------------------------------- /contrib/yaml-0.1.4.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ksarch-saas/r3proxy/4ba7caeb14e3a9f1a1795b2ed6638a6ec8a3c57e/contrib/yaml-0.1.4.tar.gz -------------------------------------------------------------------------------- /contrib/yaml-0.1.4/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | -------------------------------------------------------------------------------- /entrypoint.sh: 
-------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | # 3 | # entrypoint.sh 4 | # Copyright (C) 2017 yanming02 5 | # 6 | # Distributed under terms of the MIT license. 7 | # 8 | 9 | 10 | #!/bin/bash 11 | listen_port=$1 12 | seed_ip=$2 13 | seed_port=$3 14 | 15 | sed -i "s/LISTEN_PORT/$listen_port/g" /opt/proxy/conf/nutcracker.yml 16 | sed -i "s/SEEDIP/$seed_ip/g" /opt/proxy/conf/nutcracker.yml 17 | sed -i "s/SEEDPORT/$seed_port/g" /opt/proxy/conf/nutcracker.yml 18 | 19 | /opt/proxy/bin/nutcracker -v 6 -s 5380 -m 16384 -c /opt/proxy/conf/nutcracker.yml -l /opt/proxy/bin/lua -o /opt/proxy/log/nutcracker.log 20 | -------------------------------------------------------------------------------- /m4/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore everything 2 | * 3 | 4 | # Except me 5 | !.gitignore 6 | -------------------------------------------------------------------------------- /man/nutcracker.8: -------------------------------------------------------------------------------- 1 | .TH NUTCRACKER 8 "June 13, 2013" 2 | .SH NAME 3 | nutcracker \- Fast, light-weight proxy for memcached and Redis 4 | .SH SYNOPSIS 5 | .B nutcracker 6 | .RI [ options ] 7 | .SH DESCRIPTION 8 | \fBnutcracker\fP, also known as \fBtwemproxy\fP (pronounced "two-em-proxy"), is 9 | a fast and lightweight proxy for the memcached and Redis protocols. 10 | .PP 11 | It was primarily built to reduce the connection count on backend caching 12 | servers, but it has a number of features, such as: 13 | .IP \[bu] 14 | Maintains persistent server connections to backend servers. 15 | .IP \[bu] 16 | Enables pipelining of requests and responses. 17 | .IP \[bu] 18 | Supports multiple server pools simultaneously. 19 | .IP \[bu] 20 | Shard data automatically across multiple servers. 21 | .IP \[bu] 22 | Supports multiple hashing modes including consistent hashing and 23 | distribution. 
24 | .IP \[bu] 25 | High-availability by disabling nodes on failures. 26 | .IP \[bu] 27 | Observability through stats exposed on stats monitoring port. 28 | .SH OPTIONS 29 | .TP 30 | .BR \-h ", " \-\-help 31 | Show usage information and exit. 32 | .TP 33 | .BR \-V ", " \-\-version 34 | Show version and exit. 35 | .TP 36 | .BR \-t ", " \-\-test-conf 37 | Test configuration for syntax errors and exit. 38 | .TP 39 | .BR \-D ", " \-\-describe-stats 40 | Print stats description and exit. 41 | .TP 42 | .BR \-v ", " \-\-verbose=\fIN\fP 43 | Set logging level to \fIN\fP. (default: 5, min: 0, max: 11) 44 | .TP 45 | .BR \-o ", " \-\-output=\fIfilename\fP 46 | Set logging file to \fIfilename\fP. 47 | .TP 48 | .BR \-c ", " \-\-conf-file=\fIfilename\fP 49 | Set configuration file to \fIfilename\fP. 50 | .TP 51 | .BR \-s ", " \-\-stats-port=\fIport\fP 52 | Set stats monitoring port to \fIport\fP. 53 | (default: 22222) 54 | .TP 55 | .BR \-a ", " \-\-stats-addr=\fIaddress\fP 56 | Set stats monitoring IP to \fIaddress\fP. 57 | (default: 0.0.0.0) 58 | .TP 59 | .BR \-i ", " \-\-stats-interval=\fIinterval\fP 60 | Set stats aggregation interval in msec to \fIinterval\fP. 61 | (default: 30000 msec) 62 | .TP 63 | .BR \-m ", " \-\-mbuf-size=\fIsize\fP 64 | Set size of mbuf chunk in bytes to \fIsize\fP. (default: 16384 bytes) 65 | .TP 66 | .BR \-d ", " \-\-daemonize 67 | Run as a daemon. 68 | .TP 69 | .BR \-p ", " \-\-pid-file=\fIfilename\fP 70 | Set pid file to \fIfilename\fP. 71 | .SH SEE ALSO 72 | .BR memcached (8), 73 | .BR redis-server (1) 74 | .br 75 | .SH AUTHOR 76 | nutcracker was written by Twitter, Inc. 
77 | -------------------------------------------------------------------------------- /notes/debug.txt: -------------------------------------------------------------------------------- 1 | - strace 2 | strace -o strace.txt -ttT -s 1024 -p `pgrep nutcracker` 3 | 4 | - libyaml (yaml-0.1.4) 5 | 6 | - yaml tokens: 7 | 8 | 0 YAML_NO_TOKEN, 9 | 1 YAML_STREAM_START_TOKEN, 10 | 2 YAML_STREAM_END_TOKEN, 11 | 3 YAML_VERSION_DIRECTIVE_TOKEN, 12 | 4 YAML_TAG_DIRECTIVE_TOKEN, 13 | 5 YAML_DOCUMENT_START_TOKEN, 14 | 6 YAML_DOCUMENT_END_TOKEN, 15 | 7 YAML_BLOCK_SEQUENCE_START_TOKEN, 16 | 8 YAML_BLOCK_MAPPING_START_TOKEN, 17 | 9 YAML_BLOCK_END_TOKEN, 18 | 10 YAML_FLOW_SEQUENCE_START_TOKEN, 19 | 11 YAML_FLOW_SEQUENCE_END_TOKEN, 20 | 12 YAML_FLOW_MAPPING_START_TOKEN, 21 | 13 YAML_FLOW_MAPPING_END_TOKEN, 22 | 14 YAML_BLOCK_ENTRY_TOKEN, 23 | 15 YAML_FLOW_ENTRY_TOKEN, 24 | 16 YAML_KEY_TOKEN, 25 | 17 YAML_VALUE_TOKEN, 26 | 18 YAML_ALIAS_TOKEN, 27 | 19 YAML_ANCHOR_TOKEN, 28 | 20 YAML_TAG_TOKEN, 29 | 21 YAML_SCALAR_TOKEN 30 | 31 | - yaml events 32 | 33 | 0 YAML_NO_EVENT, 34 | 1 YAML_STREAM_START_EVENT, 35 | 2 YAML_STREAM_END_EVENT, 36 | 3 YAML_DOCUMENT_START_EVENT, 37 | 4 YAML_DOCUMENT_END_EVENT, 38 | 5 YAML_ALIAS_EVENT, 39 | 6 YAML_SCALAR_EVENT, 40 | 7 YAML_SEQUENCE_START_EVENT, 41 | 8 YAML_SEQUENCE_END_EVENT, 42 | 9 YAML_MAPPING_START_EVENT, 43 | 10 YAML_MAPPING_END_EVENT 44 | 45 | - sys/queue.h 46 | 47 | queue.h is a generic linked list library adapted from BSD. It has three 48 | macro knobs that are useful for debugging: 49 | 50 | - QUEUE_MACRO_SCRUB nullifies links (next and prev pointers) of deleted 51 | elements and catches cases where we are attempting to do operations 52 | on an element that has already been unlinked. 53 | - QUEUE_MACRO_TRACE keeps track of __FILE__ and __LINE__ of last two 54 | updates to the list data structure. 55 | - QUEUE_MACRO_ASSERT verifies the sanity of list data structure on every 56 | operation. 
57 | 58 | - valgrind 59 | valgrind --tool=memcheck --leak-check=yes 60 | 61 | - Core dump 62 | ulimit -c unlimited 63 | 64 | - Generate ENOMEM to test "Out of Memory" 65 | ulimit -m # limit maximum memory size 66 | ulimit -v # limit virtual memory 67 | 68 | - get nutcracker stats 69 | printf "" | socat - TCP:localhost:22222 | tee stats.txt 70 | printf "" | nc localhost 22222 | python -mjson.tool 71 | 72 | - Signalling and Logging 73 | SIGTTIN - To raise the log level 74 | SIGTTOU - To lower the log level 75 | SIGHUP - To reopen log file 76 | 77 | - Error codes: 78 | http://www.cs.utah.edu/dept/old/texinfo/glibc-manual-0.02/library_2.html 79 | /usr/include/asm-generic/errno-base.h 80 | /usr/include/asm-generic/errno.h 81 | 82 | - epoll (linux) 83 | 84 | union epoll_data { 85 | void *ptr; 86 | int fd; 87 | uint32_t u32; 88 | uint64_t u64; 89 | }; 90 | 91 | struct epoll_event { 92 | uint32_t events; /* epoll events */ 93 | struct epoll_data data; /* user data variable */ 94 | }; 95 | 96 | /* events */ 97 | EPOLLIN = 0x001, 98 | EPOLLPRI = 0x002, 99 | EPOLLOUT = 0x004, 100 | EPOLLERR = 0x008, 101 | EPOLLHUP = 0x010, 102 | EPOLLRDNORM = 0x040, 103 | EPOLLRDBAND = 0x080, 104 | EPOLLWRNORM = 0x100, 105 | EPOLLWRBAND = 0x200, 106 | EPOLLMSG = 0x400, 107 | EPOLLRDHUP = 0x2000, 108 | EPOLLONESHOT = (1 << 30), 109 | EPOLLET = (1 << 31) 110 | 111 | /* opcodes */ 112 | EPOLL_CTL_ADD = 1 /* add a file descriptor to the interface */ 113 | EPOLL_CTL_DEL = 2 /* remove a file descriptor from the interface */ 114 | EPOLL_CTL_MOD = 3 /* change file descriptor epoll_event structure */ 115 | 116 | - kqueue (bsd) 117 | 118 | struct kevent { 119 | uintptr_t ident; /* identifier for this event */ 120 | int16_t filter; /* filter for event */ 121 | uint16_t flags; /* general flags */ 122 | uint32_t fflags; /* filter-specific flags */ 123 | intptr_t data; /* filter-specific data */ 124 | void *udata; /* opaque user data identifier */ 125 | }; 126 | 127 | /* flags / events */ 128 | EV_ADD = 0x0001 /*
action - add event to kq (implies enable) */ 129 | EV_DELETE = 0x0002 /* action - delete event from kq */ 130 | EV_ENABLE = 0x0004 /* action - enable event */ 131 | EV_DISABLE = 0x0008 /* action - disable event (not reported) */ 132 | EV_RECEIPT = 0x0040 /* action - force EV_ERROR on success, data == 0 */ 133 | 134 | EV_ONESHOT = 0x0010 /* flags - only report one occurrence */ 135 | EV_CLEAR = 0x0020 /* flags - clear event state after reporting */ 136 | EV_DISPATCH = 0x0080 /* flags - disable event after reporting */ 137 | EV_SYSFLAGS = 0xF000 /* flags - reserved by system */ 138 | EV_FLAG0 = 0x1000 /* flags - filter-specific flag */ 139 | EV_FLAG1 = 0x2000 /* flags - filter-specific flag */ 140 | 141 | EV_EOF = 0x8000 /* returned values - EOF detected */ 142 | EV_ERROR = 0x4000 /* returned values - error, data contains errno */ 143 | 144 | /* filters */ 145 | EVFILT_READ (-1) /* readable */ 146 | EVFILT_WRITE (-2) /* writable */ 147 | EVFILT_AIO (-3) /* attached to aio requests */ 148 | EVFILT_VNODE (-4) /* attached to vnodes */ 149 | EVFILT_PROC (-5) /* attached to struct proc */ 150 | EVFILT_SIGNAL (-6) /* attached to struct proc */ 151 | EVFILT_TIMER (-7) /* timers */ 152 | EVFILT_MACHPORT (-8) /* mach portsets */ 153 | EVFILT_FS (-9) /* filesystem events */ 154 | EVFILT_USER (-10) /* user events */ 155 | EVFILT_VM (-12) /* virtual memory events */ 156 | 157 | EV_CLEAR behaves like EPOLLET because it resets the event after it is 158 | returned; without this flag, the event would be repeatedly returned. 
159 | 160 | - poll (unix) 161 | 162 | POLLIN 0x001 /* there is data to read */ 163 | POLLPRI 0x002 /* there is urgent data to read */ 164 | POLLOUT 0x004 /* writing now will not block */ 165 | 166 | POLLRDNORM 0x040 /* normal data may be read */ 167 | POLLRDBAND 0x080 /* priority data may be read */ 168 | POLLWRNORM 0x100 /* writing now will not block */ 169 | POLLWRBAND 0x200 /* priority data may be written */ 170 | 171 | POLLMSG 0x400 172 | POLLREMOVE 0x1000 173 | POLLRDHUP 0x2000 174 | 175 | POLLERR 0x008 /* error condition */ 176 | POLLHUP 0x010 /* hung up */ 177 | POLLNVAL 0x020 /* invalid polling request */ 178 | 179 | - event ports (solaris) 180 | 181 | typedef struct port_event { 182 | int portev_events; /* event data is source specific */ 183 | ushort_t portev_source; /* event source */ 184 | ushort_t portev_pad; /* port internal use */ 185 | uintptr_t portev_object; /* source specific object */ 186 | void *portev_user; /* user cookie */ 187 | } port_event_t; 188 | 189 | /* port sources */ 190 | PORT_SOURCE_AIO 1 191 | PORT_SOURCE_TIMER 2 192 | PORT_SOURCE_USER 3 193 | PORT_SOURCE_FD 4 194 | PORT_SOURCE_ALERT 5 195 | PORT_SOURCE_MQ 6 196 | PORT_SOURCE_FILE 7 197 | -------------------------------------------------------------------------------- /notes/kqueue.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ksarch-saas/r3proxy/4ba7caeb14e3a9f1a1795b2ed6638a6ec8a3c57e/notes/kqueue.pdf -------------------------------------------------------------------------------- /notes/memcache.txt: -------------------------------------------------------------------------------- 1 | - ascii: 2 | 3 | - Storage Commands (set, add, replace, append, prepend, cas): 4 | 5 | set <key> <flags> <expiry> <datalen> [noreply]\r\n<data>\r\n 6 | add <key> <flags> <expiry> <datalen> [noreply]\r\n<data>\r\n 7 | replace <key> <flags> <expiry> <datalen> [noreply]\r\n<data>\r\n 8 | append <key> <flags> <expiry> <datalen> [noreply]\r\n<data>\r\n 9 | prepend <key> <flags> <expiry> <datalen> [noreply]\r\n<data>\r\n 10 | 11 | cas <key> <flags> <expiry> <datalen> <cas> [noreply]\r\n<data>\r\n 12 | 13 | where, 14 | <flags> - uint32_t : data specific client side flags 15 |
<expiry> - uint32_t : expiration time (in seconds) 16 | <datalen> - uint32_t : size of the data (in bytes) 17 | <data> - uint8_t[]: data block 18 | <cas> - uint64_t 19 | 20 | - Retrieval Commands (get, gets): 21 | 22 | get <key>\r\n 23 | get <key> [<key>]+\r\n 24 | 25 | gets <key>\r\n 26 | gets <key> [<key>]+\r\n 27 | 28 | - Delete Command (delete): 29 | 30 | delete <key> [noreply]\r\n 31 | 32 | - Arithmetic Commands (incr, decr): 33 | 34 | incr <key> <value> [noreply]\r\n 35 | decr <key> <value> [noreply]\r\n 36 | 37 | where, 38 | <value> - uint64_t 39 | 40 | - Misc Commands (quit) 41 | 42 | quit\r\n 43 | flush_all [<delay>] [noreply]\r\n 44 | version\r\n 45 | verbosity <num> [noreply]\r\n 46 | 47 | - Statistics Commands 48 | 49 | stats\r\n 50 | stats <args>\r\n 51 | 52 | - Error Responses: 53 | 54 | ERROR\r\n 55 | CLIENT_ERROR [error]\r\n 56 | SERVER_ERROR [error]\r\n 57 | 58 | where, 59 | ERROR means client sent a non-existent command name 60 | CLIENT_ERROR means that command sent by the client does not conform to the protocol 61 | SERVER_ERROR means that there was an error on the server side that made processing of the command impossible 62 | 63 | - Storage Command Responses: 64 | 65 | STORED\r\n 66 | NOT_STORED\r\n 67 | EXISTS\r\n 68 | NOT_FOUND\r\n 69 | 70 | where, 71 | STORED indicates success. 72 | NOT_STORED indicates the data was not stored because condition for an add or replace wasn't met. 73 | EXISTS indicates that the item you are trying to store with a cas has been modified since you last fetched it. 74 | NOT_FOUND indicates that the item you are trying to store with a cas does not exist.
75 | 76 | - Delete Command Response: 77 | 78 | NOT_FOUND\r\n 79 | DELETED\r\n 80 | 81 | - Retrieval Responses: 82 | 83 | END\r\n 84 | VALUE <key> <flags> <datalen> [<cas>]\r\n<data>\r\nEND\r\n 85 | VALUE <key> <flags> <datalen> [<cas>]\r\n<data>\r\n[VALUE <key> <flags> <datalen> [<cas>]\r\n<data>]+\r\nEND\r\n 86 | 87 | - Arithmetic Responses: 88 | 89 | NOT_FOUND\r\n 90 | <value>\r\n 91 | 92 | where, 93 | <value> - uint64_t : new key value after incr or decr operation 94 | 95 | - Statistics Response 96 | [STAT <name> <value>\r\n]+END\r\n 97 | 98 | - Misc Response 99 | 100 | OK\r\n 101 | VERSION <version>\r\n 102 | 103 | - Notes: 104 | - set always creates mapping irrespective of whether it is present or not. 105 | - add, adds only if the mapping is not present 106 | - replace, only replaces if the mapping is present 107 | - append and prepend command ignore flags and expiry values 108 | - noreply instructs the server to not send the reply even if there is an error. 109 | - decr of 0 is 0, while incr of UINT64_MAX is 0 110 | - maximum length of the key is 250 characters 111 | - expiry of 0 means that item never expires, though it could be evicted from the cache 112 | - non-zero expiry is either unix time (# seconds since 01/01/1970) or, 113 | offset in seconds from the current time (< 60 x 60 x 24 x 30 seconds = 30 days) 114 | - expiry time is with respect to the server (not client) 115 | - <datalen> can be zero and when it is, the <data> block is empty.
116 | 117 | - Thoughts: 118 | - ascii protocol is easier to debug - think using strace or tcpdump to see 119 | protocol on the wire, Or using telnet or netcat or socat to build memcache 120 | requests and responses 121 | http://stackoverflow.com/questions/2525188/are-binary-protocols-dead 122 | 123 | - http://news.ycombinator.com/item?id=1712788 124 | -------------------------------------------------------------------------------- /notes/rediscluster.md: -------------------------------------------------------------------------------- 1 | - 在迁移时,slave应该不可读,需要先封,但是读写是针对整个server,如果全部封slave的读,会造成整个迁移过程(可能几小时)中,读都走主,所以需要做到可以按分片进行读写。如果需要proxy知道importing和migrating的状态,要么controller提供,要么redis集群提供,两者都比较罗嗦。简单的办法是,在迁移前,对slave也设置migrating状态(需要修改redis),这样在访问该slave的迁移slot时,会直接重定向到目标master。 2 | - 迁移状态只有该server自己才知道,向其他节点发送cluster nodes时不会返回 3 | 4 | - ASK,需要向目标server先行发送ASKING,再发送命令,如果server不存在(server_table),则设置cluster-nodes更新标志,该次请求失败。tick是每200ms出发一次,也就是说,在新加节点时,最多有200ms的访问失败。这部分可优化,也可不处理。 5 | 6 | - MOVED,判断目标slot和当前slot是否相同,相同则表示是slave定向到自己的master,不需要更新slot到replicaset的映射,否则就需要修改。MOVED时,无需发送ASKING。 7 | 8 | - 路由 9 | ```lua 10 | nearest = { 11 | tc = {tc,jx,nj02}, 12 | jx = {jx,tc,nj03}, 13 | nj02 = {nj02,nj03,hz01,{tc,jx}}, 14 | nj03 = {nj03,nj02,hz01,{tc,jx}}, 15 | hz01 = {hz01,{nj02,nj03},{tc,jx}} 16 | } 17 | ``` 18 | 19 | ```lua 20 | primaryPreferred = { 21 | tc = {$master,tc,jx,nj02}, 22 | jx = {$master,jx,tc,nj03}, 23 | nj02 = {$master,nj02,nj03,hz01,{tc,jx}}, 24 | nj03 = {$master,nj03,nj02,hz01,{tc,jx}}, 25 | hz01 = {$master,hz01,{nj02,nj03},{tc,jx}} 26 | } 27 | ``` 28 | 29 | ```lua 30 | primary = { 31 | tc = {$master}, 32 | jx = {$master}, 33 | nj02 = {$master}, 34 | nj03 = {$master}, 35 | hz01 = {$master}, 36 | } 37 | ``` 38 | - 主要修改: 39 | 1.处理ASK和MOVED。 40 | 2.根据'cluster nodes extra'返回的信息,处理数据分布,创建server, 41 | 3.根据idcmap设置server读优先级 42 | -------------------------------------------------------------------------------- /notes/socket.txt: 
-------------------------------------------------------------------------------- 1 | - int listen(int sockfd, int backlog); 2 | 3 | Linux: The backlog argument defines the maximum length to which the 4 | queue of pending connections for sockfd may grow. If a connection 5 | request arrives when the queue is full, the client may receive an error 6 | with an indication of ECONNREFUSED or, if the underlying protocol 7 | supports retransmission, the request may be ignored so that a later 8 | reattempt at connection succeeds. 9 | 10 | backlog specifies the queue length for completely established sockets 11 | waiting to be accepted, instead of the number of incomplete connection 12 | requests. The maximum length of the queue for incomplete sockets can 13 | be set using /proc/sys/net/ipv4/tcp_max_syn_backlog. 14 | 15 | If the backlog argument is greater than the value in /proc/sys/net/core/somaxconn, 16 | then it is silently truncated to that value; the default value in this 17 | file is 128. In kernels before 2.4.25, this limit was a hard coded value, 18 | SOMAXCONN, with the value 128. 19 | 20 | BSD: The backlog argument defines the maximum length the queue of pending 21 | connections may grow to. The real maximum queue length will be 1.5 times 22 | more than the value specified in the backlog argument. A subsequent 23 | listen() system call on the listening socket allows the caller to change 24 | the maximum queue length using a new backlog argument. If a connection 25 | request arrives with the queue full the client may receive an error with 26 | an indication of ECONNREFUSED, or, in the case of TCP, the connection 27 | will be silently dropped. 28 | 29 | The listen() system call appeared in 4.2BSD. The ability to configure 30 | the maximum backlog at run-time, and to use a negative backlog to request 31 | the maximum allowable value, was introduced in FreeBSD 2.2. 
32 | 33 | - SO_LINGER (linger) socket option 34 | 35 | This option specifies what should happen when the socket of a type that 36 | promises reliable delivery still has untransmitted messages when it is 37 | closed 38 | 39 | struct linger { 40 | int l_onoff; /* nonzero to linger on close */ 41 | int l_linger; /* time to linger (in secs) */ 42 | }; 43 | 44 | l_onoff = 0 (default), then l_linger value is ignored and close returns 45 | immediately. But if there is any data still remaining in the socket send 46 | buffer, the system will try to deliver the data to the peer 47 | 48 | l_onoff = nonzero, then close blocks until data is transmitted or the 49 | l_linger timeout period expires 50 | a) l_linger = 0, TCP aborts connection, discards any data still remaining 51 | in the socket send buffer and sends RST to peer. This avoids the 52 | TCP's TIME_WAIT state 53 | b) l_linger = nonzero, then kernel will linger when socket is closed. If 54 | there is any pending data in the socket send buffer, the kernel waits 55 | until all the data is sent and acknowledged by peer TCP, or the 56 | linger time expires 57 | 58 | If a socket is set as nonblocking, it will not wait for close to complete 59 | even if linger time is nonzero 60 | 61 | - TIME_WAIT state 62 | 63 | The end that performs active close i.e. the end that sends the first FIN 64 | goes into TIME_WAIT state. After a FIN packet is sent to the peer and 65 | after that peer's FIN/ACK arrives and is ACKed, we go into a TIME_WAIT 66 | state. The duration that the end point remains in this state is 2 x MSL 67 | (maximum segment lifetime). The reason that the duration of the TIME_WAIT 68 | state is 2 x MSL is because the maximum amount of time a packet can wander 69 | around a network is assumed to be MSL seconds. The factor of 2 is for the 70 | round-trip. The recommended value for MSL is 120 seconds, but Berkeley 71 | derived implementations normally use 30 seconds instead.
This means a 72 | TIME_WAIT delay is between 1 and 4 minutes. 73 | 74 | For Linux, the TIME_WAIT state duration is 1 minute (net/tcp.h): 75 | #define TCP_TIMEWAIT_LEN (60*HZ) /* how long to wait to destroy TIME-WAIT 76 | * state, about 60 seconds */ 77 | 78 | TIME_WAIT state on client, combined with limited number of ephemeral ports 79 | available for TCP connections severely limits the rate at which new 80 | connections to the server can be created. On Linux, by default ephemeral 81 | ports are in the range of 32768 to 61000: 82 | 83 | $ cat /proc/sys/net/ipv4/ip_local_port_range 84 | 32768 61000 85 | 86 | So with a TIME_WAIT state duration of 1 minute, the maximum sustained rate 87 | for any client is ~470 new connections per second 88 | 89 | - TCP keepalive 90 | 91 | TCP keepalive packet (TCP packet with no data and the ACK flag turned on) 92 | is used to assert that connection is still up and running. This is useful 93 | because if the remote peer goes away without closing their connection, the 94 | keepalive probe will detect this and notice that the connection is broken 95 | even if there is no traffic on it. 96 | 97 | Imagine, the following scenario: You have a valid TCP connection established 98 | between two endpoints A and B. B terminates abnormally (think kernel panic 99 | or unplugging of network cable) without sending anything over the network 100 | to notify A that connection is broken. A, from its side, is ready to 101 | receive data, and has no idea that B has gone away. Now B comes back up 102 | again, and while A knows about a connection with B and still thinks that it is 103 | active, B has no such idea. A tries to send data to B over a dead 104 | connection, and B replies with an RST packet, causing A to finally close 105 | the connection. So, without a keepalive probe A would never close the 106 | connection if it never sent data over it.
107 | 108 | - There are four socket functions that pass a socket address structure from 109 | the process to the kernel - bind, connect, sendmsg and sendto. These 110 | function are also responsible for passing the length of the sockaddr that 111 | they are passing (socklen_t). 112 | There are five socket functions that pass a socket from the kernel to the 113 | process - accept, recvfrom, recvmsg, getpeername, getsockname. The kernel 114 | is also responsible for returning the length of the sockaddr struct that 115 | it returns back to the userspace 116 | 117 | Different sockaddr structs: 118 | 1. sockaddr_in 119 | 2. sockaddr_in6 120 | 3. sockaddr_un 121 | 122 | Special types of in_addr_t 123 | /* Address to accept any incoming messages */ 124 | #define INADDR_ANY ((in_addr_t) 0x00000000) 125 | 126 | /* Address to send to all hosts */ 127 | #define INADDR_BROADCAST ((in_addr_t) 0xffffffff) 128 | 129 | /* Address indicating an error return */ 130 | #define INADDR_NONE ((in_addr_t) 0xffffffff) 131 | 132 | -------------------------------------------------------------------------------- /nutcracker.yml: -------------------------------------------------------------------------------- 1 | db: 2 | listen: 0.0.0.0:LISTEN_PORT 3 | servers: 4 | - SEEDIP:SEEDPORT:1 5 | zone: tc 6 | hash: crc16 7 | distribution: random 8 | redis: true 9 | rediscluster: true 10 | timeout: 1000 11 | auto_eject_hosts: true 12 | server_retry_timeout: 2000 13 | server_failure_limit: 3 14 | msg_max_length_limit: 65535 15 | preconnect: true 16 | env: offline 17 | slowlog: true 18 | slowlog_slower_than: 100 19 | -------------------------------------------------------------------------------- /scripts/benchmark-mget.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #coding: utf-8 3 | #file : test_mget.py 4 | #author : ning 5 | #date : 2014-04-01 13:15:48 6 | 7 | import os 8 | import re 9 | import commands 10 | 11 | ports = [ 12 | 4001, 
# before improve 13 | 4000, # after improve 14 | 2000 # redis 15 | ] 16 | 17 | def system(cmd): 18 | return commands.getoutput(cmd) 19 | 20 | def extra(regex, text): 21 | match = re.search(regex, text, re.DOTALL) 22 | if match: 23 | return match.group(1) 24 | 25 | def testit(): 26 | for mget_size in [10, 100, 1000, 10000]: 27 | for port in ports: 28 | cnt = 100*1000 / mget_size 29 | clients = 50 30 | if mget_size == 10000: 31 | clients = 2 32 | cmd = 'cd /home/ning/xredis/deploy-srcs/redis-2.8.3/src && ./redis-benchmark.%d -n %d -p %d -t mget -r 1000000000 -c %d' % (mget_size, cnt, port, clients) 33 | #print cmd 34 | rst = system(cmd) 35 | 36 | #100.00% <= 2 milliseconds 37 | #28089.89 requests per second 38 | rtime = extra('100.00% <= (\d+) milliseconds', rst) 39 | qps = extra('([\.\d]+) requests per second', rst) 40 | 41 | print 'mget_size=%d on %d: pqs: %s, rtime: %s' % (mget_size, port, qps, rtime) 42 | 43 | testit() 44 | -------------------------------------------------------------------------------- /scripts/multi_get.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | port=22123 4 | socatopt="-t 20 -T 20 -b 8193 -d -d " 5 | key="" 6 | keys="" 7 | get_command="" 8 | 9 | # build 10 | for i in `seq 1 512`; do 11 | if [ `expr $i % 2` -eq "0" ]; then 12 | key="foo" 13 | else 14 | key="bar" 15 | fi 16 | key=`printf "%s%d" "${key}" "${i}"` 17 | keys=`printf "%s %s" "${keys}" "${key}"` 18 | done 19 | 20 | get_command="get ${keys}\r\n" 21 | printf "%b" "$get_command" 22 | 23 | # read 24 | for i in `seq 1 16`; do 25 | printf "%b" "${get_command}" | socat ${socatopt} - TCP:localhost:${port},nodelay,shut-none,nonblock=1 1> /dev/null 2>&1 & 26 | done 27 | -------------------------------------------------------------------------------- /scripts/nutcracker.init: -------------------------------------------------------------------------------- 1 | #! 
/bin/sh 2 | # 3 | # chkconfig: - 55 45 4 | # description: Twitter's twemproxy nutcracker 5 | # processname: nutcracker 6 | # config: /etc/sysconfig/nutcracker 7 | 8 | # Source function library. 9 | . /etc/rc.d/init.d/functions 10 | 11 | USER="nobody" 12 | OPTIONS="-d -c /etc/nutcracker/nutcracker.yml" 13 | 14 | if [ -f /etc/sysconfig/nutcracker ];then 15 | . /etc/sysconfig/nutcracker 16 | fi 17 | 18 | # Check that networking is up. 19 | if [ "$NETWORKING" = "no" ] 20 | then 21 | exit 0 22 | fi 23 | 24 | RETVAL=0 25 | prog="nutcracker" 26 | 27 | start () { 28 | echo -n $"Starting $prog: " 29 | #Test the config before start. 30 | daemon --user ${USER} ${prog} $OPTIONS -t >/dev/null 2>&1 31 | RETVAL=$? 32 | if [ $RETVAL -ne 0 ] ; then 33 | echo "Config check fail! Please use 'nutcracker -c /etc/nutcracker/nutcracker.yml' for detail." 34 | echo_failure; 35 | echo; 36 | exit 1 37 | fi 38 | 39 | daemon --user ${USER} ${prog} $OPTIONS 40 | RETVAL=$? 41 | echo 42 | [ $RETVAL -eq 0 ] && touch /var/lock/subsys/${prog} 43 | } 44 | stop () { 45 | echo -n $"Stopping $prog: " 46 | killproc ${prog} 47 | RETVAL=$? 48 | echo 49 | if [ $RETVAL -eq 0 ] ; then 50 | rm -f /var/lock/subsys/${prog} 51 | fi 52 | } 53 | 54 | restart () { 55 | stop 56 | start 57 | } 58 | 59 | 60 | # See how we were called. 61 | case "$1" in 62 | start) 63 | start 64 | ;; 65 | stop) 66 | stop 67 | ;; 68 | status) 69 | status ${prog} 70 | ;; 71 | restart|reload) 72 | restart 73 | ;; 74 | condrestart) 75 | [ -f /var/lock/subsys/nutcracker ] && restart || : 76 | ;; 77 | *) 78 | echo $"Usage: $0 {start|stop|status|restart|reload|condrestart}" 79 | exit 1 80 | esac 81 | 82 | exit $? 
83 | 84 | -------------------------------------------------------------------------------- /scripts/nutcracker.init.debian: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | ### BEGIN INIT INFO 3 | # Provides: nutcracker 4 | # Required-Start: $network $remote_fs $local_fs 5 | # Required-Stop: $network $remote_fs $local_fs 6 | # Default-Start: 2 3 4 5 7 | # Default-Stop: 0 1 6 8 | # Short-Description: Stop/start nutcracker 9 | ### END INIT INFO 10 | 11 | PATH=/sbin:/usr/sbin:/bin:/usr/bin 12 | DESC=nutcracker 13 | NAME=nutcracker 14 | USER=nobody 15 | CONFFILE=/opt/nutcracker/etc/$NAME.yml 16 | LOGFILE=/opt/nutcracker/log/nutcracker.log 17 | DAEMON=/opt/nutcracker/sbin/nutcracker 18 | PIDFILE=/var/run/nutcracker/$NAME.pid 19 | STATSPORT=22222 20 | DAEMON_ARGS="-c $CONFFILE -o $LOGFILE -p $PIDFILE -s $STATSPORT -v 11 -m 2048 -d" 21 | #DAEMON_ARGS="-c $CONFFILE -p $PIDFILE -s $STATSPORT -d" 22 | SCRIPTNAME=/etc/init.d/$NAME 23 | 24 | ulimit -Hn 100000 25 | ulimit -Sn 100000 26 | 27 | [ -x $DAEMON ] || exit 0 28 | 29 | [ -r /etc/default/$NAME ] && . /etc/default/$NAME 30 | 31 | . /lib/init/vars.sh 32 | 33 | . /lib/lsb/init-functions 34 | 35 | do_start() 36 | { 37 | mkdir -p /var/run/nutcracker 38 | touch $PIDFILE 39 | chown $USER:$USER -R /var/run/nutcracker 40 | chmod 755 /var/run/nutcracker 41 | 42 | echo -n "Starting ${NAME}: " 43 | start-stop-daemon --start --quiet -m --pidfile $PIDFILE --chuid $USER:$USER --exec $DAEMON -- \ 44 | $DAEMON_ARGS 45 | case "$?" in 46 | 0|1) echo "STARTED." ;; 47 | 2) echo "FAILED." ;; 48 | esac 49 | } 50 | 51 | do_stop() 52 | { 53 | echo -n "Stopping ${NAME}: " 54 | start-stop-daemon --stop --quiet --pidfile $PIDFILE --exec $DAEMON || true 55 | 56 | case "$?" in 57 | 0|1) echo "STOPPED.";; 58 | 2) echo "FAILED." 
;; 59 | esac 60 | } 61 | 62 | case "$1" in 63 | start) 64 | do_start 65 | ;; 66 | stop) 67 | do_stop 68 | ;; 69 | status) 70 | status_of_proc -p $PIDFILE "$DAEMON" nutcracker && exit 0 || exit $? 71 | ;; 72 | restart) 73 | do_stop 74 | do_start 75 | ;; 76 | *) 77 | echo "Usage: $SCRIPTNAME {start|stop|status|restart}" >&2 78 | exit 3 79 | ;; 80 | esac 81 | 82 | exit 83 | $RETVAL 84 | -------------------------------------------------------------------------------- /scripts/nutcracker.spec: -------------------------------------------------------------------------------- 1 | Summary: Twitter's nutcracker redis and memcached proxy 2 | Name: nutcracker 3 | Version: 0.3.0 4 | Release: 1 5 | 6 | URL: https://github.com/twitter/twemproxy/ 7 | Source0: %{name}-%{version}.tar.gz 8 | License: Apache License 2.0 9 | Group: System Environment/Libraries 10 | Packager: Tom Parrott 11 | BuildRoot: %{_tmppath}/%{name}-root 12 | 13 | BuildRequires: autoconf 14 | BuildRequires: automake 15 | BuildRequires: libtool 16 | 17 | %description 18 | twemproxy (pronounced "two-em-proxy"), aka nutcracker is a fast and lightweight proxy for memcached and redis protocol. 19 | It was primarily built to reduce the connection count on the backend caching servers. 
20 | 21 | %prep 22 | %setup -q 23 | %if 0%{?rhel} == 6 24 | sed -i 's/2.64/2.63/g' configure.ac 25 | %endif 26 | autoreconf -fvi 27 | 28 | %build 29 | 30 | %configure 31 | %__make 32 | 33 | %install 34 | [ %{buildroot} != "/" ] && rm -rf %{buildroot} 35 | 36 | %makeinstall PREFIX=%{buildroot} 37 | 38 | #Install init script 39 | %{__install} -p -D -m 0755 scripts/%{name}.init %{buildroot}%{_initrddir}/%{name} 40 | 41 | #Install example config file 42 | %{__install} -p -D -m 0644 conf/%{name}.yml %{buildroot}%{_sysconfdir}/%{name}/%{name}.yml 43 | 44 | %post 45 | /sbin/chkconfig --add %{name} 46 | 47 | %preun 48 | if [ $1 = 0 ]; then 49 | /sbin/service %{name} stop > /dev/null 2>&1 50 | /sbin/chkconfig --del %{name} 51 | fi 52 | 53 | %clean 54 | [ %{buildroot} != "/" ] && rm -rf %{buildroot} 55 | 56 | %files 57 | %defattr(-,root,root,-) 58 | %if 0%{?rhel} == 6 59 | /usr/sbin/nutcracker 60 | %else 61 | /usr/bin/nutcracker 62 | %endif 63 | %{_initrddir}/%{name} 64 | %{_mandir}/man8/nutcracker.8.gz 65 | %config(noreplace)%{_sysconfdir}/%{name}/%{name}.yml 66 | 67 | %changelog 68 | * Fri Dec 20 2013 Manju Rajashekhar 69 | - twemproxy: version 0.3.0 release 70 | - SRANDMEMBER support for the optional count argument (mkhq) 71 | - Handle case where server responds while the request is still being sent (jdi-tagged) 72 | - event ports (solaris/smartos) support 73 | - add timestamp when the server was ejected 74 | - support for set ex/px/nx/xx for redis 2.6.12 and up (ypocat) 75 | - kqueue (bsd) support (ferenyx) 76 | - fix parsing redis response to accept integer reply (charsyam) 77 | 78 | * Tue Jul 30 2013 Tait Clarridge 79 | - Rebuild SPEC to work with CentOS 80 | - Added buildrequires if building with mock/koji 81 | 82 | * Tue Apr 23 2013 Manju Rajashekhar 83 | - twemproxy: version 0.2.4 release 84 | - redis keys must be less than mbuf_data_size() in length (fifsky) 85 | - Adds support for DUMP/RESTORE commands in Redis (remotezygote) 86 | - Use of the weight value in the 
modula distribution (mezzatto) 87 | - Add support to unix socket connections to servers (mezzatto) 88 | - only check for duplicate server name and not 'host:port:weight' when 'name' is configured 89 | - crc16 hash support added (mezzatto) 90 | 91 | * Thu Jan 31 2013 Manju Rajashekhar 92 | - twemproxy: version 0.2.3 release 93 | - RPOPLPUSH, SDIFF, SDIFFSTORE, SINTER, SINTERSTORE, SMOVE, SUNION, SUNIONSTORE, ZINTERSTORE, and ZUNIONSTORE support (dcartoon) 94 | - EVAL and EVALSHA support (ferenyx) 95 | - exit 1 if configuration file is invalid (cofyc) 96 | - return non-zero exit status when nutcracker cannot start for some reason 97 | - use server names in stats (charsyam) 98 | - Fix failure to resolve long FQDN name resolve (conmame) 99 | - add support for hash tags 100 | 101 | * Thu Oct 18 2012 Manju Rajashekhar 102 | - twemproxy: version 0.2.2 release 103 | - fix the off-by-one error when calculating redis key length 104 | 105 | * Fri Oct 12 2012 Manju Rajashekhar 106 | - twemproxy: version 0.2.1 release 107 | - don't use buf in conf_add_server 108 | - allow an optional instance name for consistent hashing (charsyam) 109 | - add --stats-addr=S option 110 | - add stats-bind-any -a option (charsyam) 111 | -------------------------------------------------------------------------------- /scripts/pipelined_read.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | socatopt="-t 4 -T 4 -b 8193 -d -d " 4 | 5 | get_commands="" 6 | 7 | # build 8 | for i in `seq 1 128`; do 9 | if [ `expr $i % 2` -eq "0" ]; then 10 | key="foo" 11 | else 12 | key="bar" 13 | fi 14 | key=`printf "%s%d" "${key}" "${i}"` 15 | 16 | get_command="get ${key}\r\n" 17 | get_commands=`printf "%s%s" "${get_commands}" "${get_command}"` 18 | done 19 | 20 | # read 21 | for i in `seq 1 64`; do 22 | printf "%b" "$get_commands" | socat ${socatopt} - TCP:localhost:22123,nodelay,shut-none,nonblock=1 1> /dev/null 2>&1 & 23 | done 24 | 
-------------------------------------------------------------------------------- /scripts/pipelined_write.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | socatopt="-t 1 -T 1 -b 16384" 4 | 5 | val=`echo 6^6^6 | bc` 6 | val=`printf "%s" "${val}"` 7 | vallen=`printf "%s" "${val}" | wc -c` 8 | set_command="" 9 | set_commands="" 10 | 11 | # build 12 | for i in `seq 1 64`; do 13 | if [ `expr $i % 2` -eq "0" ]; then 14 | key="foo" 15 | else 16 | key="bar" 17 | fi 18 | key=`printf "%s%d" "${key}" "${i}"` 19 | 20 | set_command="set ${key} 0 0 ${vallen}\r\n${val}\r\n" 21 | set_commands=`printf "%s%s" "${set_commands}" "${set_command}"` 22 | done 23 | 24 | printf "%b" "$set_commands" > /tmp/socat.input 25 | 26 | # write 27 | for i in `seq 1 16`; do 28 | cat /tmp/socat.input | socat ${socatopt} - TCP:localhost:22123,nodelay,shut-down,nonblock=1 & 29 | done 30 | -------------------------------------------------------------------------------- /scripts/populate_memcached.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | port=22123 4 | socatopt="-t 1 -T 1 -b 65537" 5 | 6 | val=`echo 6^6^6 | bc` 7 | val=`printf "%s\r\n" "${val}"` 8 | vallen=`printf "%s" "${val}" | wc -c` 9 | set_command="" 10 | 11 | # build 12 | for i in `seq 1 512`; do 13 | if [ `expr $i % 2` -eq "0" ]; then 14 | key="foo" 15 | else 16 | key="bar" 17 | fi 18 | key=`printf "%s%d" "${key}" "${i}"` 19 | 20 | set_command="set ${key} 0 0 ${vallen}\r\n${val}\r\n" 21 | 22 | printf "%b" "$set_command" | socat ${socatopt} - TCP:localhost:${port},nodelay,shut-down,nonblock=1 & 23 | done 24 | 25 | -------------------------------------------------------------------------------- /scripts/redis-check.py: -------------------------------------------------------------------------------- 1 | import redis 2 | 3 | range=100 4 | factor=32 5 | port=22121 6 | 7 | r = redis.StrictRedis(host='localhost', port=port, db=0) 8 | 
9 | # lrange 10 | print [r.lrange('lfoo', 0, x) for x in xrange(1, range)] 11 | print [r.lpush('lfoo', str(x)*factor) for x in xrange(1, range)] 12 | print [r.lrange('lfoo', 0, x) for x in xrange(1, range)] 13 | print r.delete('lfoo') 14 | 15 | # del 16 | print [r.set('foo' + str(x), str(x)*factor) for x in xrange(1, range)] 17 | keys = ['foo' + str(x) for x in xrange(1, range)] 18 | print [r.delete(keys) for x in xrange(1, range)] 19 | 20 | # mget 21 | print [r.set('foo' + str(x), str(x)*100) for x in xrange(1, range)] 22 | keys = ['foo' + str(x) for x in xrange(1, range)] 23 | print [r.mget(keys) for x in xrange(1, range)] 24 | -------------------------------------------------------------------------------- /sources.list: -------------------------------------------------------------------------------- 1 | deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ trusty main restricted universe multiverse 2 | # deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ trusty main main restricted universe multiverse 3 | deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ trusty-updates main restricted universe multiverse 4 | # deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ trusty-updates main restricted universe multiverse 5 | deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ trusty-backports main restricted universe multiverse 6 | # deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ trusty-backports main restricted universe multiverse 7 | deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ trusty-security main restricted universe multiverse 8 | # deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ trusty-security main restricted universe multiverse 9 | -------------------------------------------------------------------------------- /src/Makefile.am: -------------------------------------------------------------------------------- 1 | MAINTAINERCLEANFILES = Makefile.in 2 | 3 | AM_CPPFLAGS = 4 | if !OS_SOLARIS 5 | AM_CPPFLAGS += -D_GNU_SOURCE 6 | endif 7 | AM_CPPFLAGS += -I 
$(top_srcdir)/src/hashkit 8 | AM_CPPFLAGS += -I $(top_srcdir)/src/proto 9 | AM_CPPFLAGS += -I $(top_srcdir)/src/event 10 | AM_CPPFLAGS += -I $(top_srcdir)/contrib/yaml-0.1.4/include 11 | AM_CPPFLAGS += -I $(top_srcdir)/contrib/LuaJIT-2.0.3/src 12 | 13 | AM_CFLAGS = 14 | # about -fno-strict-aliasing: https://github.com/twitter/twemproxy/issues/276 15 | AM_CFLAGS += -fno-strict-aliasing 16 | AM_CFLAGS += -Wall -Wshadow 17 | AM_CFLAGS += -Wl,-E 18 | AM_CFLAGS += -Wpointer-arith 19 | AM_CFLAGS += -Winline 20 | AM_CFLAGS += -Wunused-function -Wunused-variable -Wunused-value 21 | AM_CFLAGS += -Wno-unused-parameter -Wno-unused-value 22 | AM_CFLAGS += -Wconversion -Wsign-compare 23 | AM_CFLAGS += -Wstrict-prototypes -Wmissing-prototypes -Wredundant-decls -Wmissing-declarations 24 | 25 | AM_LDFLAGS = 26 | AM_LDFLAGS += -lm -lpthread -rdynamic 27 | if OS_SOLARIS 28 | AM_LDFLAGS += -lnsl -lsocket 29 | endif 30 | if OS_FREEBSD 31 | AM_LDFLAGS += -lexecinfo 32 | endif 33 | 34 | SUBDIRS = hashkit proto event 35 | 36 | sbin_PROGRAMS = nutcracker 37 | 38 | nutcracker_SOURCES = \ 39 | nc_core.c nc_core.h \ 40 | nc_connection.c nc_connection.h \ 41 | nc_client.c nc_client.h \ 42 | nc_server.c nc_server.h \ 43 | nc_proxy.c nc_proxy.h \ 44 | nc_message.c nc_message.h \ 45 | nc_request.c \ 46 | nc_response.c \ 47 | nc_mbuf.c nc_mbuf.h \ 48 | nc_conf.c nc_conf.h \ 49 | nc_stats.c nc_stats.h \ 50 | nc_signal.c nc_signal.h \ 51 | nc_rbtree.c nc_rbtree.h \ 52 | nc_log.c nc_log.h \ 53 | nc_string.c nc_string.h \ 54 | nc_array.c nc_array.h \ 55 | nc_util.c nc_util.h \ 56 | nc_assoc.c nc_assoc.h \ 57 | nc_script.c nc_script.h \ 58 | nc_queue.h \ 59 | nc_ipwhitelist.c nc_ipwhitelist.h \ 60 | nc.c 61 | 62 | nutcracker_LDADD = $(top_builddir)/src/hashkit/libhashkit.a 63 | nutcracker_LDADD += $(top_builddir)/src/proto/libproto.a 64 | nutcracker_LDADD += $(top_builddir)/src/event/libevent.a 65 | nutcracker_LDADD += $(top_builddir)/contrib/yaml-0.1.4/src/.libs/libyaml.a 66 | nutcracker_LDADD += 
$(top_builddir)/contrib/LuaJIT-2.0.3/src/libluajit.a -ldl 67 | -------------------------------------------------------------------------------- /src/event/Makefile.am: -------------------------------------------------------------------------------- 1 | MAINTAINERCLEANFILES = Makefile.in 2 | 3 | AM_CPPFLAGS = -I $(top_srcdir)/src 4 | 5 | AM_CFLAGS = -Wall -Wshadow 6 | AM_CFLAGS += -Wno-unused-parameter -Wno-unused-value 7 | 8 | noinst_LIBRARIES = libevent.a 9 | 10 | noinst_HEADERS = nc_event.h 11 | 12 | libevent_a_SOURCES = \ 13 | nc_epoll.c \ 14 | nc_kqueue.c \ 15 | nc_evport.c 16 | 17 | -------------------------------------------------------------------------------- /src/event/nc_event.h: -------------------------------------------------------------------------------- 1 | /* 2 | * twemproxy - A fast and lightweight proxy for memcached protocol. 3 | * Copyright (C) 2011 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | #ifndef _NC_EVENT_H_ 19 | #define _NC_EVENT_H_ 20 | 21 | #include 22 | 23 | #define EVENT_SIZE 1024 24 | 25 | #define EVENT_READ 0x0000ff 26 | #define EVENT_WRITE 0x00ff00 27 | #define EVENT_ERR 0xff0000 28 | 29 | typedef int (*event_cb_t)(void *, uint32_t); 30 | typedef void (*event_stats_cb_t)(void *, void *); 31 | 32 | #ifdef NC_HAVE_KQUEUE 33 | 34 | struct event_base { 35 | int kq; /* kernel event queue descriptor */ 36 | 37 | struct kevent *change; /* change[] - events we want to monitor */ 38 | int nchange; /* # change */ 39 | 40 | struct kevent *event; /* event[] - events that were triggered */ 41 | int nevent; /* # event */ 42 | int nreturned; /* # event placed in event[] */ 43 | int nprocessed; /* # event processed from event[] */ 44 | 45 | event_cb_t cb; /* event callback */ 46 | }; 47 | 48 | #elif NC_HAVE_EPOLL 49 | 50 | struct event_base { 51 | int ep; /* epoll descriptor */ 52 | 53 | struct epoll_event *event; /* event[] - events that were triggered */ 54 | int nevent; /* # event */ 55 | 56 | event_cb_t cb; /* event callback */ 57 | }; 58 | 59 | #elif NC_HAVE_EVENT_PORTS 60 | 61 | #include 62 | 63 | struct event_base { 64 | int evp; /* event port descriptor */ 65 | 66 | port_event_t *event; /* event[] - events that were triggered */ 67 | int nevent; /* # event */ 68 | 69 | event_cb_t cb; /* event callback */ 70 | }; 71 | 72 | #else 73 | # error missing scalable I/O event notification mechanism 74 | #endif 75 | 76 | struct event_base *event_base_create(int size, event_cb_t cb); 77 | void event_base_destroy(struct event_base *evb); 78 | 79 | int event_add_in(struct event_base *evb, struct conn *c); 80 | int event_del_in(struct event_base *evb, struct conn *c); 81 | int event_add_out(struct event_base *evb, struct conn *c); 82 | int event_del_out(struct event_base *evb, struct conn *c); 83 | int event_add_conn(struct event_base *evb, struct conn *c); 84 | int event_del_conn(struct event_base *evb, struct conn *c); 85 | int 
event_wait(struct event_base *evb, int timeout); 86 | void event_loop_stats(event_stats_cb_t cb, void *arg); 87 | 88 | #endif /* _NC_EVENT_H_ */ 89 | -------------------------------------------------------------------------------- /src/hashkit/Makefile.am: -------------------------------------------------------------------------------- 1 | MAINTAINERCLEANFILES = Makefile.in 2 | 3 | AM_CPPFLAGS = -I $(top_srcdir)/src 4 | 5 | AM_CFLAGS = -Wall -Wshadow 6 | AM_CFLAGS += -Wno-unused-parameter -Wno-unused-value 7 | 8 | noinst_LIBRARIES = libhashkit.a 9 | 10 | noinst_HEADERS = nc_hashkit.h 11 | 12 | libhashkit_a_SOURCES = \ 13 | nc_crc16.c \ 14 | nc_crc32.c \ 15 | nc_fnv.c \ 16 | nc_hsieh.c \ 17 | nc_jenkins.c \ 18 | nc_ketama.c \ 19 | nc_md5.c \ 20 | nc_modula.c \ 21 | nc_murmur.c \ 22 | nc_one_at_a_time.c \ 23 | nc_random.c 24 | -------------------------------------------------------------------------------- /src/hashkit/nc_crc16.c: -------------------------------------------------------------------------------- 1 | /* 2 | * twemproxy - A fast and lightweight proxy for memcached protocol. 3 | * Copyright (C) 2011 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License.
16 | */ 17 | 18 | #include 19 | 20 | static const uint16_t crc16tab[256] = { 21 | 0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7, 22 | 0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef, 23 | 0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6, 24 | 0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de, 25 | 0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485, 26 | 0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d, 27 | 0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4, 28 | 0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc, 29 | 0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823, 30 | 0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b, 31 | 0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12, 32 | 0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a, 33 | 0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41, 34 | 0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49, 35 | 0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70, 36 | 0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78, 37 | 0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f, 38 | 0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067, 39 | 0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e, 40 | 0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256, 41 | 0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d, 42 | 0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405, 43 | 0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c, 44 | 0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634, 45 | 0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab, 46 | 0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3, 47 | 0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a, 48 | 0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 
0x2ab3, 0x3a92, 49 | 0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9, 50 | 0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1, 51 | 0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8, 52 | 0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0, 53 | }; 54 | 55 | uint32_t 56 | hash_crc16(const char *key, size_t key_length) 57 | { 58 | uint64_t x; 59 | uint32_t crc = 0; 60 | 61 | for (x=0; x < key_length; x++) { 62 | crc = (crc << 8) ^ crc16tab[((crc >> 8) ^ *key++) & 0x00ff]; 63 | } 64 | 65 | return crc; 66 | } 67 | -------------------------------------------------------------------------------- /src/hashkit/nc_crc32.c: -------------------------------------------------------------------------------- 1 | /* 2 | * twemproxy - A fast and lightweight proxy for memcached protocol. 3 | * Copyright (C) 2011 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /* 19 | * The crc32 functions and data was originally written by Spencer 20 | * Garrett and was gleaned from the PostgreSQL source 21 | * tree via the files contrib/ltree/crc32.[ch] and from FreeBSD at 22 | * src/usr.bin/cksum/crc32.c. 
23 | */ 24 | 25 | #include 26 | 27 | static const uint32_t crc32tab[256] = { 28 | 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 29 | 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3, 30 | 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, 31 | 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 32 | 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, 33 | 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 34 | 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 35 | 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5, 36 | 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, 37 | 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 38 | 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 39 | 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, 40 | 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 41 | 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f, 42 | 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, 43 | 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 44 | 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, 45 | 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, 46 | 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 47 | 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, 48 | 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 49 | 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 50 | 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, 51 | 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, 52 | 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 53 | 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 54 | 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, 55 | 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 56 | 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, 57 | 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, 58 | 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 59 | 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 60 | 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, 61 | 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 62 | 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, 63 | 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 64 | 
0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 65 | 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7, 66 | 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, 67 | 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 68 | 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 69 | 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, 70 | 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 71 | 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79, 72 | 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, 73 | 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 74 | 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 75 | 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, 76 | 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 77 | 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, 78 | 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 79 | 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 80 | 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, 81 | 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, 82 | 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 83 | 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 84 | 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 85 | 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 86 | 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, 87 | 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, 88 | 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 89 | 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf, 90 | 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, 91 | 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d, 92 | }; 93 | 94 | /* 95 | * CRC-32 implementation compatible with libmemcached library. Unfortunately 96 | * this implementation does not return CRC-32 as per spec. 
97 | */ 98 | uint32_t 99 | hash_crc32(const char *key, size_t key_length) 100 | { 101 | uint64_t x; 102 | uint32_t crc = UINT32_MAX; 103 | 104 | for (x = 0; x < key_length; x++) { 105 | crc = (crc >> 8) ^ crc32tab[(crc ^ (uint64_t)key[x]) & 0xff]; 106 | } 107 | 108 | return ((~crc) >> 16) & 0x7fff; 109 | } 110 | 111 | uint32_t 112 | hash_crc32a(const char *key, size_t key_length) 113 | { 114 | const uint8_t *p = key; 115 | uint32_t crc; 116 | 117 | crc = ~0U; 118 | while (key_length--) { 119 | crc = crc32tab[(crc ^ *p++) & 0xFF] ^ (crc >> 8); 120 | } 121 | 122 | return crc ^ ~0U; 123 | } 124 | -------------------------------------------------------------------------------- /src/hashkit/nc_fnv.c: -------------------------------------------------------------------------------- 1 | /* 2 | * twemproxy - A fast and lightweight proxy for memcached protocol. 3 | * Copyright (C) 2011 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | #include 19 | 20 | static uint64_t FNV_64_INIT = UINT64_C(0xcbf29ce484222325); 21 | static uint64_t FNV_64_PRIME = UINT64_C(0x100000001b3); 22 | static uint32_t FNV_32_INIT = 2166136261UL; 23 | static uint32_t FNV_32_PRIME = 16777619; 24 | 25 | uint32_t 26 | hash_fnv1_64(const char *key, size_t key_length) 27 | { 28 | uint64_t hash = FNV_64_INIT; 29 | size_t x; 30 | 31 | for (x = 0; x < key_length; x++) { 32 | hash *= FNV_64_PRIME; 33 | hash ^= (uint64_t)key[x]; 34 | } 35 | 36 | return (uint32_t)hash; 37 | } 38 | 39 | uint32_t 40 | hash_fnv1a_64(const char *key, size_t key_length) 41 | { 42 | uint32_t hash = (uint32_t) FNV_64_INIT; 43 | size_t x; 44 | 45 | for (x = 0; x < key_length; x++) { 46 | uint32_t val = (uint32_t)key[x]; 47 | hash ^= val; 48 | hash *= (uint32_t) FNV_64_PRIME; 49 | } 50 | 51 | return hash; 52 | } 53 | 54 | uint32_t 55 | hash_fnv1_32(const char *key, size_t key_length) 56 | { 57 | uint32_t hash = FNV_32_INIT; 58 | size_t x; 59 | 60 | for (x = 0; x < key_length; x++) { 61 | uint32_t val = (uint32_t)key[x]; 62 | hash *= FNV_32_PRIME; 63 | hash ^= val; 64 | } 65 | 66 | return hash; 67 | } 68 | 69 | uint32_t 70 | hash_fnv1a_32(const char *key, size_t key_length) 71 | { 72 | uint32_t hash = FNV_32_INIT; 73 | size_t x; 74 | 75 | for (x= 0; x < key_length; x++) { 76 | uint32_t val = (uint32_t)key[x]; 77 | hash ^= val; 78 | hash *= FNV_32_PRIME; 79 | } 80 | 81 | return hash; 82 | } 83 | -------------------------------------------------------------------------------- /src/hashkit/nc_hashkit.h: -------------------------------------------------------------------------------- 1 | /* 2 | * twemproxy - A fast and lightweight proxy for memcached protocol. 3 | * Copyright (C) 2011 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 
7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | #ifndef _NC_HASHKIT_H_ 19 | #define _NC_HASHKIT_H_ 20 | 21 | #include 22 | #include 23 | 24 | #define HASH_CODEC(ACTION) \ 25 | ACTION( HASH_ONE_AT_A_TIME, one_at_a_time ) \ 26 | ACTION( HASH_MD5, md5 ) \ 27 | ACTION( HASH_CRC16, crc16 ) \ 28 | ACTION( HASH_CRC32, crc32 ) \ 29 | ACTION( HASH_CRC32A, crc32a ) \ 30 | ACTION( HASH_FNV1_64, fnv1_64 ) \ 31 | ACTION( HASH_FNV1A_64, fnv1a_64 ) \ 32 | ACTION( HASH_FNV1_32, fnv1_32 ) \ 33 | ACTION( HASH_FNV1A_32, fnv1a_32 ) \ 34 | ACTION( HASH_HSIEH, hsieh ) \ 35 | ACTION( HASH_MURMUR, murmur ) \ 36 | ACTION( HASH_JENKINS, jenkins ) \ 37 | 38 | #define DIST_CODEC(ACTION) \ 39 | ACTION( DIST_KETAMA, ketama ) \ 40 | ACTION( DIST_MODULA, modula ) \ 41 | ACTION( DIST_RANDOM, random ) \ 42 | 43 | #define DEFINE_ACTION(_hash, _name) _hash, 44 | typedef enum hash_type { 45 | HASH_CODEC( DEFINE_ACTION ) 46 | HASH_SENTINEL 47 | } hash_type_t; 48 | #undef DEFINE_ACTION 49 | 50 | #define DEFINE_ACTION(_dist, _name) _dist, 51 | typedef enum dist_type { 52 | DIST_CODEC( DEFINE_ACTION ) 53 | DIST_SENTINEL 54 | } dist_type_t; 55 | #undef DEFINE_ACTION 56 | 57 | uint32_t hash_one_at_a_time(const char *key, size_t key_length); 58 | void md5_signature(const unsigned char *key, unsigned int length, unsigned char *result); 59 | uint32_t hash_md5(const char *key, size_t key_length); 60 | uint32_t hash_crc16(const char *key, size_t key_length); 61 | uint32_t hash_crc32(const char *key, size_t key_length); 62 | uint32_t hash_crc32a(const char *key, size_t key_length); 63 | uint32_t 
hash_fnv1_64(const char *key, size_t key_length); 64 | uint32_t hash_fnv1a_64(const char *key, size_t key_length); 65 | uint32_t hash_fnv1_32(const char *key, size_t key_length); 66 | uint32_t hash_fnv1a_32(const char *key, size_t key_length); 67 | uint32_t hash_hsieh(const char *key, size_t key_length); 68 | uint32_t hash_jenkins(const char *key, size_t length); 69 | uint32_t hash_murmur(const char *key, size_t length); 70 | 71 | rstatus_t ketama_update(struct server_pool *pool); 72 | uint32_t ketama_dispatch(struct continuum *continuum, uint32_t ncontinuum, uint32_t hash); 73 | rstatus_t modula_update(struct server_pool *pool); 74 | uint32_t modula_dispatch(struct continuum *continuum, uint32_t ncontinuum, uint32_t hash); 75 | rstatus_t random_update(struct server_pool *pool); 76 | uint32_t random_dispatch(struct continuum *continuum, uint32_t ncontinuum, uint32_t hash); 77 | 78 | #endif 79 | -------------------------------------------------------------------------------- /src/hashkit/nc_hsieh.c: -------------------------------------------------------------------------------- 1 | /* 2 | * twemproxy - A fast and lightweight proxy for memcached protocol. 3 | * Copyright (C) 2011 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /* 19 | * By Paul Hsieh (C) 2004, 2005. Covered under the Paul Hsieh 20 | * derivative license. 
21 | * See: http://www.azillionmonkeys.com/qed/weblicense.html for license 22 | * details. 23 | * http://www.azillionmonkeys.com/qed/hash.html 24 | */ 25 | 26 | #include 27 | 28 | #undef get16bits 29 | #if (defined(__GNUC__) && defined(__i386__)) 30 | #define get16bits(d) (*((const uint16_t *) (d))) 31 | #endif 32 | 33 | #if !defined (get16bits) 34 | #define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)\ 35 | +(uint32_t)(((const uint8_t *)(d))[0]) ) 36 | #endif 37 | 38 | uint32_t 39 | hash_hsieh(const char *key, size_t key_length) 40 | { 41 | uint32_t hash = 0, tmp; 42 | int rem; 43 | 44 | if (key_length <= 0 || key == NULL) { 45 | return 0; 46 | } 47 | 48 | rem = key_length & 3; 49 | key_length >>= 2; 50 | 51 | /* Main loop */ 52 | for (;key_length > 0; key_length--) { 53 | hash += get16bits (key); 54 | tmp = (get16bits (key+2) << 11) ^ hash; 55 | hash = (hash << 16) ^ tmp; 56 | key += 2*sizeof (uint16_t); 57 | hash += hash >> 11; 58 | } 59 | 60 | /* Handle end cases */ 61 | switch (rem) { 62 | case 3: 63 | hash += get16bits (key); 64 | hash ^= hash << 16; 65 | hash ^= (uint32_t)key[sizeof (uint16_t)] << 18; 66 | hash += hash >> 11; 67 | break; 68 | 69 | case 2: 70 | hash += get16bits (key); 71 | hash ^= hash << 11; 72 | hash += hash >> 17; 73 | break; 74 | 75 | case 1: 76 | hash += (unsigned char)(*key); 77 | hash ^= hash << 10; 78 | hash += hash >> 1; 79 | 80 | default: 81 | break; 82 | } 83 | 84 | /* Force "avalanching" of final 127 bits */ 85 | hash ^= hash << 3; 86 | hash += hash >> 5; 87 | hash ^= hash << 4; 88 | hash += hash >> 17; 89 | hash ^= hash << 25; 90 | hash += hash >> 6; 91 | 92 | return hash; 93 | } 94 | -------------------------------------------------------------------------------- /src/hashkit/nc_jenkins.c: -------------------------------------------------------------------------------- 1 | /* 2 | * twemproxy - A fast and lightweight proxy for memcached protocol. 3 | * Copyright (C) 2011 Twitter, Inc. 
4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /* 19 | * By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this 20 | * code any way you wish, private, educational, or commercial. It's free. 21 | * Use for hash table lookup, or anything where one collision in 2^^32 is 22 | * acceptable. Do NOT use for cryptographic purposes. 23 | * http://burtleburtle.net/bob/hash/index.html 24 | * 25 | * Modified by Brian Pontz for libmemcached 26 | * TODO: 27 | * Add big endian support 28 | */ 29 | 30 | #include 31 | 32 | #define hashsize(n) ((uint32_t)1<<(n)) 33 | #define hashmask(n) (hashsize(n)-1) 34 | #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k)))) 35 | 36 | #define mix(a,b,c) \ 37 | { \ 38 | a -= c; a ^= rot(c, 4); c += b; \ 39 | b -= a; b ^= rot(a, 6); a += c; \ 40 | c -= b; c ^= rot(b, 8); b += a; \ 41 | a -= c; a ^= rot(c,16); c += b; \ 42 | b -= a; b ^= rot(a,19); a += c; \ 43 | c -= b; c ^= rot(b, 4); b += a; \ 44 | } 45 | 46 | #define final(a,b,c) \ 47 | { \ 48 | c ^= b; c -= rot(b,14); \ 49 | a ^= c; a -= rot(c,11); \ 50 | b ^= a; b -= rot(a,25); \ 51 | c ^= b; c -= rot(b,16); \ 52 | a ^= c; a -= rot(c,4); \ 53 | b ^= a; b -= rot(a,14); \ 54 | c ^= b; c -= rot(b,24); \ 55 | } 56 | 57 | #define JENKINS_INITVAL 13 58 | 59 | /* 60 | * jenkins_hash() -- hash a variable-length key into a 32-bit value 61 | * k : the key (the unaligned variable-length array of bytes) 62 | * length : the length of the key, 
counting by bytes 63 | * initval : can be any 4-byte value 64 | * Returns a 32-bit value. Every bit of the key affects every bit of 65 | * the return value. Two keys differing by one or two bits will have 66 | * totally different hash values. 67 | 68 | * The best hash table sizes are powers of 2. There is no need to do 69 | * mod a prime (mod is sooo slow!). If you need less than 32 bits, 70 | * use a bitmask. For example, if you need only 10 bits, do 71 | * h = (h & hashmask(10)); 72 | * In which case, the hash table should have hashsize(10) elements. 73 | */ 74 | 75 | uint32_t 76 | hash_jenkins(const char *key, size_t length) 77 | { 78 | uint32_t a,b,c; /* internal state */ 79 | union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */ 80 | 81 | /* Set up the internal state */ 82 | a = b = c = 0xdeadbeef + ((uint32_t)length) + JENKINS_INITVAL; 83 | 84 | u.ptr = key; 85 | #ifndef WORDS_BIGENDIAN 86 | if ((u.i & 0x3) == 0) 87 | { 88 | const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */ 89 | 90 | /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */ 91 | while (length > 12) 92 | { 93 | a += k[0]; 94 | b += k[1]; 95 | c += k[2]; 96 | mix(a,b,c); 97 | length -= 12; 98 | k += 3; 99 | } 100 | 101 | /*----------------------------- handle the last (probably partial) block */ 102 | /* 103 | * "k[2]&0xffffff" actually reads beyond the end of the string, but 104 | * then masks off the part it's not allowed to read. Because the 105 | * string is aligned, the masked-off tail is in the same word as the 106 | * rest of the string. Every machine with memory protection I've seen 107 | * does it on word boundaries, so is OK with this. But VALGRIND will 108 | * still catch it and complain. The masking trick does make the hash 109 | * noticeably faster for short strings (like English words). 
110 | */ 111 | switch(length) 112 | { 113 | case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; 114 | case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break; 115 | case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break; 116 | case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break; 117 | case 8 : b+=k[1]; a+=k[0]; break; 118 | case 7 : b+=k[1]&0xffffff; a+=k[0]; break; 119 | case 6 : b+=k[1]&0xffff; a+=k[0]; break; 120 | case 5 : b+=k[1]&0xff; a+=k[0]; break; 121 | case 4 : a+=k[0]; break; 122 | case 3 : a+=k[0]&0xffffff; break; 123 | case 2 : a+=k[0]&0xffff; break; 124 | case 1 : a+=k[0]&0xff; break; 125 | case 0 : return c; /* zero length strings require no mixing */ 126 | default: return c; 127 | } 128 | 129 | } 130 | else if ((u.i & 0x1) == 0) 131 | { 132 | const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */ 133 | const uint8_t *k8; 134 | 135 | /*--------------- all but last block: aligned reads and different mixing */ 136 | while (length > 12) 137 | { 138 | a += k[0] + (((uint32_t)k[1])<<16); 139 | b += k[2] + (((uint32_t)k[3])<<16); 140 | c += k[4] + (((uint32_t)k[5])<<16); 141 | mix(a,b,c); 142 | length -= 12; 143 | k += 6; 144 | } 145 | 146 | /*----------------------------- handle the last (probably partial) block */ 147 | k8 = (const uint8_t *)k; 148 | switch(length) 149 | { 150 | case 12: c+=k[4]+(((uint32_t)k[5])<<16); 151 | b+=k[2]+(((uint32_t)k[3])<<16); 152 | a+=k[0]+(((uint32_t)k[1])<<16); 153 | break; 154 | case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ 155 | case 10: c+=k[4]; 156 | b+=k[2]+(((uint32_t)k[3])<<16); 157 | a+=k[0]+(((uint32_t)k[1])<<16); 158 | break; 159 | case 9 : c+=k8[8]; /* fall through */ 160 | case 8 : b+=k[2]+(((uint32_t)k[3])<<16); 161 | a+=k[0]+(((uint32_t)k[1])<<16); 162 | break; 163 | case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ 164 | case 6 : b+=k[2]; 165 | a+=k[0]+(((uint32_t)k[1])<<16); 166 | break; 167 | case 5 : b+=k8[4]; /* fall through */ 168 | case 4 : a+=k[0]+(((uint32_t)k[1])<<16); 169 | break; 170 | case 3 : 
a+=((uint32_t)k8[2])<<16; /* fall through */ 171 | case 2 : a+=k[0]; 172 | break; 173 | case 1 : a+=k8[0]; 174 | break; 175 | case 0 : return c; /* zero length requires no mixing */ 176 | default: return c; 177 | } 178 | 179 | } 180 | else 181 | { /* need to read the key one byte at a time */ 182 | #endif /* little endian */ 183 | const uint8_t *k = (const uint8_t *)key; 184 | 185 | /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ 186 | while (length > 12) 187 | { 188 | a += k[0]; 189 | a += ((uint32_t)k[1])<<8; 190 | a += ((uint32_t)k[2])<<16; 191 | a += ((uint32_t)k[3])<<24; 192 | b += k[4]; 193 | b += ((uint32_t)k[5])<<8; 194 | b += ((uint32_t)k[6])<<16; 195 | b += ((uint32_t)k[7])<<24; 196 | c += k[8]; 197 | c += ((uint32_t)k[9])<<8; 198 | c += ((uint32_t)k[10])<<16; 199 | c += ((uint32_t)k[11])<<24; 200 | mix(a,b,c); 201 | length -= 12; 202 | k += 12; 203 | } 204 | 205 | /*-------------------------------- last block: affect all 32 bits of (c) */ 206 | switch(length) /* all the case statements fall through */ 207 | { 208 | case 12: c+=((uint32_t)k[11])<<24; 209 | case 11: c+=((uint32_t)k[10])<<16; 210 | case 10: c+=((uint32_t)k[9])<<8; 211 | case 9 : c+=k[8]; 212 | case 8 : b+=((uint32_t)k[7])<<24; 213 | case 7 : b+=((uint32_t)k[6])<<16; 214 | case 6 : b+=((uint32_t)k[5])<<8; 215 | case 5 : b+=k[4]; 216 | case 4 : a+=((uint32_t)k[3])<<24; 217 | case 3 : a+=((uint32_t)k[2])<<16; 218 | case 2 : a+=((uint32_t)k[1])<<8; 219 | case 1 : a+=k[0]; 220 | break; 221 | case 0 : return c; 222 | default : return c; 223 | } 224 | #ifndef WORDS_BIGENDIAN 225 | } 226 | #endif 227 | 228 | final(a,b,c); 229 | return c; 230 | } 231 | -------------------------------------------------------------------------------- /src/hashkit/nc_ketama.c: -------------------------------------------------------------------------------- 1 | /* 2 | * twemproxy - A fast and lightweight proxy for memcached protocol. 3 | * Copyright (C) 2011 Twitter, Inc. 
4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | #include 19 | #include 20 | #include 21 | 22 | #include 23 | #include 24 | #include 25 | 26 | #define KETAMA_CONTINUUM_ADDITION 10 /* # extra slots to build into continuum */ 27 | #define KETAMA_POINTS_PER_SERVER 160 /* 40 points per hash */ 28 | #define KETAMA_MAX_HOSTLEN 86 29 | 30 | static uint32_t 31 | ketama_hash(const char *key, size_t key_length, uint32_t alignment) 32 | { 33 | unsigned char results[16]; 34 | 35 | md5_signature((unsigned char*)key, key_length, results); 36 | 37 | return ((uint32_t) (results[3 + alignment * 4] & 0xFF) << 24) 38 | | ((uint32_t) (results[2 + alignment * 4] & 0xFF) << 16) 39 | | ((uint32_t) (results[1 + alignment * 4] & 0xFF) << 8) 40 | | (results[0 + alignment * 4] & 0xFF); 41 | } 42 | 43 | static int 44 | ketama_item_cmp(const void *t1, const void *t2) 45 | { 46 | const struct continuum *ct1 = t1, *ct2 = t2; 47 | 48 | if (ct1->value == ct2->value) { 49 | return 0; 50 | } else if (ct1->value > ct2->value) { 51 | return 1; 52 | } else { 53 | return -1; 54 | } 55 | } 56 | 57 | rstatus_t 58 | ketama_update(struct server_pool *pool) 59 | { 60 | uint32_t nserver; /* # server - live and dead */ 61 | uint32_t nlive_server; /* # live server */ 62 | uint32_t pointer_per_server; /* pointers per server proportional to weight */ 63 | uint32_t pointer_per_hash; /* pointers per hash */ 64 | uint32_t pointer_counter; /* # pointers on 
continuum */ 65 | uint32_t pointer_index; /* pointer index */ 66 | uint32_t points_per_server; /* points per server */ 67 | uint32_t continuum_index; /* continuum index */ 68 | uint32_t continuum_addition; /* extra space in the continuum */ 69 | uint32_t server_index; /* server index */ 70 | uint32_t value; /* continuum value */ 71 | uint32_t total_weight; /* total live server weight */ 72 | int64_t now; /* current timestamp in usec */ 73 | 74 | ASSERT(array_n(&pool->server) > 0); 75 | 76 | now = nc_usec_now(); 77 | if (now < 0) { 78 | return NC_ERROR; 79 | } 80 | 81 | /* 82 | * Count live servers and total weight, and also update the next time to 83 | * rebuild the distribution 84 | */ 85 | nserver = array_n(&pool->server); 86 | nlive_server = 0; 87 | total_weight = 0; 88 | pool->next_rebuild = 0LL; 89 | for (server_index = 0; server_index < nserver; server_index++) { 90 | struct server *server, **ps; 91 | 92 | ps = array_get(&pool->server, server_index); 93 | server = *ps; 94 | 95 | if (pool->auto_eject_hosts) { 96 | if (server->next_retry <= now) { 97 | server->next_retry = 0LL; 98 | nlive_server++; 99 | } else if (pool->next_rebuild == 0LL || 100 | server->next_retry < pool->next_rebuild) { 101 | pool->next_rebuild = server->next_retry; 102 | } 103 | } else { 104 | nlive_server++; 105 | } 106 | 107 | ASSERT(server->weight > 0); 108 | 109 | /* count weight only for live servers */ 110 | if (!pool->auto_eject_hosts || server->next_retry <= now) { 111 | total_weight += server->weight; 112 | } 113 | } 114 | 115 | pool->nlive_server = nlive_server; 116 | 117 | if (nlive_server == 0) { 118 | log_debug(LOG_DEBUG, "no live servers for pool %"PRIu32" '%.*s'", 119 | pool->idx, pool->name.len, pool->name.data); 120 | 121 | return NC_OK; 122 | } 123 | log_debug(LOG_DEBUG, "%"PRIu32" of %"PRIu32" servers are live for pool " 124 | "%"PRIu32" '%.*s'", nlive_server, nserver, pool->idx, 125 | pool->name.len, pool->name.data); 126 | 127 | continuum_addition = 
KETAMA_CONTINUUM_ADDITION; 128 | points_per_server = KETAMA_POINTS_PER_SERVER; 129 | /* 130 | * Allocate the continuum for the pool, the first time, and every time we 131 | * add a new server to the pool 132 | */ 133 | if (nlive_server > pool->nserver_continuum) { 134 | struct continuum *continuum; 135 | uint32_t nserver_continuum = nlive_server + continuum_addition; 136 | uint32_t ncontinuum = nserver_continuum * points_per_server; 137 | 138 | continuum = nc_realloc(pool->continuum, sizeof(*continuum) * ncontinuum); 139 | if (continuum == NULL) { 140 | return NC_ENOMEM; 141 | } 142 | 143 | pool->continuum = continuum; 144 | pool->nserver_continuum = nserver_continuum; 145 | /* pool->ncontinuum is initialized later as it could be <= ncontinuum */ 146 | } 147 | 148 | /* 149 | * Build a continuum with the servers that are live and points from 150 | * these servers that are proportial to their weight 151 | */ 152 | continuum_index = 0; 153 | pointer_counter = 0; 154 | for (server_index = 0; server_index < nserver; server_index++) { 155 | struct server *server, **ps; 156 | float pct; 157 | 158 | ps = array_get(&pool->server, server_index); 159 | server = *ps; 160 | 161 | if (pool->auto_eject_hosts && server->next_retry > now) { 162 | continue; 163 | } 164 | 165 | pct = (float)server->weight / (float)total_weight; 166 | pointer_per_server = (uint32_t) ((floorf((float) (pct * KETAMA_POINTS_PER_SERVER / 4 * (float)nlive_server + 0.0000000001))) * 4); 167 | pointer_per_hash = 4; 168 | 169 | log_debug(LOG_VERB, "%.*s:%"PRIu16" weight %"PRIu32" of %"PRIu32" " 170 | "pct %0.5f points per server %"PRIu32"", 171 | server->name.len, server->name.data, server->port, 172 | server->weight, total_weight, pct, pointer_per_server); 173 | 174 | for (pointer_index = 1; 175 | pointer_index <= pointer_per_server / pointer_per_hash; 176 | pointer_index++) { 177 | 178 | char host[KETAMA_MAX_HOSTLEN]= ""; 179 | size_t hostlen; 180 | uint32_t x; 181 | 182 | hostlen = snprintf(host, 
KETAMA_MAX_HOSTLEN, "%.*s-%u", 183 | server->name.len, server->name.data, 184 | pointer_index - 1); 185 | 186 | for (x = 0; x < pointer_per_hash; x++) { 187 | value = ketama_hash(host, hostlen, x); 188 | pool->continuum[continuum_index].index = server_index; 189 | pool->continuum[continuum_index++].value = value; 190 | } 191 | } 192 | pointer_counter += pointer_per_server; 193 | } 194 | 195 | pool->ncontinuum = pointer_counter; 196 | qsort(pool->continuum, pool->ncontinuum, sizeof(*pool->continuum), 197 | ketama_item_cmp); 198 | 199 | for (pointer_index = 0; 200 | pointer_index < ((nlive_server * KETAMA_POINTS_PER_SERVER) - 1); 201 | pointer_index++) { 202 | if (pointer_index + 1 >= pointer_counter) { 203 | break; 204 | } 205 | ASSERT(pool->continuum[pointer_index].value <= 206 | pool->continuum[pointer_index + 1].value); 207 | } 208 | 209 | log_debug(LOG_VERB, "updated pool %"PRIu32" '%.*s' with %"PRIu32" of " 210 | "%"PRIu32" servers live in %"PRIu32" slots and %"PRIu32" " 211 | "active points in %"PRIu32" slots", pool->idx, 212 | pool->name.len, pool->name.data, nlive_server, nserver, 213 | pool->nserver_continuum, pool->ncontinuum, 214 | (pool->nserver_continuum + continuum_addition) * points_per_server); 215 | 216 | return NC_OK; 217 | } 218 | 219 | uint32_t 220 | ketama_dispatch(struct continuum *continuum, uint32_t ncontinuum, uint32_t hash) 221 | { 222 | struct continuum *begin, *end, *left, *right, *middle; 223 | 224 | ASSERT(continuum != NULL); 225 | ASSERT(ncontinuum != 0); 226 | 227 | begin = left = continuum; 228 | end = right = continuum + ncontinuum; 229 | 230 | while (left < right) { 231 | middle = left + (right - left) / 2; 232 | if (middle->value < hash) { 233 | left = middle + 1; 234 | } else { 235 | right = middle; 236 | } 237 | } 238 | 239 | if (right == end) { 240 | right = begin; 241 | } 242 | 243 | return right->index; 244 | } 245 | -------------------------------------------------------------------------------- /src/hashkit/nc_modula.c: 
-------------------------------------------------------------------------------- 1 | /* 2 | * twemproxy - A fast and lightweight proxy for memcached protocol. 3 | * Copyright (C) 2011 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | #include 19 | #include 20 | 21 | #include 22 | #include 23 | #include 24 | 25 | #define MODULA_CONTINUUM_ADDITION 10 /* # extra slots to build into continuum */ 26 | #define MODULA_POINTS_PER_SERVER 1 27 | 28 | rstatus_t 29 | modula_update(struct server_pool *pool) 30 | { 31 | uint32_t nserver; /* # server - live and dead */ 32 | uint32_t nlive_server; /* # live server */ 33 | uint32_t pointer_per_server; /* pointers per server proportional to weight */ 34 | uint32_t pointer_counter; /* # pointers on continuum */ 35 | uint32_t points_per_server; /* points per server */ 36 | uint32_t continuum_index; /* continuum index */ 37 | uint32_t continuum_addition; /* extra space in the continuum */ 38 | uint32_t server_index; /* server index */ 39 | uint32_t weight_index; /* weight index */ 40 | uint32_t total_weight; /* total live server weight */ 41 | int64_t now; /* current timestamp in usec */ 42 | 43 | now = nc_usec_now(); 44 | if (now < 0) { 45 | return NC_ERROR; 46 | } 47 | 48 | nserver = array_n(&pool->server); 49 | nlive_server = 0; 50 | total_weight = 0; 51 | pool->next_rebuild = 0LL; 52 | 53 | for (server_index = 0; server_index < nserver; server_index++) { 54 | 
struct server *server, **ps; 55 | 56 | ps = array_get(&pool->server, server_index); 57 | server = *ps; 58 | 59 | if (pool->auto_eject_hosts) { 60 | if (server->next_retry <= now) { 61 | server->next_retry = 0LL; 62 | nlive_server++; 63 | } else if (pool->next_rebuild == 0LL || 64 | server->next_retry < pool->next_rebuild) { 65 | pool->next_rebuild = server->next_retry; 66 | } 67 | } else { 68 | nlive_server++; 69 | } 70 | 71 | ASSERT(server->weight > 0); 72 | 73 | /* count weight only for live servers */ 74 | if (!pool->auto_eject_hosts || server->next_retry <= now) { 75 | total_weight += server->weight; 76 | } 77 | } 78 | 79 | pool->nlive_server = nlive_server; 80 | 81 | if (nlive_server == 0) { 82 | ASSERT(pool->continuum != NULL); 83 | ASSERT(pool->ncontinuum != 0); 84 | 85 | log_debug(LOG_DEBUG, "no live servers for pool %"PRIu32" '%.*s'", 86 | pool->idx, pool->name.len, pool->name.data); 87 | 88 | return NC_OK; 89 | } 90 | log_debug(LOG_DEBUG, "%"PRIu32" of %"PRIu32" servers are live for pool " 91 | "%"PRIu32" '%.*s'", nlive_server, nserver, pool->idx, 92 | pool->name.len, pool->name.data); 93 | 94 | continuum_addition = MODULA_CONTINUUM_ADDITION; 95 | points_per_server = MODULA_POINTS_PER_SERVER; 96 | 97 | /* 98 | * Allocate the continuum for the pool, the first time, and every time we 99 | * add a new server to the pool 100 | */ 101 | if (total_weight > pool->nserver_continuum) { 102 | struct continuum *continuum; 103 | uint32_t nserver_continuum = total_weight + MODULA_CONTINUUM_ADDITION; 104 | uint32_t ncontinuum = nserver_continuum * MODULA_POINTS_PER_SERVER; 105 | 106 | continuum = nc_realloc(pool->continuum, sizeof(*continuum) * ncontinuum); 107 | if (continuum == NULL) { 108 | return NC_ENOMEM; 109 | } 110 | 111 | pool->continuum = continuum; 112 | pool->nserver_continuum = nserver_continuum; 113 | /* pool->ncontinuum is initialized later as it could be <= ncontinuum */ 114 | } 115 | 116 | /* update the continuum with the servers that are live */ 117 | 
continuum_index = 0; 118 | pointer_counter = 0; 119 | for (server_index = 0; server_index < nserver; server_index++) { 120 | struct server *server, **ps; 121 | 122 | ps = array_get(&pool->server, server_index); 123 | server = *ps; 124 | 125 | if (pool->auto_eject_hosts && server->next_retry > now) { 126 | continue; 127 | } 128 | 129 | for (weight_index = 0; weight_index < server->weight; weight_index++) { 130 | pointer_per_server = 1; 131 | 132 | pool->continuum[continuum_index].index = server_index; 133 | pool->continuum[continuum_index++].value = 0; 134 | 135 | pointer_counter += pointer_per_server; 136 | } 137 | } 138 | pool->ncontinuum = pointer_counter; 139 | 140 | log_debug(LOG_VERB, "updated pool %"PRIu32" '%.*s' with %"PRIu32" of " 141 | "%"PRIu32" servers live in %"PRIu32" slots and %"PRIu32" " 142 | "active points in %"PRIu32" slots", pool->idx, 143 | pool->name.len, pool->name.data, nlive_server, nserver, 144 | pool->nserver_continuum, pool->ncontinuum, 145 | (pool->nserver_continuum + continuum_addition) * points_per_server); 146 | 147 | return NC_OK; 148 | 149 | } 150 | 151 | uint32_t 152 | modula_dispatch(struct continuum *continuum, uint32_t ncontinuum, uint32_t hash) 153 | { 154 | struct continuum *c; 155 | 156 | ASSERT(continuum != NULL); 157 | ASSERT(ncontinuum != 0); 158 | 159 | c = continuum + hash % ncontinuum; 160 | 161 | return c->index; 162 | } 163 | -------------------------------------------------------------------------------- /src/hashkit/nc_murmur.c: -------------------------------------------------------------------------------- 1 | /* 2 | * twemproxy - A fast and lightweight proxy for memcached protocol. 3 | * Copyright (C) 2011 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 
7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /* 19 | * "Murmur" hash provided by Austin, tanjent@gmail.com 20 | * http://murmurhash.googlepages.com/ 21 | * 22 | * Note - This code makes a few assumptions about how your machine behaves - 23 | * 24 | * 1. We can read a 4-byte value from any address without crashing 25 | * 2. sizeof(int) == 4 26 | * 27 | * And it has a few limitations - 28 | * 1. It will not work incrementally. 29 | * 2. It will not produce the same results on little-endian and big-endian 30 | * machines. 31 | * 32 | * Updated to murmur2 hash - BP 33 | */ 34 | 35 | #include 36 | 37 | uint32_t 38 | hash_murmur(const char *key, size_t length) 39 | { 40 | /* 41 | * 'm' and 'r' are mixing constants generated offline. They're not 42 | * really 'magic', they just happen to work well. 
43 | */ 44 | 45 | const unsigned int m = 0x5bd1e995; 46 | const uint32_t seed = (0xdeadbeef * (uint32_t)length); 47 | const int r = 24; 48 | 49 | 50 | /* Initialize the hash to a 'random' value */ 51 | 52 | uint32_t h = seed ^ (uint32_t)length; 53 | 54 | /* Mix 4 bytes at a time into the hash */ 55 | 56 | const unsigned char * data = (const unsigned char *)key; 57 | 58 | while (length >= 4) { 59 | unsigned int k = *(unsigned int *)data; 60 | 61 | k *= m; 62 | k ^= k >> r; 63 | k *= m; 64 | 65 | h *= m; 66 | h ^= k; 67 | 68 | data += 4; 69 | length -= 4; 70 | } 71 | 72 | /* Handle the last few bytes of the input array */ 73 | 74 | switch(length) { 75 | case 3: 76 | h ^= ((uint32_t)data[2]) << 16; 77 | 78 | case 2: 79 | h ^= ((uint32_t)data[1]) << 8; 80 | 81 | case 1: 82 | h ^= data[0]; 83 | h *= m; 84 | 85 | default: 86 | break; 87 | }; 88 | 89 | /* 90 | * Do a few final mixes of the hash to ensure the last few bytes are 91 | * well-incorporated. 92 | */ 93 | 94 | h ^= h >> 13; 95 | h *= m; 96 | h ^= h >> 15; 97 | 98 | return h; 99 | } 100 | -------------------------------------------------------------------------------- /src/hashkit/nc_one_at_a_time.c: -------------------------------------------------------------------------------- 1 | /* 2 | * twemproxy - A fast and lightweight proxy for memcached protocol. 3 | * Copyright (C) 2011 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | /* 19 | * HashKit 20 | * Copyright (C) 2009 Brian Aker 21 | * All rights reserved. 22 | * 23 | * Use and distribution licensed under the BSD license. See 24 | * the COPYING file in the parent directory for full text. 25 | */ 26 | 27 | /* 28 | * This has is Jenkin's "One at A time Hash". 29 | * http://en.wikipedia.org/wiki/Jenkins_hash_function 30 | */ 31 | 32 | #include 33 | 34 | uint32_t 35 | hash_one_at_a_time(const char *key, size_t key_length) 36 | { 37 | const char *ptr = key; 38 | uint32_t value = 0; 39 | 40 | while (key_length--) { 41 | uint32_t val = (uint32_t) *ptr++; 42 | value += val; 43 | value += (value << 10); 44 | value ^= (value >> 6); 45 | } 46 | value += (value << 3); 47 | value ^= (value >> 11); 48 | value += (value << 15); 49 | 50 | return value; 51 | } 52 | -------------------------------------------------------------------------------- /src/hashkit/nc_random.c: -------------------------------------------------------------------------------- 1 | /* 2 | * twemproxy - A fast and lightweight proxy for memcached protocol. 3 | * Copyright (C) 2011 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | #include 19 | #include 20 | 21 | #include 22 | #include 23 | #include 24 | 25 | #define RANDOM_CONTINUUM_ADDITION 10 /* # extra slots to build into continuum */ 26 | #define RANDOM_POINTS_PER_SERVER 1 27 | 28 | rstatus_t 29 | random_update(struct server_pool *pool) 30 | { 31 | uint32_t nserver; /* # server - live and dead */ 32 | uint32_t nlive_server; /* # live server */ 33 | uint32_t pointer_per_server; /* pointers per server proportional to weight */ 34 | uint32_t pointer_counter; /* # pointers on continuum */ 35 | uint32_t points_per_server; /* points per server */ 36 | uint32_t continuum_index; /* continuum index */ 37 | uint32_t continuum_addition; /* extra space in the continuum */ 38 | uint32_t server_index; /* server index */ 39 | int64_t now; /* current timestamp in usec */ 40 | 41 | now = nc_usec_now(); 42 | if (now < 0) { 43 | return NC_ERROR; 44 | } 45 | 46 | nserver = array_n(&pool->server); 47 | nlive_server = 0; 48 | pool->next_rebuild = 0LL; 49 | 50 | for (server_index = 0; server_index < nserver; server_index++) { 51 | struct server *server, **ps; 52 | 53 | ps = array_get(&pool->server, server_index); 54 | server = *ps; 55 | 56 | if (pool->auto_eject_hosts) { 57 | if (server->next_retry <= now) { 58 | server->next_retry = 0LL; 59 | nlive_server++; 60 | } else if (pool->next_rebuild == 0LL || 61 | server->next_retry < pool->next_rebuild) { 62 | pool->next_rebuild = server->next_retry; 63 | } 64 | } else { 65 | nlive_server++; 66 | } 67 | } 68 | 69 | pool->nlive_server = nlive_server; 70 | 71 | if (nlive_server == 0) { 72 | ASSERT(pool->continuum != NULL); 73 | ASSERT(pool->ncontinuum != 0); 74 | 75 | log_debug(LOG_DEBUG, "no live servers for pool %"PRIu32" '%.*s'", 76 | pool->idx, pool->name.len, pool->name.data); 77 | 78 | return NC_OK; 79 | } 80 | log_debug(LOG_DEBUG, "%"PRIu32" of %"PRIu32" servers are live for pool " 81 | "%"PRIu32" '%.*s'", nlive_server, nserver, pool->idx, 82 | pool->name.len, pool->name.data); 83 | 84 | 
continuum_addition = RANDOM_CONTINUUM_ADDITION; 85 | points_per_server = RANDOM_POINTS_PER_SERVER; 86 | 87 | /* 88 | * Allocate the continuum for the pool, the first time, and every time we 89 | * add a new server to the pool 90 | */ 91 | if (nlive_server > pool->nserver_continuum) { 92 | struct continuum *continuum; 93 | uint32_t nserver_continuum = nlive_server + RANDOM_CONTINUUM_ADDITION; 94 | uint32_t ncontinuum = nserver_continuum * RANDOM_POINTS_PER_SERVER; 95 | 96 | continuum = nc_realloc(pool->continuum, sizeof(*continuum) * ncontinuum); 97 | if (continuum == NULL) { 98 | return NC_ENOMEM; 99 | } 100 | 101 | srandom((uint32_t)time(NULL)); 102 | 103 | pool->continuum = continuum; 104 | pool->nserver_continuum = nserver_continuum; 105 | /* pool->ncontinuum is initialized later as it could be <= ncontinuum */ 106 | } 107 | 108 | /* update the continuum with the servers that are live */ 109 | continuum_index = 0; 110 | pointer_counter = 0; 111 | for (server_index = 0; server_index < nserver; server_index++) { 112 | struct server *server, **ps; 113 | 114 | ps = array_get(&pool->server, server_index); 115 | server = *ps; 116 | 117 | if (pool->auto_eject_hosts && server->next_retry > now) { 118 | continue; 119 | } 120 | 121 | pointer_per_server = 1; 122 | 123 | pool->continuum[continuum_index].index = server_index; 124 | pool->continuum[continuum_index++].value = 0; 125 | 126 | pointer_counter += pointer_per_server; 127 | } 128 | pool->ncontinuum = pointer_counter; 129 | 130 | log_debug(LOG_VERB, "updated pool %"PRIu32" '%.*s' with %"PRIu32" of " 131 | "%"PRIu32" servers live in %"PRIu32" slots and %"PRIu32" " 132 | "active points in %"PRIu32" slots", pool->idx, 133 | pool->name.len, pool->name.data, nlive_server, nserver, 134 | pool->nserver_continuum, pool->ncontinuum, 135 | (pool->nserver_continuum + continuum_addition) * points_per_server); 136 | 137 | return NC_OK; 138 | 139 | } 140 | 141 | uint32_t 142 | random_dispatch(struct continuum *continuum, uint32_t 
ncontinuum, uint32_t hash) 143 | { 144 | struct continuum *c; 145 | 146 | ASSERT(continuum != NULL); 147 | ASSERT(ncontinuum != 0); 148 | 149 | c = continuum + random() % ncontinuum; 150 | 151 | return c->index; 152 | } 153 | -------------------------------------------------------------------------------- /src/lua/idcmap.lua: -------------------------------------------------------------------------------- 1 | 2 | primary = { 3 | tc = {"$master"}, 4 | jx = {"$master"}, 5 | nj02 = {"$master"}, 6 | nj03 = {"$master"}, 7 | hz01 = {"$master"}, 8 | } 9 | 10 | primary_preferred = { 11 | tc = {"$master","tc","jx",{"nj","nj03","hz"}}, 12 | jx = {"$master","jx","tc",{"nj","nj03","hz"}}, 13 | nj = {"$master","nj","nj03","hz",{"tc","jx"}}, 14 | nj03 = {"$master","nj03","nj","hz",{"tc","jx"}}, 15 | hz = {"$master","hz",{"nj","nj03"},{"tc","jx"}} 16 | } 17 | 18 | nearest = { 19 | tc = {{"tc","jx"},"yq",{"nj","nj03","hz","sh","sz"},"gz"}, 20 | jx = {{"jx","tc"},"yq",{"nj","nj03","hz","sh","sz"},"gz"}, 21 | nj = {{"nj","nj03"},{"hz","sh","sz"},{"tc","jx","yq"},"gz"}, 22 | nj03 = {{"nj03","nj"},{"hz","sh","sz"},{"tc","jx","yq"},"gz"}, 23 | hz = {"hz",{"nj","nj03","sh","sz"},{"tc","jx","yq"},"gz"}, 24 | sh = {"sh",{"nj","nj03","hz","sz"},"gz",{"tc","jx","yq"}}, 25 | gz = {"gz",{"nj","nj03","hz","sz"},"sh",{"tc","jx","yq"}}, 26 | yq = {"yq",{"jx","tc"},{"nj","nj03","hz","sh","sz"},"gz"}, 27 | sz = {"sz",{"jx","tc","yq"},{"nj","nj03","hz","sh"},"gz"} 28 | } 29 | 30 | return nearest 31 | -------------------------------------------------------------------------------- /src/lua/logic_idcmap.lua: -------------------------------------------------------------------------------- 1 | idc_to_region = { 2 | tc = "bj", 3 | jx = "bj", 4 | nj = "nj", 5 | nj03 = "nj", 6 | hz = "hz", 7 | sh = "sh", 8 | gz = "gz", 9 | sz = "sz", 10 | yq = "yq", 11 | sz = "sz" 12 | } 13 | 14 | return idc_to_region 15 | -------------------------------------------------------------------------------- /src/lua/pool.lua: 
--------------------------------------------------------------------------------
package.path = package.path .. ";lua/?.lua;../?.lua"

local ffi = require("ffi")
local C = ffi.C

ffi.cdef[[
    struct server;
    typedef int rstatus_t;

    void ffi_pool_clear_servers(struct server_pool *pool);
    void ffi_pool_add_server(struct server_pool *pool, struct server *server);
    struct string ffi_pool_get_env(struct server_pool *pool);

    void ffi_server_table_delete(struct server_pool *pool, const char *name);

    void ffi_slots_clear_replicasets(struct server_pool *pool);

    void ffi_server_update_done(struct server_pool *pool);
    void ffi_slots_update_done(struct server_pool *pool);
]]

local server = require("server")
local replica_set = require("replica_set")
local run_env = C.ffi_pool_get_env(__pool)

-- Script-side view of one server pool: the servers currently known, the
-- replica sets built from them, and free lists of recycled objects.
local _M = {
    server_map = {},
    replica_sets = {},

    -- for check change
    last_server_names = {},

    -- struct server_pool {}
    pool = __pool,

    -- resource pools
    _rs_pool = {},
    _se_pool = {},

    _run_env = ffi.string(run_env.data, run_env.len),
}

-- True when the pool runs in the "online" environment.
function _M.is_online(self)
    return self._run_env == "online"
end

-- Return a server object for config, reusing a recycled one when possible.
function _M.fetch_server(self, config)
    local s = nil

    if #self._se_pool == 0 then
        s = server:new(config)
    else
        s = table.remove(self._se_pool, 1)
        -- check this server safe to reuse
        if s:safe_reuse() then
            -- update config
            s:update_config(config)
            s:update_raw()
        else
            -- recycle the server again and alloc a new one
            -- (method call, so that self is the pool and s the argument;
            -- the new server must be built from config)
            self:put_server(s)
            s = server:new(config)
        end
    end

    return s
end

-- Put a server object on the free list.
function _M.put_server(self, s)
    table.insert(self._se_pool, s)
end

-- Return a replica set object, reusing a recycled one when possible.
function _M.fetch_replica_set(self)
    local rs = nil

    if #self._rs_pool == 0 then
        rs = replica_set:new(self.pool)
    else
        rs = table.remove(self._rs_pool, 1)
        --pop old tagged servers
        rs:deinit()
    end

    return rs
end

-- Put a replica set object on the free list.
function _M.put_replica_set(self, rs)
    table.insert(self._rs_pool, rs)
end

-- Public Methods

-- Replace the pool's server list with the servers described by configs
-- (as returned by parse() in redis.lua). Known servers are updated in place,
-- new ones are created, and servers that disappeared are dropped and
-- recycled. The native server list is rebuilt only when the set of server
-- addresses changed.
function _M.set_servers(self, configs)
    local tmp_server_map = {}

    -- Update server status
    for _, config in ipairs(configs) do
        local id = config.id
        local known = self.server_map[id]

        if known then
            known:update_config(config)
            tmp_server_map[id] = known
            self.server_map[id] = nil
        else
            tmp_server_map[id] = self:fetch_server(config)
        end
    end

    -- Swap server_map; tmp_server_map now holds only the leftovers
    self.server_map, tmp_server_map = tmp_server_map, self.server_map

    -- Drop servers that we no longer use
    for _, s in pairs(tmp_server_map) do
        C.ffi_server_table_delete(__pool, s.addr)
        self:put_server(s)
    end

    -- Check server list changes
    local tmp_server_names = {}

    for _, s in pairs(self.server_map) do
        table.insert(tmp_server_names, s.addr)
        -- do server connect operation in main thread
    end

    table.sort(tmp_server_names)

    -- join with a separator so that different lists can never concatenate
    -- to the same string
    local server_changed =
        #tmp_server_names ~= #self.last_server_names or
        table.concat(tmp_server_names, ",") ~= table.concat(self.last_server_names, ",")

    if server_changed then
        -- Reset stats
        C.ffi_pool_clear_servers(__pool)

        for _, s in pairs(self.server_map) do
            C.ffi_pool_add_server(__pool, s.raw)
        end

        C.ffi_server_update_done(__pool)
    end

    self.last_server_names = tmp_server_names
end

-- Group the current servers into replica sets: one set per master, with
-- each slave attached to the set of its master. A slave of a slave is
-- attached to the master one level up; deeper cascades raise an error.
-- Slaves whose master is unknown are skipped.
function _M.build_replica_sets(self)
    local tmp_rss = {}

    -- Set masters
    for _, s in pairs(self.server_map) do
        if s:is_master() then
            local rs = self:fetch_replica_set()
            rs:set_master(s)        -- for write
            rs:add_tagged_server(s) -- for read
            table.insert(tmp_rss, rs)
        end
    end

    -- Set slaves
    for _, s in pairs(self.server_map) do
        if s:is_slave() then
            local ms = self.server_map[s.master_id]
            -- ms is nil when the master is not part of this update, so it
            -- must be checked before any method is called on it
            if ms ~= nil and ms:is_slave() then
                -- slave cascade
                ms = self.server_map[ms.master_id]
                if ms ~= nil and ms:is_slave() then
                    error("slave cascade two level")
                end
            end
            if ms ~= nil and ms.replica_set ~= nil then
                ms.replica_set:add_tagged_server(s)
            end
        end
    end

    -- Swap replicasets
    self.replica_sets, tmp_rss = tmp_rss, self.replica_sets

    -- Recycle
    for _, rs in ipairs(tmp_rss) do
        self:put_replica_set(rs)
    end
end

-- Bind every replica set to its slot ranges and signal completion.
function _M.bind_slots(self)
    for _, rs in ipairs(self.replica_sets) do
        rs:bind_slots()
    end
    C.ffi_slots_update_done(__pool)
end

return _M

--------------------------------------------------------------------------------
/src/lua/redis.lua:
--------------------------------------------------------------------------------
package.path = package.path .. ";lua/?.lua;../?.lua"

print ("Script Init Begin")

local pool = require("pool")

-- Parse the body lines of a 'cluster nodes' reply into a list of server
-- config tables. Fields are space separated; as read by this function:
--   1 access flags ("r"/"w")   2 location "region:zone:room"   3 node id
--   4 "ip:port"                5 role flags                    6 master id
--   10 status                  11.. slot ranges (masters only)
-- Raises an error when a node is in a transient state or, in the online
-- environment, when fewer than three nodes are present.
-- NOTE(review): string.split / string.strip are not defined in this file;
-- presumably they are installed by the host before this script runs.
function parse(lines)
    local configs = {}
    local node_lines = {}

    -- skip summary and ip empty line
    for _, line in ipairs(lines) do
        if string.sub(line, 1, 2) ~= "# " then
            local xs = line:split(" ")
            -- skip update in this round
            if string.find(xs[5], "noaddr") ~= nil or
               string.find(xs[5], "handshake") ~= nil or
               xs[2] == "-" then
                error("parse: server state maybe noaddr,handshake,notag... please check")
            end

            local addr = xs[4]:split(":")
            if string.len(addr[1]) ~= 0 and
               (string.find(xs[5], "master") ~= nil or
                string.find(xs[5], "slave") ~= nil) then
                table.insert(node_lines, line)
            end
        end
    end

    if pool:is_online() then
        if #node_lines < 3 then
            error("parse: not enough nodes")
        end
    end

    -- parse nodes
    for _, line in ipairs(node_lines) do
        local xs = line:split(" ")
        local addr = xs[4]:split(":")
        -- kept local so that they do not leak into the global environment
        local ip, port = addr[1], addr[2]

        local role = "master"
        if string.find(xs[5], "master") == nil then
            role = "slave"
        end

        local loc = xs[2]:split(":")

        local c = {
            id = xs[3],
            addr = xs[4],
            ip = ip,
            port = tonumber(port),
            role = role,
            master_id = xs[6],
            status = xs[10],
            readable = false,
            writable = false,
            region = loc[1],
            zone = loc[2],
            room = loc[3],
        }

        if string.find(xs[1], "r") then
            c.readable = true
        end
        if string.find(xs[1], "w") then
            c.writable = true
        end

        if role == "master" then
            local ranges = {}
            for i = 11, #xs do
                -- skip importing/migrating info
                if string.sub(xs[i], 1, 1) == "[" then
                    break
                end

                local range = {}
                local pair = xs[i]:split("-")

                if #pair == 2 then
                    range.left = tonumber(pair[1])
                    range.right = tonumber(pair[2])
                else
                    -- a single slot
                    range.left = tonumber(xs[i])
                    range.right = tonumber(xs[i])
                end

                table.insert(ranges, range)
            end
            c.ranges = ranges
        end

        table.insert(configs, c)
    end
    return configs
end

-- Apply a raw 'cluster nodes' reply to the pool: parse it, then update the
-- servers, the replica sets and the slot bindings. "+OK" and "$-1" replies
-- are ignored. Raises an error on malformed, oversized or empty replies.
function update_cluster_nodes(msg)
    local head = string.sub(msg, 1, 3)
    if head == "+OK" or head == "$-1" then
        return
    end

    local lines = msg:strip():split("\n")
    -- the first line is the bulk-length header, e.g. "$1234"
    local bytes = tonumber(string.sub(lines[1], 2, -1))
    if bytes == nil then
        error("update_cluster_nodes: nodes info invalid")
    end
    if bytes > 163840 then
        error("update_cluster_nodes: nodes info too large > 163840 (FIXME)")
    end
    table.remove(lines, 1)

    -- parse message returned by 'cluster nodes'
    local configs = parse(lines)

    if #configs == 0 then
        error("update_cluster_nodes: no server found")
    end

    if #configs == 1 and configs[1].ranges ~= nil and #configs[1].ranges == 0 then
        error("update_cluster_nodes: free node found")
    end

    -- reconstruct servers, fix adds and drops
    pool:set_servers(configs)

    -- rebuild replica sets
    pool:build_replica_sets()

    -- bind replica sets to slots
    pool:bind_slots()
end

print ("Script Init Done")

--------------------------------------------------------------------------------
/src/lua/replica_set.lua:
--------------------------------------------------------------------------------
local ffi = require("ffi")
local C = ffi.C

ffi.cdef[[
    struct replicaset;
    struct server;
    struct server_pool;

    int ffi_slots_set_replicaset(struct server_pool *pool, struct replicaset *rs, int left, int right);

    struct replicaset* ffi_replicaset_new();
    void ffi_replicaset_deinit(struct replicaset *rs);
    void ffi_replicaset_delete(struct replicaset *rs);
    void ffi_replicaset_set_master(struct replicaset *rs, struct server *server);
    void ffi_replicaset_add_tagged_server(struct replicaset *rs, int tag_idx, struct server *server);
]]

local _M = {}
local mt = { __index = _M }

-- Create a replica set wrapper around a newly allocated native replicaset.
-- Raises an error when the native object cannot be created.
function _M.new(self)
    local raw = C.ffi_replicaset_new()
    if raw == nil then
        error("new: create replicaset object failed.")
    end
    return setmetatable({ raw = raw }, mt)
end

-- Make s the master of this set: take over its slot ranges, link the server
-- back to the set and, when the server is writable, register it natively.
function _M.set_master(self, s)
    self.ranges = s.ranges
    s.replica_set = self
    if s.writable then
        C.ffi_replicaset_set_master(self.raw, s.raw)
    end
end

-- Register s as a read candidate at its preference level; servers without
-- a level (tag_idx < 0) or not readable are ignored.
function _M.add_tagged_server(self, s)
    if s.tag_idx >= 0 and s.readable then
        C.ffi_replicaset_add_tagged_server(self.raw, s.tag_idx, s.raw)
    end
end

-- Bind every slot range of the master to this replica set.
function _M.bind_slots(self)
    for _, range in ipairs(self.ranges) do
        C.ffi_slots_set_replicaset(__pool, self.raw, range.left, range.right)
    end
end

-- Reset the native replicaset so that the object can be reused.
function _M.deinit(self)
    C.ffi_replicaset_deinit(self.raw)
end

return _M

--------------------------------------------------------------------------------
/src/lua/server.lua:
--------------------------------------------------------------------------------
package.path = package.path .. ";lua/?.lua;../?.lua"

local idcmap = require("idcmap")
local logic_idcmap = require("logic_idcmap")
local ffi = require("ffi")
local C = ffi.C

-- NOTE(review): ffi_server_safe_reuse is declared as returning void below,
-- yet _M.safe_reuse() returns its result and pool.lua tests it as a boolean.
-- With a void declaration the result is always nil (false). Confirm the
-- native signature before changing the declaration.
ffi.cdef[[
    struct server;
    struct server_pool;
    struct string { uint32_t len; uint8_t *data; };
    typedef int rstatus_t;

    struct string ffi_pool_get_zone(struct server_pool *pool);

    rstatus_t ffi_server_connect(struct server *server);
    rstatus_t ffi_server_disconnect(struct server *server);
    struct server* ffi_server_new(
        struct server_pool *pool, const char *name, const char *id, const char *ip, int port);
    void ffi_server_update_addr(struct server *server, const char *name, const char *ip, int port);
    void ffi_server_set_local_idc(struct server *server, int local_idc);
    void ffi_server_safe_reuse(struct server *server);
]]

local zone = C.ffi_pool_get_zone(__pool)

local _M = {
    local_zone = ffi.string(zone.data, zone.len),
    -- zone name (or "$master") -> zero-based preference level
    zone_index = {},
}
local mt = { __index = _M }

-- Initialize zone index
read_preference = idcmap[_M.local_zone]
if read_preference == nil then
    -- fail with a message that names the cause instead of an ipairs() error
    error("server: no read preference configured for zone '" .. _M.local_zone .. "'")
end
for i, item in ipairs(read_preference) do
    if type(item) == "table" then
        -- several zones sharing one preference level
        for _, z in ipairs(item) do
            _M.zone_index[z] = i-1
        end
    elseif type(item) == "string" then
        -- a single zone name, or the "$master" marker
        _M.zone_index[item] = i-1
    end
end

-- Create a server object from config together with its native counterpart.
-- Raises an error when the native object cannot be created.
function _M.new(self, config)
    local s = setmetatable({}, mt)
    s:update_config(config)
    s.raw = C.ffi_server_new(__pool, config.addr, s.id, s.ip, s.port)
    if s.raw == nil then
        error("new: create server object failed.")
    end
    C.ffi_server_set_local_idc(s.raw, s.local_idc)
    return s
end

-- Copy the fields of config into this object and derive addr, tag_idx
-- (preference level, -1 when the zone has none) and local_idc (1 when the
-- server's region is the region of the local zone, else 0).
function _M.update_config(self, config)
    self.id = config.id
    self.ip = config.ip
    self.port = config.port
    self.readable = config.readable
    self.writable = config.writable
    self.role = config.role
    self.master_id = config.master_id
    self.region = config.region
    self.zone = config.zone
    self.room = config.room
    self.ranges = config.ranges
    self.addr = string.format("%s:%d", self.ip, self.port)

    -- a master takes the "$master" level when the preference list has one
    if self.role == "master" and _M.zone_index["$master"] then
        self.tag_idx = _M.zone_index["$master"]
    else
        self.tag_idx = _M.zone_index[self.zone] or -1
    end

    if logic_idcmap[_M.local_zone] == self.region then
        self.local_idc = 1
    else
        self.local_idc = 0
    end
end

-- Push the current address of this object to the native server.
function _M.update_raw(self)
    -- update s.raw address --
    C.ffi_server_update_addr(self.raw, self.addr, self.ip, self.port)
end

function _M.connect(self)
    C.ffi_server_connect(self.raw)
end

function _M.disconnect(self)
    C.ffi_server_disconnect(self.raw)
end

function _M.is_master(self)
    return self.role == "master"
end

function _M.is_slave(self)
    return self.role == "slave"
end

-- Ask the native side whether this server object may be reused (see the
-- NOTE(review) above the cdef block about the declared return type).
function _M.safe_reuse(self)
    return C.ffi_server_safe_reuse(self.raw)
end

return _M

--------------------------------------------------------------------------------
/src/nc_array.c: -------------------------------------------------------------------------------- 1 | /* 2 | * twemproxy - A fast and lightweight proxy for memcached protocol. 3 | * Copyright (C) 2011 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | #include 19 | 20 | #include 21 | 22 | struct array * 23 | array_create(uint32_t n, size_t size) 24 | { 25 | struct array *a; 26 | 27 | ASSERT(n != 0 && size != 0); 28 | 29 | a = nc_alloc(sizeof(*a)); 30 | if (a == NULL) { 31 | return NULL; 32 | } 33 | 34 | a->elem = nc_alloc(n * size); 35 | if (a->elem == NULL) { 36 | nc_free(a); 37 | return NULL; 38 | } 39 | 40 | a->nelem = 0; 41 | a->size = size; 42 | a->nalloc = n; 43 | 44 | return a; 45 | } 46 | 47 | void 48 | array_destroy(struct array *a) 49 | { 50 | array_deinit(a); 51 | nc_free(a); 52 | } 53 | 54 | rstatus_t 55 | array_init(struct array *a, uint32_t n, size_t size) 56 | { 57 | ASSERT(n != 0 && size != 0); 58 | 59 | a->elem = nc_alloc(n * size); 60 | if (a->elem == NULL) { 61 | return NC_ENOMEM; 62 | } 63 | 64 | a->nelem = 0; 65 | a->size = size; 66 | a->nalloc = n; 67 | 68 | return NC_OK; 69 | } 70 | 71 | void 72 | array_deinit(struct array *a) 73 | { 74 | ASSERT(a->nelem == 0); 75 | 76 | if (a->elem != NULL) { 77 | nc_free(a->elem); 78 | } 79 | } 80 | 81 | uint32_t 82 | array_idx(struct array *a, void *elem) 83 | { 84 | uint8_t *p, *q; 85 | uint32_t off, idx; 86 | 87 | ASSERT(elem >= a->elem); 
88 | 89 | p = a->elem; 90 | q = elem; 91 | off = (uint32_t)(q - p); 92 | 93 | ASSERT(off % (uint32_t)a->size == 0); 94 | 95 | idx = off / (uint32_t)a->size; 96 | 97 | return idx; 98 | } 99 | 100 | void * 101 | array_push(struct array *a) 102 | { 103 | void *elem, *new; 104 | size_t size; 105 | 106 | if (a->nelem == a->nalloc) { 107 | 108 | /* the array is full; allocate new array */ 109 | size = a->size * a->nalloc; 110 | new = nc_realloc(a->elem, 2 * size); 111 | if (new == NULL) { 112 | return NULL; 113 | } 114 | 115 | a->elem = new; 116 | a->nalloc *= 2; 117 | } 118 | 119 | elem = (uint8_t *)a->elem + a->size * a->nelem; 120 | a->nelem++; 121 | 122 | return elem; 123 | } 124 | 125 | void * 126 | array_pop(struct array *a) 127 | { 128 | void *elem; 129 | 130 | ASSERT(a->nelem != 0); 131 | 132 | a->nelem--; 133 | elem = (uint8_t *)a->elem + a->size * a->nelem; 134 | 135 | return elem; 136 | } 137 | 138 | void * 139 | array_get(struct array *a, uint32_t idx) 140 | { 141 | void *elem; 142 | 143 | ASSERT(a->nelem != 0); 144 | ASSERT(idx < a->nelem); 145 | 146 | elem = (uint8_t *)a->elem + (a->size * idx); 147 | 148 | return elem; 149 | } 150 | 151 | void * 152 | array_top(struct array *a) 153 | { 154 | ASSERT(a->nelem != 0); 155 | 156 | return array_get(a, a->nelem - 1); 157 | } 158 | 159 | void 160 | array_swap(struct array *a, struct array *b) 161 | { 162 | struct array tmp; 163 | 164 | tmp = *a; 165 | *a = *b; 166 | *b = tmp; 167 | } 168 | 169 | /* 170 | * Sort nelem elements of the array in ascending order based on the 171 | * compare comparator. 172 | */ 173 | void 174 | array_sort(struct array *a, array_compare_t compare) 175 | { 176 | ASSERT(a->nelem != 0); 177 | 178 | qsort(a->elem, a->nelem, a->size, compare); 179 | } 180 | 181 | /* 182 | * Calls the func once for each element in the array as long as func returns 183 | * success. On failure short-circuits and returns the error status. 
184 | */ 185 | rstatus_t 186 | array_each(struct array *a, array_each_t func, void *data) 187 | { 188 | uint32_t i, nelem; 189 | 190 | ASSERT(array_n(a) != 0); 191 | ASSERT(func != NULL); 192 | 193 | for (i = 0, nelem = array_n(a); i < nelem; i++) { 194 | void *elem = array_get(a, i); 195 | rstatus_t status; 196 | 197 | status = func(elem, data); 198 | if (status != NC_OK) { 199 | return status; 200 | } 201 | } 202 | 203 | return NC_OK; 204 | } 205 | -------------------------------------------------------------------------------- /src/nc_array.h: -------------------------------------------------------------------------------- 1 | /* 2 | * twemproxy - A fast and lightweight proxy for memcached protocol. 3 | * Copyright (C) 2011 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | #ifndef _NC_ARRAY_H_ 19 | #define _NC_ARRAY_H_ 20 | 21 | #include 22 | 23 | typedef int (*array_compare_t)(const void *, const void *); 24 | typedef rstatus_t (*array_each_t)(void *, void *); 25 | 26 | struct array { 27 | uint32_t nelem; /* # element */ 28 | void *elem; /* element */ 29 | size_t size; /* element size */ 30 | uint32_t nalloc; /* # allocated element */ 31 | }; 32 | 33 | #define null_array { 0, NULL, 0, 0 } 34 | 35 | static inline void 36 | array_null(struct array *a) 37 | { 38 | a->nelem = 0; 39 | a->elem = NULL; 40 | a->size = 0; 41 | a->nalloc = 0; 42 | } 43 | 44 | static inline void 45 | array_set(struct array *a, void *elem, size_t size, uint32_t nalloc) 46 | { 47 | a->nelem = 0; 48 | a->elem = elem; 49 | a->size = size; 50 | a->nalloc = nalloc; 51 | } 52 | 53 | static inline uint32_t 54 | array_n(const struct array *a) 55 | { 56 | return a->nelem; 57 | } 58 | 59 | struct array *array_create(uint32_t n, size_t size); 60 | void array_destroy(struct array *a); 61 | rstatus_t array_init(struct array *a, uint32_t n, size_t size); 62 | void array_deinit(struct array *a); 63 | 64 | uint32_t array_idx(struct array *a, void *elem); 65 | void *array_push(struct array *a); 66 | void *array_pop(struct array *a); 67 | void *array_get(struct array *a, uint32_t idx); 68 | void *array_top(struct array *a); 69 | void array_swap(struct array *a, struct array *b); 70 | void array_sort(struct array *a, array_compare_t compare); 71 | rstatus_t array_each(struct array *a, array_each_t func, void *data); 72 | 73 | #endif 74 | -------------------------------------------------------------------------------- /src/nc_assoc.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define HASHSIZE(_n) (1 << (_n)) 5 | #define HASHMASK(_n) (HASHSIZE(_n) - 1) 6 | 7 | struct item { 8 | SLIST_ENTRY(item) h_sle; /* link in hash */ 9 | struct string key; /* key */ 10 | void *data; /* pointer to data */ 
11 | }; 12 | 13 | 14 | static struct item * 15 | assoc_create_item(const char *key, size_t nkey, void *data) 16 | { 17 | struct item *it; 18 | 19 | ASSERT(key != NULL && nkey != 0 && data != NULL); 20 | 21 | it = nc_alloc(sizeof(*it)); 22 | if (it == NULL) { 23 | return NULL; 24 | } 25 | it->key.data = nc_alloc(nkey + 1); 26 | memset(it->key.data, 0, nkey + 1); 27 | nc_memcpy(it->key.data, key, nkey); 28 | 29 | it->key.len = (uint32_t)nkey; 30 | it->data = data; 31 | 32 | return it; 33 | } 34 | 35 | static void 36 | assoc_destroy_item(struct item *it) 37 | { 38 | ASSERT(it != NULL); 39 | nc_free(it->key.data); 40 | nc_free(it); 41 | } 42 | 43 | struct hash_table * 44 | assoc_create_table(hash_func_t hash, uint32_t sz) 45 | { 46 | struct hash_table *table; 47 | struct item_slh *buckets; 48 | uint32_t i, hash_power; 49 | 50 | ASSERT(sz != 0); 51 | 52 | table = nc_alloc(sizeof(*table)); 53 | if (table == NULL) { 54 | return NULL; 55 | } 56 | 57 | for (hash_power = 0; HASHSIZE(hash_power) < sz; hash_power++); 58 | 59 | sz = HASHSIZE(hash_power); 60 | 61 | buckets = nc_alloc(sizeof(*buckets) * sz); 62 | if (buckets == NULL) { 63 | return NULL; 64 | } 65 | 66 | for (i = 0; i < sz; i++) { 67 | SLIST_INIT(&buckets[i]); 68 | } 69 | 70 | table->buckets = buckets; 71 | table->nbuckets = sz; 72 | table->mask = HASHMASK(hash_power); 73 | table->hash = hash; 74 | 75 | return table; 76 | } 77 | 78 | struct hash_table * 79 | assoc_create_table_default(void){ 80 | return assoc_create_table(hash_murmur, 10000); 81 | } 82 | 83 | void 84 | assoc_destroy_table(struct hash_table *table) 85 | { 86 | struct item_slh *bucket; 87 | struct item *it, *next; 88 | uint32_t i; 89 | 90 | ASSERT(table != NULL && table->buckets != NULL && table->nbuckets != 0); 91 | 92 | for (i = 0; i < table->nbuckets; i++) { 93 | bucket = &table->buckets[i]; 94 | SLIST_FOREACH_SAFE(it, bucket, h_sle, next) { 95 | SLIST_REMOVE(bucket, it, item, h_sle); 96 | assoc_destroy_item(it); 97 | } 98 | } 99 | 100 | 
nc_free(table->buckets); 101 | nc_free(table); 102 | } 103 | 104 | static struct item_slh * 105 | assoc_find_bucket(struct hash_table *table, const char *key, size_t nkey) 106 | { 107 | struct item_slh *bucket; 108 | uint32_t hv; 109 | 110 | ASSERT(table != NULL && table->buckets != NULL && table->nbuckets != 0); 111 | ASSERT(key != NULL && nkey != 0); 112 | 113 | hv = table->hash(key, nkey); 114 | hv &= table->mask; 115 | bucket = &table->buckets[hv]; 116 | 117 | return bucket; 118 | } 119 | 120 | void * 121 | assoc_find(struct hash_table *table, const char *key, size_t nkey) 122 | { 123 | struct item_slh *bucket; 124 | struct item *it; 125 | 126 | ASSERT(table != NULL && table->buckets != NULL && table->nbuckets != 0); 127 | ASSERT(key != NULL && nkey != 0); 128 | 129 | bucket = assoc_find_bucket(table, key, nkey); 130 | 131 | SLIST_FOREACH(it, bucket, h_sle) { 132 | if (nkey == it->key.len && (nc_strncmp(key, it->key.data, nkey) == 0)) { 133 | break; 134 | } 135 | } 136 | 137 | if (it) { 138 | return it->data; 139 | } else { 140 | return NULL; 141 | } 142 | } 143 | 144 | rstatus_t 145 | assoc_set(struct hash_table *table, const char *key, size_t nkey, void *data) 146 | { 147 | struct item_slh *bucket; 148 | struct item *it; 149 | 150 | ASSERT(table != NULL && table->buckets != NULL && table->nbuckets != 0); 151 | ASSERT(key != NULL && nkey != 0); 152 | 153 | bucket = assoc_find_bucket(table, key, nkey); 154 | 155 | SLIST_FOREACH(it, bucket, h_sle) { 156 | if (nkey == it->key.len && (nc_strncmp(key, it->key.data, nkey) == 0)) { 157 | break; 158 | } 159 | } 160 | 161 | if (it) { 162 | it->data = data; 163 | return NC_OK; 164 | } else { 165 | it = assoc_create_item(key, nkey, data); 166 | if (it == NULL) { 167 | return NC_ENOMEM; 168 | } 169 | SLIST_INSERT_HEAD(bucket, it, h_sle); 170 | } 171 | return NC_OK; 172 | } 173 | 174 | rstatus_t 175 | assoc_insert(struct hash_table *table, const char *key, size_t nkey, void *data) 176 | { 177 | struct item_slh *bucket; 178 
| struct item *it; 179 | 180 | ASSERT(assoc_find(table, key, nkey) == NULL); 181 | 182 | bucket = assoc_find_bucket(table, key, nkey); 183 | 184 | it = assoc_create_item(key, nkey, data); 185 | if (it == NULL) { 186 | return NC_ENOMEM; 187 | } 188 | 189 | SLIST_INSERT_HEAD(bucket, it, h_sle); 190 | return NC_OK; 191 | } 192 | 193 | 194 | void 195 | assoc_delete(struct hash_table *table, const char *key, size_t nkey) 196 | { 197 | struct item_slh *bucket; 198 | struct item *it, *next; 199 | 200 | ASSERT(table != NULL && table->buckets != NULL && table->nbuckets != 0); 201 | ASSERT(key != NULL && nkey != 0); 202 | 203 | bucket = assoc_find_bucket(table, key, nkey); 204 | 205 | SLIST_FOREACH_SAFE(it, bucket, h_sle, next) { 206 | if (nkey == it->key.len && (nc_strncmp(key, it->key.data, nkey) == 0)) { 207 | /* FIXME: don't use this code in critical path */ 208 | SLIST_REMOVE(bucket, it, item, h_sle); 209 | assoc_destroy_item(it); 210 | break; 211 | } 212 | } 213 | } 214 | -------------------------------------------------------------------------------- /src/nc_assoc.h: -------------------------------------------------------------------------------- 1 | #ifndef _NC_ASSOC_H_ 2 | #define _NC_ASSOC_H_ 3 | 4 | #include 5 | 6 | typedef uint32_t (*hash_func_t)(const char *, size_t); 7 | 8 | SLIST_HEAD(item_slh, item); 9 | 10 | struct hash_table { 11 | struct item_slh *buckets; 12 | uint32_t nbuckets; 13 | uint32_t mask; 14 | hash_func_t hash; 15 | }; 16 | 17 | struct hash_table * assoc_create_table(hash_func_t hash, uint32_t sz); 18 | struct hash_table * assoc_create_table_default(void); 19 | void assoc_destroy_table(struct hash_table *table); 20 | 21 | void* assoc_find(struct hash_table *table, const char *key, size_t nkey); 22 | rstatus_t assoc_insert(struct hash_table *table, const char *key, size_t nkey, void *data); 23 | rstatus_t assoc_set(struct hash_table *table, const char *key, size_t nkey, void *data); 24 | void assoc_delete(struct hash_table *table, const char 
*key, size_t nkey); 25 | 26 | #endif 27 | -------------------------------------------------------------------------------- /src/nc_client.c: -------------------------------------------------------------------------------- 1 | /* 2 | * twemproxy - A fast and lightweight proxy for memcached protocol. 3 | * Copyright (C) 2011 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | #include 19 | #include 20 | #include 21 | 22 | void 23 | client_ref(struct conn *conn, void *owner) 24 | { 25 | struct server_pool *pool = owner; 26 | 27 | ASSERT(conn->client && !conn->proxy); 28 | ASSERT(conn->owner == NULL); 29 | 30 | /* 31 | * We use null pointer as the sockaddr argument in the accept() call as 32 | * we are not interested in the address of the peer for the accepted 33 | * connection 34 | */ 35 | conn->family = 0; 36 | conn->addrlen = 0; 37 | conn->addr = NULL; 38 | 39 | pool->nc_conn_q++; 40 | TAILQ_INSERT_TAIL(&pool->c_conn_q, conn, conn_tqe); 41 | 42 | /* owner of the client connection is the server pool */ 43 | conn->owner = owner; 44 | 45 | log_debug(LOG_VVERB, "ref conn %p owner %p into pool '%.*s'", conn, pool, 46 | pool->name.len, pool->name.data); 47 | } 48 | 49 | void 50 | client_unref(struct conn *conn) 51 | { 52 | struct server_pool *pool; 53 | 54 | ASSERT(conn->client && !conn->proxy); 55 | ASSERT(conn->owner != NULL); 56 | 57 | pool = conn->owner; 58 | conn->owner = NULL; 59 | 60 | 
ASSERT(pool->nc_conn_q != 0); 61 | pool->nc_conn_q--; 62 | TAILQ_REMOVE(&pool->c_conn_q, conn, conn_tqe); 63 | 64 | log_debug(LOG_VVERB, "unref conn %p owner %p from pool '%.*s'", conn, 65 | pool, pool->name.len, pool->name.data); 66 | } 67 | 68 | bool 69 | client_active(struct conn *conn) 70 | { 71 | ASSERT(conn->client && !conn->proxy); 72 | 73 | ASSERT(TAILQ_EMPTY(&conn->imsg_q)); 74 | 75 | if (!TAILQ_EMPTY(&conn->omsg_q)) { 76 | log_debug(LOG_VVERB, "c %d is active", conn->sd); 77 | return true; 78 | } 79 | 80 | if (conn->rmsg != NULL) { 81 | log_debug(LOG_VVERB, "c %d is active", conn->sd); 82 | return true; 83 | } 84 | 85 | if (conn->smsg != NULL) { 86 | log_debug(LOG_VVERB, "c %d is active", conn->sd); 87 | return true; 88 | } 89 | 90 | log_debug(LOG_VVERB, "c %d is inactive", conn->sd); 91 | 92 | return false; 93 | } 94 | 95 | static void 96 | client_close_stats(struct context *ctx, struct server_pool *pool, err_t err, 97 | unsigned eof) 98 | { 99 | stats_pool_decr(ctx, pool, client_connections); 100 | 101 | if (eof) { 102 | stats_pool_incr(ctx, pool, client_eof); 103 | return; 104 | } 105 | 106 | switch (err) { 107 | case EPIPE: 108 | case ETIMEDOUT: 109 | case ECONNRESET: 110 | case ECONNABORTED: 111 | case ENOTCONN: 112 | case ENETDOWN: 113 | case ENETUNREACH: 114 | case EHOSTDOWN: 115 | case EHOSTUNREACH: 116 | default: 117 | stats_pool_incr(ctx, pool, client_err); 118 | break; 119 | } 120 | } 121 | 122 | void 123 | client_close(struct context *ctx, struct conn *conn) 124 | { 125 | rstatus_t status; 126 | struct msg *msg, *nmsg; /* current and next message */ 127 | 128 | ASSERT(conn->client && !conn->proxy); 129 | 130 | client_close_stats(ctx, conn->owner, conn->err, conn->eof); 131 | 132 | if (conn->sd < 0) { 133 | conn->unref(conn); 134 | conn_put(conn); 135 | return; 136 | } 137 | 138 | msg = conn->rmsg; 139 | if (msg != NULL) { 140 | conn->rmsg = NULL; 141 | 142 | ASSERT(msg->peer == NULL); 143 | ASSERT(msg->request && !msg->done); 144 | 145 | 
log_debug(LOG_INFO, "close c %d discarding pending req %"PRIu64" len " 146 | "%"PRIu32" type %d", conn->sd, msg->id, msg->mlen, 147 | msg->type); 148 | 149 | req_put(msg); 150 | } 151 | 152 | ASSERT(conn->smsg == NULL); 153 | ASSERT(TAILQ_EMPTY(&conn->imsg_q)); 154 | 155 | for (msg = TAILQ_FIRST(&conn->omsg_q); msg != NULL; msg = nmsg) { 156 | nmsg = TAILQ_NEXT(msg, c_tqe); 157 | 158 | /* dequeue the message (request) from client outq */ 159 | conn->dequeue_outq(ctx, conn, msg); 160 | 161 | if (msg->done) { 162 | log_debug(LOG_INFO, "close c %d discarding %s req %"PRIu64" len " 163 | "%"PRIu32" type %d", conn->sd, 164 | msg->error ? "error": "completed", msg->id, msg->mlen, 165 | msg->type); 166 | req_put(msg); 167 | } else { 168 | msg->swallow = 1; 169 | 170 | ASSERT(msg->request); 171 | ASSERT(msg->peer == NULL); 172 | 173 | log_debug(LOG_INFO, "close c %d schedule swallow of req %"PRIu64" " 174 | "len %"PRIu32" type %d", conn->sd, msg->id, msg->mlen, 175 | msg->type); 176 | } 177 | } 178 | ASSERT(TAILQ_EMPTY(&conn->omsg_q)); 179 | 180 | conn->unref(conn); 181 | 182 | status = close(conn->sd); 183 | if (status < 0) { 184 | log_error("close c %d failed, ignored: %s", conn->sd, strerror(errno)); 185 | } 186 | conn->sd = -1; 187 | 188 | conn_put(conn); 189 | } 190 | -------------------------------------------------------------------------------- /src/nc_client.h: -------------------------------------------------------------------------------- 1 | /* 2 | * twemproxy - A fast and lightweight proxy for memcached protocol. 3 | * Copyright (C) 2011 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 
7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | #ifndef _NC_CLIENT_H_ 19 | #define _NC_CLIENT_H_ 20 | 21 | #include 22 | 23 | bool client_active(struct conn *conn); 24 | void client_ref(struct conn *conn, void *owner); 25 | void client_unref(struct conn *conn); 26 | void client_close(struct context *ctx, struct conn *conn); 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /src/nc_conf.h: -------------------------------------------------------------------------------- 1 | /* 2 | * twemproxy - A fast and lightweight proxy for memcached protocol. 3 | * Copyright (C) 2011 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | #ifndef _NC_CONF_H_ 19 | #define _NC_CONF_H_ 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #include 27 | #include 28 | 29 | #define CONF_OK (void *) NULL 30 | #define CONF_ERROR (void *) "has an invalid value" 31 | 32 | #define CONF_ROOT_DEPTH 1 33 | #define CONF_MAX_DEPTH CONF_ROOT_DEPTH + 1 34 | 35 | #define CONF_DEFAULT_ARGS 3 36 | #define CONF_DEFAULT_POOL 8 37 | #define CONF_DEFAULT_SERVERS 8 38 | 39 | #define CONF_UNSET_NUM -1 40 | #define CONF_UNSET_PTR NULL 41 | #define CONF_UNSET_HASH (hash_type_t) -1 42 | #define CONF_UNSET_DIST (dist_type_t) -1 43 | 44 | #define CONF_DEFAULT_HASH HASH_FNV1A_64 45 | #define CONF_DEFAULT_DIST DIST_KETAMA 46 | #define CONF_DEFAULT_TIMEOUT -1 47 | #define CONF_DEFAULT_LISTEN_BACKLOG 512 48 | #define CONF_DEFAULT_CLIENT_CONNECTIONS 0 49 | #define CONF_DEFAULT_REDIS false 50 | #define CONF_DEFAULT_REDIS_DB 0 51 | #define CONF_DEFAULT_PRECONNECT false 52 | #define CONF_DEFAULT_AUTO_EJECT_HOSTS false 53 | #define CONF_DEFAULT_SERVER_RETRY_TIMEOUT 30 * 1000 /* in msec */ 54 | #define CONF_DEFAULT_SERVER_FAILURE_LIMIT 2 55 | #define CONF_DEFAULT_SERVER_CONNECTIONS 1 56 | #define CONF_DEFAULT_KETAMA_PORT 11211 57 | #define CONF_DEFAULT_REDIS_MSG_LIMIT 64 * 1024 58 | #define CONF_DEFAULT_WHITELIST_INTERVAL 10 59 | #define CONF_DEFAULT_SLOWLOG false 60 | #define CONF_DEFAULT_SLOWLOG_SLOWER_THAN 50 61 | #define CONF_DEFAULT_TCPKEEPALIVE true 62 | #define CONF_DEFAULT_TCPKEEPIDLE 60 63 | #define CONF_DEFAULT_TCPKEEPINTVAL 10 64 | #define CONF_DEFAULT_TCPKEEPCNT 5 65 | #define CONF_DEFAULT_SERVER_MAX_NODES 1000 66 | 67 | struct conf_listen { 68 | struct string pname; /* listen: as "name:port" */ 69 | struct string name; /* name */ 70 | int port; /* port */ 71 | mode_t perm; /* socket permissions */ 72 | struct sockinfo info; /* listen socket info */ 73 | unsigned valid:1; /* valid? 
*/ 74 | }; 75 | 76 | struct conf_server { 77 | struct string pname; /* server: as "name:port:weight" */ 78 | struct string name; /* name */ 79 | int port; /* port */ 80 | int weight; /* weight */ 81 | struct sockinfo info; /* connect socket info */ 82 | unsigned valid:1; /* valid? */ 83 | }; 84 | 85 | struct conf_pool { 86 | struct string name; /* pool name (root node) */ 87 | struct conf_listen listen; /* listen: */ 88 | hash_type_t hash; /* hash: */ 89 | struct string hash_tag; /* hash_tag: */ 90 | dist_type_t distribution; /* distribution: */ 91 | int timeout; /* timeout: */ 92 | int backlog; /* backlog: */ 93 | int client_connections; /* client_connections: */ 94 | int tcpkeepalive; /* tcpkeepalive: */ 95 | int tcpkeepidle; /* tcpkeepalive idle */ 96 | int tcpkeepintval; /* tcpkeepalive interval */ 97 | int tcpkeepcnt; /* tcpkeepalive count */ 98 | int redis; /* redis: */ 99 | int rediscluster; /* rediscluster */ 100 | struct string redis_auth; /* redis auth password */ 101 | int redis_db; /* redis_db: */ 102 | int preconnect; /* preconnect: */ 103 | int auto_eject_hosts; /* auto_eject_hosts: */ 104 | int server_max_nodes; /* max backend nodes */ 105 | int server_connections; /* server_connections: */ 106 | int server_retry_timeout; /* server_retry_timeout: in msec */ 107 | int server_failure_limit; /* server_failure_limit: */ 108 | int msg_max_length_limit; /* msg max length limit */ 109 | struct string zone; /* avaliablity zone */ 110 | struct string env; /* env type of the pool. online or offline [default:online] */ 111 | struct string whitelist; 112 | int whitelist_interval; 113 | struct array server; /* servers: conf_server[] */ 114 | unsigned valid:1; /* valid? */ 115 | int slowlog; /* slowlog? 
*/ 116 | int slowlog_slower_than; /* slowlog overtime setting */ 117 | }; 118 | 119 | struct conf { 120 | char *fname; /* file name (ref in argv[]) */ 121 | FILE *fh; /* file handle */ 122 | struct array arg; /* string[] (parsed {key, value} pairs) */ 123 | struct array pool; /* conf_pool[] (parsed pools) */ 124 | uint32_t depth; /* parsed tree depth */ 125 | yaml_parser_t parser; /* yaml parser */ 126 | yaml_event_t event; /* yaml event */ 127 | yaml_token_t token; /* yaml token */ 128 | unsigned seq:1; /* sequence? */ 129 | unsigned valid_parser:1; /* valid parser? */ 130 | unsigned valid_event:1; /* valid event? */ 131 | unsigned valid_token:1; /* valid token? */ 132 | unsigned sound:1; /* sound? */ 133 | unsigned parsed:1; /* parsed? */ 134 | unsigned valid:1; /* valid? */ 135 | }; 136 | 137 | struct command { 138 | struct string name; 139 | char *(*set)(struct conf *cf, struct command *cmd, void *data); 140 | int offset; 141 | }; 142 | 143 | #define null_command { null_string, NULL, 0 } 144 | 145 | char *conf_set_string(struct conf *cf, struct command *cmd, void *conf); 146 | char *conf_set_listen(struct conf *cf, struct command *cmd, void *conf); 147 | char *conf_add_server(struct conf *cf, struct command *cmd, void *conf); 148 | char *conf_set_num(struct conf *cf, struct command *cmd, void *conf); 149 | char *conf_set_bool(struct conf *cf, struct command *cmd, void *conf); 150 | char *conf_set_hash(struct conf *cf, struct command *cmd, void *conf); 151 | char *conf_set_distribution(struct conf *cf, struct command *cmd, void *conf); 152 | char *conf_set_hashtag(struct conf *cf, struct command *cmd, void *conf); 153 | 154 | rstatus_t conf_server_each_transform(void *elem, void *data); 155 | rstatus_t conf_pool_each_transform(void *elem, void *data); 156 | 157 | struct conf *conf_create(char *filename); 158 | void conf_destroy(struct conf *cf); 159 | 160 | #endif 161 | -------------------------------------------------------------------------------- 
/src/nc_connection.h:
--------------------------------------------------------------------------------
/*
 * twemproxy - A fast and lightweight proxy for memcached protocol.
 * Copyright (C) 2011 Twitter, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef _NC_CONNECTION_H_
#define _NC_CONNECTION_H_

/* NOTE(review): the include target is missing in this copy of the file */
#include

/*
 * Handler signatures stored per connection in struct conn below. Each
 * typedef corresponds to the struct field(s) of the matching name.
 */

/* receive side: recv, recv_next and recv_done handlers */
typedef rstatus_t (*conn_recv_t)(struct context *, struct conn*);
typedef struct msg* (*conn_recv_next_t)(struct context *, struct conn *, bool);
typedef void (*conn_recv_done_t)(struct context *, struct conn *, struct msg *, struct msg *);

/* send side: send, send_next and send_done handlers */
typedef rstatus_t (*conn_send_t)(struct context *, struct conn*);
typedef struct msg* (*conn_send_next_t)(struct context *, struct conn *);
typedef void (*conn_send_done_t)(struct context *, struct conn *, struct msg *);

/* close and "active?" handlers */
typedef void (*conn_close_t)(struct context *, struct conn *);
typedef bool (*conn_active_t)(struct conn *);

/* reference / unreference handlers; the void * is the owner being referenced */
typedef void (*conn_ref_t)(struct conn *, void *);
typedef void (*conn_unref_t)(struct conn *);

/* shared signature of the four in-queue / out-queue enqueue and dequeue handlers */
typedef void (*conn_msgq_t)(struct context *, struct conn *, struct msg *);
typedef void (*conn_post_connect_t)(struct context *ctx, struct conn *, struct server *server);
typedef void (*conn_swallow_msg_t)(struct conn *, struct msg *, struct msg *);

/*
 * A single connection: its socket, its message queues, the handlers that
 * drive it and a set of one-bit state flags. Per the flags below a
 * connection is a client, a server or a proxy connection.
 */
struct conn {
    TAILQ_ENTRY(conn)   conn_tqe;        /* link in server_pool / server / free q */
    void                *owner;          /* connection owner - server_pool / server */

    int                 sd;              /* socket descriptor */
    int                 family;          /* socket address family */
    socklen_t           addrlen;         /* socket length */
    struct sockaddr     *addr;           /* socket address (ref in server or server_pool) */

    struct msg_tqh      imsg_q;          /* incoming request Q */
    struct msg_tqh      omsg_q;          /* outstanding request Q */
    struct msg          *rmsg;           /* current message being rcvd */
    struct msg          *smsg;           /* current message being sent */

    conn_recv_t         recv;            /* recv (read) handler */
    conn_recv_next_t    recv_next;       /* recv next message handler */
    conn_recv_done_t    recv_done;       /* read done handler */
    conn_send_t         send;            /* send (write) handler */
    conn_send_next_t    send_next;       /* write next message handler */
    conn_send_done_t    send_done;       /* write done handler */
    conn_close_t        close;           /* close handler */
    conn_active_t       active;          /* active? handler */
    conn_post_connect_t post_connect;    /* post connect handler */
    conn_swallow_msg_t  swallow_msg;     /* react on messages to be swallowed */

    conn_ref_t          ref;             /* connection reference handler */
    conn_unref_t        unref;           /* connection unreference handler */

    conn_msgq_t         enqueue_inq;     /* connection inq msg enqueue handler */
    conn_msgq_t         dequeue_inq;     /* connection inq msg dequeue handler */
    conn_msgq_t         enqueue_outq;    /* connection outq msg enqueue handler */
    conn_msgq_t         dequeue_outq;    /* connection outq msg dequeue handler */

    size_t              recv_bytes;      /* received (read) bytes */
    size_t              send_bytes;      /* sent (written) bytes */

    uint32_t            events;          /* connection io events */
    err_t               err;             /* connection errno */
    unsigned            recv_active:1;   /* recv active? */
    unsigned            recv_ready:1;    /* recv ready? */
    unsigned            send_active:1;   /* send active? */
    unsigned            send_ready:1;    /* send ready? */

    unsigned            client:1;        /* client? or server? */
    unsigned            proxy:1;         /* proxy? */
    unsigned            connecting:1;    /* connecting? */
    unsigned            connected:1;     /* connected? */
    unsigned            eof:1;           /* eof? aka passive close? */
    unsigned            done:1;          /* done? aka close? */
    unsigned            redis:1;         /* redis? */
    unsigned            need_auth:1;     /* need_auth? */
};

/* tail queue head type for lists of connections linked through conn_tqe */
TAILQ_HEAD(conn_tqh, conn);

struct context *conn_to_ctx(struct conn *conn);
struct conn *conn_get(void *owner, bool client, bool redis);
struct conn *conn_get_proxy(void *owner);
void conn_put(struct conn *conn);
ssize_t conn_recv(struct conn *conn, void *buf, size_t size);
ssize_t conn_sendv(struct conn *conn, struct array *sendv, size_t nsend);
void conn_init(void);
void conn_deinit(void);
uint32_t conn_ncurr_conn(void);
uint64_t conn_ntotal_conn(void);
uint32_t conn_ncurr_cconn(void);

#endif

--------------------------------------------------------------------------------
/src/nc_core.h:
--------------------------------------------------------------------------------
/*
 * twemproxy - A fast and lightweight proxy for memcached protocol.
 * Copyright (C) 2011 Twitter, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef _NC_CORE_H_
#define _NC_CORE_H_

#ifdef HAVE_CONFIG_H
# include
#endif

/*
 * Map the HAVE_* feature macros onto the NC_* names defined here.
 */
#ifdef HAVE_DEBUG_LOG
# define NC_DEBUG_LOG 1
#endif

#ifdef HAVE_ASSERT_PANIC
# define NC_ASSERT_PANIC 1
#endif

#ifdef HAVE_ASSERT_LOG
# define NC_ASSERT_LOG 1
#endif

/* NC_STATS is always defined (1 or 0), unlike the other NC_* feature macros */
#ifdef HAVE_STATS
# define NC_STATS 1
#else
# define NC_STATS 0
#endif

/* exactly one event mechanism is selected; the build fails if none is available */
#ifdef HAVE_EPOLL
# define NC_HAVE_EPOLL 1
#elif HAVE_KQUEUE
# define NC_HAVE_KQUEUE 1
#elif HAVE_EVENT_PORTS
# define NC_HAVE_EVENT_PORTS 1
#else
# error missing scalable I/O event notification mechanism
#endif

#ifdef HAVE_LITTLE_ENDIAN
# define NC_LITTLE_ENDIAN 1
#endif

#ifdef HAVE_BACKTRACE
# define NC_HAVE_BACKTRACE 1
#endif

/* status codes returned as rstatus_t: zero is success, negative is failure */
#define NC_OK 0
#define NC_ERROR -1
#define NC_EAGAIN -2
#define NC_ENOMEM -3

/* reserved fds for std streams, log, stats fd, epoll etc.
*/ 67 | #define RESERVED_FDS 32 68 | 69 | typedef int rstatus_t; /* return type */ 70 | typedef int err_t; /* error type */ 71 | 72 | struct array; 73 | struct string; 74 | struct context; 75 | struct conn; 76 | struct conn_tqh; 77 | struct msg; 78 | struct msg_tqh; 79 | struct server; 80 | struct server_pool; 81 | struct mbuf; 82 | struct mhdr; 83 | struct conf; 84 | struct stats; 85 | struct instance; 86 | struct event_base; 87 | 88 | #include 89 | #include 90 | #include 91 | #include 92 | #include 93 | #include 94 | #include 95 | #include 96 | #include 97 | #include 98 | #include 99 | #include 100 | 101 | #include 102 | #include 103 | #include 104 | #include 105 | #include 106 | #include 107 | 108 | #include 109 | #include 110 | #include 111 | #include 112 | #include 113 | #include 114 | #include 115 | #include 116 | #include 117 | #include 118 | #include 119 | #include 120 | #include 121 | 122 | #define NC_TICK_INTERVAL (1 * 100) /* in msecs */ 123 | 124 | struct context { 125 | uint32_t id; /* unique context id */ 126 | struct conf *cf; /* configuration */ 127 | struct stats *stats; /* stats */ 128 | 129 | struct array pool; /* server_pool[] */ 130 | struct event_base *evb; /* event base */ 131 | int max_timeout; /* max timeout in msec */ 132 | int timeout; /* timeout in msec */ 133 | int64_t next_tick; /* next tick */ 134 | 135 | uint32_t max_nfd; /* max # files */ 136 | uint32_t max_ncconn; /* max # client connections */ 137 | uint32_t max_nsconn; /* max # server connections */ 138 | char *lua_path; /* lua script path copy from instance*/ 139 | }; 140 | 141 | 142 | struct instance { 143 | struct context *ctx; /* active context */ 144 | int log_level; /* log level */ 145 | char *log_filename; /* log filename */ 146 | char *conf_filename; /* configuration filename */ 147 | uint16_t stats_port; /* stats monitoring port */ 148 | int stats_interval; /* stats aggregation interval */ 149 | char *stats_addr; /* stats monitoring addr */ 150 | char 
hostname[NC_MAXHOSTNAMELEN]; /* hostname */ 151 | size_t mbuf_chunk_size; /* mbuf chunk size */ 152 | pid_t pid; /* process id */ 153 | char *pid_filename; /* pid filename */ 154 | unsigned pidfile:1; /* pid file created? */ 155 | char *lua_path; /* lua script path */ 156 | }; 157 | 158 | struct context *core_start(struct instance *nci); 159 | void core_stop(struct context *ctx); 160 | rstatus_t core_core(void *arg, uint32_t events); 161 | rstatus_t core_loop(struct context *ctx); 162 | 163 | #endif 164 | -------------------------------------------------------------------------------- /src/nc_ipwhitelist.c: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | * 3 | * Copyright (c) 2015 Baidu.com, Inc. All Rights Reserved 4 | * 5 | **************************************************************************/ 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | static const char *whitelist_file = NULL; 14 | static int check_interval; 15 | 16 | typedef struct _whitelist_t { 17 | struct hash_table *ht; 18 | long mtime; 19 | } whitelist_t; 20 | pthread_t whitelist_thread; 21 | 22 | whitelist_t *whitelist = NULL; 23 | 24 | static long get_mtime(const char* filename) 25 | { 26 | struct stat buf; 27 | if(lstat(filename, &buf)<0){ 28 | return -1; 29 | } 30 | return (long)buf.st_mtime; 31 | } 32 | 33 | whitelist_t* load_whitelist(void) { 34 | FILE *f; 35 | char buf[128]; 36 | char *line; 37 | char *end; 38 | long mtime; 39 | mtime = get_mtime(whitelist_file); 40 | if (mtime < 0) { 41 | return NULL; 42 | } 43 | f = fopen(whitelist_file, "r"); 44 | if (f == NULL) { 45 | log_warn("Open whitelist file %s error, errmsg: %s", whitelist_file, strerror(errno)); 46 | return NULL; 47 | } 48 | 49 | whitelist_t *w = (whitelist_t*) nc_alloc(sizeof(whitelist_t)); 50 | if (w == NULL) { 51 | log_warn("malloc failed"); 52 | return NULL; 53 | } 54 | w->ht 
= assoc_create_table_default(); 55 | if (w->ht == NULL) { 56 | nc_free(w); 57 | log_warn("hashset create failed"); 58 | return NULL; 59 | } 60 | w->mtime = mtime; 61 | 62 | while(fgets(buf, sizeof(buf), f) != NULL) { 63 | line = buf; 64 | //trim leading whitespace 65 | while (*line == ' ' || *line == '\t') line++; 66 | //skip empty line or comments 67 | if (line[0] == '#' || line[0] == '\r' || line[0] == '\n' || line[0] == 0) continue; 68 | end = line; 69 | //trim trailing characters 70 | while ((*end >= '0' && *end <= '9') || *end == '.') end++; 71 | *end = 0; 72 | 73 | if (strlen(line) == 0) continue; 74 | 75 | //add to ht 76 | if (assoc_set(w->ht, line, strlen(line), (void *)1) != NC_OK) { 77 | free_whitelist(w); 78 | return NULL; 79 | } 80 | log_debug(LOG_DEBUG, "whitelist added for %s", line); 81 | } 82 | fclose(f); 83 | return w; 84 | } 85 | 86 | int is_whitelist_changed(void) { 87 | long mtime = get_mtime(whitelist_file); 88 | static int flag = 0; 89 | if (mtime < 0 && flag == 0) { 90 | log_warn("Get mtime of whitelist file failed, possibly file does not exist"); 91 | flag = 1; 92 | } else if (mtime > 0) { 93 | flag = 0; 94 | } 95 | 96 | /* none -> none: do not need reload */ 97 | if (whitelist == NULL && mtime < 0) { 98 | return 0; 99 | } 100 | /* none -> have: need check */ 101 | if (whitelist == NULL && mtime > 0) { 102 | return 1; 103 | } 104 | /* have -> none: need check */ 105 | if (whitelist != NULL && mtime < 0) { 106 | return 1; 107 | } 108 | 109 | /* have -> have: need check */ 110 | if (whitelist != NULL && mtime > whitelist->mtime) { 111 | return 1; 112 | } 113 | return 0; 114 | } 115 | 116 | void free_whitelist(whitelist_t *w) { 117 | if (!w) return; 118 | assoc_destroy_table(w->ht); 119 | nc_free(w); 120 | } 121 | 122 | int in_whitelist_u(char *ip) { 123 | if (whitelist == NULL) return 1; 124 | if (assoc_find(whitelist->ht, ip, nc_strlen(ip)) != NULL) { 125 | return 1; 126 | } 127 | return 0; 128 | } 129 | 130 | int in_whitelist(struct in_addr 
in) { 131 | return in_whitelist_u(inet_ntoa(in)); 132 | } 133 | 134 | void *whitelist_loop() { 135 | log_debug(LOG_DEBUG, "whitelist_loop_started"); 136 | for(;;) { 137 | 138 | sleep((unsigned)check_interval); 139 | if (is_whitelist_changed()) { 140 | log_warn("whitelist changed"); 141 | whitelist_t *w = load_whitelist(); 142 | whitelist_t *tmp = whitelist; 143 | whitelist = w; 144 | 145 | /* if whitelist changed, sleep double time */ 146 | sleep((unsigned)check_interval); 147 | free_whitelist(tmp); 148 | } 149 | } 150 | return NULL; 151 | } 152 | 153 | int whitelist_init(const char *filename, int interval) { 154 | int ret; 155 | whitelist_file = filename; 156 | check_interval = interval; 157 | whitelist = load_whitelist(); 158 | ret = pthread_create(&whitelist_thread, NULL, whitelist_loop, NULL); 159 | if (ret) { 160 | log_warn("Error create whitelist check loop thread, errstr: %s", strerror(errno)); 161 | return -1; 162 | } 163 | return 0; 164 | } 165 | 166 | /* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ 167 | -------------------------------------------------------------------------------- /src/nc_ipwhitelist.h: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | * 3 | * Copyright (c) 2012 Baidu.com, Inc. 
All Rights Reserved
 *
 **************************************************************************/


#ifndef _NC_IPWHITELIST_H_
#define _NC_IPWHITELIST_H_

#include
#include
/* opaque snapshot of the whitelist file; defined in nc_ipwhitelist.c */
typedef struct _whitelist_t whitelist_t;
/* load the configured file; returns NULL on any failure */
whitelist_t* load_whitelist(void);
/* 1 if the file appeared, disappeared or is newer than the loaded snapshot, else 0 */
int is_whitelist_changed(void);
/* free a snapshot; NULL is accepted */
void free_whitelist(whitelist_t *w);
/* 1 if the textual address is allowed (always 1 when no whitelist is loaded), else 0 */
int in_whitelist_u(char *ip);
/* same as in_whitelist_u() for a binary IPv4 address */
int in_whitelist(struct in_addr addr);
/* load the initial whitelist and start the reload thread; 0 on success, -1 on failure */
int whitelist_init(const char *filename, int interval);

/* currently active whitelist, NULL when none is loaded */
extern whitelist_t *whitelist;
#endif //_NC_IPWHITELIST_H_

/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */

--------------------------------------------------------------------------------
/src/nc_mbuf.c:
--------------------------------------------------------------------------------
/*
 * twemproxy - A fast and lightweight proxy for memcached protocol.
 * Copyright (C) 2011 Twitter, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
16 | */ 17 | 18 | #include 19 | #include 20 | 21 | #include 22 | 23 | static uint32_t nfree_mbufq; /* # free mbuf */ 24 | static struct mhdr free_mbufq; /* free mbuf q */ 25 | 26 | static size_t mbuf_chunk_size; /* mbuf chunk size - header + data (const) */ 27 | static size_t mbuf_offset; /* mbuf offset in chunk (const) */ 28 | 29 | static struct mbuf * 30 | _mbuf_get(void) 31 | { 32 | struct mbuf *mbuf; 33 | uint8_t *buf; 34 | 35 | if (!STAILQ_EMPTY(&free_mbufq)) { 36 | ASSERT(nfree_mbufq > 0); 37 | 38 | mbuf = STAILQ_FIRST(&free_mbufq); 39 | nfree_mbufq--; 40 | STAILQ_REMOVE_HEAD(&free_mbufq, next); 41 | 42 | ASSERT(mbuf->magic == MBUF_MAGIC); 43 | goto done; 44 | } 45 | 46 | buf = nc_alloc(mbuf_chunk_size); 47 | if (buf == NULL) { 48 | return NULL; 49 | } 50 | 51 | /* 52 | * mbuf header is at the tail end of the mbuf. This enables us to catch 53 | * buffer overrun early by asserting on the magic value during get or 54 | * put operations 55 | * 56 | * <------------- mbuf_chunk_size -------------> 57 | * +-------------------------------------------+ 58 | * | mbuf data | mbuf header | 59 | * | (mbuf_offset) | (struct mbuf) | 60 | * +-------------------------------------------+ 61 | * ^ ^ ^ ^^ 62 | * | | | || 63 | * \ | | |\ 64 | * mbuf->start \ | | mbuf->end (one byte past valid bound) 65 | * mbuf->pos \ 66 | * \ mbuf 67 | * mbuf->last (one byte past valid byte) 68 | * 69 | */ 70 | mbuf = (struct mbuf *)(buf + mbuf_offset); 71 | mbuf->magic = MBUF_MAGIC; 72 | 73 | done: 74 | STAILQ_NEXT(mbuf, next) = NULL; 75 | return mbuf; 76 | } 77 | 78 | struct mbuf * 79 | mbuf_get(void) 80 | { 81 | struct mbuf *mbuf; 82 | uint8_t *buf; 83 | 84 | mbuf = _mbuf_get(); 85 | if (mbuf == NULL) { 86 | return NULL; 87 | } 88 | 89 | buf = (uint8_t *)mbuf - mbuf_offset; 90 | mbuf->start = buf; 91 | mbuf->end = buf + mbuf_offset; 92 | 93 | ASSERT(mbuf->end - mbuf->start == (int)mbuf_offset); 94 | ASSERT(mbuf->start < mbuf->end); 95 | 96 | mbuf->pos = mbuf->start; 97 | mbuf->last = 
mbuf->start; 98 | 99 | log_debug(LOG_VVERB, "get mbuf %p", mbuf); 100 | 101 | return mbuf; 102 | } 103 | 104 | static void 105 | mbuf_free(struct mbuf *mbuf) 106 | { 107 | uint8_t *buf; 108 | 109 | log_debug(LOG_VVERB, "put mbuf %p len %d", mbuf, mbuf->last - mbuf->pos); 110 | 111 | ASSERT(STAILQ_NEXT(mbuf, next) == NULL); 112 | ASSERT(mbuf->magic == MBUF_MAGIC); 113 | 114 | buf = (uint8_t *)mbuf - mbuf_offset; 115 | nc_free(buf); 116 | } 117 | 118 | void 119 | mbuf_put(struct mbuf *mbuf) 120 | { 121 | log_debug(LOG_VVERB, "put mbuf %p len %d", mbuf, mbuf->last - mbuf->pos); 122 | 123 | ASSERT(STAILQ_NEXT(mbuf, next) == NULL); 124 | ASSERT(mbuf->magic == MBUF_MAGIC); 125 | 126 | nfree_mbufq++; 127 | STAILQ_INSERT_HEAD(&free_mbufq, mbuf, next); 128 | } 129 | 130 | /* 131 | * Rewind the mbuf by discarding any of the read or unread data that it 132 | * might hold. 133 | */ 134 | void 135 | mbuf_rewind(struct mbuf *mbuf) 136 | { 137 | mbuf->pos = mbuf->start; 138 | mbuf->last = mbuf->start; 139 | } 140 | 141 | /* 142 | * Return the length of data in mbuf. Mbuf cannot contain more than 143 | * 2^32 bytes (4G). 144 | */ 145 | uint32_t 146 | mbuf_length(struct mbuf *mbuf) 147 | { 148 | ASSERT(mbuf->last >= mbuf->pos); 149 | 150 | return (uint32_t)(mbuf->last - mbuf->pos); 151 | } 152 | 153 | /* 154 | * Return the remaining space size for any new data in mbuf. Mbuf cannot 155 | * contain more than 2^32 bytes (4G). 156 | */ 157 | uint32_t 158 | mbuf_size(struct mbuf *mbuf) 159 | { 160 | ASSERT(mbuf->end >= mbuf->last); 161 | 162 | return (uint32_t)(mbuf->end - mbuf->last); 163 | } 164 | 165 | /* 166 | * Return the maximum available space size for data in any mbuf. Mbuf cannot 167 | * contain more than 2^32 bytes (4G). 
168 | */ 169 | size_t 170 | mbuf_data_size(void) 171 | { 172 | return mbuf_offset; 173 | } 174 | 175 | /* 176 | * Insert mbuf at the tail of the mhdr Q 177 | */ 178 | void 179 | mbuf_insert(struct mhdr *mhdr, struct mbuf *mbuf) 180 | { 181 | STAILQ_INSERT_TAIL(mhdr, mbuf, next); 182 | log_debug(LOG_VVERB, "insert mbuf %p len %d", mbuf, mbuf->last - mbuf->pos); 183 | } 184 | 185 | /* 186 | * Remove mbuf from the mhdr Q 187 | */ 188 | void 189 | mbuf_remove(struct mhdr *mhdr, struct mbuf *mbuf) 190 | { 191 | log_debug(LOG_VVERB, "remove mbuf %p len %d", mbuf, mbuf->last - mbuf->pos); 192 | 193 | STAILQ_REMOVE(mhdr, mbuf, mbuf, next); 194 | STAILQ_NEXT(mbuf, next) = NULL; 195 | } 196 | 197 | /* 198 | * Copy n bytes from memory area pos to mbuf. 199 | * 200 | * The memory areas should not overlap and the mbuf should have 201 | * enough space for n bytes. 202 | */ 203 | void 204 | mbuf_copy(struct mbuf *mbuf, uint8_t *pos, size_t n) 205 | { 206 | if (n == 0) { 207 | return; 208 | } 209 | 210 | /* mbuf has space for n bytes */ 211 | ASSERT(!mbuf_full(mbuf) && n <= mbuf_size(mbuf)); 212 | 213 | /* no overlapping copy */ 214 | ASSERT(pos < mbuf->start || pos >= mbuf->end); 215 | 216 | nc_memcpy(mbuf->last, pos, n); 217 | mbuf->last += n; 218 | } 219 | 220 | /* 221 | * Split mbuf h into h and t by copying data from h to t. Before 222 | * the copy, we invoke a precopy handler cb that will copy a predefined 223 | * string to the head of t. 224 | * 225 | * Return new mbuf t, if the split was successful. 
226 | */ 227 | struct mbuf * 228 | mbuf_split(struct mhdr *h, uint8_t *pos, mbuf_copy_t cb, void *cbarg) 229 | { 230 | struct mbuf *mbuf, *nbuf; 231 | size_t size; 232 | 233 | ASSERT(!STAILQ_EMPTY(h)); 234 | 235 | mbuf = STAILQ_LAST(h, mbuf, next); 236 | ASSERT(pos >= mbuf->pos && pos <= mbuf->last); 237 | 238 | nbuf = mbuf_get(); 239 | if (nbuf == NULL) { 240 | return NULL; 241 | } 242 | 243 | if (cb != NULL) { 244 | /* precopy nbuf */ 245 | cb(nbuf, cbarg); 246 | } 247 | 248 | /* copy data from mbuf to nbuf */ 249 | size = (size_t)(mbuf->last - pos); 250 | mbuf_copy(nbuf, pos, size); 251 | 252 | /* adjust mbuf */ 253 | mbuf->last = pos; 254 | 255 | log_debug(LOG_VVERB, "split into mbuf %p len %"PRIu32" and nbuf %p len " 256 | "%"PRIu32" copied %zu bytes", mbuf, mbuf_length(mbuf), nbuf, 257 | mbuf_length(nbuf), size); 258 | 259 | return nbuf; 260 | } 261 | 262 | void 263 | mbuf_init(struct instance *nci) 264 | { 265 | nfree_mbufq = 0; 266 | STAILQ_INIT(&free_mbufq); 267 | 268 | mbuf_chunk_size = nci->mbuf_chunk_size; 269 | mbuf_offset = mbuf_chunk_size - MBUF_HSIZE; 270 | 271 | log_debug(LOG_DEBUG, "mbuf hsize %d chunk size %zu offset %zu length %zu", 272 | MBUF_HSIZE, mbuf_chunk_size, mbuf_offset, mbuf_offset); 273 | } 274 | 275 | void 276 | mbuf_deinit(void) 277 | { 278 | while (!STAILQ_EMPTY(&free_mbufq)) { 279 | struct mbuf *mbuf = STAILQ_FIRST(&free_mbufq); 280 | mbuf_remove(&free_mbufq, mbuf); 281 | mbuf_free(mbuf); 282 | nfree_mbufq--; 283 | } 284 | ASSERT(nfree_mbufq == 0); 285 | } 286 | -------------------------------------------------------------------------------- /src/nc_mbuf.h: -------------------------------------------------------------------------------- 1 | /* 2 | * twemproxy - A fast and lightweight proxy for memcached protocol. 3 | * Copyright (C) 2011 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 
7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | #ifndef _NC_MBUF_H_ 19 | #define _NC_MBUF_H_ 20 | 21 | #include 22 | 23 | typedef void (*mbuf_copy_t)(struct mbuf *, void *); 24 | 25 | struct mbuf { 26 | uint32_t magic; /* mbuf magic (const) */ 27 | STAILQ_ENTRY(mbuf) next; /* next mbuf */ 28 | uint8_t *pos; /* read marker */ 29 | uint8_t *last; /* write marker */ 30 | uint8_t *start; /* start of buffer (const) */ 31 | uint8_t *end; /* end of buffer (const) */ 32 | }; 33 | 34 | STAILQ_HEAD(mhdr, mbuf); 35 | 36 | #define MBUF_MAGIC 0xdeadbeef 37 | #define MBUF_MIN_SIZE 512 38 | #define MBUF_MAX_SIZE 16777216 39 | #define MBUF_SIZE 16384 40 | #define MBUF_HSIZE sizeof(struct mbuf) 41 | 42 | static inline bool 43 | mbuf_empty(struct mbuf *mbuf) 44 | { 45 | return mbuf->pos == mbuf->last ? true : false; 46 | } 47 | 48 | static inline bool 49 | mbuf_full(struct mbuf *mbuf) 50 | { 51 | return mbuf->last == mbuf->end ? 
true : false; 52 | } 53 | 54 | void mbuf_init(struct instance *nci); 55 | void mbuf_deinit(void); 56 | struct mbuf *mbuf_get(void); 57 | void mbuf_put(struct mbuf *mbuf); 58 | void mbuf_rewind(struct mbuf *mbuf); 59 | uint32_t mbuf_length(struct mbuf *mbuf); 60 | uint32_t mbuf_size(struct mbuf *mbuf); 61 | size_t mbuf_data_size(void); 62 | void mbuf_insert(struct mhdr *mhdr, struct mbuf *mbuf); 63 | void mbuf_remove(struct mhdr *mhdr, struct mbuf *mbuf); 64 | void mbuf_copy(struct mbuf *mbuf, uint8_t *pos, size_t n); 65 | struct mbuf *mbuf_split(struct mhdr *h, uint8_t *pos, mbuf_copy_t cb, void *cbarg); 66 | 67 | #endif 68 | -------------------------------------------------------------------------------- /src/nc_proxy.h: -------------------------------------------------------------------------------- 1 | /* 2 | * twemproxy - A fast and lightweight proxy for memcached protocol. 3 | * Copyright (C) 2011 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
 */

#ifndef _NC_PROXY_H_
#define _NC_PROXY_H_

#include

/*
 * Handlers for proxy connections; the signatures match conn_ref_t,
 * conn_unref_t, conn_close_t and conn_recv_t.
 */
void proxy_ref(struct conn *conn, void *owner);
void proxy_unref(struct conn *conn);
void proxy_close(struct context *ctx, struct conn *conn);

/* NOTE(review): the (elem, data) signature looks like a per-element array callback - confirm */
rstatus_t proxy_each_init(void *elem, void *data);
rstatus_t proxy_each_deinit(void *elem, void *data);

rstatus_t proxy_init(struct context *ctx);
void proxy_deinit(struct context *ctx);
rstatus_t proxy_recv(struct context *ctx, struct conn *conn);

#endif

--------------------------------------------------------------------------------
/src/nc_rbtree.h:
--------------------------------------------------------------------------------
/*
 * twemproxy - A fast and lightweight proxy for memcached protocol.
 * Copyright (C) 2011 Twitter, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef _NC_RBTREE_
#define _NC_RBTREE_

/* node colour helpers: color is 1 for red and 0 for black */
#define rbtree_red(_node)           ((_node)->color = 1)
#define rbtree_black(_node)         ((_node)->color = 0)
#define rbtree_is_red(_node)        ((_node)->color)
#define rbtree_is_black(_node)      (!rbtree_is_red(_node))
#define rbtree_copy_color(_n1, _n2) ((_n1)->color = (_n2)->color)

/* one tree node; ordering is by key, data is carried along untouched */
struct rbnode {
    struct rbnode *left;     /* left link */
    struct rbnode *right;    /* right link */
    struct rbnode *parent;   /* parent link */
    int64_t       key;       /* key for ordering */
    void          *data;     /* opaque data */
    uint8_t       color;     /* red | black */
};

struct rbtree {
    struct rbnode *root;     /* root node */
    struct rbnode *sentinel; /* nil node */
};

void rbtree_node_init(struct rbnode *node);
void rbtree_init(struct rbtree *tree, struct rbnode *node);
struct rbnode *rbtree_min(struct rbtree *tree);
void rbtree_insert(struct rbtree *tree, struct rbnode *node);
void rbtree_delete(struct rbtree *tree, struct rbnode *node);

#endif

--------------------------------------------------------------------------------
/src/nc_script.h:
--------------------------------------------------------------------------------
#ifndef _NC_SCRIPT_H_
#define _NC_SCRIPT_H_

struct server_pool;

rstatus_t script_init(struct server_pool *pool, const char *path);
rstatus_t script_call(struct server_pool *pool, const uint8_t *body, int len, const char *func_name);

/* avoid compiler noise */
/* NOTE(review): the ffi_ prefix suggests these are called from the Lua scripts through FFI - confirm */

void ffi_server_table_delete(struct server_pool *pool, const char *name);

struct server* ffi_server_new(struct server_pool *pool, char *name, char *id, char *ip, int port);
void ffi_server_update_addr(struct server *s, char *name, char *ip, int port);
bool ffi_server_safe_reuse(struct server *server);

rstatus_t ffi_server_connect(struct server *server);
rstatus_t ffi_server_disconnect(struct server *server);

struct string ffi_pool_get_zone(struct server_pool *pool);
struct string ffi_pool_get_env(struct server_pool *pool);
void ffi_pool_clear_servers(struct server_pool *pool);
void ffi_pool_add_server(struct server_pool *pool, struct server *server);
void ffi_reset_stats(void);

struct replicaset* ffi_replicaset_new(void);
void ffi_replicaset_deinit(struct replicaset *rs);
void ffi_replicaset_delete(struct replicaset *rs);

void ffi_replicaset_set_master(struct replicaset *rs, struct server *server);
void ffi_replicaset_add_tagged_server(struct replicaset *rs, int tag_idx, struct server *server);

void ffi_slots_set_replicaset(struct server_pool *pool, struct replicaset *rs, int left, int right);

void ffi_stats_reset(struct server_pool *pool);

void ffi_server_update_done(struct server_pool *pool);
void ffi_slots_update_done(struct server_pool *pool);

void ffi_server_hashkey_set(struct server *server, const char *name, int nlen);
void ffi_server_set_local_idc(struct server *s, int local_idc);

void slots_debug(struct server_pool *pool, int level);

/* debug_slots() forwards to slots_debug() only when NC_DEBUG_LOG is defined; otherwise it expands to nothing */
#ifdef NC_DEBUG_LOG
#define debug_slots(pool, level) slots_debug(pool, level)
#else
#define debug_slots(pool, level)
#endif

#endif

--------------------------------------------------------------------------------
/src/nc_signal.c:
--------------------------------------------------------------------------------
/*
 * twemproxy - A fast and lightweight proxy for memcached protocol.
 * Copyright (C) 2011 Twitter, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | #include 19 | #include 20 | 21 | #include 22 | #include 23 | 24 | static struct signal signals[] = { 25 | { SIGUSR1, "SIGUSR1", 0, signal_handler }, 26 | { SIGUSR2, "SIGUSR2", 0, signal_handler }, 27 | { SIGTTIN, "SIGTTIN", 0, signal_handler }, 28 | { SIGTTOU, "SIGTTOU", 0, signal_handler }, 29 | { SIGHUP, "SIGHUP", 0, signal_handler }, 30 | { SIGINT, "SIGINT", 0, signal_handler }, 31 | { SIGSEGV, "SIGSEGV", (int)SA_RESETHAND, signal_handler }, 32 | { SIGPIPE, "SIGPIPE", 0, SIG_IGN }, 33 | { 0, NULL, 0, NULL } 34 | }; 35 | 36 | rstatus_t 37 | signal_init(void) 38 | { 39 | struct signal *sig; 40 | 41 | for (sig = signals; sig->signo != 0; sig++) { 42 | rstatus_t status; 43 | struct sigaction sa; 44 | 45 | memset(&sa, 0, sizeof(sa)); 46 | sa.sa_handler = sig->handler; 47 | sa.sa_flags = sig->flags; 48 | sigemptyset(&sa.sa_mask); 49 | 50 | status = sigaction(sig->signo, &sa, NULL); 51 | if (status < 0) { 52 | log_error("sigaction(%s) failed: %s", sig->signame, 53 | strerror(errno)); 54 | return NC_ERROR; 55 | } 56 | } 57 | 58 | return NC_OK; 59 | } 60 | 61 | void 62 | signal_deinit(void) 63 | { 64 | } 65 | 66 | void 67 | signal_handler(int signo) 68 | { 69 | struct signal *sig; 70 | void (*action)(void); 71 | char *actionstr; 72 | bool done; 73 | 74 | for (sig = signals; sig->signo != 0; sig++) { 75 | if (sig->signo == signo) { 76 | break; 77 | } 78 | } 79 | ASSERT(sig->signo != 0); 80 | 81 | actionstr = ""; 82 | action = NULL; 83 | done = false; 84 | 85 | switch (signo) { 86 | case SIGUSR1: 87 | actionstr = 
", up logbuf exchange period"; 88 | action = logbuf_exchange_period_up; 89 | break; 90 | 91 | case SIGUSR2: 92 | actionstr = ", down logbuf exchange period"; 93 | action = logbuf_exchange_period_down; 94 | break; 95 | 96 | case SIGTTIN: 97 | actionstr = ", up logging level"; 98 | action = log_level_up; 99 | break; 100 | 101 | case SIGTTOU: 102 | actionstr = ", down logging level"; 103 | action = log_level_down; 104 | break; 105 | 106 | case SIGHUP: 107 | actionstr = ", reopening log file"; 108 | action = log_reopen; 109 | break; 110 | 111 | case SIGINT: 112 | done = true; 113 | actionstr = ", exiting"; 114 | break; 115 | 116 | case SIGSEGV: 117 | log_stacktrace(); 118 | actionstr = ", core dumping"; 119 | raise(SIGSEGV); 120 | break; 121 | 122 | default: 123 | NOT_REACHED(); 124 | } 125 | 126 | if (action != NULL) { 127 | action(); 128 | } 129 | 130 | if (done) { 131 | exit(1); 132 | } 133 | } 134 | -------------------------------------------------------------------------------- /src/nc_signal.h: -------------------------------------------------------------------------------- 1 | /* 2 | * twemproxy - A fast and lightweight proxy for memcached protocol. 3 | * Copyright (C) 2011 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | #ifndef _NC_SIGNAL_H_ 19 | #define _NC_SIGNAL_H_ 20 | 21 | #include 22 | 23 | struct signal { 24 | int signo; 25 | char *signame; 26 | int flags; 27 | void (*handler)(int signo); 28 | }; 29 | 30 | rstatus_t signal_init(void); 31 | void signal_deinit(void); 32 | void signal_handler(int signo); 33 | 34 | #endif 35 | -------------------------------------------------------------------------------- /src/nc_string.c: -------------------------------------------------------------------------------- 1 | /* 2 | * twemproxy - A fast and lightweight proxy for memcached protocol. 3 | * Copyright (C) 2011 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | #include 19 | #include 20 | 21 | #include 22 | 23 | /* 24 | * String (struct string) is a sequence of unsigned char objects terminated 25 | * by the null character '\0'. The length of the string is pre-computed and 26 | * made available explicitly as an additional field. This means that we don't 27 | * have to walk the entire character sequence until the null terminating 28 | * character everytime that the length of the String is requested 29 | * 30 | * The only way to create a String is to initialize it using, string_init() 31 | * and duplicate an existing String - string_duplicate() or copy an existing 32 | * raw sequence of character bytes - string_copy(). 
Such String's must be 33 | * freed using string_deinit() 34 | * 35 | * We can also create String as reference to raw string - string_set_raw() 36 | * or to text string - string_set_text() or string(). Such String don't have 37 | * to be freed. 38 | */ 39 | 40 | void 41 | string_init(struct string *str) 42 | { 43 | str->len = 0; 44 | str->data = NULL; 45 | } 46 | 47 | void 48 | string_deinit(struct string *str) 49 | { 50 | ASSERT((str->len == 0 && str->data == NULL) || 51 | (str->len != 0 && str->data != NULL)); 52 | 53 | if (str->data != NULL) { 54 | nc_free(str->data); 55 | string_init(str); 56 | } 57 | } 58 | 59 | bool 60 | string_empty(const struct string *str) 61 | { 62 | ASSERT((str->len == 0 && str->data == NULL) || 63 | (str->len != 0 && str->data != NULL)); 64 | return str->len == 0 ? true : false; 65 | } 66 | 67 | rstatus_t 68 | string_duplicate(struct string *dst, const struct string *src) 69 | { 70 | ASSERT(dst->len == 0 && dst->data == NULL); 71 | ASSERT(src->len != 0 && src->data != NULL); 72 | 73 | dst->data = nc_strndup(src->data, src->len + 1); 74 | if (dst->data == NULL) { 75 | return NC_ENOMEM; 76 | } 77 | 78 | dst->len = src->len; 79 | dst->data[dst->len] = '\0'; 80 | 81 | return NC_OK; 82 | } 83 | 84 | rstatus_t 85 | string_copy(struct string *dst, const uint8_t *src, uint32_t srclen) 86 | { 87 | ASSERT(dst->len == 0 && dst->data == NULL); 88 | ASSERT(src != NULL && srclen != 0); 89 | 90 | dst->data = nc_strndup(src, srclen + 1); 91 | if (dst->data == NULL) { 92 | return NC_ENOMEM; 93 | } 94 | 95 | dst->len = srclen; 96 | dst->data[dst->len] = '\0'; 97 | 98 | return NC_OK; 99 | } 100 | 101 | int 102 | string_compare(const struct string *s1, const struct string *s2) 103 | { 104 | if (s1->len != s2->len) { 105 | return s1->len > s2->len ? 
1 : -1; 106 | } 107 | 108 | return nc_strncmp(s1->data, s2->data, s1->len); 109 | } 110 | 111 | static char * 112 | _safe_utoa(int _base, uint64_t val, char *buf) 113 | { 114 | char hex[] = "0123456789abcdef"; 115 | uint32_t base = (uint32_t) _base; 116 | *buf-- = 0; 117 | do { 118 | *buf-- = hex[val % base]; 119 | } while ((val /= base) != 0); 120 | return buf + 1; 121 | } 122 | 123 | static char * 124 | _safe_itoa(int base, int64_t val, char *buf) 125 | { 126 | char hex[] = "0123456789abcdef"; 127 | char *orig_buf = buf; 128 | const int32_t is_neg = (val < 0); 129 | *buf-- = 0; 130 | 131 | if (is_neg) { 132 | val = -val; 133 | } 134 | if (is_neg && base == 16) { 135 | int ix; 136 | val -= 1; 137 | for (ix = 0; ix < 16; ++ix) 138 | buf[-ix] = '0'; 139 | } 140 | 141 | do { 142 | *buf-- = hex[val % base]; 143 | } while ((val /= base) != 0); 144 | 145 | if (is_neg && base == 10) { 146 | *buf-- = '-'; 147 | } 148 | 149 | if (is_neg && base == 16) { 150 | int ix; 151 | buf = orig_buf - 1; 152 | for (ix = 0; ix < 16; ++ix, --buf) { 153 | /* *INDENT-OFF* */ 154 | switch (*buf) { 155 | case '0': *buf = 'f'; break; 156 | case '1': *buf = 'e'; break; 157 | case '2': *buf = 'd'; break; 158 | case '3': *buf = 'c'; break; 159 | case '4': *buf = 'b'; break; 160 | case '5': *buf = 'a'; break; 161 | case '6': *buf = '9'; break; 162 | case '7': *buf = '8'; break; 163 | case '8': *buf = '7'; break; 164 | case '9': *buf = '6'; break; 165 | case 'a': *buf = '5'; break; 166 | case 'b': *buf = '4'; break; 167 | case 'c': *buf = '3'; break; 168 | case 'd': *buf = '2'; break; 169 | case 'e': *buf = '1'; break; 170 | case 'f': *buf = '0'; break; 171 | } 172 | /* *INDENT-ON* */ 173 | } 174 | } 175 | return buf + 1; 176 | } 177 | 178 | static const char * 179 | _safe_check_longlong(const char *fmt, int32_t * have_longlong) 180 | { 181 | *have_longlong = false; 182 | if (*fmt == 'l') { 183 | fmt++; 184 | if (*fmt != 'l') { 185 | *have_longlong = (sizeof(long) == sizeof(int64_t)); 186 | } 
else { 187 | fmt++; 188 | *have_longlong = true; 189 | } 190 | } 191 | return fmt; 192 | } 193 | 194 | int 195 | _safe_vsnprintf(char *to, size_t size, const char *format, va_list ap) 196 | { 197 | char *start = to; 198 | char *end = start + size - 1; 199 | for (; *format; ++format) { 200 | int32_t have_longlong = false; 201 | if (*format != '%') { 202 | if (to == end) { /* end of buffer */ 203 | break; 204 | } 205 | *to++ = *format; /* copy ordinary char */ 206 | continue; 207 | } 208 | ++format; /* skip '%' */ 209 | 210 | format = _safe_check_longlong(format, &have_longlong); 211 | 212 | switch (*format) { 213 | case 'd': 214 | case 'i': 215 | case 'u': 216 | case 'x': 217 | case 'p': 218 | { 219 | int64_t ival = 0; 220 | uint64_t uval = 0; 221 | if (*format == 'p') 222 | have_longlong = (sizeof(void *) == sizeof(uint64_t)); 223 | if (have_longlong) { 224 | if (*format == 'u') { 225 | uval = va_arg(ap, uint64_t); 226 | } else { 227 | ival = va_arg(ap, int64_t); 228 | } 229 | } else { 230 | if (*format == 'u') { 231 | uval = va_arg(ap, uint32_t); 232 | } else { 233 | ival = va_arg(ap, int32_t); 234 | } 235 | } 236 | 237 | { 238 | char buff[22]; 239 | const int base = (*format == 'x' || *format == 'p') ? 16 : 10; 240 | 241 | /* *INDENT-OFF* */ 242 | char *val_as_str = (*format == 'u') ? 
243 | _safe_utoa(base, uval, &buff[sizeof(buff) - 1]) : 244 | _safe_itoa(base, ival, &buff[sizeof(buff) - 1]); 245 | /* *INDENT-ON* */ 246 | 247 | /* Strip off "ffffffff" if we have 'x' format without 'll' */ 248 | if (*format == 'x' && !have_longlong && ival < 0) { 249 | val_as_str += 8; 250 | } 251 | 252 | while (*val_as_str && to < end) { 253 | *to++ = *val_as_str++; 254 | } 255 | continue; 256 | } 257 | } 258 | case 's': 259 | { 260 | const char *val = va_arg(ap, char *); 261 | if (!val) { 262 | val = "(null)"; 263 | } 264 | while (*val && to < end) { 265 | *to++ = *val++; 266 | } 267 | continue; 268 | } 269 | } 270 | } 271 | *to = 0; 272 | return (int)(to - start); 273 | } 274 | 275 | int 276 | _safe_snprintf(char *to, size_t n, const char *fmt, ...) 277 | { 278 | int result; 279 | va_list args; 280 | va_start(args, fmt); 281 | result = _safe_vsnprintf(to, n, fmt, args); 282 | va_end(args); 283 | return result; 284 | } 285 | -------------------------------------------------------------------------------- /src/nc_string.h: -------------------------------------------------------------------------------- 1 | /* 2 | * twemproxy - A fast and lightweight proxy for memcached protocol. 3 | * Copyright (C) 2011 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | #ifndef _NC_STRING_H_ 19 | #define _NC_STRING_H_ 20 | 21 | #include 22 | #include 23 | #include 24 | 25 | #include 26 | 27 | struct string { 28 | uint32_t len; /* string length */ 29 | uint8_t *data; /* string data */ 30 | }; 31 | 32 | #define string(_str) { sizeof(_str) - 1, (uint8_t *)(_str) } 33 | #define null_string { 0, NULL } 34 | 35 | #define string_set_text(_str, _text) do { \ 36 | (_str)->len = (uint32_t)(sizeof(_text) - 1);\ 37 | (_str)->data = (uint8_t *)(_text); \ 38 | } while (0); 39 | 40 | #define string_set_raw(_str, _raw) do { \ 41 | (_str)->len = (uint32_t)(nc_strlen(_raw)); \ 42 | (_str)->data = (uint8_t *)(_raw); \ 43 | } while (0); 44 | 45 | void string_init(struct string *str); 46 | void string_deinit(struct string *str); 47 | bool string_empty(const struct string *str); 48 | rstatus_t string_duplicate(struct string *dst, const struct string *src); 49 | rstatus_t string_copy(struct string *dst, const uint8_t *src, uint32_t srclen); 50 | int string_compare(const struct string *s1, const struct string *s2); 51 | 52 | /* 53 | * Wrapper around common routines for manipulating C character 54 | * strings 55 | */ 56 | #define nc_memcpy(_d, _c, _n) \ 57 | memcpy(_d, _c, (size_t)(_n)) 58 | 59 | #define nc_memmove(_d, _c, _n) \ 60 | memmove(_d, _c, (size_t)(_n)) 61 | 62 | #define nc_memchr(_d, _c, _n) \ 63 | memchr(_d, _c, (size_t)(_n)) 64 | 65 | #define nc_strlen(_s) \ 66 | strlen((char *)(_s)) 67 | 68 | #define nc_strncmp(_s1, _s2, _n) \ 69 | strncmp((char *)(_s1), (char *)(_s2), (size_t)(_n)) 70 | 71 | #define nc_strchr(_p, _l, _c) \ 72 | _nc_strchr((uint8_t *)(_p), (uint8_t *)(_l), (uint8_t)(_c)) 73 | 74 | #define nc_strrchr(_p, _s, _c) \ 75 | _nc_strrchr((uint8_t *)(_p),(uint8_t *)(_s), (uint8_t)(_c)) 76 | 77 | #define nc_strndup(_s, _n) \ 78 | (uint8_t *)strndup((char *)(_s), (size_t)(_n)); 79 | 80 | /* 81 | * snprintf(s, n, ...) 
will write at most n - 1 of the characters printed into 82 | * the output string; the nth character then gets the terminating `\0'; if 83 | * the return value is greater than or equal to the n argument, the string 84 | * was too short and some of the printed characters were discarded; the output 85 | * is always null-terminated. 86 | * 87 | * Note that, the return value of snprintf() is always the number of characters 88 | * that would be printed into the output string, assuming n were limited not 89 | * including the trailing `\0' used to end output. 90 | * 91 | * scnprintf(s, n, ...) is same as snprintf() except, it returns the number 92 | * of characters printed into the output string not including the trailing '\0' 93 | */ 94 | #define nc_snprintf(_s, _n, ...) \ 95 | snprintf((char *)(_s), (size_t)(_n), __VA_ARGS__) 96 | 97 | #define nc_scnprintf(_s, _n, ...) \ 98 | _scnprintf((char *)(_s), (size_t)(_n), __VA_ARGS__) 99 | 100 | #define nc_vsnprintf(_s, _n, _f, _a) \ 101 | vsnprintf((char *)(_s), (size_t)(_n), _f, _a) 102 | 103 | #define nc_vscnprintf(_s, _n, _f, _a) \ 104 | _vscnprintf((char *)(_s), (size_t)(_n), _f, _a) 105 | 106 | #define nc_strftime(_s, _n, fmt, tm) \ 107 | (int)strftime((char *)(_s), (size_t)(_n), fmt, tm) 108 | 109 | /* 110 | * A (very) limited version of snprintf 111 | * @param to Destination buffer 112 | * @param n Size of destination buffer 113 | * @param fmt printf() style format string 114 | * @returns Number of bytes written, including terminating '\0' 115 | * Supports 'd' 'i' 'u' 'x' 'p' 's' conversion 116 | * Supports 'l' and 'll' modifiers for integral types 117 | * Does not support any width/precision 118 | * Implemented with simplicity, and async-signal-safety in mind 119 | */ 120 | int _safe_vsnprintf(char *to, size_t size, const char *format, va_list ap); 121 | int _safe_snprintf(char *to, size_t n, const char *fmt, ...); 122 | 123 | #define nc_safe_snprintf(_s, _n, ...) 
\ 124 | _safe_snprintf((char *)(_s), (size_t)(_n), __VA_ARGS__) 125 | 126 | #define nc_safe_vsnprintf(_s, _n, _f, _a) \ 127 | _safe_vsnprintf((char *)(_s), (size_t)(_n), _f, _a) 128 | 129 | static inline uint8_t * 130 | _nc_strchr(uint8_t *p, uint8_t *last, uint8_t c) 131 | { 132 | while (p < last) { 133 | if (*p == c) { 134 | return p; 135 | } 136 | p++; 137 | } 138 | 139 | return NULL; 140 | } 141 | 142 | static inline uint8_t * 143 | _nc_strrchr(uint8_t *p, uint8_t *start, uint8_t c) 144 | { 145 | while (p >= start) { 146 | if (*p == c) { 147 | return p; 148 | } 149 | p--; 150 | } 151 | 152 | return NULL; 153 | } 154 | 155 | #endif 156 | -------------------------------------------------------------------------------- /src/nc_util.h: -------------------------------------------------------------------------------- 1 | /* 2 | * twemproxy - A fast and lightweight proxy for memcached protocol. 3 | * Copyright (C) 2011 Twitter, Inc. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | #ifndef _NC_UTIL_H_ 19 | #define _NC_UTIL_H_ 20 | 21 | #include 22 | 23 | #define LF (uint8_t) 10 24 | #define CR (uint8_t) 13 25 | #define CRLF "\x0d\x0a" 26 | #define CRLF_LEN (sizeof("\x0d\x0a") - 1) 27 | 28 | #define NELEMS(a) ((sizeof(a)) / sizeof((a)[0])) 29 | 30 | #define MIN(a, b) ((a) < (b) ? (a) : (b)) 31 | #define MAX(a, b) ((a) > (b) ? (a) : (b)) 32 | 33 | #define SQUARE(d) ((d) * (d)) 34 | #define VAR(s, s2, n) (((n) < 2) ? 
0.0 : ((s2) - SQUARE(s)/(n)) / ((n) - 1)) 35 | #define STDDEV(s, s2, n) (((n) < 2) ? 0.0 : sqrt(VAR((s), (s2), (n)))) 36 | 37 | #define NC_INET4_ADDRSTRLEN (sizeof("255.255.255.255") - 1) 38 | #define NC_INET6_ADDRSTRLEN \ 39 | (sizeof("ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255") - 1) 40 | #define NC_INET_ADDRSTRLEN MAX(NC_INET4_ADDRSTRLEN, NC_INET6_ADDRSTRLEN) 41 | #define NC_UNIX_ADDRSTRLEN \ 42 | (sizeof(struct sockaddr_un) - offsetof(struct sockaddr_un, sun_path)) 43 | 44 | #define NC_MAXHOSTNAMELEN 256 45 | 46 | /* 47 | * Length of 1 byte, 2 bytes, 4 bytes, 8 bytes and largest integral 48 | * type (uintmax_t) in ascii, including the null terminator '\0' 49 | * 50 | * From stdint.h, we have: 51 | * # define UINT8_MAX (255) 52 | * # define UINT16_MAX (65535) 53 | * # define UINT32_MAX (4294967295U) 54 | * # define UINT64_MAX (__UINT64_C(18446744073709551615)) 55 | */ 56 | #define NC_UINT8_MAXLEN (3 + 1) 57 | #define NC_UINT16_MAXLEN (5 + 1) 58 | #define NC_UINT32_MAXLEN (10 + 1) 59 | #define NC_UINT64_MAXLEN (20 + 1) 60 | #define NC_UINTMAX_MAXLEN NC_UINT64_MAXLEN 61 | 62 | /* 63 | * Make data 'd' or pointer 'p', n-byte aligned, where n is a power of 2 64 | * of 2. 65 | */ 66 | #define NC_ALIGNMENT sizeof(unsigned long) /* platform word */ 67 | #define NC_ALIGN(d, n) (((d) + (n - 1)) & ~(n - 1)) 68 | #define NC_ALIGN_PTR(p, n) \ 69 | (void *) (((uintptr_t) (p) + ((uintptr_t) n - 1)) & ~((uintptr_t) n - 1)) 70 | 71 | /* 72 | * Wrapper to workaround well known, safe, implicit type conversion when 73 | * invoking system calls. 
74 | */ 75 | #define nc_gethostname(_name, _len) \ 76 | gethostname((char *)_name, (size_t)_len) 77 | 78 | #define nc_atoi(_line, _n) \ 79 | _nc_atoi((uint8_t *)_line, (size_t)_n) 80 | 81 | int nc_set_blocking(int sd); 82 | int nc_set_nonblocking(int sd); 83 | int nc_set_reuseaddr(int sd); 84 | int nc_set_tcpnodelay(int sd); 85 | int nc_set_linger(int sd, int timeout); 86 | int nc_set_sndbuf(int sd, int size); 87 | int nc_set_rcvbuf(int sd, int size); 88 | int nc_set_tcpkeepalive(int sd, int idel, int intval, int count); 89 | int nc_get_soerror(int sd); 90 | int nc_get_sndbuf(int sd); 91 | int nc_get_rcvbuf(int sd); 92 | 93 | int _nc_atoi(uint8_t *line, size_t n); 94 | bool nc_valid_port(int n); 95 | 96 | /* 97 | * Memory allocation and free wrappers. 98 | * 99 | * These wrappers enables us to loosely detect double free, dangling 100 | * pointer access and zero-byte alloc. 101 | */ 102 | #define nc_alloc(_s) \ 103 | _nc_alloc((size_t)(_s), __FILE__, __LINE__) 104 | 105 | #define nc_zalloc(_s) \ 106 | _nc_zalloc((size_t)(_s), __FILE__, __LINE__) 107 | 108 | #define nc_calloc(_n, _s) \ 109 | _nc_calloc((size_t)(_n), (size_t)(_s), __FILE__, __LINE__) 110 | 111 | #define nc_realloc(_p, _s) \ 112 | _nc_realloc(_p, (size_t)(_s), __FILE__, __LINE__) 113 | 114 | #define nc_free(_p) do { \ 115 | _nc_free(_p, __FILE__, __LINE__); \ 116 | (_p) = NULL; \ 117 | } while (0) 118 | 119 | void *_nc_alloc(size_t size, const char *name, int line); 120 | void *_nc_zalloc(size_t size, const char *name, int line); 121 | void *_nc_calloc(size_t nmemb, size_t size, const char *name, int line); 122 | void *_nc_realloc(void *ptr, size_t size, const char *name, int line); 123 | void _nc_free(void *ptr, const char *name, int line); 124 | 125 | /* 126 | * Wrappers to send or receive n byte message on a blocking 127 | * socket descriptor. 
128 | */ 129 | #define nc_sendn(_s, _b, _n) \ 130 | _nc_sendn(_s, _b, (size_t)(_n)) 131 | 132 | #define nc_recvn(_s, _b, _n) \ 133 | _nc_recvn(_s, _b, (size_t)(_n)) 134 | 135 | /* 136 | * Wrappers to read or write data to/from (multiple) buffers 137 | * to a file or socket descriptor. 138 | */ 139 | #define nc_read(_d, _b, _n) \ 140 | read(_d, _b, (size_t)(_n)) 141 | 142 | #define nc_readv(_d, _b, _n) \ 143 | readv(_d, _b, (int)(_n)) 144 | 145 | #define nc_write(_d, _b, _n) \ 146 | write(_d, _b, (size_t)(_n)) 147 | 148 | #define nc_writev(_d, _b, _n) \ 149 | writev(_d, _b, (int)(_n)) 150 | 151 | ssize_t _nc_sendn(int sd, const void *vptr, size_t n); 152 | ssize_t _nc_recvn(int sd, void *vptr, size_t n); 153 | 154 | /* 155 | * Wrappers for defining custom assert based on whether macro 156 | * NC_ASSERT_PANIC or NC_ASSERT_LOG was defined at the moment 157 | * ASSERT was called. 158 | */ 159 | #ifdef NC_ASSERT_PANIC 160 | 161 | #define ASSERT(_x) do { \ 162 | if (!(_x)) { \ 163 | nc_assert(#_x, __FILE__, __LINE__, 1); \ 164 | } \ 165 | } while (0) 166 | 167 | #define NOT_REACHED() ASSERT(0) 168 | 169 | #elif NC_ASSERT_LOG 170 | 171 | #define ASSERT(_x) do { \ 172 | if (!(_x)) { \ 173 | nc_assert(#_x, __FILE__, __LINE__, 0); \ 174 | } \ 175 | } while (0) 176 | 177 | #define NOT_REACHED() ASSERT(0) 178 | 179 | #else 180 | 181 | #define ASSERT(_x) 182 | 183 | #define NOT_REACHED() 184 | 185 | #endif 186 | 187 | void nc_assert(const char *cond, const char *file, int line, int panic); 188 | void nc_stacktrace(int skip_count); 189 | void nc_stacktrace_fd(int fd); 190 | 191 | int _scnprintf(char *buf, size_t size, const char *fmt, ...); 192 | int _vscnprintf(char *buf, size_t size, const char *fmt, va_list args); 193 | int64_t nc_usec_now(void); 194 | int64_t nc_msec_now(void); 195 | 196 | /* 197 | * Address resolution for internet (ipv4 and ipv6) and unix domain 198 | * socket address. 
199 | */ 200 | 201 | struct sockinfo { 202 | int family; /* socket address family */ 203 | socklen_t addrlen; /* socket address length */ 204 | union { 205 | struct sockaddr_in in; /* ipv4 socket address */ 206 | struct sockaddr_in6 in6; /* ipv6 socket address */ 207 | struct sockaddr_un un; /* unix domain address */ 208 | } addr; 209 | }; 210 | 211 | int nc_resolve(struct string *name, int port, struct sockinfo *si); 212 | char *nc_unresolve_addr(struct sockaddr *addr, socklen_t addrlen); 213 | char *nc_unresolve_peer_desc(int sd); 214 | char *nc_unresolve_desc(int sd); 215 | 216 | #endif 217 | -------------------------------------------------------------------------------- /src/proto/Makefile.am: -------------------------------------------------------------------------------- 1 | MAINTAINERCLEANFILES = Makefile.in 2 | 3 | AM_CPPFLAGS = -I $(top_srcdir)/src 4 | 5 | AM_CFLAGS = -Wall -Wshadow 6 | AM_CFLAGS += -Wno-unused-parameter -Wno-unused-value 7 | 8 | noinst_LIBRARIES = libproto.a 9 | 10 | noinst_HEADERS = nc_proto.h 11 | 12 | libproto_a_SOURCES = \ 13 | nc_memcache.c \ 14 | nc_redis.c 15 | --------------------------------------------------------------------------------