├── log
│   └── .gitignore
├── doc
│   └── twemproxy-sentinel-cluster.png
├── bin
│   ├── active
│   ├── gen_conf.py
│   └── deploy.py
├── .gitignore
├── conf
│   ├── sentinel.conf
│   ├── control.sh
│   ├── conf.py
│   └── redis.conf
├── tests
│   └── run.sh
├── lib
│   ├── utils.py
│   └── monitor.py
└── README.rst

/log/.gitignore:
--------------------------------------------------------------------------------
*

--------------------------------------------------------------------------------
/doc/twemproxy-sentinel-cluster.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UlricQin/redis-mgr/master/doc/twemproxy-sentinel-cluster.png

--------------------------------------------------------------------------------
/bin/active:
--------------------------------------------------------------------------------
#!/bin/bash
eval "$(register-python-argcomplete bin/deploy.py)"
eval "$(register-python-argcomplete ./bin/deploy.py)"

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*.pyc
log/*.log
log/*.conf
list
t
conf/conf_inner.py
script/*
tmp/*
data/*
nohup.out
core

--------------------------------------------------------------------------------
/conf/sentinel.conf:
--------------------------------------------------------------------------------
daemonize yes
port ${port}
logfile ${logfile}
pidfile ${pidfile}
loglevel debug

# sentinel monitor
#sentinel monitor mymaster 127.0.0.1 6379 2
#sentinel down-after-milliseconds mymaster 60000
#sentinel failover-timeout mymaster 180000
#sentinel can-failover mymaster yes
#sentinel parallel-syncs mymaster 1

--------------------------------------------------------------------------------
/conf/control.sh:
--------------------------------------------------------------------------------
#!/bin/bash

start()
{
    stop
    ulimit -c unlimited

    pushd . > /dev/null

    cd `dirname $$0`
    ${startcmd}
    popd
}

stop()
{
    pkill -f '${runcmd}'
}

case C"$$1" in
C)
    echo "Usage: $$0 {start|stop}"
    ;;
Cstart)
    start
    echo "Done!"
    ;;
Cstop)
    stop
    echo "Done!"
    ;;
C*)
    echo "Usage: $$0 {start|stop}"
    ;;
esac

--------------------------------------------------------------------------------
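control.sh above is a string.Template source: ${startcmd} and ${runcmd} are filled in per instance, and $$ collapses to a literal shell $. A minimal sketch of the rendering done by Base._gen_control_script in bin/deploy.py (the arg values here are illustrative, not taken from a real cluster):

    # sketch: render conf/control.sh into an instance's control script
    from string import Template

    args = {
        'startcmd': 'bin/redis-server conf/redis.conf',  # illustrative; deploy.py
        'runcmd': 'redis-server \*:20000',               # computes these per instance
    }
    script = Template(open('conf/control.sh').read()).substitute(args)
    # '$$0' in the template comes out as '$0' in the generated script
    open('redis_control', 'w').write(script)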
/tests/run.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#file   : run.sh
#author : ning
#date   : 2014-01-06 16:30:21

CLUSTER='cluster0'

#test basic
./bin/deploy.py $CLUSTER deploy
./bin/deploy.py $CLUSTER start
./bin/deploy.py $CLUSTER printcmd
./bin/deploy.py $CLUSTER status
./bin/deploy.py $CLUSTER log
./bin/deploy.py $CLUSTER mastercmd 'PING'
./bin/deploy.py $CLUSTER rdb

#test bench
./bin/deploy.py $CLUSTER mlive_qps &
./bin/deploy.py $CLUSTER nbench
pkill -f './bin/deploy.py'

#test failover
./bin/deploy.py $CLUSTER scheduler &
./bin/deploy.py $CLUSTER randomkill
pkill -f './bin/deploy.py'

./bin/deploy.py $CLUSTER stop

--------------------------------------------------------------------------------
/bin/gen_conf.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
#coding: utf-8
#file   : gen_conf.py
#author : ning
#date   : 2013-12-20 09:36:42

import urllib, urllib2
import os, sys
import re, time
import logging
from pcl import common

from string import Template as T
T.s = T.substitute

BASEDIR = '/tmp/r'
HOSTS = [
    '127.0.1.1',
    '127.0.1.2',
    '127.0.1.3',
    '127.0.1.4',
]
MASTER_PER_MACHINE = 2

# generate the "redis" section
port = 20000
for i in range(len(HOSTS)):
    for j in range(MASTER_PER_MACHINE):
        slave_port = port + 1000

        m = HOSTS[i]
        s = HOSTS[(i+1)%len(HOSTS)]
        template = "('$m:$port', '$BASEDIR/redis-$port'), ('$s:$slave_port', '$BASEDIR/redis-$slave_port'),"
        print T(template).s(globals())
        port += 1

# generate the "nutcracker" section
port = 22000
for i in range(len(HOSTS)):
    m = HOSTS[i]
    for j in range(MASTER_PER_MACHINE):
        xport = port + j
        template = "('$m:$xport', '$BASEDIR/nutcracker-$xport'),"
        print T(template).s(globals())

--------------------------------------------------------------------------------
/lib/utils.py:
--------------------------------------------------------------------------------
import os
import re
import sys
import time
import copy
import thread
import socket
import threading
import logging
import inspect
import argparse
import telnetlib
import redis
import random
import json

from collections import defaultdict
from argparse import RawTextHelpFormatter

from pcl import common
from pcl import crontab
from string import Template

# we have to do this here, so that lib/monitor.py can use conf.xxx
# import the config module named by $REDIS_DEPLOY_CONFIG (conf/conf.py by default)
if 'REDIS_DEPLOY_CONFIG' not in os.environ:
    logging.error('please export REDIS_DEPLOY_CONFIG=conf')
    exit(1)
config_name = os.environ['REDIS_DEPLOY_CONFIG']
conf = __import__(config_name, globals(), locals(), [], 0) #import config_module

common.system('mkdir -p data tmp', None)

def my_json_encode(j):
    return json.dumps(j, cls=common.MyEncoder)

def strstr(s1, s2):
    return s1.find(s2) != -1

def lets_sleep(SLEEP_TIME = 0.1):
    time.sleep(SLEEP_TIME)

def TT(template, args): #todo: modify all
    return Template(template).substitute(args)

# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4

--------------------------------------------------------------------------------
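utils.py resolves the config module by name from the REDIS_DEPLOY_CONFIG environment variable; a minimal equivalent sketch (it assumes conf/ is on sys.path, which bin/deploy.py arranges):

    import os
    import sys

    sys.path.append('conf')                     # bin/deploy.py appends WORKDIR/conf
    os.environ['REDIS_DEPLOY_CONFIG'] = 'conf'  # module name, i.e. conf/conf.py below
    conf = __import__(os.environ['REDIS_DEPLOY_CONFIG'])
    print conf.cluster0['cluster_name']         # -> cluster0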
/conf/conf.py:
--------------------------------------------------------------------------------
#coding: utf-8
#the port encodes role, cluster_id and instance id:
#  2 0 x xx

#redis-master 20xxx
#redis-slave  21xxx
#proxy        22xxx 23xxx(status-port)
#sentinel     29xxx

#we will generate:
#port
#pidfile
#logfile
#dir

#path on the deploy machine
BINARYS = {
    'REDIS_SERVER_BINS'   : '/home/ning/idning-github/redis/src/redis-*',
    'REDIS_CLI'           : '/home/ning/idning-github/redis/src/redis-cli',
    'REDIS_SENTINEL_BINS' : '/home/ning/idning-github/redis/src/redis-sentinel',
    'NUTCRACKER_BINS'     : '/home/ning/Desktop/t/nutcracker-0.2.4/output/bin/nutcracker',
}

RDB_SLEEP_TIME = 1

#optional
REDIS_MONITOR_EXTRA = {
    'used_cpu_user': (0, 50),
}

#optional
NUTCRACKER_MONITOR_EXTRA = {
    'client_connections': (0, 10),
    "forward_error_INC": (0, 1000), # in every minute
    "client_err_INC": (0, 1000),    # in every minute
    'in_queue': (0, 10),
    'out_queue': (0, 10),
}

cluster0 = {
    'cluster_name': 'cluster0',
    'user': 'ning',
    'sentinel':[
        ('127.0.0.5:29001', '/tmp/r/sentinel-29001'),
        ('127.0.0.5:29002', '/tmp/r/sentinel-29002'),
        ('127.0.0.5:29003', '/tmp/r/sentinel-29003'),
    ],
    'redis': [
        # master(host:port, install path) , slave(host:port, install path)
        ('127.0.0.5:20000', '/tmp/r/redis-20000'), ('127.0.0.5:21000', '/tmp/r/redis-21000'),
        ('127.0.0.5:20001', '/tmp/r/redis-20001'), ('127.0.0.5:21001', '/tmp/r/redis-21001'),
        ('127.0.0.5:20002', '/tmp/r/redis-20002'), ('127.0.0.5:21002', '/tmp/r/redis-21002'),
        ('127.0.0.5:20003', '/tmp/r/redis-20003'), ('127.0.0.5:21003', '/tmp/r/redis-21003'),
    ],
    'nutcracker': [
        ('127.0.0.5:22000', '/tmp/r/nutcracker-22000'),
        ('127.0.0.5:22001', '/tmp/r/nutcracker-22001'),
        ('127.0.0.5:22002', '/tmp/r/nutcracker-22002'),
    ],
}

--------------------------------------------------------------------------------
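the flat 'redis' list above is consumed pairwise as (master, slave) by Cluster.__init__ in bin/deploy.py; a minimal sketch of the pairing:

    # masters sit at even indices, their slaves at the following odd index
    redis_list = cluster0['redis']
    for (m_hp, m_path), (s_hp, s_path) in zip(redis_list[::2], redis_list[1::2]):
        print '%s -> %s' % (m_hp, s_hp)   # 127.0.0.5:20000 -> 127.0.0.5:21000, ...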
/README.rst:
--------------------------------------------------------------------------------
deploy.py
=========

this script deploys a redis cluster in ``10 minutes``, with:

- redis
- redis-sentinel
- twemproxy

you can deploy/start/stop a cluster, run redis commands on every instance, get status, reconfig the proxy, and more.

config
------

::

    cluster0 = {
        'cluster_name': 'cluster0',
        'user': 'ning',
        'sentinel':[
            ('127.0.0.5:29001', '/tmp/r/sentinel-29001'),
            ('127.0.0.5:29002', '/tmp/r/sentinel-29002'),
            ('127.0.0.5:29003', '/tmp/r/sentinel-29003'),
        ],
        'redis': [
            # master(host:port, install path) , slave(host:port, install path)
            ('127.0.0.5:20000', '/tmp/r/redis-20000'), ('127.0.0.5:21000', '/tmp/r/redis-21000'),
            ('127.0.0.5:20001', '/tmp/r/redis-20001'), ('127.0.0.5:21001', '/tmp/r/redis-21001'),
            ('127.0.0.5:20002', '/tmp/r/redis-20002'), ('127.0.0.5:21002', '/tmp/r/redis-21002'),
            ('127.0.0.5:20003', '/tmp/r/redis-20003'), ('127.0.0.5:21003', '/tmp/r/redis-21003'),
        ],
        'nutcracker': [
            ('127.0.0.5:22000', '/tmp/r/nutcracker-22000'),
            ('127.0.0.5:22001', '/tmp/r/nutcracker-22001'),
            ('127.0.0.5:22002', '/tmp/r/nutcracker-22002'),
        ],
    }

this will generate the ``sentinel`` config::

    sentinel monitor cluster0-20000 127.0.0.5 20000 2
    sentinel down-after-milliseconds cluster0-20000 60000
    sentinel failover-timeout cluster0-20000 180000
    sentinel parallel-syncs cluster0-20000 1

    sentinel monitor cluster0-20001 127.0.0.5 20001 2
    sentinel down-after-milliseconds cluster0-20001 60000
    sentinel failover-timeout cluster0-20001 180000
    sentinel parallel-syncs cluster0-20001 1

and the ``twemproxy`` config::

    cluster0:
      listen: 127.0.0.5:22000
      hash: fnv1a_64
      distribution: modula
      preconnect: true
      auto_eject_hosts: false
      redis: true
      backlog: 512
      client_connections: 0
      server_connections: 1
      server_retry_timeout: 2000
      server_failure_limit: 2
      servers:
        - 127.0.0.5:20000:1 cluster0-20000
        - 127.0.0.5:20001:1 cluster0-20001

the server name ``cluster0-20000`` comes from the original master; after a failover, ``cluster0-20000`` may point at either ``127.0.0.5:20000`` or ``127.0.0.5:21000``, but the name itself never changes.
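the proxy keeps the server *name* stable while failover changes the ``host:port`` behind it; ``reconfigproxy`` rewrites only the address part. ``NutCracker.get_masters()`` in bin/deploy.py parses the ``servers:`` lines back with::

    def parse_line(line):
        _x, host_port_w, name = line.split()
        host, port, _w = host_port_w.split(':')
        return ('%s:%s' % (host, port), name)

    parse_line(' - 127.0.0.5:20000:1 cluster0-20000')
    # -> ('127.0.0.5:20000', 'cluster0-20000')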
usage
-----

choose your config filename::

    export REDIS_DEPLOY_CONFIG=conf && . bin/active

::

    $ ./bin/deploy.py -h
    usage: deploy.py [-h] [-v] [-o LOGFILE] clustername op [cmd]

    positional arguments:
      clustername  cluster target
      op           aof_rewrite    : None
                   deploy         : deploy the binarys and config file (redis/sentinel/nutcracker) in this cluster
                   kill           : kill all instance(redis/sentinel/nutcracker) in this cluster
                   log            : show log of all instance(redis/sentinel/nutcracker) in this cluster
                   master_memory  : show used_memory_human:1.53M
                   master_qps     : instantaneous_ops_per_sec:4
                   mastercmd cmd  : run redis command against all redis Master instance, like 'INFO, GET xxxx'
                   monitor        : monitor status of the cluster
                   printcmd       : print the start/stop cmd of instance
                   rdb            : do rdb in all redis instance
                   reconfig_proxy : None
                   rediscmd cmd   : run redis command against all redis instance, like 'INFO, GET xxxx'
                   start          : start all instance(redis/sentinel/nutcracker) in this cluster
                   status         : get status of all instance(redis/sentinel/nutcracker) in this cluster
                   stop           : stop all instance(redis/sentinel/nutcracker) in this cluster
      cmd          the redis/ssh cmd like "INFO"

start cluster::

    $ ./bin/deploy.py cluster0 deploy

    $ ./bin/deploy.py cluster0 start
    2013-12-26 14:47:47,385 [MainThread] [NOTICE] start redis
    2013-12-26 14:47:47,622 [MainThread] [INFO] [redis:127.0.0.5:20000] start ok in 0.23 seconds
    2013-12-26 14:47:47,848 [MainThread] [INFO] [redis:127.0.0.5:21000] start ok in 0.22 seconds
    2013-12-26 14:47:48,099 [MainThread] [INFO] [redis:127.0.0.5:20001] start ok in 0.24 seconds
    2013-12-26 14:47:48,369 [MainThread] [INFO] [redis:127.0.0.5:21001] start ok in 0.27 seconds
    2013-12-26 14:47:50,788 [MainThread] [NOTICE] start sentinel
    2013-12-26 14:47:51,186 [MainThread] [INFO] [sentinel:127.0.0.5:29001] start ok in 0.39 seconds
    2013-12-26 14:47:51,452 [MainThread] [INFO] [sentinel:127.0.0.5:29002] start ok in 0.26 seconds
    2013-12-26 14:47:51,820 [MainThread] [INFO] [sentinel:127.0.0.5:29003] start ok in 0.35 seconds
    2013-12-26 14:47:51,820 [MainThread] [NOTICE] start nutcracker
    2013-12-26 14:47:52,082 [MainThread] [INFO] [nutcracker:127.0.0.5:22000] start ok in 0.26 seconds
    2013-12-26 14:47:52,364 [MainThread] [INFO] [nutcracker:127.0.0.5:22001] start ok in 0.28 seconds
    2013-12-26 14:47:52,573 [MainThread] [INFO] [nutcracker:127.0.0.5:22002] start ok in 0.21 seconds
    2013-12-26 14:47:52,573 [MainThread] [NOTICE] setup master->slave
    2013-12-26 14:47:52,580 [MainThread] [INFO] setup [redis:127.0.0.5:20000]->[redis:127.0.0.5:21000]
    2013-12-26 14:47:52,580 [MainThread] [INFO] [redis:127.0.0.5:21000] /home/ning/idning-github/redis/src/redis-cli -h 127.0.0.5 -p 21000 SLAVEOF 127.0.0.5 20000
    OK
    ...
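every op fans out over all instances of the cluster; ``Cluster._doit`` in bin/deploy.py dispatches it by name, roughly equivalent to::

    for s in self.all_redis + self.all_sentinel + self.all_nutcracker:
        getattr(s, op)()    # the real code uses eval('s.%s()' % op), group by group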
run cmd on each master::

    $ ./bin/deploy.py cluster0 mastercmd 'get "hello"'
    2013-12-24 13:51:39,748 [MainThread] [INFO] [RedisServer:127.0.0.5:20000]: get "hello"
    [RedisServer:127.0.0.5:20000] xxxxx
    2013-12-24 13:51:39,752 [MainThread] [INFO] [RedisServer:127.0.0.5:20001]: get "hello"
    [RedisServer:127.0.0.5:20001]
    2013-12-24 13:51:39,756 [MainThread] [INFO] [RedisServer:127.0.0.5:20002]: get "hello"
    [RedisServer:127.0.0.5:20002]
    2013-12-24 13:51:39,760 [MainThread] [INFO] [RedisServer:127.0.0.5:20003]: get "hello"
    [RedisServer:127.0.0.5:20003] world

dump rdb::

    $ ./bin/deploy.py cluster0 rdb

monitor qps/memory::

    $ ./bin/deploy.py cluster0 mq
    2013-12-24 14:21:05,841 [MainThread] [INFO] start running: ./bin/deploy.py -v cluster0 mq
    2013-12-24 14:21:05,842 [MainThread] [INFO] Namespace(cmd=None, logfile='log/deploy.log', op='mq', target='cluster0', verbose=1)
    20000 20001 20002 20003
        6     5     5     6
        6     6     5     6
        6     6     5     6
     4741     6     6     6
    33106     5     5     6
    46639     8     7     7
    42265     6     5     7

run benchmark::

    $ ./bin/deploy.py cluster_offline0 bench
    $ ./bin/deploy.py cluster_offline0 mbench

modify config::

    $ ./bin/deploy.py cluster_offline0 mastercmd 'CONFIG GET save' -v
    $ ./bin/deploy.py cluster_offline0 mastercmd 'CONFIG SET save "10000 1000000"' -v

enable auto-complete
====================
::

    export REDIS_DEPLOY_CONFIG=conf

    pip install argcomplete
    $ . ./bin/active

    ning@ning-laptop ~/idning-github/redis-mgr$ ./bin/deploy.py cluster0 r
    randomkill    rdb           reconfigproxy rediscmd

gen_conf
========

use the config::

    BASEDIR = '/tmp/r'
    HOSTS = [
        '127.0.1.1',
        '127.0.1.2',
        '127.0.1.3',
        '127.0.1.4',
    ]
    MASTER_PER_MACHINE = 2
    SLAVE_PORT_INCREASE = 1000

it will generate the deploy.py config like this:

.. image:: doc/twemproxy-sentinel-cluster.png

Dependency
==========

- pcl: https://github.com/idning/pcl
- redis-py: https://github.com/andymccurdy/redis-py
- argcomplete (optional): https://github.com/kislyuk/argcomplete

Authors
=======

- @idning
- @cen-li

TODO
====

1. scheduler for many clusters, we will need it!
2. SLOW LOG monitor
3. #live monitor for nutcracker
4. 
#nc to get nutcracker status will fail in background:: 219 | 220 | nohup ./bin/deploy.py cluster0 scheduler & 221 | 222 | we use telnetlib instead 223 | 224 | 225 | https://github.com/idning/redis-mgr 226 | 227 | -------------------------------------------------------------------------------- /lib/monitor.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #coding: utf-8 3 | 4 | from utils import * 5 | 6 | PWD = os.path.dirname(os.path.realpath(__file__)) 7 | 8 | class BenchThread(threading.Thread): 9 | def __init__ (self, redis, cmd): 10 | threading.Thread.__init__(self) 11 | self.redis = redis 12 | self.cmd = cmd 13 | def run(self): 14 | self.redis._bench(self.cmd) 15 | 16 | class Benchmark(): 17 | def nbench(self): 18 | ''' 19 | run benchmark against nutcracker 20 | ''' 21 | for s in self.all_nutcracker: 22 | cmd = TT('bin/redis-benchmark --csv -h $host -p $port -r 100000 -t set,get -n 10000000 -c 100 ', s.args) 23 | BenchThread(random.choice(self._active_masters()), cmd).start() 24 | 25 | def mbench(self): 26 | ''' 27 | run benchmark against redis master 28 | ''' 29 | for s in self._active_masters(): 30 | cmd = TT('bin/redis-benchmark --csv -h $host -p $port -r 100000 -t set,get -n 10000000 -c 100 ', s.args) 31 | BenchThread(s, cmd).start() 32 | 33 | def stopbench(self): 34 | ''' 35 | you will need this for stop benchmark 36 | ''' 37 | return self.sshcmd("pkill -f 'bin/redis-benchmark'") 38 | 39 | class Monitor(): 40 | def _live_nutcracker(self, what, format_func = lambda x:x): 41 | 42 | for i in xrange(1000*1000): 43 | if i%10 == 0: 44 | self.all_nutcracker 45 | header = common.to_blue(' '.join(['%5s' % s.args['port'] for s in self.all_nutcracker])) 46 | print header 47 | 48 | def get_v(s): 49 | info = s._info_dict()[self.args['cluster_name']] 50 | if what not in info: 51 | return '-' 52 | return format_func(info[what]) 53 | 54 | print ' '.join([ '%5s' % get_v(s) for s in self.all_nutcracker]) + '\t' + common.format_time(None, '%X') 55 | 56 | time.sleep(1) 57 | 58 | def _live_redis(self, what, format_func = lambda x:x): 59 | masters = self._active_masters() 60 | for i in xrange(1000*1000): 61 | if i%10 == 0: 62 | old_masters = masters 63 | masters = self._active_masters() 64 | 65 | old_masters_list = [str(m) for m in old_masters] 66 | masters_list = [str(m) for m in masters] 67 | 68 | if masters_list == old_masters_list: 69 | header = common.to_blue(' '.join(['%5s' % s.args['port'] for s in masters])) 70 | else: 71 | header = common.to_red(' '.join(['%5s' % s.args['port'] for s in masters])) 72 | print header 73 | def get_v(s): 74 | info = s._info_dict() 75 | if what not in info: 76 | return '-' 77 | return format_func(info[what]) 78 | print ' '.join([ '%5s' % get_v(s) for s in masters]) + '\t' + common.format_time(None, '%X') 79 | 80 | time.sleep(1) 81 | 82 | def mlive_mem(self): 83 | ''' 84 | monitor used_memory_human:1.53M of master 85 | ''' 86 | def format(s): 87 | return re.sub('\.\d+', '', s) # 221.53M=>221M 88 | self._live_redis('used_memory_human', format) 89 | 90 | def mlive_qps(self): 91 | ''' 92 | monitor instantaneous_ops_per_sec of master 93 | ''' 94 | self._live_redis('instantaneous_ops_per_sec') 95 | 96 | def nlive_request(self): 97 | ''' 98 | monitor nutcracker requests/s 99 | ''' 100 | self._live_nutcracker('requests_INC') 101 | 102 | def nlive_forward_error(self): 103 | ''' 104 | monitor nutcracker forward_error/s 105 | ''' 106 | self._live_nutcracker('forward_error_INC') 107 | 108 | def 
nlive_inqueue(self): 109 | ''' 110 | monitor nutcracker forward_error/s 111 | ''' 112 | self._live_nutcracker('in_queue') 113 | 114 | def nlive_outqueue(self): 115 | ''' 116 | monitor nutcracker forward_error/s 117 | ''' 118 | self._live_nutcracker('out_queue') 119 | 120 | def _monitor(self): 121 | ''' 122 | - redis 123 | - connected_clients 124 | - mem 125 | - rdb_last_bgsave_time_sec:0 126 | - aof_last_rewrite_time_sec:0 127 | - latest_fork_usec 128 | - slow log 129 | - hitrate 130 | - master_link_status:down 131 | - nutcracker 132 | - all config of nutcracker is the same 133 | - forward_error 134 | - server_err 135 | - in_queue/out_queue 136 | 137 | save this to a file , in one line: 138 | { 139 | 'ts': xxx, 140 | 'timestr': xxx, 141 | 'infos': { 142 | '[redis:host:port]': {info} 143 | '[redis:host:port]': {info} 144 | '[nutcracker:host:port]': {info} 145 | }, 146 | } 147 | ''' 148 | now = time.time() 149 | 150 | infos = {} 151 | for r in self.all_redis + self.all_sentinel + self.all_nutcracker: 152 | infos[str(r)] = r._info_dict() 153 | self._check_warning(infos) 154 | 155 | ret = { 156 | 'ts': now, 157 | 'timestr': common.format_time_to_min(now), 158 | 'infos': infos, 159 | } 160 | 161 | DIR = os.path.join(PWD, '../data') 162 | STAT_LOG = os.path.join(DIR, 'statlog.%s' % common.format_time(now, '%Y%m%d%H')) 163 | common.system('mkdir -p %s' % DIR, None) 164 | 165 | fout = file(STAT_LOG, 'a+') 166 | print >> fout, my_json_encode(ret) 167 | fout.close() 168 | timeused = time.time() - now 169 | logging.notice("monitor @ ts: %s, timeused: %.2fs" % (common.format_time_to_min(now), timeused)) 170 | 171 | def _check_warning(self, infos): 172 | def match(val, expr): 173 | if type(expr) == set: 174 | return val in expr 175 | _min, _max = expr 176 | return _min <= float(val) <= _max 177 | 178 | def check_redis(node, info): 179 | if not info or 'uptime_in_seconds' not in info: 180 | logging.warn('%s is down' % node) 181 | now = time.time() 182 | redis_spec = { 183 | 'connected_clients': (0, 1000), 184 | 'used_memory_peak' : (0, 5*(2**30)), 185 | 'rdb_last_bgsave_time_sec': (0, 1), 186 | 'aof_last_rewrite_time_sec': (0, 1), 187 | 'latest_fork_usec': (0, 100*1000), #100ms 188 | 'master_link_status': set(['up']), 189 | 'rdb_last_bgsave_status': set(['ok']), 190 | 'rdb_last_save_time': (now-25*60*60, now), 191 | #- hit_rate 192 | #- slow log 193 | } 194 | if 'REDIS_MONITOR_EXTRA' in dir(conf): 195 | redis_spec.update(conf.REDIS_MONITOR_EXTRA) 196 | 197 | for k, expr in redis_spec.items(): 198 | if k in info and not match(info[k], expr): 199 | logging.warn('%s.%s is:\t %s, not in %s' % (node, k, info[k], expr)) 200 | 201 | 202 | def check_nutcracker(node, info): 203 | ''' 204 | see NutCracker._info_dict() for fields 205 | ''' 206 | if not info or 'uptime' not in info: 207 | logging.warn('%s is down' % node) 208 | 209 | nutcracker_cluster_spec = { 210 | 'client_connections': (0, 10000), 211 | "forward_error_INC": (0, 1000), # in every minute 212 | "client_err_INC": (0, 1000), # in every minute 213 | 'in_queue': (0, 1000), 214 | 'out_queue': (0, 1000), 215 | } 216 | if 'NUTCRACKER_MONITOR_EXTRA' in dir(conf): 217 | nutcracker_cluster_spec.update(conf.NUTCRACKER_MONITOR_EXTRA) 218 | 219 | #got info of this cluster 220 | info = info[self.args['cluster_name']] 221 | for k, expr in nutcracker_cluster_spec.items(): 222 | if k in info and not match(info[k], expr): 223 | logging.warn('%s.%s is:\t %s, not in %s' % (node, k, info[k], expr)) 224 | 225 | 226 | for node, info in infos.items(): 227 | if 
strstr(node, 'redis'): 228 | check_redis(node, info) 229 | if strstr(node, 'nutcracker'): 230 | check_nutcracker(node, info) 231 | 232 | def monitor(self): 233 | ''' 234 | a long time running monitor task, write WARN log on bad things happend 235 | ''' 236 | while True: 237 | self._monitor() 238 | time.sleep(60) 239 | 240 | def scheduler(self): 241 | ''' 242 | start following threads: 243 | - failover 244 | - cron of monitor 245 | - cron of rdb 246 | = graph web server 247 | ''' 248 | thread.start_new_thread(self.failover, ()) 249 | 250 | cron = crontab.Cron() 251 | cron.add('* * * * *' , self._monitor) # every minute 252 | cron.add('0 3 * * *' , self.rdb, use_thread=True) # every day 253 | cron.add('0 5 * * *' , self.aof_rewrite, use_thread=True) # every day 254 | cron.run() 255 | 256 | # vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 257 | -------------------------------------------------------------------------------- /bin/deploy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #coding: utf-8 3 | 4 | import os 5 | import sys 6 | 7 | PWD = os.path.dirname(os.path.realpath(__file__)) 8 | WORKDIR = os.path.join(PWD, '../') 9 | sys.path.append(os.path.join(WORKDIR, 'lib/')) 10 | sys.path.append(os.path.join(WORKDIR, 'conf/')) 11 | 12 | from utils import * 13 | from monitor import Monitor, Benchmark 14 | 15 | class Base: 16 | ''' 17 | the sub class should implement: _alive, _pre_deploy, status, and init self.args 18 | ''' 19 | def __init__(self, name, user, host_port, path): 20 | self.args = { 21 | 'name' : name, 22 | 'user' : user, 23 | 'host' : socket.gethostbyname(host_port.split(':')[0]), 24 | 'port' : int(host_port.split(':')[1]), 25 | 'path' : path, 26 | 27 | 'localdir' : '', #files to deploy 28 | 29 | 'startcmd' : '', #startcmd and runcmd will used to generate the control script 30 | 'runcmd' : '', 31 | 'logfile' : '', 32 | } 33 | 34 | def __str__(self): 35 | return TT('[$name:$host:$port]', self.args) 36 | 37 | def deploy(self): 38 | logging.info('deploy %s' % self) 39 | self.args['localdir'] = TT('tmp/$name-$host-$port', self.args) 40 | self._run(TT('mkdir -p $localdir/bin && mkdir -p $localdir/conf && mkdir -p $localdir/log && mkdir -p $localdir/data ', self.args)) 41 | 42 | self._pre_deploy() 43 | self._gen_control_script() 44 | self._init_dir() 45 | 46 | cmd = TT('rsync -ravP $localdir/ $user@$host:$path 1>/dev/null 2>/dev/null', self.args) 47 | self._run(cmd) 48 | 49 | def _gen_control_script(self): 50 | content = file('conf/control.sh').read() 51 | content = TT(content, self.args) 52 | 53 | control_filename = TT('${localdir}/${name}_control', self.args) 54 | 55 | fout = open(control_filename, 'w+') 56 | fout.write(content) 57 | fout.close() 58 | os.chmod(control_filename, 0755) 59 | 60 | def start(self): 61 | if self._alive(): 62 | logging.warn('%s already running' %(self) ) 63 | return 64 | 65 | logging.debug('starting %s' % self) 66 | t1 = time.time() 67 | sleeptime = .1 68 | self._run(self._remote_start_cmd()) 69 | 70 | while not self._alive(): 71 | lets_sleep(sleeptime) 72 | if sleeptime < 5: 73 | sleeptime *= 2 74 | else: 75 | sleeptime = 5 76 | logging.warn('%s still not alive' % self) 77 | 78 | t2 = time.time() 79 | logging.info('%s start ok in %.2f seconds' %(self, t2-t1) ) 80 | 81 | def stop(self): 82 | if not self._alive(): 83 | logging.warn('%s already stop' %(self) ) 84 | return 85 | 86 | self._run(self._remote_stop_cmd()) 87 | t1 = time.time() 88 | while self._alive(): 89 | lets_sleep() 90 | t2 
= time.time() 91 | logging.info('%s stop ok in %.2f seconds' %(self, t2-t1) ) 92 | 93 | def printcmd(self): 94 | print common.to_blue(self), self._remote_start_cmd() 95 | 96 | def status(self): 97 | logging.warn("status: not implement") 98 | 99 | def log(self): 100 | cmd = TT('tail $logfile', self.args) 101 | logging.info('log of %s' % self) 102 | print self._run(self._remote_cmd(cmd)) 103 | 104 | def _bench(self, cmd): 105 | ''' 106 | run a benchmark cmd on this remote machine 107 | ''' 108 | remote_cmd = self._remote_cmd(cmd) 109 | logging.info(remote_cmd) 110 | #common.system_bg(remote_cmd, logging.debug) 111 | print self._run(remote_cmd) 112 | 113 | def _alive(self): 114 | logging.warn("_alive: not implement") 115 | 116 | def _init_dir(self): 117 | raw_cmd = TT('mkdir -p $path', self.args) 118 | self._run(self._remote_cmd(raw_cmd, chdir=False)) 119 | 120 | def _remote_start_cmd(self): 121 | cmd = TT("./${name}_control start", self.args) 122 | return self._remote_cmd(cmd) 123 | 124 | def _remote_stop_cmd(self): 125 | cmd = TT("./${name}_control stop", self.args) 126 | return self._remote_cmd(cmd) 127 | 128 | def _remote_cmd(self, raw_cmd, chdir=True): 129 | if raw_cmd.find('"') >= 0: 130 | raise Exception('bad cmd: ' + raw_cmd) 131 | args = copy.deepcopy(self.args) 132 | args['cmd'] = raw_cmd 133 | if chdir: 134 | return TT('ssh -n -f $user@$host "cd $path && $cmd"', args) 135 | else: 136 | return TT('ssh -n -f $user@$host "$cmd"', args) 137 | 138 | def _run(self, raw_cmd): 139 | ret = common.system(raw_cmd, logging.debug) 140 | logging.debug('return : [%d] [%s] ' % (len(ret), common.shorten(ret)) ) 141 | return ret 142 | 143 | 144 | class RedisServer(Base): 145 | def __init__(self, user, host_port, path): 146 | Base.__init__(self, 'redis', user, host_port, path) 147 | 148 | self.args['startcmd'] = TT('bin/redis-server conf/redis.conf', self.args) 149 | self.args['runcmd'] = TT('redis-server \*:$port', self.args) 150 | 151 | self.args['conf'] = TT('$path/conf/redis.conf', self.args) 152 | self.args['pidfile'] = TT('$path/log/redis.pid', self.args) 153 | self.args['logfile'] = TT('$path/log/redis.log', self.args) 154 | self.args['dir'] = TT('$path/data', self.args) 155 | 156 | self.args['REDIS_CLI'] = conf.BINARYS['REDIS_CLI'] 157 | 158 | def _info_dict(self): 159 | cmd = TT('$REDIS_CLI -h $host -p $port INFO', self.args) 160 | info = self._run(cmd) 161 | 162 | info = [line.split(':', 1) for line in info.split('\r\n') if not line.startswith('#')] 163 | info = [i for i in info if len(i)>1] 164 | return defaultdict(str, info) #this is a defaultdict, be Notice 165 | 166 | def _ping(self): 167 | cmd = TT('$REDIS_CLI -h $host -p $port PING', self.args) 168 | return self._run(cmd) 169 | 170 | def _alive(self): 171 | return strstr(self._ping(), 'PONG') 172 | 173 | def _gen_conf(self): 174 | content = file('conf/redis.conf').read() 175 | return TT(content, self.args) 176 | 177 | def _pre_deploy(self): 178 | self.args['BINS'] = conf.BINARYS['REDIS_SERVER_BINS'] 179 | self._run(TT('cp $BINS $localdir/bin/', self.args)) 180 | 181 | fout = open(TT('$localdir/conf/redis.conf', self.args), 'w+') 182 | fout.write(self._gen_conf()) 183 | fout.close() 184 | 185 | def status(self): 186 | uptime = self._info_dict()['uptime_in_seconds'] 187 | if uptime: 188 | logging.info('%s uptime %s seconds' % (self, uptime)) 189 | else: 190 | logging.error('%s is down' % self) 191 | 192 | def isslaveof(self, master_host, master_port): 193 | info = self._info_dict() 194 | if info['master_host'] == master_host and 
int(info['master_port']) == master_port: 195 | logging.debug('already slave of %s:%s' % (master_host, master_port)) 196 | return True 197 | 198 | def slaveof(self, master_host, master_port): 199 | cmd = 'SLAVEOF %s %s' % (master_host, master_port) 200 | return self.rediscmd(cmd) 201 | 202 | def rediscmd(self, cmd): 203 | args = copy.deepcopy(self.args) 204 | args['cmd'] = cmd 205 | cmd = TT('$REDIS_CLI -h $host -p $port $cmd', args) 206 | logging.info('%s %s' % (self, cmd)) 207 | print self._run(cmd) 208 | 209 | 210 | class Sentinel(RedisServer): 211 | def __init__(self, user, host_port, path, masters): 212 | RedisServer.__init__(self, user, host_port, path) 213 | 214 | self.args['startcmd'] = TT('bin/redis-sentinel conf/sentinel.conf', self.args) 215 | self.args['runcmd'] = TT('redis-sentinel \*:$port', self.args) 216 | 217 | self.args['conf'] = TT('$path/conf/sentinel.conf', self.args) 218 | self.args['pidfile'] = TT('$path/log/sentinel.pid', self.args) 219 | self.args['logfile'] = TT('$path/log/sentinel.log', self.args) 220 | 221 | self.args['name'] = 'sentinel' 222 | self.masters = masters 223 | 224 | def _gen_conf_section(self): 225 | template = '''\ 226 | sentinel monitor $server_name $host $port 2 227 | sentinel down-after-milliseconds $server_name 60000 228 | sentinel failover-timeout $server_name 180000 229 | sentinel parallel-syncs $server_name 1 230 | ''' 231 | cfg = '\n'.join([TT(template, master.args) for master in self.masters]) 232 | return cfg 233 | 234 | def _gen_conf(self): 235 | content = file('conf/sentinel.conf').read() 236 | content = TT(content, self.args) 237 | return content + self._gen_conf_section() 238 | 239 | def _pre_deploy(self): 240 | self.args['BINS'] = conf.BINARYS['REDIS_SENTINEL_BINS'] 241 | self._run(TT('cp $BINS $localdir/bin/', self.args)) 242 | 243 | fout = open(TT('$localdir/conf/sentinel.conf', self.args), 'w+') 244 | fout.write(self._gen_conf()) 245 | fout.close() 246 | 247 | def get_masters(self): 248 | '''return currnet master list of (host:port, name)''' 249 | conn = redis.Redis(self.args['host'], self.args['port']) 250 | masters = conn.sentinel_masters() 251 | logging.debug('sentinel got masters: %s' % masters) 252 | return [('%s:%s' % (m['ip'], m['port']), m['name']) for m in masters.values()] 253 | 254 | def get_failover_event(self): 255 | self._sub = redis.Redis(self.args['host'], self.args['port']).pubsub() 256 | self._sub.subscribe('+switch-master') 257 | logging.info('subscribe +switch-master on %s' % self) 258 | iterator = self._sub.listen() 259 | if next(iterator)['channel'] != '+switch-master': 260 | raise Exception('error on subscribe') 261 | 262 | for msg in iterator: 263 | logging.info('got msg: %s' % msg) 264 | yield msg 265 | 266 | class NutCracker(Base): 267 | def __init__(self, user, host_port, path, masters): 268 | Base.__init__(self, 'nutcracker', user, host_port, path) 269 | 270 | self.masters = masters 271 | 272 | self.args['conf'] = TT('$path/conf/nutcracker.conf', self.args) 273 | self.args['pidfile'] = TT('$path/log/nutcracker.pid', self.args) 274 | self.args['logfile'] = TT('$path/log/nutcracker.log', self.args) 275 | self.args['status_port'] = self.args['port'] + 1000 276 | 277 | self.args['startcmd'] = TT('bin/nutcracker -d -c $conf -o $logfile -p $pidfile -s $status_port', self.args) 278 | self.args['runcmd'] = self.args['startcmd'] 279 | self._last_info = None 280 | 281 | def _alive(self): 282 | return self._info_dict() 283 | 284 | def _gen_conf_section(self): 285 | template = ' - $host:$port:1 $server_name' 286 | 
cfg = '\n'.join([TT(template, master.args) for master in self.masters]) 287 | return cfg 288 | 289 | def _gen_conf(self): 290 | content = ''' 291 | $cluster_name: 292 | listen: 0.0.0.0:$port 293 | hash: fnv1a_64 294 | distribution: modula 295 | preconnect: true 296 | auto_eject_hosts: false 297 | redis: true 298 | backlog: 512 299 | timeout: 400 300 | client_connections: 0 301 | server_connections: 1 302 | server_retry_timeout: 2000 303 | server_failure_limit: 2 304 | servers: 305 | ''' 306 | content = TT(content, self.args) 307 | return content + self._gen_conf_section() 308 | 309 | def _pre_deploy(self): 310 | self.args['BINS'] = conf.BINARYS['NUTCRACKER_BINS'] 311 | self._run(TT('cp $BINS $localdir/bin/', self.args)) 312 | 313 | fout = open(TT('$localdir/conf/nutcracker.conf', self.args), 'w+') 314 | fout.write(self._gen_conf()) 315 | fout.close() 316 | 317 | def _info_dict(self): 318 | ''' 319 | | We will add fields in the info dict 320 | "uptime": 370, | 321 | "timestamp": 1389231960, | timestamp_INC 322 | .... | 323 | "cluster0": { | 324 | "client_connections": 100, | 325 | "client_eof": 500, | 326 | "forward_error": 0, | calc forward_error_INC 327 | "client_err": 0, | calc client_err_INC 328 | "fragments": 0, | 329 | "server_ejects": 0, | 330 | | add global in_queue/out_queue/ 331 | | add global requests/responses/ 332 | | add global server_timedout/server_err 333 | | calc requests_INC responses_INC 334 | | calc server_timedout_INC server_err_INC 335 | "cluster0-20001": { #a backend | 336 | "server_timedout": 0, | 337 | "server_err": 0, | 338 | "responses": 125406, | 339 | "response_bytes": 828478, | 340 | "in_queue_bytes": 0, | 341 | "server_connections": 1, | 342 | "request_bytes": 5189724, | 343 | "out_queue": 0, | 344 | "server_eof": 0, | 345 | "requests": 125406, | 346 | "in_queue": 0, | 347 | "out_queue_bytes": 0 | 348 | }, | 349 | ''' 350 | info = self._raw_info_dict() 351 | #logging.debug(info) 352 | if not info: 353 | return None 354 | 355 | def calc_inc(cluster_name, info, last_info): 356 | TO_CALC_INC = ('forward_error', 'client_err', 'requests', 'responses', 'server_timedout', 'server_err') 357 | for item in TO_CALC_INC: 358 | info[item + '_INC'] = info[item] - last_info[item] 359 | 360 | def aggregation(cluster_name, info): 361 | TO_AGGREGATION = ('in_queue', 'out_queue', 'requests', 'responses', 'server_timedout', 'server_err') 362 | for item in TO_AGGREGATION: 363 | info[item] = 0 364 | for k, v, in info.items(): 365 | if type(v) == dict: # a backend 366 | for item in TO_AGGREGATION: 367 | info[item] += v[item] 368 | 369 | if self._last_info: 370 | info['timestamp_INC'] = info['timestamp'] - self._last_info['timestamp'] 371 | 372 | for k, v in info.items(): 373 | if type(v) == dict: 374 | cluster_name = k 375 | cluster_info = v 376 | aggregation(cluster_name, cluster_info) 377 | if self._last_info: 378 | calc_inc(cluster_name, cluster_info, self._last_info[cluster_name]) 379 | 380 | self._last_info = info 381 | logging.debug(info) 382 | return info 383 | 384 | def _raw_info_dict(self): 385 | try: 386 | ret = telnetlib.Telnet(self.args['host'], self.args['status_port']).read_all() 387 | return common.json_decode(ret) 388 | except Exception, e: 389 | logging.debug('--- can not get _info_dict of nutcracker, [Exception: %s]' % (e, )) 390 | return None 391 | 392 | def status(self): 393 | ret = self._info_dict() 394 | if ret: 395 | uptime = ret['uptime'] 396 | logging.info('%s uptime %s seconds' % (self, uptime)) 397 | else: 398 | logging.error('%s is down' % self) 399 | 
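# note: get_masters() below reads the deployed nutcracker.conf back into
# (host:port, name) tuples, while reconfig() regenerates the config from the
# sentinel view of the masters and bounces the proxy; a hypothetical round
# trip (names illustrative): proxy.reconfig(cluster._active_masters())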
400 | def get_masters(self): 401 | '''return currnet master list of (host:port, name)''' 402 | cmd = TT('cat $conf', self.args) 403 | content = self._run(self._remote_cmd(cmd)) 404 | logging.debug('current proxy config: %s' % content) 405 | 406 | def parse_line(line): 407 | _x, host_port_w, name = line.split() 408 | host, port, _w = host_port_w.split(':') 409 | return ('%s:%s' % (host, port), name) 410 | return [parse_line(line) for line in content.split('\n') if line.startswith(' -')] 411 | 412 | def reconfig(self, masters): 413 | self.masters = masters 414 | self.stop() 415 | self.deploy() 416 | self.start() 417 | logging.info('proxy %s:%s is updated' % (self.args['host'], self.args['port'])) 418 | 419 | 420 | class Cluster(object, Monitor, Benchmark): 421 | def __init__(self, args): 422 | self.args = args 423 | self.all_redis = [ RedisServer(self.args['user'], hp, path) for hp, path in self.args['redis'] ] 424 | pairs = zip(self.all_redis[::2], self.all_redis[1::2]) 425 | 426 | for m, s in pairs: #slave use same name as master 427 | s.args['cluster_name'] = m.args['cluster_name'] = args['cluster_name'] 428 | s.args['server_name'] = m.args['server_name'] = TT('$cluster_name-$port', m.args) 429 | 430 | masters = self.all_redis[::2] 431 | 432 | self.all_sentinel = [Sentinel(self.args['user'], hp, path, masters) for hp, path in self.args['sentinel'] ] 433 | self.all_nutcracker = [NutCracker(self.args['user'], hp, path, masters) for hp, path in self.args['nutcracker'] ] 434 | for m in self.all_nutcracker: 435 | m.args['cluster_name'] = args['cluster_name'] 436 | 437 | def _doit(self, op): 438 | logging.notice('%s redis' % (op, )) 439 | for s in self.all_redis: 440 | eval('s.%s()' % op) 441 | 442 | logging.notice('%s sentinel' % (op, )) 443 | for s in self.all_sentinel: 444 | eval('s.%s()' % op) 445 | 446 | logging.notice('%s nutcracker' % (op, )) 447 | for s in self.all_nutcracker: 448 | eval('s.%s()' % op) 449 | 450 | def _get_available_sentinel(self): 451 | for s in self.all_sentinel: 452 | if s._alive(): 453 | return s 454 | logging.warn('No sentinel instance are available') 455 | return None 456 | 457 | def _active_masters(self): 458 | '''return the current master list on sentinel''' 459 | new_masters = self._get_available_sentinel().get_masters() 460 | new_masters = sorted(new_masters, key=lambda x: x[1]) 461 | 462 | def make_master(host_port, name): # make master instance 463 | host = host_port.split(':')[0] 464 | port = int(host_port.split(':')[1]) 465 | for r in self.all_redis: 466 | if r.args['host'] == host and r.args['port'] == port: 467 | return r 468 | 469 | masters = [make_master(host_port, name) for host_port, name in new_masters] 470 | return masters 471 | 472 | def deploy(self): 473 | ''' 474 | deploy the binarys and config file (redis/sentinel/nutcracker) in this cluster 475 | ''' 476 | self._doit('deploy') 477 | 478 | def start(self): 479 | ''' 480 | start all instance(redis/sentinel/nutcracker) in this cluster 481 | ''' 482 | self._doit('start') 483 | 484 | logging.notice('setup master->slave') 485 | rs = self.all_redis 486 | pairs = [rs[i:i+2] for i in range(0, len(rs), 2)] 487 | for m, s in pairs: 488 | if s.isslaveof(m.args['host'], m.args['port']): 489 | logging.warn('%s->%s is ok!' 
% (m,s )) 490 | else: 491 | logging.info('setup %s->%s' % (m,s )) 492 | s.slaveof(m.args['host'], m.args['port']) 493 | 494 | def stop(self): 495 | ''' 496 | stop all instance(redis/sentinel/nutcracker) in this cluster 497 | ''' 498 | if 'yes' == raw_input('do you want to stop yes/no: '): 499 | self._doit('stop') 500 | 501 | def printcmd(self): 502 | ''' 503 | print the start/stop cmd of instance 504 | ''' 505 | self._doit('printcmd') 506 | 507 | def status(self): 508 | ''' 509 | get status of all instance(redis/sentinel/nutcracker) in this cluster 510 | ''' 511 | self._doit('status') 512 | 513 | def log(self): 514 | ''' 515 | show log of all instance(redis/sentinel/nutcracker) in this cluster 516 | ''' 517 | self._doit('log') 518 | 519 | def _rediscmd(self, cmd, sleeptime=.1): 520 | for s in self.all_redis: 521 | time.sleep(sleeptime) 522 | s.rediscmd(cmd) 523 | 524 | def rediscmd(self, cmd): 525 | ''' 526 | run redis command against all redis instance, like 'INFO, GET xxxx' 527 | ''' 528 | self._rediscmd(cmd) 529 | 530 | def mastercmd(self, cmd): 531 | ''' 532 | run redis command against all redis Master instance, like 'INFO, GET xxxx' 533 | ''' 534 | for s in self._active_masters(): 535 | s.rediscmd(cmd) 536 | 537 | def rdb(self): 538 | ''' 539 | do rdb in all redis instance, 540 | ''' 541 | self._rediscmd('BGSAVE', conf.RDB_SLEEP_TIME) 542 | 543 | def aof_rewrite(self): 544 | ''' 545 | do aof_rewrite in all redis instance 546 | ''' 547 | self._rediscmd('BGREWRITEAOF', conf.RDB_SLEEP_TIME) 548 | 549 | def randomkill(self): 550 | ''' 551 | random kill master every mintue (for test failover) 552 | ''' 553 | while True: 554 | r = random.choice(self._active_masters()) 555 | logging.notice('will restart %s' % r) 556 | r.stop() 557 | time.sleep(80) 558 | r.start() 559 | time.sleep(60) 560 | 561 | def sshcmd(self, cmd): 562 | ''' 563 | ssh to target machine and run cmd 564 | ''' 565 | hosts = [s.args['host'] for s in self.all_redis + self.all_sentinel + self.all_nutcracker] 566 | hosts = set(hosts) 567 | 568 | args = copy.deepcopy(self.args) 569 | args['cmd'] = cmd 570 | for h in hosts: 571 | args['host'] = h 572 | cmd = TT('ssh -n -f $user@$host "$cmd"', args) 573 | print common.system(cmd) 574 | 575 | def reconfigproxy(self): 576 | ''' 577 | sync the masters list from sentinel to proxy 578 | ''' 579 | logging.notice('begin reconfigproxy') 580 | old_masters = self.all_nutcracker[0].get_masters() 581 | new_masters = self._get_available_sentinel().get_masters() 582 | logging.info("old masters: %s" % sorted(old_masters, key=lambda x: x[1])) 583 | logging.info("new masters: %s" % sorted(new_masters, key=lambda x: x[1])) 584 | 585 | if set(new_masters) == set(old_masters): 586 | logging.notice('masters list of proxy are already newest, we will not do reconfigproxy') 587 | return 588 | logging.notice('we will do reconfigproxy') 589 | 590 | masters = self._active_masters() 591 | for m in self.all_nutcracker: 592 | m.reconfig(masters) 593 | logging.notice('reconfig all nutcracker Done!') 594 | 595 | def failover(self): 596 | ''' 597 | catch failover event and update the proxy configuration 598 | ''' 599 | while True: 600 | try: 601 | sentinel = self._get_available_sentinel() 602 | for event in sentinel.get_failover_event(): 603 | self.reconfigproxy() 604 | except Exception, e: 605 | logging.warn('we got exception: %s on failover task' % e) 606 | logging.exception(e) 607 | 608 | def migrage(self): 609 | ''' 610 | migrage a redis instance to another machine 611 | ''' 612 | pass 613 | 614 | def 
discover_op(): 615 | methods = inspect.getmembers(Cluster, predicate=inspect.ismethod) 616 | sets = [m[0] for m in methods if not m[0].startswith('_')] 617 | return sets 618 | 619 | def gen_op_help(): 620 | methods = inspect.getmembers(Cluster, predicate=inspect.ismethod) 621 | sets = [m for m in methods if not m[0].startswith('_')] 622 | 623 | #sort the function list, based on the their position in the files 624 | lines = file('bin/deploy.py').readlines() + file('lib/monitor.py').readlines() 625 | def rank(x): 626 | name, func = x 627 | t = 'def ' + name 628 | for i in range(len(lines)): 629 | if strstr(lines[i], t): 630 | return i 631 | sets = sorted(sets, key=rank) 632 | 633 | def format_func(name, func): 634 | args = ' '.join(inspect.getargspec(func).args[1:]) 635 | if args: 636 | desc = '%s %s' % (name, args) 637 | else: 638 | desc = name 639 | return '%-25s: %s' % (common.to_blue(desc), str(func.__doc__).strip()) 640 | 641 | return '\n'.join([format_func(name, func) for name, func in sets]) 642 | 643 | def discover_cluster(): 644 | sets = [s for s in dir(conf) if s.startswith('cluster')] 645 | return sets 646 | 647 | def main(): 648 | sys.argv.insert(1, '-v') # force -v 649 | parser = argparse.ArgumentParser(formatter_class=RawTextHelpFormatter) 650 | parser.add_argument('target', metavar='clustername', choices=discover_cluster(), help=' / '.join(discover_cluster())) 651 | parser.add_argument('op', metavar='op', choices=discover_op(), 652 | help=gen_op_help()) 653 | parser.add_argument('cmd', nargs='?', help='the redis/ssh cmd like "INFO"') 654 | 655 | LOGPATH = os.path.join(WORKDIR, 'log/deploy.log') 656 | args = common.parse_args2(LOGPATH, parser) 657 | if args.cmd: 658 | eval('Cluster(conf.%s).%s(%s)' % (args.target, args.op, 'args.cmd') ) 659 | else: 660 | eval('Cluster(conf.%s).%s()' % (args.target, args.op) ) 661 | 662 | if __name__ == "__main__": 663 | main() 664 | 665 | # vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 666 | -------------------------------------------------------------------------------- /conf/redis.conf: -------------------------------------------------------------------------------- 1 | 2 | # Redis configuration file example 3 | # Note on units: when memory size is needed, it is possible to specify 4 | # it in the usual form of 1k 5GB 4M and so forth: 5 | # 6 | # 1k => 1000 bytes 7 | # 1kb => 1024 bytes 8 | # 1m => 1000000 bytes 9 | # 1mb => 1024*1024 bytes 10 | # 1g => 1000000000 bytes 11 | # 1gb => 1024*1024*1024 bytes 12 | # 13 | # units are case insensitive so 1GB 1Gb 1gB are all the same. 14 | 15 | # By default Redis does not run as a daemon. Use 'yes' if you need it. 16 | # Note that Redis will write a pid file in /var/run/redis.pid when daemonized. 17 | daemonize yes 18 | 19 | #whitelist configure 20 | #whitelist yes 21 | #whitelist-file ./whitelist 22 | 23 | # When running daemonized, Redis writes a pid file in /var/run/redis.pid by 24 | # default. You can specify a custom pid file location here. 25 | pidfile ${pidfile} 26 | 27 | # Accept connections on the specified port, default is 6379. 28 | # If port 0 is specified Redis will not listen on a TCP socket. 29 | port ${port} 30 | 31 | # If you want you can bind a single interface, if the bind option is not 32 | # specified all the interfaces will listen for incoming connections. 33 | # 34 | # bind 127.0.0.1 35 | 36 | # Specify the path for the unix socket that will be used to listen for 37 | # incoming connections. 
There is no default, so Redis will not listen 38 | # on a unix socket when not specified. 39 | # 40 | # unixsocket /tmp/redis.sock 41 | # unixsocketperm 755 42 | 43 | # Close the connection after a client is idle for N seconds (0 to disable) 44 | timeout 0 45 | 46 | # TCP keepalive. 47 | # 48 | # If non-zero, use SO_KEEPALIVE to send TCP ACKs to clients in absence 49 | # of communication. This is useful for two reasons: 50 | # 51 | # 1) Detect dead peers. 52 | # 2) Take the connection alive from the point of view of network 53 | # equipment in the middle. 54 | # 55 | # On Linux, the specified value (in seconds) is the period used to send ACKs. 56 | # Note that to close the connection the double of the time is needed. 57 | # On other kernels the period depends on the kernel configuration. 58 | # 59 | # A reasonable value for this option is 60 seconds. 60 | tcp-keepalive 60 61 | 62 | # Specify the server verbosity level. 63 | # This can be one of: 64 | # debug (a lot of information, useful for development/testing) 65 | # verbose (many rarely useful info, but not a mess like the debug level) 66 | # notice (moderately verbose, what you want in production probably) 67 | # warning (only very important / critical messages are logged) 68 | loglevel notice 69 | 70 | # Specify the log file name. Also 'stdout' can be used to force 71 | # Redis to log on the standard output. Note that if you use standard 72 | # output for logging but daemonize, logs will be sent to /dev/null 73 | logfile ${logfile} 74 | 75 | # To enable logging to the system logger, just set 'syslog-enabled' to yes, 76 | # and optionally update the other syslog parameters to suit your needs. 77 | # syslog-enabled no 78 | 79 | # Specify the syslog identity. 80 | # syslog-ident redis 81 | 82 | # Specify the syslog facility. Must be USER or between LOCAL0-LOCAL7. 83 | # syslog-facility local0 84 | 85 | # Set the number of databases. The default database is DB 0, you can select 86 | # a different one on a per-connection basis using SELECT where 87 | # dbid is a number between 0 and 'databases'-1 88 | databases 16 89 | 90 | ################################ SNAPSHOTTING ################################# 91 | # 92 | # Save the DB on disk: 93 | # 94 | # save 95 | # 96 | # Will save the DB if both the given number of seconds and the given 97 | # number of write operations against the DB occurred. 98 | # 99 | # In the example below the behaviour will be to save: 100 | # after 900 sec (15 min) if at least 1 key changed 101 | # after 300 sec (5 min) if at least 10 keys changed 102 | # after 60 sec if at least 10000 keys changed 103 | # 104 | # Note: you can disable saving at all commenting all the "save" lines. 105 | # 106 | # It is also possible to remove all the previously configured save 107 | # points by adding a save directive with a single empty string argument 108 | # like in the following example: 109 | # 110 | 111 | #save 900 1 112 | #save 300 10 113 | #save 60 10000 114 | save "" 115 | 116 | # By default Redis will stop accepting writes if RDB snapshots are enabled 117 | # (at least one save point) and the latest background save failed. 118 | # This will make the user aware (in an hard way) that data is not persisting 119 | # on disk properly, otherwise chances are that no one will notice and some 120 | # distater will happen. 121 | # 122 | # If the background saving process will start working again Redis will 123 | # automatically allow writes again. 
124 | # 125 | # However if you have setup your proper monitoring of the Redis server 126 | # and persistence, you may want to disable this feature so that Redis will 127 | # continue to work as usually even if there are problems with disk, 128 | # permissions, and so forth. 129 | stop-writes-on-bgsave-error yes 130 | 131 | # Compress string objects using LZF when dump .rdb databases? 132 | # For default that's set to 'yes' as it's almost always a win. 133 | # If you want to save some CPU in the saving child set it to 'no' but 134 | # the dataset will likely be bigger if you have compressible values or keys. 135 | rdbcompression yes 136 | 137 | # Since version 5 of RDB a CRC64 checksum is placed at the end of the file. 138 | # This makes the format more resistant to corruption but there is a performance 139 | # hit to pay (around 10%) when saving and loading RDB files, so you can disable it 140 | # for maximum performances. 141 | # 142 | # RDB files created with checksum disabled have a checksum of zero that will 143 | # tell the loading code to skip the check. 144 | rdbchecksum yes 145 | 146 | # The filename where to dump the DB 147 | dbfilename dump.rdb 148 | 149 | # The working directory. 150 | # 151 | # The DB will be written inside this directory, with the filename specified 152 | # above using the 'dbfilename' configuration directive. 153 | # 154 | # The Append Only File will also be created inside this directory. 155 | # 156 | # Note that you must specify a directory here, not a file name. 157 | dir ${dir} 158 | 159 | ################################# REPLICATION ################################# 160 | 161 | # Master-Slave replication. Use slaveof to make a Redis instance a copy of 162 | # another Redis server. Note that the configuration is local to the slave 163 | # so for example it is possible to configure the slave to save the DB with a 164 | # different interval, or to listen to another port, and so on. 165 | # 166 | # slaveof 167 | 168 | # If the master is password protected (using the "requirepass" configuration 169 | # directive below) it is possible to tell the slave to authenticate before 170 | # starting the replication synchronization process, otherwise the master will 171 | # refuse the slave request. 172 | # 173 | # masterauth 174 | 175 | # When a slave loses its connection with the master, or when the replication 176 | # is still in progress, the slave can act in two different ways: 177 | # 178 | # 1) if slave-serve-stale-data is set to 'yes' (the default) the slave will 179 | # still reply to client requests, possibly with out of date data, or the 180 | # data set may just be empty if this is the first synchronization. 181 | # 182 | # 2) if slave-serve-stale-data is set to 'no' the slave will reply with 183 | # an error "SYNC with master in progress" to all the kind of commands 184 | # but to INFO and SLAVEOF. 185 | # 186 | slave-serve-stale-data yes 187 | 188 | # You can configure a slave instance to accept writes or not. Writing against 189 | # a slave instance may be useful to store some ephemeral data (because data 190 | # written on a slave will be easily deleted after resync with the master) but 191 | # may also cause problems if clients are writing to it because of a 192 | # misconfiguration. 193 | # 194 | # Since Redis 2.6 by default slaves are read-only. 195 | # 196 | # Note: read only slaves are not designed to be exposed to untrusted clients 197 | # on the internet. It's just a protection layer against misuse of the instance. 
198 | # Still a read only slave exports by default all the administrative commands 199 | # such as CONFIG, DEBUG, and so forth. To a limited extend you can improve 200 | # security of read only slaves using 'rename-command' to shadow all the 201 | # administrative / dangerous commands. 202 | slave-read-only yes 203 | 204 | # Slaves send PINGs to server in a predefined interval. It's possible to change 205 | # this interval with the repl_ping_slave_period option. The default value is 10 206 | # seconds. 207 | # 208 | # repl-ping-slave-period 10 209 | 210 | # The following option sets a timeout for both Bulk transfer I/O timeout and 211 | # master data or ping response timeout. The default value is 60 seconds. 212 | # 213 | # It is important to make sure that this value is greater than the value 214 | # specified for repl-ping-slave-period otherwise a timeout will be detected 215 | # every time there is low traffic between the master and the slave. 216 | # 217 | repl-timeout 120 218 | 219 | # Disable TCP_NODELAY on the slave socket after SYNC? 220 | # 221 | # If you select "yes" Redis will use a smaller number of TCP packets and 222 | # less bandwidth to send data to slaves. But this can add a delay for 223 | # the data to appear on the slave side, up to 40 milliseconds with 224 | # Linux kernels using a default configuration. 225 | # 226 | # If you select "no" the delay for data to appear on the slave side will 227 | # be reduced but more bandwidth will be used for replication. 228 | # 229 | # By default we optimize for low latency, but in very high traffic conditions 230 | # or when the master and slaves are many hops away, turning this to "yes" may 231 | # be a good idea. 232 | repl-disable-tcp-nodelay no 233 | 234 | # Set the replication backlog size. The backlog is a buffer that accumulates 235 | # slave data when slaves are disconnected for some time, so that when a slave 236 | # wants to reconnect again, often a full resync is not needed, but a partial 237 | # resync is enough, just passing the portion of data the slave missed while 238 | # disconnected. 239 | # 240 | # The biggest the replication backlog, the longer the time the slave can be 241 | # disconnected and later be able to perform a partial resynchronization. 242 | # 243 | # The backlog is only allocated once there is at least a slave connected. 244 | # 245 | repl-backlog-size 64mb 246 | 247 | # After a master has no longer connected slaves for some time, the backlog 248 | # will be freed. The following option configures the amount of seconds that 249 | # need to elapse, starting from the time the last slave disconnected, for 250 | # the backlog buffer to be freed. 251 | # 252 | # A value of 0 means to never release the backlog. 253 | # 254 | # repl-backlog-ttl 3600 255 | 256 | # The slave priority is an integer number published by Redis in the INFO output. 257 | # It is used by Redis Sentinel in order to select a slave to promote into a 258 | # master if the master is no longer working correctly. 259 | # 260 | # A slave with a low priority number is considered better for promotion, so 261 | # for instance if there are three slaves with priority 10, 100, 25 Sentinel will 262 | # pick the one wtih priority 10, that is the lowest. 263 | # 264 | # However a special priority of 0 marks the slave as not able to perform the 265 | # role of master, so a slave with priority of 0 will never be selected by 266 | # Redis Sentinel for promotion. 267 | # 268 | # By default the priority is 100. 
# It is possible for a master to stop accepting writes if there are fewer
# than N slaves connected, with a lag less than or equal to M seconds.
#
# The N slaves need to be in "online" state.
#
# The lag in seconds, which must be <= the specified value, is calculated from
# the last ping received from the slave, which is usually sent every second.
#
# This option does not GUARANTEE that N replicas will accept the write, but
# will limit the window of exposure for lost writes, in case not enough slaves
# are available, to the specified number of seconds.
#
# For example to require at least 3 slaves with a lag <= 10 seconds use:
#
# min-slaves-to-write 3
# min-slaves-max-lag 10
#
# Setting one or the other to 0 disables the feature.
#
# By default min-slaves-to-write is set to 0 (feature disabled) and
# min-slaves-max-lag is set to 10.

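# A quick way to see the guard above in action (a sketch; the port and key
# are illustrative, and the settings can be applied at runtime):
#
#   redis-cli -p 20000 CONFIG SET min-slaves-to-write 1
#   redis-cli -p 20000 CONFIG SET min-slaves-max-lag 10
#   # with no slave attached, the write is refused with a NOREPLICAS error:
#   redis-cli -p 20000 SET k v
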
################################## SECURITY ###################################

# Require clients to issue AUTH <PASSWORD> before processing any other
# commands. This might be useful in environments in which you do not trust
# others with access to the host running redis-server.
#
# This should stay commented out for backward compatibility and because most
# people do not need auth (e.g. they run their own servers).
#
# Warning: since Redis is pretty fast, an outside user can try up to
# 150k passwords per second against a good box. This means that you should
# use a very strong password, otherwise it will be very easy to break.
#
# requirepass foobared

# Command renaming.
#
# It is possible to change the name of dangerous commands in a shared
# environment. For instance the CONFIG command may be renamed into something
# hard to guess so that it will still be available for internal-use tools
# but not available for general clients.
#
# Example:
#
# rename-command CONFIG b840fc02d524045429941cc15f59e41cb7be6c52
#
# It is also possible to completely kill a command by renaming it into
# an empty string:
#
# rename-command CONFIG ""
#
# Please note that changing the name of commands that are logged into the
# AOF file or transmitted to slaves may cause problems.

################################### LIMITS ####################################

# Set the max number of connected clients at the same time. By default
# this limit is set to 10000 clients; however, if the Redis server is not
# able to configure the process file limit to allow for the specified limit,
# the max number of allowed clients is set to the current file limit
# minus 32 (as Redis reserves a few file descriptors for internal uses).
#
# Once the limit is reached Redis will close all the new connections sending
# an error 'max number of clients reached'.
#
# maxclients 10000

# Don't use more memory than the specified amount of bytes.
# When the memory limit is reached Redis will try to remove keys
# according to the eviction policy selected (see maxmemory-policy).
#
# If Redis can't remove keys according to the policy, or if the policy is
# set to 'noeviction', Redis will start to reply with errors to commands
# that would use more memory, like SET, LPUSH, and so on, and will continue
# to reply to read-only commands like GET.
#
# This option is usually useful when using Redis as an LRU cache, or to set
# a hard memory limit for an instance (using the 'noeviction' policy).
#
# WARNING: If you have slaves attached to an instance with maxmemory on,
# the size of the output buffers needed to feed the slaves is subtracted
# from the used memory count, so that network problems / resyncs will
# not trigger a loop where keys are evicted, and in turn the output
# buffer of slaves fills with DELs of evicted keys, triggering the deletion
# of more keys, and so forth, until the database is completely emptied.
#
# In short... if you have slaves attached it is suggested that you set a lower
# limit for maxmemory so that there is some free RAM on the system for slave
# output buffers (but this is not needed if the policy is 'noeviction').
#
maxmemory 5368709120

# MAXMEMORY POLICY: how Redis will select what to remove when maxmemory
# is reached. You can select among the following behaviors:
#
# volatile-lru -> remove the key with an expire set using an LRU algorithm
# allkeys-lru -> remove any key according to the LRU algorithm
# volatile-random -> remove a random key with an expire set
# allkeys-random -> remove a random key, any key
# volatile-ttl -> remove the key with the nearest expire time (minor TTL)
# noeviction -> don't expire at all, just return an error on write operations
#
# Note: with any of the above policies, Redis will return an error on write
# operations when there are no suitable keys for eviction.
#
# At the date of writing these commands are: set setnx setex append
# incr decr rpush lpush rpushx lpushx linsert lset rpoplpush sadd
# sinter sinterstore sunion sunionstore sdiff sdiffstore zadd zincrby
# zunionstore zinterstore hset hsetnx hmset hincrby incrby decrby
# getset mset msetnx exec sort
#
# The default is:
#
maxmemory-policy volatile-lru

# LRU and minimal TTL algorithms are not precise algorithms but approximated
# algorithms (in order to save memory), so you can also select the sample
# size to check. For instance, by default Redis will check three keys and
# pick the one that was used least recently; you can change the sample size
# using the following configuration directive.
#
maxmemory-samples 3

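# Eviction is easy to observe on a scratch instance (a sketch; the port, the
# 10mb limit, and the key names are illustrative). Note that volatile-lru can
# only evict keys that have a TTL set, so a cache filled with non-volatile
# keys needs allkeys-lru:
#
#   redis-cli -p 20000 CONFIG SET maxmemory 10mb
#   redis-cli -p 20000 CONFIG SET maxmemory-policy allkeys-lru
#   for i in $(seq 1 200000); do
#       redis-cli -p 20000 SET key:$i aaaaaaaaaaaaaaaa > /dev/null
#   done
#   redis-cli -p 20000 INFO stats | grep evicted_keys
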
############################## APPEND ONLY MODE ###############################

# By default Redis asynchronously dumps the dataset on disk. This mode is
# good enough in many applications, but an issue with the Redis process or
# a power outage may result in a few minutes of writes being lost (depending
# on the configured save points).
#
# The Append Only File is an alternative persistence mode that provides
# much better durability. For instance, using the default data fsync policy
# (see later in the config file) Redis can lose just one second of writes in a
# dramatic event like a server power outage, or a single write if something
# goes wrong with the Redis process itself, while the operating system is
# still running correctly.
#
# AOF and RDB persistence can be enabled at the same time without problems.
# If the AOF is enabled on startup Redis will load the AOF, that is the file
# with the better durability guarantees.
#
# Please check http://redis.io/topics/persistence for more information.

appendonly yes

# The name of the append only file (default: "appendonly.aof")
# appendfilename appendonly.aof

# The fsync() call tells the Operating System to actually write data on disk
# instead of waiting for more data in the output buffer. Some OSes will really
# flush data on disk, some other OSes will just try to do it ASAP.
#
# Redis supports three different modes:
#
# no: don't fsync, just let the OS flush the data when it wants. Faster.
# always: fsync after every write to the append only log. Slow, safest.
# everysec: fsync only one time every second. Compromise.
#
# The default is "everysec", as that's usually the right compromise between
# speed and data safety. It's up to you to understand if you can relax this to
# "no", which will let the operating system flush the output buffer when
# it wants, for better performance (but if you can live with the idea of
# some data loss consider the default persistence mode that's snapshotting),
# or, on the contrary, use "always", which is very slow but a bit safer than
# everysec.
#
# For more details please check the following article:
# http://antirez.com/post/redis-persistence-demystified.html
#
# If unsure, use "everysec".

# appendfsync always
appendfsync everysec
# appendfsync no

# When the AOF fsync policy is set to always or everysec, and a background
# saving process (a background save or AOF log background rewriting) is
# performing a lot of I/O against the disk, in some Linux configurations
# Redis may block too long on the fsync() call. Note that there is no fix for
# this currently, as even performing fsync in a different thread will block
# our synchronous write(2) call.
#
# In order to mitigate this problem it's possible to use the following option
# that will prevent fsync() from being called in the main process while a
# BGSAVE or BGREWRITEAOF is in progress.
#
# This means that while another child is saving, the durability of Redis is
# the same as "appendfsync none". In practical terms, this means that it is
# possible to lose up to 30 seconds of log in the worst scenario (with the
# default Linux settings).
#
# If you have latency problems turn this to "yes". Otherwise leave it as
# "no" as that is the safest pick from the point of view of durability.
no-appendfsync-on-rewrite no

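# The AOF knobs above can also be flipped at runtime, which is handy when
# weighing durability against latency on a live instance. A sketch; the port
# is illustrative:
#
#   redis-cli -p 20000 CONFIG SET appendfsync always
#   redis-cli -p 20000 CONFIG GET appendonly
#   redis-cli -p 20000 BGREWRITEAOF
#   redis-cli -p 20000 INFO persistence | grep aof_
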
# Automatic rewrite of the append only file.
# Redis is able to automatically rewrite the log file, implicitly calling
# BGREWRITEAOF when the AOF log size grows by the specified percentage.
#
# This is how it works: Redis remembers the size of the AOF file after the
# latest rewrite (if no rewrite has happened since the restart, the size of
# the AOF at startup is used).
#
# This base size is compared to the current size. If the current size is
# bigger than the base size by more than the specified percentage, the
# rewrite is triggered. You also need to specify a minimal size for the AOF
# file to be rewritten; this is useful to avoid rewriting the AOF file even
# if the percentage increase is reached but it is still pretty small.
#
# Specify a percentage of zero in order to disable the automatic AOF
# rewrite feature.

auto-aof-rewrite-percentage 0
auto-aof-rewrite-min-size 64mb

################################ LUA SCRIPTING ###############################

# Max execution time of a Lua script in milliseconds.
#
# If the maximum execution time is reached Redis will log that a script is
# still in execution after the maximum allowed time and will start to
# reply to queries with an error.
#
# When a long running script exceeds the maximum execution time only the
# SCRIPT KILL and SHUTDOWN NOSAVE commands are available. The first can be
# used to stop a script that has not yet called any write commands. The
# second is the only way to shut down the server when a write command was
# already issued by the script but the user doesn't want to wait for the
# natural termination of the script.
#
# Set it to 0 or a negative value for unlimited execution without warnings.
lua-time-limit 5000

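# To see this limit in action, start a busy script in one terminal and kill
# it from another. A sketch; the port is illustrative:
#
#   redis-cli -p 20000 EVAL "while true do end" 0
#   # after lua-time-limit ms, other clients start getting -BUSY replies;
#   # since the script never wrote anything, it can be stopped with:
#   redis-cli -p 20000 SCRIPT KILL
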
################################ REDIS CLUSTER ###############################
#
# Normal Redis instances can't be part of a Redis Cluster; only nodes that are
# started as cluster nodes can. In order to start a Redis instance as a
# cluster node enable the cluster support by uncommenting the following:
#
# cluster-enabled yes

# Every cluster node has a cluster configuration file. This file is not
# intended to be edited by hand. It is created and updated by Redis nodes.
# Every Redis Cluster node requires a different cluster configuration file.
# Make sure that instances running on the same system do not have
# overlapping cluster configuration file names.
#
# cluster-config-file nodes-6379.conf

# Cluster node timeout is the number of milliseconds a node must be
# unreachable for it to be considered in failure state.
# Most other internal time limits are multiples of the node timeout.
#
# cluster-node-timeout 15000

# In order to set up your cluster make sure to read the documentation
# available at the http://redis.io web site.

################################## SLOW LOG ###################################

# The Redis Slow Log is a system to log queries that exceeded a specified
# execution time. The execution time does not include the I/O operations
# like talking with the client, sending the reply and so forth,
# but just the time needed to actually execute the command (this is the only
# stage of command execution where the thread is blocked and can not serve
# other requests in the meantime).
#
# You can configure the slow log with two parameters: one tells Redis
# what is the execution time, in microseconds, to exceed in order for the
# command to get logged, and the other parameter is the length of the
# slow log. When a new command is logged the oldest one is removed from the
# queue of logged commands.

# The following time is expressed in microseconds, so 1000000 is equivalent
# to one second. Note that a negative number disables the slow log, while
# a value of zero forces the logging of every command.
slowlog-log-slower-than 10000

# There is no limit to this length. Just be aware that it will consume memory.
# You can reclaim memory used by the slow log with SLOWLOG RESET.
slowlog-max-len 128

############################# Event notification ##############################

# Redis can notify Pub/Sub clients about events happening in the key space.
# This feature is documented at http://redis.io/topics/keyspace-events
#
# For instance if keyspace events notification is enabled, and a client
# performs a DEL operation on key "foo" stored in Database 0, two
# messages will be published via Pub/Sub:
#
# PUBLISH __keyspace@0__:foo del
# PUBLISH __keyevent@0__:del foo
#
# It is possible to select the events that Redis will notify among a set
# of classes. Every class is identified by a single character:
#
#  K     Keyspace events, published with __keyspace@<db>__ prefix.
#  E     Keyevent events, published with __keyevent@<db>__ prefix.
#  g     Generic commands (non-type specific) like DEL, EXPIRE, RENAME, ...
#  $     String commands
#  l     List commands
#  s     Set commands
#  h     Hash commands
#  z     Sorted set commands
#  x     Expired events (events generated every time a key expires)
#  e     Evicted events (events generated when a key is evicted for maxmemory)
#  A     Alias for "g$lshzxe", so that the "AKE" string means all the events.
#
# The "notify-keyspace-events" directive takes as its argument a string
# composed of zero or more of the above characters. The empty string means
# that notifications are disabled.
#
# Example: to enable list and generic events, from the point of view of the
# event name, use:
#
# notify-keyspace-events Elg
#
# Example 2: to get the stream of the expired keys subscribing to channel
# name __keyevent@0__:expired use:
#
# notify-keyspace-events Ex
#
# By default all notifications are disabled because most users don't need
# this feature and the feature has some overhead. Note that if you don't
# specify at least one of K or E, no events will be delivered.
notify-keyspace-events ""

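# A minimal end-to-end check of the notification classes above (a sketch;
# the port and key name are illustrative):
#
#   redis-cli -p 20000 CONFIG SET notify-keyspace-events Ex
#   redis-cli -p 20000 SUBSCRIBE '__keyevent@0__:expired' &
#   redis-cli -p 20000 SET foo bar EX 1
#   # shortly after the key expires, the subscriber prints "foo"
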
############################### ADVANCED CONFIG ###############################

# Hashes are encoded using a memory efficient data structure when they have a
# small number of entries, and the biggest entry does not exceed a given
# threshold. These thresholds can be configured using the following directives.
hash-max-ziplist-entries 512
hash-max-ziplist-value 64

# Similarly to hashes, small lists are also encoded in a special way in order
# to save a lot of space. The special representation is only used when
# you are under the following limits:
list-max-ziplist-entries 512
list-max-ziplist-value 64

# Sets have a special encoding in just one case: when a set is composed
# of just strings that happen to be integers in radix 10 in the range
# of 64-bit signed integers.
# The following configuration setting sets the limit on the size of the
# set in order to use this special memory saving encoding.
set-max-intset-entries 512

# Similarly to hashes and lists, sorted sets are also specially encoded in
# order to save a lot of space. This encoding is only used when the length and
# elements of a sorted set are below the following limits:
zset-max-ziplist-entries 128
zset-max-ziplist-value 64

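# The effect of these thresholds can be verified with OBJECT ENCODING. A
# sketch; the port and key name are illustrative, and the encoding names
# ("ziplist" / "linkedlist") are those reported by Redis of this era:
#
#   redis-cli -p 20000 RPUSH mylist a b c
#   redis-cli -p 20000 OBJECT ENCODING mylist   # small list -> "ziplist"
#   redis-cli -p 20000 RPUSH mylist $(seq 1 600)
#   redis-cli -p 20000 OBJECT ENCODING mylist   # over 512 entries -> "linkedlist"
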
# Active rehashing uses 1 millisecond every 100 milliseconds of CPU time in
# order to help rehash the main Redis hash table (the one mapping top-level
# keys to values). The hash table implementation Redis uses (see dict.c)
# performs lazy rehashing: the more operations you run on a hash table
# that is rehashing, the more rehashing "steps" are performed, so if the
# server is idle the rehashing is never complete and some more memory is used
# by the hash table.
#
# The default is to use this millisecond 10 times every second in order to
# actively rehash the main dictionaries, freeing memory when possible.
#
# If unsure:
# use "activerehashing no" if you have hard latency requirements and it is
# not a good thing in your environment that Redis can reply from time to time
# to queries with a 2 millisecond delay.
#
# use "activerehashing yes" if you don't have such hard requirements but
# want to free memory asap when possible.
activerehashing yes

# The client output buffer limits can be used to force disconnection of clients
# that are not reading data from the server fast enough for some reason (a
# common reason is that a Pub/Sub client can't consume messages as fast as the
# publisher can produce them).
#
# The limit can be set differently for the three different classes of clients:
#
# normal -> normal clients
# slave -> slave clients and MONITOR clients
# pubsub -> clients subscribed to at least one pubsub channel or pattern
#
# The syntax of every client-output-buffer-limit directive is the following:
#
# client-output-buffer-limit <class> <hard limit> <soft limit> <soft seconds>
#
# A client is immediately disconnected once the hard limit is reached, or if
# the soft limit is reached and remains reached for the specified number of
# seconds (continuously).
# So for instance if the hard limit is 32 megabytes and the soft limit is
# 16 megabytes / 10 seconds, the client will get disconnected immediately
# if the size of the output buffers reaches 32 megabytes, but will also get
# disconnected if the client reaches 16 megabytes and continuously overcomes
# the limit for 10 seconds.
#
# By default normal clients are not limited because they don't receive data
# without asking (in a push way), but just after a request, so only
# asynchronous clients may create a scenario where data is requested faster
# than it can be read.
#
# Instead there is a default limit for pubsub and slave clients, since
# subscribers and slaves receive data in a push fashion.
#
# Both the hard and the soft limits can be disabled by setting them to zero.
client-output-buffer-limit normal 0 0 0
client-output-buffer-limit slave 256mb 64mb 60
client-output-buffer-limit pubsub 32mb 8mb 60

# Redis calls an internal function to perform many background tasks, like
# closing connections of clients in timeout, purging expired keys that are
# never requested, and so forth.
#
# Not all tasks are performed with the same frequency, but Redis checks for
# tasks to perform according to the specified "hz" value.
#
# By default "hz" is set to 10. Raising the value will use more CPU when
# Redis is idle, but at the same time will make Redis more responsive when
# there are many keys expiring at the same time, and timeouts may be
# handled with more precision.
#
# The range is between 1 and 500, however a value over 100 is usually not
# a good idea. Most users should use the default of 10 and raise this up to
# 100 only in environments where very low latency is required.
hz 10

# When a child rewrites the AOF file, if the following option is enabled
# the file will be fsync-ed every 32 MB of data generated. This is useful
# in order to commit the file to the disk more incrementally and avoid
# big latency spikes.
aof-rewrite-incremental-fsync yes

################################## INCLUDES ###################################

# Include one or more other config files here. This is useful if you
# have a standard template that goes to all Redis servers but also need
# to customize a few per-server settings. Include files can include
# other files, so use this wisely.
#
# include /path/to/local.conf
# include /path/to/other.conf
--------------------------------------------------------------------------------