├── log
│   └── .gitignore
├── doc
│   └── twemproxy-sentinel-cluster.png
├── bin
│   ├── active
│   ├── gen_conf.py
│   └── deploy.py
├── .gitignore
├── conf
│   ├── sentinel.conf
│   ├── control.sh
│   ├── conf.py
│   └── redis.conf
├── tests
│   └── run.sh
├── lib
│   ├── utils.py
│   └── monitor.py
└── README.rst

/log/.gitignore:
--------------------------------------------------------------------------------
*

--------------------------------------------------------------------------------
/doc/twemproxy-sentinel-cluster.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UlricQin/redis-mgr/master/doc/twemproxy-sentinel-cluster.png

--------------------------------------------------------------------------------
/bin/active:
--------------------------------------------------------------------------------
#!/bin/bash
eval "$(register-python-argcomplete bin/deploy.py)"
eval "$(register-python-argcomplete ./bin/deploy.py)"

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*.pyc
log/*.log
log/*.conf
list
t
conf/conf_inner.py
script/*
tmp/*
data/*
nohup.out
core

--------------------------------------------------------------------------------
/conf/sentinel.conf:
--------------------------------------------------------------------------------
daemonize yes
port ${port}
logfile ${logfile}
pidfile ${pidfile}
loglevel debug

# sentinel monitor
#sentinel monitor mymaster 127.0.0.1 6379 2
#sentinel down-after-milliseconds mymaster 60000
#sentinel failover-timeout mymaster 180000
#sentinel can-failover mymaster yes
#sentinel parallel-syncs mymaster 1

--------------------------------------------------------------------------------
/conf/control.sh:
--------------------------------------------------------------------------------
#!/bin/bash

start()
{
    stop
    ulimit -c unlimited

    pushd . > /dev/null

    cd `dirname $$0`
    ${startcmd}
    popd
}

stop()
{
    pkill -f '${runcmd}'
}

case C"$$1" in
C)
    echo "Usage: $$0 {start|stop}"
    ;;
Cstart)
    start
    echo "Done!"
    ;;
Cstop)
    stop
    echo "Done!"
    ;;
C*)
    echo "Usage: $$0 {start|stop}"
    ;;
esac

--------------------------------------------------------------------------------
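control.sh above is a string.Template source: ${startcmd} and ${runcmd} are filled in per instance, and $$ collapses to a literal shell $. A minimal sketch of the rendering done by Base._gen_control_script in bin/deploy.py (the arg values here are illustrative, not taken from a real cluster):

    # sketch: render conf/control.sh into an instance's control script
    from string import Template

    args = {
        'startcmd': 'bin/redis-server conf/redis.conf',  # illustrative; deploy.py
        'runcmd': 'redis-server \*:20000',               # computes these per instance
    }
    script = Template(open('conf/control.sh').read()).substitute(args)
    # '$$0' in the template comes out as '$0' in the generated script
    open('redis_control', 'w').write(script)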
/tests/run.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#file   : run.sh
#author : ning
#date   : 2014-01-06 16:30:21

CLUSTER='cluster0'

#test basic
./bin/deploy.py $CLUSTER deploy
./bin/deploy.py $CLUSTER start
./bin/deploy.py $CLUSTER printcmd
./bin/deploy.py $CLUSTER status
./bin/deploy.py $CLUSTER log
./bin/deploy.py $CLUSTER mastercmd 'PING'
./bin/deploy.py $CLUSTER rdb

#test bench
./bin/deploy.py $CLUSTER mlive_qps &
./bin/deploy.py $CLUSTER nbench
pkill -f './bin/deploy.py'

#test failover
./bin/deploy.py $CLUSTER scheduler &
./bin/deploy.py $CLUSTER randomkill
pkill -f './bin/deploy.py'

./bin/deploy.py $CLUSTER stop

--------------------------------------------------------------------------------
/bin/gen_conf.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
#coding: utf-8
#file   : gen_conf.py
#author : ning
#date   : 2013-12-20 09:36:42

import urllib, urllib2
import os, sys
import re, time
import logging
from pcl import common

from string import Template as T
T.s = T.substitute

BASEDIR = '/tmp/r'
HOSTS = [
    '127.0.1.1',
    '127.0.1.2',
    '127.0.1.3',
    '127.0.1.4',
]
MASTER_PER_MACHINE = 2

# generate the "redis" section
port = 20000
for i in range(len(HOSTS)):
    for j in range(MASTER_PER_MACHINE):
        slave_port = port + 1000

        m = HOSTS[i]
        s = HOSTS[(i+1)%len(HOSTS)]
        template = "('$m:$port', '$BASEDIR/redis-$port'), ('$s:$slave_port', '$BASEDIR/redis-$slave_port'),"
        print T(template).s(globals())
        port += 1

# generate the "nutcracker" section
port = 22000
for i in range(len(HOSTS)):
    m = HOSTS[i]
    for j in range(MASTER_PER_MACHINE):
        xport = port + j
        template = "('$m:$xport', '$BASEDIR/nutcracker-$xport'),"
        print T(template).s(globals())

--------------------------------------------------------------------------------
/lib/utils.py:
--------------------------------------------------------------------------------
import os
import re
import sys
import time
import copy
import thread
import socket
import threading
import logging
import inspect
import argparse
import telnetlib
import redis
import random
import json

from collections import defaultdict
from argparse import RawTextHelpFormatter

from pcl import common
from pcl import crontab
from string import Template

# we have to do this here, so that lib/monitor.py can use conf.xxx
# import the config module named by $REDIS_DEPLOY_CONFIG (conf/conf.py by default)
if 'REDIS_DEPLOY_CONFIG' not in os.environ:
    logging.error('please export REDIS_DEPLOY_CONFIG=conf')
    exit(1)
config_name = os.environ['REDIS_DEPLOY_CONFIG']
conf = __import__(config_name, globals(), locals(), [], 0) #import config_module

common.system('mkdir -p data tmp', None)

def my_json_encode(j):
    return json.dumps(j, cls=common.MyEncoder)

def strstr(s1, s2):
    return s1.find(s2) != -1

def lets_sleep(SLEEP_TIME = 0.1):
    time.sleep(SLEEP_TIME)

def TT(template, args): #todo: modify all
    return Template(template).substitute(args)

# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4

--------------------------------------------------------------------------------
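utils.py resolves the config module by name from the REDIS_DEPLOY_CONFIG environment variable; a minimal equivalent sketch (it assumes conf/ is on sys.path, which bin/deploy.py arranges):

    import os
    import sys

    sys.path.append('conf')                     # bin/deploy.py appends WORKDIR/conf
    os.environ['REDIS_DEPLOY_CONFIG'] = 'conf'  # module name, i.e. conf/conf.py below
    conf = __import__(os.environ['REDIS_DEPLOY_CONFIG'])
    print conf.cluster0['cluster_name']         # -> cluster0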
/conf/conf.py:
--------------------------------------------------------------------------------
#coding: utf-8
#the port encodes role, cluster_id and instance id:
#  2 0 x xx

#redis-master 20xxx
#redis-slave  21xxx
#proxy        22xxx 23xxx(status-port)
#sentinel     29xxx

#we will generate:
#port
#pidfile
#logfile
#dir

#path on the deploy machine
BINARYS = {
    'REDIS_SERVER_BINS'   : '/home/ning/idning-github/redis/src/redis-*',
    'REDIS_CLI'           : '/home/ning/idning-github/redis/src/redis-cli',
    'REDIS_SENTINEL_BINS' : '/home/ning/idning-github/redis/src/redis-sentinel',
    'NUTCRACKER_BINS'     : '/home/ning/Desktop/t/nutcracker-0.2.4/output/bin/nutcracker',
}

RDB_SLEEP_TIME = 1

#optional
REDIS_MONITOR_EXTRA = {
    'used_cpu_user': (0, 50),
}

#optional
NUTCRACKER_MONITOR_EXTRA = {
    'client_connections': (0, 10),
    "forward_error_INC": (0, 1000), # in every minute
    "client_err_INC": (0, 1000),    # in every minute
    'in_queue': (0, 10),
    'out_queue': (0, 10),
}

cluster0 = {
    'cluster_name': 'cluster0',
    'user': 'ning',
    'sentinel':[
        ('127.0.0.5:29001', '/tmp/r/sentinel-29001'),
        ('127.0.0.5:29002', '/tmp/r/sentinel-29002'),
        ('127.0.0.5:29003', '/tmp/r/sentinel-29003'),
    ],
    'redis': [
        # master(host:port, install path) , slave(host:port, install path)
        ('127.0.0.5:20000', '/tmp/r/redis-20000'), ('127.0.0.5:21000', '/tmp/r/redis-21000'),
        ('127.0.0.5:20001', '/tmp/r/redis-20001'), ('127.0.0.5:21001', '/tmp/r/redis-21001'),
        ('127.0.0.5:20002', '/tmp/r/redis-20002'), ('127.0.0.5:21002', '/tmp/r/redis-21002'),
        ('127.0.0.5:20003', '/tmp/r/redis-20003'), ('127.0.0.5:21003', '/tmp/r/redis-21003'),
    ],
    'nutcracker': [
        ('127.0.0.5:22000', '/tmp/r/nutcracker-22000'),
        ('127.0.0.5:22001', '/tmp/r/nutcracker-22001'),
        ('127.0.0.5:22002', '/tmp/r/nutcracker-22002'),
    ],
}

--------------------------------------------------------------------------------
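the flat 'redis' list above is consumed pairwise as (master, slave) by Cluster.__init__ in bin/deploy.py; a minimal sketch of the pairing:

    # masters sit at even indices, their slaves at the following odd index
    redis_list = cluster0['redis']
    for (m_hp, m_path), (s_hp, s_path) in zip(redis_list[::2], redis_list[1::2]):
        print '%s -> %s' % (m_hp, s_hp)   # 127.0.0.5:20000 -> 127.0.0.5:21000, ...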
/README.rst:
--------------------------------------------------------------------------------
deploy.py
=========

this script deploys a redis cluster in ``10 minutes``, with:

- redis
- redis-sentinel
- twemproxy

you can deploy/start/stop a cluster, run redis commands on every instance, get status, reconfig the proxy, and more.

config
------

::

    cluster0 = {
        'cluster_name': 'cluster0',
        'user': 'ning',
        'sentinel':[
            ('127.0.0.5:29001', '/tmp/r/sentinel-29001'),
            ('127.0.0.5:29002', '/tmp/r/sentinel-29002'),
            ('127.0.0.5:29003', '/tmp/r/sentinel-29003'),
        ],
        'redis': [
            # master(host:port, install path) , slave(host:port, install path)
            ('127.0.0.5:20000', '/tmp/r/redis-20000'), ('127.0.0.5:21000', '/tmp/r/redis-21000'),
            ('127.0.0.5:20001', '/tmp/r/redis-20001'), ('127.0.0.5:21001', '/tmp/r/redis-21001'),
            ('127.0.0.5:20002', '/tmp/r/redis-20002'), ('127.0.0.5:21002', '/tmp/r/redis-21002'),
            ('127.0.0.5:20003', '/tmp/r/redis-20003'), ('127.0.0.5:21003', '/tmp/r/redis-21003'),
        ],
        'nutcracker': [
            ('127.0.0.5:22000', '/tmp/r/nutcracker-22000'),
            ('127.0.0.5:22001', '/tmp/r/nutcracker-22001'),
            ('127.0.0.5:22002', '/tmp/r/nutcracker-22002'),
        ],
    }

this will generate the ``sentinel`` config::

    sentinel monitor cluster0-20000 127.0.0.5 20000 2
    sentinel down-after-milliseconds cluster0-20000 60000
    sentinel failover-timeout cluster0-20000 180000
    sentinel parallel-syncs cluster0-20000 1

    sentinel monitor cluster0-20001 127.0.0.5 20001 2
    sentinel down-after-milliseconds cluster0-20001 60000
    sentinel failover-timeout cluster0-20001 180000
    sentinel parallel-syncs cluster0-20001 1

and the ``twemproxy`` config::

    cluster0:
      listen: 127.0.0.5:22000
      hash: fnv1a_64
      distribution: modula
      preconnect: true
      auto_eject_hosts: false
      redis: true
      backlog: 512
      client_connections: 0
      server_connections: 1
      server_retry_timeout: 2000
      server_failure_limit: 2
      servers:
        - 127.0.0.5:20000:1 cluster0-20000
        - 127.0.0.5:20001:1 cluster0-20001

the server name ``cluster0-20000`` comes from the original master; after a failover, ``cluster0-20000`` may point at either ``127.0.0.5:20000`` or ``127.0.0.5:21000``, but the name itself never changes.
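the proxy keeps the server *name* stable while failover changes the ``host:port`` behind it; ``reconfigproxy`` rewrites only the address part. ``NutCracker.get_masters()`` in bin/deploy.py parses the ``servers:`` lines back with::

    def parse_line(line):
        _x, host_port_w, name = line.split()
        host, port, _w = host_port_w.split(':')
        return ('%s:%s' % (host, port), name)

    parse_line(' - 127.0.0.5:20000:1 cluster0-20000')
    # -> ('127.0.0.5:20000', 'cluster0-20000')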
usage
-----

choose your config filename::

    export REDIS_DEPLOY_CONFIG=conf && . bin/active

::

    $ ./bin/deploy.py -h
    usage: deploy.py [-h] [-v] [-o LOGFILE] clustername op [cmd]

    positional arguments:
      clustername  cluster target
      op           aof_rewrite    : None
                   deploy         : deploy the binarys and config file (redis/sentinel/nutcracker) in this cluster
                   kill           : kill all instance(redis/sentinel/nutcracker) in this cluster
                   log            : show log of all instance(redis/sentinel/nutcracker) in this cluster
                   master_memory  : show used_memory_human:1.53M
                   master_qps     : instantaneous_ops_per_sec:4
                   mastercmd cmd  : run redis command against all redis Master instance, like 'INFO, GET xxxx'
                   monitor        : monitor status of the cluster
                   printcmd       : print the start/stop cmd of instance
                   rdb            : do rdb in all redis instance
                   reconfig_proxy : None
                   rediscmd cmd   : run redis command against all redis instance, like 'INFO, GET xxxx'
                   start          : start all instance(redis/sentinel/nutcracker) in this cluster
                   status         : get status of all instance(redis/sentinel/nutcracker) in this cluster
                   stop           : stop all instance(redis/sentinel/nutcracker) in this cluster
      cmd          the redis/ssh cmd like "INFO"

start cluster::

    $ ./bin/deploy.py cluster0 deploy

    $ ./bin/deploy.py cluster0 start
    2013-12-26 14:47:47,385 [MainThread] [NOTICE] start redis
    2013-12-26 14:47:47,622 [MainThread] [INFO] [redis:127.0.0.5:20000] start ok in 0.23 seconds
    2013-12-26 14:47:47,848 [MainThread] [INFO] [redis:127.0.0.5:21000] start ok in 0.22 seconds
    2013-12-26 14:47:48,099 [MainThread] [INFO] [redis:127.0.0.5:20001] start ok in 0.24 seconds
    2013-12-26 14:47:48,369 [MainThread] [INFO] [redis:127.0.0.5:21001] start ok in 0.27 seconds
    2013-12-26 14:47:50,788 [MainThread] [NOTICE] start sentinel
    2013-12-26 14:47:51,186 [MainThread] [INFO] [sentinel:127.0.0.5:29001] start ok in 0.39 seconds
    2013-12-26 14:47:51,452 [MainThread] [INFO] [sentinel:127.0.0.5:29002] start ok in 0.26 seconds
    2013-12-26 14:47:51,820 [MainThread] [INFO] [sentinel:127.0.0.5:29003] start ok in 0.35 seconds
    2013-12-26 14:47:51,820 [MainThread] [NOTICE] start nutcracker
    2013-12-26 14:47:52,082 [MainThread] [INFO] [nutcracker:127.0.0.5:22000] start ok in 0.26 seconds
    2013-12-26 14:47:52,364 [MainThread] [INFO] [nutcracker:127.0.0.5:22001] start ok in 0.28 seconds
    2013-12-26 14:47:52,573 [MainThread] [INFO] [nutcracker:127.0.0.5:22002] start ok in 0.21 seconds
    2013-12-26 14:47:52,573 [MainThread] [NOTICE] setup master->slave
    2013-12-26 14:47:52,580 [MainThread] [INFO] setup [redis:127.0.0.5:20000]->[redis:127.0.0.5:21000]
    2013-12-26 14:47:52,580 [MainThread] [INFO] [redis:127.0.0.5:21000] /home/ning/idning-github/redis/src/redis-cli -h 127.0.0.5 -p 21000 SLAVEOF 127.0.0.5 20000
    OK
    ...
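every op fans out over all instances of the cluster; ``Cluster._doit`` in bin/deploy.py dispatches it by name, roughly equivalent to::

    for s in self.all_redis + self.all_sentinel + self.all_nutcracker:
        getattr(s, op)()    # the real code uses eval('s.%s()' % op), group by group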
run cmd on each master::

    $ ./bin/deploy.py cluster0 mastercmd 'get "hello"'
    2013-12-24 13:51:39,748 [MainThread] [INFO] [RedisServer:127.0.0.5:20000]: get "hello"
    [RedisServer:127.0.0.5:20000] xxxxx
    2013-12-24 13:51:39,752 [MainThread] [INFO] [RedisServer:127.0.0.5:20001]: get "hello"
    [RedisServer:127.0.0.5:20001]
    2013-12-24 13:51:39,756 [MainThread] [INFO] [RedisServer:127.0.0.5:20002]: get "hello"
    [RedisServer:127.0.0.5:20002]
    2013-12-24 13:51:39,760 [MainThread] [INFO] [RedisServer:127.0.0.5:20003]: get "hello"
    [RedisServer:127.0.0.5:20003] world

dump rdb::

    $ ./bin/deploy.py cluster0 rdb

monitor qps/memory::

    $ ./bin/deploy.py cluster0 mq
    2013-12-24 14:21:05,841 [MainThread] [INFO] start running: ./bin/deploy.py -v cluster0 mq
    2013-12-24 14:21:05,842 [MainThread] [INFO] Namespace(cmd=None, logfile='log/deploy.log', op='mq', target='cluster0', verbose=1)
    20000 20001 20002 20003
        6     5     5     6
        6     6     5     6
        6     6     5     6
     4741     6     6     6
    33106     5     5     6
    46639     8     7     7
    42265     6     5     7

run benchmark::

    $ ./bin/deploy.py cluster_offline0 bench
    $ ./bin/deploy.py cluster_offline0 mbench

modify config::

    $ ./bin/deploy.py cluster_offline0 mastercmd 'CONFIG GET save' -v
    $ ./bin/deploy.py cluster_offline0 mastercmd 'CONFIG SET save "10000 1000000"' -v

enable auto-complete
====================
::

    export REDIS_DEPLOY_CONFIG=conf

    pip install argcomplete
    $ . ./bin/active

    ning@ning-laptop ~/idning-github/redis-mgr$ ./bin/deploy.py cluster0 r
    randomkill    rdb           reconfigproxy rediscmd

gen_conf
========

use the config::

    BASEDIR = '/tmp/r'
    HOSTS = [
        '127.0.1.1',
        '127.0.1.2',
        '127.0.1.3',
        '127.0.1.4',
    ]
    MASTER_PER_MACHINE = 2
    SLAVE_PORT_INCREASE = 1000

it will generate the deploy.py config like this:

.. image:: doc/twemproxy-sentinel-cluster.png

Dependency
==========

- pcl: https://github.com/idning/pcl
- redis-py: https://github.com/andymccurdy/redis-py
- argcomplete (optional): https://github.com/kislyuk/argcomplete

Authors
=======

- @idning
- @cen-li

TODO
====

1. scheduler for many clusters, we will need it!
2. SLOW LOG monitor
3. #live monitor for nutcracker
4. 
#nc to get nutcracker status will fail in background:: 219 | 220 | nohup ./bin/deploy.py cluster0 scheduler & 221 | 222 | we use telnetlib instead 223 | 224 | 225 | https://github.com/idning/redis-mgr 226 | 227 | -------------------------------------------------------------------------------- /lib/monitor.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #coding: utf-8 3 | 4 | from utils import * 5 | 6 | PWD = os.path.dirname(os.path.realpath(__file__)) 7 | 8 | class BenchThread(threading.Thread): 9 | def __init__ (self, redis, cmd): 10 | threading.Thread.__init__(self) 11 | self.redis = redis 12 | self.cmd = cmd 13 | def run(self): 14 | self.redis._bench(self.cmd) 15 | 16 | class Benchmark(): 17 | def nbench(self): 18 | ''' 19 | run benchmark against nutcracker 20 | ''' 21 | for s in self.all_nutcracker: 22 | cmd = TT('bin/redis-benchmark --csv -h $host -p $port -r 100000 -t set,get -n 10000000 -c 100 ', s.args) 23 | BenchThread(random.choice(self._active_masters()), cmd).start() 24 | 25 | def mbench(self): 26 | ''' 27 | run benchmark against redis master 28 | ''' 29 | for s in self._active_masters(): 30 | cmd = TT('bin/redis-benchmark --csv -h $host -p $port -r 100000 -t set,get -n 10000000 -c 100 ', s.args) 31 | BenchThread(s, cmd).start() 32 | 33 | def stopbench(self): 34 | ''' 35 | you will need this for stop benchmark 36 | ''' 37 | return self.sshcmd("pkill -f 'bin/redis-benchmark'") 38 | 39 | class Monitor(): 40 | def _live_nutcracker(self, what, format_func = lambda x:x): 41 | 42 | for i in xrange(1000*1000): 43 | if i%10 == 0: 44 | self.all_nutcracker 45 | header = common.to_blue(' '.join(['%5s' % s.args['port'] for s in self.all_nutcracker])) 46 | print header 47 | 48 | def get_v(s): 49 | info = s._info_dict()[self.args['cluster_name']] 50 | if what not in info: 51 | return '-' 52 | return format_func(info[what]) 53 | 54 | print ' '.join([ '%5s' % get_v(s) for s in self.all_nutcracker]) + '\t' + common.format_time(None, '%X') 55 | 56 | time.sleep(1) 57 | 58 | def _live_redis(self, what, format_func = lambda x:x): 59 | masters = self._active_masters() 60 | for i in xrange(1000*1000): 61 | if i%10 == 0: 62 | old_masters = masters 63 | masters = self._active_masters() 64 | 65 | old_masters_list = [str(m) for m in old_masters] 66 | masters_list = [str(m) for m in masters] 67 | 68 | if masters_list == old_masters_list: 69 | header = common.to_blue(' '.join(['%5s' % s.args['port'] for s in masters])) 70 | else: 71 | header = common.to_red(' '.join(['%5s' % s.args['port'] for s in masters])) 72 | print header 73 | def get_v(s): 74 | info = s._info_dict() 75 | if what not in info: 76 | return '-' 77 | return format_func(info[what]) 78 | print ' '.join([ '%5s' % get_v(s) for s in masters]) + '\t' + common.format_time(None, '%X') 79 | 80 | time.sleep(1) 81 | 82 | def mlive_mem(self): 83 | ''' 84 | monitor used_memory_human:1.53M of master 85 | ''' 86 | def format(s): 87 | return re.sub('\.\d+', '', s) # 221.53M=>221M 88 | self._live_redis('used_memory_human', format) 89 | 90 | def mlive_qps(self): 91 | ''' 92 | monitor instantaneous_ops_per_sec of master 93 | ''' 94 | self._live_redis('instantaneous_ops_per_sec') 95 | 96 | def nlive_request(self): 97 | ''' 98 | monitor nutcracker requests/s 99 | ''' 100 | self._live_nutcracker('requests_INC') 101 | 102 | def nlive_forward_error(self): 103 | ''' 104 | monitor nutcracker forward_error/s 105 | ''' 106 | self._live_nutcracker('forward_error_INC') 107 | 108 | def 
nlive_inqueue(self): 109 | ''' 110 | monitor nutcracker forward_error/s 111 | ''' 112 | self._live_nutcracker('in_queue') 113 | 114 | def nlive_outqueue(self): 115 | ''' 116 | monitor nutcracker forward_error/s 117 | ''' 118 | self._live_nutcracker('out_queue') 119 | 120 | def _monitor(self): 121 | ''' 122 | - redis 123 | - connected_clients 124 | - mem 125 | - rdb_last_bgsave_time_sec:0 126 | - aof_last_rewrite_time_sec:0 127 | - latest_fork_usec 128 | - slow log 129 | - hitrate 130 | - master_link_status:down 131 | - nutcracker 132 | - all config of nutcracker is the same 133 | - forward_error 134 | - server_err 135 | - in_queue/out_queue 136 | 137 | save this to a file , in one line: 138 | { 139 | 'ts': xxx, 140 | 'timestr': xxx, 141 | 'infos': { 142 | '[redis:host:port]': {info} 143 | '[redis:host:port]': {info} 144 | '[nutcracker:host:port]': {info} 145 | }, 146 | } 147 | ''' 148 | now = time.time() 149 | 150 | infos = {} 151 | for r in self.all_redis + self.all_sentinel + self.all_nutcracker: 152 | infos[str(r)] = r._info_dict() 153 | self._check_warning(infos) 154 | 155 | ret = { 156 | 'ts': now, 157 | 'timestr': common.format_time_to_min(now), 158 | 'infos': infos, 159 | } 160 | 161 | DIR = os.path.join(PWD, '../data') 162 | STAT_LOG = os.path.join(DIR, 'statlog.%s' % common.format_time(now, '%Y%m%d%H')) 163 | common.system('mkdir -p %s' % DIR, None) 164 | 165 | fout = file(STAT_LOG, 'a+') 166 | print >> fout, my_json_encode(ret) 167 | fout.close() 168 | timeused = time.time() - now 169 | logging.notice("monitor @ ts: %s, timeused: %.2fs" % (common.format_time_to_min(now), timeused)) 170 | 171 | def _check_warning(self, infos): 172 | def match(val, expr): 173 | if type(expr) == set: 174 | return val in expr 175 | _min, _max = expr 176 | return _min <= float(val) <= _max 177 | 178 | def check_redis(node, info): 179 | if not info or 'uptime_in_seconds' not in info: 180 | logging.warn('%s is down' % node) 181 | now = time.time() 182 | redis_spec = { 183 | 'connected_clients': (0, 1000), 184 | 'used_memory_peak' : (0, 5*(2**30)), 185 | 'rdb_last_bgsave_time_sec': (0, 1), 186 | 'aof_last_rewrite_time_sec': (0, 1), 187 | 'latest_fork_usec': (0, 100*1000), #100ms 188 | 'master_link_status': set(['up']), 189 | 'rdb_last_bgsave_status': set(['ok']), 190 | 'rdb_last_save_time': (now-25*60*60, now), 191 | #- hit_rate 192 | #- slow log 193 | } 194 | if 'REDIS_MONITOR_EXTRA' in dir(conf): 195 | redis_spec.update(conf.REDIS_MONITOR_EXTRA) 196 | 197 | for k, expr in redis_spec.items(): 198 | if k in info and not match(info[k], expr): 199 | logging.warn('%s.%s is:\t %s, not in %s' % (node, k, info[k], expr)) 200 | 201 | 202 | def check_nutcracker(node, info): 203 | ''' 204 | see NutCracker._info_dict() for fields 205 | ''' 206 | if not info or 'uptime' not in info: 207 | logging.warn('%s is down' % node) 208 | 209 | nutcracker_cluster_spec = { 210 | 'client_connections': (0, 10000), 211 | "forward_error_INC": (0, 1000), # in every minute 212 | "client_err_INC": (0, 1000), # in every minute 213 | 'in_queue': (0, 1000), 214 | 'out_queue': (0, 1000), 215 | } 216 | if 'NUTCRACKER_MONITOR_EXTRA' in dir(conf): 217 | nutcracker_cluster_spec.update(conf.NUTCRACKER_MONITOR_EXTRA) 218 | 219 | #got info of this cluster 220 | info = info[self.args['cluster_name']] 221 | for k, expr in nutcracker_cluster_spec.items(): 222 | if k in info and not match(info[k], expr): 223 | logging.warn('%s.%s is:\t %s, not in %s' % (node, k, info[k], expr)) 224 | 225 | 226 | for node, info in infos.items(): 227 | if 
strstr(node, 'redis'): 228 | check_redis(node, info) 229 | if strstr(node, 'nutcracker'): 230 | check_nutcracker(node, info) 231 | 232 | def monitor(self): 233 | ''' 234 | a long time running monitor task, write WARN log on bad things happend 235 | ''' 236 | while True: 237 | self._monitor() 238 | time.sleep(60) 239 | 240 | def scheduler(self): 241 | ''' 242 | start following threads: 243 | - failover 244 | - cron of monitor 245 | - cron of rdb 246 | = graph web server 247 | ''' 248 | thread.start_new_thread(self.failover, ()) 249 | 250 | cron = crontab.Cron() 251 | cron.add('* * * * *' , self._monitor) # every minute 252 | cron.add('0 3 * * *' , self.rdb, use_thread=True) # every day 253 | cron.add('0 5 * * *' , self.aof_rewrite, use_thread=True) # every day 254 | cron.run() 255 | 256 | # vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 257 | -------------------------------------------------------------------------------- /bin/deploy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #coding: utf-8 3 | 4 | import os 5 | import sys 6 | 7 | PWD = os.path.dirname(os.path.realpath(__file__)) 8 | WORKDIR = os.path.join(PWD, '../') 9 | sys.path.append(os.path.join(WORKDIR, 'lib/')) 10 | sys.path.append(os.path.join(WORKDIR, 'conf/')) 11 | 12 | from utils import * 13 | from monitor import Monitor, Benchmark 14 | 15 | class Base: 16 | ''' 17 | the sub class should implement: _alive, _pre_deploy, status, and init self.args 18 | ''' 19 | def __init__(self, name, user, host_port, path): 20 | self.args = { 21 | 'name' : name, 22 | 'user' : user, 23 | 'host' : socket.gethostbyname(host_port.split(':')[0]), 24 | 'port' : int(host_port.split(':')[1]), 25 | 'path' : path, 26 | 27 | 'localdir' : '', #files to deploy 28 | 29 | 'startcmd' : '', #startcmd and runcmd will used to generate the control script 30 | 'runcmd' : '', 31 | 'logfile' : '', 32 | } 33 | 34 | def __str__(self): 35 | return TT('[$name:$host:$port]', self.args) 36 | 37 | def deploy(self): 38 | logging.info('deploy %s' % self) 39 | self.args['localdir'] = TT('tmp/$name-$host-$port', self.args) 40 | self._run(TT('mkdir -p $localdir/bin && mkdir -p $localdir/conf && mkdir -p $localdir/log && mkdir -p $localdir/data ', self.args)) 41 | 42 | self._pre_deploy() 43 | self._gen_control_script() 44 | self._init_dir() 45 | 46 | cmd = TT('rsync -ravP $localdir/ $user@$host:$path 1>/dev/null 2>/dev/null', self.args) 47 | self._run(cmd) 48 | 49 | def _gen_control_script(self): 50 | content = file('conf/control.sh').read() 51 | content = TT(content, self.args) 52 | 53 | control_filename = TT('${localdir}/${name}_control', self.args) 54 | 55 | fout = open(control_filename, 'w+') 56 | fout.write(content) 57 | fout.close() 58 | os.chmod(control_filename, 0755) 59 | 60 | def start(self): 61 | if self._alive(): 62 | logging.warn('%s already running' %(self) ) 63 | return 64 | 65 | logging.debug('starting %s' % self) 66 | t1 = time.time() 67 | sleeptime = .1 68 | self._run(self._remote_start_cmd()) 69 | 70 | while not self._alive(): 71 | lets_sleep(sleeptime) 72 | if sleeptime < 5: 73 | sleeptime *= 2 74 | else: 75 | sleeptime = 5 76 | logging.warn('%s still not alive' % self) 77 | 78 | t2 = time.time() 79 | logging.info('%s start ok in %.2f seconds' %(self, t2-t1) ) 80 | 81 | def stop(self): 82 | if not self._alive(): 83 | logging.warn('%s already stop' %(self) ) 84 | return 85 | 86 | self._run(self._remote_stop_cmd()) 87 | t1 = time.time() 88 | while self._alive(): 89 | lets_sleep() 90 | t2 
= time.time() 91 | logging.info('%s stop ok in %.2f seconds' %(self, t2-t1) ) 92 | 93 | def printcmd(self): 94 | print common.to_blue(self), self._remote_start_cmd() 95 | 96 | def status(self): 97 | logging.warn("status: not implement") 98 | 99 | def log(self): 100 | cmd = TT('tail $logfile', self.args) 101 | logging.info('log of %s' % self) 102 | print self._run(self._remote_cmd(cmd)) 103 | 104 | def _bench(self, cmd): 105 | ''' 106 | run a benchmark cmd on this remote machine 107 | ''' 108 | remote_cmd = self._remote_cmd(cmd) 109 | logging.info(remote_cmd) 110 | #common.system_bg(remote_cmd, logging.debug) 111 | print self._run(remote_cmd) 112 | 113 | def _alive(self): 114 | logging.warn("_alive: not implement") 115 | 116 | def _init_dir(self): 117 | raw_cmd = TT('mkdir -p $path', self.args) 118 | self._run(self._remote_cmd(raw_cmd, chdir=False)) 119 | 120 | def _remote_start_cmd(self): 121 | cmd = TT("./${name}_control start", self.args) 122 | return self._remote_cmd(cmd) 123 | 124 | def _remote_stop_cmd(self): 125 | cmd = TT("./${name}_control stop", self.args) 126 | return self._remote_cmd(cmd) 127 | 128 | def _remote_cmd(self, raw_cmd, chdir=True): 129 | if raw_cmd.find('"') >= 0: 130 | raise Exception('bad cmd: ' + raw_cmd) 131 | args = copy.deepcopy(self.args) 132 | args['cmd'] = raw_cmd 133 | if chdir: 134 | return TT('ssh -n -f $user@$host "cd $path && $cmd"', args) 135 | else: 136 | return TT('ssh -n -f $user@$host "$cmd"', args) 137 | 138 | def _run(self, raw_cmd): 139 | ret = common.system(raw_cmd, logging.debug) 140 | logging.debug('return : [%d] [%s] ' % (len(ret), common.shorten(ret)) ) 141 | return ret 142 | 143 | 144 | class RedisServer(Base): 145 | def __init__(self, user, host_port, path): 146 | Base.__init__(self, 'redis', user, host_port, path) 147 | 148 | self.args['startcmd'] = TT('bin/redis-server conf/redis.conf', self.args) 149 | self.args['runcmd'] = TT('redis-server \*:$port', self.args) 150 | 151 | self.args['conf'] = TT('$path/conf/redis.conf', self.args) 152 | self.args['pidfile'] = TT('$path/log/redis.pid', self.args) 153 | self.args['logfile'] = TT('$path/log/redis.log', self.args) 154 | self.args['dir'] = TT('$path/data', self.args) 155 | 156 | self.args['REDIS_CLI'] = conf.BINARYS['REDIS_CLI'] 157 | 158 | def _info_dict(self): 159 | cmd = TT('$REDIS_CLI -h $host -p $port INFO', self.args) 160 | info = self._run(cmd) 161 | 162 | info = [line.split(':', 1) for line in info.split('\r\n') if not line.startswith('#')] 163 | info = [i for i in info if len(i)>1] 164 | return defaultdict(str, info) #this is a defaultdict, be Notice 165 | 166 | def _ping(self): 167 | cmd = TT('$REDIS_CLI -h $host -p $port PING', self.args) 168 | return self._run(cmd) 169 | 170 | def _alive(self): 171 | return strstr(self._ping(), 'PONG') 172 | 173 | def _gen_conf(self): 174 | content = file('conf/redis.conf').read() 175 | return TT(content, self.args) 176 | 177 | def _pre_deploy(self): 178 | self.args['BINS'] = conf.BINARYS['REDIS_SERVER_BINS'] 179 | self._run(TT('cp $BINS $localdir/bin/', self.args)) 180 | 181 | fout = open(TT('$localdir/conf/redis.conf', self.args), 'w+') 182 | fout.write(self._gen_conf()) 183 | fout.close() 184 | 185 | def status(self): 186 | uptime = self._info_dict()['uptime_in_seconds'] 187 | if uptime: 188 | logging.info('%s uptime %s seconds' % (self, uptime)) 189 | else: 190 | logging.error('%s is down' % self) 191 | 192 | def isslaveof(self, master_host, master_port): 193 | info = self._info_dict() 194 | if info['master_host'] == master_host and 
int(info['master_port']) == master_port: 195 | logging.debug('already slave of %s:%s' % (master_host, master_port)) 196 | return True 197 | 198 | def slaveof(self, master_host, master_port): 199 | cmd = 'SLAVEOF %s %s' % (master_host, master_port) 200 | return self.rediscmd(cmd) 201 | 202 | def rediscmd(self, cmd): 203 | args = copy.deepcopy(self.args) 204 | args['cmd'] = cmd 205 | cmd = TT('$REDIS_CLI -h $host -p $port $cmd', args) 206 | logging.info('%s %s' % (self, cmd)) 207 | print self._run(cmd) 208 | 209 | 210 | class Sentinel(RedisServer): 211 | def __init__(self, user, host_port, path, masters): 212 | RedisServer.__init__(self, user, host_port, path) 213 | 214 | self.args['startcmd'] = TT('bin/redis-sentinel conf/sentinel.conf', self.args) 215 | self.args['runcmd'] = TT('redis-sentinel \*:$port', self.args) 216 | 217 | self.args['conf'] = TT('$path/conf/sentinel.conf', self.args) 218 | self.args['pidfile'] = TT('$path/log/sentinel.pid', self.args) 219 | self.args['logfile'] = TT('$path/log/sentinel.log', self.args) 220 | 221 | self.args['name'] = 'sentinel' 222 | self.masters = masters 223 | 224 | def _gen_conf_section(self): 225 | template = '''\ 226 | sentinel monitor $server_name $host $port 2 227 | sentinel down-after-milliseconds $server_name 60000 228 | sentinel failover-timeout $server_name 180000 229 | sentinel parallel-syncs $server_name 1 230 | ''' 231 | cfg = '\n'.join([TT(template, master.args) for master in self.masters]) 232 | return cfg 233 | 234 | def _gen_conf(self): 235 | content = file('conf/sentinel.conf').read() 236 | content = TT(content, self.args) 237 | return content + self._gen_conf_section() 238 | 239 | def _pre_deploy(self): 240 | self.args['BINS'] = conf.BINARYS['REDIS_SENTINEL_BINS'] 241 | self._run(TT('cp $BINS $localdir/bin/', self.args)) 242 | 243 | fout = open(TT('$localdir/conf/sentinel.conf', self.args), 'w+') 244 | fout.write(self._gen_conf()) 245 | fout.close() 246 | 247 | def get_masters(self): 248 | '''return currnet master list of (host:port, name)''' 249 | conn = redis.Redis(self.args['host'], self.args['port']) 250 | masters = conn.sentinel_masters() 251 | logging.debug('sentinel got masters: %s' % masters) 252 | return [('%s:%s' % (m['ip'], m['port']), m['name']) for m in masters.values()] 253 | 254 | def get_failover_event(self): 255 | self._sub = redis.Redis(self.args['host'], self.args['port']).pubsub() 256 | self._sub.subscribe('+switch-master') 257 | logging.info('subscribe +switch-master on %s' % self) 258 | iterator = self._sub.listen() 259 | if next(iterator)['channel'] != '+switch-master': 260 | raise Exception('error on subscribe') 261 | 262 | for msg in iterator: 263 | logging.info('got msg: %s' % msg) 264 | yield msg 265 | 266 | class NutCracker(Base): 267 | def __init__(self, user, host_port, path, masters): 268 | Base.__init__(self, 'nutcracker', user, host_port, path) 269 | 270 | self.masters = masters 271 | 272 | self.args['conf'] = TT('$path/conf/nutcracker.conf', self.args) 273 | self.args['pidfile'] = TT('$path/log/nutcracker.pid', self.args) 274 | self.args['logfile'] = TT('$path/log/nutcracker.log', self.args) 275 | self.args['status_port'] = self.args['port'] + 1000 276 | 277 | self.args['startcmd'] = TT('bin/nutcracker -d -c $conf -o $logfile -p $pidfile -s $status_port', self.args) 278 | self.args['runcmd'] = self.args['startcmd'] 279 | self._last_info = None 280 | 281 | def _alive(self): 282 | return self._info_dict() 283 | 284 | def _gen_conf_section(self): 285 | template = ' - $host:$port:1 $server_name' 286 | 
cfg = '\n'.join([TT(template, master.args) for master in self.masters]) 287 | return cfg 288 | 289 | def _gen_conf(self): 290 | content = ''' 291 | $cluster_name: 292 | listen: 0.0.0.0:$port 293 | hash: fnv1a_64 294 | distribution: modula 295 | preconnect: true 296 | auto_eject_hosts: false 297 | redis: true 298 | backlog: 512 299 | timeout: 400 300 | client_connections: 0 301 | server_connections: 1 302 | server_retry_timeout: 2000 303 | server_failure_limit: 2 304 | servers: 305 | ''' 306 | content = TT(content, self.args) 307 | return content + self._gen_conf_section() 308 | 309 | def _pre_deploy(self): 310 | self.args['BINS'] = conf.BINARYS['NUTCRACKER_BINS'] 311 | self._run(TT('cp $BINS $localdir/bin/', self.args)) 312 | 313 | fout = open(TT('$localdir/conf/nutcracker.conf', self.args), 'w+') 314 | fout.write(self._gen_conf()) 315 | fout.close() 316 | 317 | def _info_dict(self): 318 | ''' 319 | | We will add fields in the info dict 320 | "uptime": 370, | 321 | "timestamp": 1389231960, | timestamp_INC 322 | .... | 323 | "cluster0": { | 324 | "client_connections": 100, | 325 | "client_eof": 500, | 326 | "forward_error": 0, | calc forward_error_INC 327 | "client_err": 0, | calc client_err_INC 328 | "fragments": 0, | 329 | "server_ejects": 0, | 330 | | add global in_queue/out_queue/ 331 | | add global requests/responses/ 332 | | add global server_timedout/server_err 333 | | calc requests_INC responses_INC 334 | | calc server_timedout_INC server_err_INC 335 | "cluster0-20001": { #a backend | 336 | "server_timedout": 0, | 337 | "server_err": 0, | 338 | "responses": 125406, | 339 | "response_bytes": 828478, | 340 | "in_queue_bytes": 0, | 341 | "server_connections": 1, | 342 | "request_bytes": 5189724, | 343 | "out_queue": 0, | 344 | "server_eof": 0, | 345 | "requests": 125406, | 346 | "in_queue": 0, | 347 | "out_queue_bytes": 0 | 348 | }, | 349 | ''' 350 | info = self._raw_info_dict() 351 | #logging.debug(info) 352 | if not info: 353 | return None 354 | 355 | def calc_inc(cluster_name, info, last_info): 356 | TO_CALC_INC = ('forward_error', 'client_err', 'requests', 'responses', 'server_timedout', 'server_err') 357 | for item in TO_CALC_INC: 358 | info[item + '_INC'] = info[item] - last_info[item] 359 | 360 | def aggregation(cluster_name, info): 361 | TO_AGGREGATION = ('in_queue', 'out_queue', 'requests', 'responses', 'server_timedout', 'server_err') 362 | for item in TO_AGGREGATION: 363 | info[item] = 0 364 | for k, v, in info.items(): 365 | if type(v) == dict: # a backend 366 | for item in TO_AGGREGATION: 367 | info[item] += v[item] 368 | 369 | if self._last_info: 370 | info['timestamp_INC'] = info['timestamp'] - self._last_info['timestamp'] 371 | 372 | for k, v in info.items(): 373 | if type(v) == dict: 374 | cluster_name = k 375 | cluster_info = v 376 | aggregation(cluster_name, cluster_info) 377 | if self._last_info: 378 | calc_inc(cluster_name, cluster_info, self._last_info[cluster_name]) 379 | 380 | self._last_info = info 381 | logging.debug(info) 382 | return info 383 | 384 | def _raw_info_dict(self): 385 | try: 386 | ret = telnetlib.Telnet(self.args['host'], self.args['status_port']).read_all() 387 | return common.json_decode(ret) 388 | except Exception, e: 389 | logging.debug('--- can not get _info_dict of nutcracker, [Exception: %s]' % (e, )) 390 | return None 391 | 392 | def status(self): 393 | ret = self._info_dict() 394 | if ret: 395 | uptime = ret['uptime'] 396 | logging.info('%s uptime %s seconds' % (self, uptime)) 397 | else: 398 | logging.error('%s is down' % self) 399 | 
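# note: get_masters() below reads the deployed nutcracker.conf back into
# (host:port, name) tuples, while reconfig() regenerates the config from the
# sentinel view of the masters and bounces the proxy; a hypothetical round
# trip (names illustrative): proxy.reconfig(cluster._active_masters())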
400 | def get_masters(self): 401 | '''return currnet master list of (host:port, name)''' 402 | cmd = TT('cat $conf', self.args) 403 | content = self._run(self._remote_cmd(cmd)) 404 | logging.debug('current proxy config: %s' % content) 405 | 406 | def parse_line(line): 407 | _x, host_port_w, name = line.split() 408 | host, port, _w = host_port_w.split(':') 409 | return ('%s:%s' % (host, port), name) 410 | return [parse_line(line) for line in content.split('\n') if line.startswith(' -')] 411 | 412 | def reconfig(self, masters): 413 | self.masters = masters 414 | self.stop() 415 | self.deploy() 416 | self.start() 417 | logging.info('proxy %s:%s is updated' % (self.args['host'], self.args['port'])) 418 | 419 | 420 | class Cluster(object, Monitor, Benchmark): 421 | def __init__(self, args): 422 | self.args = args 423 | self.all_redis = [ RedisServer(self.args['user'], hp, path) for hp, path in self.args['redis'] ] 424 | pairs = zip(self.all_redis[::2], self.all_redis[1::2]) 425 | 426 | for m, s in pairs: #slave use same name as master 427 | s.args['cluster_name'] = m.args['cluster_name'] = args['cluster_name'] 428 | s.args['server_name'] = m.args['server_name'] = TT('$cluster_name-$port', m.args) 429 | 430 | masters = self.all_redis[::2] 431 | 432 | self.all_sentinel = [Sentinel(self.args['user'], hp, path, masters) for hp, path in self.args['sentinel'] ] 433 | self.all_nutcracker = [NutCracker(self.args['user'], hp, path, masters) for hp, path in self.args['nutcracker'] ] 434 | for m in self.all_nutcracker: 435 | m.args['cluster_name'] = args['cluster_name'] 436 | 437 | def _doit(self, op): 438 | logging.notice('%s redis' % (op, )) 439 | for s in self.all_redis: 440 | eval('s.%s()' % op) 441 | 442 | logging.notice('%s sentinel' % (op, )) 443 | for s in self.all_sentinel: 444 | eval('s.%s()' % op) 445 | 446 | logging.notice('%s nutcracker' % (op, )) 447 | for s in self.all_nutcracker: 448 | eval('s.%s()' % op) 449 | 450 | def _get_available_sentinel(self): 451 | for s in self.all_sentinel: 452 | if s._alive(): 453 | return s 454 | logging.warn('No sentinel instance are available') 455 | return None 456 | 457 | def _active_masters(self): 458 | '''return the current master list on sentinel''' 459 | new_masters = self._get_available_sentinel().get_masters() 460 | new_masters = sorted(new_masters, key=lambda x: x[1]) 461 | 462 | def make_master(host_port, name): # make master instance 463 | host = host_port.split(':')[0] 464 | port = int(host_port.split(':')[1]) 465 | for r in self.all_redis: 466 | if r.args['host'] == host and r.args['port'] == port: 467 | return r 468 | 469 | masters = [make_master(host_port, name) for host_port, name in new_masters] 470 | return masters 471 | 472 | def deploy(self): 473 | ''' 474 | deploy the binarys and config file (redis/sentinel/nutcracker) in this cluster 475 | ''' 476 | self._doit('deploy') 477 | 478 | def start(self): 479 | ''' 480 | start all instance(redis/sentinel/nutcracker) in this cluster 481 | ''' 482 | self._doit('start') 483 | 484 | logging.notice('setup master->slave') 485 | rs = self.all_redis 486 | pairs = [rs[i:i+2] for i in range(0, len(rs), 2)] 487 | for m, s in pairs: 488 | if s.isslaveof(m.args['host'], m.args['port']): 489 | logging.warn('%s->%s is ok!' 
% (m,s )) 490 | else: 491 | logging.info('setup %s->%s' % (m,s )) 492 | s.slaveof(m.args['host'], m.args['port']) 493 | 494 | def stop(self): 495 | ''' 496 | stop all instance(redis/sentinel/nutcracker) in this cluster 497 | ''' 498 | if 'yes' == raw_input('do you want to stop yes/no: '): 499 | self._doit('stop') 500 | 501 | def printcmd(self): 502 | ''' 503 | print the start/stop cmd of instance 504 | ''' 505 | self._doit('printcmd') 506 | 507 | def status(self): 508 | ''' 509 | get status of all instance(redis/sentinel/nutcracker) in this cluster 510 | ''' 511 | self._doit('status') 512 | 513 | def log(self): 514 | ''' 515 | show log of all instance(redis/sentinel/nutcracker) in this cluster 516 | ''' 517 | self._doit('log') 518 | 519 | def _rediscmd(self, cmd, sleeptime=.1): 520 | for s in self.all_redis: 521 | time.sleep(sleeptime) 522 | s.rediscmd(cmd) 523 | 524 | def rediscmd(self, cmd): 525 | ''' 526 | run redis command against all redis instance, like 'INFO, GET xxxx' 527 | ''' 528 | self._rediscmd(cmd) 529 | 530 | def mastercmd(self, cmd): 531 | ''' 532 | run redis command against all redis Master instance, like 'INFO, GET xxxx' 533 | ''' 534 | for s in self._active_masters(): 535 | s.rediscmd(cmd) 536 | 537 | def rdb(self): 538 | ''' 539 | do rdb in all redis instance, 540 | ''' 541 | self._rediscmd('BGSAVE', conf.RDB_SLEEP_TIME) 542 | 543 | def aof_rewrite(self): 544 | ''' 545 | do aof_rewrite in all redis instance 546 | ''' 547 | self._rediscmd('BGREWRITEAOF', conf.RDB_SLEEP_TIME) 548 | 549 | def randomkill(self): 550 | ''' 551 | random kill master every mintue (for test failover) 552 | ''' 553 | while True: 554 | r = random.choice(self._active_masters()) 555 | logging.notice('will restart %s' % r) 556 | r.stop() 557 | time.sleep(80) 558 | r.start() 559 | time.sleep(60) 560 | 561 | def sshcmd(self, cmd): 562 | ''' 563 | ssh to target machine and run cmd 564 | ''' 565 | hosts = [s.args['host'] for s in self.all_redis + self.all_sentinel + self.all_nutcracker] 566 | hosts = set(hosts) 567 | 568 | args = copy.deepcopy(self.args) 569 | args['cmd'] = cmd 570 | for h in hosts: 571 | args['host'] = h 572 | cmd = TT('ssh -n -f $user@$host "$cmd"', args) 573 | print common.system(cmd) 574 | 575 | def reconfigproxy(self): 576 | ''' 577 | sync the masters list from sentinel to proxy 578 | ''' 579 | logging.notice('begin reconfigproxy') 580 | old_masters = self.all_nutcracker[0].get_masters() 581 | new_masters = self._get_available_sentinel().get_masters() 582 | logging.info("old masters: %s" % sorted(old_masters, key=lambda x: x[1])) 583 | logging.info("new masters: %s" % sorted(new_masters, key=lambda x: x[1])) 584 | 585 | if set(new_masters) == set(old_masters): 586 | logging.notice('masters list of proxy are already newest, we will not do reconfigproxy') 587 | return 588 | logging.notice('we will do reconfigproxy') 589 | 590 | masters = self._active_masters() 591 | for m in self.all_nutcracker: 592 | m.reconfig(masters) 593 | logging.notice('reconfig all nutcracker Done!') 594 | 595 | def failover(self): 596 | ''' 597 | catch failover event and update the proxy configuration 598 | ''' 599 | while True: 600 | try: 601 | sentinel = self._get_available_sentinel() 602 | for event in sentinel.get_failover_event(): 603 | self.reconfigproxy() 604 | except Exception, e: 605 | logging.warn('we got exception: %s on failover task' % e) 606 | logging.exception(e) 607 | 608 | def migrage(self): 609 | ''' 610 | migrage a redis instance to another machine 611 | ''' 612 | pass 613 | 614 | def 
discover_op(): 615 | methods = inspect.getmembers(Cluster, predicate=inspect.ismethod) 616 | sets = [m[0] for m in methods if not m[0].startswith('_')] 617 | return sets 618 | 619 | def gen_op_help(): 620 | methods = inspect.getmembers(Cluster, predicate=inspect.ismethod) 621 | sets = [m for m in methods if not m[0].startswith('_')] 622 | 623 | #sort the function list, based on the their position in the files 624 | lines = file('bin/deploy.py').readlines() + file('lib/monitor.py').readlines() 625 | def rank(x): 626 | name, func = x 627 | t = 'def ' + name 628 | for i in range(len(lines)): 629 | if strstr(lines[i], t): 630 | return i 631 | sets = sorted(sets, key=rank) 632 | 633 | def format_func(name, func): 634 | args = ' '.join(inspect.getargspec(func).args[1:]) 635 | if args: 636 | desc = '%s %s' % (name, args) 637 | else: 638 | desc = name 639 | return '%-25s: %s' % (common.to_blue(desc), str(func.__doc__).strip()) 640 | 641 | return '\n'.join([format_func(name, func) for name, func in sets]) 642 | 643 | def discover_cluster(): 644 | sets = [s for s in dir(conf) if s.startswith('cluster')] 645 | return sets 646 | 647 | def main(): 648 | sys.argv.insert(1, '-v') # force -v 649 | parser = argparse.ArgumentParser(formatter_class=RawTextHelpFormatter) 650 | parser.add_argument('target', metavar='clustername', choices=discover_cluster(), help=' / '.join(discover_cluster())) 651 | parser.add_argument('op', metavar='op', choices=discover_op(), 652 | help=gen_op_help()) 653 | parser.add_argument('cmd', nargs='?', help='the redis/ssh cmd like "INFO"') 654 | 655 | LOGPATH = os.path.join(WORKDIR, 'log/deploy.log') 656 | args = common.parse_args2(LOGPATH, parser) 657 | if args.cmd: 658 | eval('Cluster(conf.%s).%s(%s)' % (args.target, args.op, 'args.cmd') ) 659 | else: 660 | eval('Cluster(conf.%s).%s()' % (args.target, args.op) ) 661 | 662 | if __name__ == "__main__": 663 | main() 664 | 665 | # vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 666 | -------------------------------------------------------------------------------- /conf/redis.conf: -------------------------------------------------------------------------------- 1 | 2 | # Redis configuration file example 3 | # Note on units: when memory size is needed, it is possible to specify 4 | # it in the usual form of 1k 5GB 4M and so forth: 5 | # 6 | # 1k => 1000 bytes 7 | # 1kb => 1024 bytes 8 | # 1m => 1000000 bytes 9 | # 1mb => 1024*1024 bytes 10 | # 1g => 1000000000 bytes 11 | # 1gb => 1024*1024*1024 bytes 12 | # 13 | # units are case insensitive so 1GB 1Gb 1gB are all the same. 14 | 15 | # By default Redis does not run as a daemon. Use 'yes' if you need it. 16 | # Note that Redis will write a pid file in /var/run/redis.pid when daemonized. 17 | daemonize yes 18 | 19 | #whitelist configure 20 | #whitelist yes 21 | #whitelist-file ./whitelist 22 | 23 | # When running daemonized, Redis writes a pid file in /var/run/redis.pid by 24 | # default. You can specify a custom pid file location here. 25 | pidfile ${pidfile} 26 | 27 | # Accept connections on the specified port, default is 6379. 28 | # If port 0 is specified Redis will not listen on a TCP socket. 29 | port ${port} 30 | 31 | # If you want you can bind a single interface, if the bind option is not 32 | # specified all the interfaces will listen for incoming connections. 33 | # 34 | # bind 127.0.0.1 35 | 36 | # Specify the path for the unix socket that will be used to listen for 37 | # incoming connections. 
There is no default, so Redis will not listen 38 | # on a unix socket when not specified. 39 | # 40 | # unixsocket /tmp/redis.sock 41 | # unixsocketperm 755 42 | 43 | # Close the connection after a client is idle for N seconds (0 to disable) 44 | timeout 0 45 | 46 | # TCP keepalive. 47 | # 48 | # If non-zero, use SO_KEEPALIVE to send TCP ACKs to clients in absence 49 | # of communication. This is useful for two reasons: 50 | # 51 | # 1) Detect dead peers. 52 | # 2) Take the connection alive from the point of view of network 53 | # equipment in the middle. 54 | # 55 | # On Linux, the specified value (in seconds) is the period used to send ACKs. 56 | # Note that to close the connection the double of the time is needed. 57 | # On other kernels the period depends on the kernel configuration. 58 | # 59 | # A reasonable value for this option is 60 seconds. 60 | tcp-keepalive 60 61 | 62 | # Specify the server verbosity level. 63 | # This can be one of: 64 | # debug (a lot of information, useful for development/testing) 65 | # verbose (many rarely useful info, but not a mess like the debug level) 66 | # notice (moderately verbose, what you want in production probably) 67 | # warning (only very important / critical messages are logged) 68 | loglevel notice 69 | 70 | # Specify the log file name. Also 'stdout' can be used to force 71 | # Redis to log on the standard output. Note that if you use standard 72 | # output for logging but daemonize, logs will be sent to /dev/null 73 | logfile ${logfile} 74 | 75 | # To enable logging to the system logger, just set 'syslog-enabled' to yes, 76 | # and optionally update the other syslog parameters to suit your needs. 77 | # syslog-enabled no 78 | 79 | # Specify the syslog identity. 80 | # syslog-ident redis 81 | 82 | # Specify the syslog facility. Must be USER or between LOCAL0-LOCAL7. 83 | # syslog-facility local0 84 | 85 | # Set the number of databases. The default database is DB 0, you can select 86 | # a different one on a per-connection basis using SELECT where 87 | # dbid is a number between 0 and 'databases'-1 88 | databases 16 89 | 90 | ################################ SNAPSHOTTING ################################# 91 | # 92 | # Save the DB on disk: 93 | # 94 | # save 95 | # 96 | # Will save the DB if both the given number of seconds and the given 97 | # number of write operations against the DB occurred. 98 | # 99 | # In the example below the behaviour will be to save: 100 | # after 900 sec (15 min) if at least 1 key changed 101 | # after 300 sec (5 min) if at least 10 keys changed 102 | # after 60 sec if at least 10000 keys changed 103 | # 104 | # Note: you can disable saving at all commenting all the "save" lines. 105 | # 106 | # It is also possible to remove all the previously configured save 107 | # points by adding a save directive with a single empty string argument 108 | # like in the following example: 109 | # 110 | 111 | #save 900 1 112 | #save 300 10 113 | #save 60 10000 114 | save "" 115 | 116 | # By default Redis will stop accepting writes if RDB snapshots are enabled 117 | # (at least one save point) and the latest background save failed. 118 | # This will make the user aware (in an hard way) that data is not persisting 119 | # on disk properly, otherwise chances are that no one will notice and some 120 | # distater will happen. 121 | # 122 | # If the background saving process will start working again Redis will 123 | # automatically allow writes again. 
124 | # 125 | # However if you have setup your proper monitoring of the Redis server 126 | # and persistence, you may want to disable this feature so that Redis will 127 | # continue to work as usually even if there are problems with disk, 128 | # permissions, and so forth. 129 | stop-writes-on-bgsave-error yes 130 | 131 | # Compress string objects using LZF when dump .rdb databases? 132 | # For default that's set to 'yes' as it's almost always a win. 133 | # If you want to save some CPU in the saving child set it to 'no' but 134 | # the dataset will likely be bigger if you have compressible values or keys. 135 | rdbcompression yes 136 | 137 | # Since version 5 of RDB a CRC64 checksum is placed at the end of the file. 138 | # This makes the format more resistant to corruption but there is a performance 139 | # hit to pay (around 10%) when saving and loading RDB files, so you can disable it 140 | # for maximum performances. 141 | # 142 | # RDB files created with checksum disabled have a checksum of zero that will 143 | # tell the loading code to skip the check. 144 | rdbchecksum yes 145 | 146 | # The filename where to dump the DB 147 | dbfilename dump.rdb 148 | 149 | # The working directory. 150 | # 151 | # The DB will be written inside this directory, with the filename specified 152 | # above using the 'dbfilename' configuration directive. 153 | # 154 | # The Append Only File will also be created inside this directory. 155 | # 156 | # Note that you must specify a directory here, not a file name. 157 | dir ${dir} 158 | 159 | ################################# REPLICATION ################################# 160 | 161 | # Master-Slave replication. Use slaveof to make a Redis instance a copy of 162 | # another Redis server. Note that the configuration is local to the slave 163 | # so for example it is possible to configure the slave to save the DB with a 164 | # different interval, or to listen to another port, and so on. 165 | # 166 | # slaveof 167 | 168 | # If the master is password protected (using the "requirepass" configuration 169 | # directive below) it is possible to tell the slave to authenticate before 170 | # starting the replication synchronization process, otherwise the master will 171 | # refuse the slave request. 172 | # 173 | # masterauth 174 | 175 | # When a slave loses its connection with the master, or when the replication 176 | # is still in progress, the slave can act in two different ways: 177 | # 178 | # 1) if slave-serve-stale-data is set to 'yes' (the default) the slave will 179 | # still reply to client requests, possibly with out of date data, or the 180 | # data set may just be empty if this is the first synchronization. 181 | # 182 | # 2) if slave-serve-stale-data is set to 'no' the slave will reply with 183 | # an error "SYNC with master in progress" to all the kind of commands 184 | # but to INFO and SLAVEOF. 185 | # 186 | slave-serve-stale-data yes 187 | 188 | # You can configure a slave instance to accept writes or not. Writing against 189 | # a slave instance may be useful to store some ephemeral data (because data 190 | # written on a slave will be easily deleted after resync with the master) but 191 | # may also cause problems if clients are writing to it because of a 192 | # misconfiguration. 193 | # 194 | # Since Redis 2.6 by default slaves are read-only. 195 | # 196 | # Note: read only slaves are not designed to be exposed to untrusted clients 197 | # on the internet. It's just a protection layer against misuse of the instance. 
198 | # Still a read only slave exports by default all the administrative commands 199 | # such as CONFIG, DEBUG, and so forth. To a limited extend you can improve 200 | # security of read only slaves using 'rename-command' to shadow all the 201 | # administrative / dangerous commands. 202 | slave-read-only yes 203 | 204 | # Slaves send PINGs to server in a predefined interval. It's possible to change 205 | # this interval with the repl_ping_slave_period option. The default value is 10 206 | # seconds. 207 | # 208 | # repl-ping-slave-period 10 209 | 210 | # The following option sets a timeout for both Bulk transfer I/O timeout and 211 | # master data or ping response timeout. The default value is 60 seconds. 212 | # 213 | # It is important to make sure that this value is greater than the value 214 | # specified for repl-ping-slave-period otherwise a timeout will be detected 215 | # every time there is low traffic between the master and the slave. 216 | # 217 | repl-timeout 120 218 | 219 | # Disable TCP_NODELAY on the slave socket after SYNC? 220 | # 221 | # If you select "yes" Redis will use a smaller number of TCP packets and 222 | # less bandwidth to send data to slaves. But this can add a delay for 223 | # the data to appear on the slave side, up to 40 milliseconds with 224 | # Linux kernels using a default configuration. 225 | # 226 | # If you select "no" the delay for data to appear on the slave side will 227 | # be reduced but more bandwidth will be used for replication. 228 | # 229 | # By default we optimize for low latency, but in very high traffic conditions 230 | # or when the master and slaves are many hops away, turning this to "yes" may 231 | # be a good idea. 232 | repl-disable-tcp-nodelay no 233 | 234 | # Set the replication backlog size. The backlog is a buffer that accumulates 235 | # slave data when slaves are disconnected for some time, so that when a slave 236 | # wants to reconnect again, often a full resync is not needed, but a partial 237 | # resync is enough, just passing the portion of data the slave missed while 238 | # disconnected. 239 | # 240 | # The biggest the replication backlog, the longer the time the slave can be 241 | # disconnected and later be able to perform a partial resynchronization. 242 | # 243 | # The backlog is only allocated once there is at least a slave connected. 244 | # 245 | repl-backlog-size 64mb 246 | 247 | # After a master has no longer connected slaves for some time, the backlog 248 | # will be freed. The following option configures the amount of seconds that 249 | # need to elapse, starting from the time the last slave disconnected, for 250 | # the backlog buffer to be freed. 251 | # 252 | # A value of 0 means to never release the backlog. 253 | # 254 | # repl-backlog-ttl 3600 255 | 256 | # The slave priority is an integer number published by Redis in the INFO output. 257 | # It is used by Redis Sentinel in order to select a slave to promote into a 258 | # master if the master is no longer working correctly. 259 | # 260 | # A slave with a low priority number is considered better for promotion, so 261 | # for instance if there are three slaves with priority 10, 100, 25 Sentinel will 262 | # pick the one wtih priority 10, that is the lowest. 263 | # 264 | # However a special priority of 0 marks the slave as not able to perform the 265 | # role of master, so a slave with priority of 0 will never be selected by 266 | # Redis Sentinel for promotion. 267 | # 268 | # By default the priority is 100. 
# It is possible for a master to stop accepting writes if there are fewer
# than N slaves connected, with a lag less than or equal to M seconds.
#
# The N slaves need to be in "online" state.
#
# The lag in seconds, which must be <= the specified value, is calculated from
# the last ping received from the slave, which is usually sent every second.
#
# This option does not GUARANTEE that N replicas will accept the write, but
# will limit the window of exposure for lost writes, in case not enough slaves
# are available, to the specified number of seconds.
#
# For example to require at least 3 slaves with a lag <= 10 seconds use:
#
# min-slaves-to-write 3
# min-slaves-max-lag 10
#
# Setting one or the other to 0 disables the feature.
#
# By default min-slaves-to-write is set to 0 (feature disabled) and
# min-slaves-max-lag is set to 10.

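# A quick way to see the guard above in action (a sketch; the port and key
# are illustrative, and the settings can be applied at runtime):
#
#   redis-cli -p 20000 CONFIG SET min-slaves-to-write 1
#   redis-cli -p 20000 CONFIG SET min-slaves-max-lag 10
#   # with no slave attached, the write is refused with a NOREPLICAS error:
#   redis-cli -p 20000 SET k v
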
################################## SECURITY ###################################

# Require clients to issue AUTH <PASSWORD> before processing any other
# commands. This might be useful in environments in which you do not trust
# others with access to the host running redis-server.
#
# This should stay commented out for backward compatibility and because most
# people do not need auth (e.g. they run their own servers).
#
# Warning: since Redis is pretty fast, an outside user can try up to
# 150k passwords per second against a good box. This means that you should
# use a very strong password, otherwise it will be very easy to break.
#
# requirepass foobared

# Command renaming.
#
# It is possible to change the name of dangerous commands in a shared
# environment. For instance the CONFIG command may be renamed into something
# hard to guess so that it will still be available for internal-use tools
# but not available for general clients.
#
# Example:
#
# rename-command CONFIG b840fc02d524045429941cc15f59e41cb7be6c52
#
# It is also possible to completely kill a command by renaming it into
# an empty string:
#
# rename-command CONFIG ""
#
# Please note that changing the name of commands that are logged into the
# AOF file or transmitted to slaves may cause problems.

################################### LIMITS ####################################

# Set the max number of connected clients at the same time. By default
# this limit is set to 10000 clients; however, if the Redis server is not
# able to configure the process file limit to allow for the specified limit,
# the max number of allowed clients is set to the current file limit
# minus 32 (as Redis reserves a few file descriptors for internal uses).
#
# Once the limit is reached Redis will close all the new connections sending
# an error 'max number of clients reached'.
#
# maxclients 10000

# Don't use more memory than the specified amount of bytes.
# When the memory limit is reached Redis will try to remove keys
# according to the eviction policy selected (see maxmemory-policy).
#
# If Redis can't remove keys according to the policy, or if the policy is
# set to 'noeviction', Redis will start to reply with errors to commands
# that would use more memory, like SET, LPUSH, and so on, and will continue
# to reply to read-only commands like GET.
#
# This option is usually useful when using Redis as an LRU cache, or to set
# a hard memory limit for an instance (using the 'noeviction' policy).
#
# WARNING: If you have slaves attached to an instance with maxmemory on,
# the size of the output buffers needed to feed the slaves is subtracted
# from the used memory count, so that network problems / resyncs will
# not trigger a loop where keys are evicted, and in turn the output
# buffer of slaves fills with DELs of evicted keys, triggering the deletion
# of more keys, and so forth, until the database is completely emptied.
#
# In short... if you have slaves attached it is suggested that you set a lower
# limit for maxmemory so that there is some free RAM on the system for slave
# output buffers (but this is not needed if the policy is 'noeviction').
#
maxmemory 5368709120

# MAXMEMORY POLICY: how Redis will select what to remove when maxmemory
# is reached. You can select among the following behaviors:
#
# volatile-lru -> remove the key with an expire set using an LRU algorithm
# allkeys-lru -> remove any key according to the LRU algorithm
# volatile-random -> remove a random key with an expire set
# allkeys-random -> remove a random key, any key
# volatile-ttl -> remove the key with the nearest expire time (minor TTL)
# noeviction -> don't expire at all, just return an error on write operations
#
# Note: with any of the above policies, Redis will return an error on write
# operations when there are no suitable keys for eviction.
#
# At the date of writing these commands are: set setnx setex append
# incr decr rpush lpush rpushx lpushx linsert lset rpoplpush sadd
# sinter sinterstore sunion sunionstore sdiff sdiffstore zadd zincrby
# zunionstore zinterstore hset hsetnx hmset hincrby incrby decrby
# getset mset msetnx exec sort
#
# The default is:
#
maxmemory-policy volatile-lru

# LRU and minimal TTL algorithms are not precise algorithms but approximated
# algorithms (in order to save memory), so you can also select the sample
# size to check. For instance, by default Redis will check three keys and
# pick the one that was used least recently; you can change the sample size
# using the following configuration directive.
#
maxmemory-samples 3

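# Eviction is easy to observe on a scratch instance (a sketch; the port, the
# 10mb limit, and the key names are illustrative). Note that volatile-lru can
# only evict keys that have a TTL set, so a cache filled with non-volatile
# keys needs allkeys-lru:
#
#   redis-cli -p 20000 CONFIG SET maxmemory 10mb
#   redis-cli -p 20000 CONFIG SET maxmemory-policy allkeys-lru
#   for i in $(seq 1 200000); do
#       redis-cli -p 20000 SET key:$i aaaaaaaaaaaaaaaa > /dev/null
#   done
#   redis-cli -p 20000 INFO stats | grep evicted_keys
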
############################## APPEND ONLY MODE ###############################

# By default Redis asynchronously dumps the dataset on disk. This mode is
# good enough in many applications, but an issue with the Redis process or
# a power outage may result in a few minutes of writes being lost (depending
# on the configured save points).
#
# The Append Only File is an alternative persistence mode that provides
# much better durability. For instance, using the default data fsync policy
# (see later in the config file) Redis can lose just one second of writes in a
# dramatic event like a server power outage, or a single write if something
# goes wrong with the Redis process itself, while the operating system is
# still running correctly.
#
# AOF and RDB persistence can be enabled at the same time without problems.
# If the AOF is enabled on startup Redis will load the AOF, that is the file
# with the better durability guarantees.
#
# Please check http://redis.io/topics/persistence for more information.

appendonly yes

# The name of the append only file (default: "appendonly.aof")
# appendfilename appendonly.aof

# The fsync() call tells the Operating System to actually write data on disk
# instead of waiting for more data in the output buffer. Some OSes will really
# flush data on disk, some other OSes will just try to do it ASAP.
#
# Redis supports three different modes:
#
# no: don't fsync, just let the OS flush the data when it wants. Faster.
# always: fsync after every write to the append only log. Slow, safest.
# everysec: fsync only one time every second. Compromise.
#
# The default is "everysec", as that's usually the right compromise between
# speed and data safety. It's up to you to understand if you can relax this to
# "no", which will let the operating system flush the output buffer when
# it wants, for better performance (but if you can live with the idea of
# some data loss consider the default persistence mode that's snapshotting),
# or, on the contrary, use "always", which is very slow but a bit safer than
# everysec.
#
# For more details please check the following article:
# http://antirez.com/post/redis-persistence-demystified.html
#
# If unsure, use "everysec".

# appendfsync always
appendfsync everysec
# appendfsync no

# When the AOF fsync policy is set to always or everysec, and a background
# saving process (a background save or AOF log background rewriting) is
# performing a lot of I/O against the disk, in some Linux configurations
# Redis may block too long on the fsync() call. Note that there is no fix for
# this currently, as even performing fsync in a different thread will block
# our synchronous write(2) call.
#
# In order to mitigate this problem it's possible to use the following option
# that will prevent fsync() from being called in the main process while a
# BGSAVE or BGREWRITEAOF is in progress.
#
# This means that while another child is saving, the durability of Redis is
# the same as "appendfsync none". In practical terms, this means that it is
# possible to lose up to 30 seconds of log in the worst scenario (with the
# default Linux settings).
#
# If you have latency problems turn this to "yes". Otherwise leave it as
# "no" as that is the safest pick from the point of view of durability.
no-appendfsync-on-rewrite no

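# The AOF knobs above can also be flipped at runtime, which is handy when
# weighing durability against latency on a live instance. A sketch; the port
# is illustrative:
#
#   redis-cli -p 20000 CONFIG SET appendfsync always
#   redis-cli -p 20000 CONFIG GET appendonly
#   redis-cli -p 20000 BGREWRITEAOF
#   redis-cli -p 20000 INFO persistence | grep aof_
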
# Automatic rewrite of the append only file.
# Redis is able to automatically rewrite the log file, implicitly calling
# BGREWRITEAOF when the AOF log size grows by the specified percentage.
#
# This is how it works: Redis remembers the size of the AOF file after the
# latest rewrite (if no rewrite has happened since the restart, the size of
# the AOF at startup is used).
#
# This base size is compared to the current size. If the current size is
# bigger than the base size by more than the specified percentage, the
# rewrite is triggered. You also need to specify a minimal size for the AOF
# file to be rewritten; this is useful to avoid rewriting the AOF file even
# if the percentage increase is reached but it is still pretty small.
#
# Specify a percentage of zero in order to disable the automatic AOF
# rewrite feature.

auto-aof-rewrite-percentage 0
auto-aof-rewrite-min-size 64mb

################################ LUA SCRIPTING ###############################

# Max execution time of a Lua script in milliseconds.
#
# If the maximum execution time is reached Redis will log that a script is
# still in execution after the maximum allowed time and will start to
# reply to queries with an error.
#
# When a long running script exceeds the maximum execution time only the
# SCRIPT KILL and SHUTDOWN NOSAVE commands are available. The first can be
# used to stop a script that has not yet called any write commands. The
# second is the only way to shut down the server when a write command was
# already issued by the script but the user doesn't want to wait for the
# natural termination of the script.
#
# Set it to 0 or a negative value for unlimited execution without warnings.
lua-time-limit 5000

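# To see this limit in action, start a busy script in one terminal and kill
# it from another. A sketch; the port is illustrative:
#
#   redis-cli -p 20000 EVAL "while true do end" 0
#   # after lua-time-limit ms, other clients start getting -BUSY replies;
#   # since the script never wrote anything, it can be stopped with:
#   redis-cli -p 20000 SCRIPT KILL
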
################################ REDIS CLUSTER ###############################
#
# Normal Redis instances can't be part of a Redis Cluster; only nodes that are
# started as cluster nodes can. In order to start a Redis instance as a
# cluster node enable the cluster support by uncommenting the following:
#
# cluster-enabled yes

# Every cluster node has a cluster configuration file. This file is not
# intended to be edited by hand. It is created and updated by Redis nodes.
# Every Redis Cluster node requires a different cluster configuration file.
# Make sure that instances running on the same system do not have
# overlapping cluster configuration file names.
#
# cluster-config-file nodes-6379.conf

# Cluster node timeout is the number of milliseconds a node must be
# unreachable for it to be considered in failure state.
# Most other internal time limits are multiples of the node timeout.
#
# cluster-node-timeout 15000

# In order to set up your cluster make sure to read the documentation
# available at the http://redis.io web site.

################################## SLOW LOG ###################################

# The Redis Slow Log is a system to log queries that exceeded a specified
# execution time. The execution time does not include the I/O operations
# like talking with the client, sending the reply and so forth,
# but just the time needed to actually execute the command (this is the only
# stage of command execution where the thread is blocked and can not serve
# other requests in the meantime).
#
# You can configure the slow log with two parameters: one tells Redis
# what is the execution time, in microseconds, to exceed in order for the
# command to get logged, and the other parameter is the length of the
# slow log. When a new command is logged the oldest one is removed from the
# queue of logged commands.

# The following time is expressed in microseconds, so 1000000 is equivalent
# to one second. Note that a negative number disables the slow log, while
# a value of zero forces the logging of every command.
slowlog-log-slower-than 10000

# There is no limit to this length. Just be aware that it will consume memory.
# You can reclaim memory used by the slow log with SLOWLOG RESET.
slowlog-max-len 128

############################# Event notification ##############################

# Redis can notify Pub/Sub clients about events happening in the key space.
# This feature is documented at http://redis.io/topics/keyspace-events
#
# For instance if keyspace events notification is enabled, and a client
# performs a DEL operation on key "foo" stored in Database 0, two
# messages will be published via Pub/Sub:
#
# PUBLISH __keyspace@0__:foo del
# PUBLISH __keyevent@0__:del foo
#
# It is possible to select the events that Redis will notify among a set
# of classes. Every class is identified by a single character:
#
#  K     Keyspace events, published with __keyspace@<db>__ prefix.
#  E     Keyevent events, published with __keyevent@<db>__ prefix.
#  g     Generic commands (non-type specific) like DEL, EXPIRE, RENAME, ...
#  $     String commands
#  l     List commands
#  s     Set commands
#  h     Hash commands
#  z     Sorted set commands
#  x     Expired events (events generated every time a key expires)
#  e     Evicted events (events generated when a key is evicted for maxmemory)
#  A     Alias for "g$lshzxe", so that the "AKE" string means all the events.
#
# The "notify-keyspace-events" directive takes as its argument a string
# composed of zero or more of the above characters. The empty string means
# that notifications are disabled.
#
# Example: to enable list and generic events, from the point of view of the
# event name, use:
#
# notify-keyspace-events Elg
#
# Example 2: to get the stream of the expired keys subscribing to channel
# name __keyevent@0__:expired use:
#
# notify-keyspace-events Ex
#
# By default all notifications are disabled because most users don't need
# this feature and the feature has some overhead. Note that if you don't
# specify at least one of K or E, no events will be delivered.
notify-keyspace-events ""

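# A minimal end-to-end check of the notification classes above (a sketch;
# the port and key name are illustrative):
#
#   redis-cli -p 20000 CONFIG SET notify-keyspace-events Ex
#   redis-cli -p 20000 SUBSCRIBE '__keyevent@0__:expired' &
#   redis-cli -p 20000 SET foo bar EX 1
#   # shortly after the key expires, the subscriber prints "foo"
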
############################### ADVANCED CONFIG ###############################

# Hashes are encoded using a memory efficient data structure when they have a
# small number of entries, and the biggest entry does not exceed a given
# threshold. These thresholds can be configured using the following directives.
hash-max-ziplist-entries 512
hash-max-ziplist-value 64

# Similarly to hashes, small lists are also encoded in a special way in order
# to save a lot of space. The special representation is only used when
# you are under the following limits:
list-max-ziplist-entries 512
list-max-ziplist-value 64

# Sets have a special encoding in just one case: when a set is composed
# of just strings that happen to be integers in radix 10 in the range
# of 64-bit signed integers.
# The following configuration setting sets the limit on the size of the
# set in order to use this special memory saving encoding.
set-max-intset-entries 512

# Similarly to hashes and lists, sorted sets are also specially encoded in
# order to save a lot of space. This encoding is only used when the length and
# elements of a sorted set are below the following limits:
zset-max-ziplist-entries 128
zset-max-ziplist-value 64

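# The effect of these thresholds can be verified with OBJECT ENCODING. A
# sketch; the port and key name are illustrative, and the encoding names
# ("ziplist" / "linkedlist") are those reported by Redis of this era:
#
#   redis-cli -p 20000 RPUSH mylist a b c
#   redis-cli -p 20000 OBJECT ENCODING mylist   # small list -> "ziplist"
#   redis-cli -p 20000 RPUSH mylist $(seq 1 600)
#   redis-cli -p 20000 OBJECT ENCODING mylist   # over 512 entries -> "linkedlist"
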
# Active rehashing uses 1 millisecond every 100 milliseconds of CPU time in
# order to help rehash the main Redis hash table (the one mapping top-level
# keys to values). The hash table implementation Redis uses (see dict.c)
# performs lazy rehashing: the more operations you run on a hash table
# that is rehashing, the more rehashing "steps" are performed, so if the
# server is idle the rehashing is never complete and some more memory is used
# by the hash table.
#
# The default is to use this millisecond 10 times every second in order to
# actively rehash the main dictionaries, freeing memory when possible.
#
# If unsure:
# use "activerehashing no" if you have hard latency requirements and it is
# not a good thing in your environment that Redis can reply from time to time
# to queries with a 2 millisecond delay.
#
# use "activerehashing yes" if you don't have such hard requirements but
# want to free memory asap when possible.
activerehashing yes

# The client output buffer limits can be used to force disconnection of clients
# that are not reading data from the server fast enough for some reason (a
# common reason is that a Pub/Sub client can't consume messages as fast as the
# publisher can produce them).
#
# The limit can be set differently for the three different classes of clients:
#
# normal -> normal clients
# slave -> slave clients and MONITOR clients
# pubsub -> clients subscribed to at least one pubsub channel or pattern
#
# The syntax of every client-output-buffer-limit directive is the following:
#
# client-output-buffer-limit <class> <hard limit> <soft limit> <soft seconds>
#
# A client is immediately disconnected once the hard limit is reached, or if
# the soft limit is reached and remains reached for the specified number of
# seconds (continuously).
# So for instance if the hard limit is 32 megabytes and the soft limit is
# 16 megabytes / 10 seconds, the client will get disconnected immediately
# if the size of the output buffers reaches 32 megabytes, but will also get
# disconnected if the client reaches 16 megabytes and continuously overcomes
# the limit for 10 seconds.
#
# By default normal clients are not limited because they don't receive data
# without asking (in a push way), but just after a request, so only
# asynchronous clients may create a scenario where data is requested faster
# than it can be read.
#
# Instead there is a default limit for pubsub and slave clients, since
# subscribers and slaves receive data in a push fashion.
#
# Both the hard and the soft limits can be disabled by setting them to zero.
client-output-buffer-limit normal 0 0 0
client-output-buffer-limit slave 256mb 64mb 60
client-output-buffer-limit pubsub 32mb 8mb 60

# Redis calls an internal function to perform many background tasks, like
# closing connections of clients in timeout, purging expired keys that are
# never requested, and so forth.
#
# Not all tasks are performed with the same frequency, but Redis checks for
# tasks to perform according to the specified "hz" value.
#
# By default "hz" is set to 10. Raising the value will use more CPU when
# Redis is idle, but at the same time will make Redis more responsive when
# there are many keys expiring at the same time, and timeouts may be
# handled with more precision.
#
# The range is between 1 and 500, however a value over 100 is usually not
# a good idea. Most users should use the default of 10 and raise this up to
# 100 only in environments where very low latency is required.
hz 10

# When a child rewrites the AOF file, if the following option is enabled
# the file will be fsync-ed every 32 MB of data generated. This is useful
# in order to commit the file to the disk more incrementally and avoid
# big latency spikes.
aof-rewrite-incremental-fsync yes

################################## INCLUDES ###################################

# Include one or more other config files here. This is useful if you
# have a standard template that goes to all Redis servers but also need
# to customize a few per-server settings. Include files can include
# other files, so use this wisely.
#
# include /path/to/local.conf
# include /path/to/other.conf
--------------------------------------------------------------------------------