├── README.md ├── cluster.rb ├── consistency-test.rb ├── crc16.rb └── example.rb /README.md: -------------------------------------------------------------------------------- 1 | # Redis-rb-cluster 2 | 3 | Redis Cluster client work in progress. 4 | It wraps Redis-rb, and eventually should be part of it. 5 | 6 | For now the goal is to write a simple (but not too simple) client that works 7 | as a reference implementation, and can be used in order to further develop 8 | and test Redis Cluster, that is a work in progress itself. 9 | 10 | ## Creating a new instance 11 | 12 | In order to create a new Redis Cluster instance use: 13 | 14 | startup_nodes = [ 15 | {:host => "127.0.0.1", :port => 6379}, 16 | {:host => "127.0.0.1", :port => 6380} 17 | ] 18 | max_cached_connections = 2 19 | rc = RedisCluster.new(startup_nodes,max_cached_connections) 20 | 21 | The startup nodes are a list of addresses of Cluster Nodes, for the client to 22 | work it is important that at least one address works. Startup nodes are used 23 | in order to: 24 | 25 | * Initialize the hash slot -> node cache, using the `CLUSTER NODES` command. 26 | * To contact a random node every time we are not able to talk with the right node currently cached for the specified hash slot we are interested in, in the context of the current request. 27 | 28 | The list of nodes provided by the user will be extended once the client 29 | will be able to retrieve the cluster configuration. 30 | 31 | The second parameter in the object initialization is the maximum number of 32 | connections that the client is allowed to cache. Ideally this should be at 33 | least equal to the number of nodes you have, in order to avoid closing and 34 | reopening TCP sockets. However if you have very large cluster and want to 35 | optimize for clients resource saving, it is possible to use a smaller value. 36 | 37 | ## Sending commands 38 | 39 | Sending commands is very similar to redis-rb: 40 | 41 | rc.get("foo") 42 | 43 | Currently only a subset of commands are implemented (and in general multi-keys 44 | commands are not supported by Redis Cluster), because for every supported 45 | command we need a function able to identify the key among the arguments. 46 | 47 | ## Disclaimer 48 | 49 | Redis Cluster is released as stable. 50 | This client is a work in progress that might not be suitable to be used in production environments. 51 | -------------------------------------------------------------------------------- /cluster.rb: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Salvatore Sanfilippo 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining 4 | # a copy of this software and associated documentation files (the 5 | # "Software"), to deal in the Software without restriction, including 6 | # without limitation the rights to use, copy, modify, merge, publish, 7 | # distribute, sublicense, and/or sell copies of the Software, and to 8 | # permit persons to whom the Software is furnished to do so, subject to 9 | # the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be 12 | # included in all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | 22 | require 'rubygems' 23 | require 'redis' 24 | require './crc16' 25 | 26 | class RedisCluster 27 | 28 | RedisClusterHashSlots = 16384 29 | RedisClusterRequestTTL = 16 30 | RedisClusterDefaultTimeout = 1 31 | 32 | def initialize(startup_nodes,connections,opt={}) 33 | @startup_nodes = startup_nodes 34 | @max_connections = connections 35 | @connections = {} 36 | @opt = opt 37 | @refresh_table_asap = false 38 | initialize_slots_cache 39 | end 40 | 41 | def get_redis_link(host,port) 42 | timeout = @opt[:timeout] or RedisClusterDefaultTimeout 43 | Redis.new(:host => host, :port => port, :timeout => timeout) 44 | end 45 | 46 | # Given a node (that is just a Ruby hash) give it a name just 47 | # concatenating the host and port. We use the node name as a key 48 | # to cache connections to that node. 49 | def set_node_name!(n) 50 | if !n[:name] 51 | n[:name] = "#{n[:host]}:#{n[:port]}" 52 | end 53 | end 54 | 55 | # Contact the startup nodes and try to fetch the hash slots -> instances 56 | # map in order to initialize the @slots hash. 57 | def initialize_slots_cache 58 | @startup_nodes.each{|n| 59 | begin 60 | @slots = {} 61 | @nodes = [] 62 | 63 | r = get_redis_link(n[:host],n[:port]) 64 | r.cluster("slots").each {|r| 65 | (r[0]..r[1]).each{|slot| 66 | ip,port = r[2] 67 | name = "#{ip}:#{port}" 68 | node = { 69 | :host => ip, :port => port, 70 | :name => name 71 | } 72 | @nodes << node 73 | @slots[slot] = node 74 | } 75 | } 76 | populate_startup_nodes 77 | @refresh_table_asap = false 78 | rescue 79 | # Try with the next node on error. 80 | next 81 | end 82 | # Exit the loop as long as the first node replies 83 | break 84 | } 85 | end 86 | 87 | # Use @nodes to populate @startup_nodes, so that we have more chances 88 | # if a subset of the cluster fails. 89 | def populate_startup_nodes 90 | # Make sure every node has already a name, so that later the 91 | # Array uniq! method will work reliably. 92 | @startup_nodes.each{|n| set_node_name! n} 93 | @nodes.each{|n| @startup_nodes << n} 94 | @startup_nodes.uniq! 95 | end 96 | 97 | # Flush the cache, mostly useful for debugging when we want to force 98 | # redirection. 99 | def flush_slots_cache 100 | @slots = {} 101 | end 102 | 103 | # Return the hash slot from the key. 104 | def keyslot(key) 105 | # Only hash what is inside {...} if there is such a pattern in the key. 106 | # Note that the specification requires the content that is between 107 | # the first { and the first } after the first {. If we found {} without 108 | # nothing in the middle, the whole key is hashed as usually. 109 | s = key.index "{" 110 | if s 111 | e = key.index "}",s+1 112 | if e && e != s+1 113 | key = key[s+1..e-1] 114 | end 115 | end 116 | RedisClusterCRC16.crc16(key) % RedisClusterHashSlots 117 | end 118 | 119 | # Return the first key in the command arguments. 120 | # 121 | # Currently we just return argv[1], that is, the first argument 122 | # after the command name. 123 | # 124 | # This is indeed the key for most commands, and when it is not true 125 | # the cluster redirection will point us to the right node anyway. 126 | # 127 | # For commands we want to explicitly bad as they don't make sense 128 | # in the context of cluster, nil is returned. 129 | def get_key_from_command(argv) 130 | case argv[0].to_s.downcase 131 | when "info","multi","exec","slaveof","config","shutdown" 132 | return nil 133 | else 134 | # Unknown commands, and all the commands having the key 135 | # as first argument are handled here: 136 | # set, get, ... 137 | return argv[1] 138 | end 139 | end 140 | 141 | # If the current number of connections is already the maximum number 142 | # allowed, close a random connection. This should be called every time 143 | # we cache a new connection in the @connections hash. 144 | def close_existing_connection 145 | while @connections.length >= @max_connections 146 | @connections.each{|n,r| 147 | @connections.delete(n) 148 | begin 149 | r.client.disconnect 150 | rescue 151 | end 152 | break 153 | } 154 | end 155 | end 156 | 157 | # Return a link to a random node, or raise an error if no node can be 158 | # contacted. This function is only called when we can't reach the node 159 | # associated with a given hash slot, or when we don't know the right 160 | # mapping. 161 | # 162 | # The function will try to get a successful reply to the PING command, 163 | # otherwise the next node is tried. 164 | def get_random_connection 165 | e = "" 166 | @startup_nodes.shuffle.each{|n| 167 | begin 168 | set_node_name!(n) 169 | conn = @connections[n[:name]] 170 | 171 | if !conn 172 | # Connect the node if it is not connected 173 | conn = get_redis_link(n[:host],n[:port]) 174 | if conn.ping == "PONG" 175 | close_existing_connection 176 | @connections[n[:name]] = conn 177 | return conn 178 | else 179 | # If the connection is not good close it ASAP in order 180 | # to avoid waiting for the GC finalizer. File 181 | # descriptors are a rare resource. 182 | conn.client.disconnect 183 | end 184 | else 185 | # The node was already connected, test the connection. 186 | return conn if conn.ping == "PONG" 187 | end 188 | rescue => e 189 | # Just try with the next node. 190 | end 191 | } 192 | raise "Can't reach a single startup node. #{e}" 193 | end 194 | 195 | # Given a slot return the link (Redis instance) to the mapped node. 196 | # Make sure to create a connection with the node if we don't have 197 | # one. 198 | def get_connection_by_slot(slot) 199 | node = @slots[slot] 200 | # If we don't know what the mapping is, return a random node. 201 | return get_random_connection if !node 202 | set_node_name!(node) 203 | if not @connections[node[:name]] 204 | begin 205 | close_existing_connection 206 | @connections[node[:name]] = 207 | get_redis_link(node[:host],node[:port]) 208 | rescue 209 | # This will probably never happen with recent redis-rb 210 | # versions because the connection is enstablished in a lazy 211 | # way only when a command is called. However it is wise to 212 | # handle an instance creation error of some kind. 213 | return get_random_connection 214 | end 215 | end 216 | @connections[node[:name]] 217 | end 218 | 219 | # Dispatch commands. 220 | def send_cluster_command(argv) 221 | initialize_slots_cache if @refresh_table_asap 222 | ttl = RedisClusterRequestTTL; # Max number of redirections 223 | e = "" 224 | asking = false 225 | try_random_node = false 226 | while ttl > 0 227 | ttl -= 1 228 | key = get_key_from_command(argv) 229 | raise "No way to dispatch this command to Redis Cluster." if !key 230 | slot = keyslot(key) 231 | if try_random_node 232 | r = get_random_connection 233 | try_random_node = false 234 | else 235 | r = get_connection_by_slot(slot) 236 | end 237 | begin 238 | # TODO: use pipelining to send asking and save a rtt. 239 | r.asking if asking 240 | asking = false 241 | return r.send(argv[0].to_sym,*argv[1..-1]) 242 | rescue Errno::ECONNREFUSED, Redis::TimeoutError, Redis::CannotConnectError, Errno::EACCES 243 | try_random_node = true 244 | sleep(0.1) if ttl < RedisClusterRequestTTL/2 245 | rescue => e 246 | errv = e.to_s.split 247 | if errv[0] == "MOVED" || errv[0] == "ASK" 248 | if errv[0] == "ASK" 249 | asking = true 250 | else 251 | # Serve replied with MOVED. It's better for us to 252 | # ask for CLUSTER NODES the next time. 253 | @refresh_table_asap = true 254 | end 255 | newslot = errv[1].to_i 256 | node_ip,node_port = errv[2].split(":") 257 | if !asking 258 | @slots[newslot] = {:host => node_ip, 259 | :port => node_port.to_i} 260 | end 261 | else 262 | raise e 263 | end 264 | end 265 | end 266 | raise "Too many Cluster redirections? (last error: #{e})" 267 | end 268 | 269 | # Currently we handle all the commands using method_missing for 270 | # simplicity. For a Cluster client actually it will be better to have 271 | # every single command as a method with the right arity and possibly 272 | # additional checks (example: RPOPLPUSH with same src/dst key, SORT 273 | # without GET or BY, and so forth). 274 | def method_missing(*argv) 275 | send_cluster_command(argv) 276 | end 277 | end 278 | 279 | -------------------------------------------------------------------------------- /consistency-test.rb: -------------------------------------------------------------------------------- 1 | # This file implements a simple consistency test for Redis Cluster (or any other 2 | # Redis environment if you pass a different client object) where a client 3 | # write to the database using INCR in order to increment keys, but actively 4 | # remember the value the key should have. Before every write a read is performed 5 | # to check if the value in the database matches the value expected. 6 | # 7 | # In this way this program can check for lost writes, or acknowledged writes 8 | # that were executed. 9 | # 10 | # Copyright (C) 2013 Salvatore Sanfilippo 11 | # 12 | # Permission is hereby granted, free of charge, to any person obtaining 13 | # a copy of this software and associated documentation files (the 14 | # "Software"), to deal in the Software without restriction, including 15 | # without limitation the rights to use, copy, modify, merge, publish, 16 | # distribute, sublicense, and/or sell copies of the Software, and to 17 | # permit persons to whom the Software is furnished to do so, subject to 18 | # the following conditions: 19 | # 20 | # The above copyright notice and this permission notice shall be 21 | # included in all copies or substantial portions of the Software. 22 | # 23 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 27 | # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 28 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 29 | # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 30 | 31 | require './cluster' 32 | 33 | class ConsistencyTester 34 | def initialize(redis) 35 | @r = redis 36 | @working_set = 1000 37 | @keyspace = 10000 38 | @writes = 0 39 | @reads = 0 40 | @failed_writes = 0 41 | @failed_reads = 0 42 | @lost_writes = 0 43 | @not_ack_writes = 0 44 | @delay = 0 45 | @cached = {} # We take our view of data stored in the DB. 46 | @prefix = [Process.pid.to_s,Time.now.usec,@r.object_id,""].join("|") 47 | @errtime = {} 48 | end 49 | 50 | def genkey 51 | # Write more often to a small subset of keys 52 | ks = rand() > 0.5 ? @keyspace : @working_set 53 | @prefix+"key_"+rand(ks).to_s 54 | end 55 | 56 | def check_consistency(key,value) 57 | expected = @cached[key] 58 | return if !expected # We lack info about previous state. 59 | if expected > value 60 | @lost_writes += expected-value 61 | elsif expected < value 62 | @not_ack_writes += value-expected 63 | end 64 | end 65 | 66 | def puterr(msg) 67 | if !@errtime[msg] || Time.now.to_i != @errtime[msg] 68 | puts msg 69 | end 70 | @errtime[msg] = Time.now.to_i 71 | end 72 | 73 | def test 74 | last_report = Time.now.to_i 75 | while true 76 | # Read 77 | key = genkey 78 | begin 79 | val = @r.get(key) 80 | check_consistency(key,val.to_i) 81 | @reads += 1 82 | rescue => e 83 | puterr "Reading: #{e.to_s}" 84 | @failed_reads += 1 85 | end 86 | 87 | # Write 88 | begin 89 | @cached[key] = @r.incr(key).to_i 90 | @writes += 1 91 | rescue => e 92 | puterr "Writing: #{e.to_s}" 93 | @failed_writes += 1 94 | end 95 | 96 | # Report 97 | sleep @delay 98 | if Time.now.to_i != last_report 99 | report = "#{@reads} R (#{@failed_reads} err) | " + 100 | "#{@writes} W (#{@failed_writes} err) | " 101 | report += "#{@lost_writes} lost | " if @lost_writes > 0 102 | report += "#{@not_ack_writes} noack | " if @not_ack_writes > 0 103 | last_report = Time.now.to_i 104 | puts report 105 | end 106 | end 107 | end 108 | end 109 | 110 | if ARGV.length != 2 111 | puts "Usage: consistency-test.rb " 112 | exit 1 113 | else 114 | startup_nodes = [ 115 | {:host => ARGV[0], :port => ARGV[1].to_i} 116 | ] 117 | end 118 | 119 | rc = RedisCluster.new(startup_nodes,32,:timeout => 0.1) 120 | tester = ConsistencyTester.new(rc) 121 | tester.test 122 | -------------------------------------------------------------------------------- /crc16.rb: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 Salvatore Sanfilippo 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining 4 | # a copy of this software and associated documentation files (the 5 | # "Software"), to deal in the Software without restriction, including 6 | # without limitation the rights to use, copy, modify, merge, publish, 7 | # distribute, sublicense, and/or sell copies of the Software, and to 8 | # permit persons to whom the Software is furnished to do so, subject to 9 | # the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be 12 | # included in all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | # 22 | # ----------------------------------------------------------------------------- 23 | # 24 | # This is the CRC16 algorithm used by Redis Cluster to hash keys. 25 | # Implementation according to CCITT standards. 26 | # 27 | # This is actually the XMODEM CRC 16 algorithm, using the 28 | # following parameters: 29 | # 30 | # Name : "XMODEM", also known as "ZMODEM", "CRC-16/ACORN" 31 | # Width : 16 bit 32 | # Poly : 1021 (That is actually x^16 + x^12 + x^5 + 1) 33 | # Initialization : 0000 34 | # Reflect Input byte : False 35 | # Reflect Output CRC : False 36 | # Xor constant to output CRC : 0000 37 | # Output for "123456789" : 31C3 38 | 39 | module RedisClusterCRC16 40 | 41 | def RedisClusterCRC16.crc16(bytes) 42 | crc = 0 43 | bytes.each_byte{|b| 44 | crc = ((crc<<8) & 0xffff) ^ XMODEMCRC16Lookup[((crc>>8)^b) & 0xff] 45 | } 46 | crc 47 | end 48 | 49 | private 50 | 51 | XMODEMCRC16Lookup = [ 52 | 0x0000,0x1021,0x2042,0x3063,0x4084,0x50a5,0x60c6,0x70e7, 53 | 0x8108,0x9129,0xa14a,0xb16b,0xc18c,0xd1ad,0xe1ce,0xf1ef, 54 | 0x1231,0x0210,0x3273,0x2252,0x52b5,0x4294,0x72f7,0x62d6, 55 | 0x9339,0x8318,0xb37b,0xa35a,0xd3bd,0xc39c,0xf3ff,0xe3de, 56 | 0x2462,0x3443,0x0420,0x1401,0x64e6,0x74c7,0x44a4,0x5485, 57 | 0xa56a,0xb54b,0x8528,0x9509,0xe5ee,0xf5cf,0xc5ac,0xd58d, 58 | 0x3653,0x2672,0x1611,0x0630,0x76d7,0x66f6,0x5695,0x46b4, 59 | 0xb75b,0xa77a,0x9719,0x8738,0xf7df,0xe7fe,0xd79d,0xc7bc, 60 | 0x48c4,0x58e5,0x6886,0x78a7,0x0840,0x1861,0x2802,0x3823, 61 | 0xc9cc,0xd9ed,0xe98e,0xf9af,0x8948,0x9969,0xa90a,0xb92b, 62 | 0x5af5,0x4ad4,0x7ab7,0x6a96,0x1a71,0x0a50,0x3a33,0x2a12, 63 | 0xdbfd,0xcbdc,0xfbbf,0xeb9e,0x9b79,0x8b58,0xbb3b,0xab1a, 64 | 0x6ca6,0x7c87,0x4ce4,0x5cc5,0x2c22,0x3c03,0x0c60,0x1c41, 65 | 0xedae,0xfd8f,0xcdec,0xddcd,0xad2a,0xbd0b,0x8d68,0x9d49, 66 | 0x7e97,0x6eb6,0x5ed5,0x4ef4,0x3e13,0x2e32,0x1e51,0x0e70, 67 | 0xff9f,0xefbe,0xdfdd,0xcffc,0xbf1b,0xaf3a,0x9f59,0x8f78, 68 | 0x9188,0x81a9,0xb1ca,0xa1eb,0xd10c,0xc12d,0xf14e,0xe16f, 69 | 0x1080,0x00a1,0x30c2,0x20e3,0x5004,0x4025,0x7046,0x6067, 70 | 0x83b9,0x9398,0xa3fb,0xb3da,0xc33d,0xd31c,0xe37f,0xf35e, 71 | 0x02b1,0x1290,0x22f3,0x32d2,0x4235,0x5214,0x6277,0x7256, 72 | 0xb5ea,0xa5cb,0x95a8,0x8589,0xf56e,0xe54f,0xd52c,0xc50d, 73 | 0x34e2,0x24c3,0x14a0,0x0481,0x7466,0x6447,0x5424,0x4405, 74 | 0xa7db,0xb7fa,0x8799,0x97b8,0xe75f,0xf77e,0xc71d,0xd73c, 75 | 0x26d3,0x36f2,0x0691,0x16b0,0x6657,0x7676,0x4615,0x5634, 76 | 0xd94c,0xc96d,0xf90e,0xe92f,0x99c8,0x89e9,0xb98a,0xa9ab, 77 | 0x5844,0x4865,0x7806,0x6827,0x18c0,0x08e1,0x3882,0x28a3, 78 | 0xcb7d,0xdb5c,0xeb3f,0xfb1e,0x8bf9,0x9bd8,0xabbb,0xbb9a, 79 | 0x4a75,0x5a54,0x6a37,0x7a16,0x0af1,0x1ad0,0x2ab3,0x3a92, 80 | 0xfd2e,0xed0f,0xdd6c,0xcd4d,0xbdaa,0xad8b,0x9de8,0x8dc9, 81 | 0x7c26,0x6c07,0x5c64,0x4c45,0x3ca2,0x2c83,0x1ce0,0x0cc1, 82 | 0xef1f,0xff3e,0xcf5d,0xdf7c,0xaf9b,0xbfba,0x8fd9,0x9ff8, 83 | 0x6e17,0x7e36,0x4e55,0x5e74,0x2e93,0x3eb2,0x0ed1,0x1ef0 84 | ] 85 | end 86 | -------------------------------------------------------------------------------- /example.rb: -------------------------------------------------------------------------------- 1 | require './cluster' 2 | 3 | if ARGV.length != 2 4 | startup_nodes = [ 5 | {:host => "127.0.0.1", :port => 6379}, 6 | {:host => "127.0.0.1", :port => 6380} 7 | ] 8 | else 9 | startup_nodes = [ 10 | {:host => ARGV[0], :port => ARGV[1].to_i} 11 | ] 12 | end 13 | 14 | rc = RedisCluster.new(startup_nodes,32,:timeout => 0.1) 15 | 16 | last = false 17 | 18 | while not last 19 | begin 20 | last = rc.get("__last__") 21 | last = 0 if !last 22 | rescue => e 23 | puts "error #{e.to_s}" 24 | sleep 1 25 | end 26 | end 27 | 28 | ((last.to_i+1)..1000000000).each{|x| 29 | begin 30 | rc.set("foo#{x}",x) 31 | puts rc.get("foo#{x}") 32 | rc.set("__last__",x) 33 | rescue => e 34 | puts "error #{e.to_s}" 35 | end 36 | sleep 0.1 37 | } 38 | --------------------------------------------------------------------------------