├── redis-resharding.png ├── .travis.yml ├── crc64redis_test.go ├── LICENSE ├── main_test.go ├── README.rst ├── crc64redis.go ├── main.go ├── rdb_test.go └── rdb.go /redis-resharding.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smira/redis-resharding-proxy/HEAD/redis-resharding.png -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | 3 | go: 4 | - 1.2 5 | - 1.3 6 | 7 | env: 8 | global: 9 | - secure: "oafkdBHV4pk7mLSfEGLHX3hyqsLXM6/EVNzD63ChQFLjYYpS/DorFfO65pzXJrW8VpWnYGayNJI5Z3mK/tOPKGTjwoQbM1kJ8/aM14OqdAnM4aAk+rejX7BoEERXNV0bKbCewahWFU/akeBmc6I6onUNo6qoGGNzb++Eg3mQ3+8=" 10 | 11 | install: 12 | - go get code.google.com/p/go.tools/cmd/cover 13 | - go get github.com/axw/gocov/gocov 14 | - go get github.com/mattn/goveralls 15 | 16 | script: /home/travis/gopath/bin/goveralls -service travis-ci.org -repotoken=$COVERALLS_TOKEN -------------------------------------------------------------------------------- /crc64redis_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestRedisCRC64(t *testing.T) { 8 | hash := CRC64Update(0, []byte{'1', '2', '3', '4', '5', '6', '7', '8', '9'}) 9 | if hash != 0xe9c6d914c4b8d9ca { 10 | t.Errorf("crc64 doesn't match: crc64(\"123456789\") = %#v != 0xe9c6d914c4b8d9ca", hash) 11 | } 12 | } 13 | 14 | func BenchmarkRedisCRC64(b *testing.B) { 15 | data := []byte{'1', '2', '3', '4', '5', '6', '7', '8', '9'} 16 | var crc uint64 17 | for i := 0; i < b.N; i++ { 18 | CRC64Update(crc, data) 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2013 Andrey Smirnov. All rights reserved. 
2 | 3 | MIT License 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 | IN THE SOFTWARE. 
-------------------------------------------------------------------------------- /main_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "bytes" 6 | "fmt" 7 | "io" 8 | "reflect" 9 | "testing" 10 | ) 11 | 12 | func TestReadRedisCommand(t *testing.T) { 13 | tests := []struct { 14 | description string 15 | input string 16 | expected redisCommand 17 | expectedError error 18 | }{ 19 | { 20 | description: "1: Reply", 21 | input: "+PONG\r\n", 22 | expected: redisCommand{reply: "PONG"}, 23 | expectedError: nil, 24 | }, 25 | { 26 | description: "2: Empty command", 27 | input: "\n", 28 | expected: redisCommand{}, 29 | expectedError: nil, 30 | }, 31 | { 32 | description: "3: Simple command", 33 | input: "SYNC\r\n", 34 | expected: redisCommand{command: []string{"SYNC"}}, 35 | expectedError: nil, 36 | }, 37 | { 38 | description: "4: Bulk reply", 39 | input: "$4568\r\n", 40 | expected: redisCommand{bulkSize: 4568}, 41 | expectedError: nil, 42 | }, 43 | { 44 | description: "5: Complex command", 45 | input: "*3\r\n$3\r\nSET\r\n$5\r\nmykey\r\n$7\r\nmyvalue\r\n", 46 | expected: redisCommand{command: []string{"SET", "mykey", "myvalue"}}, 47 | expectedError: nil, 48 | }, 49 | { 50 | description: "6: Immediate EOF", 51 | input: "+PONG", 52 | expected: redisCommand{}, 53 | expectedError: fmt.Errorf("Failed to read command: %v", io.EOF), 54 | }, 55 | { 56 | description: "7: EOF in length", 57 | input: "*3\r\n$3", 58 | expected: redisCommand{}, 59 | expectedError: fmt.Errorf("Failed to read command: %v", io.EOF), 60 | }, 61 | { 62 | description: "8: EOF in data", 63 | input: "*3\r\n$3\r\nSE", 64 | expected: redisCommand{}, 65 | expectedError: fmt.Errorf("Failed to read argument: %v", io.ErrUnexpectedEOF), 66 | }, 67 | { 68 | description: "9: Unparsable length", 69 | input: "*x\r\n", 70 | expected: redisCommand{}, 71 | expectedError: fmt.Errorf("Unable to parse command length: strconv.ParseInt: 
parsing \"x\": invalid syntax"), 72 | }, 73 | } 74 | 75 | for _, test := range tests { 76 | test.expected.raw = []byte(test.input) 77 | 78 | command, err := readRedisCommand(bufio.NewReader(bytes.NewBufferString(test.input))) 79 | if err != nil { 80 | if test.expectedError == nil || test.expectedError.Error() != err.Error() { 81 | t.Errorf("Unexpected error: %v (test %s)", err, test.description) 82 | } 83 | } else if !reflect.DeepEqual(*command, test.expected) { 84 | t.Errorf("Output not equal to expected %#v != %#v (test %s)", *command, test.expected, test.description) 85 | } 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Redis Resharding Proxy 2 | ====================== 3 | 4 | .. image:: https://travis-ci.org/smira/redis-resharding-proxy.png?branch=master 5 | :target: https://travis-ci.org/smira/redis-resharding-proxy 6 | 7 | .. image:: https://coveralls.io/repos/smira/redis-resharding-proxy/badge.png?branch=HEAD 8 | :target: https://coveralls.io/r/smira/redis-resharding-proxy?branch=HEAD 9 | 10 | Redis Resharding Proxy could be used to split (re-shard) instance of Redis into several smaller instances without interrupting 11 | normal operations. 12 | 13 | Introduction 14 | ------------ 15 | 16 | .. image:: https://raw.github.com/smira/redis-resharding-proxy/master/redis-resharding.png 17 | :width: 500px 18 | 19 | Resharding is using Redis built-in `replication `_ to transfer data from master Redis node 20 | (existing big node) to slave (new smaller node) through special proxy which filters keys in both initial data (RDB) and incremental 21 | updates in real-time. 22 | 23 | For example, let's assume that keys in Redis are numeric (``[0-9]+``) distributed evenly. We would like to split it into two parts, so 24 | that 50% of keys goes to first Redis and 50% to another one. 
So we would set up two Redis resharding proxies, one with regular 25 | expression ``^[0-4].*`` and another one with ``^[5-9].*``. Both proxies would be using the same original master Redis as their upstream 26 | master server. We would launch two new Redis instances, making them slaves of the respective resharding proxies; replication would then start 27 | from master to the two new slaves via the proxies, which would filter keys by regexp, splitting the original dataset into two halves. 28 | 29 | Redis resharding proxy is written in Go and requires no dependencies. 30 | 31 | Installing/building 32 | ------------------- 33 | 34 | If you have a Go environment ready:: 35 | 36 | go get github.com/smira/redis-resharding-proxy 37 | 38 | Otherwise, install Go and set up the environment:: 39 | 40 | $ mkdir $HOME/go 41 | $ export GOPATH=$HOME/go 42 | $ export PATH=$PATH:$GOPATH/bin 43 | 44 | After that you can run ``redis-resharding-proxy``. 45 | 46 | Using 47 | ----- 48 | 49 | ``redis-resharding-proxy`` accepts several options:: 50 | 51 | -master-host="localhost": Master Redis host 52 | -master-port=6379: Master Redis port 53 | -proxy-host="": Proxy listening interface, default is all interfaces 54 | -proxy-port=6380: Proxy port for listening 55 | 56 | They configure the proxy's listening address (the address the Redis slave connects to) and the master Redis address. 
57 | 58 | Regular expression is given as the only argument which controls which keys should pass through proxy:: 59 | 60 | redis-resharding-proxy --master-host=redis1.srv --proxy-port=5400 '^[a-e].*' 61 | 62 | Example 63 | ------- 64 | 65 | First, let's launch master Redis server:: 66 | 67 | redis-server --port 6400 68 | 69 | And fill it with some data:: 70 | 71 | $ redis-cli -p 6400 72 | redis 127.0.0.1:6400> set apple red 73 | OK 74 | redis 127.0.0.1:6400> set banana yellow 75 | OK 76 | redis 127.0.0.1:6400> set cucumber green 77 | OK 78 | redis 127.0.0.1:6400> 79 | 80 | Then, let's launch slaves:: 81 | 82 | redis-server --port 6410 83 | redis-server --port 6420 84 | 85 | And resharding proxies:: 86 | 87 | redis-resharding-proxy -master-port=6400 -proxy-port=6401 '^a.*' 88 | redis-resharding-proxy -master-port=6400 -proxy-port=6402 '^b.*' 89 | 90 | First proxy would pass only keys that start with ``a``, second one only keys that start with ``b``. 91 | 92 | Then, let's start replication:: 93 | 94 | $ redis-cli -p 6410 95 | redis 127.0.0.1:6410> slaveof localhost 6401 96 | OK 97 | redis 127.0.0.1:6410> 98 | 99 | And with another slave:: 100 | 101 | $ redis-cli -p 6420 102 | redis 127.0.0.1:6420> slaveof localhost 6402 103 | OK 104 | redis 127.0.0.1:6420> 105 | 106 | You should see replication progress both in Redis output and resharding proxy log. 
107 | 108 | Now, we can verify that replication went well:: 109 | 110 | $ redis-cli -p 6410 111 | redis 127.0.0.1:6410> get apple 112 | "red" 113 | redis 127.0.0.1:6410> get banana 114 | (nil) 115 | 116 | And with another slave:: 117 | 118 | $ redis-cli -p 6420 119 | redis 127.0.0.1:6420> get apple 120 | (nil) 121 | redis 127.0.0.1:6420> get banana 122 | "yellow" 123 | 124 | Let's try to change key on master:: 125 | 126 | $ redis-cli -p 6400 127 | redis 127.0.0.1:6400> set apple blue 128 | OK 129 | 130 | The change would be propagated to slave:: 131 | 132 | $ redis-cli -p 6410 133 | redis 127.0.0.1:6410> get apple 134 | "blue" 135 | 136 | Now, replication could be switched off on slaves, master and proxies shut down. One Redis has been split into two Redises, one with keys 137 | starting with a and another one with keys starting with b. 138 | 139 | Performance 140 | ----------- 141 | 142 | Resharding proxy is filtering RDB approximately 50% slower than Redis itself is loading RDB into memory, so replication may take twice the time 143 | with proxy compared to direct Redis to Redis replication. 144 | 145 | Compatibility 146 | ------------- 147 | 148 | Resharding proxy should be compatible with any Redis version, it has been extensively tested with 2.6.16. When filtering live commands, 149 | only commands which affect one key are supported (that's majority of Redis commands), e.g. ``SET``, ``INCR``, ``LPUSH``, etc. Commands that affect 150 | several keys may lead to unexpected results (like commands ``BITOP``, ``SUNIONSTORE``.) 151 | 152 | 153 | Thanks 154 | ------ 155 | 156 | I would like to say thanks for ideas and inspiration to Vasiliy Evseenko, Alexander Titov and Alexey Palazhchenko. 157 | 158 | Copyright and Licensing 159 | ----------------------- 160 | 161 | Copyright 2013 Andrey Smirnov. Unless otherwise noted, the source files are distributed under the MIT License found in the LICENSE file. 
-------------------------------------------------------------------------------- /crc64redis.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | // Redis version of CRC64 4 | 5 | var table = [256]uint64{ 6 | 0x0000000000000000, 0x7ad870c830358979, 7 | 0xf5b0e190606b12f2, 0x8f689158505e9b8b, 8 | 0xc038e5739841b68f, 0xbae095bba8743ff6, 9 | 0x358804e3f82aa47d, 0x4f50742bc81f2d04, 10 | 0xab28ecb46814fe75, 0xd1f09c7c5821770c, 11 | 0x5e980d24087fec87, 0x24407dec384a65fe, 12 | 0x6b1009c7f05548fa, 0x11c8790fc060c183, 13 | 0x9ea0e857903e5a08, 0xe478989fa00bd371, 14 | 0x7d08ff3b88be6f81, 0x07d08ff3b88be6f8, 15 | 0x88b81eabe8d57d73, 0xf2606e63d8e0f40a, 16 | 0xbd301a4810ffd90e, 0xc7e86a8020ca5077, 17 | 0x4880fbd87094cbfc, 0x32588b1040a14285, 18 | 0xd620138fe0aa91f4, 0xacf86347d09f188d, 19 | 0x2390f21f80c18306, 0x594882d7b0f40a7f, 20 | 0x1618f6fc78eb277b, 0x6cc0863448deae02, 21 | 0xe3a8176c18803589, 0x997067a428b5bcf0, 22 | 0xfa11fe77117cdf02, 0x80c98ebf2149567b, 23 | 0x0fa11fe77117cdf0, 0x75796f2f41224489, 24 | 0x3a291b04893d698d, 0x40f16bccb908e0f4, 25 | 0xcf99fa94e9567b7f, 0xb5418a5cd963f206, 26 | 0x513912c379682177, 0x2be1620b495da80e, 27 | 0xa489f35319033385, 0xde51839b2936bafc, 28 | 0x9101f7b0e12997f8, 0xebd98778d11c1e81, 29 | 0x64b116208142850a, 0x1e6966e8b1770c73, 30 | 0x8719014c99c2b083, 0xfdc17184a9f739fa, 31 | 0x72a9e0dcf9a9a271, 0x08719014c99c2b08, 32 | 0x4721e43f0183060c, 0x3df994f731b68f75, 33 | 0xb29105af61e814fe, 0xc849756751dd9d87, 34 | 0x2c31edf8f1d64ef6, 0x56e99d30c1e3c78f, 35 | 0xd9810c6891bd5c04, 0xa3597ca0a188d57d, 36 | 0xec09088b6997f879, 0x96d1784359a27100, 37 | 0x19b9e91b09fcea8b, 0x636199d339c963f2, 38 | 0xdf7adabd7a6e2d6f, 0xa5a2aa754a5ba416, 39 | 0x2aca3b2d1a053f9d, 0x50124be52a30b6e4, 40 | 0x1f423fcee22f9be0, 0x659a4f06d21a1299, 41 | 0xeaf2de5e82448912, 0x902aae96b271006b, 42 | 0x74523609127ad31a, 0x0e8a46c1224f5a63, 43 | 0x81e2d7997211c1e8, 0xfb3aa75142244891, 44 | 0xb46ad37a8a3b6595, 
0xceb2a3b2ba0eecec, 45 | 0x41da32eaea507767, 0x3b024222da65fe1e, 46 | 0xa2722586f2d042ee, 0xd8aa554ec2e5cb97, 47 | 0x57c2c41692bb501c, 0x2d1ab4dea28ed965, 48 | 0x624ac0f56a91f461, 0x1892b03d5aa47d18, 49 | 0x97fa21650afae693, 0xed2251ad3acf6fea, 50 | 0x095ac9329ac4bc9b, 0x7382b9faaaf135e2, 51 | 0xfcea28a2faafae69, 0x8632586aca9a2710, 52 | 0xc9622c4102850a14, 0xb3ba5c8932b0836d, 53 | 0x3cd2cdd162ee18e6, 0x460abd1952db919f, 54 | 0x256b24ca6b12f26d, 0x5fb354025b277b14, 55 | 0xd0dbc55a0b79e09f, 0xaa03b5923b4c69e6, 56 | 0xe553c1b9f35344e2, 0x9f8bb171c366cd9b, 57 | 0x10e3202993385610, 0x6a3b50e1a30ddf69, 58 | 0x8e43c87e03060c18, 0xf49bb8b633338561, 59 | 0x7bf329ee636d1eea, 0x012b592653589793, 60 | 0x4e7b2d0d9b47ba97, 0x34a35dc5ab7233ee, 61 | 0xbbcbcc9dfb2ca865, 0xc113bc55cb19211c, 62 | 0x5863dbf1e3ac9dec, 0x22bbab39d3991495, 63 | 0xadd33a6183c78f1e, 0xd70b4aa9b3f20667, 64 | 0x985b3e827bed2b63, 0xe2834e4a4bd8a21a, 65 | 0x6debdf121b863991, 0x1733afda2bb3b0e8, 66 | 0xf34b37458bb86399, 0x8993478dbb8deae0, 67 | 0x06fbd6d5ebd3716b, 0x7c23a61ddbe6f812, 68 | 0x3373d23613f9d516, 0x49aba2fe23cc5c6f, 69 | 0xc6c333a67392c7e4, 0xbc1b436e43a74e9d, 70 | 0x95ac9329ac4bc9b5, 0xef74e3e19c7e40cc, 71 | 0x601c72b9cc20db47, 0x1ac40271fc15523e, 72 | 0x5594765a340a7f3a, 0x2f4c0692043ff643, 73 | 0xa02497ca54616dc8, 0xdafce7026454e4b1, 74 | 0x3e847f9dc45f37c0, 0x445c0f55f46abeb9, 75 | 0xcb349e0da4342532, 0xb1eceec59401ac4b, 76 | 0xfebc9aee5c1e814f, 0x8464ea266c2b0836, 77 | 0x0b0c7b7e3c7593bd, 0x71d40bb60c401ac4, 78 | 0xe8a46c1224f5a634, 0x927c1cda14c02f4d, 79 | 0x1d148d82449eb4c6, 0x67ccfd4a74ab3dbf, 80 | 0x289c8961bcb410bb, 0x5244f9a98c8199c2, 81 | 0xdd2c68f1dcdf0249, 0xa7f41839ecea8b30, 82 | 0x438c80a64ce15841, 0x3954f06e7cd4d138, 83 | 0xb63c61362c8a4ab3, 0xcce411fe1cbfc3ca, 84 | 0x83b465d5d4a0eece, 0xf96c151de49567b7, 85 | 0x76048445b4cbfc3c, 0x0cdcf48d84fe7545, 86 | 0x6fbd6d5ebd3716b7, 0x15651d968d029fce, 87 | 0x9a0d8ccedd5c0445, 0xe0d5fc06ed698d3c, 88 | 0xaf85882d2576a038, 0xd55df8e515432941, 
89 | 0x5a3569bd451db2ca, 0x20ed197575283bb3, 90 | 0xc49581ead523e8c2, 0xbe4df122e51661bb, 91 | 0x3125607ab548fa30, 0x4bfd10b2857d7349, 92 | 0x04ad64994d625e4d, 0x7e7514517d57d734, 93 | 0xf11d85092d094cbf, 0x8bc5f5c11d3cc5c6, 94 | 0x12b5926535897936, 0x686de2ad05bcf04f, 95 | 0xe70573f555e26bc4, 0x9ddd033d65d7e2bd, 96 | 0xd28d7716adc8cfb9, 0xa85507de9dfd46c0, 97 | 0x273d9686cda3dd4b, 0x5de5e64efd965432, 98 | 0xb99d7ed15d9d8743, 0xc3450e196da80e3a, 99 | 0x4c2d9f413df695b1, 0x36f5ef890dc31cc8, 100 | 0x79a59ba2c5dc31cc, 0x037deb6af5e9b8b5, 101 | 0x8c157a32a5b7233e, 0xf6cd0afa9582aa47, 102 | 0x4ad64994d625e4da, 0x300e395ce6106da3, 103 | 0xbf66a804b64ef628, 0xc5bed8cc867b7f51, 104 | 0x8aeeace74e645255, 0xf036dc2f7e51db2c, 105 | 0x7f5e4d772e0f40a7, 0x05863dbf1e3ac9de, 106 | 0xe1fea520be311aaf, 0x9b26d5e88e0493d6, 107 | 0x144e44b0de5a085d, 0x6e963478ee6f8124, 108 | 0x21c640532670ac20, 0x5b1e309b16452559, 109 | 0xd476a1c3461bbed2, 0xaeaed10b762e37ab, 110 | 0x37deb6af5e9b8b5b, 0x4d06c6676eae0222, 111 | 0xc26e573f3ef099a9, 0xb8b627f70ec510d0, 112 | 0xf7e653dcc6da3dd4, 0x8d3e2314f6efb4ad, 113 | 0x0256b24ca6b12f26, 0x788ec2849684a65f, 114 | 0x9cf65a1b368f752e, 0xe62e2ad306bafc57, 115 | 0x6946bb8b56e467dc, 0x139ecb4366d1eea5, 116 | 0x5ccebf68aecec3a1, 0x2616cfa09efb4ad8, 117 | 0xa97e5ef8cea5d153, 0xd3a62e30fe90582a, 118 | 0xb0c7b7e3c7593bd8, 0xca1fc72bf76cb2a1, 119 | 0x45775673a732292a, 0x3faf26bb9707a053, 120 | 0x70ff52905f188d57, 0x0a2722586f2d042e, 121 | 0x854fb3003f739fa5, 0xff97c3c80f4616dc, 122 | 0x1bef5b57af4dc5ad, 0x61372b9f9f784cd4, 123 | 0xee5fbac7cf26d75f, 0x9487ca0fff135e26, 124 | 0xdbd7be24370c7322, 0xa10fceec0739fa5b, 125 | 0x2e675fb4576761d0, 0x54bf2f7c6752e8a9, 126 | 0xcdcf48d84fe75459, 0xb71738107fd2dd20, 127 | 0x387fa9482f8c46ab, 0x42a7d9801fb9cfd2, 128 | 0x0df7adabd7a6e2d6, 0x772fdd63e7936baf, 129 | 0xf8474c3bb7cdf024, 0x829f3cf387f8795d, 130 | 0x66e7a46c27f3aa2c, 0x1c3fd4a417c62355, 131 | 0x935745fc4798b8de, 0xe98f353477ad31a7, 132 | 0xa6df411fbfb21ca3, 
// redisCommand is one unit of the Redis wire protocol as produced by
// readRedisCommand. At most one of command, reply and bulkSize is populated;
// raw always holds the bytes exactly as they were read, so the unit can be
// forwarded unchanged.
type redisCommand struct {
	raw      []byte   // bytes exactly as read from the connection
	command  []string // arguments of an inline or multi-bulk ("*") command
	reply    string   // payload of a "+" status reply
	bulkSize int64    // size announced by a top-level "$" bulk header
}

// readRedisCommand reads a single protocol unit from reader.
//
// The first line decides the kind of unit:
//   - an empty line yields a redisCommand with only raw set;
//   - "+..." yields a status reply;
//   - "$<n>" yields a bulk header (the payload itself is NOT consumed);
//   - "*<n>" yields a multi-bulk command whose n arguments are read in full;
//   - anything else is treated as a one-word inline command.
//
// It returns an error when the stream ends early or a length cannot be
// parsed. Negative multi-bulk/argument lengths are rejected with an error
// instead of being passed to make(), which would panic.
func readRedisCommand(reader *bufio.Reader) (*redisCommand, error) {
	header, err := reader.ReadString('\n')
	if err != nil {
		return nil, fmt.Errorf("Failed to read command: %v", err)
	}

	switch {
	case header == "\n" || header == "\r\n":
		// empty command
		return &redisCommand{raw: []byte(header)}, nil

	case strings.HasPrefix(header, "+"):
		return &redisCommand{raw: []byte(header), reply: strings.TrimSpace(header[1:])}, nil

	case strings.HasPrefix(header, "$"):
		bulkSize, err := strconv.ParseInt(strings.TrimSpace(header[1:]), 10, 64)
		if err != nil {
			return nil, fmt.Errorf("Unable to decode bulk size: %v", err)
		}
		return &redisCommand{raw: []byte(header), bulkSize: bulkSize}, nil

	case strings.HasPrefix(header, "*"):
		return readMultiBulk(reader, header)
	}

	// inline command such as "SYNC\r\n"
	return &redisCommand{raw: []byte(header), command: []string{strings.TrimSpace(header)}}, nil
}

// readMultiBulk reads the arguments of a multi-bulk command whose "*<n>"
// header line has already been consumed and is passed in as header.
func readMultiBulk(reader *bufio.Reader, header string) (*redisCommand, error) {
	cmdSize, err := strconv.Atoi(strings.TrimSpace(header[1:]))
	if err != nil {
		return nil, fmt.Errorf("Unable to parse command length: %v", err)
	}
	if cmdSize < 0 {
		// make() below would panic on a negative length
		return nil, fmt.Errorf("Unable to parse command length: negative length %d", cmdSize)
	}

	result := &redisCommand{raw: []byte(header), command: make([]string, cmdSize)}

	for i := range result.command {
		argHeader, err := reader.ReadString('\n')
		if err != nil {
			return nil, fmt.Errorf("Failed to read command: %v", err)
		}
		if !strings.HasPrefix(argHeader, "$") {
			// report what was actually received rather than a nil error
			return nil, fmt.Errorf("Failed to read command: expected bulk argument header, got %q", argHeader)
		}
		result.raw = append(result.raw, argHeader...)

		argSize, err := strconv.Atoi(strings.TrimSpace(argHeader[1:]))
		if err != nil {
			return nil, fmt.Errorf("Unable to parse argument length: %v", err)
		}
		if argSize < 0 {
			return nil, fmt.Errorf("Unable to parse argument length: negative length %d", argSize)
		}

		argument := make([]byte, argSize)
		if _, err = io.ReadFull(reader, argument); err != nil {
			return nil, fmt.Errorf("Failed to read argument: %v", err)
		}
		result.raw = append(result.raw, argument...)

		// consume the line terminator that follows the argument payload
		terminator, err := reader.ReadString('\n')
		if err != nil {
			return nil, fmt.Errorf("Failed to read argument: %v", err)
		}
		result.raw = append(result.raw, terminator...)

		result.command[i] = string(argument)
	}

	return result, nil
}
readRedisCommand(reader) 132 | if err != nil { 133 | log.Printf("Error while reading from master: %v\n", err) 134 | return 135 | } 136 | 137 | if command.reply != "" || command.command == nil && command.bulkSize == 0 { 138 | // passthrough reply & empty command 139 | slavechannel <- command.raw 140 | slavechannel <- nil 141 | } else if len(command.command) == 1 && command.command[0] == "PING" { 142 | log.Println("Got PING from master") 143 | 144 | slavechannel <- command.raw 145 | slavechannel <- nil 146 | } else if command.bulkSize > 0 { 147 | // RDB Transfer 148 | 149 | log.Printf("RDB size: %d\n", command.bulkSize) 150 | 151 | slavechannel <- command.raw 152 | 153 | err = FilterRDB(reader, slavechannel, func(key string) bool { return keyRegexp.FindStringIndex(key) != nil }, command.bulkSize) 154 | if err != nil { 155 | log.Printf("Unable to read RDB: %v\n", err) 156 | return 157 | } 158 | 159 | log.Println("RDB filtering finished, filtering commands...") 160 | } else { 161 | if len(command.command) >= 2 && keyRegexp.FindStringIndex(command.command[1]) == nil { 162 | continue 163 | } 164 | 165 | slavechannel <- command.raw 166 | slavechannel <- nil 167 | } 168 | 169 | } 170 | } 171 | 172 | // Goroutine that handles writing data back to slave 173 | func slaveWriter(conn net.Conn, slavechannel <-chan []byte) { 174 | writer := bufio.NewWriterSize(conn, bufSize) 175 | 176 | for data := range slavechannel { 177 | var err error 178 | 179 | if data == nil { 180 | err = writer.Flush() 181 | } else { 182 | _, err = writer.Write(data) 183 | } 184 | 185 | if err != nil { 186 | log.Printf("Failed to write data to slave: %v\n", err) 187 | return 188 | } 189 | } 190 | } 191 | 192 | // Read commands from slave 193 | func slaveReader(conn net.Conn) { 194 | defer conn.Close() 195 | 196 | log.Print("Slave connection established from ", conn.RemoteAddr().String()) 197 | 198 | reader := bufio.NewReaderSize(conn, bufSize) 199 | 200 | // channel for writing to slave 201 | slavechannel 
:= make(chan []byte, channelBuffer) 202 | defer close(slavechannel) 203 | 204 | // channel for writing to master 205 | masterchannel := make(chan []byte, channelBuffer) 206 | defer close(masterchannel) 207 | 208 | go slaveWriter(conn, slavechannel) 209 | go masterConnection(slavechannel, masterchannel) 210 | 211 | for { 212 | command, err := readRedisCommand(reader) 213 | if err != nil { 214 | log.Printf("Error while reading from slave: %v\n", err) 215 | return 216 | } 217 | 218 | if command.reply != "" || command.command == nil && command.bulkSize == 0 { 219 | // passthrough reply & empty command 220 | masterchannel <- command.raw 221 | } else if len(command.command) == 1 && command.command[0] == "PING" { 222 | log.Println("Got PING from slave") 223 | 224 | masterchannel <- command.raw 225 | } else if len(command.command) == 1 && command.command[0] == "SYNC" { 226 | log.Println("Starting SYNC") 227 | 228 | masterchannel <- command.raw 229 | } else if len(command.command) == 3 && command.command[0] == "REPLCONF" && command.command[1] == "ACK" { 230 | log.Println("Got ACK from slave") 231 | 232 | masterchannel <- command.raw 233 | } else { 234 | // unknown command 235 | slavechannel <- []byte("+ERR unknown command\r\n") 236 | slavechannel <- nil 237 | } 238 | } 239 | } 240 | 241 | func main() { 242 | flag.StringVar(&masterHost, "master-host", "localhost", "Master Redis host") 243 | flag.IntVar(&masterPort, "master-port", 6379, "Master Redis port") 244 | flag.StringVar(&proxyHost, "proxy-host", "", "Proxy listening interface, default is on all interfaces") 245 | flag.IntVar(&proxyPort, "proxy-port", 6380, "Proxy port for listening") 246 | flag.Parse() 247 | 248 | if flag.NArg() != 1 { 249 | flag.Usage() 250 | fmt.Fprintln(os.Stderr, "Please specify regular expression to match against the Redis keys as the only argument.") 251 | os.Exit(1) 252 | } 253 | 254 | var err error 255 | keyRegexp, err = regexp.Compile(flag.Arg(0)) 256 | if err != nil { 257 | 
fmt.Fprintf(os.Stderr, "Wrong format of regular expression: %v", err) 258 | os.Exit(1) 259 | } 260 | 261 | log.Printf("Redis Resharding Proxy configured for Redis master at %s:%d\n", masterHost, masterPort) 262 | log.Printf("Waiting for connection from slave at %s:%d\n", proxyHost, proxyPort) 263 | 264 | // listen for incoming connection from Redis slave 265 | ln, err := net.Listen("tcp", fmt.Sprintf("%s:%d", proxyHost, proxyPort)) 266 | if err != nil { 267 | log.Fatalf("Unable to listen: %v\n", err) 268 | } 269 | for { 270 | conn, err := ln.Accept() 271 | if err != nil { 272 | log.Printf("Unable to accept: %v\n", err) 273 | continue 274 | } 275 | 276 | go slaveReader(conn) 277 | } 278 | } 279 | -------------------------------------------------------------------------------- /rdb_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "bytes" 6 | "io" 7 | "strings" 8 | "testing" 9 | ) 10 | 11 | func TestFilterRDB(t *testing.T) { 12 | tests := []struct { 13 | description string 14 | rdb string 15 | expected string 16 | expectedError error 17 | filter func(string) bool 18 | }{ 19 | { 20 | description: "1: Simple RDB, no filter", 21 | rdb: RDBFile1, 22 | expected: RDBFile1, 23 | expectedError: nil, 24 | filter: func(string) bool { return true }, 25 | }, 26 | { 27 | description: "2: Simple RDB, filter out b_", 28 | rdb: RDBFile1, 29 | expected: "REDIS0006\xfe\x00\x00\x03a_1\x04lala\x00\x03a_2\xc0!\xff\xad}0`\xa6\xf4\xa1\xab" + strings.Repeat("\xff", 56), 30 | expectedError: nil, 31 | filter: func(key string) bool { return strings.HasPrefix(key, "a_") }, 32 | }, 33 | { 34 | description: "3: RDB broken, no magic", 35 | rdb: "NOTREDIS", 36 | expected: "", 37 | expectedError: ErrWrongSignature, 38 | filter: func(string) bool { return true }, 39 | }, 40 | { 41 | description: "4: RDB version unsupported", 42 | rdb: "REDIS0007", 43 | expected: "", 44 | expectedError: ErrVersionUnsupported, 45 
| filter: func(string) bool { return true }, 46 | }, 47 | { 48 | description: "5: RDB too short", 49 | rdb: "REDIS0006\xfe\x00\x00\x03", 50 | expected: "", 51 | expectedError: io.EOF, 52 | filter: func(string) bool { return true }, 53 | }, 54 | { 55 | description: "6: RDB too short", 56 | rdb: "REDIS0006\xfe", 57 | expected: "", 58 | expectedError: io.EOF, 59 | filter: func(string) bool { return true }, 60 | }, 61 | { 62 | description: "7: RDB too short", 63 | rdb: "REDIS0006", 64 | expected: "", 65 | expectedError: io.EOF, 66 | filter: func(string) bool { return true }, 67 | }, 68 | { 69 | description: "8: RDB too short", 70 | rdb: "REDIS00", 71 | expected: "", 72 | expectedError: io.ErrUnexpectedEOF, 73 | filter: func(string) bool { return true }, 74 | }, 75 | { 76 | description: "9: Old RDB, many types, no filtering", 77 | rdb: RDBFile2, 78 | expected: RDBFile2, 79 | filter: func(string) bool { return true }, 80 | }, 81 | { 82 | description: "10: Old RDB, many types, fully filtered out", 83 | rdb: RDBFile2, 84 | expected: "REDIS0001\xfe\x00\xfe\x06\xfe\x07\xfe\x08\xfe\t\xfe\x0b\xfe\x0e\xfe\x0f\xff" + strings.Repeat("\xff", 1546), 85 | filter: func(string) bool { return false }, 86 | }, 87 | { 88 | description: "11: Old RDB, many types, some filtered out", 89 | rdb: RDBFile2, 90 | expected: "REDIS0001\xfe\x00\xfe\x06\x02\x0bv02d_um_109\x01 86756ab85811f6603e59c6d5911c858c\x02\x0bv02e_um_108\x01 86756ab85811f6603e59c6d5911c858c\xfe\x07\xfe\x08\xfe\t\xfe\x0b\xfe\x0e\xfe\x0f\x02\x0bv02e_um_108\x01 86756ab85811f6603e59c6d5911c858c\x02\x0bv02d_um_109\x01 86756ab85811f6603e59c6d5911c858c\xff" + strings.Repeat("\xff", 1358), 91 | filter: func(key string) bool { return strings.HasPrefix(key, "v02") }, 92 | }, 93 | { 94 | description: "12: RDB with list", 95 | rdb: RDBFile3, 96 | expected: RDBFile3, 97 | filter: func(key string) bool { return true }, 98 | }, 99 | { 100 | description: "13: RDB with integer keys", 101 | rdb: RDBFile4, 102 | expected: 
"REDIS0006\xfe\x00\x00\xc0\f\x03abc\x00\u0087\xd6\x12\x00\x03fgh\xffQ\a\xb5\t\xfb\xe8ɦ\xff\xff\xff\xff\xff\xff\xff\xff", 103 | filter: func(key string) bool { return strings.HasPrefix(key, "1") }, 104 | }, 105 | { 106 | description: "14: RDB with lzf compressed strings", 107 | rdb: RDBFile5, 108 | expected: "REDIS0006\xfe\x00\x00\xc3\x12/\x01aa \x00\x00d\xe0\n\x00\x00e\xe0\n\x00\x01ee\x02x3\x00\xc3\x130\x01aa\xe0\a\x00\x00b\xe0\b\x00\x00c\xe0\x00\x00\x01cc\x02x1\xff\x8f\xa2\xae٠Y\xa8N\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff", 109 | filter: func(key string) bool { return strings.HasPrefix(key, "aaaa") }, 110 | }, 111 | } 112 | 113 | for _, test := range tests { 114 | ch := make(chan []byte) 115 | hadError := false 116 | 117 | go func() { 118 | err := FilterRDB(bufio.NewReader(bytes.NewBufferString(test.rdb)), ch, test.filter, int64(len(test.rdb))) 119 | if err != nil { 120 | if test.expectedError == nil || test.expectedError != err { 121 | t.Errorf("Filtering failed (%s): %v", test.description, err) 122 | } else { 123 | hadError = true 124 | } 125 | 126 | } 127 | close(ch) 128 | }() 129 | 130 | received := "" 131 | 132 | for data := range ch { 133 | received += string(data) 134 | } 135 | 136 | if test.expected != "" && len(received) != len(test.rdb) { 137 | t.Errorf("Size of filtered RDB doesn't match original size: %d != %d (test %s)", len(received), len(test.rdb), test.description) 138 | } 139 | 140 | if test.expected != "" && test.expected != received { 141 | t.Errorf("output not equal to expected: %#v != %#v (test %s)", test.expected, received, test.description) 142 | } 143 | 144 | if test.expectedError != nil && !hadError { 145 | t.Errorf("should have failed with error %v (%s)", test.expectedError, test.description) 146 | } 147 | } 148 | 149 | } 150 | 151 | func runRDBBenchmark(b *testing.B, filter func(string) bool) { 152 | for i := 0; i < b.N; i++ { 153 | ch := make(chan []byte) 154 | 155 | go func() 
{ 156 | err := FilterRDB(bufio.NewReader(bytes.NewBufferString(RDBFile2)), ch, filter, int64(len(RDBFile2))) 157 | close(ch) 158 | if err != nil { 159 | b.Fatalf("Unable to filter RDB: %v", err) 160 | } 161 | }() 162 | 163 | for _ = range ch { 164 | } 165 | 166 | } 167 | } 168 | 169 | func BenchmarkFilterRDBCopy(b *testing.B) { 170 | runRDBBenchmark(b, func(string) bool { return true }) 171 | } 172 | 173 | func BenchmarkFilterRDBDiscard(b *testing.B) { 174 | runRDBBenchmark(b, func(string) bool { return false }) 175 | } 176 | 177 | func BenchmarkFilterRDBSome(b *testing.B) { 178 | runRDBBenchmark(b, func(key string) bool { return strings.HasPrefix(key, "v02") }) 179 | } 180 | 181 | const ( 182 | RDBFile1 = "REDIS0006\xfe\x00\x00\x03b_1\x04kuku\x00\x03a_1\x04lala\x00\x03b_3\xc3\t@\xb3\x01aa\xe0\xa6\x00\x01aa\xfc\xdb\x82\xb0\\B\x01\x00\x00\x00\x03b_2\r2343545345345\x00\x03a_2\xc0!\xffT\x81\xe9\x86\xcc\x9f\x1f\xc4" 183 | RDBFile2 = "REDIS0001\xfe\x00\x00\ncompressed\xc3\x0c(\x04abcda\xe0\x18\x03\x01cd\x03\x05testz\x06\x01b\x0256\x01c\x0257\x03aaa\x0277\x04dddd\x011\x01a\x0243\x02aa\x017\xfe\x06\x02\x0bv02d_um_109\x01 86756ab85811f6603e59c6d5911c858c\x02\x0bv02e_um_108\x01 
86756ab85811f6603e59c6d5911c858c\xfe\x07\x00\x12v3fe_Eramu@qik.com\xc0\x07\x00*v0a0_Ugrizmo4552d32c-af1e-484c-9d0b-6e4447\xc0\x04\xfe\x08\x00\x15v8da_Enikolay@qik.com\xc0\x08\x01\tbi_webapp\x06@q{\"event\":\"webapp.user.signup\",\"method\":\"api\",\"timestamp\":1311664050.7045,\"actor_id\":159973,\"ip\":null,\"app\":\"mob\"}@r{\"event\":\"webapp.user.signup\",\"method\":\"api\",\"timestamp\":1311664056.18088,\"actor_id\":159974,\"ip\":null,\"app\":\"mob\"}@r{\"event\":\"webapp.user.signup\",\"method\":\"api\",\"timestamp\":1311664560.31115,\"actor_id\":159975,\"ip\":null,\"app\":\"mob\"}@r{\"event\":\"webapp.user.signup\",\"method\":\"api\",\"timestamp\":1311664565.91616,\"actor_id\":159976,\"ip\":null,\"app\":\"mob\"}@r{\"event\":\"webapp.user.signup\",\"method\":\"api\",\"timestamp\":1311664820.23724,\"actor_id\":159977,\"ip\":null,\"app\":\"mob\"}@r{\"event\":\"webapp.user.signup\",\"method\":\"api\",\"timestamp\":1311664860.49914,\"actor_id\":159978,\"ip\":null,\"app\":\"mob\"}\x00*v7c5_Ugrizmo15919895-bba1-47ef-8bdb-f6f968\xc0\x06\x00\x0bv693_dudeid\xc0\x08\xfe\t\x00*vd06_Ugrizmo29b59262-d286-4ed5-b7bf-1566cf\xc0\x03\x00*vf9e_Ugrizmo6b035e25-02b2-44ee-8860-48bac5\xc0\x05\x00*veaf_Ugrizmo468defb9-dc99-4cf2-92e7-cdef05\xc0\x01\x00*vc12_Ugrizmo8b2858e9-d439-4726-bb8e-abdbd9\xc0\x02\xfe\x0b\x00\x06v035_5\xc2\xe9p\x02\x00\xfe\x0e\x04\x0cv9a5_U159946\x03\x05dirty\x010\x07clients\x04\x80\x02].\x05users\x04\x80\x02].\x04\x0cv94b_U159973\x01\x05dirty\x011\x04\x0cv94a_U159974\x01\x05dirty\x011\x04\x0cv948_U159976\x01\x05dirty\x011\x04\x0cv946_U159978\x01\x05dirty\x011\x04\x0cv947_U159977\x01\x05dirty\x011\x04\x0cv949_U159975\x01\x05dirty\x011\xfe\x0f\x02\nv588_um_45\x01 f427ecf81e3afe3f4037a629944aaea0\x02\x0bv02e_um_108\x01 86756ab85811f6603e59c6d5911c858c\x02\x0bv02d_um_109\x01 86756ab85811f6603e59c6d5911c858c\xff" 184 | RDBFile3 = "REDIS0006\xfe\x00\n\x06mylist\xc3A\xbeE\x83\x04\x83\x05\x00\x00t 
\x03\x04d\x00\x00\x0c0\xe0\x00\x00\x0270\x0e\xe0\x02\r\x0115\xe0\x03\r\x0124\xe0\x03\r\x0198\xe0\x03\r\x0137\xe0\x03\r\x008\xe0\x04)\x0119\xe0\x03\x1b\x0121\xe0\x03\r\x0173\xe0\x03\r\x002\xe0\x04)\x0142\xe0\x03\x1b\x003\xe0\x04\x1b\x009\xe0\x04a\x0186\xe0\x03)\x002\xe0\x04\r\x001\xe0\x12E\x006\xe0\x04\xc3\x007\xe0\x047\x006\xe1\x04\t\x003\xe0\x04E\x009\xe0\x04\x8b\x005\xe0\x04\x8b\x005\xe0\x04\xdf\x000\xe0\x04\xdf\x001\xe0\x04\x1b\xe1\x05%\x008\xe0\x05\x8b\xe0\x05\r\xe0\x04\x99\x000\xe0\x04\x1b\x008\xe1\x04y\x005\xe0\x04\xb5\x004\xe0\x04}\x006\xe0\x04\xa7\x003\xe0\x04\r\x006\xe0\x04E\x001\xe0\x04\x1b\x004\xe2\x04\x05\x005\xe0\x04\x8b\x008\xe1\x05\x17\xe0\x04)\xe2\x05/\x005\xe0\x05a\xe0\x04\x99\xe1\x06\xf7\xe0\x04a\x000\xe0\x04S\x003\xe0\x04\x1b\x002\xe0\x04S\xe1\x05O\x002\xe0\x047\x009\xe0\x04o\xe0\x05S\x008\xe1\x04\x95\x009\xe0\x04a\xe3\x05\x0f\x006\xe0\x04\xed\xe2\x05\xbb\xe1\x06\x17\xe1\x04\x87\x009\xe0\x04a\xe2\x06\x9f\xe0\x04\x99\xe1\x05\x87\x006\xe2\x05g\xe1\x04\xf7\x002\xe0\x04\x8b\x000\xe0\x04\r\xe1\x05O\x003\xe1\x04\t\xe0\x05a\x002\xe0\x05}\xe0\x04E\x003\xe0\x04\xc3\xe1\x05\xe9\xe0\x05a\xe0\x05S\xe3\x05U\xe2\x05\xf3\xe1\x05\xbf\x007\xe0\x04o\xe1\x05\x17\x004\xe0\x04}\xe0\x05\x1b\x006\xe0\x04\xb5\x005\xe1\x04%\x009\xe0\x057\xe2\x04!\xe0\x05\xdf\xe4\x05\x89\x004\xe0\x05E\xe0\x04\x99\xe4\x05_\xe1\x05O\xe2\x05Y\xe5\x051\x007\xe0\x04}\xe0\x05E\x0283\xff\xffy\xaa\x8e\x05\xb8\xd6\xecX" 185 | RDBFile4 = "REDIS0006\xfe\x00\x00\xc1aS\x03cde\x00\xc0\x0c\x03abc\x00\xc2\x87\xd6\x12\x00\x03fgh\xff\xe9 \xb4\xe35e\x99\x92" 186 | RDBFile5 = "REDIS0006\xfe\x00\x00\xc3\x12/\x01aa \x00\x00d\xe0\n\x00\x00e\xe0\n\x00\x01ee\x02x3\x00\xc3\x120\x01bb\xe0\x07\x00\x00a\xe0\t\x00\x00c\xc0\x00\x01cc\x02x2\x00\xc3\x130\x01aa\xe0\x07\x00\x00b\xe0\x08\x00\x00c\xe0\x00\x00\x01cc\x02x1\xff\x83J\xb9\xf9mX\x8a\xa6" 187 | ) 188 | -------------------------------------------------------------------------------- /rdb.go: 
-------------------------------------------------------------------------------- 1 | package main 2 | 3 | // Filter RDB file per spec: https://github.com/sripathikrishnan/redis-rdb-tools/wiki/Redis-RDB-Dump-File-Format 4 | 5 | import ( 6 | "bufio" 7 | "bytes" 8 | "encoding/binary" 9 | "errors" 10 | "fmt" 11 | "io" 12 | "strconv" 13 | ) 14 | 15 | const ( 16 | rdbOpDB = 0xFE 17 | rdbOpExpirySec = 0xFD 18 | rdbOpExpiryMSec = 0xFC 19 | rdbOpEOF = 0xFF 20 | 21 | rdbLen6Bit = 0x0 22 | rdbLen14bit = 0x1 23 | rdbLen32Bit = 0x2 24 | rdbLenEnc = 0x3 25 | 26 | rdbOpString = 0x00 27 | rdbOpList = 0x01 28 | rdbOpSet = 0x02 29 | rdbOpZset = 0x03 30 | rdbOpHash = 0x04 31 | rdbOpZipmap = 0x09 32 | rdbOpZiplist = 0x0a 33 | rdbOpIntset = 0x0b 34 | rdbOpSortedSet = 0x0c 35 | rdbOpHashmap = 0x0d 36 | ) 37 | 38 | var ( 39 | rdbSignature = []byte{0x52, 0x45, 0x44, 0x49, 0x53} 40 | ) 41 | 42 | var ( 43 | // ErrWrongSignature is returned when RDB signature can't be parsed 44 | ErrWrongSignature = errors.New("rdb: wrong signature") 45 | // ErrVersionUnsupported is returned when RDB version is too high (can't parse) 46 | ErrVersionUnsupported = errors.New("rdb: version unsupported") 47 | // ErrUnsupportedOp is returned when unsupported operation is encountered in RDB 48 | ErrUnsupportedOp = errors.New("rdb: unsupported opcode") 49 | // ErrUnsupportedStringEnc is returned when unsupported string encoding is encountered in RDB 50 | ErrUnsupportedStringEnc = errors.New("rdb: unsupported string encoding") 51 | ) 52 | 53 | // RDBFilter holds internal state of RDB filter while running 54 | type RDBFilter struct { 55 | reader *bufio.Reader 56 | output chan<- []byte 57 | dissector func(string) bool 58 | originalLength int64 59 | length int64 60 | hash uint64 61 | saved []byte 62 | rdbVersion int 63 | valueState state 64 | shouldKeep bool 65 | currentOp byte 66 | } 67 | 68 | type state func(filter *RDBFilter) (nextstate state, err error) 69 | 70 | // FilterRDB filters RDB file which is read from 
reader, sending chunks of data through output channel 71 | // dissector function is applied to keys to check whether item should be kept or skipped 72 | // length is original length of RDB file 73 | func FilterRDB(reader *bufio.Reader, output chan<- []byte, dissector func(string) bool, length int64) (err error) { 74 | filter := &RDBFilter{ 75 | reader: reader, 76 | output: output, 77 | dissector: dissector, 78 | originalLength: length, 79 | shouldKeep: true, 80 | } 81 | 82 | state := stateMagic 83 | 84 | for state != nil { 85 | state, err = state(filter) 86 | if err != nil { 87 | return 88 | } 89 | } 90 | 91 | return nil 92 | } 93 | 94 | // Read exactly n bytes 95 | func (filter *RDBFilter) safeRead(n uint32) (result []byte, err error) { 96 | result = make([]byte, n) 97 | _, err = io.ReadFull(filter.reader, result) 98 | return 99 | } 100 | 101 | // Accumulate some data that might be either filtered out or passed through 102 | func (filter *RDBFilter) write(data []byte) { 103 | if !filter.shouldKeep { 104 | return 105 | } 106 | 107 | if filter.saved == nil { 108 | filter.saved = make([]byte, len(data), 4096) 109 | copy(filter.saved, data) 110 | } else { 111 | filter.saved = append(filter.saved, data...) 
112 | } 113 | } 114 | 115 | // Discard or keep saved data 116 | func (filter *RDBFilter) keepOrDiscard() { 117 | if filter.shouldKeep && filter.saved != nil { 118 | filter.output <- filter.saved 119 | filter.hash = CRC64Update(filter.hash, filter.saved) 120 | filter.length += int64(len(filter.saved)) 121 | } 122 | filter.saved = nil 123 | filter.shouldKeep = true 124 | } 125 | 126 | // Read length encoded prefix 127 | func (filter *RDBFilter) readLength() (length uint32, encoding int8, err error) { 128 | prefix, err := filter.reader.ReadByte() 129 | if err != nil { 130 | return 0, 0, err 131 | } 132 | filter.write([]byte{prefix}) 133 | 134 | kind := (prefix & 0xC0) >> 6 135 | 136 | switch kind { 137 | case rdbLen6Bit: 138 | length = uint32(prefix & 0x3F) 139 | return length, -1, nil 140 | case rdbLen14bit: 141 | data, err := filter.reader.ReadByte() 142 | if err != nil { 143 | return 0, 0, err 144 | } 145 | filter.write([]byte{data}) 146 | length = ((uint32(prefix) & 0x3F) << 8) | uint32(data) 147 | return length, -1, nil 148 | case rdbLen32Bit: 149 | data, err := filter.safeRead(4) 150 | if err != nil { 151 | return 0, 0, err 152 | } 153 | filter.write(data) 154 | length = binary.BigEndian.Uint32(data) 155 | return length, -1, nil 156 | case rdbLenEnc: 157 | encoding = int8(prefix & 0x3F) 158 | return 0, encoding, nil 159 | } 160 | panic("never reached") 161 | } 162 | 163 | // Taken from Golly: https://github.com/tav/golly/blob/master/lzf/lzf.go 164 | // Removed part that gets outputLength from data 165 | func lzfDecompress(input []byte, outputLength uint32) (output []byte) { 166 | 167 | inputLength := uint32(len(input)) 168 | 169 | var backref int64 170 | var ctrl, iidx, length, oidx uint32 171 | 172 | output = make([]byte, outputLength, outputLength) 173 | iidx = 0 174 | 175 | for iidx < inputLength { 176 | // Get the control byte. 
177 | ctrl = uint32(input[iidx]) 178 | iidx++ 179 | 180 | if ctrl < (1 << 5) { 181 | // The control byte indicates a literal reference. 182 | ctrl++ 183 | if oidx+ctrl > outputLength { 184 | return nil 185 | } 186 | 187 | // Safety check. 188 | if iidx+ctrl > inputLength { 189 | return nil 190 | } 191 | 192 | for { 193 | output[oidx] = input[iidx] 194 | iidx++ 195 | oidx++ 196 | ctrl-- 197 | if ctrl == 0 { 198 | break 199 | } 200 | } 201 | } else { 202 | // The control byte indicates a back reference. 203 | length = ctrl >> 5 204 | backref = int64(oidx - ((ctrl & 31) << 8) - 1) 205 | 206 | // Safety check. 207 | if iidx >= inputLength { 208 | return nil 209 | } 210 | 211 | // It's an extended back reference. Read the extended length before 212 | // reading the full back reference location. 213 | if length == 7 { 214 | length += uint32(input[iidx]) 215 | iidx++ 216 | // Safety check. 217 | if iidx >= inputLength { 218 | return nil 219 | } 220 | } 221 | 222 | // Put together the full back reference location. 
223 | backref -= int64(input[iidx]) 224 | iidx++ 225 | 226 | if oidx+length+2 > outputLength { 227 | return nil 228 | } 229 | 230 | if backref < 0 { 231 | return nil 232 | } 233 | 234 | output[oidx] = output[backref] 235 | oidx++ 236 | backref++ 237 | output[oidx] = output[backref] 238 | oidx++ 239 | backref++ 240 | 241 | for { 242 | output[oidx] = output[backref] 243 | oidx++ 244 | backref++ 245 | length-- 246 | if length == 0 { 247 | break 248 | } 249 | } 250 | 251 | } 252 | } 253 | 254 | return output 255 | } 256 | 257 | // read string from RDB, only uncompressed version is supported 258 | func (filter *RDBFilter) readString() (string, error) { 259 | var result string 260 | 261 | length, encoding, err := filter.readLength() 262 | if err != nil { 263 | return "", err 264 | } 265 | 266 | switch encoding { 267 | // length-prefixed string 268 | case -1: 269 | data, err := filter.safeRead(length) 270 | if err != nil { 271 | return "", err 272 | } 273 | filter.write(data) 274 | result = string(data) 275 | // integer as string 276 | case 0, 1, 2: 277 | data, err := filter.safeRead(1 << uint8(encoding)) 278 | if err != nil { 279 | return "", err 280 | } 281 | filter.write(data) 282 | 283 | var num uint32 284 | 285 | if encoding == 0 { 286 | num = uint32(data[0]) 287 | } else if encoding == 1 { 288 | num = uint32(data[0]) | (uint32(data[1]) << 8) 289 | } else if encoding == 2 { 290 | num = uint32(data[0]) | (uint32(data[1]) << 8) | (uint32(data[2]) << 16) | (uint32(data[3]) << 24) 291 | } 292 | 293 | result = fmt.Sprintf("%d", num) 294 | // compressed string 295 | case 3: 296 | clength, _, err := filter.readLength() 297 | if err != nil { 298 | return "", err 299 | } 300 | length, _, err := filter.readLength() 301 | if err != nil { 302 | return "", err 303 | } 304 | data, err := filter.safeRead(clength) 305 | if err != nil { 306 | return "", err 307 | } 308 | filter.write(data) 309 | 310 | result = string(lzfDecompress(data, length)) 311 | default: 312 | return "", 
ErrUnsupportedStringEnc 313 | } 314 | 315 | return result, nil 316 | } 317 | 318 | // skip (copy) string from RDB 319 | func (filter *RDBFilter) skipString() error { 320 | length, encoding, err := filter.readLength() 321 | if err != nil { 322 | return err 323 | } 324 | 325 | switch encoding { 326 | // length-prefixed string 327 | case -1: 328 | data, err := filter.safeRead(length) 329 | if err != nil { 330 | return err 331 | } 332 | filter.write(data) 333 | // integer as string 334 | case 0, 1, 2: 335 | data, err := filter.safeRead(1 << uint8(encoding)) 336 | if err != nil { 337 | return err 338 | } 339 | filter.write(data) 340 | // compressed string 341 | case 3: 342 | clength, _, err := filter.readLength() 343 | if err != nil { 344 | return err 345 | } 346 | _, _, err = filter.readLength() 347 | if err != nil { 348 | return err 349 | } 350 | data, err := filter.safeRead(clength) 351 | if err != nil { 352 | return err 353 | } 354 | filter.write(data) 355 | default: 356 | return ErrUnsupportedStringEnc 357 | } 358 | return nil 359 | } 360 | 361 | // read RDB magic header 362 | func stateMagic(filter *RDBFilter) (state, error) { 363 | signature, err := filter.safeRead(5) 364 | if err != nil { 365 | return nil, err 366 | } 367 | if bytes.Compare(signature, rdbSignature) != 0 { 368 | return nil, ErrWrongSignature 369 | } 370 | filter.write(signature) 371 | 372 | versionRaw, err := filter.safeRead(4) 373 | if err != nil { 374 | return nil, err 375 | } 376 | version, err := strconv.Atoi(string(versionRaw)) 377 | if err != nil { 378 | return nil, ErrWrongSignature 379 | } 380 | 381 | if version > 6 { 382 | return nil, ErrVersionUnsupported 383 | } 384 | 385 | filter.rdbVersion = version 386 | filter.write(versionRaw) 387 | filter.keepOrDiscard() 388 | 389 | return stateOp, nil 390 | } 391 | 392 | // main selector of operations 393 | func stateOp(filter *RDBFilter) (state, error) { 394 | op, err := filter.reader.ReadByte() 395 | if err != nil { 396 | return nil, err 397 | 
} 398 | filter.currentOp = op 399 | 400 | switch op { 401 | case rdbOpDB: 402 | filter.keepOrDiscard() 403 | return stateDB, nil 404 | case rdbOpExpirySec: 405 | return stateExpirySec, nil 406 | case rdbOpExpiryMSec: 407 | return stateExpiryMSec, nil 408 | case rdbOpString, rdbOpZipmap, rdbOpZiplist, rdbOpIntset, rdbOpSortedSet, rdbOpHashmap: 409 | filter.valueState = stateSkipString 410 | return stateKey, nil 411 | case rdbOpList, rdbOpSet: 412 | filter.valueState = stateSkipSetOrList 413 | return stateKey, nil 414 | case rdbOpZset: 415 | filter.valueState = stateSkipZset 416 | return stateKey, nil 417 | case rdbOpHash: 418 | filter.valueState = stateSkipHash 419 | return stateKey, nil 420 | case rdbOpEOF: 421 | filter.keepOrDiscard() 422 | filter.write([]byte{rdbOpEOF}) 423 | filter.keepOrDiscard() 424 | if filter.rdbVersion > 4 { 425 | return stateCRC64, nil 426 | } 427 | return statePadding, nil 428 | default: 429 | return nil, ErrUnsupportedOp 430 | } 431 | } 432 | 433 | // DB index operation 434 | func stateDB(filter *RDBFilter) (state, error) { 435 | filter.write([]byte{rdbOpDB}) 436 | _, _, err := filter.readLength() 437 | if err != nil { 438 | return nil, err 439 | } 440 | filter.keepOrDiscard() 441 | 442 | return stateOp, nil 443 | } 444 | 445 | func stateExpirySec(filter *RDBFilter) (state, error) { 446 | expiry, err := filter.safeRead(4) 447 | if err != nil { 448 | return nil, err 449 | } 450 | 451 | filter.write([]byte{rdbOpExpirySec}) 452 | filter.write(expiry) 453 | 454 | return stateOp, nil 455 | } 456 | 457 | func stateExpiryMSec(filter *RDBFilter) (state, error) { 458 | expiry, err := filter.safeRead(8) 459 | if err != nil { 460 | return nil, err 461 | } 462 | 463 | filter.write([]byte{rdbOpExpiryMSec}) 464 | filter.write(expiry) 465 | 466 | return stateOp, nil 467 | } 468 | 469 | // read key 470 | func stateKey(filter *RDBFilter) (state, error) { 471 | filter.write([]byte{filter.currentOp}) 472 | key, err := filter.readString() 473 | if err != 
nil { 474 | return nil, err 475 | } 476 | 477 | filter.shouldKeep = filter.dissector(key) 478 | 479 | return filter.valueState, nil 480 | } 481 | 482 | // skip over string 483 | func stateSkipString(filter *RDBFilter) (state, error) { 484 | err := filter.skipString() 485 | if err != nil { 486 | return nil, err 487 | } 488 | 489 | filter.keepOrDiscard() 490 | return stateOp, nil 491 | } 492 | 493 | // skip over set or list 494 | func stateSkipSetOrList(filter *RDBFilter) (state, error) { 495 | length, _, err := filter.readLength() 496 | if err != nil { 497 | return nil, err 498 | } 499 | 500 | var i uint32 501 | 502 | for i = 0; i < length; i++ { 503 | // list element 504 | err = filter.skipString() 505 | if err != nil { 506 | return nil, err 507 | } 508 | } 509 | 510 | filter.keepOrDiscard() 511 | return stateOp, nil 512 | } 513 | 514 | // skip over hash 515 | func stateSkipHash(filter *RDBFilter) (state, error) { 516 | length, _, err := filter.readLength() 517 | if err != nil { 518 | return nil, err 519 | } 520 | 521 | var i uint32 522 | 523 | for i = 0; i < length; i++ { 524 | // key 525 | err = filter.skipString() 526 | if err != nil { 527 | return nil, err 528 | } 529 | 530 | // value 531 | err = filter.skipString() 532 | if err != nil { 533 | return nil, err 534 | } 535 | } 536 | 537 | filter.keepOrDiscard() 538 | return stateOp, nil 539 | } 540 | 541 | // skip over zset 542 | func stateSkipZset(filter *RDBFilter) (state, error) { 543 | length, _, err := filter.readLength() 544 | if err != nil { 545 | return nil, err 546 | } 547 | 548 | var i uint32 549 | 550 | for i = 0; i < length; i++ { 551 | err = filter.skipString() 552 | if err != nil { 553 | return nil, err 554 | } 555 | 556 | dlen, err := filter.reader.ReadByte() 557 | if err != nil { 558 | return nil, err 559 | } 560 | filter.write([]byte{dlen}) 561 | 562 | if dlen < 0xFD { 563 | double, err := filter.safeRead(uint32(dlen)) 564 | if err != nil { 565 | return nil, err 566 | } 567 | 568 | 
filter.write(double) 569 | } 570 | } 571 | 572 | filter.keepOrDiscard() 573 | return stateOp, nil 574 | } 575 | 576 | // re-calculate crc64 577 | func stateCRC64(filter *RDBFilter) (state, error) { 578 | _, err := filter.safeRead(8) 579 | if err != nil { 580 | return nil, err 581 | } 582 | 583 | buf := make([]byte, 8) 584 | 585 | binary.LittleEndian.PutUint64(buf, filter.hash) 586 | filter.output <- buf 587 | filter.length += 8 588 | 589 | return statePadding, nil 590 | } 591 | 592 | // pad RDB with 0xFF up to original length 593 | func statePadding(filter *RDBFilter) (state, error) { 594 | const paddingSize = 4096 595 | 596 | paddingLength := filter.originalLength - filter.length 597 | paddingBlock := make([]byte, paddingSize) 598 | 599 | for i := range paddingBlock { 600 | paddingBlock[i] = 0xFF 601 | } 602 | 603 | for paddingLength > 0 { 604 | if paddingLength > paddingSize { 605 | filter.output <- paddingBlock 606 | paddingLength -= paddingSize 607 | } else { 608 | filter.output <- paddingBlock[:paddingLength] 609 | break 610 | } 611 | } 612 | return nil, nil 613 | } 614 | --------------------------------------------------------------------------------