├── Makefile ├── README.md ├── buildme.sh ├── images ├── image_algorithm.png ├── image_permutation.png ├── image_result.png └── image_test.png ├── maglev_unittest.c ├── maglevhash.c └── maglevhash.h /Makefile: -------------------------------------------------------------------------------- 1 | CC=g++ 2 | CFLAGS=-g -Wall -pthread -I./ 3 | LDFLAGS= -lpthread -lm -lgtest 4 | 5 | OBJECTS=$(SUBSRCS:.c=.o) 6 | EXEC=maglevhash_test 7 | 8 | all: $(EXEC) 9 | 10 | $(EXEC): $(wildcard *.c) $(OBJECTS) 11 | $(CC) $(CFLAGS) $^ $(LDFLAGS) -o $@ 12 | 13 | clean: 14 | $(RM) $(EXEC) 15 | 16 | .PHONY: $(EXEC) 17 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # maglev_hash 原始论文 2 | 一个根据google maglev 论文,用c语言实现的一致性hash算法 3 | [Maglev: A Fast and Reliable Software Network Load Balancer](http://static.googleusercontent.com/media/research.google.com/zh-TW//pubs/archive/44824.pdf) 4 | 5 | ## 评价一致性hash的好坏,主要关注以下两点: 6 | - 平衡性(Balance): 7 | 平衡性是指哈希的结果能够尽可能分布到所有的节点中去,很多哈希算法都能够满足这一条件。 8 | - 单调性(Monotonicity): 9 | 单调性是指如果已经有一些内容通过哈希分派到了相应的节点,又有新的节点加入到系统中。 10 | 哈希的结果应能够保证原有已分配的内容可以被映射到原有的或者新的节点中去,而不会被映射到旧的集合中的其他节点。 11 | 12 | ## maglev一致性hash算法实现: 13 | - 构造permutation数组 14 | 根据不同的两个hash函数,我们生成两个数值(offset ,skip) 15 | 在这里我们使用的是murmur2算法和DJBHash算法,读者可以自己到网上找算法实现。 16 | 17 | ![maglev consistent hashing](images/image_permutation.png) 18 | 19 | M = 一个hash槽大小 ,这个槽设置成一个素数 20 | N=节点个数 21 | 22 | ```text 23 | unsigned int offset = DJBHash(p_rs_srv_name); 24 | offset = offset % M; 25 | unsigned int skip = ngx_murmur_hash2(p_rs_srv_name,strlen(p_rs_srv_name) ); 26 | skip = skip % ( M -1 ) + 1; 27 | for(int j = 0; j < M ; j++ ) { 28 | *(p_rs_info->m_permute + j) = (offset + j * skip) % M 29 | } 30 | 31 | ``` 32 | 33 | 生成hash查找桶entry 34 | 35 | ![算法](images/image_algorithm.png) 36 | 37 | 根据这个算法 ,数据源是 permutation , 生成 最后的 entry 数组 ,就是下图的表。 38 | 39 | ![论文上结果](images/image_result.png) 40 
| 41 | ## maglev一致性hash测试 42 | 43 | 下表是测试情况: 44 | > 1. 正常三个节点时,通过一致性hash负载的分布情况,测试平衡性。 45 | > 2. 有单点故障时,流量变化情况(2节点故障),测试单调性。 46 | > 3. 增加一个节点时,其他节点流量的变化情况,测试单调性。 47 | 48 | ![单调性测试](images/image_test.png) 49 | 50 | ### 单调性测试 51 | > M = 2017, real server = 4 删除一个rs = 3后,前后entry(lookup table)槽对比, hash size = 2017 52 | > delete:504, 由于单点故障,故障点的请求都被分流了,删除的节点有504个槽 53 | > interupt:10, 其他槽中有10个漂移到其他rs上去了。 54 | > 这样算下来有: (2017-10)/2017 = 99.5% 保持了单调性 55 | 56 | 57 | # Maglev Hash与Consistent Hashing的对比 58 | 59 | - 平衡性(Balance) 60 | > maglev hash通过轮询放入lookup table(entry)数组,保证每个节点的机会是均衡的。 61 | > consistent hash 通过增加虚拟节点的倍数来保证平衡性,虚拟节点的倍数越高平衡性越好。 62 | > 缺点就是consistent hash 的lookup table 一般是一个树形结构(rbtree),虚拟节点越多查询时性能越差。 63 | 64 | - 单调性(Monotonicity) 65 | > consistent hash通过有序的hash环,保证单点故障和增加节点时很好的单调性。 66 | > maglev hash通过之前生成的偏好表(permutation list)决定变化时的单调性。 67 | > 缺点是maglev hash有些时候会出现disruption ,比例是:0.5% ,这一点上不如consistent hash。 68 | 69 | - 根据上面比较看: 70 | > maglev hash在平衡性和查询性能上要优于consistent hash。 71 | > consistent hash在单调性上要优于maglev hash。 72 | -------------------------------------------------------------------------------- /buildme.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | # check lib and header file 5 | HDRS_IN='/usr' 6 | LIBS_IN='/usr' 7 | find_dir_of_lib() { 8 | local lib=$(find ${LIBS_IN} -name "lib${1}.a" -o -name "lib${1}.$SO" 2>/dev/null | head -n1) 9 | if [ ! -z "$lib" ]; then 10 | dirname $lib 11 | fi 12 | } 13 | find_dir_of_header() { 14 | find -L ${HDRS_IN} -path "*/$1" | head -n1 | sed "s|$1||g" 15 | } 16 | 17 | #gtest 18 | GTEST_LIB=$(find_dir_of_lib gtest) 19 | if [ -z "$GTEST_LIB" ]; then 20 | echo " \$(error \"Fail to find gtest lib\")" 21 | echo "sudo apt-get install -y cmake libgtest-dev && cd /usr/src/gtest && sudo cmake . 
&& sudo make && sudo mv libgtest* /usr/lib/ && cd -" 22 | exit 0 23 | else 24 | GTEST_HDR=$(find_dir_of_header gtest/gtest.h) 25 | if [ -z "$GTEST_HDR" ]; then 26 | echo " \$(error \"Fail to find gtest include\")" 27 | echo "sudo apt-get install -y cmake libgtest-dev && cd /usr/src/gtest && sudo cmake . && sudo make && sudo mv libgtest* /usr/lib/ && cd -" 28 | exit 0 29 | fi 30 | fi 31 | 32 | echo $GTEST_LIB " " 33 | echo $GTEST_HDR " " 34 | 35 | make 36 | -------------------------------------------------------------------------------- /images/image_algorithm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunwsh/maglev_hash/19a36013bf83e7ec252b18600c3fd3e78221de75/images/image_algorithm.png -------------------------------------------------------------------------------- /images/image_permutation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunwsh/maglev_hash/19a36013bf83e7ec252b18600c3fd3e78221de75/images/image_permutation.png -------------------------------------------------------------------------------- /images/image_result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunwsh/maglev_hash/19a36013bf83e7ec252b18600c3fd3e78221de75/images/image_result.png -------------------------------------------------------------------------------- /images/image_test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunwsh/maglev_hash/19a36013bf83e7ec252b18600c3fd3e78221de75/images/image_test.png -------------------------------------------------------------------------------- /maglev_unittest.c: -------------------------------------------------------------------------------- 1 | // Copyright 2005, Google Inc. 2 | // All rights reserved. 3 | // 4 | // Step 1. 
Include necessary header files such that the stuff your 5 | // test logic needs is declared. 6 | // 7 | 8 | #include 9 | #include "maglevhash.h" 10 | #include "gtest/gtest.h" 11 | #include 12 | //#include 13 | 14 | //using std::string; 15 | 16 | // Tests factorial of negative numbers. 17 | TEST(FactorialTest, Negative) { 18 | // This test is named "Negative", and belongs to the "FactorialTest" 19 | // test case. 20 | struct MAGLEV_LOOKUP_HASH m_maglev_hash; 21 | int REAL_SERVER_NUMB = 3; 22 | int rs_entry_count[3] = {0}; 23 | 24 | maglev_init( &m_maglev_hash ); 25 | 26 | EXPECT_EQ(0, maglev_update_service( &m_maglev_hash , REAL_SERVER_NUMB, 313) ); 27 | int i; 28 | for(i=0;i < REAL_SERVER_NUMB; i++) 29 | { 30 | char *rsdesc = (char *) malloc(2); 31 | snprintf(rsdesc, 2,"%d", i); 32 | char descname[100]; 33 | snprintf(descname, sizeof(descname), "rs:%d", i); 34 | 35 | maglev_add_node(&m_maglev_hash, descname, rsdesc); 36 | rs_entry_count[i] = 0; 37 | } 38 | 39 | maglev_create_ht( &m_maglev_hash ); 40 | 41 | maglev_swap_entry( &m_maglev_hash ); 42 | 43 | // 44 | struct MAGLEV_SERVICE_PARAMS *temp_srv = &m_maglev_hash.item[ m_maglev_hash.is_use_index ]; 45 | for(i = 0; i < temp_srv->hash_bucket_size; i++ ) { 46 | if (0 == strcmp((char *)(temp_srv->hash_entry[i]), "0")) { 47 | rs_entry_count[0]++; 48 | } 49 | if (0 == strcmp((char *)(temp_srv->hash_entry[i]), "1")) { 50 | rs_entry_count[1]++; 51 | } 52 | if (0 == strcmp((char *)(temp_srv->hash_entry[i]), "2")) { 53 | rs_entry_count[2]++; 54 | } 55 | } 56 | 57 | printf("hash size:%d\n", temp_srv->hash_bucket_size); 58 | for(i = 0; i < REAL_SERVER_NUMB; i++) { 59 | printf("node: %s, count: %d\n", (char *)temp_srv->node_info_entry[i], rs_entry_count[i]); 60 | } 61 | } 62 | 63 | int main(int argc,char **argv){ 64 | testing::InitGoogleTest(&argc,argv); 65 | return RUN_ALL_TESTS(); 66 | } 67 | -------------------------------------------------------------------------------- /maglevhash.c: 
-------------------------------------------------------------------------------- 1 | /* 2 | * maglevhash.h 3 | * 4 | * Created on: 2017-9-13 5 | */ 6 | 7 | #include "maglevhash.h" 8 | #include 9 | #include 10 | 11 | 12 | void maglev_init(struct MAGLEV_LOOKUP_HASH *psrv) 13 | { 14 | psrv->is_use_index = -1; 15 | psrv->is_modify_lock = 0; 16 | psrv->p_temp = NULL; 17 | 18 | psrv->item[0].hash_bucket_size = 0; 19 | psrv->item[1].hash_bucket_size = 0; 20 | 21 | psrv->item[0].node_size = 0; 22 | psrv->item[1].node_size = 0; 23 | 24 | psrv->item[0].permutation = NULL; 25 | psrv->item[1].permutation = NULL; 26 | 27 | psrv->item[0].next = NULL; 28 | psrv->item[1].next = NULL; 29 | 30 | psrv->item[0].hash_entry = NULL; 31 | psrv->item[1].hash_entry = NULL; 32 | 33 | psrv->item[0].node_name = NULL; 34 | psrv->item[1].node_name = NULL; 35 | 36 | psrv->item[0].node_info_entry = NULL; 37 | psrv->item[1].node_info_entry = NULL; 38 | 39 | psrv->item[0].node_add_index = 0; 40 | psrv->item[1].node_add_index = 0; 41 | } 42 | 43 | static int8_t __is_maglev_prime(int32_t n) 44 | { 45 | if (n < MAGLEV_HASH_SIZE_MIN) 46 | return 0; 47 | if (n > MAGLEV_HASH_SIZE_MAX) 48 | return 0; 49 | if (n%2 == 0) 50 | return 0; 51 | 52 | int32_t i, j; 53 | j = (int32_t) sqrt(n + 1); 54 | for (i = 3; i <= j; i = i + 2) 55 | if (n % i == 0) 56 | return 0; 57 | return 1; 58 | } 59 | 60 | static struct MAGLEV_SERVICE_PARAMS* __create_maglev_service_unit(struct MAGLEV_SERVICE_PARAMS* pServ, 61 | int node_size, 62 | int hash_bucket_size) 63 | { 64 | if (0 == __is_maglev_prime(hash_bucket_size)) { 65 | return NULL; 66 | } 67 | 68 | pServ->hash_bucket_size = hash_bucket_size; 69 | pServ->node_size = node_size; 70 | pServ->node_add_index = 0; 71 | pServ->permutation = (int *) malloc(node_size * hash_bucket_size * sizeof(int)); 72 | pServ->node_info_entry = (void **) malloc(node_size * sizeof(void *)); 73 | 74 | pServ->next = (int *) malloc(node_size * sizeof(int)); 75 | pServ->hash_entry = (void **) 
malloc(hash_bucket_size * sizeof(void *)); 76 | 77 | if (NULL == pServ->hash_entry) { 78 | return NULL; 79 | } 80 | pServ->node_name = (char **) malloc(node_size * sizeof(char *)); 81 | 82 | return pServ; 83 | } 84 | 85 | static void maglev_loopup_item_clean(struct MAGLEV_LOOKUP_HASH *psrv, int index) 86 | { 87 | if (NULL == psrv->item[index].hash_entry) { 88 | return; 89 | } 90 | 91 | struct MAGLEV_SERVICE_PARAMS *p_item = & psrv->item[index]; 92 | 93 | int i; 94 | for (i=0;i < p_item->node_size; i++) { 95 | free( *(p_item->node_name + i) ); 96 | } 97 | free( p_item->node_name ); 98 | free( p_item->hash_entry ); 99 | free( p_item->node_info_entry ); 100 | 101 | // free 102 | free( p_item->permutation ); 103 | free( p_item->next ); 104 | 105 | p_item->node_name = NULL; 106 | p_item->hash_entry = NULL; 107 | p_item->node_info_entry = NULL; 108 | p_item->permutation = NULL; 109 | p_item->next = NULL; 110 | 111 | p_item->hash_bucket_size = 0; 112 | p_item->node_size = 0; 113 | p_item->node_add_index = 0; 114 | } 115 | 116 | int maglev_update_service(struct MAGLEV_LOOKUP_HASH *psrv, int node_size, int hash_bucket_size) 117 | { 118 | if (psrv->is_modify_lock) { 119 | return -1; 120 | } 121 | 122 | psrv->is_modify_lock = 1; 123 | 124 | int i_index = (psrv->is_use_index + 1) % 2; 125 | maglev_loopup_item_clean(psrv, i_index); 126 | 127 | psrv->p_temp = __create_maglev_service_unit(&psrv->item[i_index], node_size, hash_bucket_size); 128 | 129 | if (NULL == psrv->p_temp) { 130 | return -1; 131 | } 132 | return 0; 133 | } 134 | 135 | /* 136 | * maglev_add_node 137 | * Add a node server configuration, where the server name must be unique 138 | * */ 139 | int maglev_add_node(struct MAGLEV_LOOKUP_HASH *psrv, char *node_name_key, void *rs_info) 140 | { 141 | if (0 == psrv->is_modify_lock) { 142 | return -2; 143 | } 144 | 145 | if (psrv->p_temp->node_add_index >= psrv->p_temp->node_size) { 146 | return -1; 147 | } 148 | 149 | int M = psrv->p_temp->hash_bucket_size; 150 | int 
*permutation = psrv->p_temp->permutation; 151 | 152 | unsigned int offset = DJBHash(node_name_key); 153 | offset = offset % M; 154 | unsigned int skip = ngx_murmur_hash2(node_name_key, strlen(node_name_key)); 155 | skip = (skip % (M -1)) + 1; 156 | 157 | void **cur_node_info = psrv->p_temp->node_info_entry; 158 | *(cur_node_info + psrv->p_temp->node_add_index) = rs_info; 159 | 160 | int cur_name_size = strlen(node_name_key) + 1; 161 | char *cur_rs_name = (char *) malloc( cur_name_size ); 162 | 163 | snprintf(cur_rs_name, cur_name_size, "%s", node_name_key); 164 | *(psrv->p_temp->node_name + psrv->p_temp->node_add_index) = cur_rs_name; 165 | 166 | int j; 167 | for (j = 0; j < psrv->p_temp->hash_bucket_size; ++j) { 168 | int perm = (offset + j * skip) % M; 169 | *(permutation + psrv->p_temp->node_add_index * M + j) = perm; 170 | } 171 | 172 | psrv->p_temp->node_add_index++; 173 | 174 | return 0; 175 | } 176 | 177 | void maglev_create_ht(struct MAGLEV_LOOKUP_HASH *psrv) 178 | { 179 | if (0 == psrv->is_modify_lock) { 180 | return; 181 | } 182 | 183 | struct MAGLEV_SERVICE_PARAMS *pServ = psrv->p_temp; 184 | 185 | int N = pServ->node_size; 186 | int M = pServ->hash_bucket_size; 187 | int *permutation = pServ->permutation; 188 | 189 | int *next = pServ->next; 190 | void **entry = pServ->hash_entry; 191 | void **cur_node_info = pServ->node_info_entry; 192 | 193 | int j; 194 | for (j=0; jis_modify_lock) { 232 | return; 233 | } 234 | 235 | int i_index = (psrv->is_use_index + 1) % 2; 236 | 237 | psrv->is_use_index = i_index; 238 | 239 | psrv->p_temp = NULL; 240 | psrv->is_modify_lock = 0; 241 | } 242 | 243 | /* 244 | * maglev_lookup_node 245 | * */ 246 | void * maglev_lookup_node(struct MAGLEV_LOOKUP_HASH *psrv, char *key, int key_size) 247 | { 248 | int i_index = psrv->is_use_index; 249 | if (i_index < 0) { 250 | return NULL; 251 | } 252 | if (0 >= psrv->item[ i_index ].hash_bucket_size) { 253 | return NULL; 254 | } 255 | 256 | void *pnode_info; 257 | 258 | unsigned int 
new_key = ngx_murmur_hash2(key,key_size); 259 | int M = psrv->item[i_index].hash_bucket_size; 260 | void **entry = psrv->item[i_index].hash_entry; 261 | 262 | unsigned int hashkey = new_key % M; 263 | pnode_info = *(entry + hashkey); 264 | 265 | return pnode_info; 266 | } 267 | 268 | /* the famous DJB Hash Function for strings */ 269 | unsigned int DJBHash(char *str) 270 | { 271 | unsigned int hash = 5381; 272 | while (*str) { 273 | hash = ((hash << 5) + hash) + (*str++); /* times 33 */ 274 | } 275 | hash &= ~(1 << 31); /* strip the highest bit */ 276 | return hash; 277 | } 278 | 279 | unsigned int ngx_murmur_hash2(char *data, int len) 280 | { 281 | unsigned int h, k; 282 | 283 | h = 0 ^ len; 284 | 285 | while (len >= 4) { 286 | k = data[0]; 287 | k |= data[1] << 8; 288 | k |= data[2] << 16; 289 | k |= data[3] << 24; 290 | 291 | k *= 0x5bd1e995; 292 | k ^= k >> 24; 293 | k *= 0x5bd1e995; 294 | 295 | h *= 0x5bd1e995; 296 | h ^= k; 297 | 298 | data += 4; 299 | len -= 4; 300 | } 301 | 302 | switch (len) { 303 | case 3: 304 | h ^= data[2] << 16; 305 | case 2: 306 | h ^= data[1] << 8; 307 | case 1: 308 | h ^= data[0]; 309 | h *= 0x5bd1e995; 310 | } 311 | 312 | h ^= h >> 13; 313 | h *= 0x5bd1e995; 314 | h ^= h >> 15; 315 | 316 | return h; 317 | } 318 | -------------------------------------------------------------------------------- /maglevhash.h: -------------------------------------------------------------------------------- 1 | /* 2 | * maglevhash.h 3 | * 4 | * Created on: 2017-9-13 5 | */ 6 | 7 | #ifndef MAGLEV_HASH_H_ 8 | #define MAGLEV_HASH_H_ 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | /* 16 | * maglev entry size 17 | * Too small can not guarantee the balance, too large calculations each time, 18 | * it is recommended to set the prime number near the real node * 100. 
19 | */ 20 | #define MAGLEV_HASH_SIZE_MIN 211 /* smallest hash_bucket_size accepted */ 21 | #define MAGLEV_HASH_SIZE_MAX 40009 /* largest hash_bucket_size accepted */ 22 | /* One table generation: the registered nodes and the arrays built from them. */ 23 | struct MAGLEV_SERVICE_PARAMS 24 | { 25 | int node_size; /* number of node slots allocated */ 26 | int node_add_index; /* nodes added so far, i.e. index of the next free slot */ 27 | void **node_info_entry; /* rs_info pointer passed to maglev_add_node, one per node */ 28 | char **node_name; /* heap copy of each node_name_key, one per node */ 29 | 30 | int hash_bucket_size; /* M: number of lookup-table slots */ 31 | void **hash_entry; /* lookup table of M slots; maglev_lookup_node returns one of them */ 32 | 33 | int *permutation; /* node_size rows of M ints, row i = (offset + j * skip) % M for node i */ 34 | int *next; /* node_size ints; presumably each node's cursor into its permutation row while the table is built - confirm */ 35 | }; 36 | /* Two generations: item[is_use_index] serves lookups while the other one is rebuilt. */ 37 | struct MAGLEV_LOOKUP_HASH 38 | { 39 | volatile int is_use_index; /* index into item[] used by lookups; -1 until the first swap */ 40 | volatile int is_modify_lock; /* set by maglev_update_service, cleared by maglev_swap_entry */ 41 | 42 | struct MAGLEV_SERVICE_PARAMS item[2]; 43 | struct MAGLEV_SERVICE_PARAMS *p_temp; /* generation under construction, NULL when no update is open */ 44 | }; 45 | 46 | /* 47 | * maglev_init(): reset psrv to the empty state (no active table, no update in progress) 48 | * */ 49 | void maglev_init(struct MAGLEV_LOOKUP_HASH *psrv); 50 | 51 | /* 52 | * maglev_update_service 53 | * Start building a new maglev consistent hash table in the generation that lookups are not using, 54 | * node_size = real server number 55 | * hash_bucket_size = prime > node_size * 100, within [MAGLEV_HASH_SIZE_MIN, MAGLEV_HASH_SIZE_MAX]; 56 | * return 57 | * 0 == success 58 | * -1 == an update is already in progress, hash_bucket_size was rejected, or allocation failed 59 | * */ 60 | int maglev_update_service(struct MAGLEV_LOOKUP_HASH *psrv, int node_size, int hash_bucket_size); 61 | 62 | /* 63 | * maglev_add_node 64 | * Add a node to the generation under construction, the node_name_key must be unique; node_name_key is copied, rs_info is stored as passed 65 | * return 66 | * 0 == success 67 | * -1 == all node_size slots are already filled, -2 == no update in progress 68 | * */ 69 | int maglev_add_node(struct MAGLEV_LOOKUP_HASH *psrv, char *node_name_key, void *rs_info); 70 | 71 | /* 72 | * maglev_create_ht 73 | * create the lookup (hash) table of the generation under construction; does nothing when no update is in progress 74 | * */ 75 | void maglev_create_ht(struct MAGLEV_LOOKUP_HASH *psrv); 76 | 77 | 78 | /* 79 | * maglev_swap_entry: make the generation under construction the active one and end the update 80 | * */ 81 | void maglev_swap_entry(struct MAGLEV_LOOKUP_HASH *psrv); 82 | 83 | 84 | /* 85 | * maglev_lookup_node: hash key_size bytes of key with ngx_murmur_hash2 and return the active table's slot (hash % M); NULL when no table is active 86 | * */ 87 | void * maglev_lookup_node(struct MAGLEV_LOOKUP_HASH *psrv, char *key, int key_size); 88 | 89 | 90 | /* the famous DJB Hash Function for strings (highest bit stripped), and MurmurHash2 over len bytes of data */ 91 | unsigned int DJBHash(char *str); 92 | unsigned int ngx_murmur_hash2(char *data, int len); 93 | 94 | 95 | #endif /* MAGLEV_HASH_H_ */ 96 | --------------------------------------------------------------------------------