├── .gitignore ├── CMakeLists.txt ├── README.md ├── build ├── gen_confs.py └── run_analysis_for_conf.py ├── configure_all.sh ├── configure_client.sh ├── configure_server.sh ├── scripts ├── setup_dkdp_env.sh └── unbind.sh └── src ├── CMakeLists.txt ├── alloc.h ├── alloc_dynamic.c ├── alloc_dynamic.h ├── alloc_malloc.c ├── alloc_malloc.h ├── alloc_pool.c ├── alloc_pool.h ├── basic_types.h ├── city.c ├── city.h ├── citycrc.h ├── common.h ├── config.h ├── hash.c ├── hash.h ├── load.c ├── mehcached.h ├── microbench.c ├── net_common.c ├── net_common.h ├── netbench_analysis.c ├── netbench_client.c ├── netbench_config.c ├── netbench_config.h ├── netbench_hot_item_hash.h ├── netbench_server.c ├── perf_count ├── CMakeLists.txt ├── perf_count.c └── perf_count.h ├── proto.h ├── shm.c ├── shm.h ├── stopwatch.c ├── stopwatch.h ├── table.c ├── table.h ├── test.c ├── util.c ├── util.h └── zipf.h /.gitignore: -------------------------------------------------------------------------------- 1 | build/CMakeCache.txt 2 | build/CMakeFiles 3 | build/src 4 | Makefile 5 | cmake_install.cmake 6 | 7 | test 8 | load 9 | microbench 10 | microbench_store 11 | netbench_client 12 | netbench_server 13 | netbench_server_store 14 | netbench_client_latency 15 | netbench_server_latency 16 | netbench_client_soft_fdir 17 | netbench_server_soft_fdir 18 | netbench_analysis 19 | 20 | perf.data 21 | perf.data.old 22 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright 2014 Carnegie Mellon University 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | cmake_minimum_required(VERSION 2.6) 16 | 17 | if(CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_CURRENT_BINARY_DIR) 18 | message(FATAL_ERROR "Use out-of-source build only!") 19 | endif(CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_CURRENT_BINARY_DIR) 20 | 21 | add_subdirectory(src) 22 | 23 | add_custom_target( 24 | symbolic_links 25 | ALL 26 | ln -sf src/test src/load src/microbench src/microbench_store src/netbench_client src/netbench_server src/netbench_client_latency src/netbench_client_soft_fdir src/netbench_server_latency src/netbench_server_soft_fdir src/netbench_server_store src/netbench_analysis . 27 | ) 28 | 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | MICA 2 | ==== 3 | 4 | A fast in-memory key-value store. 5 | 6 | 7 | Hardware Requirements 8 | --------------------- 9 | 10 | * Dual CPU system 11 | * Intel 10 GbE NICs 12 | * Note: The current codebase has several assumptions on the hardware configuration of the server and clients. 13 | It runs ideally on a dual octa-core server with 4 dual-port 10 GbE NICs, and clients with 2 dual-port 10 GbE NICs. 
14 | 15 | 16 | Software Requirements 17 | -------------------- 18 | 19 | * linux x86_64 >= 3.2.0 20 | * gcc >= 4.6.0 21 | * Python >= 2.7.0 22 | * Intel DPDK >= 1.5.0 23 | * bash >= 4.0.0 24 | * cmake >= 2.6.0 25 | * Hugepage (2 GiB) support 26 | 27 | 28 | Executables 29 | ----------- 30 | 31 | * build/netbench_server: MICA server in cache mode (use with netbench_client) 32 | * build/netbench_server_store: MICA server in store mode (use with netbench_client) 33 | * build/netbench_server_latency: MICA server in cache mode modified for end-to-end latency measurement (use with netbench_client_latency) 34 | * build/netbench_server_soft_fdir: MICA server in cache mode using software-based request direction (use with netbench_client_soft_fdir) 35 | * build/netbench_client*: MICA clients 36 | * build/netbench_analysis: workload analyzer (used for generating preset configurations) 37 | * build/microbench: a local microbenchmark for MICA in cache mode 38 | * build/microbench_store: a local microbenchmark for MICA in store mode 39 | * build/test: a simple feature test program 40 | * build/load: a load factor experiment 41 | 42 | 43 | Compiling Executables 44 | --------------------- 45 | 46 | # unpack DPDK as "DPDK" to the directory containing mica 47 | $ cd mica/build 48 | $ ../scripts/setup_dkdp_env.sh # this uses sudo 49 | $ ../configure_all.sh 50 | $ make 51 | 52 | 53 | Generating Configuration Files 54 | ------------------------------ 55 | 56 | # conf_* files determine how MICA uses system resources. 
build/gen_confs.py generates a preset of configuration files for a 16-core server and 12-core clients 57 | # in mica/build 58 | $ ./run_analysis_for_conf.py # this uses sudo 59 | $ ./gen_confs.py 60 | 61 | 62 | Running a Server 63 | ---------------- 64 | 65 | # in mica/build 66 | $ sudo ./netbench_server conf_machines_DATASET_CMODE_0.5 server 0 0 conf_prepopulation_empty 67 | # DATASET=0,1,2 (used to determine how much memory to allocate); CMODE=EREW,CREW,CRCWS (specifies the data access mode) 68 | 69 | 70 | Running a Client (e.g., client0) 71 | -------------------------------- 72 | 73 | # in mica/build 74 | $ sudo ./netbench_client conf_machines_DATASET_CMODE_0.5 client0 0 0 conf_workload_DATASET_SKEW_GET_PUT_0.00_1 75 | # DATASET=0,1,2 (specifies the dataset to use); SKEW=uniform,skewed,single (specifies the workload skew); GET/PUT=0.00,0.50,0.95,1.00 (specifies the read/write ratio) 76 | 77 | 78 | Running a Local Microbenchmark 79 | ------------------------------ 80 | 81 | # in mica/build 82 | $ sudo ./microbench CMODE SKEWNESS 0.5 83 | # CMODE=EREW,CREW,CRCWS (specifies the data access mode); SKEWNESS=0(uniform),0.99(skewed),99(single) (specifies the workload skew) 84 | 85 | 86 | License 87 | ------- 88 | 89 | Copyright 2014 Carnegie Mellon University 90 | 91 | Licensed under the Apache License, Version 2.0 (the "License"); 92 | you may not use this file except in compliance with the License. 93 | You may obtain a copy of the License at 94 | 95 | http://www.apache.org/licenses/LICENSE-2.0 96 | 97 | Unless required by applicable law or agreed to in writing, software 98 | distributed under the License is distributed on an "AS IS" BASIS, 99 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 100 | See the License for the specific language governing permissions and 101 | limitations under the License. 
102 | 103 | -------------------------------------------------------------------------------- /build/gen_confs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | class ServerConf: 4 | def __init__(self, server_name): 5 | self.server_name = server_name 6 | self.ports = [] 7 | self.threads = [] 8 | self.partitions = [] 9 | self.hot_items = [] 10 | 11 | def add_port(self, mac_addr, ip_addr): 12 | self.ports.append((mac_addr, ip_addr)) 13 | 14 | def add_thread(self, port_ids): 15 | self.threads.append(port_ids) 16 | 17 | def add_partition(self, num_items, alloc_size, concurrent_table_read, concurrent_table_write, concurrent_alloc_write, thread_id, mth_threshold): 18 | self.partitions.append((num_items, alloc_size, concurrent_table_read, concurrent_table_write, concurrent_alloc_write, thread_id, mth_threshold)) 19 | 20 | def add_hot_item(self, key_hash, thread_id): 21 | self.hot_items.append((key_hash, thread_id)) 22 | 23 | def write(self, f): 24 | f.write('server,%s\n' % self.server_name) 25 | for port in self.ports: 26 | f.write('server_port,%s,%s\n' % port) 27 | for thread in self.threads: 28 | f.write('server_thread,%s\n' % ' '.join([str(port_id) for port_id in thread])) 29 | for partition in self.partitions: 30 | f.write('server_partition,%s,%s,%s,%s,%s,%s,%s\n' % partition) 31 | for hot_item in self.hot_items: 32 | f.write('server_hot_item,%016x,%s\n' % hot_item) 33 | f.write('\n') 34 | 35 | class ClientConf: 36 | def __init__(self, client_name): 37 | self.client_name = client_name 38 | self.ports = [] 39 | self.threads = [] 40 | 41 | def add_port(self, mac_addr, ip_addr): 42 | self.ports.append((mac_addr, ip_addr)) 43 | 44 | def add_thread(self): 45 | self.threads.append(None) 46 | 47 | def write(self, f): 48 | f.write('client,%s\n' % self.client_name) 49 | for thread in self.threads: 50 | f.write('client_thread,\n') 51 | for port in self.ports: 52 | f.write('client_port,%s,%s\n' % port) 53 | 
f.write('\n') 54 | 55 | class PrePopulationConf: 56 | def __init__(self, server_name): 57 | self.server_name = server_name 58 | self.dataset = None 59 | 60 | def set(self, num_items, key_length, value_length): 61 | self.dataset = (num_items, key_length, value_length) 62 | 63 | def write(self, f): 64 | f.write('prepopulation,%s\n' % self.server_name) 65 | f.write('dataset,%s,%s,%s\n' % self.dataset) 66 | f.write('\n') 67 | 68 | class WorkloadConf: 69 | def __init__(self, client_name): 70 | self.client_name = client_name 71 | self.threads = [] 72 | 73 | def add_thread(self, port_ids, server_name, partition_mode, num_items, key_length, value_length, zipf_theta, get_ratio, put_ratio, increment_ratio, batch_size, num_operations, duration): 74 | assert abs(get_ratio) + abs(put_ratio) + abs(increment_ratio) == 1. 75 | self.threads.append((port_ids, server_name, partition_mode, num_items, key_length, value_length, zipf_theta, get_ratio, put_ratio, increment_ratio, batch_size, num_operations, duration)) 76 | 77 | def write(self, f): 78 | f.write('workload,%s\n' % self.client_name) 79 | for thread in self.threads: 80 | f.write('workload_thread,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n' % ( 81 | ' '.join([str(port_id) for port_id in thread[0]]), 82 | thread[1], 83 | thread[2], 84 | thread[3], 85 | thread[4], 86 | thread[5], 87 | thread[6], 88 | thread[7], 89 | thread[8], 90 | thread[9], 91 | thread[10], 92 | thread[11], 93 | thread[12] 94 | )) 95 | f.write('\n') 96 | 97 | def init_addr(): 98 | global _last_addr_id 99 | _last_addr_id = 0 100 | 101 | def next_addr(): 102 | global _last_addr_id 103 | 104 | addr_id = _last_addr_id 105 | _last_addr_id += 1 106 | 107 | mac_addr = '80:00:00:00:00:{:02}'.format(addr_id) 108 | ip_addr = '10.0.0.{}'.format(addr_id) 109 | return mac_addr, ip_addr 110 | 111 | 112 | class ConcurrencyModel: 113 | def concurrent_table_read(self, partition_id): pass 114 | def concurrent_table_write(self, partition_id): pass 115 | def 
concurrent_alloc_write(self, partition_id): pass 116 | def thread_id(self, partition_id): pass 117 | def hot_items(self): pass 118 | 119 | class EREW(ConcurrencyModel): 120 | name = 'EREW' 121 | def concurrent_table_read(self, partition_id): return 0 122 | def concurrent_table_write(self, partition_id): return 0 123 | def concurrent_alloc_write(self, partition_id): return 0 124 | def thread_id(self, partition_id): return partition_id % 16 125 | def hot_items(self): return [] 126 | 127 | class CREW(EREW): 128 | name = 'CREW' 129 | def concurrent_table_read(self, partition_id): return 1 130 | 131 | class CRCW(EREW): 132 | name = 'CRCW' 133 | def concurrent_table_read(self, partition_id): return 1 134 | def concurrent_table_write(self, partition_id): return 1 135 | 136 | class CRCWS(EREW): 137 | name = 'CRCWS' 138 | def concurrent_table_read(self, partition_id): return 1 139 | def concurrent_table_write(self, partition_id): return 1 140 | def concurrent_alloc_write(self, partition_id): return 1 141 | 142 | class CREW0(CREW): 143 | name = 'CREW0' 144 | def thread_id(self, partition_id): return 0 # all writes go to core 0 145 | 146 | # use this for EREW partitions, CREW hot items 147 | #class LB(EREW): 148 | # use this for CREW partitions and hot items (uncomment MEHCACHED_LOAD_BALANCE_USE_CREW_PARTITION in netbench_analysis.c) 149 | class LB(CREW): 150 | def __init__(self, num_hot_items, zipf, get_ratio): 151 | self.name = 'LB-%d-%s-%.2f' % (num_hot_items, zipf[0], get_ratio) 152 | self.thread_id_list = None 153 | self.hot_item_list = None 154 | 155 | f = open('analysis_%d_%s_%.2f' % (num_hot_items, zipf[0], get_ratio)) 156 | lines = list(f.readlines()) 157 | i = 0 158 | while i < len(lines): 159 | line = lines[i] 160 | if line.strip() == 'partition_to_thread:': 161 | self.thread_id_list = eval('[' + lines[i + 1].strip() + ']') 162 | elif line.strip() == 'hot_item_to_thread:': 163 | self.hot_item_list = eval('[' + lines[i + 1].strip() + ']') 164 | i += 1 165 | assert 
self.thread_id_list != None 166 | assert self.hot_item_list != None 167 | 168 | def thread_id(self, partition_id): return self.thread_id_list[partition_id] 169 | def hot_items(self): return self.hot_item_list 170 | 171 | 172 | def main(): 173 | datasets = [ 174 | (8, 8, 192 * 1048576), 175 | (16, 64, 128 * 1048576), 176 | (128, 1024, 8 * 1048576), 177 | ] 178 | 179 | f = open('conf_prepopulation_empty', 'w') 180 | p = PrePopulationConf('server') 181 | p.set(0, 8, 8) 182 | p.write(f) 183 | 184 | for dataset, (key_length, value_length, num_items) in enumerate(datasets): 185 | assert key_length >= len('%x' % (num_items - 1)) # for hexadecimal key 186 | #num_partitions = 64 187 | num_partitions = 16 188 | # the following should be the same as in run_analysis_for_conf.py 189 | # isolated_server_numa_nodes = True 190 | isolated_server_numa_nodes = False 191 | 192 | # the followings are always 0 to allow exp.py to control duration 193 | load_duration = 0. 194 | trans_duration = 0. 195 | 196 | concurrency_list = [EREW(), CREW(), CRCW(), CRCWS(), CREW0()] 197 | for num_hot_items in (0, 32): 198 | for zipf in (('uniform', 0.), ('skewed', 0.99), ('single', 99.)): 199 | for get_ratio in (0., 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 1.): 200 | concurrency_list.append(LB(num_hot_items, zipf, get_ratio)) 201 | 202 | mth_threshold_list = (1.0, 0.5, 0.0) 203 | 204 | for concurrency in concurrency_list: 205 | for mth_threshold in mth_threshold_list: 206 | init_addr() 207 | 208 | f = open('conf_machines_%s_%s_%s' % (dataset, concurrency.name, mth_threshold), 'w') 209 | 210 | s = ServerConf('server') 211 | for port_id in range(8): 212 | s.add_port(*next_addr()) 213 | for thread_id in range(0, 16, 2): 214 | s.add_thread(list(range(0, 4))) 215 | s.add_thread(list(range(4, 8))) 216 | for partition_id in range(num_partitions): 217 | num_items_per_partition = num_items / num_partitions 218 | alloc_size_per_partition = num_items * (key_length + value_length) / num_partitions 219 | 220 | 
concurrent_table_read = concurrency.concurrent_table_read(partition_id) 221 | concurrent_table_write = concurrency.concurrent_table_write(partition_id) 222 | concurrent_alloc_write = concurrency.concurrent_alloc_write(partition_id) 223 | thread_id = concurrency.thread_id(partition_id) 224 | s.add_partition(num_items_per_partition, alloc_size_per_partition, concurrent_table_read, concurrent_table_write, concurrent_alloc_write, thread_id, mth_threshold) 225 | for hot_item in concurrency.hot_items(): 226 | s.add_hot_item(*hot_item) 227 | s.write(f) 228 | 229 | c0 = ClientConf('client0') 230 | for port in range(4): 231 | c0.add_port(*next_addr()) 232 | for thread_id in range(12): 233 | c0.add_thread() 234 | c0.write(f) 235 | 236 | c1 = ClientConf('client1') 237 | for port in range(4): 238 | c1.add_port(*next_addr()) 239 | for thread_id in range(12): 240 | c1.add_thread() 241 | c1.write(f) 242 | 243 | f = open('conf_prepopulation_%s' % dataset, 'w') 244 | p = PrePopulationConf('server') 245 | p.set(num_items, key_length, value_length) 246 | p.write(f) 247 | 248 | for zipf in (('uniform', 0.), ('skewed', 0.99), ('single', 99.)): 249 | # load operations 250 | f = open('conf_workload_%s_load_%s' % (dataset, zipf[0]), 'w') 251 | if zipf[1] == 0.: 252 | # use sequential uniform instead for fast ingest 253 | zipf_theta = -1.0 254 | else: 255 | # other skewed distributions usually allow fast ingest 256 | zipf_theta = zipf[1] 257 | get_ratio = 0. 258 | put_ratio = 1. - get_ratio 259 | increment_ratio = 0. 
260 | load_batch_size = 1 261 | num_operations = 0 262 | duration = load_duration 263 | w = WorkloadConf(c0.client_name) 264 | for thread_id in range(12): 265 | if isolated_server_numa_nodes: 266 | w.add_thread(list(range(4)), s.server_name, 0, num_items, key_length, value_length, zipf_theta, get_ratio, put_ratio, increment_ratio, load_batch_size, num_operations, duration) 267 | else: 268 | w.add_thread(list(range(4)), s.server_name, -1, num_items, key_length, value_length, zipf_theta, get_ratio, put_ratio, increment_ratio, load_batch_size, num_operations, duration) 269 | w.write(f) 270 | w = WorkloadConf(c1.client_name) 271 | for thread_id in range(12): 272 | if isolated_server_numa_nodes: 273 | w.add_thread(list(range(4)), s.server_name, 1, num_items, key_length, value_length, zipf_theta, get_ratio, put_ratio, increment_ratio, load_batch_size, num_operations, duration) 274 | else: 275 | w.add_thread(list(range(4)), s.server_name, -1, num_items, key_length, value_length, zipf_theta, get_ratio, put_ratio, increment_ratio, load_batch_size, num_operations, duration) 276 | w.write(f) 277 | 278 | # trans operations 279 | zipf_theta = zipf[1] 280 | for get_ratio, put_ratio, increment_ratio in ( 281 | (0., 1., 0.), 282 | (0.1, 0.9, 0.), 283 | (0.25, 0.75, 0.), 284 | (0.5, 0.5, 0.), 285 | (0.75, 0.25, 0.), 286 | (0.9, 0.1, 0.), 287 | (0.95, 0.05, 0.), 288 | (0.99, 0.01, 0.), 289 | (1., 0., 0.), 290 | (0., -1., 0.), 291 | (-0.1, -0.9, 0.), 292 | (-0.25, -0.75, 0.), 293 | (-0.5, -0.5, 0.), 294 | (-0.75, -0.25, 0.), 295 | (-0.9, -0.1, 0.), 296 | (-0.95, -0.05, 0.), 297 | (-0.99, -0.01, 0.), 298 | (-1., 0., 0.), 299 | (0., 0., 1.), 300 | ): 301 | for batch_size in (1, 2, 4, 8, 16, 32): 302 | f = open('conf_workload_%s_%s_%.2f_%.2f_%.2f_%s' % (dataset, zipf[0], get_ratio, put_ratio, increment_ratio, batch_size), 'w') 303 | num_operations = 0 304 | duration = trans_duration 305 | w = WorkloadConf(c0.client_name) 306 | for thread_id in range(12): 307 | if 
isolated_server_numa_nodes: 308 | w.add_thread(list(range(4)), s.server_name, 0, num_items, key_length, value_length, zipf_theta, get_ratio, put_ratio, increment_ratio, batch_size, num_operations, duration) 309 | else: 310 | w.add_thread(list(range(4)), s.server_name, -1, num_items, key_length, value_length, zipf_theta, get_ratio, put_ratio, increment_ratio, batch_size, num_operations, duration) 311 | w.write(f) 312 | w = WorkloadConf(c1.client_name) 313 | for thread_id in range(12): 314 | if isolated_server_numa_nodes: 315 | w.add_thread(list(range(4)), s.server_name, 1, num_items, key_length, value_length, zipf_theta, get_ratio, put_ratio, increment_ratio, batch_size, num_operations, duration) 316 | else: 317 | w.add_thread(list(range(4)), s.server_name, -1, num_items, key_length, value_length, zipf_theta, get_ratio, put_ratio, increment_ratio, batch_size, num_operations, duration) 318 | w.write(f) 319 | 320 | 321 | if __name__ == '__main__': 322 | main() 323 | -------------------------------------------------------------------------------- /build/run_analysis_for_conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import os 4 | 5 | # isolated_server_numa_nodes = True 6 | isolated_server_numa_nodes = False 7 | 8 | for num_hot_items in (0, 32): 9 | for zipf in (('uniform', 0.), ('skewed', 0.99), ('single', 99.)): 10 | for get_ratio in (0., 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 1.): 11 | cmd = './netbench_analysis %d %f %f %d > analysis_%d_%s_%.2f' % (num_hot_items, zipf[1], get_ratio, isolated_server_numa_nodes, num_hot_items, zipf[0], get_ratio) 12 | os.system(cmd) 13 | 14 | -------------------------------------------------------------------------------- /configure_all.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copyright 2014 Carnegie Mellon University 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the 
"License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | [ ! -d "$(dirname $0)/build" ] && mkdir "$(dirname $0)/build" 18 | 19 | cd "$(dirname $0)/build" || exit 1 20 | 21 | rm -f CMakeCache.txt 22 | 23 | NDEBUG=yes cmake .. 24 | 25 | -------------------------------------------------------------------------------- /configure_client.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copyright 2014 Carnegie Mellon University 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | [ ! -d "$(dirname $0)/build" ] && mkdir "$(dirname $0)/build" 18 | 19 | cd "$(dirname $0)/build" || exit 1 20 | 21 | rm -f CMakeCache.txt 22 | 23 | NDEBUG=yes NSERVER=yes cmake .. 
24 | 25 | -------------------------------------------------------------------------------- /configure_server.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copyright 2014 Carnegie Mellon University 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | [ ! -d "$(dirname $0)/build" ] && mkdir "$(dirname $0)/build" 18 | 19 | cd "$(dirname $0)/build" || exit 1 20 | 21 | rm -f CMakeCache.txt 22 | 23 | NDEBUG=yes NCLIENT=yes cmake .. 24 | 25 | -------------------------------------------------------------------------------- /scripts/setup_dkdp_env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ##### from DPDK/tools/setup.sh 4 | 5 | # 6 | # Sets up environment variables for ICC. 7 | # 8 | setup_icc() 9 | { 10 | DEFAULT_PATH=/opt/intel/bin/iccvars.sh 11 | param=$1 12 | shpath=`which iccvars.sh 2> /dev/null` 13 | if [ $? -eq 0 ] ; then 14 | echo "Loading iccvars.sh from $shpath for $param" 15 | source $shpath $param 16 | elif [ -f $DEFAULT_PATH ] ; then 17 | echo "Loading iccvars.sh from $DEFAULT_PATH for $param" 18 | source $DEFAULT_PATH $param 19 | else 20 | echo "## ERROR: cannot find 'iccvars.sh' script to set up ICC." 21 | echo "## To fix, please add the directory that contains" 22 | echo "## iccvars.sh to your 'PATH' environment variable." 
23 | quit 24 | fi 25 | } 26 | 27 | # 28 | # Sets RTE_TARGET and does a "make install". 29 | # 30 | setup_target() 31 | { 32 | #option=$1 33 | #export RTE_TARGET=${TARGETS[option]} 34 | 35 | compiler=${RTE_TARGET##*-} 36 | if [ "$compiler" == "icc" ] ; then 37 | platform=${RTE_TARGET%%-*} 38 | if [ "$platform" == "x86_64" ] ; then 39 | setup_icc intel64 40 | else 41 | setup_icc ia32 42 | fi 43 | fi 44 | #if [ "$QUIT" == "0" ] ; then 45 | if [ ! -d $RTE_SDK/$RTE_TARGET ]; then 46 | make config T=${RTE_TARGET} O=$RTE_SDK/$RTE_TARGET 47 | sed -i 's/CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE=.*/CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE=8192/g' $RTE_SDK/$RTE_TARGET/.config 48 | rm $RTE_SDK/$RTE_TARGET/include/rte_config.h 49 | fi 50 | make -C $RTE_SDK/$RTE_TARGET 51 | #fi 52 | #echo "------------------------------------------------------------------------------" 53 | #echo " RTE_TARGET exported as $RTE_TARGET" 54 | #echo "------------------------------------------------------------------------------" 55 | } 56 | 57 | # 58 | # Uninstall all targets. 59 | # 60 | uninstall_targets() 61 | { 62 | make uninstall 63 | } 64 | 65 | # 66 | # Creates hugepage filesystem. 67 | # 68 | create_mnt_huge() 69 | { 70 | echo "Creating /mnt/huge and mounting as hugetlbfs" 71 | sudo mkdir -p /mnt/huge 72 | 73 | grep -s '/mnt/huge' /proc/mounts > /dev/null 74 | if [ $? -ne 0 ] ; then 75 | sudo mount -t hugetlbfs nodev /mnt/huge 76 | fi 77 | } 78 | 79 | # 80 | # Removes hugepage filesystem. 81 | # 82 | remove_mnt_huge() 83 | { 84 | echo "Unmounting /mnt/huge and removing directory" 85 | grep -s '/mnt/huge' /proc/mounts > /dev/null 86 | if [ $? -eq 0 ] ; then 87 | sudo umount /mnt/huge 88 | fi 89 | 90 | if [ -d /mnt/huge ] ; then 91 | sudo rm -R /mnt/huge 92 | fi 93 | } 94 | 95 | # 96 | # Unloads igb_uio.ko. 97 | # 98 | remove_igb_uio_module() 99 | { 100 | echo "Unloading any existing DPDK UIO module" 101 | /sbin/lsmod | grep -s igb_uio > /dev/null 102 | if [ $? 
-eq 0 ] ; then 103 | sudo /sbin/rmmod igb_uio 104 | fi 105 | } 106 | 107 | # 108 | # Loads new igb_uio.ko (and uio module if needed). 109 | # 110 | load_igb_uio_module() 111 | { 112 | if [ ! -f $RTE_SDK/$RTE_TARGET/kmod/igb_uio.ko ];then 113 | echo "## ERROR: Target does not have the DPDK UIO Kernel Module." 114 | echo " To fix, please try to rebuild target." 115 | return 116 | fi 117 | 118 | remove_igb_uio_module 119 | 120 | /sbin/lsmod | grep -s uio > /dev/null 121 | if [ $? -ne 0 ] ; then 122 | if [ -f /lib/modules/$(uname -r)/kernel/drivers/uio/uio.ko ] ; then 123 | echo "Loading uio module" 124 | sudo /sbin/modprobe uio 125 | fi 126 | fi 127 | 128 | # UIO may be compiled into kernel, so it may not be an error if it can't 129 | # be loaded. 130 | 131 | echo "Loading DPDK UIO module" 132 | sudo /sbin/insmod $RTE_SDK/$RTE_TARGET/kmod/igb_uio.ko 133 | if [ $? -ne 0 ] ; then 134 | echo "## ERROR: Could not load kmod/igb_uio.ko." 135 | quit 136 | fi 137 | } 138 | 139 | # 140 | # Removes all reserved hugepages. 141 | # 142 | clear_huge_pages() 143 | { 144 | echo > .echo_tmp 145 | for d in /sys/devices/system/node/node? ; do 146 | echo "echo 0 > $d/hugepages/hugepages-2048kB/nr_hugepages" >> .echo_tmp 147 | done 148 | echo "Removing currently reserved hugepages" 149 | sudo sh .echo_tmp 150 | rm -f .echo_tmp 151 | 152 | remove_mnt_huge 153 | } 154 | 155 | # 156 | # Creates hugepages. 157 | # 158 | set_non_numa_pages() 159 | { 160 | clear_huge_pages 161 | 162 | echo "" 163 | echo " Input the number of 2MB pages" 164 | echo " Example: to have 128MB of hugepages available, enter '64' to" 165 | echo " reserve 64 * 2MB pages" 166 | echo -n "Number of pages: " 167 | #read Pages 168 | Pages=$1 169 | 170 | echo "echo $Pages > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages" > .echo_tmp 171 | 172 | echo "Reserving hugepages" 173 | sudo sh .echo_tmp 174 | rm -f .echo_tmp 175 | 176 | create_mnt_huge 177 | } 178 | 179 | # 180 | # Creates hugepages on specific NUMA nodes. 
181 | # 182 | set_numa_pages() 183 | { 184 | clear_huge_pages 185 | 186 | echo "" 187 | echo " Input the number of 2MB pages for each node" 188 | echo " Example: to have 128MB of hugepages available per node," 189 | echo " enter '64' to reserve 64 * 2MB pages on each node" 190 | 191 | echo > .echo_tmp 192 | for d in /sys/devices/system/node/node? ; do 193 | node=$(basename $d) 194 | echo -n "Number of pages for $node: " 195 | #read Pages 196 | Pages=$1 197 | shift 198 | echo "echo $Pages > $d/hugepages/hugepages-2048kB/nr_hugepages" >> .echo_tmp 199 | done 200 | echo "Reserving hugepages" 201 | sudo sh .echo_tmp 202 | rm -f .echo_tmp 203 | 204 | create_mnt_huge 205 | } 206 | 207 | # 208 | # Run unit test application. 209 | # 210 | run_test_app() 211 | { 212 | echo "" 213 | echo " Enter hex bitmask of cores to execute test app on" 214 | echo " Example: to execute app on cores 0 to 7, enter 0xff" 215 | echo -n "bitmask: " 216 | read Bitmask 217 | echo "Launching app" 218 | sudo ${RTE_TARGET}/app/test -c $Bitmask $EAL_PARAMS 219 | } 220 | 221 | # 222 | # Run unit testpmd application. 223 | # 224 | run_testpmd_app() 225 | { 226 | echo "" 227 | echo " Enter hex bitmask of cores to execute testpmd app on" 228 | echo " Example: to execute app on cores 0 to 7, enter 0xff" 229 | echo -n "bitmask: " 230 | read Bitmask 231 | echo "Launching app" 232 | sudo ${RTE_TARGET}/app/testpmd -c $Bitmask $EAL_PARAMS -- -i 233 | } 234 | 235 | # 236 | # Print hugepage information. 
237 | # 238 | grep_meminfo() 239 | { 240 | grep -i huge /proc/meminfo 241 | } 242 | 243 | # 244 | # List all hugepage file references 245 | # 246 | ls_mnt_huge() 247 | { 248 | ls -lh /mnt/huge 249 | } 250 | 251 | ##### from DPDK/tools/setup.sh 252 | 253 | 254 | export RTE_SDK=`readlink -f $(dirname ${BASH_SOURCE[0]})/../../DPDK` 255 | export RTE_TARGET=x86_64-default-linuxapp-gcc 256 | 257 | pushd "$RTE_SDK"; setup_target; popd 258 | 259 | #if [ "$HOSTNAME" == "server" ]; then 260 | set_numa_pages 8192 8192 # 32 GiB 261 | #else 262 | # set_numa_pages 2048 2048 # 8 GiB 263 | #fi 264 | load_igb_uio_module 265 | 266 | grep_meminfo 267 | 268 | sudo $RTE_SDK/tools/pci_unbind.py --force --bind=igb_uio xge0 xge1 xge2 xge3 269 | sudo $RTE_SDK/tools/pci_unbind.py --force --bind=igb_uio xge4 xge5 xge6 xge7 270 | 271 | # disable OOM kills 272 | sudo sysctl -w vm.overcommit_memory=1 273 | sudo sysctl -w kernel.shmmax=12884901888 274 | sudo sysctl -w kernel.shmall=12884901888 275 | -------------------------------------------------------------------------------- /scripts/unbind.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export RTE_SDK=`readlink -f $(dirname ${BASH_SOURCE[0]})/../../DPDK` 4 | 5 | DEVS=`lspci | grep 82599EB | awk '{ print $1 }'` 6 | 7 | sudo $RTE_SDK/tools/pci_unbind.py --bind=ixgbe $DEVS 8 | 9 | -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright 2014 Carnegie Mellon University 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

cmake_minimum_required(VERSION 2.6)

# basic configuration
# refuse in-source builds so generated files never land next to the sources
if(CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_CURRENT_BINARY_DIR)
	message(FATAL_ERROR "Use out-of-source build only!")
endif(CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_CURRENT_BINARY_DIR)

project(MEHCACHED)

add_subdirectory(perf_count)
set(LIBRARY_PATH ${LIBRARY_PATH} perf_count/)

# compiler options
# (passed through add_definitions, so they apply to every target in this directory)
add_definitions(-pthread)
add_definitions(-g -Wall -Wextra -Wsign-conversion -Winline -Wno-unused-function)
add_definitions(-Wconversion)
# NOTE(review): -O9 is presumably meant as "highest optimization level" -- confirm against the compiler in use
add_definitions(-O9)
add_definitions(-msse4.2 -march=corei7)

# preprocessor
# NDEBUG is seeded from the environment and cached; "yes" compiles out assert()
SET(NDEBUG "$ENV{NDEBUG}" CACHE STRING "define NDEBUG macro")

message(STATUS "NDEBUG (disable all additional checkings; no*, yes) = ${NDEBUG}")
if("${NDEBUG}" STREQUAL "yes")
	add_definitions(-DNDEBUG)
endif()

# NLOCAL / NCLIENT / NSERVER = "yes" skips the corresponding group of executables below
SET(NLOCAL "$ENV{NLOCAL}" CACHE STRING "no local programs")
SET(NCLIENT "$ENV{NCLIENT}" CACHE STRING "no clients")
SET(NSERVER "$ENV{NSERVER}" CACHE STRING "no servers")

message(STATUS "NLOCAL (do not compile local programs; no*, yes) = ${NLOCAL}")
message(STATUS "NCLIENT (do not compile clients; no*, yes) = ${NCLIENT}")
message(STATUS "NSERVER (do not compile server; no*, yes) = ${NSERVER}")

# common source files
set(SOURCES ${SOURCES} hash.c)
set(SOURCES ${SOURCES} city.c)
set(SOURCES ${SOURCES} stopwatch.c)
set(SOURCES ${SOURCES} shm.c)

# common libraries
set(LIBRARIES ${LIBRARIES} rt crypto perf_count m pthread)

# tcmalloc (optional)
#add_definitions(-fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free)
#set(LIBRARIES ${LIBRARIES} tcmalloc_minimal)

# DPDK
# the SDK is expected two directories above src/, in a sibling directory named DPDK
set(RTE_SDK ${CMAKE_CURRENT_SOURCE_DIR}/../../DPDK)
set(RTE_TARGET x86_64-default-linuxapp-gcc)
add_definitions(-DUSE_DPDK)
add_definitions(-isystem ${RTE_SDK}/${RTE_TARGET}/include -include rte_config.h)
link_directories(${LINK_DIRECTORIES} ${RTE_SDK}/${RTE_TARGET}/lib)
set(LIBRARIES ${LIBRARIES} rte_eal rte_malloc rte_mempool rte_ring)	# for basic features
set(LIBRARIES ${LIBRARIES} ethdev rte_mbuf rte_pmd_ixgbe librte_pmd_ring.a)	# for network features
set(LIBRARIES ${LIBRARIES} pthread rt)	# to make sure symbols in pthread and rt are resolved

# local (non-network) programs
if(NOT "${NLOCAL}" STREQUAL "yes")
	# test
	add_executable(test test.c ${SOURCES})
	target_link_libraries(test ${LIBRARIES})

	# load
	add_executable(load load.c ${SOURCES})
	target_link_libraries(load ${LIBRARIES})

	# microbench
	add_executable(microbench microbench.c ${SOURCES})
	target_link_libraries(microbench ${LIBRARIES})

	# microbench_store (same source as microbench, built with MEHCACHED_NO_EVICTION)
	add_executable(microbench_store microbench.c ${SOURCES})
	set_target_properties(microbench_store PROPERTIES COMPILE_FLAGS "-DMEHCACHED_NO_EVICTION")
	target_link_libraries(microbench_store ${LIBRARIES})

	# netbench_analysis
	add_executable(netbench_analysis netbench_config.c netbench_analysis.c ${SOURCES})
	target_link_libraries(netbench_analysis ${LIBRARIES})
endif()

# network clients: one source set, three builds that differ only in a -D flag
if(NOT "${NCLIENT}" STREQUAL "yes")
	# netbench_client
	add_executable(netbench_client net_common.c netbench_config.c netbench_client.c ${SOURCES})
	target_link_libraries(netbench_client ${LIBRARIES})

	# netbench_client_latency
	add_executable(netbench_client_latency net_common.c netbench_config.c netbench_client.c ${SOURCES})
	set_target_properties(netbench_client_latency PROPERTIES COMPILE_FLAGS "-DMEHCACHED_MEASURE_LATENCY")
	target_link_libraries(netbench_client_latency ${LIBRARIES})

	# netbench_client_soft_fdir
	add_executable(netbench_client_soft_fdir net_common.c netbench_config.c netbench_client.c ${SOURCES})
	set_target_properties(netbench_client_soft_fdir PROPERTIES COMPILE_FLAGS "-DMEHCACHED_USE_SOFT_FDIR")
	target_link_libraries(netbench_client_soft_fdir ${LIBRARIES})
endif()

# network servers: one source set, four builds that differ only in a -D flag
if(NOT "${NSERVER}" STREQUAL "yes")
	# netbench_server
	add_executable(netbench_server net_common.c netbench_config.c netbench_server.c ${SOURCES})
	target_link_libraries(netbench_server ${LIBRARIES})

	# netbench_server_latency
	add_executable(netbench_server_latency net_common.c netbench_config.c netbench_server.c ${SOURCES})
	set_target_properties(netbench_server_latency PROPERTIES COMPILE_FLAGS "-DMEHCACHED_MEASURE_LATENCY")
	target_link_libraries(netbench_server_latency ${LIBRARIES})

	# netbench_server_soft_fdir
	add_executable(netbench_server_soft_fdir net_common.c netbench_config.c netbench_server.c ${SOURCES})
	set_target_properties(netbench_server_soft_fdir PROPERTIES COMPILE_FLAGS "-DMEHCACHED_USE_SOFT_FDIR")
	target_link_libraries(netbench_server_soft_fdir ${LIBRARIES})

	# netbench_server_store
	add_executable(netbench_server_store net_common.c netbench_config.c netbench_server.c ${SOURCES})
	set_target_properties(netbench_server_store PROPERTIES COMPILE_FLAGS "-DMEHCACHED_NO_EVICTION")
	target_link_libraries(netbench_server_store ${LIBRARIES})
endif()

--------------------------------------------------------------------------------
/src/alloc.h:
--------------------------------------------------------------------------------
// Copyright 2014 Carnegie Mellon University
// 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #pragma once 16 | 17 | #include "common.h" 18 | 19 | MEHCACHED_BEGIN 20 | 21 | struct mehcached_alloc_item 22 | { 23 | // uint32_t item_size; // XXX: isn't this breaking 8-byte alignment? 24 | uint64_t item_size; // XXX: isn't this breaking 8-byte alignment? 25 | uint8_t data[0]; 26 | }; 27 | 28 | MEHCACHED_END 29 | 30 | -------------------------------------------------------------------------------- /src/alloc_dynamic.c: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Carnegie Mellon University 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #pragma once 16 | 17 | #include "alloc_dynamic.h" 18 | #include "table.h" 19 | 20 | MEHCACHED_BEGIN 21 | 22 | #define MEHCACHED_DYNAMIC_FREE (0UL) 23 | #define MEHCACHED_DYNAMIC_OCCUPIED (1UL) 24 | 25 | #define MEHCACHED_DYNAMIC_TAG_SIZE(vec) ((vec) & ((1UL << 63UL) - 1UL)) 26 | #define MEHCACHED_DYNAMIC_TAG_STATUS(vec) ((vec) >> 63UL) 27 | #define MEHCACHED_DYNAMIC_TAG_VEC(size, status) ((size) | (status) << 63UL) 28 | 29 | // TODO: use address order for each freelist to reduce fragmentation and improve locality 30 | // TODO: use the LSB (not MSB) to store status as all sizes are aligned to 8-byte boundary 31 | 32 | static 33 | void 34 | mehcached_dynamic_init(struct mehcached_dynamic *alloc, uint64_t size, bool concurrent_alloc_read, bool concurrent_alloc_write, size_t numa_node) 35 | { 36 | if (!concurrent_alloc_read) 37 | alloc->concurrent_access_mode = 0; 38 | else if (!concurrent_alloc_write) 39 | alloc->concurrent_access_mode = 1; 40 | else 41 | alloc->concurrent_access_mode = 2; 42 | 43 | alloc->lock = 0; 44 | 45 | size = mehcached_shm_adjust_size(size); 46 | assert(size <= MEHCACHED_DYNAMIC_MAX_SIZE); 47 | 48 | alloc->size = size; 49 | 50 | size_t shm_id = mehcached_shm_alloc(size, numa_node); 51 | if (shm_id == (size_t)-1) 52 | { 53 | printf("failed to allocate memory\n"); 54 | assert(false); 55 | } 56 | while (true) 57 | { 58 | alloc->data = mehcached_shm_find_free_address(size); 59 | if (alloc->data == NULL) 60 | assert(false); 61 | 62 | if (!mehcached_shm_map(shm_id, alloc->data, 0, size)) 63 | continue; 64 | 65 | break; 66 | } 67 | 68 | if (!mehcached_shm_schedule_remove(shm_id)) 69 | { 70 | perror(""); 71 | assert(false); 72 | } 73 | 74 | mehcached_dynamic_reset(alloc); 75 | } 76 | 77 | static 78 | void 79 | mehcached_dynamic_free(struct mehcached_dynamic *alloc) 80 | { 81 | if (!mehcached_shm_unmap(alloc->data)) 82 | assert(false); 83 | } 84 | 85 | static 86 | void 87 | mehcached_dynamic_lock(struct mehcached_dynamic *alloc 
MEHCACHED_UNUSED) 88 | { 89 | #ifdef MEHCACHED_CONCURRENT 90 | if (alloc->concurrent_access_mode == 2) 91 | { 92 | while (1) 93 | { 94 | if (__sync_bool_compare_and_swap((volatile uint32_t *)&alloc->lock, 0U, 1U)) 95 | break; 96 | } 97 | } 98 | #endif 99 | } 100 | 101 | static 102 | void 103 | mehcached_dynamic_unlock(struct mehcached_dynamic *alloc MEHCACHED_UNUSED) 104 | { 105 | #ifdef MEHCACHED_CONCURRENT 106 | if (alloc->concurrent_access_mode == 2) 107 | { 108 | memory_barrier(); 109 | assert((*(volatile uint32_t *)&alloc->lock & 1U) == 1U); 110 | // no need to use atomic add because this thread is the only one writing to version 111 | *(volatile uint32_t *)&alloc->lock = 0U; 112 | } 113 | #endif 114 | } 115 | 116 | static 117 | size_t 118 | mehcached_dynamic_size_to_class_roundup(uint64_t size) 119 | { 120 | assert(size <= MEHCACHED_DYNAMIC_MAX_SIZE); 121 | 122 | if (size <= MEHCACHED_DYNAMIC_MIN_SIZE + (MEHCACHED_DYNAMIC_NUM_CLASSES - 1) * MEHCACHED_DYNAMIC_CLASS_INCREMENT) 123 | return (size - MEHCACHED_DYNAMIC_MIN_SIZE + MEHCACHED_DYNAMIC_CLASS_INCREMENT - 1) / MEHCACHED_DYNAMIC_CLASS_INCREMENT; 124 | else 125 | return MEHCACHED_DYNAMIC_NUM_CLASSES - 1; 126 | } 127 | 128 | static 129 | size_t 130 | mehcached_dynamic_size_to_class_rounddown(uint64_t size) 131 | { 132 | assert(size <= MEHCACHED_DYNAMIC_MAX_SIZE); 133 | assert(size >= MEHCACHED_DYNAMIC_MIN_SIZE); 134 | 135 | if (size < MEHCACHED_DYNAMIC_MIN_SIZE + MEHCACHED_DYNAMIC_NUM_CLASSES * MEHCACHED_DYNAMIC_CLASS_INCREMENT) 136 | return (size - MEHCACHED_DYNAMIC_MIN_SIZE) / MEHCACHED_DYNAMIC_CLASS_INCREMENT; 137 | else 138 | return MEHCACHED_DYNAMIC_NUM_CLASSES - 1; 139 | } 140 | 141 | static 142 | void 143 | mehcached_dynamic_insert_free_chunk(struct mehcached_dynamic *alloc, uint8_t *chunk_start, uint64_t chunk_size) 144 | { 145 | #ifdef MEHCACHED_VERBOSE 146 | printf("mehcached_dynamic_insert_free_chunk: start=%p size=%lu\n", chunk_start, chunk_size); 147 | #endif 148 | size_t chunk_class = 
mehcached_dynamic_size_to_class_rounddown(chunk_size); 149 | *(uint64_t *)chunk_start = *(uint64_t *)(chunk_start + chunk_size - 8) = MEHCACHED_DYNAMIC_TAG_VEC(chunk_size, MEHCACHED_DYNAMIC_FREE); 150 | *(uint8_t **)(chunk_start + 8) = NULL; // the head has no previous free chunk 151 | *(uint8_t **)(chunk_start + 16) = alloc->free_head[chunk_class]; // point to the old head 152 | 153 | if (alloc->free_head[chunk_class] != NULL) 154 | { 155 | assert(*(uint8_t **)(alloc->free_head[chunk_class] + 8) == NULL); 156 | *(uint8_t **)(alloc->free_head[chunk_class] + 8) = chunk_start; // update the previous head's prev pointer 157 | } 158 | 159 | alloc->free_head[chunk_class] = chunk_start; // set as a new head 160 | } 161 | 162 | static 163 | void mehcached_dynamic_remove_free_chunk_from_free_list(struct mehcached_dynamic *alloc, uint8_t *chunk_start, uint64_t chunk_size) 164 | { 165 | #ifdef MEHCACHED_VERBOSE 166 | printf("mehcached_dynamic_remove_free_chunk_from_free_list: start=%p size=%lu\n", chunk_start, chunk_size); 167 | #endif 168 | 169 | uint8_t *prev_chunk_start = *(uint8_t **)(chunk_start + 8); 170 | uint8_t *next_chunk_start = *(uint8_t **)(chunk_start + 16); 171 | 172 | if (prev_chunk_start != NULL) 173 | *(uint8_t **)(prev_chunk_start + 16) = next_chunk_start; 174 | else 175 | { 176 | size_t chunk_class = mehcached_dynamic_size_to_class_rounddown(chunk_size); 177 | assert(alloc->free_head[chunk_class] == chunk_start); 178 | alloc->free_head[chunk_class] = next_chunk_start; // set the next free chunk as the head 179 | } 180 | 181 | if (next_chunk_start != NULL) 182 | *(uint8_t **)(next_chunk_start + 8) = prev_chunk_start; 183 | } 184 | 185 | static 186 | bool 187 | mehcached_dynamic_remove_free_chunk_from_head(struct mehcached_dynamic *alloc, uint64_t minimum_chunk_size, uint8_t **out_chunk_start, uint64_t *out_chunk_size) 188 | { 189 | size_t chunk_class = mehcached_dynamic_size_to_class_roundup(minimum_chunk_size); 190 | 191 | // determine the size class to 
use (best fit) 192 | for (; chunk_class < MEHCACHED_DYNAMIC_NUM_CLASSES; chunk_class++) 193 | if (alloc->free_head[chunk_class] != NULL) 194 | break; 195 | 196 | if (chunk_class == MEHCACHED_DYNAMIC_NUM_CLASSES) 197 | { 198 | #ifdef MEHCACHED_VERBOSE 199 | printf("mehcached_dynamic_remove_free_chunk_from_head: minsize=%lu no space\n", minimum_chunk_size); 200 | #endif 201 | return false; 202 | } 203 | 204 | // use the first free chunk in the class; the overall policy is still approximately best fit (which is good) due to segregation 205 | uint8_t *chunk_start = alloc->free_head[chunk_class]; 206 | assert(MEHCACHED_DYNAMIC_TAG_STATUS(*(uint64_t *)chunk_start) == MEHCACHED_DYNAMIC_FREE); 207 | uint64_t chunk_size = MEHCACHED_DYNAMIC_TAG_SIZE(*(uint64_t *)chunk_start); 208 | assert(*(uint64_t *)chunk_start == *(uint64_t *)(chunk_start + chunk_size - 8)); 209 | 210 | assert(chunk_size >= minimum_chunk_size); 211 | 212 | mehcached_dynamic_remove_free_chunk_from_free_list(alloc, chunk_start, chunk_size); 213 | 214 | *out_chunk_start = chunk_start; 215 | *out_chunk_size = chunk_size; 216 | #ifdef MEHCACHED_VERBOSE 217 | printf("mehcached_dynamic_remove_free_chunk_from_head: minsize=%lu start=%p size=%lu\n", minimum_chunk_size, *out_chunk_start, *out_chunk_size); 218 | #endif 219 | return true; 220 | } 221 | 222 | static 223 | void 224 | mehcached_dynamic_reset(struct mehcached_dynamic *alloc) 225 | { 226 | memset(alloc->free_head, 0, sizeof(alloc->free_head)); 227 | 228 | // set the entire free space as a free chunk 229 | mehcached_dynamic_insert_free_chunk(alloc, alloc->data, alloc->size); 230 | } 231 | 232 | static 233 | struct mehcached_alloc_item * 234 | mehcached_dynamic_item(const struct mehcached_dynamic *alloc, uint64_t dynamic_offset) 235 | { 236 | return (struct mehcached_alloc_item *)(alloc->data + dynamic_offset); 237 | } 238 | 239 | static 240 | void 241 | mehcached_dynamic_coalese_free_chunk_left(struct mehcached_dynamic *alloc, uint8_t **chunk_start, 
uint64_t *chunk_size) 242 | { 243 | if (*chunk_start == alloc->data) 244 | return; 245 | assert(*chunk_start > alloc->data); 246 | 247 | if (MEHCACHED_DYNAMIC_TAG_STATUS(*(uint64_t *)(*chunk_start - 8)) == MEHCACHED_DYNAMIC_OCCUPIED) 248 | return; 249 | 250 | uint64_t adj_chunk_size = MEHCACHED_DYNAMIC_TAG_SIZE(*(uint64_t *)(*chunk_start - 8)); 251 | uint8_t *adj_chunk_start = *chunk_start - adj_chunk_size; 252 | assert(*(uint64_t *)adj_chunk_start == *(uint64_t *)(adj_chunk_start + adj_chunk_size - 8)); 253 | 254 | #ifdef MEHCACHED_VERBOSE 255 | printf("mehcached_dynamic_coalese_free_chunk_left: start=%p size=%lu left=%lu\n", *chunk_start, *chunk_size, adj_chunk_size); 256 | #endif 257 | 258 | mehcached_dynamic_remove_free_chunk_from_free_list(alloc, adj_chunk_start, adj_chunk_size); 259 | *chunk_start = adj_chunk_start; 260 | *chunk_size = *chunk_size + adj_chunk_size; 261 | } 262 | 263 | static 264 | void 265 | mehcached_dynamic_coalese_free_chunk_right(struct mehcached_dynamic *alloc, uint8_t **chunk_start, uint64_t *chunk_size) 266 | { 267 | if (*chunk_start + *chunk_size == alloc->data + alloc->size) 268 | return; 269 | assert(*chunk_start + *chunk_size < alloc->data + alloc->size); 270 | 271 | if (MEHCACHED_DYNAMIC_TAG_STATUS(*(uint64_t *)(*chunk_start + *chunk_size)) == MEHCACHED_DYNAMIC_OCCUPIED) 272 | return; 273 | 274 | uint8_t *adj_chunk_start = *chunk_start + *chunk_size; 275 | uint64_t adj_chunk_size = MEHCACHED_DYNAMIC_TAG_SIZE(*(uint64_t *)adj_chunk_start); 276 | assert(*(uint64_t *)adj_chunk_start == *(uint64_t *)(adj_chunk_start + adj_chunk_size - 8)); 277 | 278 | #ifdef MEHCACHED_VERBOSE 279 | printf("mehcached_dynamic_coalese_free_chunk_right: start=%p size=%lu right=%lu\n", *chunk_start, *chunk_size, adj_chunk_size); 280 | #endif 281 | 282 | mehcached_dynamic_remove_free_chunk_from_free_list(alloc, adj_chunk_start, adj_chunk_size); 283 | // chunk_start is unchanged 284 | *chunk_size = *chunk_size + adj_chunk_size; 285 | } 286 | 287 | static 288 
| uint64_t 289 | mehcached_dynamic_allocate(struct mehcached_dynamic *alloc, uint32_t item_size) 290 | { 291 | uint64_t minimum_chunk_size = MEHCACHED_ROUNDUP8((uint64_t)item_size) + MEHCAHCED_DYNAMIC_OVERHEAD; 292 | 293 | uint8_t *chunk_start; 294 | uint64_t chunk_size; 295 | if (!mehcached_dynamic_remove_free_chunk_from_head(alloc, minimum_chunk_size, &chunk_start, &chunk_size)) 296 | return MEHCACHED_DYNAMIC_INSUFFICIENT_SPACE; 297 | 298 | // see if we can make a leftover free chunk 299 | uint64_t leftover_chunk_size = chunk_size - minimum_chunk_size; 300 | if (leftover_chunk_size >= MEHCACHED_DYNAMIC_MIN_SIZE) 301 | { 302 | // create a leftover free chunk and insert it to the freelist 303 | mehcached_dynamic_insert_free_chunk(alloc, chunk_start + minimum_chunk_size, leftover_chunk_size); 304 | // coalescing is not required here because the previous chunk already used to be a big coalesced free chunk 305 | 306 | // adjust the free chunk to avoid overlapping 307 | chunk_size = minimum_chunk_size; 308 | } 309 | else 310 | leftover_chunk_size = 0; 311 | 312 | #ifdef MEHCACHED_VERBOSE 313 | printf("mehcached_dynamic_allocate: item_size=%u minsize=%lu start=%p size=%lu (leftover=%lu)\n", item_size, minimum_chunk_size, chunk_start, chunk_size, leftover_chunk_size); 314 | #endif 315 | 316 | *(uint64_t *)chunk_start = *(uint64_t *)(chunk_start + chunk_size - 8) = MEHCACHED_DYNAMIC_TAG_VEC(chunk_size, MEHCACHED_DYNAMIC_OCCUPIED); 317 | 318 | // TODO: We are wasting 4 bytes for struct mehcached_alloc_item for compatibility. 
Need to implement an allocator-specific method to obtain the item size 319 | struct mehcached_alloc_item *alloc_item = (struct mehcached_alloc_item *)(chunk_start + 8); 320 | alloc_item->item_size = item_size; 321 | 322 | return (uint64_t)((uint8_t *)alloc_item - alloc->data); 323 | } 324 | 325 | static 326 | void 327 | mehcached_dynamic_deallocate(struct mehcached_dynamic *alloc, uint64_t dynamic_offset) 328 | { 329 | struct mehcached_alloc_item *alloc_item = mehcached_dynamic_item(alloc, dynamic_offset); 330 | uint8_t *chunk_start = (uint8_t *)alloc_item - 8; 331 | assert(MEHCACHED_DYNAMIC_TAG_STATUS(*(uint64_t *)chunk_start) == MEHCACHED_DYNAMIC_OCCUPIED); 332 | uint64_t chunk_size = MEHCACHED_DYNAMIC_TAG_SIZE(*(uint64_t *)chunk_start); 333 | 334 | #ifdef MEHCACHED_VERBOSE 335 | printf("mehcached_dynamic_deallocate: start=%p size=%lu\n", chunk_start, chunk_size); 336 | #endif 337 | 338 | mehcached_dynamic_coalese_free_chunk_left(alloc, &chunk_start, &chunk_size); 339 | mehcached_dynamic_coalese_free_chunk_right(alloc, &chunk_start, &chunk_size); 340 | mehcached_dynamic_insert_free_chunk(alloc, chunk_start, chunk_size); 341 | } 342 | 343 | MEHCACHED_END 344 | -------------------------------------------------------------------------------- /src/alloc_dynamic.h: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Carnegie Mellon University 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #pragma once 16 | 17 | #include "common.h" 18 | #include "alloc.h" 19 | 20 | MEHCACHED_BEGIN 21 | 22 | // memory allocation using segregated fit (similar to Lea) 23 | 24 | #define MEHCAHCED_DYNAMIC_OVERHEAD (16) // per-item space overhead solely caused by mehcached_dynamic 25 | 26 | #define MEHCACHED_DYNAMIC_MIN_SIZE (32UL) // 32 bytes (must be able to hold 4 size_t variables) 27 | #define MEHCACHED_DYNAMIC_MAX_SIZE ((1UL << 40) - 1) // 40-bit wide size (can be up to 63-bit wide) 28 | #define MEHCACHED_DYNAMIC_NUM_CLASSES (32) // 32 classes for freelists 29 | #define MEHCACHED_DYNAMIC_CLASS_INCREMENT (8) // 8-byte increment in freelist classes 30 | 31 | #define MEHCACHED_DYNAMIC_INSUFFICIENT_SPACE ((uint64_t)-1) 32 | 33 | // data structure layout 34 | 35 | // free_head[class] -> the first free chunk of the class (NULL if none exists) 36 | 37 | // free chunk (of size N) - N is the same or larger than the size of the class 38 | // 8-byte: status (1 bit), size (63 bit) 39 | // 8-byte: prev free chunk of the same class (NULL if head) 40 | // 8-byte: next free chunk of the same class (NULL if tail) 41 | // (N - 32 bytes) 42 | // 8-byte: status (1 bit), size (63 bit) 43 | 44 | // occupied chunk (of size N) - overhead of 16 bytes 45 | // 8-byte: status (1 bit), size (63 bit) 46 | // (N - 16 bytes) 47 | // 8-byte: status (1 bit), size (63 bit) 48 | 49 | struct mehcached_dynamic 50 | { 51 | uint8_t concurrent_access_mode; 52 | uint32_t lock; 53 | uint64_t size; // the total size 54 | uint8_t *data; // the base address of the reserved memory 55 | uint8_t *free_head[MEHCACHED_DYNAMIC_NUM_CLASSES]; // the head free pointer of each class 56 | }; 57 | 58 | static 59 | void 60 | mehcached_dynamic_init(struct mehcached_dynamic *alloc, uint64_t size, bool concurrent_alloc_read, bool concurrent_alloc_write, size_t numa_node); 61 | 62 | static 63 | void 64 | 
mehcached_dynamic_free(struct mehcached_dynamic *alloc); 65 | 66 | static 67 | void 68 | mehcached_dynamic_reset(struct mehcached_dynamic *alloc); 69 | 70 | static 71 | void 72 | mehcached_dynamic_lock(struct mehcached_dynamic *alloc); 73 | 74 | static 75 | void 76 | mehcached_dynamic_unlock(struct mehcached_dynamic *alloc); 77 | 78 | static 79 | struct mehcached_alloc_item * 80 | mehcached_dynamic_item(const struct mehcached_dynamic *alloc, uint64_t dynamic_offset); 81 | 82 | static 83 | uint64_t 84 | mehcached_dynamic_allocate(struct mehcached_dynamic *alloc, uint32_t item_size); 85 | 86 | static 87 | void 88 | mehcached_dynamic_deallocate(struct mehcached_dynamic *alloc, uint64_t dynamic_offset); 89 | 90 | MEHCACHED_END 91 | 92 | -------------------------------------------------------------------------------- /src/alloc_malloc.c: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Carnegie Mellon University 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #pragma once 16 | 17 | #include "alloc_malloc.h" 18 | #include "table.h" 19 | 20 | MEHCACHED_BEGIN 21 | 22 | static 23 | void 24 | mehcached_malloc_init(struct mehcached_malloc *alloc) 25 | { 26 | uint8_t *ptr = (uint8_t *)malloc(8); 27 | free(ptr); 28 | alloc->pointer_base = ptr - 0x7fffffffUL; 29 | } 30 | 31 | static 32 | void 33 | mehcached_malloc_free(struct mehcached_malloc *alloc MEHCACHED_UNUSED) 34 | { 35 | } 36 | 37 | static 38 | void 39 | mehcached_malloc_reset(struct mehcached_malloc *alloc MEHCACHED_UNUSED) 40 | { 41 | } 42 | 43 | static 44 | struct mehcached_alloc_item * 45 | mehcached_malloc_item(const struct mehcached_malloc *alloc, uint64_t malloc_offset) 46 | { 47 | return (struct mehcached_alloc_item *)(alloc->pointer_base + malloc_offset); 48 | } 49 | 50 | static 51 | uint64_t 52 | mehcached_malloc_allocate(struct mehcached_malloc *alloc, uint32_t item_size) 53 | { 54 | void *p = malloc(item_size); 55 | if (p == NULL) 56 | return MEHCACHED_MALLOC_INSUFFICIENT_SPACE; 57 | 58 | size_t malloc_offset = (size_t)((uint8_t *)p - alloc->pointer_base); 59 | if (malloc_offset > MEHCACHED_ITEM_OFFSET_MASK) 60 | { 61 | printf("too large pointer: %zx (offset = %zx)\n", (size_t)p, malloc_offset); 62 | assert(false); 63 | return MEHCACHED_MALLOC_INSUFFICIENT_SPACE; 64 | } 65 | struct mehcached_alloc_item *alloc_item = (struct mehcached_alloc_item *)p; 66 | alloc_item->item_size = item_size; 67 | return (uint64_t)malloc_offset; 68 | } 69 | 70 | static 71 | void 72 | mehcached_malloc_deallocate(struct mehcached_malloc *alloc, uint64_t malloc_offset) 73 | { 74 | free(alloc->pointer_base + malloc_offset); 75 | } 76 | 77 | MEHCACHED_END 78 | 79 | -------------------------------------------------------------------------------- /src/alloc_malloc.h: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Carnegie Mellon University 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | 
// you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #pragma once 16 | 17 | #include "common.h" 18 | #include "alloc.h" 19 | 20 | MEHCACHED_BEGIN 21 | 22 | #define MEHCACHED_MALLOC_INSUFFICIENT_SPACE ((uint64_t)-1) 23 | 24 | struct mehcached_malloc 25 | { 26 | uint8_t *pointer_base; 27 | }; 28 | 29 | static 30 | void 31 | mehcached_malloc_init(struct mehcached_malloc *alloc); 32 | 33 | static 34 | void 35 | mehcached_malloc_free(struct mehcached_malloc *alloc); 36 | 37 | static 38 | void 39 | mehcached_malloc_reset(struct mehcached_malloc *alloc); 40 | 41 | static 42 | struct mehcached_alloc_item * 43 | mehcached_malloc_item(const struct mehcached_malloc *alloc, uint64_t malloc_offset); 44 | 45 | static 46 | uint64_t 47 | mehcached_malloc_allocate(struct mehcached_malloc *alloc, uint32_t item_size); 48 | 49 | static 50 | void 51 | mehcached_malloc_deallocate(struct mehcached_malloc *alloc, uint64_t malloc_offset); 52 | 53 | MEHCACHED_END 54 | 55 | -------------------------------------------------------------------------------- /src/alloc_pool.c: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Carnegie Mellon University 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #pragma once 16 | 17 | #include "alloc_pool.h" 18 | #include "table.h" 19 | #include "shm.h" 20 | 21 | MEHCACHED_BEGIN 22 | 23 | static 24 | void 25 | mehcached_pool_init(struct mehcached_pool *alloc, uint64_t size, bool concurrent_alloc_read, bool concurrent_alloc_write, size_t numa_node) 26 | { 27 | if (size < MEHCACHED_MINIMUM_POOL_SIZE) 28 | size = MEHCACHED_MINIMUM_POOL_SIZE; 29 | size = mehcached_shm_adjust_size(size); 30 | size = mehcached_next_power_of_two(size); 31 | assert(size <= MEHCACHED_ITEM_OFFSET_MASK >> 1); // ">> 1" is for sufficient garbage collection time 32 | assert(size == mehcached_shm_adjust_size(size)); 33 | 34 | if (!concurrent_alloc_read) 35 | alloc->concurrent_access_mode = 0; 36 | else if (!concurrent_alloc_write) 37 | alloc->concurrent_access_mode = 1; 38 | else 39 | alloc->concurrent_access_mode = 2; 40 | 41 | alloc->size = size; 42 | alloc->mask = size - 1; 43 | 44 | alloc->lock = 0; 45 | alloc->head = alloc->tail = 0; 46 | 47 | size_t shm_id = mehcached_shm_alloc(size, numa_node); 48 | if (shm_id == (size_t)-1) 49 | { 50 | printf("failed to allocate memory\n"); 51 | assert(false); 52 | } 53 | while (true) 54 | { 55 | alloc->data = mehcached_shm_find_free_address(size + MEHCACHED_MINIMUM_POOL_SIZE); 56 | if (alloc->data == NULL) 57 | assert(false); 58 | 59 | if (!mehcached_shm_map(shm_id, alloc->data, 0, size)) 60 | continue; 61 | 62 | // aliased access across pool end boundary 63 | if (!mehcached_shm_map(shm_id, alloc->data + size, 0, MEHCACHED_MINIMUM_POOL_SIZE)) 64 | { 65 | 
mehcached_shm_unmap(alloc->data); 66 | continue; 67 | } 68 | 69 | break; 70 | } 71 | 72 | if (!mehcached_shm_schedule_remove(shm_id)) 73 | { 74 | perror(""); 75 | assert(false); 76 | } 77 | } 78 | 79 | static 80 | void 81 | mehcached_pool_free(struct mehcached_pool *alloc) 82 | { 83 | if (!mehcached_shm_unmap(alloc->data)) 84 | assert(false); 85 | if (!mehcached_shm_unmap(alloc->data + alloc->size)) 86 | assert(false); 87 | } 88 | 89 | static 90 | void 91 | mehcached_pool_reset(struct mehcached_pool *alloc) 92 | { 93 | alloc->head = alloc->tail = 0; 94 | } 95 | 96 | static 97 | void 98 | mehcached_pool_lock(struct mehcached_pool *alloc MEHCACHED_UNUSED) 99 | { 100 | #ifdef MEHCACHED_CONCURRENT 101 | if (alloc->concurrent_access_mode == 2) 102 | { 103 | while (1) 104 | { 105 | if (__sync_bool_compare_and_swap((volatile uint32_t *)&alloc->lock, 0U, 1U)) 106 | break; 107 | } 108 | } 109 | #endif 110 | } 111 | 112 | static 113 | void 114 | mehcached_pool_unlock(struct mehcached_pool *alloc MEHCACHED_UNUSED) 115 | { 116 | #ifdef MEHCACHED_CONCURRENT 117 | if (alloc->concurrent_access_mode == 2) 118 | { 119 | memory_barrier(); 120 | assert((*(volatile uint32_t *)&alloc->lock & 1U) == 1U); 121 | // no need to use atomic add because this thread is the only one writing to version 122 | *(volatile uint32_t *)&alloc->lock = 0U; 123 | } 124 | #endif 125 | } 126 | 127 | static 128 | struct mehcached_alloc_item * 129 | mehcached_pool_item(const struct mehcached_pool *alloc, uint64_t pool_offset) 130 | { 131 | return (struct mehcached_alloc_item *)(alloc->data + (pool_offset & alloc->mask)); 132 | } 133 | 134 | static 135 | void 136 | mehcached_pool_check_invariants(const struct mehcached_pool *alloc MEHCACHED_UNUSED) 137 | { 138 | assert(alloc->tail - alloc->head <= alloc->size); 139 | } 140 | 141 | static 142 | void 143 | mehcached_pool_pop_head(struct mehcached_pool *alloc) 144 | { 145 | struct mehcached_alloc_item *alloc_item = mehcached_pool_item(alloc, alloc->head); 146 | 
#ifdef MEHCACHED_VERBOSE 147 | printf("popping item size = %u at head = %lu\n", alloc_item->item_size, alloc->head & MEHCACHED_ITEM_OFFSET_MASK); 148 | #endif 149 | 150 | alloc->head += alloc_item->item_size; 151 | mehcached_pool_check_invariants(alloc); 152 | } 153 | 154 | static 155 | uint64_t 156 | mehcached_pool_push_tail(struct mehcached_pool *alloc, uint32_t item_size) 157 | { 158 | assert(item_size == MEHCACHED_ROUNDUP8(item_size)); 159 | assert(item_size <= alloc->size); 160 | 161 | uint64_t item_offset = alloc->tail; 162 | 163 | uint64_t v = item_offset + item_size; 164 | while (v > alloc->head + alloc->size) 165 | mehcached_pool_pop_head(alloc); 166 | 167 | struct mehcached_alloc_item *alloc_item = mehcached_pool_item(alloc, item_offset); 168 | alloc_item->item_size = item_size; 169 | 170 | if (alloc->concurrent_access_mode == 0) 171 | alloc->tail += item_size; 172 | else 173 | { 174 | *(volatile uint64_t *)&alloc->tail += item_size; 175 | memory_barrier(); 176 | } 177 | 178 | mehcached_pool_check_invariants(alloc); 179 | 180 | #ifdef MEHCACHED_VERBOSE 181 | printf("pushing item size = %u at tail = %lu\n", item_size, item_offset & MEHCACHED_ITEM_OFFSET_MASK); 182 | #endif 183 | 184 | return item_offset & MEHCACHED_ITEM_OFFSET_MASK; 185 | } 186 | 187 | static 188 | uint64_t 189 | mehcached_pool_allocate(struct mehcached_pool *alloc, uint32_t item_size) 190 | { 191 | return mehcached_pool_push_tail(alloc, item_size); 192 | } 193 | 194 | static 195 | bool 196 | mehcached_pool_is_valid(const struct mehcached_pool *alloc, uint64_t pool_offset) 197 | { 198 | if (alloc->concurrent_access_mode == 0) 199 | return ((alloc->tail - pool_offset) & MEHCACHED_ITEM_OFFSET_MASK) <= alloc->size; 200 | else 201 | { 202 | memory_barrier(); 203 | return ((*(volatile uint64_t *)&alloc->tail - pool_offset) & MEHCACHED_ITEM_OFFSET_MASK) <= alloc->size; 204 | } 205 | } 206 | 207 | MEHCACHED_END 208 | 209 | 
--------------------------------------------------------------------------------
/src/alloc_pool.h:
--------------------------------------------------------------------------------
// Copyright 2014 Carnegie Mellon University
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "common.h"
#include "alloc.h"

MEHCACHED_BEGIN

// The minimum pool size that prevents any invalid read caused by garbage item metadata.
// It must be at least as large as the rounded sum of an item header, key, and value,
// and must also be a multiple of mehcached_shm_get_page_size().
// The pool implementation also uses it as the size of the aliased mapping placed
// directly after the pool end.
#define MEHCACHED_MINIMUM_POOL_SIZE (2097152)

// State of one circular-log allocator.
struct mehcached_pool
{
    uint8_t concurrent_access_mode;    // 0, 1, or 2; chosen by mehcached_pool_init() from its concurrent_alloc_* flags
    uint32_t lock;                     // spinlock word: 0 = free, 1 = held (used only in mode 2)
    uint8_t *data;                     // base address of the mapped pool memory
    uint64_t size;    // a power of two
    uint64_t mask;    // size - 1; this mask is used only when converting the offset to the actual location of the item
    // Internally, the pool uses full 64-bit numbers for head and tail.
    // However, the valid range for item_offset is limited to (MEHCACHED_ITEM_OFFSET_MASK + 1).
    // This inconsistency is resolved by applying MEHCACHED_ITEM_OFFSET_MASK
    // whenever an offset is returned to the outside or a masked offset is received from the outside.
    uint64_t head;    // start offset of items
    uint64_t tail;    // end offset of items
} MEHCACHED_ALIGNED(64);

static 43 | void 44 | mehcached_pool_init(struct mehcached_pool *alloc, uint64_t size, bool concurrent_alloc_read, bool concurrent_alloc_write, size_t numa_node); 45 | 46 | static 47 | void 48 | mehcached_pool_free(struct mehcached_pool *alloc); 49 | 50 | static 51 | void 52 | mehcached_pool_reset(struct mehcached_pool *alloc); 53 | 54 | static 55 | void 56 | mehcached_pool_lock(struct mehcached_pool *alloc); 57 | 58 | static 59 | void 60 | mehcached_pool_unlock(struct mehcached_pool *alloc); 61 | 62 | static 63 | struct mehcached_alloc_item * 64 | mehcached_pool_item(const struct mehcached_pool *alloc, uint64_t pool_offset); 65 | 66 | static 67 | void 68 | mehcached_pool_check_invariants(const struct mehcached_pool *alloc); 69 | 70 | static 71 | void 72 | mehcached_pool_pop_head(struct mehcached_pool *alloc); 73 | 74 | static 75 | uint64_t 76 | mehcached_pool_push_tail(struct mehcached_pool *alloc, uint32_t item_size); 77 | 78 | static 79 | uint64_t 80 | mehcached_pool_allocate(struct mehcached_pool *alloc, uint32_t item_size); 81 | 82 | static 83 | bool 84 | mehcached_pool_is_valid(const struct mehcached_pool *alloc, uint64_t pool_offset); 85 | 86 | MEHCACHED_END 87 | 88 | -------------------------------------------------------------------------------- /src/basic_types.h: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Carnegie Mellon University 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #pragma once 16 | 17 | #include 18 | #include 19 | 20 | typedef int bool; 21 | #define true (1) 22 | #define false (0) 23 | 24 | -------------------------------------------------------------------------------- /src/city.c: -------------------------------------------------------------------------------- 1 | // city.c - cityhash-c 2 | // CityHash on C 3 | // Copyright (c) 2011-2012, Alexander Nusov 4 | // 5 | // - original copyright notice - 6 | // Copyright (c) 2011 Google, Inc. 7 | // 8 | // Permission is hereby granted, free of charge, to any person obtaining a copy 9 | // of this software and associated documentation files (the "Software"), to deal 10 | // in the Software without restriction, including without limitation the rights 11 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | // copies of the Software, and to permit persons to whom the Software is 13 | // furnished to do so, subject to the following conditions: 14 | // 15 | // The above copyright notice and this permission notice shall be included in 16 | // all copies or substantial portions of the Software. 17 | // 18 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | // THE SOFTWARE. 25 | // 26 | // CityHash, by Geoff Pike and Jyrki Alakuijala 27 | // 28 | // This file provides CityHash64() and related functions. 
29 | // 30 | // It's probably possible to create even faster hash functions by 31 | // writing a program that systematically explores some of the space of 32 | // possible hash functions, by using SIMD instructions, or by 33 | // compromising on hash quality. 34 | 35 | #include 36 | #include "city.h" 37 | 38 | static uint64 UNALIGNED_LOAD64(const char *p) { 39 | uint64 result; 40 | memcpy(&result, p, sizeof(result)); 41 | return result; 42 | } 43 | 44 | static uint32 UNALIGNED_LOAD32(const char *p) { 45 | uint32 result; 46 | memcpy(&result, p, sizeof(result)); 47 | return result; 48 | } 49 | 50 | #if !defined(WORDS_BIGENDIAN) 51 | 52 | #define uint32_in_expected_order(x) (x) 53 | #define uint64_in_expected_order(x) (x) 54 | 55 | #else 56 | 57 | #ifdef _MSC_VER 58 | #include 59 | #define bswap_32(x) _byteswap_ulong(x) 60 | #define bswap_64(x) _byteswap_uint64(x) 61 | 62 | #elif defined(__APPLE__) 63 | // Mac OS X / Darwin features 64 | #include 65 | #define bswap_32(x) OSSwapInt32(x) 66 | #define bswap_64(x) OSSwapInt64(x) 67 | 68 | #else 69 | #include 70 | #endif 71 | 72 | #define uint32_in_expected_order(x) (bswap_32(x)) 73 | #define uint64_in_expected_order(x) (bswap_64(x)) 74 | 75 | #endif // WORDS_BIGENDIAN 76 | 77 | #if !defined(LIKELY) 78 | #if HAVE_BUILTIN_EXPECT 79 | #define LIKELY(x) (__builtin_expect(!!(x), 1)) 80 | #else 81 | #define LIKELY(x) (x) 82 | #endif 83 | #endif 84 | 85 | static uint64 Fetch64(const char *p) { 86 | return uint64_in_expected_order(UNALIGNED_LOAD64(p)); 87 | } 88 | 89 | static uint32 Fetch32(const char *p) { 90 | return uint32_in_expected_order(UNALIGNED_LOAD32(p)); 91 | } 92 | 93 | // Some primes between 2^63 and 2^64 for various uses. 94 | static const uint64 k0 = 0xc3a5c85c97cb3127ULL; 95 | static const uint64 k1 = 0xb492b66fbe98f273ULL; 96 | static const uint64 k2 = 0x9ae16a3b2f90404fULL; 97 | static const uint64 k3 = 0xc949d7c7509e6557ULL; 98 | 99 | // Hash 128 input bits down to 64 bits of output. 
100 | // This is intended to be a reasonably good hash function. 101 | static inline uint64 Hash128to64(const uint128 x) { 102 | // Murmur-inspired hashing. 103 | const uint64 kMul = 0x9ddfea08eb382d69ULL; 104 | uint64 a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul; 105 | a ^= (a >> 47); 106 | uint64 b = (Uint128High64(x) ^ a) * kMul; 107 | b ^= (b >> 47); 108 | b *= kMul; 109 | return b; 110 | } 111 | 112 | 113 | // Bitwise right rotate. Normally this will compile to a single 114 | // instruction, especially if the shift is a manifest constant. 115 | static uint64 Rotate(uint64 val, int shift) { 116 | // Avoid shifting by 64: doing so yields an undefined result. 117 | return shift == 0 ? val : ((val >> shift) | (val << (64 - shift))); 118 | } 119 | 120 | // Equivalent to Rotate(), but requires the second arg to be non-zero. 121 | // On x86-64, and probably others, it's possible for this to compile 122 | // to a single instruction if both args are already in registers. 123 | static uint64 RotateByAtLeast1(uint64 val, int shift) { 124 | return (val >> shift) | (val << (64 - shift)); 125 | } 126 | 127 | static uint64 ShiftMix(uint64 val) { 128 | return val ^ (val >> 47); 129 | } 130 | 131 | static uint64 HashLen16(uint64 u, uint64 v) { 132 | uint128 result; 133 | result.first = u; 134 | result.second = v; 135 | return Hash128to64(result); 136 | } 137 | 138 | static uint64 HashLen0to16(const char *s, size_t len) { 139 | if (len > 8) { 140 | uint64 a = Fetch64(s); 141 | uint64 b = Fetch64(s + len - 8); 142 | return HashLen16(a, RotateByAtLeast1(b + len, (int)len)) ^ b; 143 | } 144 | if (len >= 4) { 145 | uint64 a = Fetch32(s); 146 | return HashLen16(len + (a << 3), Fetch32(s + len - 4)); 147 | } 148 | if (len > 0) { 149 | uint8 a = (uint8)s[0]; 150 | uint8 b = (uint8)s[len >> 1]; 151 | uint8 c = (uint8)s[len - 1]; 152 | uint32 y = (uint32)(a) + ((uint32)(b) << 8); 153 | uint32 z = (uint32)len + ((uint32)(c) << 2); 154 | return ShiftMix(y * k2 ^ z * k3) * k2; 155 | } 
156 | return k2; 157 | } 158 | 159 | // This probably works well for 16-byte strings as well, but it may be overkill 160 | // in that case. 161 | static uint64 HashLen17to32(const char *s, size_t len) { 162 | uint64 a = Fetch64(s) * k1; 163 | uint64 b = Fetch64(s + 8); 164 | uint64 c = Fetch64(s + len - 8) * k2; 165 | uint64 d = Fetch64(s + len - 16) * k0; 166 | return HashLen16(Rotate(a - b, 43) + Rotate(c, 30) + d, 167 | a + Rotate(b ^ k3, 20) - c + len); 168 | } 169 | 170 | // Return a 16-byte hash for 48 bytes. Quick and dirty. 171 | // Callers do best to use "random-looking" values for a and b. 172 | // static pair WeakHashLen32WithSeeds( 173 | uint128 WeakHashLen32WithSeeds6( 174 | uint64 w, uint64 x, uint64 y, uint64 z, uint64 a, uint64 b) { 175 | a += w; 176 | b = Rotate(b + a + z, 21); 177 | uint64 c = a; 178 | a += x; 179 | a += y; 180 | b += Rotate(a, 44); 181 | 182 | uint128 result; 183 | result.first = (uint64) (a + z); 184 | result.second = (uint64) (b + c); 185 | return result; 186 | } 187 | 188 | // Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty. 189 | // static pair WeakHashLen32WithSeeds( 190 | uint128 WeakHashLen32WithSeeds( 191 | const char* s, uint64 a, uint64 b) { 192 | return WeakHashLen32WithSeeds6(Fetch64(s), 193 | Fetch64(s + 8), 194 | Fetch64(s + 16), 195 | Fetch64(s + 24), 196 | a, 197 | b); 198 | } 199 | 200 | // Return an 8-byte hash for 33 to 64 bytes. 
201 | static uint64 HashLen33to64(const char *s, size_t len) { 202 | uint64 z = Fetch64(s + 24); 203 | uint64 a = Fetch64(s) + (len + Fetch64(s + len - 16)) * k0; 204 | uint64 b = Rotate(a + z, 52); 205 | uint64 c = Rotate(a, 37); 206 | a += Fetch64(s + 8); 207 | c += Rotate(a, 7); 208 | a += Fetch64(s + 16); 209 | uint64 vf = a + z; 210 | uint64 vs = b + Rotate(a, 31) + c; 211 | a = Fetch64(s + 16) + Fetch64(s + len - 32); 212 | z = Fetch64(s + len - 8); 213 | b = Rotate(a + z, 52); 214 | c = Rotate(a, 37); 215 | a += Fetch64(s + len - 24); 216 | c += Rotate(a, 7); 217 | a += Fetch64(s + len - 16); 218 | uint64 wf = a + z; 219 | uint64 ws = b + Rotate(a, 31) + c; 220 | uint64 r = ShiftMix((vf + ws) * k2 + (wf + vs) * k0); 221 | return ShiftMix(r * k0 + vs) * k2; 222 | } 223 | 224 | uint64 CityHash64(const char *s, size_t len) { 225 | if (len <= 32) { 226 | if (len <= 16) { 227 | return HashLen0to16(s, len); 228 | } else { 229 | return HashLen17to32(s, len); 230 | } 231 | } else if (len <= 64) { 232 | return HashLen33to64(s, len); 233 | } 234 | 235 | // For strings over 64 bytes we hash the end first, and then as we 236 | // loop we keep 56 bytes of state: v, w, x, y, and z. 237 | uint64 x = Fetch64(s + len - 40); 238 | uint64 y = Fetch64(s + len - 16) + Fetch64(s + len - 56); 239 | uint64 z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24)); 240 | uint64 temp; 241 | uint128 v = WeakHashLen32WithSeeds(s + len - 64, len, z); 242 | uint128 w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x); 243 | x = x * k1 + Fetch64(s); 244 | 245 | // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks. 
246 | len = (len - 1) & ~(size_t)(63); 247 | do { 248 | x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1; 249 | y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1; 250 | x ^= w.second; 251 | y += v.first + Fetch64(s + 40); 252 | z = Rotate(z + w.first, 33) * k1; 253 | v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); 254 | w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16)); 255 | temp = z; 256 | z = x; 257 | x = temp; 258 | s += 64; 259 | len -= 64; 260 | } while (len != 0); 261 | return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z, 262 | HashLen16(v.second, w.second) + x); 263 | } 264 | 265 | uint64 CityHash64WithSeed(const char *s, size_t len, uint64 seed) { 266 | return CityHash64WithSeeds(s, len, k2, seed); 267 | } 268 | 269 | uint64 CityHash64WithSeeds(const char *s, size_t len, 270 | uint64 seed0, uint64 seed1) { 271 | return HashLen16(CityHash64(s, len) - seed0, seed1); 272 | } 273 | 274 | // A subroutine for CityHash128(). Returns a decent 128-bit hash for strings 275 | // of any length representable in signed long. Based on City and Murmur. 276 | static uint128 CityMurmur(const char *s, size_t len, uint128 seed) { 277 | uint64 a = Uint128Low64(seed); 278 | uint64 b = Uint128High64(seed); 279 | uint64 c = 0; 280 | uint64 d = 0; 281 | signed long l = (signed long)(len - 16); 282 | if (l <= 0) { // len <= 16 283 | a = ShiftMix(a * k1) * k1; 284 | c = b * k1 + HashLen0to16(s, len); 285 | d = ShiftMix(a + (len >= 8 ? 
Fetch64(s) : c)); 286 | } else { // len > 16 287 | c = HashLen16(Fetch64(s + len - 8) + k1, a); 288 | d = HashLen16(b + len, c + Fetch64(s + len - 16)); 289 | a += d; 290 | do { 291 | a ^= ShiftMix(Fetch64(s) * k1) * k1; 292 | a *= k1; 293 | b ^= a; 294 | c ^= ShiftMix(Fetch64(s + 8) * k1) * k1; 295 | c *= k1; 296 | d ^= c; 297 | s += 16; 298 | l -= 16; 299 | } while (l > 0); 300 | } 301 | a = HashLen16(a, c); 302 | b = HashLen16(d, b); 303 | 304 | uint128 result; 305 | result.first = (uint64) (a ^ b); 306 | result.second = (uint64) (HashLen16(b,a)); 307 | return result; 308 | } 309 | 310 | uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed) { 311 | if (len < 128) { 312 | return CityMurmur(s, len, seed); 313 | } 314 | 315 | // We expect len >= 128 to be the common case. Keep 56 bytes of state: 316 | // v, w, x, y, and z. 317 | uint128 v, w; 318 | uint64 x = Uint128Low64(seed); 319 | uint64 y = Uint128High64(seed); 320 | uint64 z = len * k1; 321 | uint64 temp; 322 | v.first = Rotate(y ^ k1, 49) * k1 + Fetch64(s); 323 | v.second = Rotate(v.first, 42) * k1 + Fetch64(s + 8); 324 | w.first = Rotate(y + z, 35) * k1 + x; 325 | w.second = Rotate(x + Fetch64(s + 88), 53) * k1; 326 | 327 | // This is the same inner loop as CityHash64(), manually unrolled. 
328 | do { 329 | x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1; 330 | y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1; 331 | x ^= w.second; 332 | y += v.first + Fetch64(s + 40); 333 | z = Rotate(z + w.first, 33) * k1; 334 | v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); 335 | w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16)); 336 | temp = z; 337 | z = x; 338 | x = temp; 339 | s += 64; 340 | x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1; 341 | y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1; 342 | x ^= w.second; 343 | y += v.first + Fetch64(s + 40); 344 | z = Rotate(z + w.first, 33) * k1; 345 | v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); 346 | w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16)); 347 | temp = z; 348 | z = x; 349 | x = temp; 350 | s += 64; 351 | len -= 128; 352 | } while (LIKELY(len >= 128)); 353 | x += Rotate(v.first + z, 49) * k0; 354 | z += Rotate(w.first, 37) * k0; 355 | // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s. 356 | size_t tail_done; 357 | for (tail_done = 0; tail_done < len; ) { 358 | tail_done += 32; 359 | y = Rotate(x + y, 42) * k0 + v.second; 360 | w.first += Fetch64(s + len - tail_done + 16); 361 | x = x * k0 + w.first; 362 | z += w.second + Fetch64(s + len - tail_done); 363 | w.second += v.first; 364 | v = WeakHashLen32WithSeeds(s + len - tail_done, v.first + z, v.second); 365 | } 366 | // At this point our 56 bytes of state should contain more than 367 | // enough information for a strong 128-bit hash. We use two 368 | // different 56-byte-to-8-byte hashes to get a 16-byte final result. 
369 | x = HashLen16(x, v.first); 370 | y = HashLen16(y + z, w.first); 371 | 372 | uint128 result; 373 | result.first = (uint64) (HashLen16(x + v.second, w.second) + y); 374 | result.second = (uint64) HashLen16(x + w.second, y + v.second); 375 | return result; 376 | } 377 | 378 | uint128 CityHash128(const char *s, size_t len) { 379 | uint128 r; 380 | if (len >= 16) { 381 | r.first = (uint64) (Fetch64(s) ^ k3); 382 | r.second = (uint64) (Fetch64(s + 8)); 383 | 384 | return CityHash128WithSeed(s + 16, 385 | len - 16, 386 | r); 387 | 388 | } else if (len >= 8) { 389 | r.first = (uint64) (Fetch64(s) ^ (len * k0)); 390 | r.second = (uint64) (Fetch64(s + len - 8) ^ k1); 391 | 392 | return CityHash128WithSeed(NULL, 393 | 0, 394 | r); 395 | } else { 396 | r.first = (uint64) k0; 397 | r.second = (uint64) k1; 398 | return CityHash128WithSeed(s, len, r); 399 | } 400 | } 401 | 402 | #ifdef __SSE4_2__ 403 | #include "citycrc.h" 404 | #include 405 | 406 | // Requires len >= 240. 407 | static void CityHashCrc256Long(const char *s, size_t len, 408 | uint32 seed, uint64 *result) { 409 | uint64 a = Fetch64(s + 56) + k0; 410 | uint64 b = Fetch64(s + 96) + k0; 411 | uint64 c = result[0] = HashLen16(b, len); 412 | uint64 d = result[1] = Fetch64(s + 120) * k0 + len; 413 | uint64 e = Fetch64(s + 184) + seed; 414 | uint64 f = seed; 415 | uint64 g = 0; 416 | uint64 h = 0; 417 | uint64 i = 0; 418 | uint64 j = 0; 419 | uint64 t = c + d; 420 | 421 | // 240 bytes of input per iter. 
422 | size_t iters = len / 240; 423 | len -= iters * 240; 424 | do { 425 | #define CHUNK(multiplier, z) \ 426 | { \ 427 | uint64 old_a = a; \ 428 | a = Rotate(b, 41 ^ z) * multiplier + Fetch64(s); \ 429 | b = Rotate(c, 27 ^ z) * multiplier + Fetch64(s + 8); \ 430 | c = Rotate(d, 41 ^ z) * multiplier + Fetch64(s + 16); \ 431 | d = Rotate(e, 33 ^ z) * multiplier + Fetch64(s + 24); \ 432 | e = Rotate(t, 25 ^ z) * multiplier + Fetch64(s + 32); \ 433 | t = old_a; \ 434 | } \ 435 | f = _mm_crc32_u64(f, a); \ 436 | g = _mm_crc32_u64(g, b); \ 437 | h = _mm_crc32_u64(h, c); \ 438 | i = _mm_crc32_u64(i, d); \ 439 | j = _mm_crc32_u64(j, e); \ 440 | s += 40 441 | 442 | CHUNK(1, 1); CHUNK(k0, 0); 443 | CHUNK(1, 1); CHUNK(k0, 0); 444 | CHUNK(1, 1); CHUNK(k0, 0); 445 | } while (--iters > 0); 446 | 447 | while (len >= 40) { 448 | CHUNK(k0, 0); 449 | len -= 40; 450 | } 451 | if (len > 0) { 452 | s = s + len - 40; 453 | CHUNK(k0, 0); 454 | } 455 | j += i << 32; 456 | a = HashLen16(a, j); 457 | h += g << 32; 458 | b += h; 459 | c = HashLen16(c, f) + i; 460 | d = HashLen16(d, e + result[0]); 461 | j += e; 462 | i += HashLen16(h, t); 463 | e = HashLen16(a, d) + j; 464 | f = HashLen16(b, c) + a; 465 | g = HashLen16(j, i) + c; 466 | result[0] = e + f + g + h; 467 | a = ShiftMix((a + g) * k0) * k0 + b; 468 | result[1] += a + result[0]; 469 | a = ShiftMix(a * k0) * k0 + c; 470 | result[2] = a + result[1]; 471 | a = ShiftMix((a + e) * k0) * k0; 472 | result[3] = a + result[2]; 473 | } 474 | 475 | // Requires len < 240. 
476 | static void CityHashCrc256Short(const char *s, size_t len, uint64 *result) { 477 | char buf[240]; 478 | memcpy(buf, s, len); 479 | memset(buf + len, 0, 240 - len); 480 | CityHashCrc256Long(buf, 240, ~(uint32)(len), result); 481 | } 482 | 483 | void CityHashCrc256(const char *s, size_t len, uint64 *result) { 484 | if (LIKELY(len >= 240)) { 485 | CityHashCrc256Long(s, len, 0, result); 486 | } else { 487 | CityHashCrc256Short(s, len, result); 488 | } 489 | } 490 | 491 | uint128 CityHashCrc128WithSeed(const char *s, size_t len, uint128 seed) { 492 | if (len <= 900) { 493 | return CityHash128WithSeed(s, len, seed); 494 | } else { 495 | uint64 result[4]; 496 | CityHashCrc256(s, len, result); 497 | uint64 u = Uint128High64(seed) + result[0]; 498 | uint64 v = Uint128Low64(seed) + result[1]; 499 | uint128 crc; 500 | crc.first = (uint64) (HashLen16(u, v + result[2])); 501 | crc.second = (uint64) (HashLen16(Rotate(v, 32), u * k0 + result[3])); 502 | return crc; 503 | } 504 | } 505 | 506 | uint128 CityHashCrc128(const char *s, size_t len) { 507 | if (len <= 900) { 508 | return CityHash128(s, len); 509 | } else { 510 | uint64 result[4]; 511 | CityHashCrc256(s, len, result); 512 | uint128 crc; 513 | crc.first = (uint64) result[2]; 514 | crc.second = (uint64) result[3]; 515 | return crc; 516 | } 517 | } 518 | 519 | #endif 520 | 521 | -------------------------------------------------------------------------------- /src/city.h: -------------------------------------------------------------------------------- 1 | // city.h - cityhash-c 2 | // CityHash on C 3 | // Copyright (c) 2011-2012, Alexander Nusov 4 | // 5 | // - original copyright notice - 6 | // Copyright (c) 2011 Google, Inc. 
7 | // 8 | // Permission is hereby granted, free of charge, to any person obtaining a copy 9 | // of this software and associated documentation files (the "Software"), to deal 10 | // in the Software without restriction, including without limitation the rights 11 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | // copies of the Software, and to permit persons to whom the Software is 13 | // furnished to do so, subject to the following conditions: 14 | // 15 | // The above copyright notice and this permission notice shall be included in 16 | // all copies or substantial portions of the Software. 17 | // 18 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | // THE SOFTWARE. 25 | // 26 | // CityHash, by Geoff Pike and Jyrki Alakuijala 27 | // 28 | // This file provides a few functions for hashing strings. On x86-64 29 | // hardware in 2011, CityHash64() is faster than other high-quality 30 | // hash functions, such as Murmur. This is largely due to higher 31 | // instruction-level parallelism. CityHash64() and CityHash128() also perform 32 | // well on hash-quality tests. 33 | // 34 | // CityHash128() is optimized for relatively long strings and returns 35 | // a 128-bit hash. For strings more than about 2000 bytes it can be 36 | // faster than CityHash64(). 37 | // 38 | // Functions in the CityHash family are not suitable for cryptography. 39 | // 40 | // WARNING: This code has not been tested on big-endian platforms! 
41 | // It is known to work well on little-endian platforms that have a small penalty 42 | // for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs. 43 | // 44 | // By the way, for some hash functions, given strings a and b, the hash 45 | // of a+b is easily derived from the hashes of a and b. This property 46 | // doesn't hold for any hash functions in this file. 47 | 48 | #ifndef CITY_HASH_H_ 49 | #define CITY_HASH_H_ 50 | 51 | #include 52 | #include 53 | 54 | typedef uint8_t uint8; 55 | typedef uint32_t uint32; 56 | typedef uint64_t uint64; 57 | 58 | typedef struct _uint128 uint128; 59 | struct _uint128 { 60 | uint64 first; 61 | uint64 second; 62 | }; 63 | 64 | #define Uint128Low64(x) (x).first 65 | #define Uint128High64(x) (x).second 66 | 67 | // Hash function for a byte array. 68 | uint64 CityHash64(const char *buf, size_t len); 69 | 70 | // Hash function for a byte array. For convenience, a 64-bit seed is also 71 | // hashed into the result. 72 | uint64 CityHash64WithSeed(const char *buf, size_t len, uint64 seed); 73 | 74 | // Hash function for a byte array. For convenience, two seeds are also 75 | // hashed into the result. 76 | uint64 CityHash64WithSeeds(const char *buf, size_t len, 77 | uint64 seed0, uint64 seed1); 78 | 79 | // Hash function for a byte array. 80 | uint128 CityHash128(const char *s, size_t len); 81 | 82 | // Hash function for a byte array. For convenience, a 128-bit seed is also 83 | // hashed into the result. 84 | uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed); 85 | 86 | #endif // CITY_HASH_H_ 87 | 88 | -------------------------------------------------------------------------------- /src/citycrc.h: -------------------------------------------------------------------------------- 1 | // citycrc.h - cityhash-c 2 | // CityHash on C 3 | // Copyright (c) 2011-2012, Alexander Nusov 4 | // 5 | // - original copyright notice - 6 | // Copyright (c) 2011 Google, Inc. 
7 | // 8 | // Permission is hereby granted, free of charge, to any person obtaining a copy 9 | // of this software and associated documentation files (the "Software"), to deal 10 | // in the Software without restriction, including without limitation the rights 11 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | // copies of the Software, and to permit persons to whom the Software is 13 | // furnished to do so, subject to the following conditions: 14 | // 15 | // The above copyright notice and this permission notice shall be included in 16 | // all copies or substantial portions of the Software. 17 | // 18 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | // THE SOFTWARE. 25 | // 26 | // CityHash, by Geoff Pike and Jyrki Alakuijala 27 | // 28 | // This file declares the subset of the CityHash functions that require 29 | // _mm_crc32_u64(). See the CityHash README for details. 30 | // 31 | // Functions in the CityHash family are not suitable for cryptography. 32 | 33 | #ifndef CITY_HASH_CRC_H_ 34 | #define CITY_HASH_CRC_H_ 35 | 36 | #include "city.h" 37 | 38 | // Hash function for a byte array. 39 | uint128 CityHashCrc128(const char *s, size_t len); 40 | 41 | // Hash function for a byte array. For convenience, a 128-bit seed is also 42 | // hashed into the result. 43 | uint128 CityHashCrc128WithSeed(const char *s, size_t len, uint128 seed); 44 | 45 | // Hash function for a byte array. Sets result[0] ... result[3]. 
void CityHashCrc256(const char *s, size_t len, uint64 *result);

#endif  // CITY_HASH_CRC_H_

--------------------------------------------------------------------------------
/src/common.h:
--------------------------------------------------------------------------------
// Copyright 2014 Carnegie Mellon University
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "basic_types.h"

// MEHCACHED_BEGIN / MEHCACHED_END bracket the declarations of a header.
// Under C++ they open and close an extern "C" block; under C they expand to nothing.
#ifdef __cplusplus
#define MEHCACHED_BEGIN extern "C" {
#define MEHCACHED_END }
#else
#define MEHCACHED_BEGIN
#define MEHCACHED_END
#endif

// Shorthands for GCC-style attributes.
#define MEHCACHED_UNUSED __attribute__((unused))
#define MEHCACHED_WARN_UNUSED_RESULT __attribute__((warn_unused_result))
#define MEHCACHED_ALWAYS_INLINE __attribute__((always_inline))

// Requests the given alignment (in bytes) for a type or object.
#define MEHCACHED_ALIGNED(alignment) __attribute__ ((aligned (alignment)))

// Compile-time feature switches (MEHCACHED_CONCURRENT, allocator selection, ...).
#include "config.h"

--------------------------------------------------------------------------------
/src/config.h:
--------------------------------------------------------------------------------
// Copyright 2014 Carnegie Mellon University
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

// mehcached configuration
//
// Each switch below is enabled by defining the macro; commented-out #define
// lines document switches that are currently off.

// be verbose (only for debugging)
//#define MEHCACHED_VERBOSE

// use counters to collect statistics
//#define MEHCACHED_COLLECT_STATS


// support for concurrent access
#define MEHCACHED_CONCURRENT


// store mode
// #define MEHCACHED_NO_EVICTION

// Allocator selection: exactly one MEHCACHED_ALLOC_* macro is meant to be
// defined. With the rules below, the pool allocator is chosen unless
// MEHCACHED_NO_EVICTION is defined, in which case the dynamic allocator is chosen.

// use log-structured pool allocator (other MEHCACHED_ALLOC_* must be undef)
//#define MEHCACHED_ALLOC_POOL
#ifndef MEHCACHED_NO_EVICTION
#define MEHCACHED_ALLOC_POOL
#endif

// use malloc allocator for each item (other MEHCACHED_ALLOC_* must be undef)
//#define MEHCACHED_ALLOC_MALLOC

// use custom dynamic allocator for each item (other MEHCACHED_ALLOC_* must be undef)
//#define MEHCACHED_ALLOC_DYNAMIC
#ifdef MEHCACHED_NO_EVICTION
#define MEHCACHED_ALLOC_DYNAMIC
#endif

--------------------------------------------------------------------------------
/src/hash.c:
--------------------------------------------------------------------------------
// Copyright 2014 Carnegie Mellon University
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "hash.h" 16 | 17 | // random vector from http://home.comcast.net/~bretm/hash/10.html 18 | const uint32_t sbox[] = 19 | { 20 | 0xF53E1837, 0x5F14C86B, 0x9EE3964C, 0xFA796D53, 21 | 0x32223FC3, 0x4D82BC98, 0xA0C7FA62, 0x63E2C982, 22 | 0x24994A5B, 0x1ECE7BEE, 0x292B38EF, 0xD5CD4E56, 23 | 0x514F4303, 0x7BE12B83, 0x7192F195, 0x82DC7300, 24 | 0x084380B4, 0x480B55D3, 0x5F430471, 0x13F75991, 25 | 0x3F9CF22C, 0x2FE0907A, 0xFD8E1E69, 0x7B1D5DE8, 26 | 0xD575A85C, 0xAD01C50A, 0x7EE00737, 0x3CE981E8, 27 | 0x0E447EFA, 0x23089DD6, 0xB59F149F, 0x13600EC7, 28 | 0xE802C8E6, 0x670921E4, 0x7207EFF0, 0xE74761B0, 29 | 0x69035234, 0xBFA40F19, 0xF63651A0, 0x29E64C26, 30 | 0x1F98CCA7, 0xD957007E, 0xE71DDC75, 0x3E729595, 31 | 0x7580B7CC, 0xD7FAF60B, 0x92484323, 0xA44113EB, 32 | 0xE4CBDE08, 0x346827C9, 0x3CF32AFA, 0x0B29BCF1, 33 | 0x6E29F7DF, 0xB01E71CB, 0x3BFBC0D1, 0x62EDC5B8, 34 | 0xB7DE789A, 0xA4748EC9, 0xE17A4C4F, 0x67E5BD03, 35 | 0xF3B33D1A, 0x97D8D3E9, 0x09121BC0, 0x347B2D2C, 36 | 0x79A1913C, 0x504172DE, 0x7F1F8483, 0x13AC3CF6, 37 | 0x7A2094DB, 0xC778FA12, 0xADF7469F, 0x21786B7B, 38 | 0x71A445D0, 0xA8896C1B, 0x656F62FB, 0x83A059B3, 39 | 0x972DFE6E, 0x4122000C, 0x97D9DA19, 0x17D5947B, 40 | 0xB1AFFD0C, 0x6EF83B97, 0xAF7F780B, 0x4613138A, 41 | 0x7C3E73A6, 0xCF15E03D, 0x41576322, 0x672DF292, 42 | 0xB658588D, 0x33EBEFA9, 0x938CBF06, 0x06B67381, 43 | 0x07F192C6, 0x2BDA5855, 0x348EE0E8, 0x19DBB6E3, 44 | 0x3222184B, 0xB69D5DBA, 0x7E760B88, 0xAF4D8154, 45 | 0x007A51AD, 0x35112500, 0xC9CD2D7D, 0x4F4FB761, 46 | 0x694772E3, 
0x694C8351, 0x4A7E3AF5, 0x67D65CE1, 47 | 0x9287DE92, 0x2518DB3C, 0x8CB4EC06, 0xD154D38F, 48 | 0xE19A26BB, 0x295EE439, 0xC50A1104, 0x2153C6A7, 49 | 0x82366656, 0x0713BC2F, 0x6462215A, 0x21D9BFCE, 50 | 0xBA8EACE6, 0xAE2DF4C1, 0x2A8D5E80, 0x3F7E52D1, 51 | 0x29359399, 0xFEA1D19C, 0x18879313, 0x455AFA81, 52 | 0xFADFE838, 0x62609838, 0xD1028839, 0x0736E92F, 53 | 0x3BCA22A3, 0x1485B08A, 0x2DA7900B, 0x852C156D, 54 | 0xE8F24803, 0x00078472, 0x13F0D332, 0x2ACFD0CF, 55 | 0x5F747F5C, 0x87BB1E2F, 0xA7EFCB63, 0x23F432F0, 56 | 0xE6CE7C5C, 0x1F954EF6, 0xB609C91B, 0x3B4571BF, 57 | 0xEED17DC0, 0xE556CDA0, 0xA7846A8D, 0xFF105F94, 58 | 0x52B7CCDE, 0x0E33E801, 0x664455EA, 0xF2C70414, 59 | 0x73E7B486, 0x8F830661, 0x8B59E826, 0xBB8AEDCA, 60 | 0xF3D70AB9, 0xD739F2B9, 0x4A04C34A, 0x88D0F089, 61 | 0xE02191A2, 0xD89D9C78, 0x192C2749, 0xFC43A78F, 62 | 0x0AAC88CB, 0x9438D42D, 0x9E280F7A, 0x36063802, 63 | 0x38E8D018, 0x1C42A9CB, 0x92AAFF6C, 0xA24820C5, 64 | 0x007F077F, 0xCE5BC543, 0x69668D58, 0x10D6FF74, 65 | 0xBE00F621, 0x21300BBE, 0x2E9E8F46, 0x5ACEA629, 66 | 0xFA1F86C7, 0x52F206B8, 0x3EDF1A75, 0x6DA8D843, 67 | 0xCF719928, 0x73E3891F, 0xB4B95DD6, 0xB2A42D27, 68 | 0xEDA20BBF, 0x1A58DBDF, 0xA449AD03, 0x6DDEF22B, 69 | 0x900531E6, 0x3D3BFF35, 0x5B24ABA2, 0x472B3E4C, 70 | 0x387F2D75, 0x4D8DBA36, 0x71CB5641, 0xE3473F3F, 71 | 0xF6CD4B7F, 0xBF7D1428, 0x344B64D0, 0xC5CDFCB6, 72 | 0xFE2E0182, 0x2C37A673, 0xDE4EB7A3, 0x63FDC933, 73 | 0x01DC4063, 0x611F3571, 0xD167BFAF, 0x4496596F, 74 | 0x3DEE0689, 0xD8704910, 0x7052A114, 0x068C9EC5, 75 | 0x75D0E766, 0x4D54CC20, 0xB44ECDE2, 0x4ABC653E, 76 | 0x2C550A21, 0x1A52C0DB, 0xCFED03D0, 0x119BAFE2, 77 | 0x876A6133, 0xBC232088, 0x435BA1B2, 0xAE99BBFA, 78 | 0xBB4F08E4, 0xA62B5F49, 0x1DA4B695, 0x336B84DE, 79 | 0xDC813D31, 0x00C134FB, 0x397A98E6, 0x151F0E64, 80 | 0xD9EB3E69, 0xD3C7DF60, 0xD2F2C336, 0x2DDD067B, 81 | 0xBD122835, 0xB0B3BD3A, 0xB0D54E46, 0x8641F1E4, 82 | 0xA0B38F96, 0x51D39199, 0x37A6AD75, 0xDF84EE41, 83 | 0x3C034CBA, 0xACDA62FC, 0x11923B8B, 0x45EF170A, 84 
| }; 85 | 86 | -------------------------------------------------------------------------------- /src/hash.h: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Carnegie Mellon University 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #pragma once 16 | 17 | #include 18 | #include 19 | #include 20 | #include "citycrc.h" 21 | 22 | /* 23 | static uint32_t crc32_le(uint32_t crc, const uint8_t *data, size_t len) 24 | { 25 | // SSE 4.2 & 64-bit required 26 | size_t words = len >> 3; 27 | size_t tail = len & 7; 28 | while (words) 29 | { 30 | words--; 31 | crc = (uint32_t)__builtin_ia32_crc32di(crc, *(const uint64_t *)data); 32 | data += 8; 33 | } 34 | while (tail) 35 | { 36 | tail--; 37 | crc = __builtin_ia32_crc32qi(crc, *data); 38 | data++; 39 | } 40 | return crc; 41 | } 42 | */ 43 | 44 | extern const uint32_t sbox[]; 45 | 46 | static uint64_t tab_hash(const uint8_t *key, size_t len) 47 | { 48 | // a large prime number 49 | uint32_t h = 4294967291U; 50 | while (len) 51 | { 52 | len--; 53 | // tabulation hashing -- Carter and Wegman (STOC'77) 54 | h ^= sbox[*key]; 55 | key++; 56 | } 57 | return (uint64_t)h; 58 | } 59 | 60 | static uint64_t sbox_hash(uint8_t *key, size_t len) 61 | { 62 | // a large prime number 63 | uint32_t h = 4294967291U; 64 | while (len) 65 | { 66 | len--; 67 | h ^= sbox[*key]; 68 | h *= 3; 69 | key++; 70 | } 71 | return (uint64_t)h; 72 | } 73 
| 74 | static uint64_t noop_hash(const uint8_t *key, size_t len) 75 | { 76 | assert(len == sizeof(uint64_t)); 77 | (void)len; 78 | return *(uint64_t *)key; 79 | } 80 | 81 | static uint64_t mul_hash(const uint8_t *key, size_t len) 82 | { 83 | assert(len == sizeof(uint64_t)); 84 | (void)len; 85 | // a large prime number 86 | return *(uint64_t *)key * 18446744073709551557UL; 87 | } 88 | 89 | // MD4 truncated to 12 B 90 | #include 91 | static uint64_t hash_md4(const uint8_t *key, size_t len) 92 | { 93 | size_t temp_hash[(MD4_DIGEST_LENGTH + sizeof(size_t) - 1) / sizeof(size_t)]; 94 | MD4(key, len, (uint8_t *)temp_hash); 95 | assert(8 <= MD4_DIGEST_LENGTH); 96 | return *(size_t *)temp_hash; 97 | } 98 | 99 | static uint64_t hash(const uint8_t *key, size_t len) 100 | { 101 | //return noop_hash(key, len); 102 | //return mul_hash(key, len); 103 | //return tab_hash(key, len); 104 | //return (uint64_t)crc32_le(0xffffffff, key, len); 105 | //return (uint64_t)crc32_le(0xffffffff, key, len) * 18446744073709551557UL; 106 | //return sbox_hash(key, len); 107 | //return hash_md4(key, len); 108 | //return CityHashCrc128((const char *)key, len).first; 109 | return CityHash64((const char *)key, len); 110 | } 111 | 112 | -------------------------------------------------------------------------------- /src/load.c: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Carnegie Mellon University 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include 16 | #include 17 | 18 | #include "mehcached.h" 19 | #include "hash.h" 20 | 21 | void 22 | test_load() 23 | { 24 | printf("test_load()\n"); 25 | 26 | const size_t num_items = 1048576; 27 | //const size_t num_items = 1048576 * 10; 28 | 29 | struct mehcached_table table_o; 30 | struct mehcached_table *table = &table_o; 31 | size_t numa_nodes[] = {(size_t)-1}; 32 | size_t alloc_overhead = sizeof(struct mehcached_item); 33 | #ifdef MEHCACHED_ALLOC_DYNAMIC 34 | alloc_overhead += MEHCAHCED_DYNAMIC_OVERHEAD; 35 | #endif 36 | mehcached_table_init(table, (num_items + MEHCACHED_ITEMS_PER_BUCKET - 1) / MEHCACHED_ITEMS_PER_BUCKET, 1, num_items * /*MEHCACHED_ROUNDUP64*/(alloc_overhead + 8 + 8), false, false, false, numa_nodes[0], numa_nodes, MEHCACHED_MTH_THRESHOLD_FIFO); 37 | 38 | bool first_failure = false; 39 | size_t first_failure_i = 0; 40 | size_t success_count = 0; 41 | 42 | size_t i; 43 | for (i = 0; i < num_items; i++) 44 | { 45 | size_t key = i; 46 | size_t value = i; 47 | uint64_t key_hash = hash((const uint8_t *)&key, sizeof(key)); 48 | mehcached_set(0, table, key_hash, (const uint8_t *)&key, sizeof(key), (const uint8_t *)&value, sizeof(value), 0, false); 49 | } 50 | 51 | for (i = 0; i < num_items; i++) 52 | { 53 | size_t key = i; 54 | size_t value; 55 | size_t value_len = sizeof(value); 56 | uint64_t key_hash = hash((const uint8_t *)&key, sizeof(key)); 57 | 58 | if (mehcached_get(0, table, key_hash, (const uint8_t *)&key, sizeof(key), (uint8_t *)&value, &value_len, NULL, false)) 59 | success_count++; 60 | else 61 | { 62 | if (!first_failure) 63 | { 64 | first_failure = true; 65 | first_failure_i = i; 66 | } 67 | } 68 | } 69 | 70 | printf("first_failure: %zu (%.2f%%)\n", first_failure_i, 100. * (double)first_failure_i / (double)num_items); 71 | printf("success_count: %zu (%.2f%%)\n", success_count, 100. 
* (double)success_count / (double)num_items); 72 | 73 | //mehcached_print_buckets(table); 74 | mehcached_print_stats(table); 75 | 76 | mehcached_table_free(table); 77 | } 78 | 79 | int 80 | main(int argc MEHCACHED_UNUSED, const char *argv[] MEHCACHED_UNUSED) 81 | { 82 | const size_t page_size = 1048576 * 2; 83 | const size_t num_numa_nodes = 2; 84 | const size_t num_pages_to_try = 16384; 85 | const size_t num_pages_to_reserve = 16384 - 2048; // give 2048 pages to dpdk 86 | 87 | mehcached_shm_init(page_size, num_numa_nodes, num_pages_to_try, num_pages_to_reserve); 88 | 89 | test_load(); 90 | 91 | return EXIT_SUCCESS; 92 | } 93 | 94 | -------------------------------------------------------------------------------- /src/mehcached.h: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Carnegie Mellon University 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #pragma once 16 | 17 | #include "table.c" 18 | #include "alloc_pool.c" 19 | #include "alloc_malloc.c" 20 | #include "alloc_dynamic.c" 21 | 22 | -------------------------------------------------------------------------------- /src/net_common.c: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Carnegie Mellon University 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "net_common.h" 16 | #include "util.h" 17 | #include "stopwatch.h" 18 | 19 | #include 20 | #include 21 | #include 22 | 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | 30 | #define MEHCACHED_MBUF_ENTRY_SIZE (2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM) 31 | #define MEHCACHED_MBUF_SIZE (MEHCACHED_MAX_PORTS * MEHCACHED_MAX_QUEUES * 4096) // TODO: need to divide by numa node count 32 | 33 | #define MEHCACHED_MAX_PKT_BURST (32) 34 | 35 | #define MEHCACHED_RX_PTHRESH (8) 36 | #define MEHCACHED_RX_HTHRESH (8) 37 | #define MEHCACHED_RX_WTHRESH (4) 38 | 39 | #define MEHCACHED_TX_PTHRESH (36) 40 | #define MEHCACHED_TX_HTHRESH (0) 41 | #define MEHCACHED_TX_WTHRESH (0) 42 | 43 | #define RTE_TEST_RX_DESC_DEFAULT (128) 44 | #define RTE_TEST_TX_DESC_DEFAULT (512) 45 | static uint16_t mehcached_num_rx_desc = RTE_TEST_RX_DESC_DEFAULT; 46 | static uint16_t mehcached_num_tx_desc = RTE_TEST_TX_DESC_DEFAULT; 47 | 48 | //#define 
MEHCACHED_USE_QUICK_SLEEP 49 | //#define MEHCACHED_USE_DEEP_SLEEP 50 | 51 | static const struct rte_eth_conf mehcached_port_conf = { 52 | .rxmode = { 53 | .max_rx_pkt_len = ETHER_MAX_LEN, 54 | .split_hdr_size = 0, 55 | .header_split = 0, /**< Header Split disabled */ 56 | .hw_ip_checksum = 0, /**< IP checksum offload disabled */ 57 | .hw_vlan_filter = 0, /**< VLAN filtering disabled */ 58 | .jumbo_frame = 0, /**< Jumbo Frame Support disabled */ 59 | .hw_strip_crc = 0, /**< CRC stripped by hardware */ 60 | .mq_mode = ETH_MQ_RX_NONE, 61 | }, 62 | .txmode = { 63 | .mq_mode = ETH_MQ_TX_NONE, 64 | }, 65 | .fdir_conf = { 66 | //.mode = RTE_FDIR_MODE_NONE, 67 | .mode = RTE_FDIR_MODE_PERFECT, 68 | .pballoc = RTE_FDIR_PBALLOC_64K, 69 | //.pballoc = RTE_FDIR_PBALLOC_256K, 70 | #ifndef NDEBUG 71 | .status = RTE_FDIR_NO_REPORT_STATUS, 72 | #else 73 | .status = RTE_FDIR_REPORT_STATUS_ALWAYS, 74 | #endif 75 | .flexbytes_offset = 0, 76 | .drop_queue = 0, 77 | }, 78 | }; 79 | 80 | static const struct rte_eth_rxconf mehcached_rx_conf = { 81 | .rx_thresh = { 82 | .pthresh = MEHCACHED_RX_PTHRESH, 83 | .hthresh = MEHCACHED_RX_HTHRESH, 84 | .wthresh = MEHCACHED_RX_WTHRESH, 85 | }, 86 | .rx_free_thresh = 32, // for DPDK >= 1.3 87 | .rx_drop_en = 0, // (does not seem to be used) 88 | }; 89 | 90 | static const struct rte_eth_txconf mehcached_tx_conf = { 91 | .tx_thresh = { 92 | .pthresh = MEHCACHED_TX_PTHRESH, 93 | .hthresh = MEHCACHED_TX_HTHRESH, 94 | .wthresh = MEHCACHED_TX_WTHRESH, 95 | }, 96 | .tx_free_thresh = 0, /* Use PMD default values */ 97 | .tx_rs_thresh = 0, /* Use PMD default values */ 98 | #ifndef MEHCACHED_USE_SOFT_FDIR 99 | .txq_flags = (ETH_TXQ_FLAGS_NOMULTSEGS | ETH_TXQ_FLAGS_NOREFCOUNT | ETH_TXQ_FLAGS_NOMULTMEMP | ETH_TXQ_FLAGS_NOOFFLOADS), 100 | #else 101 | .txq_flags = (ETH_TXQ_FLAGS_NOMULTSEGS | ETH_TXQ_FLAGS_NOREFCOUNT | ETH_TXQ_FLAGS_NOOFFLOADS), 102 | #endif 103 | }; 104 | 105 | 106 | struct mehcached_queue_state { 107 | struct rte_mbuf 
*rx_mbufs[MEHCACHED_MAX_PKT_BURST]; 108 | uint16_t rx_length; 109 | uint16_t rx_next_to_use; 110 | 111 | #ifdef MEHCACHED_USE_QUICK_SLEEP 112 | uint16_t rx_quick_sleep; 113 | uint16_t rx_full_quick_sleep_count; 114 | #endif 115 | #ifdef MEHCACHED_USE_DEEP_SLEEP 116 | uint64_t rx_last_seen; 117 | uint64_t rx_deep_sleep_until; 118 | uint64_t rx_inter_batch_time; 119 | #endif 120 | 121 | struct rte_mbuf *tx_mbufs[MEHCACHED_MAX_PKT_BURST]; 122 | uint16_t tx_length; 123 | 124 | uint64_t num_rx_burst; 125 | uint64_t num_rx_received; 126 | 127 | uint64_t num_tx_burst; 128 | uint64_t num_tx_sent; 129 | uint64_t num_tx_dropped; 130 | } __rte_cache_aligned; 131 | 132 | static struct rte_mempool *mehcached_pktmbuf_pool[MEHCACHED_MAX_NUMA_NODES]; 133 | 134 | //static uint16_t mehcached_lcore_to_queue[MEHCACHED_MAX_LCORES]; 135 | //static struct ether_addr mehcached_eth_addr[MEHCACHED_MAX_PORTS]; 136 | 137 | static struct mehcached_queue_state *mehcached_queue_states[MEHCACHED_MAX_QUEUES * MEHCACHED_MAX_PORTS]; 138 | 139 | struct rte_mbuf * 140 | mehcached_packet_alloc() 141 | { 142 | return rte_pktmbuf_alloc(mehcached_pktmbuf_pool[rte_socket_id()]); 143 | } 144 | 145 | void 146 | mehcached_packet_free(struct rte_mbuf *mbuf) 147 | { 148 | rte_pktmbuf_free(mbuf); 149 | } 150 | 151 | struct rte_mbuf * 152 | mehcached_receive_packet(uint8_t port_id) 153 | { 154 | uint32_t lcore = rte_lcore_id(); 155 | // uint16_t queue = mehcached_lcore_to_queue[lcore]; 156 | // assert(queue != (uint16_t)-1); 157 | uint16_t queue = (uint16_t)lcore; 158 | struct mehcached_queue_state *state = mehcached_queue_states[queue * MEHCACHED_MAX_PORTS + port_id]; 159 | 160 | if (state->rx_next_to_use == state->rx_length) 161 | { 162 | #ifdef MEHCACHED_USE_QUICK_SLEEP 163 | if (state->rx_quick_sleep > 0) 164 | { 165 | // struct rte_mbuf *t = mehcached_packet_alloc(); 166 | // if (t == NULL) 167 | // printf("cannot alloc mbuf\n"); 168 | // mehcached_packet_free(t); 169 | state->rx_quick_sleep--; 170 | return 
NULL; 171 | } 172 | #endif 173 | 174 | #ifdef MEHCACHED_USE_DEEP_SLEEP 175 | uint64_t now = mehcached_stopwatch_now(); 176 | 177 | // too small value makes deep sleep ineffective 178 | // too large value may incorrectly penalize a queue with occasional underflows 179 | const uint64_t max_deep_sleep_time = mehcached_stopwatch_1_usec * 50; 180 | 181 | // still need to sleep? 182 | if (state->rx_deep_sleep_until - now <= max_deep_sleep_time) 183 | { 184 | // assumed invariant: rx_deep_sleep_until <= now + max_deep_sleep_time 185 | // (when no overflow happens) 186 | // the condition in the if statement checks the sleep time correctly under this invariant 187 | return NULL; 188 | } 189 | #endif 190 | 191 | state->rx_length = rte_eth_rx_burst(port_id, queue, state->rx_mbufs, MEHCACHED_MAX_PKT_BURST); 192 | state->num_rx_received += state->rx_length; 193 | state->rx_next_to_use = 0; 194 | state->num_rx_burst++; 195 | 196 | #ifdef MEHCACHED_USE_QUICK_SLEEP 197 | // sleep if no enough RX packets were received 198 | // this helps reduce PCIe traffic when # of RX packets is imbalanced across queues used by the same core 199 | state->rx_quick_sleep = (uint16_t)(MEHCACHED_MAX_PKT_BURST - state->rx_length); 200 | if (state->rx_length != 0) 201 | state->rx_full_quick_sleep_count = 0; 202 | else 203 | { 204 | if (state->rx_full_quick_sleep_count < 1024) 205 | state->rx_full_quick_sleep_count++; 206 | state->rx_quick_sleep = (uint16_t)(state->rx_quick_sleep * state->rx_full_quick_sleep_count); 207 | } 208 | 209 | #endif 210 | 211 | #ifdef MEHCACHED_USE_DEEP_SLEEP 212 | uint64_t to_sleep; 213 | uint64_t inter_batch_time; 214 | 215 | // adjust sleep time so that the next rx_burst can get MEHCACHED_MAX_PKT_BURST packets 216 | // note (state->rx_length + 1): this makes inter_batch_time slightly smaller than actual expectation 217 | // because we do not know whether there are additional subsequent batches 218 | inter_batch_time = (now - state->rx_last_seen) * MEHCACHED_MAX_PKT_BURST / 
(state->rx_length + 1); 219 | if (inter_batch_time > max_deep_sleep_time) 220 | inter_batch_time = max_deep_sleep_time; 221 | state->rx_last_seen = now; 222 | 223 | state->rx_inter_batch_time = (state->rx_inter_batch_time * 7 + inter_batch_time * 1) / 8; 224 | 225 | // deep sleep to prevent excessive PCIe traffic when RX across cores is imbalanced 226 | state->rx_deep_sleep_until = now + state->rx_inter_batch_time; 227 | 228 | // for debugging batch size 229 | // if ((state->num_rx_burst & 0xffffUL) == 0) 230 | // { 231 | // printf("port = %zu, queue = %zu; average_batch size = %lf, inter batch time = %lf us\n", port, queue, (double)state->num_rx_received / (double)state->num_rx_burst, (double)state->rx_inter_batch_time / (double)mehcached_stopwatch_1_usec); 232 | // state->num_rx_received = 0; 233 | // state->num_rx_burst = 0; 234 | // } 235 | #endif 236 | } 237 | 238 | if (state->rx_next_to_use < state->rx_length) 239 | { 240 | #ifndef NDEBUG 241 | //printf("mehcached_receive_packet: lcore=%zu, port=%zu, queue=%zu\n", lcore, port, queue); 242 | #endif 243 | return state->rx_mbufs[state->rx_next_to_use++]; 244 | } 245 | else 246 | return NULL; 247 | } 248 | 249 | void 250 | mehcached_receive_packets(uint8_t port_id, struct rte_mbuf **mbufs, size_t *in_out_num_mbufs) 251 | { 252 | uint32_t lcore = rte_lcore_id(); 253 | // uint16_t queue = mehcached_lcore_to_queue[lcore]; 254 | // assert(queue != (uint16_t)-1); 255 | uint16_t queue = (uint16_t)lcore; 256 | struct mehcached_queue_state *state = mehcached_queue_states[queue * MEHCACHED_MAX_PORTS + port_id]; 257 | 258 | *in_out_num_mbufs = (size_t)rte_eth_rx_burst(port_id, queue, mbufs, (uint16_t)*in_out_num_mbufs); 259 | state->num_rx_received += *in_out_num_mbufs; 260 | state->num_rx_burst++; 261 | } 262 | 263 | void 264 | mehcached_send_packet(uint8_t port_id, struct rte_mbuf *mbuf) 265 | { 266 | uint32_t lcore = rte_lcore_id(); 267 | // uint16_t queue = mehcached_lcore_to_queue[lcore]; 268 | // assert(queue != 
(uint16_t)-1); 269 | uint16_t queue = (uint16_t)lcore; 270 | struct mehcached_queue_state *state = mehcached_queue_states[queue * MEHCACHED_MAX_PORTS + port_id]; 271 | 272 | #ifndef NDEBUG 273 | //printf("mehcached_send_packet: lcore=%zu, port=%zu, queue=%zu\n", lcore, port, queue); 274 | #endif 275 | 276 | state->tx_mbufs[state->tx_length++] = mbuf; 277 | if (state->tx_length == MEHCACHED_MAX_PKT_BURST) 278 | { 279 | uint16_t count = rte_eth_tx_burst(port_id, queue, state->tx_mbufs, MEHCACHED_MAX_PKT_BURST); 280 | state->num_tx_sent += count; 281 | state->num_tx_dropped += (uint64_t)(MEHCACHED_MAX_PKT_BURST - count); 282 | for (; count < MEHCACHED_MAX_PKT_BURST; count++) 283 | rte_pktmbuf_free(state->tx_mbufs[count]); 284 | state->tx_length = 0; 285 | state->num_tx_burst++; 286 | } 287 | } 288 | 289 | void 290 | mehcached_send_packet_flush(uint8_t port_id) 291 | { 292 | uint32_t lcore = rte_lcore_id(); 293 | // uint16_t queue = mehcached_lcore_to_queue[lcore]; 294 | // assert(queue != (uint16_t)-1); 295 | uint16_t queue = (uint16_t)lcore; 296 | struct mehcached_queue_state *state = mehcached_queue_states[queue * MEHCACHED_MAX_PORTS + port_id]; 297 | 298 | if (state->tx_length > 0) 299 | { 300 | uint16_t count = rte_eth_tx_burst(port_id, queue, state->tx_mbufs, state->tx_length); 301 | state->num_tx_sent += count; 302 | state->num_tx_dropped += (uint64_t)(state->tx_length - count); 303 | for (; count < state->tx_length; count++) 304 | rte_pktmbuf_free(state->tx_mbufs[count]); 305 | state->tx_length = 0; 306 | state->num_tx_burst++; 307 | } 308 | } 309 | 310 | void 311 | mehcached_get_stats(uint8_t port_id, uint64_t *out_num_rx_burst, uint64_t *out_num_rx_received, uint64_t *out_num_tx_burst, uint64_t *out_num_tx_sent, uint64_t *out_num_tx_dropped) 312 | { 313 | mehcached_get_stats_lcore(port_id, rte_lcore_id(), out_num_rx_burst, out_num_rx_received, out_num_tx_burst, out_num_tx_sent, out_num_tx_dropped); 314 | } 315 | 316 | void 317 | 
mehcached_get_stats_lcore(uint8_t port_id, uint32_t lcore, uint64_t *out_num_rx_burst, uint64_t *out_num_rx_received, uint64_t *out_num_tx_burst, uint64_t *out_num_tx_sent, uint64_t *out_num_tx_dropped) 318 | { 319 | // uint16_t queue = mehcached_lcore_to_queue[lcore]; 320 | // assert(queue != (uint16_t)-1); 321 | uint16_t queue = (uint16_t)lcore; 322 | struct mehcached_queue_state *state = mehcached_queue_states[queue * MEHCACHED_MAX_PORTS + port_id]; 323 | 324 | if (out_num_rx_burst) 325 | *out_num_rx_burst = state->num_rx_burst; 326 | if (out_num_rx_received) 327 | *out_num_rx_received = state->num_rx_received; 328 | if (out_num_tx_burst) 329 | *out_num_tx_burst = state->num_tx_burst; 330 | if (out_num_tx_sent) 331 | *out_num_tx_sent = state->num_tx_sent; 332 | if (out_num_tx_dropped) 333 | *out_num_tx_dropped = state->num_tx_dropped; 334 | 335 | //struct rte_eth_stats stats; 336 | //rte_eth_stats_get(port, &stats); 337 | //printf("port %zu i %lu o %lu ie %lu oe %lu\n", port, stats.ipackets, stats.opackets, stats.ierrors, stats.oerrors); 338 | } 339 | 340 | struct rte_mbuf * 341 | mehcached_clone_packet(struct rte_mbuf *mbuf_src) 342 | { 343 | return rte_pktmbuf_clone(mbuf_src, mehcached_pktmbuf_pool[rte_socket_id()]); 344 | } 345 | 346 | bool 347 | mehcached_init_network(uint64_t cpu_mask, uint64_t port_mask, uint8_t *out_num_ports) 348 | { 349 | int ret; 350 | size_t i; 351 | 352 | size_t num_numa_nodes = 0; 353 | uint16_t num_queues = 0; 354 | 355 | assert(rte_lcore_count() <= MEHCACHED_MAX_LCORES); 356 | 357 | // count required queues 358 | for (i = 0; i < rte_lcore_count(); i++) 359 | { 360 | if ((cpu_mask & ((uint64_t)1 << i)) != 0) 361 | num_queues++; 362 | } 363 | assert(num_numa_nodes <= MEHCACHED_MAX_QUEUES); 364 | 365 | // count numa nodes 366 | for (i = 0; i < rte_lcore_count(); i++) 367 | { 368 | uint32_t socket_id = (uint32_t)rte_lcore_to_socket_id((unsigned int)i); 369 | if (num_numa_nodes <= socket_id) 370 | num_numa_nodes = socket_id + 1; 371 | 
} 372 | assert(num_numa_nodes <= MEHCACHED_MAX_NUMA_NODES); 373 | 374 | // initialize pktmbuf 375 | for (i = 0; i < num_numa_nodes; i++) 376 | { 377 | printf("allocating pktmbuf on node %zu... \n", i); 378 | char pool_name[64]; 379 | snprintf(pool_name, sizeof(pool_name), "pktmbuf_pool%zu", i); 380 | // if this is not big enough, RX/TX performance may not be consistent, e.g., between CREW and CRCW experiments 381 | // the maximum cache size can be adjusted in DPDK's .config file: CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE 382 | const unsigned int cache_size = MEHCACHED_MAX_PORTS * 1024; 383 | mehcached_pktmbuf_pool[i] = rte_mempool_create(pool_name, MEHCACHED_MBUF_SIZE, MEHCACHED_MBUF_ENTRY_SIZE, cache_size, sizeof(struct rte_pktmbuf_pool_private), rte_pktmbuf_pool_init, NULL, rte_pktmbuf_init, NULL, (int)i, 0); 384 | if (mehcached_pktmbuf_pool[i] == NULL) 385 | { 386 | fprintf(stderr, "failed to allocate mbuf for numa node %zu\n", i); 387 | return false; 388 | } 389 | } 390 | 391 | // initialize driver 392 | #ifdef RTE_LIBRTE_IXGBE_PMD 393 | printf("initializing PMD\n"); 394 | if (rte_ixgbe_pmd_init() < 0) 395 | { 396 | fprintf(stderr, "failed to initialize ixgbe pmd\n"); 397 | return false; 398 | } 399 | #endif 400 | 401 | printf("probing PCI\n"); 402 | if (rte_eal_pci_probe() < 0) 403 | { 404 | fprintf(stderr, "failed to probe PCI\n"); 405 | return false; 406 | } 407 | 408 | // TODO: initialize and set up timer for forced TX 409 | 410 | // check port and queue limits 411 | uint8_t num_ports = rte_eth_dev_count(); 412 | assert(num_ports <= MEHCACHED_MAX_PORTS); 413 | *out_num_ports = num_ports; 414 | 415 | printf("checking queue limits\n"); 416 | uint8_t port_id; 417 | for (port_id = 0; port_id < num_ports; port_id++) 418 | { 419 | if ((port_mask & ((uint64_t)1 << port_id)) == 0) 420 | continue; 421 | 422 | struct rte_eth_dev_info dev_info; 423 | rte_eth_dev_info_get((uint8_t)port_id, &dev_info); 424 | 425 | if (num_queues > dev_info.max_tx_queues || num_queues > 
dev_info.max_rx_queues) 426 | { 427 | fprintf(stderr, "device supports too few queues\n"); 428 | return false; 429 | } 430 | } 431 | 432 | // map queues to lcores 433 | uint32_t lcore = 0; 434 | // uint16_t queue = 0; 435 | // for (lcore = 0; lcore < rte_lcore_count(); lcore++) 436 | // { 437 | // if ((cpu_mask & ((uint64_t)1 << i)) == 0) 438 | // { 439 | // mehcached_lcore_to_queue[lcore] = (uint16_t)-1; 440 | // continue; 441 | // } 442 | 443 | // mehcached_lcore_to_queue[lcore] = queue; 444 | // #ifndef NDEBUG 445 | // printf("queue %hhu mapped to lcore %hu\n", queue, lcore); 446 | // #endif 447 | // queue++; 448 | // } 449 | 450 | // initialize ports 451 | for (port_id = 0; port_id < num_ports; port_id++) 452 | { 453 | if ((port_mask & ((uint64_t)1 << port_id)) == 0) 454 | continue; 455 | 456 | printf("initializing port %hhu...\n", port_id); 457 | 458 | // get mac address 459 | //rte_eth_macaddr_get((uint8_t)port, &mehcached_eth_addr[port]); 460 | 461 | ret = rte_eth_dev_configure(port_id, num_queues, num_queues, &mehcached_port_conf); 462 | if (ret < 0) 463 | { 464 | fprintf(stderr, "failed to configure port %hhu (err=%d)\n", port_id, ret); 465 | return false; 466 | } 467 | 468 | uint32_t lcore; 469 | for (lcore = 0; lcore < rte_lcore_count(); lcore++) 470 | { 471 | // uint16_t queue = mehcached_lcore_to_queue[lcore]; 472 | // if (queue == (uint16_t)-1) 473 | // continue; 474 | uint16_t queue = (uint16_t)lcore; 475 | 476 | size_t numa_node = rte_lcore_to_socket_id((unsigned int)lcore); 477 | 478 | ret = rte_eth_rx_queue_setup(port_id, queue, (unsigned int)mehcached_num_rx_desc, (unsigned int)numa_node, &mehcached_rx_conf, mehcached_pktmbuf_pool[numa_node]); 479 | if (ret < 0) 480 | { 481 | fprintf(stderr, "failed to configure port %hhu rx_queue %hu (err=%d)\n", port_id, queue, ret); 482 | return false; 483 | } 484 | 485 | ret = rte_eth_tx_queue_setup(port_id, queue, (unsigned int)mehcached_num_tx_desc, (unsigned int)numa_node, &mehcached_tx_conf); 486 | if 
(ret < 0) 487 | { 488 | fprintf(stderr, "failed to configure port %hhu tx_queue %hu (err=%d)\n", port_id, queue, ret); 489 | return false; 490 | } 491 | } 492 | 493 | // start device 494 | ret = rte_eth_dev_start(port_id); 495 | if (ret < 0) 496 | { 497 | fprintf(stderr, "failed to start port %hhu (err=%d)\n", port_id, ret); 498 | return false; 499 | } 500 | 501 | // // turn on promiscuous mode 502 | // #ifndef NDEBUG 503 | // printf("setting promiscuous mode on port %hhu...\n", port_id); 504 | // #endif 505 | // rte_eth_promiscuous_enable(port_id); 506 | } 507 | 508 | // the following takes some time, but this ensures the device ready for full speed RX/TX when the initialization is done 509 | // without this, the initial packet transmission may be blocked 510 | for (port_id = 0; port_id < num_ports; port_id++) 511 | { 512 | if ((port_mask & ((uint64_t)1 << port_id)) == 0) 513 | continue; 514 | 515 | printf("querying port %hhu... ", port_id); 516 | fflush(stdout); 517 | 518 | struct rte_eth_link link; 519 | rte_eth_link_get(port_id, &link); 520 | if (!link.link_status) 521 | { 522 | printf("link down\n"); 523 | return false; 524 | } 525 | 526 | printf("%hu Gbps (%s)\n", link.link_speed / 1000, (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 
("full-duplex") : ("half-duplex")); 527 | } 528 | 529 | memset(mehcached_queue_states, 0, sizeof(mehcached_queue_states)); 530 | for (port_id = 0; port_id < num_ports; port_id++) 531 | for (lcore = 0; lcore < rte_lcore_count(); lcore++) 532 | { 533 | uint16_t queue = (uint16_t)lcore; 534 | mehcached_queue_states[queue * MEHCACHED_MAX_PORTS + port_id] = mehcached_eal_malloc_lcore(sizeof(struct mehcached_queue_state), lcore); 535 | memset(mehcached_queue_states[queue * MEHCACHED_MAX_PORTS + port_id], 0, sizeof(struct mehcached_queue_state)); 536 | } 537 | 538 | return true; 539 | } 540 | 541 | void 542 | mehcached_free_network(uint64_t port_mask) 543 | { 544 | uint8_t port_id; 545 | uint8_t num_ports = rte_eth_dev_count(); 546 | 547 | for (port_id = 0; port_id < num_ports; port_id++) 548 | { 549 | if ((port_mask & ((uint64_t)1 << port_id)) == 0) 550 | continue; 551 | 552 | printf("stopping port %hhu...\n", port_id); 553 | rte_eth_dev_stop(port_id); 554 | } 555 | 556 | for (port_id = 0; port_id < num_ports; port_id++) 557 | { 558 | if ((port_mask & ((uint64_t)1 << port_id)) == 0) 559 | continue; 560 | 561 | printf("closing port %hhu...\n", port_id); 562 | rte_eth_dev_close(port_id); 563 | } 564 | } 565 | 566 | bool 567 | mehcached_set_dst_port_mask(uint8_t port_id, uint16_t l4_dst_port_mask) 568 | { 569 | struct rte_fdir_masks mask; 570 | memset(&mask, 0, sizeof(mask)); 571 | mask.dst_port_mask = l4_dst_port_mask; // this must be little-endian (host) 572 | 573 | int ret = rte_eth_dev_fdir_set_masks(port_id, &mask); 574 | if (ret < 0) 575 | { 576 | fprintf(stderr, "failed to set perfect filter mask on port %hhu (err=%d)\n", port_id, ret); 577 | return false; 578 | } 579 | 580 | return true; 581 | } 582 | 583 | bool 584 | mehcached_set_dst_port_mapping(uint8_t port_id, uint16_t l4_dst_port, uint32_t lcore) 585 | { 586 | // uint16_t queue = mehcached_lcore_to_queue[lcore]; 587 | // if (queue == (uint16_t)-1) 588 | // { 589 | // fprintf(stderr, "no queue on port %hhu 
exists for lcore %u\n", port_id, lcore); 590 | // return false; 591 | // } 592 | uint16_t queue = (uint16_t)lcore; 593 | 594 | struct rte_fdir_filter filter; 595 | memset(&filter, 0, sizeof(filter)); 596 | filter.iptype = RTE_FDIR_IPTYPE_IPV4; 597 | filter.l4type = RTE_FDIR_L4TYPE_UDP; 598 | filter.port_dst = rte_cpu_to_be_16((uint16_t)l4_dst_port); // this must be big-endian 599 | uint16_t soft_id = (uint16_t)l4_dst_port; // will be unique on each port (with perfect filter) 600 | 601 | int ret = rte_eth_dev_fdir_add_perfect_filter(port_id, &filter, soft_id, (uint8_t)queue, 0); 602 | if (ret < 0) 603 | { 604 | fprintf(stderr, "failed to add perfect filter entry on port %hhu (err=%d)\n", port_id, ret); 605 | return false; 606 | } 607 | 608 | return true; 609 | } 610 | -------------------------------------------------------------------------------- /src/net_common.h: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Carnegie Mellon University 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #pragma once 16 | 17 | #include "common.h" 18 | 19 | #include 20 | 21 | #define MEHCACHED_MAX_LCORES (16) 22 | #define MEHCACHED_MAX_NUMA_NODES (2) 23 | 24 | #define MEHCACHED_MAX_PORTS (8) 25 | #define MEHCACHED_MAX_QUEUES (16) 26 | 27 | struct rte_mbuf * 28 | mehcached_packet_alloc(); 29 | 30 | void 31 | mehcached_packet_free(struct rte_mbuf *mbuf); 32 | 33 | struct rte_mbuf * 34 | mehcached_receive_packet(uint8_t port_id); 35 | 36 | void 37 | mehcached_receive_packets(uint8_t port_id, struct rte_mbuf **mbufs, size_t *in_out_num_mbufs); 38 | 39 | void 40 | mehcached_send_packet(uint8_t port_id, struct rte_mbuf *mbuf); 41 | 42 | void 43 | mehcached_send_packet_flush(uint8_t port_id); 44 | 45 | void 46 | mehcached_get_stats(uint8_t port_id, uint64_t *out_num_rx_burst, uint64_t *out_num_rx_received, uint64_t *out_num_tx_burst, uint64_t *out_num_tx_sent, uint64_t *out_num_tx_dropped); 47 | 48 | void 49 | mehcached_get_stats_lcore(uint8_t port_id, uint32_t lcore, uint64_t *out_num_rx_burst, uint64_t *out_num_rx_received, uint64_t *out_num_tx_burst, uint64_t *out_num_tx_sent, uint64_t *out_num_tx_dropped); 50 | 51 | struct rte_mbuf * 52 | mehcached_clone_packet(struct rte_mbuf *mbuf_src); 53 | 54 | bool 55 | mehcached_init_network(uint64_t cpu_mask, uint64_t port_mask, uint8_t *out_num_ports); 56 | 57 | void 58 | mehcached_free_network(uint64_t port_mask); 59 | 60 | bool 61 | mehcached_set_dst_port_mask(uint8_t port_id, uint16_t l4_dst_port_mask); 62 | 63 | bool 64 | mehcached_set_dst_port_mapping(uint8_t port_id, uint16_t l4_dst_port, uint32_t lcore); 65 | -------------------------------------------------------------------------------- /src/netbench_analysis.c: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Carnegie Mellon University 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
// Hashes the in-memory bytes of a 64-bit integer key with hash().
static
uint64_t
mehcached_hash_key(uint64_t int_key)
{
    // the length is derived from the key itself instead of a literal 8
    return hash((const uint8_t *)&int_key, sizeof(int_key));
}

// Prints every element of arr as "[index]value " on one line, followed by a newline.
//   arr          - values to print
//   num_elements - number of entries in arr (0 prints just the newline)
static
void
mehcached_print_array_uint64_t(uint64_t arr[], size_t num_elements)
{
    size_t i;
    for (i = 0; i < num_elements; i++)
    {
        // uint64_t is not unsigned long on every platform; widen explicitly so the
        // conversion specifier always matches the argument type
        printf("[%3zu]%llu ", i, (unsigned long long)arr[i]);
    }
    printf("\n");
}
// Adds the concurrent-read (CR) share of `load` to thread_load[].
// The read share is load * get_ratio.  With isolated NUMA nodes it is divided evenly
// among the threads of numa_node_id (threads t with t % num_numa_nodes == numa_node_id);
// otherwise it is divided evenly among all num_threads threads and numa_node_id is ignored.
static
void
mehcached_spread_read_load(uint64_t thread_load[], uint64_t load, uint8_t numa_node_id, uint8_t num_threads, uint8_t num_numa_nodes, double get_ratio, bool isolated_server_numa_nodes)
{
    uint8_t thread_id;
    for (thread_id = 0; thread_id < num_threads; thread_id++)
    {
        if (!isolated_server_numa_nodes)
        {
            thread_load[thread_id] += (uint64_t)((double)load * get_ratio / (double)num_threads);
            continue;
        }
        if (thread_id % num_numa_nodes != numa_node_id)
            continue;
        thread_load[thread_id] += (uint64_t)((double)load * get_ratio / (double)(num_threads / num_numa_nodes));
    }
}

// Computes the per-thread load that results from a partition/hot-item placement.
//   out_thread_load         - receives num_threads load values (overwritten)
//   partition_load          - load of each of the num_partitions partitions
//   hot_item_load           - load of each of the num_hot_items hot items
//   *_to_thread_org         - original owner thread; only used to pick the NUMA node
//                             that serves concurrent reads when nodes are isolated
//   *_to_thread_new         - owner thread that receives the exclusive (write) load
//   get_ratio               - fraction of the load that is reads
// Partitions are EREW (all load goes to the owner) unless
// MEHCACHED_LOAD_BALANCE_USE_CREW_PARTITIONS is defined; hot items are always CREW.
static
void
mehcached_calc_thread_load(uint64_t out_thread_load[], const uint64_t partition_load[], const uint64_t hot_item_load[], const uint8_t partition_to_thread_org[], const uint8_t hot_item_to_thread_org[], const uint8_t partition_to_thread_new[], const uint8_t hot_item_to_thread_new[], uint8_t num_threads, uint64_t num_partitions, uint64_t num_hot_items, uint8_t num_numa_nodes, double get_ratio, bool isolated_server_numa_nodes)
{
    // 64-bit counters: the bounds are uint64_t, a narrower counter could never reach them
    uint64_t partition_id;
    uint64_t key;
    uint8_t thread_id;

#ifndef MEHCACHED_LOAD_BALANCE_USE_CREW_PARTITIONS
    (void)partition_to_thread_org;
#endif

    for (thread_id = 0; thread_id < num_threads; thread_id++)
        out_thread_load[thread_id] = 0;

    for (partition_id = 0; partition_id < num_partitions; partition_id++)
    {
#ifndef MEHCACHED_LOAD_BALANCE_USE_CREW_PARTITIONS
        // EREW
        out_thread_load[partition_to_thread_new[partition_id]] += partition_load[partition_id];
#else
        // CR: the node is derived the same way mehcached_load_balance() derives it;
        // it is only computed when it is used, so num_numa_nodes is never a divisor otherwise
        uint8_t numa_node_id = 0;
        if (isolated_server_numa_nodes)
            numa_node_id = (uint8_t)(partition_to_thread_org[partition_id] % num_numa_nodes);
        mehcached_spread_read_load(out_thread_load, partition_load[partition_id], numa_node_id, num_threads, num_numa_nodes, get_ratio, isolated_server_numa_nodes);
        // EW
        out_thread_load[partition_to_thread_new[partition_id]] += (uint64_t)((double)partition_load[partition_id] * (1. - get_ratio));
#endif
    }

    for (key = 0; key < num_hot_items; key++)
    {
        // CR (same node derivation as mehcached_load_balance())
        uint8_t numa_node_id = 0;
        if (isolated_server_numa_nodes)
            numa_node_id = (uint8_t)(hot_item_to_thread_org[key] % num_numa_nodes);
        mehcached_spread_read_load(out_thread_load, hot_item_load[key], numa_node_id, num_threads, num_numa_nodes, get_ratio, isolated_server_numa_nodes);
        // EW
        out_thread_load[hot_item_to_thread_new[key]] += (uint64_t)((double)hot_item_load[key] * (1. - get_ratio));
    }
}

// Reassigns partitions and hot items to threads so that per-thread load evens out.
// Works one NUMA node at a time (an entry belongs to node `org thread % num_numa_nodes`)
// and never moves an entry to a thread of another node.  Within a node, the concurrent
// read share of CREW entries is pre-charged to the threads, the remaining (exclusive)
// loads are sorted in descending order and each entry is given to the currently
// least-loaded thread of the node.
//   out_partition_to_thread_new / out_hot_item_to_thread_new - receive the new owners;
//       entries that are not reassigned keep whatever value the caller stored there
// Nothing is done when there are no entries or no threads.
static
void
mehcached_load_balance(const uint64_t partition_load[], const uint64_t hot_item_load[], const uint8_t partition_to_thread_org[], uint8_t out_partition_to_thread_new[], const uint8_t hot_item_to_thread_org[], uint8_t out_hot_item_to_thread_new[], uint8_t num_threads, uint64_t num_partitions, uint64_t num_hot_items, uint8_t num_numa_nodes, double get_ratio, bool isolated_server_numa_nodes)
{
    enum { ENTRY_PARTITION = 0, ENTRY_HOT_ITEM = 1 };

    uint64_t max_num_entries = num_partitions + num_hot_items;

    // zero-length variable-length arrays are undefined; there is nothing to balance anyway
    if (max_num_entries == 0 || num_threads == 0)
        return;

    uint8_t entry_type[max_num_entries];
    uint64_t entry_id[max_num_entries];
    uint64_t entry_load[max_num_entries];
    uint64_t thread_load[num_threads];

    uint8_t numa_node_id;
    for (numa_node_id = 0; numa_node_id < num_numa_nodes; numa_node_id++)
    {
        uint64_t num_entries = 0;
        uint64_t partition_id;
        uint64_t key;
        uint64_t i;
        uint64_t j;
        uint8_t thread_id;

        // thread t lives on node t % num_numa_nodes, so this node and all later ones have
        // no thread at all; thread_load[numa_node_id] below would be out of bounds
        if (numa_node_id >= num_threads)
            break;

        // enumerate all entries to consider
        for (partition_id = 0; partition_id < num_partitions; partition_id++)
        {
            if (partition_to_thread_org[partition_id] % num_numa_nodes != numa_node_id)
                continue;
            entry_type[num_entries] = ENTRY_PARTITION;
            entry_id[num_entries] = partition_id;
            entry_load[num_entries] = partition_load[partition_id];
            num_entries++;
        }

        for (key = 0; key < num_hot_items; key++)
        {
            if (hot_item_to_thread_org[key] % num_numa_nodes != numa_node_id)
                continue;
            entry_type[num_entries] = ENTRY_HOT_ITEM;
            entry_id[num_entries] = key;
            entry_load[num_entries] = hot_item_load[key];
            num_entries++;
        }

        for (thread_id = 0; thread_id < num_threads; thread_id++)
            thread_load[thread_id] = 0;

        // apply concurrent read load (CREW); what remains in entry_load is the exclusive part
        for (i = 0; i < num_entries; i++)
        {
#ifndef MEHCACHED_LOAD_BALANCE_USE_CREW_PARTITIONS
            // partitions are EREW: their whole load stays exclusive
            if (entry_type[i] != ENTRY_HOT_ITEM)
                continue;
#endif
            mehcached_spread_read_load(thread_load, entry_load[i], numa_node_id, num_threads, num_numa_nodes, get_ratio, isolated_server_numa_nodes);
            entry_load[i] = (uint64_t)((double)entry_load[i] * (1. - get_ratio));
        }

        // guarantee non-zero load to spread partitions across cores even when all access can be done by any core
        for (i = 0; i < num_entries; i++)
            if (entry_load[i] == 0)
                entry_load[i] = 1;

        // sort in descending order (exchange sort; the tie order it produces decides the
        // final placement, so the algorithm is kept as is)
        for (i = 0; i < num_entries; i++)
            for (j = i + 1; j < num_entries; j++)
            {
                if (entry_load[i] < entry_load[j])
                {
                    uint8_t tmp_type = entry_type[i];
                    uint64_t tmp_id = entry_id[i];
                    uint64_t tmp_load = entry_load[i];
                    entry_type[i] = entry_type[j];
                    entry_id[i] = entry_id[j];
                    entry_load[i] = entry_load[j];
                    entry_type[j] = tmp_type;
                    entry_id[j] = tmp_id;
                    entry_load[j] = tmp_load;
                }
            }

        // best fit: give each entry to the least-loaded thread of this node
        for (i = 0; i < num_entries; i++)
        {
            uint8_t min_load_thread_id = numa_node_id;
            for (thread_id = 0; thread_id < num_threads; thread_id++)
            {
                if (thread_id % num_numa_nodes != numa_node_id)   // do not move anything across NUMA nodes
                    continue;
                if (thread_load[min_load_thread_id] > thread_load[thread_id])
                    min_load_thread_id = thread_id;
            }

            if (entry_type[i] == ENTRY_PARTITION)
                out_partition_to_thread_new[entry_id[i]] = min_load_thread_id;
            else
                out_hot_item_to_thread_new[entry_id[i]] = min_load_thread_id;
            thread_load[min_load_thread_id] += entry_load[i];
        }
    }
}
(partition_load[partition_id] == 0) 329 | partition_load[partition_id] = 1; 330 | for (key = 0; key < num_hot_items; key++) 331 | if (hot_item_load[key] == 0) 332 | hot_item_load[key] = 1; 333 | 334 | // initial mapping 335 | for (partition_id = 0; partition_id < num_partitions; partition_id++) 336 | { 337 | partition_to_thread_org[partition_id] = (uint8_t)(partition_id % num_threads); 338 | partition_to_thread_new[partition_id] = partition_to_thread_org[partition_id]; 339 | } 340 | 341 | for (key = 0; key < num_hot_items; key++) 342 | { 343 | key_hash = mehcached_hash_key(key); 344 | partition_id = (uint16_t)(key_hash >> 48) & (uint16_t)(num_partitions - 1); 345 | hot_item_to_thread_org[key] = partition_to_thread_org[partition_id]; 346 | hot_item_to_thread_new[key] = hot_item_to_thread_org[key]; 347 | } 348 | 349 | uint64_t thread_load[num_threads]; 350 | 351 | // printf("partition load\n"); 352 | // mehcached_print_array_normalized(partition_load, num_partitions); 353 | // printf("\n"); 354 | // printf("hot item load\n"); 355 | // mehcached_print_array_normalized(hot_item_load, num_hot_items); 356 | // printf("\n"); 357 | 358 | printf("original thread load\n"); 359 | mehcached_calc_thread_load(thread_load, partition_load, hot_item_load, partition_to_thread_org, hot_item_to_thread_org, partition_to_thread_new, hot_item_to_thread_new, num_threads, num_partitions, num_hot_items, num_numa_nodes, get_ratio, isolated_server_numa_nodes); 360 | mehcached_print_array_normalized(thread_load, num_threads); 361 | printf("\n"); 362 | 363 | printf("load-balanced thread load\n"); 364 | mehcached_load_balance(partition_load, hot_item_load, partition_to_thread_org, partition_to_thread_new, hot_item_to_thread_org, hot_item_to_thread_new, num_threads, num_partitions, num_hot_items, num_numa_nodes, get_ratio, isolated_server_numa_nodes); 365 | mehcached_calc_thread_load(thread_load, partition_load, hot_item_load, partition_to_thread_org, hot_item_to_thread_org, 
// Entry point of the load-balance analysis tool.
// With four arguments (NUM-HOT-ITEMS ZIPF-THETA GET-RATIO ISOLATED-SERVER-NUMA-NODES) it
// runs the analysis and exits successfully.  With fewer arguments it prints the usage
// line, runs the zipf self-test for a fixed list of theta values and exits with failure.
int
main(int argc, const char *argv[])
{
    // theta values exercised by the zipf self-test, in the order they are run
    static const double test_thetas[] = {
        0., 0.01, 0.1, 0.5, 0.9, 0.99, 0.992, 0.993, 0.994, 0.999,
        1., 10., 20., 30., 40., 50., 100.
    };

    if (argc >= 5)
    {
        uint64_t num_hot_items = (uint64_t)atol(argv[1]);
        double zipf_theta = atof(argv[2]);
        double get_ratio = atof(argv[3]);
        int isolated_server_numa_nodes = atoi(argv[4]);

        mehcached_benchmark_analysis(num_hot_items, zipf_theta, get_ratio, isolated_server_numa_nodes);
        return EXIT_SUCCESS;
    }

    printf("%s NUM-HOT-ITEMS ZIPF-THETA GET-RATIO ISOLATED-SERVER-NUMA-NODES\n", argv[0]);

    size_t theta_index;
    for (theta_index = 0; theta_index < sizeof(test_thetas) / sizeof(test_thetas[0]); theta_index++)
        mehcached_test_zipf(test_thetas[theta_index]);

    return EXIT_FAILURE;
}
University 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "netbench_config.h" 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | struct mehcached_server_conf * 22 | mehcached_get_server_conf(const char *filename, const char *server_name) 23 | { 24 | FILE *fp = fopen(filename, "r"); 25 | if (!fp) 26 | { 27 | fprintf(stderr, "cannot open %s\n", filename); 28 | return NULL; 29 | } 30 | 31 | struct mehcached_server_conf *conf = malloc(sizeof(struct mehcached_server_conf)); 32 | memset(conf, 0, sizeof(struct mehcached_server_conf)); 33 | 34 | while (true) 35 | { 36 | char buf[4096]; 37 | int ret = fscanf(fp, "server,%[^,\n]\n", buf); 38 | if (ret == EOF) 39 | break; 40 | if (strcmp(buf, server_name) != 0) 41 | { 42 | // skip 43 | while (true) 44 | { 45 | if (fgets(buf, sizeof(buf), fp) == NULL) 46 | break; 47 | if (buf[0] == '\n') 48 | break; 49 | } 50 | continue; 51 | } 52 | 53 | while (true) 54 | { 55 | if (fgets(buf, sizeof(buf), fp) == NULL) 56 | break; 57 | 58 | { 59 | char ip_addr[4096]; 60 | char mac_addr[4096]; 61 | ret = sscanf(buf, "server_port,%[^,],%[^,\n]\n", mac_addr, ip_addr); 62 | if (ret == 2) 63 | { 64 | size_t i; 65 | char *p = mac_addr; 66 | for (i = 0; i < 6; i++, p++) 67 | conf->ports[conf->num_ports].mac_addr[i] = (uint8_t)strtoul(p, &p, 16); 68 | p = ip_addr; 69 | for (i = 0; i < 4; i++, p++) 70 | conf->ports[conf->num_ports].ip_addr[i] = (uint8_t)strtoul(p, &p, 
10); 71 | conf->num_ports++; 72 | assert(conf->num_ports <= MEHCACHED_MAX_PORTS); 73 | continue; 74 | } 75 | else if (ret != 0) 76 | { 77 | fprintf(stderr, "parse error: %s (in %s)\n", buf, filename); 78 | continue; 79 | } 80 | } 81 | { 82 | char port_ids[4096]; 83 | ret = sscanf(buf, "server_thread,%[^,\n]\n", port_ids); 84 | if (ret == 1) 85 | { 86 | char *p = port_ids; 87 | while (*p != 0) 88 | { 89 | conf->threads[conf->num_threads].port_ids[conf->threads[conf->num_threads].num_ports] = (uint8_t)strtoul(p, &p, 10); 90 | conf->threads[conf->num_threads].num_ports++; 91 | assert(conf->threads[conf->num_threads].num_ports <= MEHCACHED_MAX_PORTS); 92 | if (*p != 0) p++; 93 | } 94 | conf->num_threads++; 95 | assert(conf->num_threads <= MEHCACHED_MAX_THREADS); 96 | continue; 97 | } 98 | else if (ret != 0) 99 | { 100 | fprintf(stderr, "parse error: %s (in %s)\n", buf, filename); 101 | continue; 102 | } 103 | } 104 | { 105 | uint64_t num_items; 106 | uint64_t alloc_size; 107 | uint8_t concurrent_table_read; 108 | uint8_t concurrent_table_write; 109 | uint8_t concurrent_alloc_write; 110 | uint8_t thread_id; 111 | double mth_threshold; 112 | ret = sscanf(buf, "server_partition,%lu,%lu,%hhu,%hhu,%hhu,%hhu,%lf\n", &num_items, &alloc_size, &concurrent_table_read, &concurrent_table_write, &concurrent_alloc_write, &thread_id, &mth_threshold); 113 | if (ret == 7) 114 | { 115 | conf->partitions[conf->num_partitions].num_items = num_items; 116 | conf->partitions[conf->num_partitions].alloc_size = alloc_size; 117 | conf->partitions[conf->num_partitions].concurrent_table_read = concurrent_table_read; 118 | conf->partitions[conf->num_partitions].concurrent_table_write = concurrent_table_write; 119 | conf->partitions[conf->num_partitions].concurrent_alloc_write = concurrent_alloc_write; 120 | conf->partitions[conf->num_partitions].thread_id = thread_id; 121 | conf->partitions[conf->num_partitions].mth_threshold = mth_threshold; 122 | conf->num_partitions++; 123 | 
assert(conf->num_partitions <= MEHCACHED_MAX_PARTITIONS); 124 | continue; 125 | } 126 | else if (ret != 0) 127 | { 128 | fprintf(stderr, "parse error: %s (in %s)\n", buf, filename); 129 | continue; 130 | } 131 | } 132 | { 133 | uint64_t key_hash; 134 | uint8_t thread_id; 135 | ret = sscanf(buf, "server_hot_item,%lx,%hhu\n", &key_hash, &thread_id); 136 | if (ret == 2) 137 | { 138 | conf->hot_items[conf->num_hot_items].key_hash = key_hash; 139 | conf->hot_items[conf->num_hot_items].thread_id = thread_id; 140 | conf->num_hot_items++; 141 | assert(conf->num_hot_items <= MEHCACHED_MAX_HOT_ITEMS); 142 | continue; 143 | } 144 | else if (ret != 0) 145 | { 146 | fprintf(stderr, "parse error: %s (in %s)\n", buf, filename); 147 | continue; 148 | } 149 | } 150 | if (buf[0] == '\n') 151 | break; 152 | fprintf(stderr, "parse error: %s (in %s)\n", buf, filename); 153 | } 154 | } 155 | 156 | fclose(fp); 157 | return conf; 158 | } 159 | 160 | struct mehcached_client_conf * 161 | mehcached_get_client_conf(const char *filename, const char *client_name) 162 | { 163 | FILE *fp = fopen(filename, "r"); 164 | if (!fp) 165 | { 166 | fprintf(stderr, "cannot open %s\n", filename); 167 | return NULL; 168 | } 169 | 170 | struct mehcached_client_conf *conf = malloc(sizeof(struct mehcached_client_conf)); 171 | memset(conf, 0, sizeof(struct mehcached_client_conf)); 172 | 173 | while (true) 174 | { 175 | char buf[4096]; 176 | int ret = fscanf(fp, "client,%[^,\n]\n", buf); 177 | if (ret == EOF) 178 | break; 179 | if (strcmp(buf, client_name) != 0) 180 | { 181 | // skip 182 | while (true) 183 | { 184 | if (fgets(buf, sizeof(buf), fp) == NULL) 185 | break; 186 | if (buf[0] == '\n') 187 | break; 188 | } 189 | continue; 190 | } 191 | 192 | while (true) 193 | { 194 | if (fgets(buf, sizeof(buf), fp) == NULL) 195 | break; 196 | 197 | { 198 | char ip_addr[4096]; 199 | char mac_addr[4096]; 200 | ret = sscanf(buf, "client_port,%[^,],%[^,\n]\n", mac_addr, ip_addr); 201 | if (ret == 2) 202 | { 203 | size_t i; 
204 | char *p = mac_addr; 205 | for (i = 0; i < 6; i++, p++) 206 | conf->ports[conf->num_ports].mac_addr[i] = (uint8_t)strtoul(p, &p, 16); 207 | p = ip_addr; 208 | for (i = 0; i < 4; i++, p++) 209 | conf->ports[conf->num_ports].ip_addr[i] = (uint8_t)strtoul(p, &p, 10); 210 | conf->num_ports++; 211 | assert(conf->num_ports <= MEHCACHED_MAX_PORTS); 212 | continue; 213 | } 214 | else if (ret != 0) 215 | { 216 | fprintf(stderr, "parse error: %s (in %s)\n", buf, filename); 217 | continue; 218 | } 219 | } 220 | { 221 | if (strcmp(buf, "client_thread,\n") == 0) 222 | { 223 | conf->num_threads++; 224 | assert(conf->num_threads <= MEHCACHED_MAX_THREADS); 225 | continue; 226 | } 227 | } 228 | if (buf[0] == '\n') 229 | break; 230 | fprintf(stderr, "parse error: %s (in %s)\n", buf, filename); 231 | } 232 | } 233 | 234 | fclose(fp); 235 | return conf; 236 | } 237 | 238 | struct mehcached_prepopulation_conf * 239 | mehcached_get_prepopulation_conf(const char *filename, const char *server_name) 240 | { 241 | FILE *fp = fopen(filename, "r"); 242 | if (!fp) 243 | { 244 | fprintf(stderr, "cannot open %s\n", filename); 245 | return NULL; 246 | } 247 | 248 | struct mehcached_prepopulation_conf *conf = malloc(sizeof(struct mehcached_prepopulation_conf)); 249 | memset(conf, 0, sizeof(struct mehcached_prepopulation_conf)); 250 | 251 | while (true) 252 | { 253 | char buf[4096]; 254 | int ret = fscanf(fp, "prepopulation,%[^,\n]\n", buf); 255 | if (ret == EOF) 256 | break; 257 | if (strcmp(buf, server_name) != 0) 258 | { 259 | // skip 260 | while (true) 261 | { 262 | if (fgets(buf, sizeof(buf), fp) == NULL) 263 | break; 264 | if (buf[0] == '\n') 265 | break; 266 | } 267 | continue; 268 | } 269 | 270 | while (true) 271 | { 272 | if (fgets(buf, sizeof(buf), fp) == NULL) 273 | break; 274 | 275 | { 276 | uint64_t num_items; 277 | size_t key_length; 278 | size_t value_length; 279 | int ret = sscanf(buf, "dataset,%lu,%zu,%zu\n", &num_items, &key_length, &value_length); 280 | if (ret == 3) 281 | { 
282 | conf->num_items = num_items; 283 | conf->key_length = key_length; 284 | conf->value_length = value_length; 285 | continue; 286 | } 287 | else if (ret != 0) 288 | { 289 | fprintf(stderr, "parse error: %s (in %s)\n", buf, filename); 290 | continue; 291 | } 292 | } 293 | 294 | if (buf[0] == '\n') 295 | break; 296 | fprintf(stderr, "parse error: %s (in %s)\n", buf, filename); 297 | } 298 | } 299 | 300 | fclose(fp); 301 | return conf; 302 | } 303 | struct mehcached_workload_conf * 304 | mehcached_get_workload_conf(const char *filename, const char *client_name) 305 | { 306 | FILE *fp = fopen(filename, "r"); 307 | if (!fp) 308 | { 309 | fprintf(stderr, "cannot open %s\n", filename); 310 | return NULL; 311 | } 312 | 313 | struct mehcached_workload_conf *conf = malloc(sizeof(struct mehcached_workload_conf)); 314 | memset(conf, 0, sizeof(struct mehcached_workload_conf)); 315 | 316 | while (true) 317 | { 318 | char buf[4096]; 319 | int ret = fscanf(fp, "workload,%[^,\n]\n", buf); 320 | if (ret == EOF) 321 | break; 322 | if (strcmp(buf, client_name) != 0) 323 | { 324 | // skip 325 | while (true) 326 | { 327 | if (fgets(buf, sizeof(buf), fp) == NULL) 328 | break; 329 | if (buf[0] == '\n') 330 | break; 331 | } 332 | continue; 333 | } 334 | 335 | while (true) 336 | { 337 | if (fgets(buf, sizeof(buf), fp) == NULL) 338 | break; 339 | 340 | { 341 | char port_ids[4096]; 342 | char server_name[4096]; 343 | int8_t partition_mode; 344 | uint64_t num_items; 345 | size_t key_length; 346 | size_t value_length; 347 | double zipf_theta; 348 | double get_ratio; 349 | double put_ratio; 350 | double increment_ratio; 351 | uint8_t batch_size; 352 | uint64_t num_operations; 353 | double duration; 354 | int ret = sscanf(buf, "workload_thread,%[^,],%[^,],%hhd,%lu,%zu,%zu,%lf,%lf,%lf,%lf,%hhu,%lu,%lf\n", port_ids, server_name, &partition_mode, &num_items, &key_length, &value_length, &zipf_theta, &get_ratio, &put_ratio, &increment_ratio, &batch_size, &num_operations, &duration); 355 | if (ret 
== 13) 356 | { 357 | char *p = port_ids; 358 | while (*p != 0) 359 | { 360 | conf->threads[conf->num_threads].port_ids[conf->threads[conf->num_threads].num_ports] = (uint8_t)strtoul(p, &p, 10); 361 | conf->threads[conf->num_threads].num_ports++; 362 | assert(conf->threads[conf->num_threads].num_ports <= MEHCACHED_MAX_PORTS); 363 | if (*p != 0) p++; 364 | } 365 | strcpy(conf->threads[conf->num_threads].server_name, server_name); 366 | conf->threads[conf->num_threads].partition_mode = partition_mode; 367 | conf->threads[conf->num_threads].num_items = num_items; 368 | conf->threads[conf->num_threads].key_length = key_length; 369 | conf->threads[conf->num_threads].value_length = value_length; 370 | conf->threads[conf->num_threads].zipf_theta = zipf_theta; 371 | conf->threads[conf->num_threads].get_ratio = get_ratio; 372 | conf->threads[conf->num_threads].put_ratio = put_ratio; 373 | conf->threads[conf->num_threads].increment_ratio = increment_ratio; 374 | conf->threads[conf->num_threads].batch_size = batch_size; 375 | conf->threads[conf->num_threads].num_operations = num_operations; 376 | conf->threads[conf->num_threads].duration = duration; 377 | conf->num_threads++; 378 | assert(conf->num_threads <= MEHCACHED_MAX_THREADS); 379 | continue; 380 | } 381 | else if (ret != 0) 382 | { 383 | fprintf(stderr, "parse error: %s (in %s)\n", buf, filename); 384 | continue; 385 | } 386 | } 387 | 388 | if (buf[0] == '\n') 389 | break; 390 | fprintf(stderr, "parse error: %s (in %s)\n", buf, filename); 391 | } 392 | } 393 | 394 | fclose(fp); 395 | return conf; 396 | } 397 | -------------------------------------------------------------------------------- /src/netbench_config.h: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Carnegie Mellon University 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "common.h"
#include "net_common.h"

// Capacity limits of the fixed-size arrays in the configuration structs below.
// MEHCACHED_MAX_PORTS is not defined here; it comes from net_common.h.
//#define MEHCACHED_MAX_PORTS (8)
#define MEHCACHED_MAX_THREADS (16)              // mehcached_server_conf.threads
#define MEHCACHED_MAX_PARTITIONS (64)           // mehcached_server_conf.partitions
#define MEHCACHED_MAX_WORKLOAD_THREADS (16)     // mehcached_workload_conf.threads
#define MEHCACHED_MAX_HOT_ITEMS (64)            // mehcached_server_conf.hot_items


// common
// One network port, filled from a "server_port,<mac>,<ip>" or "client_port,<mac>,<ip>" line.
struct mehcached_port_conf
{
    uint8_t mac_addr[6];    // six fields, each parsed as a hexadecimal number
    uint8_t ip_addr[4];     // four fields, each parsed as a decimal number
};


// server
// One server thread, filled from a "server_thread,<port ids>" line.
struct mehcached_server_thread_conf
{
    uint8_t num_ports;                          // number of valid entries in port_ids
    uint8_t port_ids[MEHCACHED_MAX_PORTS];      // decimal ids in the order they appear on the line
};

// One partition, filled field by field (in this order) from a "server_partition,..." line.
struct mehcached_server_partition_conf
{
    uint64_t num_items;
    uint64_t alloc_size;
    uint8_t concurrent_table_read;      // read through MEHCACHED_CONCURRENT_TABLE_READ()
    uint8_t concurrent_table_write;     // read through MEHCACHED_CONCURRENT_TABLE_WRITE()
    uint8_t concurrent_alloc_write;     // read through MEHCACHED_CONCURRENT_ALLOC_WRITE()
    uint8_t thread_id;                  // presumably the owning server thread -- TODO confirm
    double mth_threshold;
};

// One hot item, filled from a "server_hot_item,<key_hash>,<thread_id>" line.
struct mehcached_server_hot_item_conf
{
    uint64_t key_hash;      // written in hexadecimal in the configuration file
    uint8_t thread_id;
};

// Everything read from one "server,<name>" section; each num_* field counts the valid
// entries of the array that follows it.
struct mehcached_server_conf
{
    uint8_t num_ports;
    struct mehcached_port_conf ports[MEHCACHED_MAX_PORTS];
    uint8_t num_threads;
    struct mehcached_server_thread_conf threads[MEHCACHED_MAX_THREADS];
    uint16_t num_partitions;
    struct mehcached_server_partition_conf partitions[MEHCACHED_MAX_PARTITIONS];
    uint8_t num_hot_items;
    struct mehcached_server_hot_item_conf hot_items[MEHCACHED_MAX_HOT_ITEMS];
};

// Accessors for the per-partition concurrency settings; server_conf is a pointer to
// struct mehcached_server_conf, partition_id an index into its partitions array.
#define MEHCACHED_CONCURRENT_TABLE_READ(server_conf, partition_id) ((server_conf)->partitions[partition_id].concurrent_table_read)
#define MEHCACHED_CONCURRENT_TABLE_WRITE(server_conf, partition_id) ((server_conf)->partitions[partition_id].concurrent_table_write)
#define MEHCACHED_CONCURRENT_ALLOC_WRITE(server_conf, partition_id) ((server_conf)->partitions[partition_id].concurrent_alloc_write)


// client
// Everything read from one "client,<name>" section.
struct mehcached_client_conf
{
    uint8_t num_ports;                                      // number of valid entries in ports
    struct mehcached_port_conf ports[MEHCACHED_MAX_PORTS];
    uint8_t num_threads;                                    // number of "client_thread," lines seen
};


// prepopulation
// Everything read from one "prepopulation,<name>" section
// (a "dataset,<num_items>,<key_length>,<value_length>" line).
struct mehcached_prepopulation_conf
{
    // TODO: support multiple datasets
    uint64_t num_items;
    size_t key_length;
    size_t value_length;
};


// workload
// One workload thread, filled from a "workload_thread,..." line.  Note that the line
// lists batch_size after increment_ratio, which differs from the field order here.
struct mehcached_workload_thread_conf
{
    uint8_t num_ports;                          // number of valid entries in port_ids
    uint8_t port_ids[MEHCACHED_MAX_PORTS];
    char server_name[64];                       // NUL-terminated name taken from the line
    int8_t partition_mode;
    uint64_t num_items;
    size_t key_length;
    size_t value_length;
    double zipf_theta;
    uint8_t batch_size;
    double get_ratio;
    double put_ratio;
    double increment_ratio;
    uint64_t num_operations;
    double duration;
};

// Everything read from one "workload,<name>" section.
struct mehcached_workload_conf
{
    uint8_t num_threads;    // number of valid entries in threads
    struct mehcached_workload_thread_conf threads[MEHCACHED_MAX_WORKLOAD_THREADS];
};


// functions
// Each loader reads the section named by its second argument from the file `filename`
// and returns a malloc()-allocated struct, or NULL when the file cannot be opened.
struct mehcached_server_conf *
mehcached_get_server_conf(const char *filename, const char *server_name);

struct mehcached_client_conf *
mehcached_get_client_conf(const char *filename, const char *client_name);

struct mehcached_prepopulation_conf *
mehcached_get_prepopulation_conf(const char *filename, const char *server_name);

struct mehcached_workload_conf *
mehcached_get_workload_conf(const char *filename, const char *client_name);
/src/netbench_hot_item_hash.h: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Carnegie Mellon University 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #pragma once 16 | 17 | #include "common.h" 18 | 19 | struct mehcached_hot_item_hash 20 | { 21 | }; 22 | 23 | static 24 | void 25 | mehcached_calc_hot_item_hash(struct mehcached_server_conf *server_conf, struct mehcached_hot_item_hash *hot_item_hash) 26 | { 27 | (void)server_conf; 28 | (void)hot_item_hash; 29 | } 30 | 31 | static 32 | uint8_t 33 | mehcached_get_hot_item_id(struct mehcached_server_conf *server_conf, struct mehcached_hot_item_hash *hot_item_hash, uint64_t key_hash) 34 | { 35 | (void)server_conf; 36 | (void)hot_item_hash; 37 | (void)key_hash; 38 | return (uint8_t)-1; 39 | } 40 | -------------------------------------------------------------------------------- /src/perf_count/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright 2014 Carnegie Mellon University 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 |
cmake_minimum_required(VERSION 2.6)

# basic configuration
# Refuse in-source builds: stop with a fatal error when the source directory
# and the binary directory are the same.
if(CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_CURRENT_BINARY_DIR)
    message(FATAL_ERROR "Use out-of-source build only!")
endif(CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_CURRENT_BINARY_DIR)

project(PERF_COUNT)

# Compiler flags: debug info and strict conversion warnings, -O3, and the
# gnu99 dialect (perf_count.c prints a hint about -std=gnu99 when a counter
# read returns an unexpected size).
add_definitions(-g -Wall -Wextra -Wconversion -Wsign-conversion)
add_definitions(-O3)
add_definitions(-std=gnu99)

# The perf_count library is built from the single source file perf_count.c.
add_library(perf_count perf_count.c)
29 | 30 | -------------------------------------------------------------------------------- /src/perf_count/perf_count.c: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Carnegie Mellon University 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License.
14 | 15 | #include "perf_count.h" 16 | 17 | #ifdef __cplusplus 18 | extern "C" { 19 | #endif 20 | 21 | #ifdef __linux__ 22 | #include 23 | #include 24 | #include 25 | #endif 26 | 27 | #include 28 | #include 29 | #include 30 | #include 31 |
// Internal state behind the opaque perf_count_t handle.
struct perf_count_ctx
{
    size_t num_groups;                  // 1, or the online CPU count in system-wide mode
    size_t num_events;                  // number of event types requested at init
    enum PERF_COUNT_TYPE *types;        // [num_events] requested types, in caller order
#ifdef __linux__
    struct perf_event_attr *events;     // [num_events] kernel attributes per requested type
#endif
    int *fds;                           // [num_groups * num_events] event fds; negative when not open
    uint64_t *counters;                 // [num_events] accumulated counts
};

// Display names, indexed by enum PERF_COUNT_TYPE (same order as the enum).
static const char *perf_count_name[] =
{
    "CPUCycles",
    "Instructions",
    "CacheReferences",
    "CacheMisses",
    "BranchInstructions",
    "BranchMisses",
    "BUSCycles",
    "L1IReadAccess",
    "L1IReadMiss",
    "L1IWriteAccess",
    "L1IWriteMiss",
    "L1IPrefetchAccess",
    "L1IPrefetchMiss",
    "L1DReadAccess",
    "L1DReadMiss",
    "L1DWriteAccess",
    "L1DWriteMiss",
    "L1DPrefetchAccess",
    "L1DPrefetchMiss",
    "LLReadAccess",
    "LLReadMiss",
    "LLWriteAccess",
    "LLWriteMiss",
    "LLPrefetchAccess",
    "LLPrefetchMiss",
    "ITLBReadAccess",
    "ITLBReadMiss",
    "ITLBWriteAccess",
    "ITLBWriteMiss",
    "ITLBPrefetchAccess",
    "ITLBPrefetchMiss",
    "DTLBReadAccess",
    "DTLBReadMiss",
    "DTLBWriteAccess",
    "DTLBWriteMiss",
    "DTLBPrefetchAccess",
    "DTLBPrefetchMiss",
    "CPUClock",
    "TaskClock",
    "PageFaults",
    "ContextSwitches",
    "CPUMigrations",
    "PageFaultsMinor",
    "PageFaultsMajor",
    "AlignmentFaults",
    "EmulationFaults",
};

#ifdef __linux__
// One PERF_TYPE_HW_CACHE entry; config packs (cache id) | (op << 8) | (result << 16).
// The arguments are pasted onto the kernel constant prefixes, e.g.
// (L1I, READ, MISS) -> PERF_COUNT_HW_CACHE_L1I, ..._OP_READ, ..._RESULT_MISS.
#define PERF_COUNT_CACHE_EVENT(cache, op, result) \
    { .type = PERF_TYPE_HW_CACHE, .config = PERF_COUNT_HW_CACHE_##cache | (PERF_COUNT_HW_CACHE_OP_##op << 8) | (PERF_COUNT_HW_CACHE_RESULT_##result << 16) }

// Kernel event attributes, indexed by enum PERF_COUNT_TYPE (same order as the enum).
static const struct perf_event_attr perf_count_mapping[] =
{
    { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES },
    { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
    { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES },
    { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES },
    { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
    { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES },
    { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BUS_CYCLES },
    PERF_COUNT_CACHE_EVENT(L1I, READ, ACCESS),
    PERF_COUNT_CACHE_EVENT(L1I, READ, MISS),
    PERF_COUNT_CACHE_EVENT(L1I, WRITE, ACCESS),
    PERF_COUNT_CACHE_EVENT(L1I, WRITE, MISS),
    PERF_COUNT_CACHE_EVENT(L1I, PREFETCH, ACCESS),
    PERF_COUNT_CACHE_EVENT(L1I, PREFETCH, MISS),
    PERF_COUNT_CACHE_EVENT(L1D, READ, ACCESS),
    PERF_COUNT_CACHE_EVENT(L1D, READ, MISS),
    PERF_COUNT_CACHE_EVENT(L1D, WRITE, ACCESS),
    PERF_COUNT_CACHE_EVENT(L1D, WRITE, MISS),
    PERF_COUNT_CACHE_EVENT(L1D, PREFETCH, ACCESS),
    PERF_COUNT_CACHE_EVENT(L1D, PREFETCH, MISS),
    PERF_COUNT_CACHE_EVENT(LL, READ, ACCESS),
    PERF_COUNT_CACHE_EVENT(LL, READ, MISS),
    PERF_COUNT_CACHE_EVENT(LL, WRITE, ACCESS),
    PERF_COUNT_CACHE_EVENT(LL, WRITE, MISS),
    PERF_COUNT_CACHE_EVENT(LL, PREFETCH, ACCESS),
    PERF_COUNT_CACHE_EVENT(LL, PREFETCH, MISS),
    PERF_COUNT_CACHE_EVENT(ITLB, READ, ACCESS),
    PERF_COUNT_CACHE_EVENT(ITLB, READ, MISS),
    PERF_COUNT_CACHE_EVENT(ITLB, WRITE, ACCESS),
    PERF_COUNT_CACHE_EVENT(ITLB, WRITE, MISS),
    PERF_COUNT_CACHE_EVENT(ITLB, PREFETCH, ACCESS),
    PERF_COUNT_CACHE_EVENT(ITLB, PREFETCH, MISS),
    PERF_COUNT_CACHE_EVENT(DTLB, READ, ACCESS),
    PERF_COUNT_CACHE_EVENT(DTLB, READ, MISS),
    PERF_COUNT_CACHE_EVENT(DTLB, WRITE, ACCESS),
    PERF_COUNT_CACHE_EVENT(DTLB, WRITE, MISS),
    PERF_COUNT_CACHE_EVENT(DTLB, PREFETCH, ACCESS),
    PERF_COUNT_CACHE_EVENT(DTLB, PREFETCH, MISS),
    { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_CLOCK },
    { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
    { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS },
    { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES },
    { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS },
    { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS_MIN },
    { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS_MAJ },
    { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_ALIGNMENT_FAULTS },
    { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_EMULATION_FAULTS },
};

#undef PERF_COUNT_CACHE_EVENT
#endif

#ifdef __linux__
// Thin wrapper around the perf_event_open system call.
// Fills in attr->size before the call; returns the new fd, or a negative
// value on failure (errno is set by the system call).
static int
sys_perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu, int group_fd, unsigned long flags)
{
    attr->size = sizeof(*attr);
    return (int)syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}
#endif

// Creates a counter set for the num_events types listed in perf_count_types.
//
// When system_wide is non-zero, each event is opened once per online CPU for
// all processes; otherwise it is opened once for the calling process (with
// inheritance enabled) on any CPU.
//
// Returns a handle to be released with perf_count_free(), or NULL when
// perf_count_types is NULL, a type is out of range, the CPU count cannot be
// determined, memory allocation fails, or the platform is not Linux.
// Failing to open an individual event is reported on stderr but is not fatal:
// the remaining events of that group are skipped and their counters stay 0.
perf_count_t
perf_count_init(const enum PERF_COUNT_TYPE *perf_count_types, size_t num_events, int system_wide)
{
#ifdef __linux__
    if (perf_count_types == NULL)
        return NULL;

    for (size_t event = 0; event < num_events; event++)
        if (perf_count_types[event] < 0 || perf_count_types[event] >= PERF_COUNT_TYPE_MAX)
            return NULL;

    size_t num_groups = 1;
    if (system_wide)
    {
        // sysconf() reports failure as -1, which must not be turned into a huge size_t
        long num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
        if (num_cpus < 1)
            return NULL;
        num_groups = (size_t)num_cpus;
    }

    // zero-filled so that the single cleanup path below may free every member
    struct perf_count_ctx *ctx = (struct perf_count_ctx *)calloc(1, sizeof(struct perf_count_ctx));
    if (!ctx)
        return NULL;

    ctx->num_groups = num_groups;
    ctx->num_events = num_events;

    ctx->types = (enum PERF_COUNT_TYPE *)calloc(num_events, sizeof(enum PERF_COUNT_TYPE));
    ctx->events = (struct perf_event_attr *)calloc(num_events, sizeof(struct perf_event_attr));
    ctx->fds = (int *)calloc(num_groups * num_events, sizeof(int));
    ctx->counters = (uint64_t *)calloc(num_events, sizeof(uint64_t));
    if (!ctx->types || !ctx->events || !ctx->fds || !ctx->counters)
    {
        free(ctx->counters);
        free(ctx->fds);
        free(ctx->events);
        free(ctx->types);
        free(ctx);
        return NULL;
    }

    // Mark every slot as "not open". A zero-filled slot would be mistaken for
    // fd 0 (stdin) by perf_count_accumulate() and perf_count_free() whenever
    // an open below fails and the rest of the group is skipped.
    for (size_t i = 0; i < num_groups * num_events; i++)
        ctx->fds[i] = -1;

    for (size_t event = 0; event < ctx->num_events; event++)
    {
        ctx->types[event] = perf_count_types[event];
        ctx->events[event] = perf_count_mapping[perf_count_types[event]];
        // each read() then yields {count, time enabled, time running}
        ctx->events[event].read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING;
        if (!system_wide)
            ctx->events[event].inherit = 1;
    }

    for (size_t group = 0; group < ctx->num_groups; group++)
    {
        // system-wide: every process on one CPU; otherwise: this process on any CPU
        // XXX: assuming the IDs of online cpus range from 0 to (num_cpus - 1)
        const pid_t pid = system_wide ? -1 : 0;
        const int cpu = system_wide ? (int)group : -1;

        for (size_t event = 0; event < ctx->num_events; event++)
        {
            const int fd = sys_perf_event_open(&ctx->events[event], pid, cpu, -1, 0);
            ctx->fds[group * ctx->num_events + event] = fd;
            if (fd < 0)
            {
                perror("perf_count: error while sys_perf_event_open()");
                break;
            }
        }
    }

    return ctx;
#else
    (void)perf_count_types;
    (void)num_events;
    (void)system_wide;
    return NULL;
#endif
}

// Closes every open event fd and releases the handle. A NULL handle is ignored.
void
perf_count_free(perf_count_t perf_count)
{
#ifdef __linux__
    struct perf_count_ctx *ctx = (struct perf_count_ctx *)perf_count;
    if (!ctx)
        return;

    for (size_t i = 0; i < ctx->num_groups * ctx->num_events; i++)
    {
        if (ctx->fds[i] >= 0)
            close(ctx->fds[i]);
    }

    free(ctx->counters);
    free(ctx->fds);
    free(ctx->events);
    free(ctx->types);
    free(ctx);
#else
    (void)perf_count;
#endif
}

// Reads the current value of every event, summed over all groups, and either
// adds it to (additive != 0) or subtracts it from (additive == 0) the
// accumulated counter. Subtracting at start and adding at stop leaves the
// difference between the two readings in the counter.
static void
perf_count_accumulate(perf_count_t perf_count, int additive)
{
#ifdef __linux__
    struct perf_count_ctx *ctx = (struct perf_count_ctx *)perf_count;
    if (!ctx)
        return;

    for (size_t event = 0; event < ctx->num_events; event++)
    {
        // sums over all groups: [0] raw count, [1] time enabled, [2] time running
        uint64_t total[3] = {0, 0, 0};

        for (size_t group = 0; group < ctx->num_groups; group++)
        {
            const int fd = ctx->fds[group * ctx->num_events + event];
            if (fd < 0)
                continue;

            uint64_t sample[3] = {0, 0, 0};
            ssize_t len = read(fd, sample, sizeof(sample));
            if (len < 0)
            {
                perror("perf_count: error while reading stats");
                break;
            }
            else if ((size_t)len != sizeof(sample))
            {
                fprintf(stderr, "perf_count: invalid stats reading; did you really use -std=gnu99 when compiling?\n");
                break;
            }

            total[0] += sample[0];
            total[1] += sample[1];
            total[2] += sample[2];
        }

        // The event was counted for only part of the time it was enabled:
        // scale the count up by enabled/running, rounding to nearest.
        // (A running time of 0 means no event was counted at all.)
        if (total[2] != 0 && total[2] < total[1])
            total[0] = (uint64_t)((double)total[0] * (double)total[1] / (double)total[2] + 0.5);

        if (additive)
        {
            ctx->counters[event] += total[0];
            // due to the scaling, we may observe a negative increment
            if ((int64_t)ctx->counters[event] < 0)
                ctx->counters[event] = 0;
        }
        else
            ctx->counters[event] -= total[0];
    }
#else
    (void)perf_count;
    (void)additive;
#endif
}

// Begins a measurement interval: subtracts the current readings from the counters.
void
perf_count_start(perf_count_t perf_count)
{
    perf_count_accumulate(perf_count, 0);
}

// Ends a measurement interval: adds the current readings to the counters.
void
perf_count_stop(perf_count_t perf_count)
{
    perf_count_accumulate(perf_count, 1);
}

// Sets every accumulated counter back to 0. A NULL handle is ignored.
void
perf_count_reset(perf_count_t perf_count)
{
#ifdef __linux__
    struct perf_count_ctx *ctx = (struct perf_count_ctx *)perf_count;
    if (!ctx)
        return;

    for (size_t event = 0; event < ctx->num_events; event++)
        ctx->counters[event] = 0;
#else
    (void)perf_count;
#endif
}

// Returns the accumulated counter of the index-th requested event, or
// (uint64_t)-1 when the handle is NULL, index is out of range, or the
// platform is not Linux.
uint64_t
perf_count_get_by_index(perf_count_t perf_count, size_t index)
{
#ifdef __linux__
    struct perf_count_ctx *ctx = (struct perf_count_ctx *)perf_count;

    if (!ctx || index >= ctx->num_events)
        return (uint64_t)-1;

    return ctx->counters[index];
#else
    (void)perf_count;
    (void)index;
    return (uint64_t)-1;
#endif
}

// Returns the accumulated counter of the first requested event of the given
// type, or (uint64_t)-1 when the handle is NULL, the type is invalid or was
// not requested, or the platform is not Linux.
uint64_t
perf_count_get_by_type(perf_count_t perf_count, enum PERF_COUNT_TYPE type)
{
#ifdef __linux__
    if (type < 0 || type >= PERF_COUNT_TYPE_MAX)
        return (uint64_t)-1;

    struct perf_count_ctx *ctx = (struct perf_count_ctx *)perf_count;
    if (!ctx)
        return (uint64_t)-1;

    for (size_t event = 0; event < ctx->num_events; event++)
        if (ctx->types[event] == type)
            return ctx->counters[event];
#else
    (void)perf_count;
    (void)type;
#endif

    return (uint64_t)-1;
}

// Returns the display name of a type, or NULL when the type is out of range
// or the platform is not Linux.
const char *
perf_count_name_by_type(enum PERF_COUNT_TYPE type)
{
#ifdef __linux__
    if (type < 0 || type >= PERF_COUNT_TYPE_MAX)
        return NULL;

    return perf_count_name[type];
#else
    (void)type;
    return NULL;
#endif
}

// Returns the type whose display name equals name (exact, case-sensitive
// match), or PERF_COUNT_TYPE_INVALID when name is NULL or unknown, or the
// platform is not Linux.
enum PERF_COUNT_TYPE
perf_count_type_by_name(const char *name)
{
#ifdef __linux__
    if (!name)
        return PERF_COUNT_TYPE_INVALID;

    for (size_t type = 0; type < PERF_COUNT_TYPE_MAX; type++)
        if (strcmp(perf_count_name[type], name) == 0)
            return (enum PERF_COUNT_TYPE)type;
#else
    (void)name;
#endif

    return PERF_COUNT_TYPE_INVALID;
}

414 | #ifdef __cplusplus 415 | } 416 | #endif 417 | 418 | -------------------------------------------------------------------------------- /src/perf_count/perf_count.h: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Carnegie Mellon University 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License.
14 | 15 | #ifndef __PERF_COUNT__ 16 | #define __PERF_COUNT__ 17 | 18 | #ifdef __cplusplus 19 | extern "C" { 20 | #endif 21 | 22 | #include 23 | #include 24 |
// Event types that can be counted. The enumerator values index the name and
// kernel-attribute tables in perf_count.c, so the order here must match the
// order of those tables.
enum PERF_COUNT_TYPE
{
    PERF_COUNT_TYPE_HW_CPU_CYCLES,
    PERF_COUNT_TYPE_HW_INSTRUCTIONS,
    PERF_COUNT_TYPE_HW_CACHE_REFERENCES,
    PERF_COUNT_TYPE_HW_CACHE_MISSES,
    PERF_COUNT_TYPE_HW_BRANCH_INSTRUCTIONS,
    PERF_COUNT_TYPE_HW_BRANCH_MISSES,
    PERF_COUNT_TYPE_HW_BUS_CYCLES,
    PERF_COUNT_TYPE_HW_CACHE_L1I_READ_ACCESS,
    PERF_COUNT_TYPE_HW_CACHE_L1I_READ_MISS,
    PERF_COUNT_TYPE_HW_CACHE_L1I_WRITE_ACCESS,
    PERF_COUNT_TYPE_HW_CACHE_L1I_WRITE_MISS,
    PERF_COUNT_TYPE_HW_CACHE_L1I_PREFETCH_ACCESS,   // not working?
    PERF_COUNT_TYPE_HW_CACHE_L1I_PREFETCH_MISS,     // not working?
    PERF_COUNT_TYPE_HW_CACHE_L1D_READ_ACCESS,
    PERF_COUNT_TYPE_HW_CACHE_L1D_READ_MISS,
    PERF_COUNT_TYPE_HW_CACHE_L1D_WRITE_ACCESS,
    PERF_COUNT_TYPE_HW_CACHE_L1D_WRITE_MISS,
    PERF_COUNT_TYPE_HW_CACHE_L1D_PREFETCH_ACCESS,   // not working?
    PERF_COUNT_TYPE_HW_CACHE_L1D_PREFETCH_MISS,     // not working?
    PERF_COUNT_TYPE_HW_CACHE_LL_READ_ACCESS,
    PERF_COUNT_TYPE_HW_CACHE_LL_READ_MISS,
    PERF_COUNT_TYPE_HW_CACHE_LL_WRITE_ACCESS,
    PERF_COUNT_TYPE_HW_CACHE_LL_WRITE_MISS,
    PERF_COUNT_TYPE_HW_CACHE_LL_PREFETCH_ACCESS,    // not working?
    PERF_COUNT_TYPE_HW_CACHE_LL_PREFETCH_MISS,      // not working?
    PERF_COUNT_TYPE_HW_CACHE_ITLB_READ_ACCESS,
    PERF_COUNT_TYPE_HW_CACHE_ITLB_READ_MISS,
    PERF_COUNT_TYPE_HW_CACHE_ITLB_WRITE_ACCESS,
    PERF_COUNT_TYPE_HW_CACHE_ITLB_WRITE_MISS,
    PERF_COUNT_TYPE_HW_CACHE_ITLB_PREFETCH_ACCESS,  // not working?
    PERF_COUNT_TYPE_HW_CACHE_ITLB_PREFETCH_MISS,    // not working?
    PERF_COUNT_TYPE_HW_CACHE_DTLB_READ_ACCESS,
    PERF_COUNT_TYPE_HW_CACHE_DTLB_READ_MISS,
    PERF_COUNT_TYPE_HW_CACHE_DTLB_WRITE_ACCESS,
    PERF_COUNT_TYPE_HW_CACHE_DTLB_WRITE_MISS,
    PERF_COUNT_TYPE_HW_CACHE_DTLB_PREFETCH_ACCESS,  // not working?
    PERF_COUNT_TYPE_HW_CACHE_DTLB_PREFETCH_MISS,    // not working?
    PERF_COUNT_TYPE_SW_CPU_CLOCK,
    PERF_COUNT_TYPE_SW_TASK_CLOCK,
    PERF_COUNT_TYPE_SW_PAGE_FAULTS,
    PERF_COUNT_TYPE_SW_CONTEXT_SWITCHES,
    PERF_COUNT_TYPE_SW_CPU_MIGRATIONS,
    PERF_COUNT_TYPE_SW_PAGE_FAULTS_MIN,
    PERF_COUNT_TYPE_SW_PAGE_FAULTS_MAJ,
    PERF_COUNT_TYPE_SW_ALIGNMENT_FAULTS,
    PERF_COUNT_TYPE_SW_EMULATION_FAULTS,

    // number of valid types; not a type itself
    PERF_COUNT_TYPE_MAX,

    // returned by perf_count_type_by_name() when no type matches
    PERF_COUNT_TYPE_INVALID = -1,
};

// Opaque handle to a counter set created by perf_count_init().
typedef void *perf_count_t;

// Creates a counter set for the num_events types in perf_count_types.
// Returns NULL when perf_count_types is NULL, a type is out of range, memory
// allocation fails, or the platform is not Linux.
// system_wide would require CAP_SYS_ADMIN
perf_count_t perf_count_init(const enum PERF_COUNT_TYPE *perf_count_types, size_t num_events, int system_wide);
// Closes the event descriptors and releases the counter set.
void perf_count_free(perf_count_t perf_count);

// start subtracts the current event readings from the accumulated counters
// and stop adds them, so each start/stop pair adds the events that occurred
// in between.
void perf_count_start(perf_count_t perf_count);
void perf_count_stop(perf_count_t perf_count);
// Sets every accumulated counter to 0.
void perf_count_reset(perf_count_t perf_count);

// Return an accumulated counter, selected by position in the array passed to
// perf_count_init() or by type (first match). Both return (uint64_t)-1 when
// the index is out of range, the type is invalid or was not requested, or the
// platform is not Linux.
uint64_t perf_count_get_by_index(perf_count_t perf_count, size_t index);
uint64_t perf_count_get_by_type(perf_count_t perf_count, enum PERF_COUNT_TYPE type);

// Convert between a type and its display name (e.g. "CPUCycles").
// name_by_type returns NULL for an out-of-range type; type_by_name returns
// PERF_COUNT_TYPE_INVALID for a NULL or unknown name. On non-Linux platforms
// they always return NULL and PERF_COUNT_TYPE_INVALID respectively.
const char *perf_count_name_by_type(enum PERF_COUNT_TYPE type);
enum PERF_COUNT_TYPE perf_count_type_by_name(const char *name);
94 | 95 | #ifdef __cplusplus 96 | } 97 | #endif 98 | 99 | #endif 100 | -------------------------------------------------------------------------------- /src/proto.h: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Carnegie Mellon University 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #pragma once 16 | 17 | #include "common.h" 18 | #include "table.h" 19 | 20 | #include 21 | #include 22 | #include 23 |
// Compile-time feature switches of the network protocol. A switch is enabled
// when its #define is active and disabled when the line is commented out.

// override opaque to measure latency
// NOTE(review): the comment above talks about latency while the macro is
// named for throttling -- confirm which behaviour this switch controls.
#define MEHCACHED_ENABLE_THROTTLING

// override expire time to measure end-to-end latency
// use this only for full-RX latency measurement using core 0; this may lower throughput due to more I/O and processing on core0 and less responsive throttling
//#define MEHCACHED_MEASURE_LATENCY

// (ETHER_MAX_LEN - ETHER_CRC_LEN - sizeof(struct mehcached_batch_packet)) / (sizeof(struct mehcached_request) + 8 + 8)
#define MEHCACHED_MAX_BATCH_SIZE (36)

// use software flow director (slower); this does not disable hardware flow director on the server, but the client will send packets to all cores regardless of the concurrency mode
//#define MEHCACHED_USE_SOFT_FDIR

// collect per-partition load
#define MEHCACHED_COLLECT_PER_PARTITION_LOAD

// Layout of one batch packet: space for the Ethernet, IPv4 and UDP headers,
// followed by the batch header fields and the batch payload.
// The bare numbers in the comments are presumably the byte offset of the
// field that follows -- TODO confirm against the header struct sizes.
struct mehcached_batch_packet
{
    // 0
    uint8_t header[sizeof(struct ether_hdr) + sizeof(struct ipv4_hdr) + sizeof(struct udp_hdr)];
    // 42
    uint8_t num_requests;
    uint8_t reserved0;
    // 44
    uint32_t opaque;
    // 48
    // zero-length trailing array: marks where the batch payload begins and
    // adds no storage to the struct itself
    uint8_t data[0];
    // batch
};
53 | -------------------------------------------------------------------------------- /src/shm.h: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Carnegie Mellon University 2 | // 3 | // Licensed under the
Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #pragma once 16 | 17 | #include "common.h" 18 |
MEHCACHED_BEGIN

// Interface of the shm module (declarations only; the definitions live in
// shm.c). Functions that take no arguments are declared with (void): in C an
// empty parameter list leaves the arguments unspecified, so the compiler
// would not reject a call that passes stray arguments.
//
// NOTE(review): the per-function remarks below are inferred from the names
// and signatures alone -- confirm against shm.c before relying on them.

// presumably rounds a requested size to one the module can serve
size_t
mehcached_shm_adjust_size(size_t size);

void
mehcached_shm_dump_page_info(void);

// presumably must be called before any other function of this module
void
mehcached_shm_init(size_t page_size, size_t num_numa_nodes, size_t num_pages_to_try, size_t num_pages_to_reserve);

void *
mehcached_shm_find_free_address(size_t size);

// the size_t result is presumably the entry_id accepted by
// mehcached_shm_schedule_remove() and mehcached_shm_map()
size_t
mehcached_shm_alloc(size_t length, size_t numa_node);

bool
mehcached_shm_schedule_remove(size_t entry_id);

bool
mehcached_shm_map(size_t entry_id, void *ptr, size_t offset, size_t length);

bool
mehcached_shm_unmap(void *ptr);

size_t
mehcached_shm_get_page_size(void);

size_t
mehcached_shm_get_memuse(void);

// malloc/free-style pairs: memory from a *_malloc_contiguous* call is
// presumably released with mehcached_shm_free_contiguous(), and memory from
// mehcached_shm_malloc_striped() with mehcached_shm_free_striped()
void *
mehcached_shm_malloc_contiguous(size_t size, size_t numa_node);

void *
mehcached_shm_malloc_contiguous_local(size_t size);

void
mehcached_shm_free_contiguous(void *ptr);

void *
mehcached_shm_malloc_striped(size_t size);

void
mehcached_shm_free_striped(void *ptr);

MEHCACHED_END
67 | 68 | -------------------------------------------------------------------------------- /src/stopwatch.c: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Carnegie Mellon University 2 | // 3 | // Licensed
under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include "stopwatch.h" 16 | #include 17 | #include 18 | 19 | static struct timeval mehcached_stopwatch_init_tv; 20 | static uint64_t mehcached_stopwatch_init_s; 21 | 22 | uint64_t mehcached_stopwatch_1_sec = 0UL; 23 | uint64_t mehcached_stopwatch_1_msec = 0UL; 24 | uint64_t mehcached_stopwatch_1_usec = 0UL; 25 | 26 | void 27 | mehcached_stopwatch_init_start() 28 | { 29 | mehcached_stopwatch_init_s = mehcached_stopwatch_now(); 30 | gettimeofday(&mehcached_stopwatch_init_tv, NULL); 31 | } 32 | 33 | void 34 | mehcached_stopwatch_init_end() 35 | { 36 | struct timeval tv_now; 37 | 38 | const uint64_t s_1_sec = 1000000UL; 39 | 40 | while (true) 41 | { 42 | gettimeofday(&tv_now, NULL); 43 | 44 | uint64_t diff = (uint64_t)(tv_now.tv_sec - mehcached_stopwatch_init_tv.tv_sec) * 1000000UL + (uint64_t)(tv_now.tv_usec - mehcached_stopwatch_init_tv.tv_usec); 45 | if (diff >= s_1_sec) 46 | { 47 | uint64_t s = mehcached_stopwatch_now(); 48 | mehcached_stopwatch_1_sec = (s - mehcached_stopwatch_init_s) * s_1_sec / diff; 49 | mehcached_stopwatch_1_msec = mehcached_stopwatch_1_sec / 1000UL; 50 | mehcached_stopwatch_1_usec = mehcached_stopwatch_1_msec / 1000UL; 51 | break; 52 | } 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /src/stopwatch.h: -------------------------------------------------------------------------------- 1 | // Copyright 2014 
Carnegie Mellon University 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #pragma once 16 | 17 | #include "common.h" 18 | #include 19 | 20 | extern uint64_t mehcached_stopwatch_1_sec; // this should be used whenever possible for better accuracy than below 21 | extern uint64_t mehcached_stopwatch_1_msec; 22 | extern uint64_t mehcached_stopwatch_1_usec; 23 | 24 | void 25 | mehcached_stopwatch_init_start(); 26 | 27 | void 28 | mehcached_stopwatch_init_end(); 29 | 30 | static 31 | uint64_t 32 | mehcached_stopwatch_now() 33 | { 34 | return rte_rdtsc(); 35 | } 36 | 37 | static 38 | uint64_t 39 | mehcached_stopwatch_diff_in_us(uint64_t new_t, uint64_t old_t) 40 | { 41 | return (new_t - old_t) * 1000000UL / mehcached_stopwatch_1_sec; 42 | } 43 | 44 | static 45 | double 46 | mehcached_stopwatch_diff_in_s(uint64_t new_t, uint64_t old_t) 47 | { 48 | return (double)mehcached_stopwatch_diff_in_us(new_t, old_t) * 0.000001; 49 | } -------------------------------------------------------------------------------- /src/table.h: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Carnegie Mellon University 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
// One hash-table bucket: a version word, a link to an overflow ("extra")
// bucket, and MEHCACHED_ITEMS_PER_BUCKET packed 64-bit item slots.
// The fields add up to 8 + 8 * MEHCACHED_ITEMS_PER_BUCKET bytes, i.e. 64 bytes
// with 7 slots and 128 bytes with 15 slots.
struct mehcached_bucket
{
    // NOTE(review): presumably a per-bucket change counter used by concurrent
    // readers/writers -- confirm against table.c
    uint32_t version;   // XXX: is uint32_t wide enough?
    uint32_t next_extra_bucket_index;   // 1-base; 0 = no extra bucket
    uint64_t item_vec[MEHCACHED_ITEMS_PER_BUCKET];

    // Layout of one item_vec entry, most significant bits first:
    // 16: tag (1-base)
    //  8: alloc id
    // 40: item offset
    // item == 0: empty item
    //
    // When MEHCACHED_SINGLE_ALLOC is defined the alloc id field does not exist:
    // the item offset occupies the low 48 bits, MEHCACHED_ALLOC_ID() always
    // evaluates to 0, and MEHCACHED_ITEM_VEC() ignores its alloc_id argument.

    // The tag lives in the top 16 bits; MEHCACHED_TAG() extracts it by shifting only.
#define MEHCACHED_TAG_MASK (((uint64_t)1 << 16) - 1)
#define MEHCACHED_TAG(item_vec) ((item_vec) >> 48)

#ifndef MEHCACHED_SINGLE_ALLOC
#define MEHCACHED_ALLOC_ID_MASK (((uint64_t)1 << 8) - 1)
#define MEHCACHED_ALLOC_ID(item_vec) (((item_vec) >> 40) & MEHCACHED_ALLOC_ID_MASK)
#else
#define MEHCACHED_ALLOC_ID(item_vec) (0LU)
#endif

#ifndef MEHCACHED_SINGLE_ALLOC
#define MEHCACHED_ITEM_OFFSET_MASK (((uint64_t)1 << 40) - 1)
#else
#define MEHCACHED_ITEM_OFFSET_MASK (((uint64_t)1 << 48) - 1)
#endif
#define MEHCACHED_ITEM_OFFSET(item_vec) ((item_vec) & MEHCACHED_ITEM_OFFSET_MASK)

    // Pack (tag, alloc_id, item_offset) into one entry.  The fields are OR-ed
    // together without masking, so callers must pass values that already fit
    // their field widths.
#ifndef MEHCACHED_SINGLE_ALLOC
#define MEHCACHED_ITEM_VEC(tag, alloc_id, item_offset) (((uint64_t)(tag) << 48) | ((uint64_t)(alloc_id) << 40) | (uint64_t)(item_offset))
#else
#define MEHCACHED_ITEM_VEC(tag, alloc_id, item_offset) (((uint64_t)(tag) << 48) | (uint64_t)(item_offset))
#endif
};
// Top-level state of one key-value table: the item allocator(s), the bucket
// array with its overflow ("extra") buckets, and optional operation counters.
// Exactly one of MEHCACHED_ALLOC_POOL / MEHCACHED_ALLOC_MALLOC /
// MEHCACHED_ALLOC_DYNAMIC is expected to select the `alloc` member.
// The whole struct is aligned to 64 bytes.
struct mehcached_table
{
#ifdef MEHCACHED_ALLOC_POOL
    struct mehcached_pool alloc[MEHCACHED_MAX_POOLS];
    // NOTE(review): presumably (number of pools - 1), used to map an alloc id
    // onto alloc[] -- confirm in table.c
    uint8_t alloc_id_mask;
    // NOTE(review): integer form of the move-to-head threshold; presumably
    // derived from the double passed to mehcached_table_init() -- confirm
    uint64_t mth_threshold;
#endif
#ifdef MEHCACHED_ALLOC_MALLOC
    struct mehcached_malloc alloc;
#endif
#ifdef MEHCACHED_ALLOC_DYNAMIC
    struct mehcached_dynamic alloc;
#endif

    struct mehcached_bucket *buckets;
    struct mehcached_bucket *extra_buckets;         // = (buckets + num_buckets); extra_buckets[0] is not used because index 0 indicates "no more extra bucket"

    // NOTE(review): presumably encodes the concurrent_table_read /
    // concurrent_table_write flags given to mehcached_table_init() -- confirm
    uint8_t concurrent_access_mode;

    uint32_t num_buckets;
    uint32_t num_buckets_mask;      // NOTE(review): presumably num_buckets - 1 (power-of-two bucket count) -- confirm
    uint32_t num_extra_buckets;

    // Free list of unused extra buckets, kept on its own 64-byte boundary.
    struct
    {
        uint32_t lock;
        uint32_t head;   // 1-base; 0 = no extra bucket
    } extra_bucket_free_list MEHCACHED_ALIGNED(64);

    // NOTE(review): shift amount whose use is not visible in this header -- confirm in table.c
    uint8_t rshift;

#ifdef MEHCACHED_COLLECT_STATS
    // Counters bumped through MEHCACHED_STAT_INC / MEHCACHED_STAT_DEC, which
    // use atomic add/sub built-ins; only present when stats collection is
    // compiled in.
    struct
    {
        size_t count;
        size_t set_nooverwrite;
        size_t set_new;
        size_t set_inplace;
        size_t set_evicted;
        size_t get_found;
        size_t get_notfound;
        size_t test_found;
        size_t test_notfound;
        size_t delete_found;
        size_t delete_notfound;
        size_t cleanup;
        size_t move_to_head_performed;
        size_t move_to_head_skipped;
        size_t move_to_head_failed;
    } stats;
#endif
} MEHCACHED_ALIGNED(64);
0 194 | uint8_t operation; // of enum MEHCACHED_OPERATION type 195 | uint8_t result; // of enum MEHCACHED_RESULT type 196 | // 2 197 | uint16_t reserved0; 198 | // 4 199 | uint32_t kv_length_vec; 200 | // 8 201 | uint64_t key_hash; 202 | // 16 203 | uint32_t expire_time; 204 | // 20 205 | uint32_t reserved1; 206 | // 24 207 | }; 208 | 209 | static 210 | void 211 | mehcached_print_bucket(const struct mehcached_bucket *bucket); 212 | 213 | static 214 | void 215 | mehcached_print_buckets(const struct mehcached_table *table); 216 | 217 | static 218 | void 219 | mehcached_print_stats(const struct mehcached_table *table); 220 | 221 | static 222 | void 223 | mehcached_reset_table_stats(struct mehcached_table *table); 224 | 225 | static 226 | uint32_t 227 | mehcached_calc_bucket_index(const struct mehcached_table *table, uint64_t key_hash); 228 | 229 | static 230 | uint16_t 231 | mehcached_calc_tag(uint64_t key_hash); 232 | 233 | static 234 | void 235 | mehcached_set_item(struct mehcached_item *item, uint64_t key_hash, const uint8_t *key, uint32_t key_length, const uint8_t *value, uint32_t value_length, uint32_t expire_time); 236 | 237 | static 238 | void 239 | mehcached_set_item_value(struct mehcached_item *item, const uint8_t *value, uint32_t value_length, uint32_t expire_time); 240 | 241 | static 242 | bool 243 | mehcached_compare_keys(const uint8_t *key1, size_t key1_len, const uint8_t *key2, size_t key2_len); 244 | 245 | static 246 | void 247 | mehcached_cleanup_all(uint8_t current_alloc_id, struct mehcached_table *table); 248 | 249 | static 250 | void 251 | mehcached_prefetch_table(struct mehcached_table *table, uint64_t key_hash, struct mehcached_prefetch_state *out_prefetch_state); 252 | 253 | static 254 | void 255 | mehcached_prefetch_alloc(struct mehcached_prefetch_state *in_out_prefetch_state); 256 | 257 | static 258 | bool 259 | mehcached_get(uint8_t current_alloc_id, struct mehcached_table *table, uint64_t key_hash, const uint8_t *key, size_t key_length, 
uint8_t *out_value, size_t *in_out_value_length, uint32_t *out_expire_time, bool readonly); 260 | 261 | static 262 | bool 263 | mehcached_test(uint8_t current_alloc_id, struct mehcached_table *table, uint64_t key_hash, const uint8_t *key, size_t key_length); 264 | 265 | static 266 | bool 267 | mehcached_set(uint8_t current_alloc_id, struct mehcached_table *table, uint64_t key_hash, const uint8_t *key, size_t key_length, const uint8_t *value, size_t value_length, uint32_t expire_time, bool overwrite); 268 | 269 | static 270 | bool 271 | mehcached_delete(uint8_t current_alloc_id, struct mehcached_table *table, uint64_t key_hash, const uint8_t *key, size_t key_length); 272 | 273 | static 274 | bool 275 | mehcached_increment(uint8_t current_alloc_id, struct mehcached_table *table, uint64_t key_hash, const uint8_t *key, size_t key_length, uint64_t increment, uint64_t *out_new_value, uint32_t expire_time); 276 | 277 | static 278 | void 279 | mehcached_process_batch(uint8_t current_alloc_id, struct mehcached_table *table, struct mehcached_request *requests, size_t num_requests, const uint8_t *in_data, uint8_t *out_data, size_t *out_data_length, bool readonly); 280 | 281 | static 282 | void 283 | mehcached_table_reset(struct mehcached_table *table); 284 | 285 | static 286 | void 287 | mehcached_table_init(struct mehcached_table *table, size_t num_buckets, size_t num_pools, size_t pool_size, bool concurrent_table_read, bool concurrent_table_write, bool concurrent_alloc_write, size_t table_numa_node, size_t alloc_numa_nodes[], double mth_threshold); 288 | 289 | static 290 | void 291 | mehcached_table_free(struct mehcached_table *table); 292 | 293 | MEHCACHED_END 294 | 295 | -------------------------------------------------------------------------------- /src/test.c: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Carnegie Mellon University 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | 
// you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include 16 | #include 17 | 18 | #include "mehcached.h" 19 | #include "hash.h" 20 | 21 | void 22 | test_basic() 23 | { 24 | printf("test_basic()\n"); 25 | 26 | struct mehcached_table table_o; 27 | struct mehcached_table *table = &table_o; 28 | size_t numa_nodes[] = {(size_t)-1}; 29 | mehcached_table_init(table, 1, 1, 256, false, false, false, numa_nodes[0], numa_nodes, MEHCACHED_MTH_THRESHOLD_FIFO); 30 | assert(table); 31 | 32 | size_t i; 33 | for (i = 0; i < MEHCACHED_ITEMS_PER_BUCKET; i++) 34 | { 35 | size_t key = i; 36 | size_t value = i; 37 | uint64_t key_hash = hash((const uint8_t *)&key, sizeof(key)); 38 | //printf("add key = %zu, value = %zu, key_hash = %lx\n", key, value, key_hash); 39 | 40 | if (!mehcached_set(0, table, key_hash, (const uint8_t *)&key, sizeof(key), (const uint8_t *)&value, sizeof(value), 0, false)) 41 | assert(false); 42 | } 43 | for (i = 0; i < MEHCACHED_ITEMS_PER_BUCKET; i++) 44 | { 45 | size_t key = i; 46 | size_t value = 100 + i; 47 | uint64_t key_hash = hash((const uint8_t *)&key, sizeof(key)); 48 | //printf("set key = %zu, value = %zu, key_hash = %lx\n", key, value, key_hash); 49 | 50 | if (!mehcached_set(0, table, key_hash, (const uint8_t *)&key, sizeof(key), (const uint8_t *)&value, sizeof(value), 0, true)) 51 | assert(false); 52 | } 53 | 54 | size_t value = 0; 55 | for (i = 0; i < MEHCACHED_ITEMS_PER_BUCKET; i++) 56 | { 57 | size_t key = i; 58 | uint64_t key_hash = hash((const uint8_t *)&key, sizeof(key)); 
59 | 60 | size_t value_length = sizeof(value); 61 | if (!mehcached_get(0, table, key_hash, (const uint8_t *)&key, sizeof(key), (uint8_t *)&value, &value_length, NULL, false)) 62 | { 63 | printf("get key = %zu, value = \n", key); 64 | continue; 65 | } 66 | assert(value_length == sizeof(value)); 67 | printf("get key = %zu, value = %zu\n", key, value); 68 | } 69 | 70 | mehcached_print_stats(table); 71 | 72 | mehcached_table_free(table); 73 | } 74 | 75 | int 76 | main(int argc MEHCACHED_UNUSED, const char *argv[] MEHCACHED_UNUSED) 77 | { 78 | const size_t page_size = 1048576 * 2; 79 | const size_t num_numa_nodes = 2; 80 | const size_t num_pages_to_try = 16384; 81 | const size_t num_pages_to_reserve = 16384 - 2048; // give 2048 pages to dpdk 82 | 83 | mehcached_shm_init(page_size, num_numa_nodes, num_pages_to_try, num_pages_to_reserve); 84 | 85 | test_basic(); 86 | 87 | return EXIT_SUCCESS; 88 | } 89 | 90 | -------------------------------------------------------------------------------- /src/util.c: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Carnegie Mellon University 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
// Return the smallest power of two that is >= v.
//
// v == 0 and v == 1 both yield 1.
// If no power of two >= v is representable in size_t (v is larger than the
// highest single-bit value), 0 is returned; previously this case shifted by
// the full width of size_t, which is undefined behavior.
static
size_t
mehcached_next_power_of_two(size_t v)
{
    // value with only the top bit of size_t set
    const size_t highest_pow2 = ~(size_t)0 - (~(size_t)0 >> 1);

    if (v > highest_pow2)
        return 0;

    size_t p = 1;
    while (p < v)
        p <<= 1;
    return p;
}
uint64_t *)(src + 0); 69 | break; 70 | case 2: 71 | *(uint64_t *)(dest + 0) = *(const uint64_t *)(src + 0); 72 | *(uint64_t *)(dest + 8) = *(const uint64_t *)(src + 8); 73 | break; 74 | case 3: 75 | *(uint64_t *)(dest + 0) = *(const uint64_t *)(src + 0); 76 | *(uint64_t *)(dest + 8) = *(const uint64_t *)(src + 8); 77 | *(uint64_t *)(dest + 16) = *(const uint64_t *)(src + 16); 78 | break; 79 | case 4: 80 | *(uint64_t *)(dest + 0) = *(const uint64_t *)(src + 0); 81 | *(uint64_t *)(dest + 8) = *(const uint64_t *)(src + 8); 82 | *(uint64_t *)(dest + 16) = *(const uint64_t *)(src + 16); 83 | *(uint64_t *)(dest + 24) = *(const uint64_t *)(src + 24); 84 | break; 85 | default: 86 | memcpy(dest, src, length); 87 | break; 88 | } 89 | #else 90 | rte_memcpy(dest, src, length); 91 | #endif 92 | } 93 | 94 | static 95 | bool 96 | mehcached_memcmp8(const uint8_t *dest, const uint8_t *src, size_t length) 97 | { 98 | length = MEHCACHED_ROUNDUP8(length); 99 | switch (length >> 3) 100 | { 101 | case 0: 102 | return true; 103 | case 1: 104 | if (*(const uint64_t *)(dest + 0) != *(const uint64_t *)(src + 0)) 105 | return false; 106 | return true; 107 | case 2: 108 | if (*(const uint64_t *)(dest + 0) != *(const uint64_t *)(src + 0)) 109 | return false; 110 | if (*(const uint64_t *)(dest + 8) != *(const uint64_t *)(src + 8)) 111 | return false; 112 | return true; 113 | case 3: 114 | if (*(const uint64_t *)(dest + 0) != *(const uint64_t *)(src + 0)) 115 | return false; 116 | if (*(const uint64_t *)(dest + 8) != *(const uint64_t *)(src + 8)) 117 | return false; 118 | if (*(const uint64_t *)(dest + 16) != *(const uint64_t *)(src + 16)) 119 | return false; 120 | return true; 121 | case 4: 122 | if (*(const uint64_t *)(dest + 0) != *(const uint64_t *)(src + 0)) 123 | return false; 124 | if (*(const uint64_t *)(dest + 8) != *(const uint64_t *)(src + 8)) 125 | return false; 126 | if (*(const uint64_t *)(dest + 16) != *(const uint64_t *)(src + 16)) 127 | return false; 128 | if (*(const uint64_t 
// Advance the 48-bit linear congruential generator in *state by one step and
// return its upper 32 bits.
//
// state: in/out generator state; only the low 48 bits are kept after a step.
// The 64-bit constants are written with uint64_t casts so the 48-bit mask is
// well defined even where unsigned long is only 32 bits wide.
static
uint32_t
mehcached_rand(uint64_t *state)
{
    // same as Java's
    const uint64_t mask48 = ((uint64_t)1 << 48) - 1;
    *state = (*state * (uint64_t)0x5deece66d + (uint64_t)0xb) & mask48;
    return (uint32_t)(*state >> (48 - 32));
}

// Advance the same generator by one step and map the new 48-bit state to a
// double in the half-open interval [0, 1).
//
// The state is divided by 2^48 (not 2^48 - 1) so that 1.0 itself can never be
// returned; callers that scale the result by n and truncate therefore always
// get an index below n.
static
double
mehcached_rand_d(uint64_t *state)
{
    // caution: this is maybe too non-random
    const uint64_t range48 = (uint64_t)1 << 48;
    *state = (*state * (uint64_t)0x5deece66d + (uint64_t)0xb) & (range48 - 1);
    return (double)*state / (double)range48;
}
return malloc_arg.ret; 204 | } 205 | 206 | static 207 | void 208 | rte_eal_launch(lcore_function_t *f, void *arg, unsigned int core_id) 209 | { 210 | if (core_id == rte_lcore_id()) 211 | f(arg); 212 | else 213 | rte_eal_remote_launch(f, arg, core_id); 214 | } 215 | 216 | MEHCACHED_END 217 | 218 | -------------------------------------------------------------------------------- /src/zipf.h: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Carnegie Mellon University 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
// Fast approximation of pow(a, b) for a > 0.
//
// The exponent is split into an integer part e (truncated toward zero) and a
// fractional part.  The fractional power is approximated by linearly scaling
// the upper 32 bits of a's IEEE-754 representation; the integer power is
// computed exactly by squaring.  Even for a whole-number exponent the result
// carries the approximation's bias (roughly 3% low).
//
// Negative exponents are supported: the integer part is applied to |e| and the
// result divided instead of multiplied.  (Shifting a negative e right never
// reaches zero, so the loop must not run on the signed value.)
//
// Assumes a little-endian double layout with 32-bit int (x[1] is the high
// word), and that b fits in an int.
static
double
mehcached_pow_approx(double a, double b)
{
    // from http://martin.ankerl.com/2012/01/25/optimized-approximative-pow-in-c-and-cpp/

    // calculate approximation with fraction of the exponent
    int e = (int)b;
    union
    {
        double d;
        int x[2];
    } u = { a };
    u.x[1] = (int)((b - (double)e) * (double)(u.x[1] - 1072632447) + 1072632447.);
    u.x[0] = 0;

    // exponentiation by squaring with the magnitude of the exponent's integer part
    // double r = u.d makes everything much slower, not sure why
    // TODO: use popcount?
    unsigned int n = (e < 0) ? 0U - (unsigned int)e : (unsigned int)e;
    double r = 1.;
    while (n)
    {
        if (n & 1U)
            r *= a;
        a *= a;
        n >>= 1;
    }

    // a^(e + frac) = a^frac * a^e for e >= 0, a^frac / a^|e| for e < 0
    return (e < 0) ? u.d / r : r * u.d;
}
&& theta < 1.) 88 | { 89 | state->alpha = 1. / (1. - theta); 90 | state->thres = 1. + mehcached_pow_approx(0.5, theta); 91 | } 92 | else 93 | { 94 | state->alpha = 0.; // unused 95 | state->thres = 0.; // unused 96 | } 97 | state->last_n = 0; 98 | state->zetan = 0.; 99 | // state->rand_state[0] = (unsigned short)(rand_seed >> 0); 100 | // state->rand_state[1] = (unsigned short)(rand_seed >> 16); 101 | // state->rand_state[2] = (unsigned short)(rand_seed >> 32); 102 | state->rand_state = rand_seed; 103 | } 104 | 105 | static 106 | void 107 | mehcached_zipf_init_copy(struct zipf_gen_state *state, const struct zipf_gen_state *src_state, uint64_t rand_seed) 108 | { 109 | assert(rand_seed < (1UL << 48)); 110 | memcpy(state, src_state, sizeof(struct zipf_gen_state)); 111 | // state->rand_state[0] = (unsigned short)(rand_seed >> 0); 112 | // state->rand_state[1] = (unsigned short)(rand_seed >> 16); 113 | // state->rand_state[2] = (unsigned short)(rand_seed >> 32); 114 | state->rand_state = rand_seed; 115 | } 116 | 117 | static 118 | void 119 | mehcached_zipf_change_n(struct zipf_gen_state *state, uint64_t n) 120 | { 121 | state->n = n; 122 | } 123 | 124 | static 125 | double 126 | mehcached_zeta(uint64_t last_n, double last_sum, uint64_t n, double theta) 127 | { 128 | if (last_n > n) 129 | { 130 | last_n = 0; 131 | last_sum = 0.; 132 | } 133 | while (last_n < n) 134 | { 135 | last_sum += 1. / mehcached_pow_approx((double)last_n + 1., theta); 136 | last_n++; 137 | } 138 | return last_sum; 139 | } 140 | 141 | static 142 | uint64_t 143 | mehcached_zipf_next(struct zipf_gen_state *state) 144 | { 145 | if (state->last_n != state->n) 146 | { 147 | if (state->theta > 0. && state->theta < 1.) 148 | { 149 | state->zetan = mehcached_zeta(state->last_n, state->zetan, state->n, state->theta); 150 | state->eta = (1. - mehcached_pow_approx(2. / (double)state->n, 1. - state->theta)) / 151 | (1. 
- mehcached_zeta(0, 0., 2, state->theta) / state->zetan); 152 | } 153 | state->last_n = state->n; 154 | state->dbl_n = (double)state->n; 155 | } 156 | 157 | if (state->theta == -1.) 158 | { 159 | uint64_t v = state->rand_state; 160 | if (++state->rand_state >= state->n) 161 | state->rand_state = 0; 162 | return v; 163 | } 164 | else if (state->theta == 0.) 165 | { 166 | double u = mehcached_rand_d(&state->rand_state); 167 | return (uint64_t)(state->dbl_n * u); 168 | } 169 | else if (state->theta >= 40.) 170 | { 171 | return 0UL; 172 | } 173 | else 174 | { 175 | // from J. Gray et al. Quickly generating billion-record synthetic databases. In SIGMOD, 1994. 176 | 177 | // double u = erand48(state->rand_state); 178 | double u = mehcached_rand_d(&state->rand_state); 179 | double uz = u * state->zetan; 180 | if (uz < 1.) 181 | return 0UL; 182 | else if (uz < state->thres) 183 | return 1UL; 184 | else 185 | return (uint64_t)(state->dbl_n * mehcached_pow_approx(state->eta * (u - 1.) + 1., state->alpha)); 186 | } 187 | } 188 | 189 | static 190 | void 191 | mehcached_test_zipf(double theta) 192 | { 193 | double zetan = 0.; 194 | const uint64_t n = 1000000UL; 195 | uint64_t i; 196 | 197 | for (i = 0; i < n; i++) 198 | zetan += 1. / pow((double)i + 1., theta); 199 | 200 | struct zipf_gen_state state; 201 | if (theta < 1. || theta >= 40.) 202 | mehcached_zipf_init(&state, n, theta, 0); 203 | 204 | uint64_t num_key0 = 0; 205 | const uint64_t num_samples = 10000000UL; 206 | if (theta < 1. || theta >= 40.) 207 | { 208 | for (i = 0; i < num_samples; i++) 209 | if (mehcached_zipf_next(&state) == 0) 210 | num_key0++; 211 | } 212 | 213 | printf("theta = %lf; using pow(): %.10lf", theta, 1. / zetan); 214 | if (theta < 1. || theta >= 40.) 215 | printf(", using approx-pow(): %.10lf", (double)num_key0 / (double)num_samples); 216 | printf("\n"); 217 | } 218 | --------------------------------------------------------------------------------