├── .gitignore
├── CMakeLists.txt
├── README.md
├── build
    ├── gen_confs.py
    └── run_analysis_for_conf.py
├── configure_all.sh
├── configure_client.sh
├── configure_server.sh
├── scripts
    ├── setup_dkdp_env.sh
    └── unbind.sh
└── src
    ├── CMakeLists.txt
    ├── alloc.h
    ├── alloc_dynamic.c
    ├── alloc_dynamic.h
    ├── alloc_malloc.c
    ├── alloc_malloc.h
    ├── alloc_pool.c
    ├── alloc_pool.h
    ├── basic_types.h
    ├── city.c
    ├── city.h
    ├── citycrc.h
    ├── common.h
    ├── config.h
    ├── hash.c
    ├── hash.h
    ├── load.c
    ├── mehcached.h
    ├── microbench.c
    ├── net_common.c
    ├── net_common.h
    ├── netbench_analysis.c
    ├── netbench_client.c
    ├── netbench_config.c
    ├── netbench_config.h
    ├── netbench_hot_item_hash.h
    ├── netbench_server.c
    ├── perf_count
        ├── CMakeLists.txt
        ├── perf_count.c
        └── perf_count.h
    ├── proto.h
    ├── shm.c
    ├── shm.h
    ├── stopwatch.c
    ├── stopwatch.h
    ├── table.c
    ├── table.h
    ├── test.c
    ├── util.c
    ├── util.h
    └── zipf.h


/.gitignore:
--------------------------------------------------------------------------------
 1 | build/CMakeCache.txt
 2 | build/CMakeFiles
 3 | build/src
 4 | Makefile
 5 | cmake_install.cmake
 6 | 
 7 | test
 8 | load
 9 | microbench
10 | microbench_store
11 | netbench_client
12 | netbench_server
13 | netbench_server_store
14 | netbench_client_latency
15 | netbench_server_latency
16 | netbench_client_soft_fdir
17 | netbench_server_soft_fdir
18 | netbench_analysis
19 | 
20 | perf.data
21 | perf.data.old
22 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Copyright 2014 Carnegie Mellon University
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | cmake_minimum_required(VERSION 2.6)
16 | # Refuse in-source builds: the source and binary directories must differ.
17 | if(CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_CURRENT_BINARY_DIR)
18 | 	message(FATAL_ERROR "Use out-of-source build only!")
19 | endif(CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_CURRENT_BINARY_DIR)
20 | # The build rules themselves live in src/CMakeLists.txt.
21 | add_subdirectory(src)
22 | # Part of the default build (ALL): symlink the binaries under src/ into the directory the command runs in (`.`).
23 | add_custom_target(
24 |     symbolic_links
25 |     ALL
26 |     ln -sf src/test src/load src/microbench src/microbench_store src/netbench_client src/netbench_server src/netbench_client_latency src/netbench_client_soft_fdir src/netbench_server_latency src/netbench_server_soft_fdir src/netbench_server_store src/netbench_analysis .
27 | )
28 | 
29 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | MICA
  2 | ====
  3 | 
  4 | A fast in-memory key-value store.
  5 | 
  6 | 
  7 | Hardware Requirements
  8 | ---------------------
  9 | 
 10 |  * Dual CPU system
 11 |  * Intel 10 GbE NICs
 12 |  * Note: The current codebase has several assumptions on the hardware configuration of the server and clients.
 13 |          It runs ideally on a dual octa-core server with 4 dual-port 10 GbE NICs, and clients with 2 dual-port 10 GbE NICs.
 14 | 
 15 | 
 16 | Software Requirements
 17 | ---------------------
 18 | 
 19 |  * linux x86_64 >= 3.2.0
 20 |  * gcc >= 4.6.0
 21 |  * Python >= 2.7.0
 22 |  * Intel DPDK >= 1.5.0
 23 |  * bash >= 4.0.0
 24 |  * cmake >= 2.6.0
 25 |  * Hugepage (2 MiB pages) support
 26 | 
 27 | 
 28 | Executables
 29 | -----------
 30 | 
 31 |  * build/netbench_server: MICA server in cache mode (use with netbench_client)
 32 |  * build/netbench_server_store: MICA server in store mode (use with netbench_client)
 33 |  * build/netbench_server_latency: MICA server in cache mode modified for end-to-end latency measurement (use with netbench_client_latency)
 34 |  * build/netbench_server_soft_fdir: MICA server in cache mode using software-based request direction (use with netbench_client_soft_fdir)
 35 |  * build/netbench_client*: MICA clients
 36 |  * build/netbench_analysis: workload analyzer (used for generating preset configurations)
 37 |  * build/microbench: a local microbenchmark for MICA in cache mode
 38 |  * build/microbench_store: a local microbenchmark for MICA in store mode
 39 |  * build/test: a simple feature test program
 40 |  * build/load: a load factor experiment
 41 | 
 42 | 
 43 | Compiling Executables
 44 | ---------------------
 45 | 
 46 | 	# unpack DPDK as "DPDK" to the directory containing mica
 47 | 	$ cd mica/build
 48 | 	$ ../scripts/setup_dkdp_env.sh	# this uses sudo
 49 | 	$ ../configure_all.sh
 50 | 	$ make
 51 | 
 52 | 
 53 | Generating Configuration Files
 54 | ------------------------------
 55 | 
 56 | 	# conf_* files determine how MICA uses system resources. build/gen_confs.py generates a preset of configuration files for a 16-core server and 12-core clients
 57 | 	# in mica/build
 58 | 	$ ./run_analysis_for_conf.py	# this uses sudo
 59 | 	$ ./gen_confs.py
 60 | 
 61 | 
 62 | Running a Server
 63 | ----------------
 64 | 
 65 | 	# in mica/build
 66 | 	$ sudo ./netbench_server conf_machines_DATASET_CMODE_0.5 server 0 0 conf_prepopulation_empty
 67 | 	# DATASET=0,1,2 (used to determine how much memory to allocate); CMODE=EREW,CREW,CRCWS (specifies the data access mode)
 68 | 
 69 | 
 70 | Running a Client (e.g., client0)
 71 | --------------------------------
 72 | 
 73 | 	# in mica/build
 74 | 	$ sudo ./netbench_client conf_machines_DATASET_CMODE_0.5 client0 0 0 conf_workload_DATASET_SKEW_GET_PUT_0.00_1
 75 | 	# DATASET=0,1,2 (specifies the dataset to use); SKEW=uniform,skewed,single (specifies the workload skew); GET/PUT=0.00,0.50,0.95,1.00 (specifies the read/write ratio)
 76 | 
 77 | 
 78 | Running a Local Microbenchmark
 79 | ------------------------------
 80 | 
 81 | 	# in mica/build
 82 | 	$ sudo ./microbench CMODE SKEWNESS 0.5
 83 | 	# CMODE=EREW,CREW,CRCWS (specifies the data access mode); SKEWNESS=0(uniform),0.99(skewed),99(single) (specifies the workload skew)
 84 | 
 85 | 
 86 | License
 87 | -------
 88 | 
 89 | 	Copyright 2014 Carnegie Mellon University
 90 | 
 91 | 	Licensed under the Apache License, Version 2.0 (the "License");
 92 | 	you may not use this file except in compliance with the License.
 93 | 	You may obtain a copy of the License at
 94 | 
 95 | 	    http://www.apache.org/licenses/LICENSE-2.0
 96 | 
 97 | 	Unless required by applicable law or agreed to in writing, software
 98 | 	distributed under the License is distributed on an "AS IS" BASIS,
 99 | 	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
100 | 	See the License for the specific language governing permissions and
101 | 	limitations under the License.
102 | 
103 | 


--------------------------------------------------------------------------------
/build/gen_confs.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | 
  3 | class ServerConf:
  4 |     def __init__(self, server_name):
  5 |         self.server_name = server_name
  6 |         self.ports = []
  7 |         self.threads = []
  8 |         self.partitions = []
  9 |         self.hot_items = []
 10 | 
 11 |     def add_port(self, mac_addr, ip_addr):
 12 |         self.ports.append((mac_addr, ip_addr))
 13 | 
 14 |     def add_thread(self, port_ids):
 15 |         self.threads.append(port_ids)
 16 | 
 17 |     def add_partition(self, num_items, alloc_size, concurrent_table_read, concurrent_table_write, concurrent_alloc_write, thread_id, mth_threshold):
 18 |         self.partitions.append((num_items, alloc_size, concurrent_table_read, concurrent_table_write, concurrent_alloc_write, thread_id, mth_threshold))
 19 | 
 20 |     def add_hot_item(self, key_hash, thread_id):
 21 |         self.hot_items.append((key_hash, thread_id))
 22 | 
 23 |     def write(self, f):
 24 |         f.write('server,%s\n' % self.server_name)
 25 |         for port in self.ports:
 26 |             f.write('server_port,%s,%s\n' % port)
 27 |         for thread in self.threads:
 28 |             f.write('server_thread,%s\n' % ' '.join([str(port_id) for port_id in thread]))
 29 |         for partition in self.partitions:
 30 |             f.write('server_partition,%s,%s,%s,%s,%s,%s,%s\n' % partition)
 31 |         for hot_item in self.hot_items:
 32 |             f.write('server_hot_item,%016x,%s\n' % hot_item)
 33 |         f.write('\n')
 34 | 
 35 | class ClientConf:
 36 |     def __init__(self, client_name):
 37 |         self.client_name = client_name
 38 |         self.ports = []
 39 |         self.threads = []
 40 | 
 41 |     def add_port(self, mac_addr, ip_addr):
 42 |         self.ports.append((mac_addr, ip_addr))
 43 | 
 44 |     def add_thread(self):
 45 |         self.threads.append(None)
 46 | 
 47 |     def write(self, f):
 48 |         f.write('client,%s\n' % self.client_name)
 49 |         for thread in self.threads:
 50 |             f.write('client_thread,\n')
 51 |         for port in self.ports:
 52 |             f.write('client_port,%s,%s\n' % port)
 53 |         f.write('\n')
 54 | 
 55 | class PrePopulationConf:
 56 |     def __init__(self, server_name):
 57 |         self.server_name = server_name
 58 |         self.dataset = None
 59 | 
 60 |     def set(self, num_items, key_length, value_length):
 61 |         self.dataset = (num_items, key_length, value_length)
 62 | 
 63 |     def write(self, f):
 64 |         f.write('prepopulation,%s\n' % self.server_name)
 65 |         f.write('dataset,%s,%s,%s\n' % self.dataset)
 66 |         f.write('\n')
 67 | 
 68 | class WorkloadConf:
 69 |     def __init__(self, client_name):
 70 |         self.client_name = client_name
 71 |         self.threads = []
 72 | 
 73 |     def add_thread(self, port_ids, server_name, partition_mode, num_items, key_length, value_length, zipf_theta, get_ratio, put_ratio, increment_ratio, batch_size, num_operations, duration):
 74 |         assert abs(get_ratio) + abs(put_ratio) + abs(increment_ratio) == 1.
 75 |         self.threads.append((port_ids, server_name, partition_mode, num_items, key_length, value_length, zipf_theta, get_ratio, put_ratio, increment_ratio, batch_size, num_operations, duration))
 76 | 
 77 |     def write(self, f):
 78 |         f.write('workload,%s\n' % self.client_name)
 79 |         for thread in self.threads:
 80 |             f.write('workload_thread,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n' % (
 81 |                 ' '.join([str(port_id) for port_id in thread[0]]),
 82 |                 thread[1],
 83 |                 thread[2],
 84 |                 thread[3],
 85 |                 thread[4],
 86 |                 thread[5],
 87 |                 thread[6],
 88 |                 thread[7],
 89 |                 thread[8],
 90 |                 thread[9],
 91 |                 thread[10],
 92 |                 thread[11],
 93 |                 thread[12]
 94 |                 ))
 95 |         f.write('\n')
 96 | 
 97 | def init_addr():
 98 |     global _last_addr_id
 99 |     _last_addr_id = 0
100 | 
101 | def next_addr():
102 |     global _last_addr_id
103 | 
104 |     addr_id = _last_addr_id
105 |     _last_addr_id += 1
106 | 
107 |     mac_addr = '80:00:00:00:00:{:02}'.format(addr_id)
108 |     ip_addr = '10.0.0.{}'.format(addr_id)
109 |     return mac_addr, ip_addr
110 | 
111 | 
112 | class ConcurrencyModel:
113 |     def concurrent_table_read(self, partition_id): pass
114 |     def concurrent_table_write(self, partition_id): pass
115 |     def concurrent_alloc_write(self, partition_id): pass
116 |     def thread_id(self, partition_id): pass
117 |     def hot_items(self): pass
118 | 
119 | class EREW(ConcurrencyModel):
120 |     name = 'EREW'
121 |     def concurrent_table_read(self, partition_id): return 0
122 |     def concurrent_table_write(self, partition_id): return 0
123 |     def concurrent_alloc_write(self, partition_id): return 0
124 |     def thread_id(self, partition_id): return partition_id % 16
125 |     def hot_items(self): return []
126 | 
127 | class CREW(EREW):
128 |     name = 'CREW'
129 |     def concurrent_table_read(self, partition_id): return 1
130 | 
131 | class CRCW(EREW):
132 |     name = 'CRCW'
133 |     def concurrent_table_read(self, partition_id): return 1
134 |     def concurrent_table_write(self, partition_id): return 1
135 | 
136 | class CRCWS(EREW):
137 |     name = 'CRCWS'
138 |     def concurrent_table_read(self, partition_id): return 1
139 |     def concurrent_table_write(self, partition_id): return 1
140 |     def concurrent_alloc_write(self, partition_id): return 1
141 | 
142 | class CREW0(CREW):
143 |     name = 'CREW0'
144 |     def thread_id(self, partition_id): return 0     # all writes go to core 0
145 | 
146 | # use this for EREW partitions, CREW hot items
147 | #class LB(EREW):
148 | # use this for CREW partitions and hot items (uncomment MEHCACHED_LOAD_BALANCE_USE_CREW_PARTITION in netbench_analysis.c)
149 | class LB(CREW):
150 |     def __init__(self, num_hot_items, zipf, get_ratio):
151 |         self.name = 'LB-%d-%s-%.2f' % (num_hot_items, zipf[0], get_ratio)
152 |         self.thread_id_list = None
153 |         self.hot_item_list = None
154 | 
155 |         f = open('analysis_%d_%s_%.2f' % (num_hot_items, zipf[0], get_ratio))
156 |         lines = list(f.readlines())
157 |         i = 0
158 |         while i < len(lines):
159 |             line = lines[i]
160 |             if line.strip() == 'partition_to_thread:':
161 |                 self.thread_id_list = eval('[' + lines[i + 1].strip() + ']')
162 |             elif line.strip() == 'hot_item_to_thread:':
163 |                 self.hot_item_list = eval('[' + lines[i + 1].strip() + ']')
164 |             i += 1
165 |         assert self.thread_id_list != None
166 |         assert self.hot_item_list != None
167 | 
168 |     def thread_id(self, partition_id): return self.thread_id_list[partition_id]
169 |     def hot_items(self): return self.hot_item_list
170 | 
171 | 
172 | def main():
173 |     datasets = [
174 |             (8, 8, 192 * 1048576),
175 |             (16, 64, 128 * 1048576),
176 |             (128, 1024, 8 * 1048576),
177 |         ]
178 | 
179 |     f = open('conf_prepopulation_empty', 'w')
180 |     p = PrePopulationConf('server')
181 |     p.set(0, 8, 8)
182 |     p.write(f)
183 | 
184 |     for dataset, (key_length, value_length, num_items) in enumerate(datasets):
185 |         assert key_length >= len('%x' % (num_items - 1))    # for hexadecimal key
186 |         #num_partitions = 64
187 |         num_partitions = 16
188 |         # the following should be the same as in run_analysis_for_conf.py
189 |         # isolated_server_numa_nodes = True
190 |         isolated_server_numa_nodes = False
191 | 
192 |         # the followings are always 0 to allow exp.py to control duration
193 |         load_duration = 0.
194 |         trans_duration = 0.
195 | 
196 |         concurrency_list = [EREW(), CREW(), CRCW(), CRCWS(), CREW0()]
197 |         for num_hot_items in (0, 32):
198 |             for zipf in (('uniform', 0.), ('skewed', 0.99), ('single', 99.)):
199 |                 for get_ratio in (0., 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 1.):
200 |                     concurrency_list.append(LB(num_hot_items, zipf, get_ratio))
201 | 
202 |         mth_threshold_list = (1.0, 0.5, 0.0)
203 | 
204 |         for concurrency in concurrency_list:
205 |             for mth_threshold in mth_threshold_list:
206 |                 init_addr()
207 | 
208 |                 f = open('conf_machines_%s_%s_%s' % (dataset, concurrency.name, mth_threshold), 'w')
209 | 
210 |                 s = ServerConf('server')
211 |                 for port_id in range(8):
212 |                     s.add_port(*next_addr())
213 |                 for thread_id in range(0, 16, 2):
214 |                     s.add_thread(list(range(0, 4)))
215 |                     s.add_thread(list(range(4, 8)))
216 |                 for partition_id in range(num_partitions):
217 |                     num_items_per_partition = num_items / num_partitions
218 |                     alloc_size_per_partition = num_items * (key_length + value_length) / num_partitions
219 | 
220 |                     concurrent_table_read = concurrency.concurrent_table_read(partition_id)
221 |                     concurrent_table_write = concurrency.concurrent_table_write(partition_id)
222 |                     concurrent_alloc_write = concurrency.concurrent_alloc_write(partition_id)
223 |                     thread_id = concurrency.thread_id(partition_id)
224 |                     s.add_partition(num_items_per_partition, alloc_size_per_partition, concurrent_table_read, concurrent_table_write, concurrent_alloc_write, thread_id, mth_threshold)
225 |                 for hot_item in concurrency.hot_items():
226 |                     s.add_hot_item(*hot_item)
227 |                 s.write(f)
228 | 
229 |                 c0 = ClientConf('client0')
230 |                 for port in range(4):
231 |                     c0.add_port(*next_addr())
232 |                 for thread_id in range(12):
233 |                     c0.add_thread()
234 |                 c0.write(f)
235 | 
236 |                 c1 = ClientConf('client1')
237 |                 for port in range(4):
238 |                     c1.add_port(*next_addr())
239 |                 for thread_id in range(12):
240 |                     c1.add_thread()
241 |                 c1.write(f)
242 | 
243 |         f = open('conf_prepopulation_%s' % dataset, 'w')
244 |         p = PrePopulationConf('server')
245 |         p.set(num_items, key_length, value_length)
246 |         p.write(f)
247 | 
248 |         for zipf in (('uniform', 0.), ('skewed', 0.99), ('single', 99.)):
249 |             # load operations
250 |             f = open('conf_workload_%s_load_%s' % (dataset, zipf[0]), 'w')
251 |             if zipf[1] == 0.:
252 |                 # use sequential uniform instead for fast ingest
253 |                 zipf_theta = -1.0
254 |             else:
255 |                 # other skewed distributions usually allow fast ingest
256 |                 zipf_theta = zipf[1]
257 |             get_ratio = 0.
258 |             put_ratio = 1. - get_ratio
259 |             increment_ratio = 0.
260 |             load_batch_size = 1
261 |             num_operations = 0
262 |             duration = load_duration
263 |             w = WorkloadConf(c0.client_name)
264 |             for thread_id in range(12):
265 |                 if isolated_server_numa_nodes:
266 |                     w.add_thread(list(range(4)), s.server_name, 0, num_items, key_length, value_length, zipf_theta, get_ratio, put_ratio, increment_ratio, load_batch_size, num_operations, duration)
267 |                 else:
268 |                     w.add_thread(list(range(4)), s.server_name, -1, num_items, key_length, value_length, zipf_theta, get_ratio, put_ratio, increment_ratio, load_batch_size, num_operations, duration)
269 |             w.write(f)
270 |             w = WorkloadConf(c1.client_name)
271 |             for thread_id in range(12):
272 |                 if isolated_server_numa_nodes:
273 |                     w.add_thread(list(range(4)), s.server_name, 1, num_items, key_length, value_length, zipf_theta, get_ratio, put_ratio, increment_ratio, load_batch_size, num_operations, duration)
274 |                 else:
275 |                     w.add_thread(list(range(4)), s.server_name, -1, num_items, key_length, value_length, zipf_theta, get_ratio, put_ratio, increment_ratio, load_batch_size, num_operations, duration)
276 |             w.write(f)
277 | 
278 |             # trans operations
279 |             zipf_theta = zipf[1]
280 |             for get_ratio, put_ratio, increment_ratio in (
281 |                     (0., 1., 0.),
282 |                     (0.1, 0.9, 0.),
283 |                     (0.25, 0.75, 0.),
284 |                     (0.5, 0.5, 0.),
285 |                     (0.75, 0.25, 0.),
286 |                     (0.9, 0.1, 0.),
287 |                     (0.95, 0.05, 0.),
288 |                     (0.99, 0.01, 0.),
289 |                     (1., 0., 0.),
290 |                     (0., -1., 0.),
291 |                     (-0.1, -0.9, 0.),
292 |                     (-0.25, -0.75, 0.),
293 |                     (-0.5, -0.5, 0.),
294 |                     (-0.75, -0.25, 0.),
295 |                     (-0.9, -0.1, 0.),
296 |                     (-0.95, -0.05, 0.),
297 |                     (-0.99, -0.01, 0.),
298 |                     (-1., 0., 0.),
299 |                     (0., 0., 1.),
300 |                 ):
301 |                 for batch_size in (1, 2, 4, 8, 16, 32):
302 |                     f = open('conf_workload_%s_%s_%.2f_%.2f_%.2f_%s' % (dataset, zipf[0], get_ratio, put_ratio, increment_ratio, batch_size), 'w')
303 |                     num_operations = 0
304 |                     duration = trans_duration
305 |                     w = WorkloadConf(c0.client_name)
306 |                     for thread_id in range(12):
307 |                         if isolated_server_numa_nodes:
308 |                             w.add_thread(list(range(4)), s.server_name, 0, num_items, key_length, value_length, zipf_theta, get_ratio, put_ratio, increment_ratio, batch_size, num_operations, duration)
309 |                         else:
310 |                             w.add_thread(list(range(4)), s.server_name, -1, num_items, key_length, value_length, zipf_theta, get_ratio, put_ratio, increment_ratio, batch_size, num_operations, duration)
311 |                     w.write(f)
312 |                     w = WorkloadConf(c1.client_name)
313 |                     for thread_id in range(12):
314 |                         if isolated_server_numa_nodes:
315 |                             w.add_thread(list(range(4)), s.server_name, 1, num_items, key_length, value_length, zipf_theta, get_ratio, put_ratio, increment_ratio, batch_size, num_operations, duration)
316 |                         else:
317 |                             w.add_thread(list(range(4)), s.server_name, -1, num_items, key_length, value_length, zipf_theta, get_ratio, put_ratio, increment_ratio, batch_size, num_operations, duration)
318 |                     w.write(f)
319 | 
320 | 
321 | if __name__ == '__main__':
322 |     main()
323 | 


--------------------------------------------------------------------------------
/build/run_analysis_for_conf.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | import os
 4 | 
 5 | # isolated_server_numa_nodes = True
 6 | isolated_server_numa_nodes = False
 7 | 
 8 | for num_hot_items in (0, 32):
 9 |     for zipf in (('uniform', 0.), ('skewed', 0.99), ('single', 99.)):
10 |         for get_ratio in (0., 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 1.):
11 |             cmd = './netbench_analysis %d %f %f %d > analysis_%d_%s_%.2f' % (num_hot_items, zipf[1], get_ratio, isolated_server_numa_nodes, num_hot_items, zipf[0], get_ratio)
12 |             os.system(cmd)
13 | 
14 | 


--------------------------------------------------------------------------------
/configure_all.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # Copyright 2014 Carnegie Mellon University
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | mkdir -p "$(dirname "$0")/build"	# quoting "$0" keeps script paths containing spaces intact
18 | 
19 | cd "$(dirname "$0")/build" || exit 1
20 | # start from a fresh CMake cache
21 | rm -f CMakeCache.txt
22 | 
23 | NDEBUG=yes cmake ..
24 | 
25 | 


--------------------------------------------------------------------------------
/configure_client.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # Copyright 2014 Carnegie Mellon University
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | mkdir -p "$(dirname "$0")/build"	# quoting "$0" keeps script paths containing spaces intact
18 | 
19 | cd "$(dirname "$0")/build" || exit 1
20 | # start from a fresh CMake cache
21 | rm -f CMakeCache.txt
22 | 
23 | NDEBUG=yes NSERVER=yes cmake ..
24 | 
25 | 


--------------------------------------------------------------------------------
/configure_server.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # Copyright 2014 Carnegie Mellon University
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | mkdir -p "$(dirname "$0")/build"	# quoting "$0" keeps script paths containing spaces intact
18 | 
19 | cd "$(dirname "$0")/build" || exit 1
20 | # start from a fresh CMake cache
21 | rm -f CMakeCache.txt
22 | 
23 | NDEBUG=yes NCLIENT=yes cmake ..
24 | 
25 | 


--------------------------------------------------------------------------------
/scripts/setup_dkdp_env.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | ##### from DPDK/tools/setup.sh
  4 | 
  5 | #
  6 | # Sets up environment variables for ICC.
  7 | #
  8 | setup_icc()
  9 | {
 10 | 	DEFAULT_PATH=/opt/intel/bin/iccvars.sh
 11 | 	param=$1	# argument forwarded to iccvars.sh (callers pass intel64 or ia32)
 12 | 	# prefer an iccvars.sh found on PATH, then fall back to DEFAULT_PATH
 13 | 	if shpath=`which iccvars.sh 2> /dev/null` ; then
 14 | 		echo "Loading iccvars.sh from $shpath for $param"
 15 | 		source "$shpath" $param
 16 | 	elif [ -f $DEFAULT_PATH ] ; then
 17 | 		echo "Loading iccvars.sh from $DEFAULT_PATH for $param"
 18 | 		source $DEFAULT_PATH $param
 19 | 	else
 20 | 		echo "## ERROR: cannot find 'iccvars.sh' script to set up ICC."
 21 | 		echo "##     To fix, please add the directory that contains"
 22 | 		echo "##     iccvars.sh  to your 'PATH' environment variable."
 23 | 		return 1	# the former 'quit' call named a command that is not defined at this point in the script
 24 | 	fi
 25 | }
 26 | 
 27 | #
 28 | # Sets RTE_TARGET and does a "make install".
 29 | #
 30 | setup_target()
 31 | {
 32 | 	#option=$1
 33 | 	#export RTE_TARGET=${TARGETS[option]}
 34 | 
 35 | 	compiler=${RTE_TARGET##*-}	# text after the last '-' in RTE_TARGET
 36 | 	if [ "$compiler" == "icc" ] ; then
 37 | 		platform=${RTE_TARGET%%-*}	# text before the first '-'
 38 | 		if [ "$platform" == "x86_64" ] ; then
 39 | 			setup_icc intel64
 40 | 		else
 41 | 			setup_icc ia32
 42 | 		fi
 43 | 	fi
 44 | 	#if [ "$QUIT" == "0" ] ; then
 45 | 		if [ ! -d "$RTE_SDK/$RTE_TARGET" ]; then	# configure only when the target directory does not exist yet
 46 | 			make config T=${RTE_TARGET} O="$RTE_SDK/$RTE_TARGET"
 47 | 			sed -i 's/CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE=.*/CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE=8192/g' "$RTE_SDK/$RTE_TARGET/.config"
 48 | 			rm -f "$RTE_SDK/$RTE_TARGET/include/rte_config.h"	# -f: no error if absent; presumably removed so it is regenerated from the edited .config
 49 | 		fi
 50 | 		make -C "$RTE_SDK/$RTE_TARGET"
 51 | 	#fi
 52 | 	#echo "------------------------------------------------------------------------------"
 53 | 	#echo " RTE_TARGET exported as $RTE_TARGET"
 54 | 	#echo "------------------------------------------------------------------------------"
 55 | }
 56 | 
 57 | #
 58 | # Uninstall all targets.
 59 | #
 60 | uninstall_targets()
 61 | {
 62 | 	make uninstall	# runs in whatever directory the caller is currently in
 63 | }
 64 | 
 65 | #
 66 | # Creates hugepage filesystem.
 67 | #
 68 | create_mnt_huge()
 69 | {
 70 | 	echo "Creating /mnt/huge and mounting as hugetlbfs"
 71 | 	sudo mkdir -p /mnt/huge
 72 | 
 73 | 	# mount only when /proc/mounts has no line mentioning /mnt/huge yet
 74 | 	if ! grep -s '/mnt/huge' /proc/mounts > /dev/null ; then
 75 | 		sudo mount -t hugetlbfs nodev /mnt/huge
 76 | 	fi
 77 | }
 78 | 
 79 | #
 80 | # Removes hugepage filesystem.
 81 | #
 82 | remove_mnt_huge()
 83 | {
 84 | 	echo "Unmounting /mnt/huge and removing directory"
 85 | 	# unmount first when /proc/mounts still lists /mnt/huge
 86 | 	if grep -s '/mnt/huge' /proc/mounts > /dev/null ; then
 87 | 		sudo umount /mnt/huge
 88 | 	fi
 89 | 
 90 | 	if [ -d /mnt/huge ] ; then
 91 | 		sudo rm -R /mnt/huge
 92 | 	fi
 93 | }
 94 | 
 95 | #
 96 | # Unloads igb_uio.ko.
 97 | #
 98 | remove_igb_uio_module()
 99 | {
100 | 	echo "Unloading any existing DPDK UIO module"
101 | 	# rmmod only when lsmod currently lists igb_uio
102 | 	if /sbin/lsmod | grep -s igb_uio > /dev/null ; then
103 | 		sudo /sbin/rmmod igb_uio
104 | 	fi
105 | }
106 | 
107 | #
108 | # Loads new igb_uio.ko (and uio module if needed).
109 | #
110 | load_igb_uio_module()
111 | {
112 | 	if [ ! -f $RTE_SDK/$RTE_TARGET/kmod/igb_uio.ko ];then
113 | 		echo "## ERROR: Target does not have the DPDK UIO Kernel Module."
114 | 		echo "       To fix, please try to rebuild target."
115 | 		return
116 | 	fi
117 | 
118 | 	remove_igb_uio_module
119 | 
120 | 	# try the generic uio module only when lsmod lists nothing matching 'uio'
121 | 	if ! /sbin/lsmod | grep -s uio > /dev/null ; then
122 | 		if [ -f /lib/modules/$(uname -r)/kernel/drivers/uio/uio.ko ] ; then
123 | 			echo "Loading uio module"
124 | 			sudo /sbin/modprobe uio
125 | 		fi
126 | 	fi
127 | 
128 | 	# UIO may be compiled into kernel, so it may not be an error if it can't
129 | 	# be loaded.
130 | 
131 | 	echo "Loading DPDK UIO module"
132 | 	if ! sudo /sbin/insmod $RTE_SDK/$RTE_TARGET/kmod/igb_uio.ko ; then
133 | 		echo "## ERROR: Could not load kmod/igb_uio.ko."
134 | 		# NOTE(review): no 'quit' function is defined above this point; confirm one exists before this runs
135 | 		quit
136 | 	fi
137 | }
138 | 
139 | #
140 | # Removes all reserved hugepages.
141 | #
142 | clear_huge_pages()
143 | {
144 | 	echo > .echo_tmp	# scratch script, created in the current directory
145 | 	for d in /sys/devices/system/node/node? ; do	# 'node?' matches single-character node suffixes only
146 | 		echo "echo 0 > $d/hugepages/hugepages-2048kB/nr_hugepages" >> .echo_tmp
147 | 	done
148 | 	echo "Removing currently reserved hugepages"
149 | 	sudo sh .echo_tmp	# the collected writes are executed in one sudo'd shell
150 | 	rm -f .echo_tmp
151 | 
152 | 	remove_mnt_huge
153 | }
154 | 
155 | #
156 | # Creates hugepages.
157 | #
158 | set_non_numa_pages()
159 | {
160 | 	clear_huge_pages
161 | 
162 | 	echo ""
163 | 	echo "  Input the number of 2MB pages"
164 | 	echo "  Example: to have 128MB of hugepages available, enter '64' to"
165 | 	echo "  reserve 64 * 2MB pages"
166 | 	echo -n "Number of pages: "
167 | 	#read Pages
168 | 	Pages=$1	# the count comes from the first argument; the interactive read above is disabled
169 | 
170 | 	echo "echo $Pages > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages" > .echo_tmp
171 | 
172 | 	echo "Reserving hugepages"
173 | 	sudo sh .echo_tmp	# the write is executed in a sudo'd shell
174 | 	rm -f .echo_tmp
175 | 
176 | 	create_mnt_huge
177 | }
178 | 
179 | #
180 | # Creates hugepages on specific NUMA nodes.
181 | #
182 | set_numa_pages()
183 | {
184 | 	clear_huge_pages
185 | 
186 | 	echo ""
187 | 	echo "  Input the number of 2MB pages for each node"
188 | 	echo "  Example: to have 128MB of hugepages available per node,"
189 | 	echo "  enter '64' to reserve 64 * 2MB pages on each node"
190 | 
191 | 	echo > .echo_tmp
192 | 	for d in /sys/devices/system/node/node? ; do
193 | 		node=$(basename $d)
194 | 		echo -n "Number of pages for $node: "
195 | 		#read Pages
196 | 		Pages=${1:-0}	# one positional argument per node; nodes without an argument get 0 instead of an empty value
197 | 		shift
198 | 		echo "echo $Pages > $d/hugepages/hugepages-2048kB/nr_hugepages" >> .echo_tmp
199 | 	done
200 | 	echo "Reserving hugepages"
201 | 	sudo sh .echo_tmp	# the collected writes are executed in one sudo'd shell
202 | 	rm -f .echo_tmp
203 | 
204 | 	create_mnt_huge
205 | }
206 | 
207 | #
208 | # Run unit test application.
209 | #
210 | run_test_app()
211 | {
212 | 	echo ""
213 | 	echo "  Enter hex bitmask of cores to execute test app on"
214 | 	echo "  Example: to execute app on cores 0 to 7, enter 0xff"
215 | 	echo -n "bitmask: "
216 | 	read -r Bitmask	# -r: take the input literally, without backslash processing
217 | 	echo "Launching app"
218 | 	sudo "${RTE_TARGET}/app/test" -c "$Bitmask" $EAL_PARAMS	# EAL_PARAMS stays unquoted so that it splits into separate options
219 | }
220 | 
221 | #
222 | # Run unit testpmd application.
223 | #
224 | run_testpmd_app()
225 | {
226 | 	echo ""
227 | 	echo "  Enter hex bitmask of cores to execute testpmd app on"
228 | 	echo "  Example: to execute app on cores 0 to 7, enter 0xff"
229 | 	echo -n "bitmask: "
230 | 	read -r Bitmask	# -r: take the input literally, without backslash processing
231 | 	echo "Launching app"
232 | 	sudo "${RTE_TARGET}/app/testpmd" -c "$Bitmask" $EAL_PARAMS -- -i	# EAL_PARAMS stays unquoted so that it splits into separate options
233 | }
234 | 
235 | #
236 | # Print hugepage information.
237 | #
238 | grep_meminfo()
239 | {
240 | 	grep -i huge /proc/meminfo	# every /proc/meminfo line containing "huge", matched case-insensitively
241 | }
242 | 
243 | #
244 | # List all hugepage file references
245 | #
246 | ls_mnt_huge()
247 | {
248 | 	ls -lh /mnt/huge	# long listing with human-readable sizes
249 | }
250 | 
251 | ##### from DPDK/tools/setup.sh
252 | 
253 | 
254 | export RTE_SDK="$(readlink -f "$(dirname "${BASH_SOURCE[0]}")/../../DPDK")"
255 | export RTE_TARGET=x86_64-default-linuxapp-gcc
256 | 
257 | pushd "$RTE_SDK" && { setup_target; popd; }	# run setup_target inside the SDK tree; skip it (and popd) if the tree is missing
258 | 
259 | #if [ "$HOSTNAME" == "server" ]; then
260 | 	set_numa_pages 8192 8192	# 32 GiB
261 | #else
262 | #	set_numa_pages 2048 2048	# 8 GiB
263 | #fi
264 | load_igb_uio_module
265 | 
266 | grep_meminfo
267 | 
268 | sudo "$RTE_SDK/tools/pci_unbind.py" --force --bind=igb_uio xge0 xge1 xge2 xge3
269 | sudo "$RTE_SDK/tools/pci_unbind.py" --force --bind=igb_uio xge4 xge5 xge6 xge7
270 | 
271 | # always allow memory overcommit and raise the System V shared memory limits
272 | sudo sysctl -w vm.overcommit_memory=1
273 | sudo sysctl -w kernel.shmmax=12884901888
274 | sudo sysctl -w kernel.shmall=12884901888	# NOTE(review): shmall is counted in pages, not bytes - confirm the intended value
275 | 


--------------------------------------------------------------------------------
/scripts/unbind.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | export RTE_SDK="$(readlink -f "$(dirname "${BASH_SOURCE[0]}")/../../DPDK")"
4 | # first lspci column of every device whose lspci line mentions 82599EB
5 | DEVS=$(lspci | awk '/82599EB/ { print $1 }')
6 | # DEVS is intentionally unquoted below so that each device becomes its own argument
7 | sudo "$RTE_SDK/tools/pci_unbind.py" --bind=ixgbe $DEVS
8 | 
9 | 


--------------------------------------------------------------------------------
/src/CMakeLists.txt:
--------------------------------------------------------------------------------
  1 | # Copyright 2014 Carnegie Mellon University
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | cmake_minimum_required(VERSION 2.6)
 16 | 
 17 | # basic configuration: refuse to configure when the build tree is the source tree
 18 | if(CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_CURRENT_BINARY_DIR)
 19 | 	message(FATAL_ERROR "Use out-of-source build only!")
 20 | endif(CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_CURRENT_BINARY_DIR)
 21 | 
 22 | project(MEHCACHED)
 23 | 
 24 | add_subdirectory(perf_count)
 25 | set(LIBRARY_PATH ${LIBRARY_PATH} perf_count/)	# NOTE(review): LIBRARY_PATH is not referenced again in this file
 26 | 
 27 | # compiler options (handed to the compiler through add_definitions)
 28 | add_definitions(-pthread)
 29 | add_definitions(-g -Wall -Wextra -Wsign-conversion -Winline -Wno-unused-function)
 30 | add_definitions(-Wconversion)
 31 | add_definitions(-O9)
 32 | add_definitions(-msse4.2 -march=corei7)
 33 | 
 34 | # preprocessor switches; each cache variable takes its default from the environment variable of the same name
 35 | SET(NDEBUG "$ENV{NDEBUG}" CACHE STRING "define NDEBUG macro")
 36 | 
 37 | message(STATUS "NDEBUG (disable all additional checkings; no*, yes) = ${NDEBUG}")
 38 | if("${NDEBUG}" STREQUAL "yes")
 39 | 	add_definitions(-DNDEBUG)
 40 | endif()
 41 | 
 42 | SET(NLOCAL "$ENV{NLOCAL}" CACHE STRING "no local programs")
 43 | SET(NCLIENT "$ENV{NCLIENT}" CACHE STRING "no clients")
 44 | SET(NSERVER "$ENV{NSERVER}" CACHE STRING "no servers")
 45 | 
 46 | message(STATUS "NLOCAL (do not compile local programs; no*, yes) = ${NLOCAL}")
 47 | message(STATUS "NCLIENT (do not compile clients; no*, yes) = ${NCLIENT}")
 48 | message(STATUS "NSERVER (do not compile server; no*, yes) = ${NSERVER}")
 49 | 
 50 | # source files compiled into every executable defined below
 51 | set(SOURCES ${SOURCES} hash.c)
 52 | set(SOURCES ${SOURCES} city.c)
 53 | set(SOURCES ${SOURCES} stopwatch.c)
 54 | set(SOURCES ${SOURCES} shm.c)
 55 | 
 56 | # libraries linked into every executable defined below
 57 | set(LIBRARIES ${LIBRARIES} rt crypto perf_count m pthread)
 58 | 
 59 | # tcmalloc (optional; uncomment both lines to enable)
 60 | #add_definitions(-fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free)
 61 | #set(LIBRARIES ${LIBRARIES} tcmalloc_minimal)
 62 | 
 63 | # DPDK: headers and libraries come from a DPDK tree at ../../DPDK relative to this directory
 64 | set(RTE_SDK ${CMAKE_CURRENT_SOURCE_DIR}/../../DPDK)
 65 | set(RTE_TARGET x86_64-default-linuxapp-gcc)
 66 | add_definitions(-DUSE_DPDK)
 67 | add_definitions(-isystem ${RTE_SDK}/${RTE_TARGET}/include -include rte_config.h)
 68 | link_directories(${LINK_DIRECTORIES} ${RTE_SDK}/${RTE_TARGET}/lib)
 69 | set(LIBRARIES ${LIBRARIES} rte_eal rte_malloc rte_mempool rte_ring)	# for basic features
 70 | set(LIBRARIES ${LIBRARIES} ethdev rte_mbuf rte_pmd_ixgbe librte_pmd_ring.a)	# for network features
 71 | set(LIBRARIES ${LIBRARIES} pthread rt)	# to make sure symbols in pthread and rt are resolved
 72 | 
 73 | if(NOT "${NLOCAL}" STREQUAL "yes")
 74 | 	# test: built from test.c plus the common sources
 75 | 	add_executable(test test.c ${SOURCES})
 76 | 	target_link_libraries(test ${LIBRARIES})
 77 | 
 78 | 	# load: built from load.c plus the common sources
 79 | 	add_executable(load load.c ${SOURCES})
 80 | 	target_link_libraries(load ${LIBRARIES})
 81 | 
 82 | 	# microbench: built from microbench.c plus the common sources
 83 | 	add_executable(microbench microbench.c ${SOURCES})
 84 | 	target_link_libraries(microbench ${LIBRARIES})
 85 | 
 86 | 	# microbench_store: microbench.c compiled with MEHCACHED_NO_EVICTION defined
 87 | 	add_executable(microbench_store microbench.c ${SOURCES})
 88 | 	set_target_properties(microbench_store PROPERTIES COMPILE_FLAGS "-DMEHCACHED_NO_EVICTION")
 89 | 	target_link_libraries(microbench_store ${LIBRARIES})
 90 | 
 91 | 	# netbench_analysis: built from netbench_config.c and netbench_analysis.c
 92 | 	add_executable(netbench_analysis netbench_config.c netbench_analysis.c ${SOURCES})
 93 | 	target_link_libraries(netbench_analysis ${LIBRARIES})
 94 | endif()
 95 | 
 96 | if(NOT "${NCLIENT}" STREQUAL "yes")
 97 | 	# netbench_client: the client sources with no extra compile flags
 98 | 	add_executable(netbench_client net_common.c netbench_config.c netbench_client.c ${SOURCES})
 99 | 	target_link_libraries(netbench_client ${LIBRARIES})
100 | 
101 | 	# netbench_client_latency: same sources compiled with MEHCACHED_MEASURE_LATENCY defined
102 | 	add_executable(netbench_client_latency net_common.c netbench_config.c netbench_client.c ${SOURCES})
103 | 	set_target_properties(netbench_client_latency PROPERTIES COMPILE_FLAGS "-DMEHCACHED_MEASURE_LATENCY")
104 | 	target_link_libraries(netbench_client_latency ${LIBRARIES})
105 | 
106 | 	# netbench_client_soft_fdir: same sources compiled with MEHCACHED_USE_SOFT_FDIR defined
107 | 	add_executable(netbench_client_soft_fdir net_common.c netbench_config.c netbench_client.c ${SOURCES})
108 | 	set_target_properties(netbench_client_soft_fdir PROPERTIES COMPILE_FLAGS "-DMEHCACHED_USE_SOFT_FDIR")
109 | 	target_link_libraries(netbench_client_soft_fdir ${LIBRARIES})
110 | endif()
111 | 
112 | if(NOT "${NSERVER}" STREQUAL "yes")
113 | 	# netbench_server: the server sources with no extra compile flags
114 | 	add_executable(netbench_server net_common.c netbench_config.c netbench_server.c ${SOURCES})
115 | 	target_link_libraries(netbench_server ${LIBRARIES})
116 | 
117 | 	# netbench_server_latency: same sources compiled with MEHCACHED_MEASURE_LATENCY defined
118 | 	add_executable(netbench_server_latency net_common.c netbench_config.c netbench_server.c ${SOURCES})
119 | 	set_target_properties(netbench_server_latency PROPERTIES COMPILE_FLAGS "-DMEHCACHED_MEASURE_LATENCY")
120 | 	target_link_libraries(netbench_server_latency ${LIBRARIES})
121 | 
122 | 	# netbench_server_soft_fdir: same sources compiled with MEHCACHED_USE_SOFT_FDIR defined
123 | 	add_executable(netbench_server_soft_fdir net_common.c netbench_config.c netbench_server.c ${SOURCES})
124 | 	set_target_properties(netbench_server_soft_fdir PROPERTIES COMPILE_FLAGS "-DMEHCACHED_USE_SOFT_FDIR")
125 | 	target_link_libraries(netbench_server_soft_fdir ${LIBRARIES})
126 | 
127 | 	# netbench_server_store: same sources compiled with MEHCACHED_NO_EVICTION defined
128 | 	add_executable(netbench_server_store net_common.c netbench_config.c netbench_server.c ${SOURCES})
129 | 	set_target_properties(netbench_server_store PROPERTIES COMPILE_FLAGS "-DMEHCACHED_NO_EVICTION")
130 | 	target_link_libraries(netbench_server_store ${LIBRARIES})
131 | endif()
132 | 


--------------------------------------------------------------------------------
/src/alloc.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Carnegie Mellon University
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #pragma once
16 | 
17 | #include "common.h"
18 | 
19 | MEHCACHED_BEGIN
20 | 
21 | struct mehcached_alloc_item
22 | {
23 |     // uint32_t item_size;		// XXX: isn't this breaking 8-byte alignment?
24 |     uint64_t item_size;		// 64-bit replacement for the field above; keeps data[] at an 8-byte offset
25 |     uint8_t data[0];		// zero-length array marking the bytes that follow item_size
26 | };
27 | 
28 | MEHCACHED_END
29 | 
30 | 


--------------------------------------------------------------------------------
/src/alloc_dynamic.c:
--------------------------------------------------------------------------------
  1 | // Copyright 2014 Carnegie Mellon University
  2 | //
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //     http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | #pragma once
 16 | 
 17 | #include "alloc_dynamic.h"
 18 | #include "table.h"
 19 | 
 20 | MEHCACHED_BEGIN
 21 | 
 22 | #define MEHCACHED_DYNAMIC_FREE (0UL)	// status value stored in the tag of a free chunk
 23 | #define MEHCACHED_DYNAMIC_OCCUPIED (1UL)	// status value stored in the tag of an allocated chunk
 24 | 
 25 | #define MEHCACHED_DYNAMIC_TAG_SIZE(vec) ((vec) & ((1UL << 63UL) - 1UL))	// low 63 bits of a tag: the chunk size
 26 | #define MEHCACHED_DYNAMIC_TAG_STATUS(vec) ((vec) >> 63UL)	// top bit of a tag: the status
 27 | #define MEHCACHED_DYNAMIC_TAG_VEC(size, status) ((size) | (status) << 63UL)	// pack size and status into one 64-bit tag
 28 | 
 29 | // TODO: use address order for each freelist to reduce fragmentation and improve locality
 30 | // TODO: use the LSB (not MSB) to store status as all sizes are aligned to 8-byte boundary
 31 | 
 32 | static
 33 | void
 34 | mehcached_dynamic_init(struct mehcached_dynamic *alloc, uint64_t size, bool concurrent_alloc_read, bool concurrent_alloc_write, size_t numa_node)
 35 | {   // Set up alloc: choose the concurrency mode, obtain and map `size` bytes of shared memory for numa_node, then reset the free lists.
 36 |     if (!concurrent_alloc_read)
 37 |         alloc->concurrent_access_mode = 0;   // no concurrent readers requested
 38 |     else if (!concurrent_alloc_write)
 39 |         alloc->concurrent_access_mode = 1;   // concurrent readers but no concurrent writers
 40 |     else
 41 |         alloc->concurrent_access_mode = 2;   // both; the only mode in which lock()/unlock() touch alloc->lock
 42 | 
 43 |     alloc->lock = 0;
 44 | 
 45 |     size = mehcached_shm_adjust_size(size);   // NOTE(review): presumably rounds to the shm page granularity - confirm in shm.c
 46 |     assert(size <= MEHCACHED_DYNAMIC_MAX_SIZE);
 47 | 
 48 |     alloc->size = size;
 49 | 
 50 |     size_t shm_id = mehcached_shm_alloc(size, numa_node);
 51 |     if (shm_id == (size_t)-1)
 52 |     {
 53 |         printf("failed to allocate memory\n");
 54 |         assert(false);   // NOTE(review): compiled out under NDEBUG, after which execution continues with the invalid shm_id
 55 |     }
 56 |     while (true)   // keep trying candidate addresses until the mapping succeeds
 57 |     {
 58 |         alloc->data = mehcached_shm_find_free_address(size);
 59 |         if (alloc->data == NULL)
 60 |             assert(false);
 61 | 
 62 |         if (!mehcached_shm_map(shm_id, alloc->data, 0, size))
 63 |             continue;
 64 | 
 65 |         break;
 66 |     }
 67 | 
 68 |     if (!mehcached_shm_schedule_remove(shm_id))
 69 |     {
 70 |         perror("");
 71 |         assert(false);
 72 |     }
 73 | 
 74 |     mehcached_dynamic_reset(alloc);   // turns the whole mapped region into a single free chunk
 75 | }
 76 | 
 77 | static
 78 | void
 79 | mehcached_dynamic_free(struct mehcached_dynamic *alloc)
 80 | {   // Unmap the region at alloc->data; the struct itself is left untouched.
 81 |     if (!mehcached_shm_unmap(alloc->data))
 82 |         assert(false);   // NOTE(review): an unmap failure is silently ignored when asserts are compiled out
 83 | }
 84 | 
 85 | static
 86 | void
 87 | mehcached_dynamic_lock(struct mehcached_dynamic *alloc MEHCACHED_UNUSED)
 88 | {   // Acquire the allocator spinlock; does nothing unless MEHCACHED_CONCURRENT is defined and the allocator is in mode 2.
 89 | #ifdef MEHCACHED_CONCURRENT
 90 |     if (alloc->concurrent_access_mode == 2)
 91 |     {
 92 |         while (1)   // spin until the 0 -> 1 compare-and-swap on alloc->lock succeeds
 93 |         {
 94 |             if (__sync_bool_compare_and_swap((volatile uint32_t *)&alloc->lock, 0U, 1U))
 95 |                 break;
 96 |         }
 97 |     }
 98 | #endif
 99 | }
100 | 
101 | static
102 | void
103 | mehcached_dynamic_unlock(struct mehcached_dynamic *alloc MEHCACHED_UNUSED)
104 | {   // Release the allocator spinlock; does nothing unless MEHCACHED_CONCURRENT is defined and the allocator is in mode 2.
105 | #ifdef MEHCACHED_CONCURRENT
106 |     if (alloc->concurrent_access_mode == 2)
107 |     {
108 |         memory_barrier();
109 |         assert((*(volatile uint32_t *)&alloc->lock & 1U) == 1U);
110 |         // a plain store is enough: while the lock is held, every other thread's compare-and-swap fails, so only the holder writes alloc->lock
111 |         *(volatile uint32_t *)&alloc->lock = 0U;
112 |     }
113 | #endif
114 | }
115 | 
116 | static
117 | size_t
118 | mehcached_dynamic_size_to_class_roundup(uint64_t size)
119 | {   // Return the smallest size class whose every chunk is at least `size` bytes; sizes past the bounded classes map to the last class.
120 |     assert(size <= MEHCACHED_DYNAMIC_MAX_SIZE);
121 |     if (size < MEHCACHED_DYNAMIC_MIN_SIZE) return 0;    // class 0 already fits; also prevents the unsigned subtraction below from wrapping
122 |     if (size <= MEHCACHED_DYNAMIC_MIN_SIZE + (MEHCACHED_DYNAMIC_NUM_CLASSES - 1) * MEHCACHED_DYNAMIC_CLASS_INCREMENT)
123 |         return (size - MEHCACHED_DYNAMIC_MIN_SIZE + MEHCACHED_DYNAMIC_CLASS_INCREMENT - 1) / MEHCACHED_DYNAMIC_CLASS_INCREMENT;
124 |     else
125 |         return MEHCACHED_DYNAMIC_NUM_CLASSES - 1;
126 | }
127 | 
128 | static
129 | size_t
130 | mehcached_dynamic_size_to_class_rounddown(uint64_t size)
131 | {   // Return the size class a chunk of `size` bytes is filed under; everything past the bounded classes goes to the last class.
132 |     assert(size <= MEHCACHED_DYNAMIC_MAX_SIZE);
133 | 	assert(size >= MEHCACHED_DYNAMIC_MIN_SIZE);
134 | 
135 |     if (size < MEHCACHED_DYNAMIC_MIN_SIZE + MEHCACHED_DYNAMIC_NUM_CLASSES * MEHCACHED_DYNAMIC_CLASS_INCREMENT)
136 |         return (size - MEHCACHED_DYNAMIC_MIN_SIZE) / MEHCACHED_DYNAMIC_CLASS_INCREMENT;   // integer division rounds down
137 |     else
138 |         return MEHCACHED_DYNAMIC_NUM_CLASSES - 1;
139 | }
140 | 
141 | static
142 | void
143 | mehcached_dynamic_insert_free_chunk(struct mehcached_dynamic *alloc, uint8_t *chunk_start, uint64_t chunk_size)
144 | {   // Mark [chunk_start, chunk_start + chunk_size) as free and push it onto the head of its size class's free list.
145 | #ifdef MEHCACHED_VERBOSE
146 |     printf("mehcached_dynamic_insert_free_chunk: start=%p size=%lu\n", chunk_start, chunk_size);
147 | #endif
148 |     size_t chunk_class = mehcached_dynamic_size_to_class_rounddown(chunk_size);
149 |     *(uint64_t *)chunk_start = *(uint64_t *)(chunk_start + chunk_size - 8) = MEHCACHED_DYNAMIC_TAG_VEC(chunk_size, MEHCACHED_DYNAMIC_FREE);   // identical tag in the first and last 8 bytes
150 |     *(uint8_t **)(chunk_start + 8) = NULL;  // the head has no previous free chunk
151 |     *(uint8_t **)(chunk_start + 16) = alloc->free_head[chunk_class];    // point to the old head
152 | 
153 |     if (alloc->free_head[chunk_class] != NULL)
154 |     {
155 |         assert(*(uint8_t **)(alloc->free_head[chunk_class] + 8) == NULL);
156 |         *(uint8_t **)(alloc->free_head[chunk_class] + 8) = chunk_start; // update the previous head's prev pointer
157 |     }
158 | 
159 |     alloc->free_head[chunk_class] = chunk_start;    // set as a new head
160 | }
161 | 
162 | static
163 | void mehcached_dynamic_remove_free_chunk_from_free_list(struct mehcached_dynamic *alloc, uint8_t *chunk_start, uint64_t chunk_size)
164 | {   // Unlink a free chunk from the doubly linked free list of its size class; the chunk's own tags are left as they are.
165 | #ifdef MEHCACHED_VERBOSE
166 |     printf("mehcached_dynamic_remove_free_chunk_from_free_list: start=%p size=%lu\n", chunk_start, chunk_size);
167 | #endif
168 | 
169 |     uint8_t *prev_chunk_start = *(uint8_t **)(chunk_start + 8);    // prev link is stored at offset 8
170 |     uint8_t *next_chunk_start = *(uint8_t **)(chunk_start + 16);   // next link is stored at offset 16
171 | 
172 |     if (prev_chunk_start != NULL)
173 |         *(uint8_t **)(prev_chunk_start + 16) = next_chunk_start;
174 |     else
175 |     {
176 |         size_t chunk_class = mehcached_dynamic_size_to_class_rounddown(chunk_size);   // no prev link: the chunk must be its class's head
177 |         assert(alloc->free_head[chunk_class] == chunk_start);
178 |         alloc->free_head[chunk_class] = next_chunk_start;        // set the next free chunk as the head
179 |     }
180 | 
181 |     if (next_chunk_start != NULL)
182 |         *(uint8_t **)(next_chunk_start + 8) = prev_chunk_start;
183 | }
184 | 
185 | static
186 | bool
187 | mehcached_dynamic_remove_free_chunk_from_head(struct mehcached_dynamic *alloc, uint64_t minimum_chunk_size, uint8_t **out_chunk_start, uint64_t *out_chunk_size)
188 | {   // Unlink a free chunk of at least minimum_chunk_size bytes and report it through the out parameters; returns false when none exists.
189 |     size_t chunk_class = mehcached_dynamic_size_to_class_roundup(minimum_chunk_size);
190 | 
191 |     // determine the size class to use (best fit)
192 |     for (; chunk_class < MEHCACHED_DYNAMIC_NUM_CLASSES; chunk_class++)
193 |         if (alloc->free_head[chunk_class] != NULL)
194 |             break;
195 | 
196 |     if (chunk_class >= MEHCACHED_DYNAMIC_NUM_CLASSES)   // >= so that an out-of-range class can never index free_head
197 |     {
198 | #ifdef MEHCACHED_VERBOSE
199 |         printf("mehcached_dynamic_remove_free_chunk_from_head: minsize=%lu no space\n", minimum_chunk_size);
200 | #endif
201 |         return false;
202 |     }
203 |     // first fit within the class: a bounded class only holds chunks that fit, but the last class holds every larger size, so its head may be too small
204 |     uint8_t *chunk_start = alloc->free_head[chunk_class];
205 |     while (chunk_start != NULL && MEHCACHED_DYNAMIC_TAG_SIZE(*(uint64_t *)chunk_start) < minimum_chunk_size)
206 |         chunk_start = *(uint8_t **)(chunk_start + 16);  // follow the next link of the free list
207 |     if (chunk_start == NULL)
208 |         return false;   // no chunk in the class is large enough
209 |     assert(MEHCACHED_DYNAMIC_TAG_STATUS(*(uint64_t *)chunk_start) == MEHCACHED_DYNAMIC_FREE);
210 |     uint64_t chunk_size = MEHCACHED_DYNAMIC_TAG_SIZE(*(uint64_t *)chunk_start);
211 |     assert(*(uint64_t *)chunk_start == *(uint64_t *)(chunk_start + chunk_size - 8));
212 |     mehcached_dynamic_remove_free_chunk_from_free_list(alloc, chunk_start, chunk_size);
213 | 
214 |     *out_chunk_start = chunk_start;
215 |     *out_chunk_size = chunk_size;
216 | #ifdef MEHCACHED_VERBOSE
217 |     printf("mehcached_dynamic_remove_free_chunk_from_head: minsize=%lu start=%p size=%lu\n", minimum_chunk_size, *out_chunk_start, *out_chunk_size);
218 | #endif
219 |     return true;
220 | }
221 | 
222 | static
223 | void
224 | mehcached_dynamic_reset(struct mehcached_dynamic *alloc)
225 | {   // Forget every allocation: empty all free lists, then file the whole region as one free chunk.
226 |     memset(alloc->free_head, 0, sizeof(alloc->free_head));
227 | 
228 |     // set the entire free space as a free chunk
229 |     mehcached_dynamic_insert_free_chunk(alloc, alloc->data, alloc->size);
230 | }
231 | 
232 | static
233 | struct mehcached_alloc_item *
234 | mehcached_dynamic_item(const struct mehcached_dynamic *alloc, uint64_t dynamic_offset)
235 | {   // Translate an offset returned by mehcached_dynamic_allocate() into a pointer; the offset is not validated.
236 |     return (struct mehcached_alloc_item *)(alloc->data + dynamic_offset);
237 | }
238 | 
239 | static
240 | void
241 | mehcached_dynamic_coalese_free_chunk_left(struct mehcached_dynamic *alloc, uint8_t **chunk_start, uint64_t *chunk_size)
242 | {   // If the chunk just before *chunk_start is free, unlink it and extend *chunk_start / *chunk_size to cover it; no tags are rewritten here.
243 |     if (*chunk_start == alloc->data)   // already at the start of the region: nothing lies to the left
244 |         return;
245 |     assert(*chunk_start > alloc->data);
246 | 
247 |     if (MEHCACHED_DYNAMIC_TAG_STATUS(*(uint64_t *)(*chunk_start - 8)) == MEHCACHED_DYNAMIC_OCCUPIED)   // trailing tag of the left neighbour
248 |         return;
249 | 
250 |     uint64_t adj_chunk_size = MEHCACHED_DYNAMIC_TAG_SIZE(*(uint64_t *)(*chunk_start - 8));
251 |     uint8_t *adj_chunk_start = *chunk_start - adj_chunk_size;
252 |     assert(*(uint64_t *)adj_chunk_start == *(uint64_t *)(adj_chunk_start + adj_chunk_size - 8));
253 | 
254 | #ifdef MEHCACHED_VERBOSE
255 |     printf("mehcached_dynamic_coalese_free_chunk_left: start=%p size=%lu left=%lu\n", *chunk_start, *chunk_size, adj_chunk_size);
256 | #endif
257 | 
258 |     mehcached_dynamic_remove_free_chunk_from_free_list(alloc, adj_chunk_start, adj_chunk_size);
259 |     *chunk_start = adj_chunk_start;   // the merged chunk now begins where the left neighbour began
260 |     *chunk_size = *chunk_size + adj_chunk_size;
261 | }
262 | 
263 | static
264 | void
265 | mehcached_dynamic_coalese_free_chunk_right(struct mehcached_dynamic *alloc, uint8_t **chunk_start, uint64_t *chunk_size)
266 | {   // If the chunk just after this one is free, unlink it and extend *chunk_size to cover it; no tags are rewritten here.
267 |     if (*chunk_start + *chunk_size == alloc->data + alloc->size)   // already at the end of the region: nothing lies to the right
268 |         return;
269 |     assert(*chunk_start + *chunk_size < alloc->data + alloc->size);
270 | 
271 |     if (MEHCACHED_DYNAMIC_TAG_STATUS(*(uint64_t *)(*chunk_start + *chunk_size)) == MEHCACHED_DYNAMIC_OCCUPIED)   // leading tag of the right neighbour
272 |         return;
273 | 
274 |     uint8_t *adj_chunk_start = *chunk_start + *chunk_size;
275 |     uint64_t adj_chunk_size = MEHCACHED_DYNAMIC_TAG_SIZE(*(uint64_t *)adj_chunk_start);
276 |     assert(*(uint64_t *)adj_chunk_start == *(uint64_t *)(adj_chunk_start + adj_chunk_size - 8));
277 | 
278 | #ifdef MEHCACHED_VERBOSE
279 |     printf("mehcached_dynamic_coalese_free_chunk_right: start=%p size=%lu right=%lu\n", *chunk_start, *chunk_size, adj_chunk_size);
280 | #endif
281 | 
282 |     mehcached_dynamic_remove_free_chunk_from_free_list(alloc, adj_chunk_start, adj_chunk_size);
283 |     // chunk_start is unchanged
284 |     *chunk_size = *chunk_size + adj_chunk_size;
285 | }
286 | 
287 | static
288 | uint64_t
289 | mehcached_dynamic_allocate(struct mehcached_dynamic *alloc, uint32_t item_size)
290 | {   // Carve out a chunk for item_size bytes; returns the item's offset from alloc->data, or MEHCACHED_DYNAMIC_INSUFFICIENT_SPACE.
291 |     uint64_t minimum_chunk_size = MEHCACHED_ROUNDUP8((uint64_t)item_size) + MEHCAHCED_DYNAMIC_OVERHEAD;
292 |     if (minimum_chunk_size < MEHCACHED_DYNAMIC_MIN_SIZE) minimum_chunk_size = MEHCACHED_DYNAMIC_MIN_SIZE;  // once freed, the chunk must hold two tags plus the prev/next links
293 |     uint8_t *chunk_start;
294 |     uint64_t chunk_size;
295 |     if (!mehcached_dynamic_remove_free_chunk_from_head(alloc, minimum_chunk_size, &chunk_start, &chunk_size))
296 |         return MEHCACHED_DYNAMIC_INSUFFICIENT_SPACE;
297 | 
298 |     // see if we can make a leftover free chunk
299 |     uint64_t leftover_chunk_size = chunk_size - minimum_chunk_size;
300 |     if (leftover_chunk_size >= MEHCACHED_DYNAMIC_MIN_SIZE)
301 |     {
302 |         // create a leftover free chunk and insert it to the freelist
303 |         mehcached_dynamic_insert_free_chunk(alloc, chunk_start + minimum_chunk_size, leftover_chunk_size);
304 |         // coalescing is not required here because the previous chunk already used to be a big coalesced free chunk
305 | 
306 |         // adjust the free chunk to avoid overlapping
307 |         chunk_size = minimum_chunk_size;
308 |     }
309 |     else
310 |         leftover_chunk_size = 0;   // remainder too small to stand alone: it stays inside the allocated chunk
311 | 
312 | #ifdef MEHCACHED_VERBOSE
313 |     printf("mehcached_dynamic_allocate: item_size=%u minsize=%lu start=%p size=%lu (leftover=%lu)\n", item_size, minimum_chunk_size, chunk_start, chunk_size, leftover_chunk_size);
314 | #endif
315 | 
316 |     *(uint64_t *)chunk_start = *(uint64_t *)(chunk_start + chunk_size - 8) = MEHCACHED_DYNAMIC_TAG_VEC(chunk_size, MEHCACHED_DYNAMIC_OCCUPIED);
317 | 
318 |     // TODO: We are wasting 8 bytes (the uint64_t item_size) for struct mehcached_alloc_item for compatibility.  Need to implement an allocator-specific method to obtain the item size
319 |     struct mehcached_alloc_item *alloc_item = (struct mehcached_alloc_item *)(chunk_start + 8);
320 |     alloc_item->item_size = item_size;
321 | 
322 |     return (uint64_t)((uint8_t *)alloc_item - alloc->data);
323 | }
324 | 
325 | static
326 | void
327 | mehcached_dynamic_deallocate(struct mehcached_dynamic *alloc, uint64_t dynamic_offset)
328 | {   // Free the chunk whose item is at dynamic_offset (a value returned by mehcached_dynamic_allocate), merging it with free neighbours.
329 |     struct mehcached_alloc_item *alloc_item = mehcached_dynamic_item(alloc, dynamic_offset);
330 |     uint8_t *chunk_start = (uint8_t *)alloc_item - 8;   // the chunk's leading tag sits 8 bytes before the item
331 |     assert(MEHCACHED_DYNAMIC_TAG_STATUS(*(uint64_t *)chunk_start) == MEHCACHED_DYNAMIC_OCCUPIED);
332 |     uint64_t chunk_size = MEHCACHED_DYNAMIC_TAG_SIZE(*(uint64_t *)chunk_start);
333 | 
334 | #ifdef MEHCACHED_VERBOSE
335 |     printf("mehcached_dynamic_deallocate: start=%p size=%lu\n", chunk_start, chunk_size);
336 | #endif
337 | 
338 |     mehcached_dynamic_coalese_free_chunk_left(alloc, &chunk_start, &chunk_size);
339 |     mehcached_dynamic_coalese_free_chunk_right(alloc, &chunk_start, &chunk_size);
340 |     mehcached_dynamic_insert_free_chunk(alloc, chunk_start, chunk_size);   // writes the free tags for the (possibly merged) chunk
341 | }
342 | 
343 | MEHCACHED_END
344 | 


--------------------------------------------------------------------------------
/src/alloc_dynamic.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Carnegie Mellon University
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #pragma once
16 | 
17 | #include "common.h"
18 | #include "alloc.h"
19 | 
20 | MEHCACHED_BEGIN
21 | 
22 | // memory allocation using segregated fit (similar to Lea)
23 | 
24 | #define MEHCAHCED_DYNAMIC_OVERHEAD (16)	// per-item space overhead solely caused by mehcached_dynamic (the two 8-byte tags); name is spelled "MEHCAHCED" at its use site too
25 | 
26 | #define MEHCACHED_DYNAMIC_MIN_SIZE (32UL)				// 32 bytes (must be able to hold 4 size_t variables: two tags plus the prev/next links)
27 | #define MEHCACHED_DYNAMIC_MAX_SIZE ((1UL << 40) - 1)	// 40-bit wide size (can be up to 63-bit wide)
28 | #define MEHCACHED_DYNAMIC_NUM_CLASSES (32)		// 32 classes for freelists
29 | #define MEHCACHED_DYNAMIC_CLASS_INCREMENT (8)	// 8-byte increment in freelist classes
30 | 
31 | #define MEHCACHED_DYNAMIC_INSUFFICIENT_SPACE ((uint64_t)-1)	// returned by mehcached_dynamic_allocate() when no free chunk could be obtained
32 | 
33 | // data structure layout
34 | 
35 | // free_head[class] -> the first free chunk of the class (NULL if none exists)
36 | 
37 | // free chunk (of size N) - N is the same or larger than the size of the class
38 | // 8-byte: status (1 bit), size (63 bit)
39 | // 8-byte: prev free chunk of the same class (NULL if head)
40 | // 8-byte: next free chunk of the same class (NULL if tail)
41 | // (N - 32 bytes)
42 | // 8-byte: status (1 bit), size (63 bit)
43 | 
44 | // occupied chunk (of size N) - overhead of 16 bytes
45 | // 8-byte: status (1 bit), size (63 bit)
46 | // (N - 16 bytes)
47 | // 8-byte: status (1 bit), size (63 bit)
48 | 
49 | struct mehcached_dynamic
50 | {
51 |     uint8_t concurrent_access_mode;	// 0, 1 or 2; derived by mehcached_dynamic_init() from its two concurrency flags
52 |     uint32_t lock;	// spinlock word: 0 = unlocked, 1 = locked
53 | 	uint64_t size;	// the total size
54 |     uint8_t *data;	// the base address of the reserved memory
55 |     uint8_t *free_head[MEHCACHED_DYNAMIC_NUM_CLASSES];	// the head free pointer of each class
56 | };
57 | 
58 | static
59 | void
60 | mehcached_dynamic_init(struct mehcached_dynamic *alloc, uint64_t size, bool concurrent_alloc_read, bool concurrent_alloc_write, size_t numa_node);
61 | 
62 | static
63 | void
64 | mehcached_dynamic_free(struct mehcached_dynamic *alloc);
65 | 
66 | static
67 | void
68 | mehcached_dynamic_reset(struct mehcached_dynamic *alloc);
69 | 
70 | static
71 | void
72 | mehcached_dynamic_lock(struct mehcached_dynamic *alloc);
73 | 
74 | static
75 | void
76 | mehcached_dynamic_unlock(struct mehcached_dynamic *alloc);
77 | 
78 | static
79 | struct mehcached_alloc_item *
80 | mehcached_dynamic_item(const struct mehcached_dynamic *alloc, uint64_t dynamic_offset);
81 | 
82 | static
83 | uint64_t
84 | mehcached_dynamic_allocate(struct mehcached_dynamic *alloc, uint32_t item_size);
85 | 
86 | static
87 | void
88 | mehcached_dynamic_deallocate(struct mehcached_dynamic *alloc, uint64_t dynamic_offset);
89 | 
90 | MEHCACHED_END
91 | 
92 | 


--------------------------------------------------------------------------------
/src/alloc_malloc.c:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Carnegie Mellon University
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #pragma once
16 | 
17 | #include "alloc_malloc.h"
18 | #include "table.h"
19 | 
20 | MEHCACHED_BEGIN
21 | 
22 | static
23 | void
24 | mehcached_malloc_init(struct mehcached_malloc *alloc)
25 | {   // Derive pointer_base from a throwaway heap allocation so that later malloc() results can be expressed as offsets from it.
26 |     uint8_t *ptr = (uint8_t *)malloc(8);   // NOTE(review): the result is not checked for NULL
27 |     free(ptr);   // only the address is used below, not the memory
28 |     alloc->pointer_base = ptr - 0x7fffffffUL;   // the base sits 0x7fffffff bytes below the probe address
29 | }
30 | 
31 | static
32 | void
33 | mehcached_malloc_free(struct mehcached_malloc *alloc MEHCACHED_UNUSED)
34 | {   // Intentionally empty: this allocator holds nothing to release at teardown.
35 | }
36 | 
37 | static
38 | void
39 | mehcached_malloc_reset(struct mehcached_malloc *alloc MEHCACHED_UNUSED)
40 | {   // Intentionally empty: outstanding items are not tracked here, so none are freed by a reset.
41 | }
42 | 
43 | static
44 | struct mehcached_alloc_item *
45 | mehcached_malloc_item(const struct mehcached_malloc *alloc, uint64_t malloc_offset)
46 | {   // Translate an offset returned by mehcached_malloc_allocate() back into the item's pointer; the offset is not validated.
47 |     return (struct mehcached_alloc_item *)(alloc->pointer_base + malloc_offset);
48 | }
49 | 
50 | static
51 | uint64_t
52 | mehcached_malloc_allocate(struct mehcached_malloc *alloc, uint32_t item_size)
53 | {   // malloc() item_size bytes and return the block's offset from pointer_base, or MEHCACHED_MALLOC_INSUFFICIENT_SPACE on failure.
54 |     void *p = malloc(item_size);
55 |     if (p == NULL)
56 |         return MEHCACHED_MALLOC_INSUFFICIENT_SPACE;
57 |     size_t malloc_offset = (size_t)((uint8_t *)p - alloc->pointer_base);
58 |     if (malloc_offset > MEHCACHED_ITEM_OFFSET_MASK)
59 |     {
60 |         printf("too large pointer: %zx (offset = %zx)\n", (size_t)p, malloc_offset);
61 |         assert(false);
62 |         free(p);    // reached only when asserts are compiled out: do not leak the unusable block
63 |         return MEHCACHED_MALLOC_INSUFFICIENT_SPACE;
64 |     }
65 |     struct mehcached_alloc_item *alloc_item = (struct mehcached_alloc_item *)p;
66 |     alloc_item->item_size = item_size;   // NOTE(review): assumes item_size covers at least the 8-byte item_size field - confirm with callers
67 |     return (uint64_t)malloc_offset;
68 | }
69 | 
70 | static
71 | void
72 | mehcached_malloc_deallocate(struct mehcached_malloc *alloc, uint64_t malloc_offset)
73 | {   // free() the block that mehcached_malloc_allocate() returned as malloc_offset.
74 |     free(alloc->pointer_base + malloc_offset);
75 | }
76 | 
77 | MEHCACHED_END
78 | 
79 | 


--------------------------------------------------------------------------------
/src/alloc_malloc.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Carnegie Mellon University
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #pragma once
16 | 
17 | #include "common.h"
18 | #include "alloc.h"
19 | 
20 | MEHCACHED_BEGIN
21 | 
22 | #define MEHCACHED_MALLOC_INSUFFICIENT_SPACE ((uint64_t)-1)  // offset value that mehcached_malloc_allocate() returns on failure
23 | // state of the malloc()-backed allocator; items are addressed as byte offsets from pointer_base
24 | struct mehcached_malloc
25 | {
26 |     uint8_t *pointer_base;  // address that item offsets are added to (see mehcached_malloc_item())
27 | };
28 | // NOTE(review): definition is in alloc_malloc.c outside the visible excerpt; presumably sets up pointer_base - confirm
29 | static
30 | void
31 | mehcached_malloc_init(struct mehcached_malloc *alloc);
32 | // NOTE(review): definition not visible here; presumably the teardown counterpart of mehcached_malloc_init() - confirm
33 | static
34 | void
35 | mehcached_malloc_free(struct mehcached_malloc *alloc);
36 | // NOTE(review): definition not visible here; behavior to be confirmed against alloc_malloc.c
37 | static
38 | void
39 | mehcached_malloc_reset(struct mehcached_malloc *alloc);
40 | // returns the item located malloc_offset bytes past alloc->pointer_base
41 | static
42 | struct mehcached_alloc_item *
43 | mehcached_malloc_item(const struct mehcached_malloc *alloc, uint64_t malloc_offset);
44 | // allocates item_size bytes with malloc(); returns the offset from alloc->pointer_base, or MEHCACHED_MALLOC_INSUFFICIENT_SPACE on failure
45 | static
46 | uint64_t
47 | mehcached_malloc_allocate(struct mehcached_malloc *alloc, uint32_t item_size);
48 | // passes the address alloc->pointer_base + malloc_offset to free()
49 | static
50 | void
51 | mehcached_malloc_deallocate(struct mehcached_malloc *alloc, uint64_t malloc_offset);
52 | 
53 | MEHCACHED_END
54 | 
55 | 


--------------------------------------------------------------------------------
/src/alloc_pool.c:
--------------------------------------------------------------------------------
  1 | // Copyright 2014 Carnegie Mellon University
  2 | //
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //     http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | #pragma once
 16 | 
 17 | #include "alloc_pool.h"
 18 | #include "table.h"
 19 | #include "shm.h"
 20 | 
 21 | MEHCACHED_BEGIN
 22 | 
 23 | // Initializes the pool: normalizes size (at least MEHCACHED_MINIMUM_POOL_SIZE, shm-adjusted, power of two), records the
 24 | // concurrency mode, resets lock/head/tail, and maps the shm segment at data with a second mapping right behind it at data + size.
 25 | static void mehcached_pool_init(struct mehcached_pool *alloc, uint64_t size, bool concurrent_alloc_read, bool concurrent_alloc_write, size_t numa_node)
 26 | {
 27 |     size = (size < MEHCACHED_MINIMUM_POOL_SIZE) ? MEHCACHED_MINIMUM_POOL_SIZE : size;
 28 |     size = mehcached_shm_adjust_size(size);
 29 |     size = mehcached_next_power_of_two(size);
 30 |     assert(size <= MEHCACHED_ITEM_OFFSET_MASK >> 1);    // ">> 1" is for sufficient garbage collection time
 31 |     assert(size == mehcached_shm_adjust_size(size));
 32 | 
 33 |     // 0: no concurrent access; 1: concurrent reads only; 2: concurrent reads and writes
 34 |     if (concurrent_alloc_read && concurrent_alloc_write)
 35 |         alloc->concurrent_access_mode = 2;
 36 |     else if (concurrent_alloc_read)
 37 |         alloc->concurrent_access_mode = 1;
 38 |     else
 39 |         alloc->concurrent_access_mode = 0;
 40 | 
 41 |     alloc->size = size;
 42 |     alloc->mask = size - 1;
 43 |     alloc->lock = 0;
 44 |     alloc->tail = 0;
 45 |     alloc->head = 0;
 46 | 
 47 |     size_t shm_id = mehcached_shm_alloc(size, numa_node);
 48 |     if (shm_id == (size_t)-1)
 49 |     {
 50 |         printf("failed to allocate memory\n");
 51 |         assert(false);
 52 |     }
 53 | 
 54 |     // keep asking for a candidate address until both mappings succeed back to back
 55 |     for (;;)
 56 |     {
 57 |         alloc->data = mehcached_shm_find_free_address(size + MEHCACHED_MINIMUM_POOL_SIZE);
 58 |         if (alloc->data == NULL)
 59 |             assert(false);
 60 | 
 61 |         if (mehcached_shm_map(shm_id, alloc->data, 0, size))
 62 |         {
 63 |             // aliased access across pool end boundary
 64 |             if (mehcached_shm_map(shm_id, alloc->data + size, 0, MEHCACHED_MINIMUM_POOL_SIZE))
 65 |                 break;
 66 |             // the alias could not be placed: drop the first mapping and retry from scratch
 67 |             mehcached_shm_unmap(alloc->data);
 68 |         }
 69 |     }
 70 | 
 71 |     // NOTE(review): presumably marks the segment for removal once it is no longer mapped - confirm in shm.c
 72 |     if (!mehcached_shm_schedule_remove(shm_id))
 73 |     {
 74 |         perror("");
 75 |         assert(false);
 76 |     }
 77 | }
 78 | 
 79 | // Unmaps the two mappings that mehcached_pool_init() created: the pool body at data
 80 | // and the second mapping that starts at data + size.
 81 | static void mehcached_pool_free(struct mehcached_pool *alloc)
 82 | {
 83 |     uint8_t *const body = alloc->data;
 84 |     uint8_t *const alias = alloc->data + alloc->size;
 85 |     if (!mehcached_shm_unmap(body)) assert(false);
 86 |     if (!mehcached_shm_unmap(alias)) assert(false);
 87 | }
 88 | 
 89 | // Empties the pool by rewinding both offsets to zero; the mappings are left untouched.
 90 | static void mehcached_pool_reset(struct mehcached_pool *alloc)
 91 | {
 92 |     alloc->tail = 0;
 93 |     alloc->head = 0;
 94 | }
 95 | 
 96 | // Spins until the pool lock word is atomically changed from 0 to 1. Does nothing unless
 97 | // MEHCACHED_CONCURRENT is defined and the pool is in concurrent access mode 2.
 98 | static void mehcached_pool_lock(struct mehcached_pool *alloc MEHCACHED_UNUSED)
 99 | {
100 | #ifdef MEHCACHED_CONCURRENT
101 |     if (alloc->concurrent_access_mode != 2)
102 |         return;
103 | 
104 |     volatile uint32_t *lock_word = (volatile uint32_t *)&alloc->lock;
105 |     while (!__sync_bool_compare_and_swap(lock_word, 0U, 1U))
106 |     {
107 |         // busy-wait: the lock word is still non-zero
108 |     }
109 | #endif
110 | }
111 | 
112 | // Releases the lock taken by mehcached_pool_lock(): after a memory barrier the lock word is set back
113 | // to 0. Does nothing unless MEHCACHED_CONCURRENT is defined and the pool is in concurrent access mode 2.
114 | static void mehcached_pool_unlock(struct mehcached_pool *alloc MEHCACHED_UNUSED)
115 | {
116 | #ifdef MEHCACHED_CONCURRENT
117 |     if (alloc->concurrent_access_mode != 2)
118 |         return;
119 | 
120 |     memory_barrier();
121 |     assert((*(volatile uint32_t *)&alloc->lock & 1U) == 1U);
122 |     // a plain store is enough: only the thread holding the lock writes the lock word
123 |     *(volatile uint32_t *)&alloc->lock = 0U;
124 | #endif
125 | }
126 | 
127 | // Returns the item stored at pool_offset; the offset is wrapped into the pool with alloc->mask.
128 | static struct mehcached_alloc_item *mehcached_pool_item(const struct mehcached_pool *alloc, uint64_t pool_offset)
129 | {
130 |     const uint64_t wrapped_offset = pool_offset & alloc->mask;
131 |     return (struct mehcached_alloc_item *)(alloc->data + wrapped_offset);
132 | }
133 | 
134 | // Debug check: the span between head and tail must never be larger than the pool size.
135 | static void mehcached_pool_check_invariants(const struct mehcached_pool *alloc MEHCACHED_UNUSED)
136 | {
137 |     // a single assert, so the whole check disappears when NDEBUG is defined
138 |     assert(alloc->tail - alloc->head <= alloc->size);
139 | }
140 | 
141 | // Discards the item at head by advancing head by the size recorded in that item's header.
142 | static void mehcached_pool_pop_head(struct mehcached_pool *alloc)
143 | {
144 |     const struct mehcached_alloc_item *oldest = mehcached_pool_item(alloc, alloc->head);
145 | #ifdef MEHCACHED_VERBOSE
146 |     printf("popping item size = %u at head = %lu\n", oldest->item_size, alloc->head & MEHCACHED_ITEM_OFFSET_MASK);
147 | #endif
148 | 
149 |     // head keeps its full 64-bit value; masking happens only when an offset is turned into a location
150 |     alloc->head = alloc->head + oldest->item_size;
151 |     mehcached_pool_check_invariants(alloc);
152 | }
153 | 
154 | // Appends an item of item_size bytes (already rounded with MEHCACHED_ROUNDUP8, at most the pool size) at tail,
155 | // popping items from head until it fits. Returns the item's offset masked with MEHCACHED_ITEM_OFFSET_MASK.
156 | static uint64_t mehcached_pool_push_tail(struct mehcached_pool *alloc, uint32_t item_size)
157 | {
158 |     assert(item_size == MEHCACHED_ROUNDUP8(item_size));
159 |     assert(item_size <= alloc->size);
160 | 
161 |     const uint64_t item_offset = alloc->tail;
162 |     const uint64_t new_tail = item_offset + item_size;
163 | 
164 |     // make room: pop from head while the new item would reach past head + size
165 |     while (alloc->head + alloc->size < new_tail)
166 |         mehcached_pool_pop_head(alloc);
167 | 
168 |     // write the item header before tail is advanced
169 |     mehcached_pool_item(alloc, item_offset)->item_size = item_size;
170 | 
171 |     if (alloc->concurrent_access_mode != 0)
172 |     {
173 |         *(volatile uint64_t *)&alloc->tail += item_size;
174 |         memory_barrier();
175 |     }
176 |     else
177 |         alloc->tail += item_size;
178 | 
179 |     mehcached_pool_check_invariants(alloc);
180 | 
181 | #ifdef MEHCACHED_VERBOSE
182 |     printf("pushing item size = %u at tail = %lu\n", item_size, item_offset & MEHCACHED_ITEM_OFFSET_MASK);
183 | #endif
184 |     return item_offset & MEHCACHED_ITEM_OFFSET_MASK;
185 | }
186 | 
187 | // Allocation entry point of the pool: forwards to mehcached_pool_push_tail() and returns its offset.
188 | static uint64_t mehcached_pool_allocate(struct mehcached_pool *alloc, uint32_t item_size)
189 | {
190 |     const uint64_t item_offset = mehcached_pool_push_tail(alloc, item_size);
191 |     return item_offset;
192 | }
193 | 
194 | // Returns whether the distance from pool_offset up to tail, masked with MEHCACHED_ITEM_OFFSET_MASK,
195 | // is at most the pool size (presumably meaning the item has not been overwritten yet).
196 | static bool mehcached_pool_is_valid(const struct mehcached_pool *alloc, uint64_t pool_offset)
197 | {
198 |     const bool concurrent = alloc->concurrent_access_mode != 0;
199 |     if (concurrent)
200 |         memory_barrier();
201 | 
202 |     // with concurrent access, tail is re-read through a volatile pointer after the barrier
203 |     const uint64_t tail = concurrent ? *(volatile uint64_t *)&alloc->tail : alloc->tail;
204 |     return ((tail - pool_offset) & MEHCACHED_ITEM_OFFSET_MASK) <= alloc->size;
205 | }
206 | 
207 | MEHCACHED_END
208 | 
209 | 


--------------------------------------------------------------------------------
/src/alloc_pool.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Carnegie Mellon University
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #pragma once
16 | 
17 | #include "common.h"
18 | #include "alloc.h"
19 | 
20 | MEHCACHED_BEGIN
21 | 
22 | // the minimum pool size in bytes (2097152 = 2 MiB) that will prevent any invalid read with garbage item metadata;
23 | // this must be at least as large as the rounded sum of an item header, key, and value,
24 | // and must also be a multiple of mehcached_shm_get_page_size()
25 | #define MEHCACHED_MINIMUM_POOL_SIZE (2097152)
26 | 
27 | struct mehcached_pool
28 | {
29 |     uint8_t concurrent_access_mode;  // 0, 1 or 2; chosen by mehcached_pool_init() from its concurrent_alloc_read/write flags
30 |     uint32_t lock;  // lock word used by mehcached_pool_lock()/mehcached_pool_unlock(): 0 = free, 1 = held
31 |     uint8_t *data;  // start address of the mapped pool memory
32 |     uint64_t size;  // a power of two
33 |     uint64_t mask;  // size - 1; this mask is used only when converting the offset to the actual location of the item
34 |     // internally, pool uses full 64-bit numbers for head and tail
35 |     // however, the valid range for item_offset is limited to (MEHCACHED_ITEM_OFFSET_MASK + 1)
36 |     // we resolve this inconsistency by applying MEHCACHED_ITEM_OFFSET_MASK mask
37 |     // whenever returning the offset to the outside or using a masked offset given from the outside
38 |     uint64_t head;  // start offset of items
39 |     uint64_t tail;  // end offset of items
40 | } MEHCACHED_ALIGNED(64);
41 | // sizes the pool (minimum size, power of two), sets the concurrency mode and maps its shared memory
42 | static
43 | void
44 | mehcached_pool_init(struct mehcached_pool *alloc, uint64_t size, bool concurrent_alloc_read, bool concurrent_alloc_write, size_t numa_node);
45 | // unmaps the pool memory mapped by mehcached_pool_init()
46 | static
47 | void
48 | mehcached_pool_free(struct mehcached_pool *alloc);
49 | // sets head and tail back to 0
50 | static
51 | void
52 | mehcached_pool_reset(struct mehcached_pool *alloc);
53 | // spins until the lock word is acquired; only active with MEHCACHED_CONCURRENT and concurrent access mode 2
54 | static
55 | void
56 | mehcached_pool_lock(struct mehcached_pool *alloc);
57 | // clears the lock word; only active with MEHCACHED_CONCURRENT and concurrent access mode 2
58 | static
59 | void
60 | mehcached_pool_unlock(struct mehcached_pool *alloc);
61 | // returns the item at pool_offset (the offset is masked with alloc->mask)
62 | static
63 | struct mehcached_alloc_item *
64 | mehcached_pool_item(const struct mehcached_pool *alloc, uint64_t pool_offset);
65 | // asserts that tail - head does not exceed size
66 | static
67 | void
68 | mehcached_pool_check_invariants(const struct mehcached_pool *alloc);
69 | // advances head past the item currently at head
70 | static
71 | void
72 | mehcached_pool_pop_head(struct mehcached_pool *alloc);
73 | // appends an item of item_size bytes at tail, popping from head as needed; returns the masked item offset
74 | static
75 | uint64_t
76 | mehcached_pool_push_tail(struct mehcached_pool *alloc, uint32_t item_size);
77 | // same as mehcached_pool_push_tail()
78 | static
79 | uint64_t
80 | mehcached_pool_allocate(struct mehcached_pool *alloc, uint32_t item_size);
81 | // returns whether the masked distance from pool_offset to tail is at most size
82 | static
83 | bool
84 | mehcached_pool_is_valid(const struct mehcached_pool *alloc, uint64_t pool_offset);
85 | 
86 | MEHCACHED_END
87 | 
88 | 


--------------------------------------------------------------------------------
/src/basic_types.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Carnegie Mellon University
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #pragma once
16 | 
17 | #include <stdint.h>
18 | #include <unistd.h>
19 | // boolean emulation for C: bool is a plain int here, so (unlike C99 _Bool) assigned values are not normalized to 0/1
20 | typedef int bool;
21 | #define true (1)
22 | #define false (0)
23 | 
24 | 


--------------------------------------------------------------------------------
/src/city.c:
--------------------------------------------------------------------------------
  1 | // city.c - cityhash-c
  2 | // CityHash on C
  3 | // Copyright (c) 2011-2012, Alexander Nusov
  4 | //
  5 | // - original copyright notice -
  6 | // Copyright (c) 2011 Google, Inc.
  7 | //
  8 | // Permission is hereby granted, free of charge, to any person obtaining a copy
  9 | // of this software and associated documentation files (the "Software"), to deal
 10 | // in the Software without restriction, including without limitation the rights
 11 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 12 | // copies of the Software, and to permit persons to whom the Software is
 13 | // furnished to do so, subject to the following conditions:
 14 | //
 15 | // The above copyright notice and this permission notice shall be included in
 16 | // all copies or substantial portions of the Software.
 17 | //
 18 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 19 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 20 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 21 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 22 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 23 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 24 | // THE SOFTWARE.
 25 | //
 26 | // CityHash, by Geoff Pike and Jyrki Alakuijala
 27 | //
 28 | // This file provides CityHash64() and related functions.
 29 | //
 30 | // It's probably possible to create even faster hash functions by
 31 | // writing a program that systematically explores some of the space of
 32 | // possible hash functions, by using SIMD instructions, or by
 33 | // compromising on hash quality.
 34 | 
 35 | #include <string.h>
 36 | #include "city.h"
 37 | // Reads sizeof(uint64) bytes from p, which need not be aligned, by copying them with memcpy().
 38 | static uint64 UNALIGNED_LOAD64(const char *p) {
 39 |   uint64 word;
 40 |   memcpy(&word, p, sizeof word);
 41 |   return word;
 42 | }
 43 | // Reads sizeof(uint32) bytes from p, which need not be aligned, by copying them with memcpy().
 44 | static uint32 UNALIGNED_LOAD32(const char *p) {
 45 |   uint32 word;
 46 |   memcpy(&word, p, sizeof word);
 47 |   return word;
 48 | }
 49 | 
 50 | #if !defined(WORDS_BIGENDIAN)
 51 | // WORDS_BIGENDIAN is not defined: loaded words are used exactly as read
 52 | #define uint32_in_expected_order(x) (x)
 53 | #define uint64_in_expected_order(x) (x)
 54 | 
 55 | #else
 56 | // WORDS_BIGENDIAN is defined: provide bswap_32/bswap_64 from whatever the platform offers
 57 | #ifdef _MSC_VER
 58 | #include <stdlib.h>
 59 | #define bswap_32(x) _byteswap_ulong(x)
 60 | #define bswap_64(x) _byteswap_uint64(x)
 61 | 
 62 | #elif defined(__APPLE__)
 63 | // Mac OS X / Darwin features
 64 | #include <libkern/OSByteOrder.h>
 65 | #define bswap_32(x) OSSwapInt32(x)
 66 | #define bswap_64(x) OSSwapInt64(x)
 67 | 
 68 | #else
 69 | #include <byteswap.h>
 70 | #endif
 71 | // loaded words are byte-swapped before use
 72 | #define uint32_in_expected_order(x) (bswap_32(x))
 73 | #define uint64_in_expected_order(x) (bswap_64(x))
 74 | 
 75 | #endif  // WORDS_BIGENDIAN
 76 | // LIKELY(x): wraps x in __builtin_expect(..., 1) when HAVE_BUILTIN_EXPECT is non-zero, otherwise expands to x unchanged
 77 | #if !defined(LIKELY)
 78 | #if HAVE_BUILTIN_EXPECT
 79 | #define LIKELY(x) (__builtin_expect(!!(x), 1))
 80 | #else
 81 | #define LIKELY(x) (x)
 82 | #endif
 83 | #endif
 84 | // Loads a uint64 from p with UNALIGNED_LOAD64() and passes it through uint64_in_expected_order().
 85 | static uint64 Fetch64(const char *p) {
 86 |   return uint64_in_expected_order(UNALIGNED_LOAD64(p));
 87 | }
 88 | // Loads a uint32 from p with UNALIGNED_LOAD32() and passes it through uint32_in_expected_order().
 89 | static uint32 Fetch32(const char *p) {
 90 |   return uint32_in_expected_order(UNALIGNED_LOAD32(p));
 91 | }
 92 | 
 93 | // Some primes between 2^63 and 2^64 for various uses; they serve as multipliers and seeds in the hash functions below.
 94 | static const uint64 k0 = 0xc3a5c85c97cb3127ULL;
 95 | static const uint64 k1 = 0xb492b66fbe98f273ULL;
 96 | static const uint64 k2 = 0x9ae16a3b2f90404fULL;
 97 | static const uint64 k3 = 0xc949d7c7509e6557ULL;
 98 | 
 99 | // Mixes the two 64-bit halves of x into one 64-bit hash value using
100 | // multiply and xor-shift rounds (described upstream as Murmur-inspired).
101 | static inline uint64 Hash128to64(const uint128 x) {
102 |   const uint64 kMul = 0x9ddfea08eb382d69ULL;
103 |   const uint64 lo = Uint128Low64(x);
104 |   const uint64 hi = Uint128High64(x);
105 |   uint64 mixed_lo = (lo ^ hi) * kMul;
106 |   mixed_lo ^= mixed_lo >> 47;
107 |   uint64 mixed_hi = (hi ^ mixed_lo) * kMul;
108 |   mixed_hi ^= mixed_hi >> 47;
109 |   return mixed_hi * kMul;
110 | }
111 | 
112 | 
113 | // Rotates val right by shift bits. shift == 0 is handled separately because the
114 | // complementary left shift would then be by 64 bits, which yields an undefined result.
115 | static uint64 Rotate(uint64 val, int shift) {
116 |   if (shift == 0) return val;
117 |   return (val >> shift) | (val << (64 - shift));
118 | }
119 | 
120 | // Right rotate like Rotate(), but without the zero check: the second argument
121 | // must be non-zero, because shift == 0 would shift left by 64 bits.
122 | static uint64 RotateByAtLeast1(uint64 val, int shift) {
123 |   const uint64 wrapped_bits = val << (64 - shift);
124 |   return (val >> shift) | wrapped_bits;
125 | }
126 | // XORs val with a copy of itself shifted right by 47 bits.
127 | static uint64 ShiftMix(uint64 val) {
128 |   return val ^ (val >> 47);
129 | }
130 | 
131 | // Hashes the pair (u, v) down to 64 bits: u goes into the first member and v into
132 | // the second member of a uint128, which is then handed to Hash128to64().
133 | static uint64 HashLen16(uint64 u, uint64 v) {
134 |   const uint128 pair = { .first = u, .second = v };
135 |   return Hash128to64(pair);
136 | }
137 | // Hashes short inputs: 1-3 bytes, 4-8 bytes and longer inputs each use their own formula; len == 0 yields k2.
138 | static uint64 HashLen0to16(const char *s, size_t len) {
139 |   if (len == 0) return k2;
140 |   if (len < 4) {
141 |     // 1-3 bytes: combine the first, middle and last byte with the length
142 |     const uint8 first = (uint8)s[0];
143 |     const uint8 middle = (uint8)s[len >> 1];
144 |     const uint8 last = (uint8)s[len - 1];
145 |     const uint32 y = (uint32)(first) + ((uint32)(middle) << 8);
146 |     const uint32 z = (uint32)len + ((uint32)(last) << 2);
147 |     return ShiftMix(y * k2 ^ z * k3) * k2;
148 |   }
149 |   if (len <= 8) {
150 |     // 4-8 bytes: one 32-bit load from the start and one from the end
151 |     const uint64 head = Fetch32(s);
152 |     return HashLen16(len + (head << 3), Fetch32(s + len - 4));
153 |   }
154 |   const uint64 head = Fetch64(s);
155 |   const uint64 tail = Fetch64(s + len - 8);
156 |   return HashLen16(head, RotateByAtLeast1(tail + len, (int)len)) ^ tail;
157 | }
158 | 
159 | // Hashes s from its first 16 and last 16 bytes (upstream: probably also fine, if overkill, for 16-byte strings).
160 | static uint64 HashLen17to32(const char *s, size_t len) {
161 |   const uint64 a = Fetch64(s) * k1;
162 |   const uint64 b = Fetch64(s + 8);
163 |   const uint64 c = Fetch64(s + len - 8) * k2;
164 |   const uint64 d = Fetch64(s + len - 16) * k0;
165 |   const uint64 u = Rotate(a - b, 43) + Rotate(c, 30) + d;
166 |   const uint64 v = a + Rotate(b ^ k3, 20) - c + len;
167 |   return HashLen16(u, v);
168 | }
169 | 
170 | // Return a 16-byte hash for 48 bytes.  Quick and dirty.
171 | // Callers do best to use "random-looking" values for a and b.
172 | // (Upstream C++ returns a pair<uint64, uint64>; this C port returns a uint128.)
173 | uint128 WeakHashLen32WithSeeds6(
174 |     uint64 w, uint64 x, uint64 y, uint64 z, uint64 a, uint64 b) {
175 |   // a_w is a + w; a_wxy additionally folds in x and y
176 |   const uint64 a_w = a + w;
177 |   const uint64 a_wxy = a_w + x + y;
178 |   uint64 b_mixed = Rotate(b + a_w + z, 21);
179 |   b_mixed += Rotate(a_wxy, 44);
180 | 
181 |   // first = a + w + x + y + z; second = the mixed b plus (a + w)
182 |   uint128 result;
183 |   result.first = (uint64) (a_wxy + z);
184 |   result.second = (uint64) (b_mixed + a_w);
185 |   return result;
186 | }
187 | 
188 | // Return a 16-byte hash for s[0] ... s[31], a, and b.  Quick and dirty.
189 | // Loads the four consecutive 64-bit words of s and forwards them,
190 | // together with a and b, to WeakHashLen32WithSeeds6().
191 | uint128 WeakHashLen32WithSeeds(const char* s, uint64 a, uint64 b) {
192 |   const uint64 w = Fetch64(s);
193 |   const uint64 x = Fetch64(s + 8);
194 |   const uint64 y = Fetch64(s + 16);
195 |   const uint64 z = Fetch64(s + 24);
196 | 
197 |   return WeakHashLen32WithSeeds6(w, x, y, z, a, b);
198 | }
199 | 
200 | // Return an 8-byte hash for 33 to 64 bytes. Two similar passes build (vf, vs) and (wf, ws), which are mixed at the end.
201 | static uint64 HashLen33to64(const char *s, size_t len) {
202 |   uint64 z = Fetch64(s + 24);
203 |   uint64 a = Fetch64(s) + (len + Fetch64(s + len - 16)) * k0;
204 |   uint64 b = Rotate(a + z, 52);
205 |   uint64 c = Rotate(a, 37);
206 |   a += Fetch64(s + 8);
207 |   c += Rotate(a, 7);
208 |   a += Fetch64(s + 16);
209 |   uint64 vf = a + z;  // result of the first pass (words at s, s + 8, s + 16, s + 24, s + len - 16)
210 |   uint64 vs = b + Rotate(a, 31) + c;
211 |   a = Fetch64(s + 16) + Fetch64(s + len - 32);  // second pass: a, z, b and c are reused from here on
212 |   z = Fetch64(s + len - 8);
213 |   b = Rotate(a + z, 52);
214 |   c = Rotate(a, 37);
215 |   a += Fetch64(s + len - 24);
216 |   c += Rotate(a, 7);
217 |   a += Fetch64(s + len - 16);
218 |   uint64 wf = a + z;  // result of the second pass (mostly words addressed from the end of s)
219 |   uint64 ws = b + Rotate(a, 31) + c;
220 |   uint64 r = ShiftMix((vf + ws) * k2 + (wf + vs) * k0);
221 |   return ShiftMix(r * k0 + vs) * k2;
222 | }
223 | 
224 | uint64 CityHash64(const char *s, size_t len) {  // 64-bit hash of the len bytes at s
225 |   if (len <= 32) {  // up to 64 bytes are handled by the length-specific helpers
226 |     if (len <= 16) {
227 |       return HashLen0to16(s, len);
228 |     } else {
229 |       return HashLen17to32(s, len);
230 |     }
231 |   } else if (len <= 64) {
232 |     return HashLen33to64(s, len);
233 |   }
234 | 
235 |   // For strings over 64 bytes we hash the end first, and then as we
236 |   // loop we keep 56 bytes of state: v, w, x, y, and z.
237 |   uint64 x = Fetch64(s + len - 40);
238 |   uint64 y = Fetch64(s + len - 16) + Fetch64(s + len - 56);
239 |   uint64 z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24));
240 |   uint64 temp;
241 |   uint128 v = WeakHashLen32WithSeeds(s + len - 64, len, z);
242 |   uint128 w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x);
243 |   x = x * k1 + Fetch64(s);
244 | 
245 |   // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
246 |   len = (len - 1) & ~(size_t)(63);
247 |   do {
248 |     x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
249 |     y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
250 |     x ^= w.second;
251 |     y += v.first + Fetch64(s + 40);
252 |     z = Rotate(z + w.first, 33) * k1;
253 |     v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
254 |     w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
255 |     temp = z;  // swap x and z
256 |     z = x;
257 |     x = temp;
258 |     s += 64;
259 |     len -= 64;
260 |   } while (len != 0);
261 |   return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z,
262 |                    HashLen16(v.second, w.second) + x);
263 | }
264 | // Same as CityHash64WithSeeds() with k2 as the first seed and seed as the second.
265 | uint64 CityHash64WithSeed(const char *s, size_t len, uint64 seed) {
266 |   return CityHash64WithSeeds(s, len, k2, seed);
267 | }
268 | // Computes CityHash64(s, len), subtracts seed0 from it and hashes the difference together with seed1.
269 | uint64 CityHash64WithSeeds(const char *s, size_t len, uint64 seed0, uint64 seed1) {
270 |   const uint64 unseeded = CityHash64(s, len);
271 |   return HashLen16(unseeded - seed0, seed1);
272 | }
273 | 
274 | // A subroutine for CityHash128().  Returns a decent 128-bit hash for strings
275 | // of any length representable in signed long.  Based on City and Murmur.
276 | static uint128 CityMurmur(const char *s, size_t len, uint128 seed) {
277 |   uint64 a = Uint128Low64(seed);
278 |   uint64 b = Uint128High64(seed);
279 |   uint64 c = 0;
280 |   uint64 d = 0;
281 |   signed long l = (signed long)(len - 16);  // signed, so that len < 16 gives a negative value
282 |   if (l <= 0) {  // len <= 16
283 |     a = ShiftMix(a * k1) * k1;
284 |     c = b * k1 + HashLen0to16(s, len);
285 |     d = ShiftMix(a + (len >= 8 ? Fetch64(s) : c));
286 |   } else {  // len > 16
287 |     c = HashLen16(Fetch64(s + len - 8) + k1, a);
288 |     d = HashLen16(b + len, c + Fetch64(s + len - 16));
289 |     a += d;
290 |     do {  // consume 16 bytes of s per iteration
291 |       a ^= ShiftMix(Fetch64(s) * k1) * k1;
292 |       a *= k1;
293 |       b ^= a;
294 |       c ^= ShiftMix(Fetch64(s + 8) * k1) * k1;
295 |       c *= k1;
296 |       d ^= c;
297 |       s += 16;
298 |       l -= 16;
299 |     } while (l > 0);
300 |   }
301 |   a = HashLen16(a, c);
302 |   b = HashLen16(d, b);
303 | 
304 |   uint128 result;
305 |   result.first = (uint64) (a ^ b);
306 |   result.second = (uint64) (HashLen16(b,a));
307 |   return result;
308 | }
309 | 
310 | uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed) {  // 128-bit hash of the len bytes at s, mixed with seed
311 |   if (len < 128) {
312 |     return CityMurmur(s, len, seed);
313 |   }
314 | 
315 |   // We expect len >= 128 to be the common case.  Keep 56 bytes of state:
316 |   // v, w, x, y, and z.
317 |   uint128 v, w;
318 |   uint64 x = Uint128Low64(seed);
319 |   uint64 y = Uint128High64(seed);
320 |   uint64 z = len * k1;
321 |   uint64 temp;
322 |   v.first = Rotate(y ^ k1, 49) * k1 + Fetch64(s);
323 |   v.second = Rotate(v.first, 42) * k1 + Fetch64(s + 8);
324 |   w.first = Rotate(y + z, 35) * k1 + x;
325 |   w.second = Rotate(x + Fetch64(s + 88), 53) * k1;
326 | 
327 |   // This is the same inner loop as CityHash64(), manually unrolled.
328 |   do {  // each iteration consumes 128 bytes as two 64-byte rounds
329 |     x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
330 |     y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
331 |     x ^= w.second;
332 |     y += v.first + Fetch64(s + 40);
333 |     z = Rotate(z + w.first, 33) * k1;
334 |     v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
335 |     w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
336 |     temp = z;  // swap x and z
337 |     z = x;
338 |     x = temp;
339 |     s += 64;
340 |     x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
341 |     y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
342 |     x ^= w.second;
343 |     y += v.first + Fetch64(s + 40);
344 |     z = Rotate(z + w.first, 33) * k1;
345 |     v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
346 |     w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
347 |     temp = z;  // swap x and z
348 |     z = x;
349 |     x = temp;
350 |     s += 64;
351 |     len -= 128;
352 |   } while (LIKELY(len >= 128));
353 |   x += Rotate(v.first + z, 49) * k0;
354 |   z += Rotate(w.first, 37) * k0;
355 |   // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
356 |   size_t tail_done;
357 |   for (tail_done = 0; tail_done < len; ) {
358 |     tail_done += 32;  // advanced first, so the offsets below count back from the end of s
359 |     y = Rotate(x + y, 42) * k0 + v.second;
360 |     w.first += Fetch64(s + len - tail_done + 16);
361 |     x = x * k0 + w.first;
362 |     z += w.second + Fetch64(s + len - tail_done);
363 |     w.second += v.first;
364 |     v = WeakHashLen32WithSeeds(s + len - tail_done, v.first + z, v.second);
365 |   }
366 |   // At this point our 56 bytes of state should contain more than
367 |   // enough information for a strong 128-bit hash.  We use two
368 |   // different 56-byte-to-8-byte hashes to get a 16-byte final result.
369 |   x = HashLen16(x, v.first);
370 |   y = HashLen16(y + z, w.first);
371 | 
372 |   uint128 result;
373 |   result.first = (uint64) (HashLen16(x + v.second, w.second) + y);
374 |   result.second = (uint64) HashLen16(x + w.second, y + v.second);
375 |   return result;
376 | }
377 | // 128-bit hash of the len bytes at s: derives a seed from the input length class, then calls CityHash128WithSeed().
378 | uint128 CityHash128(const char *s, size_t len) {
379 |   uint128 seed;
380 |   const char *rest = s;
381 |   size_t rest_len = len;
382 |   if (len >= 16) {
383 |     // the first 16 bytes become the seed; only the remainder is hashed
384 |     seed.first = (uint64) (Fetch64(s) ^ k3);
385 |     seed.second = (uint64) (Fetch64(s + 8));
386 |     rest = s + 16;
387 |     rest_len = len - 16;
388 |   } else if (len >= 8) {
389 |     // 8..15 bytes: the first and last 8 bytes go into the seed and no bytes are left to hash
390 |     seed.first = (uint64) (Fetch64(s) ^ (len * k0));
391 |     seed.second = (uint64) (Fetch64(s + len - 8) ^ k1);
392 |     rest = NULL;
393 |     rest_len = 0;
394 |   } else {
395 |     // fewer than 8 bytes: fixed seed (k0, k1) and the whole input is hashed
396 |     seed.first = (uint64) k0;
397 |     seed.second = (uint64) k1;
398 |   }
399 |   return CityHash128WithSeed(rest, rest_len, seed);
400 | }
401 | 
402 | #ifdef __SSE4_2__
403 | #include "citycrc.h"
404 | #include <nmmintrin.h>
405 | 
406 | // Requires len >= 240. Writes four 64-bit words to result[0..3].
407 | static void CityHashCrc256Long(const char *s, size_t len,
408 |                                uint32 seed, uint64 *result) {
409 |   uint64 a = Fetch64(s + 56) + k0;
410 |   uint64 b = Fetch64(s + 96) + k0;
411 |   uint64 c = result[0] = HashLen16(b, len);
412 |   uint64 d = result[1] = Fetch64(s + 120) * k0 + len;
413 |   uint64 e = Fetch64(s + 184) + seed;
414 |   uint64 f = seed;
415 |   uint64 g = 0;
416 |   uint64 h = 0;
417 |   uint64 i = 0;
418 |   uint64 j = 0;
419 |   uint64 t = c + d;
420 | 
421 |   // 240 bytes of input per iter.
422 |   size_t iters = len / 240;
423 |   len -= iters * 240;  // len now holds the bytes left over after the full 240-byte iterations
424 |   do {
425 | #define CHUNK(multiplier, z)                                    \
426 |     {                                                           \
427 |       uint64 old_a = a;                                         \
428 |       a = Rotate(b, 41 ^ z) * multiplier + Fetch64(s);          \
429 |       b = Rotate(c, 27 ^ z) * multiplier + Fetch64(s + 8);      \
430 |       c = Rotate(d, 41 ^ z) * multiplier + Fetch64(s + 16);     \
431 |       d = Rotate(e, 33 ^ z) * multiplier + Fetch64(s + 24);     \
432 |       e = Rotate(t, 25 ^ z) * multiplier + Fetch64(s + 32);     \
433 |       t = old_a;                                                \
434 |     }                                                           \
435 |     f = _mm_crc32_u64(f, a);                                    \
436 |     g = _mm_crc32_u64(g, b);                                    \
437 |     h = _mm_crc32_u64(h, c);                                    \
438 |     i = _mm_crc32_u64(i, d);                                    \
439 |     j = _mm_crc32_u64(j, e);                                    \
440 |     s += 40
441 |     // each CHUNK consumes 40 bytes of s, so the six below cover 240 bytes
442 |     CHUNK(1, 1); CHUNK(k0, 0);
443 |     CHUNK(1, 1); CHUNK(k0, 0);
444 |     CHUNK(1, 1); CHUNK(k0, 0);
445 |   } while (--iters > 0);
446 | 
447 |   while (len >= 40) {
448 |     CHUNK(k0, 0);
449 |     len -= 40;
450 |   }
451 |   if (len > 0) {
452 |     s = s + len - 40;  // step back so the final 40-byte chunk ends exactly at the end of the input
453 |     CHUNK(k0, 0);
454 |   }
455 |   j += i << 32;
456 |   a = HashLen16(a, j);
457 |   h += g << 32;
458 |   b += h;
459 |   c = HashLen16(c, f) + i;
460 |   d = HashLen16(d, e + result[0]);
461 |   j += e;
462 |   i += HashLen16(h, t);
463 |   e = HashLen16(a, d) + j;
464 |   f = HashLen16(b, c) + a;
465 |   g = HashLen16(j, i) + c;
466 |   result[0] = e + f + g + h;
467 |   a = ShiftMix((a + g) * k0) * k0 + b;
468 |   result[1] += a + result[0];
469 |   a = ShiftMix(a * k0) * k0 + c;
470 |   result[2] = a + result[1];
471 |   a = ShiftMix((a + e) * k0) * k0;
472 |   result[3] = a + result[2];
473 | }
474 | 
475 | // Requires len < 240. Zero-pads s to a 240-byte buffer and hashes that buffer,
476 | // seeding with the bitwise complement of the original length.
477 | static void CityHashCrc256Short(const char *s, size_t len, uint64 *result) {
478 |   char padded[240] = {0};
479 |   memcpy(padded, s, len);
480 |   CityHashCrc256Long(padded, sizeof(padded), ~(uint32)(len), result);
481 | }
482 | // Writes a 256-bit hash of s to result[0..3]; inputs of 240 bytes or more use the long variant, shorter ones the padded variant.
483 | void CityHashCrc256(const char *s, size_t len, uint64 *result) {
484 |   if (LIKELY(len >= 240)) {
485 |     CityHashCrc256Long(s, len, 0, result);
486 |     return;
487 |   }
488 |   CityHashCrc256Short(s, len, result);
489 | }
490 | // Seeded 128-bit hash: inputs of up to 900 bytes are delegated to CityHash128WithSeed().
491 | uint128 CityHashCrc128WithSeed(const char *s, size_t len, uint128 seed) {
492 |   if (len <= 900) return CityHash128WithSeed(s, len, seed);
493 | 
494 |   // longer inputs: compute the 256-bit hash and fold its four words together with the seed
495 |   uint64 result[4];
496 |   CityHashCrc256(s, len, result);
497 |   const uint64 u = Uint128High64(seed) + result[0];
498 |   const uint64 v = Uint128Low64(seed) + result[1];
499 | 
500 |   uint128 crc;
501 |   crc.first = (uint64) (HashLen16(u, v + result[2]));
502 |   crc.second = (uint64) (HashLen16(Rotate(v, 32), u * k0 + result[3]));
503 |   return crc;
504 | }
505 | // 128-bit hash: inputs of up to 900 bytes are delegated to CityHash128().
506 | uint128 CityHashCrc128(const char *s, size_t len) {
507 |   if (len <= 900) return CityHash128(s, len);
508 | 
509 |   // longer inputs: the result is made of the last two words of the 256-bit hash
510 |   uint64 result[4];
511 |   CityHashCrc256(s, len, result);
512 | 
513 |   uint128 crc;
514 |   crc.first = (uint64) result[2];
515 |   crc.second = (uint64) result[3];
516 |   return crc;
517 | }
518 | 
519 | #endif
520 | 
521 | 


--------------------------------------------------------------------------------
/src/city.h:
--------------------------------------------------------------------------------
 1 | // city.h - cityhash-c
 2 | // CityHash on C
 3 | // Copyright (c) 2011-2012, Alexander Nusov
 4 | //
 5 | // - original copyright notice -
 6 | // Copyright (c) 2011 Google, Inc.
 7 | //
 8 | // Permission is hereby granted, free of charge, to any person obtaining a copy
 9 | // of this software and associated documentation files (the "Software"), to deal
10 | // in the Software without restriction, including without limitation the rights
11 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 | // copies of the Software, and to permit persons to whom the Software is
13 | // furnished to do so, subject to the following conditions:
14 | //
15 | // The above copyright notice and this permission notice shall be included in
16 | // all copies or substantial portions of the Software.
17 | //
18 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 | // THE SOFTWARE.
25 | //
26 | // CityHash, by Geoff Pike and Jyrki Alakuijala
27 | //
28 | // This file provides a few functions for hashing strings. On x86-64
29 | // hardware in 2011, CityHash64() is faster than other high-quality
30 | // hash functions, such as Murmur.  This is largely due to higher
31 | // instruction-level parallelism.  CityHash64() and CityHash128() also perform
32 | // well on hash-quality tests.
33 | //
34 | // CityHash128() is optimized for relatively long strings and returns
35 | // a 128-bit hash.  For strings more than about 2000 bytes it can be
36 | // faster than CityHash64().
37 | //
38 | // Functions in the CityHash family are not suitable for cryptography.
39 | //
40 | // WARNING: This code has not been tested on big-endian platforms!
41 | // It is known to work well on little-endian platforms that have a small penalty
42 | // for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs.
43 | //
44 | // By the way, for some hash functions, given strings a and b, the hash
45 | // of a+b is easily derived from the hashes of a and b.  This property
46 | // doesn't hold for any hash functions in this file.
47 | 
48 | #ifndef CITY_HASH_H_
49 | #define CITY_HASH_H_
50 | 
51 | #include <stdlib.h>
52 | #include <stdint.h>
53 | 
// Short aliases for the fixed-width integer types from <stdint.h>.
typedef uint8_t uint8;
typedef uint32_t uint32;
typedef uint64_t uint64;

// 128-bit value represented as a pair of 64-bit words.
typedef struct _uint128 uint128;
struct _uint128 {
  uint64 first;   // accessed through Uint128Low64()
  uint64 second;  // accessed through Uint128High64()
};

// Accessors for the two halves of a uint128; each expands to an lvalue
// member access on its argument.
#define Uint128Low64(x) 	(x).first
#define Uint128High64(x)	(x).second
66 | 
67 | // Hash function for a byte array.
68 | uint64 CityHash64(const char *buf, size_t len);
69 | 
70 | // Hash function for a byte array.  For convenience, a 64-bit seed is also
71 | // hashed into the result.
72 | uint64 CityHash64WithSeed(const char *buf, size_t len, uint64 seed);
73 | 
74 | // Hash function for a byte array.  For convenience, two seeds are also
75 | // hashed into the result.
76 | uint64 CityHash64WithSeeds(const char *buf, size_t len,
77 |                            uint64 seed0, uint64 seed1);
78 | 
79 | // Hash function for a byte array.
80 | uint128 CityHash128(const char *s, size_t len);
81 | 
82 | // Hash function for a byte array.  For convenience, a 128-bit seed is also
83 | // hashed into the result.
84 | uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed);
85 | 
86 | #endif  // CITY_HASH_H_
87 | 
88 | 


--------------------------------------------------------------------------------
/src/citycrc.h:
--------------------------------------------------------------------------------
 1 | // citycrc.h - cityhash-c
 2 | // CityHash on C
 3 | // Copyright (c) 2011-2012, Alexander Nusov
 4 | //
 5 | // - original copyright notice -
 6 | // Copyright (c) 2011 Google, Inc.
 7 | //
 8 | // Permission is hereby granted, free of charge, to any person obtaining a copy
 9 | // of this software and associated documentation files (the "Software"), to deal
10 | // in the Software without restriction, including without limitation the rights
11 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 | // copies of the Software, and to permit persons to whom the Software is
13 | // furnished to do so, subject to the following conditions:
14 | //
15 | // The above copyright notice and this permission notice shall be included in
16 | // all copies or substantial portions of the Software.
17 | //
18 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 | // THE SOFTWARE.
25 | //
26 | // CityHash, by Geoff Pike and Jyrki Alakuijala
27 | //
28 | // This file declares the subset of the CityHash functions that require
29 | // _mm_crc32_u64().  See the CityHash README for details.
30 | //
31 | // Functions in the CityHash family are not suitable for cryptography.
32 | 
33 | #ifndef CITY_HASH_CRC_H_
34 | #define CITY_HASH_CRC_H_
35 | 
36 | #include "city.h"
37 | 
// Hash function for a byte array.
// For len <= 900 the implementation returns CityHash128(s, len); longer
// inputs are hashed through CityHashCrc256().
uint128 CityHashCrc128(const char *s, size_t len);

// Hash function for a byte array.  For convenience, a 128-bit seed is also
// hashed into the result.
// For len <= 900 the implementation returns CityHash128WithSeed(s, len, seed).
uint128 CityHashCrc128WithSeed(const char *s, size_t len, uint128 seed);

// Hash function for a byte array.  Sets result[0] ... result[3], so result
// must point to storage for at least four uint64 values.
void CityHashCrc256(const char *s, size_t len, uint64 *result);
47 | 
48 | #endif  // CITY_HASH_CRC_H_
49 | 
50 | 


--------------------------------------------------------------------------------
/src/common.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Carnegie Mellon University
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #pragma once
16 | 
17 | #include "basic_types.h"
18 | 
// MEHCACHED_BEGIN / MEHCACHED_END bracket declarations so that they get C
// linkage when the header is compiled as C++; in C they expand to nothing.
#ifdef __cplusplus
#define MEHCACHED_BEGIN extern "C" {
#define MEHCACHED_END }
#else
#define MEHCACHED_BEGIN
#define MEHCACHED_END
#endif

// Shorthands for compiler attributes (GCC-style __attribute__ syntax).
#define MEHCACHED_UNUSED __attribute__((unused))
#define MEHCACHED_WARN_UNUSED_RESULT __attribute__((warn_unused_result))
#define MEHCACHED_ALWAYS_INLINE __attribute__((always_inline))

// Requests the given alignment (in bytes) for the annotated entity.
#define MEHCACHED_ALIGNED(alignment) __attribute__ ((aligned (alignment)))
32 | 
33 | #include "config.h"
34 | 
35 | 


--------------------------------------------------------------------------------
/src/config.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Carnegie Mellon University
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #pragma once
16 | 
17 | // mehcached configuration
18 | 
19 | // be verbose (only for debugging)
20 | //#define MEHCACHED_VERBOSE
21 | 
22 | // use counters to collect statistics
23 | //#define MEHCACHED_COLLECT_STATS
24 | 
25 | 
26 | // support for concurrent access
27 | #define MEHCACHED_CONCURRENT
28 | 
29 | 
// store mode
// Defining MEHCACHED_NO_EVICTION switches the allocator selection below from
// the pool allocator to the custom dynamic allocator.
// #define MEHCACHED_NO_EVICTION

// use log-structured pool allocator (other MEHCACHED_ALLOC_* must be undef)
// Selected automatically whenever MEHCACHED_NO_EVICTION is not defined.
//#define MEHCACHED_ALLOC_POOL
#ifndef MEHCACHED_NO_EVICTION
#define MEHCACHED_ALLOC_POOL
#endif

// use malloc allocator for each item (other MEHCACHED_ALLOC_* must be undef)
// Never selected automatically; uncomment to enable by hand.
//#define MEHCACHED_ALLOC_MALLOC

// use custom dynamic allocator for each item (other MEHCACHED_ALLOC_* must be undef)
// Selected automatically whenever MEHCACHED_NO_EVICTION is defined.
//#define MEHCACHED_ALLOC_DYNAMIC
#ifdef MEHCACHED_NO_EVICTION
#define MEHCACHED_ALLOC_DYNAMIC
#endif
47 | 
48 | 


--------------------------------------------------------------------------------
/src/hash.c:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Carnegie Mellon University
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "hash.h"
16 | 
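// Table of 256 random 32-bit values, one per possible byte value (the hash
// functions in hash.h index it with a single key byte).
// Source: http://home.comcast.net/~bretm/hash/10.html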
18 | const uint32_t sbox[] = 
19 |     {
20 |         0xF53E1837, 0x5F14C86B, 0x9EE3964C, 0xFA796D53,
21 |         0x32223FC3, 0x4D82BC98, 0xA0C7FA62, 0x63E2C982,
22 |         0x24994A5B, 0x1ECE7BEE, 0x292B38EF, 0xD5CD4E56,
23 |         0x514F4303, 0x7BE12B83, 0x7192F195, 0x82DC7300,
24 |         0x084380B4, 0x480B55D3, 0x5F430471, 0x13F75991,
25 |         0x3F9CF22C, 0x2FE0907A, 0xFD8E1E69, 0x7B1D5DE8,
26 |         0xD575A85C, 0xAD01C50A, 0x7EE00737, 0x3CE981E8,
27 |         0x0E447EFA, 0x23089DD6, 0xB59F149F, 0x13600EC7,
28 |         0xE802C8E6, 0x670921E4, 0x7207EFF0, 0xE74761B0,
29 |         0x69035234, 0xBFA40F19, 0xF63651A0, 0x29E64C26,
30 |         0x1F98CCA7, 0xD957007E, 0xE71DDC75, 0x3E729595,
31 |         0x7580B7CC, 0xD7FAF60B, 0x92484323, 0xA44113EB,
32 |         0xE4CBDE08, 0x346827C9, 0x3CF32AFA, 0x0B29BCF1,
33 |         0x6E29F7DF, 0xB01E71CB, 0x3BFBC0D1, 0x62EDC5B8,
34 |         0xB7DE789A, 0xA4748EC9, 0xE17A4C4F, 0x67E5BD03,
35 |         0xF3B33D1A, 0x97D8D3E9, 0x09121BC0, 0x347B2D2C,
36 |         0x79A1913C, 0x504172DE, 0x7F1F8483, 0x13AC3CF6,
37 |         0x7A2094DB, 0xC778FA12, 0xADF7469F, 0x21786B7B,
38 |         0x71A445D0, 0xA8896C1B, 0x656F62FB, 0x83A059B3,
39 |         0x972DFE6E, 0x4122000C, 0x97D9DA19, 0x17D5947B,
40 |         0xB1AFFD0C, 0x6EF83B97, 0xAF7F780B, 0x4613138A,
41 |         0x7C3E73A6, 0xCF15E03D, 0x41576322, 0x672DF292,
42 |         0xB658588D, 0x33EBEFA9, 0x938CBF06, 0x06B67381,
43 |         0x07F192C6, 0x2BDA5855, 0x348EE0E8, 0x19DBB6E3,
44 |         0x3222184B, 0xB69D5DBA, 0x7E760B88, 0xAF4D8154,
45 |         0x007A51AD, 0x35112500, 0xC9CD2D7D, 0x4F4FB761,
46 |         0x694772E3, 0x694C8351, 0x4A7E3AF5, 0x67D65CE1,
47 |         0x9287DE92, 0x2518DB3C, 0x8CB4EC06, 0xD154D38F,
48 |         0xE19A26BB, 0x295EE439, 0xC50A1104, 0x2153C6A7,
49 |         0x82366656, 0x0713BC2F, 0x6462215A, 0x21D9BFCE,
50 |         0xBA8EACE6, 0xAE2DF4C1, 0x2A8D5E80, 0x3F7E52D1,
51 |         0x29359399, 0xFEA1D19C, 0x18879313, 0x455AFA81,
52 |         0xFADFE838, 0x62609838, 0xD1028839, 0x0736E92F,
53 |         0x3BCA22A3, 0x1485B08A, 0x2DA7900B, 0x852C156D,
54 |         0xE8F24803, 0x00078472, 0x13F0D332, 0x2ACFD0CF,
55 |         0x5F747F5C, 0x87BB1E2F, 0xA7EFCB63, 0x23F432F0,
56 |         0xE6CE7C5C, 0x1F954EF6, 0xB609C91B, 0x3B4571BF,
57 |         0xEED17DC0, 0xE556CDA0, 0xA7846A8D, 0xFF105F94,
58 |         0x52B7CCDE, 0x0E33E801, 0x664455EA, 0xF2C70414,
59 |         0x73E7B486, 0x8F830661, 0x8B59E826, 0xBB8AEDCA,
60 |         0xF3D70AB9, 0xD739F2B9, 0x4A04C34A, 0x88D0F089,
61 |         0xE02191A2, 0xD89D9C78, 0x192C2749, 0xFC43A78F,
62 |         0x0AAC88CB, 0x9438D42D, 0x9E280F7A, 0x36063802,
63 |         0x38E8D018, 0x1C42A9CB, 0x92AAFF6C, 0xA24820C5,
64 |         0x007F077F, 0xCE5BC543, 0x69668D58, 0x10D6FF74,
65 |         0xBE00F621, 0x21300BBE, 0x2E9E8F46, 0x5ACEA629,
66 |         0xFA1F86C7, 0x52F206B8, 0x3EDF1A75, 0x6DA8D843,
67 |         0xCF719928, 0x73E3891F, 0xB4B95DD6, 0xB2A42D27,
68 |         0xEDA20BBF, 0x1A58DBDF, 0xA449AD03, 0x6DDEF22B,
69 |         0x900531E6, 0x3D3BFF35, 0x5B24ABA2, 0x472B3E4C,
70 |         0x387F2D75, 0x4D8DBA36, 0x71CB5641, 0xE3473F3F,
71 |         0xF6CD4B7F, 0xBF7D1428, 0x344B64D0, 0xC5CDFCB6,
72 |         0xFE2E0182, 0x2C37A673, 0xDE4EB7A3, 0x63FDC933,
73 |         0x01DC4063, 0x611F3571, 0xD167BFAF, 0x4496596F,
74 |         0x3DEE0689, 0xD8704910, 0x7052A114, 0x068C9EC5,
75 |         0x75D0E766, 0x4D54CC20, 0xB44ECDE2, 0x4ABC653E,
76 |         0x2C550A21, 0x1A52C0DB, 0xCFED03D0, 0x119BAFE2,
77 |         0x876A6133, 0xBC232088, 0x435BA1B2, 0xAE99BBFA,
78 |         0xBB4F08E4, 0xA62B5F49, 0x1DA4B695, 0x336B84DE,
79 |         0xDC813D31, 0x00C134FB, 0x397A98E6, 0x151F0E64,
80 |         0xD9EB3E69, 0xD3C7DF60, 0xD2F2C336, 0x2DDD067B,
81 |         0xBD122835, 0xB0B3BD3A, 0xB0D54E46, 0x8641F1E4,
82 |         0xA0B38F96, 0x51D39199, 0x37A6AD75, 0xDF84EE41,
83 |         0x3C034CBA, 0xACDA62FC, 0x11923B8B, 0x45EF170A,
84 |     };
85 | 
86 | 


--------------------------------------------------------------------------------
/src/hash.h:
--------------------------------------------------------------------------------
  1 | // Copyright 2014 Carnegie Mellon University
  2 | //
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //     http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | #pragma once
 16 | 
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <assert.h>
#include "citycrc.h"
 21 | 
 22 | /*
 23 | static uint32_t crc32_le(uint32_t crc, const uint8_t *data, size_t len)
 24 | {
 25 |     // SSE 4.2 & 64-bit required
 26 |     size_t words = len >> 3;
 27 |     size_t tail = len & 7;
 28 |     while (words)
 29 |     {
 30 |         words--;
 31 |         crc = (uint32_t)__builtin_ia32_crc32di(crc, *(const uint64_t *)data);
 32 |         data += 8;
 33 |     }
 34 |     while (tail)
 35 |     {
 36 |         tail--;
 37 |         crc = __builtin_ia32_crc32qi(crc, *data);
 38 |         data++;
 39 |     }
 40 |     return crc;
 41 | }
 42 | */
 43 | 
 44 | extern const uint32_t sbox[];
 45 | 
 46 | static uint64_t tab_hash(const uint8_t *key, size_t len)
 47 | {
 48 |     // a large prime number
 49 |     uint32_t h = 4294967291U;
 50 |     while (len)
 51 |     {
 52 |         len--;
 53 |         // tabulation hashing -- Carter and Wegman (STOC'77)
 54 |         h ^= sbox[*key];
 55 |         key++;
 56 |     }
 57 |     return (uint64_t)h;
 58 | }
 59 | 
 60 | static uint64_t sbox_hash(uint8_t *key, size_t len)
 61 | {
 62 |     // a large prime number
 63 |     uint32_t h = 4294967291U;
 64 |     while (len)
 65 |     {
 66 |         len--;
 67 |         h ^= sbox[*key];
 68 |         h *= 3;
 69 |         key++;
 70 |     }
 71 |     return (uint64_t)h;
 72 | }
 73 | 
 74 | static uint64_t noop_hash(const uint8_t *key, size_t len)
 75 | {
 76 |     assert(len == sizeof(uint64_t));
 77 |     (void)len;
 78 |     return *(uint64_t *)key;
 79 | }
 80 | 
 81 | static uint64_t mul_hash(const uint8_t *key, size_t len)
 82 | {
 83 |     assert(len == sizeof(uint64_t));
 84 |     (void)len;
 85 |     // a large prime number
 86 |     return *(uint64_t *)key * 18446744073709551557UL;
 87 | }
 88 | 
// MD4 digest truncated to its first size_t-sized word.
// (This comment used to say "12 B", but the function returns a single size_t
// read from the start of the digest, widened to uint64_t.)
#include <openssl/md4.h>
static uint64_t hash_md4(const uint8_t *key, size_t len)
{
    // digest buffer declared as size_t words, rounded up to cover MD4_DIGEST_LENGTH bytes
    size_t temp_hash[(MD4_DIGEST_LENGTH + sizeof(size_t) - 1) / sizeof(size_t)];
    MD4(key, len, (uint8_t *)temp_hash);
    // the digest must be at least as long as the 8 bytes of the return type
    assert(8 <= MD4_DIGEST_LENGTH);
    return *(size_t *)temp_hash;
}
 98 | 
// Key hash used by the programs that include this header.
// Exactly one of the return statements below is active; the commented-out
// lines are alternative hash functions that can be swapped in by hand.
// Currently selected: CityHash64 over the raw key bytes.
static uint64_t hash(const uint8_t *key, size_t len)
{
    //return noop_hash(key, len);
    //return mul_hash(key, len);
    //return tab_hash(key, len);
    //return (uint64_t)crc32_le(0xffffffff, key, len);
    //return (uint64_t)crc32_le(0xffffffff, key, len) * 18446744073709551557UL;
    //return sbox_hash(key, len);
    //return hash_md4(key, len);
    //return CityHashCrc128((const char *)key, len).first;
    return CityHash64((const char *)key, len);
}
111 | 
112 | 


--------------------------------------------------------------------------------
/src/load.c:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Carnegie Mellon University
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include <stdio.h>
16 | #include <assert.h>
17 | 
18 | #include "mehcached.h"
19 | #include "hash.h"
20 | 
21 | void
22 | test_load()
23 | {
24 |     printf("test_load()\n");
25 | 
26 |     const size_t num_items = 1048576;
27 |     //const size_t num_items = 1048576 * 10;
28 | 
29 |     struct mehcached_table table_o;
30 |     struct mehcached_table *table = &table_o;
31 |     size_t numa_nodes[] = {(size_t)-1};
32 |     size_t alloc_overhead = sizeof(struct mehcached_item);
33 | #ifdef MEHCACHED_ALLOC_DYNAMIC
34 |     alloc_overhead += MEHCAHCED_DYNAMIC_OVERHEAD;
35 | #endif
36 |     mehcached_table_init(table, (num_items + MEHCACHED_ITEMS_PER_BUCKET - 1) / MEHCACHED_ITEMS_PER_BUCKET, 1, num_items * /*MEHCACHED_ROUNDUP64*/(alloc_overhead + 8 + 8), false, false, false, numa_nodes[0], numa_nodes, MEHCACHED_MTH_THRESHOLD_FIFO);
37 | 
38 |     bool first_failure = false;
39 |     size_t first_failure_i = 0;
40 |     size_t success_count = 0;
41 | 
42 |     size_t i;
43 |     for (i = 0; i < num_items; i++)
44 |     {
45 |         size_t key = i;
46 |         size_t value = i;
47 |         uint64_t key_hash = hash((const uint8_t *)&key, sizeof(key));
48 |         mehcached_set(0, table, key_hash, (const uint8_t *)&key, sizeof(key), (const uint8_t *)&value, sizeof(value), 0, false);
49 |     }
50 | 
51 |     for (i = 0; i < num_items; i++)
52 |     {
53 |         size_t key = i;
54 |         size_t value;
55 |         size_t value_len = sizeof(value);
56 |         uint64_t key_hash = hash((const uint8_t *)&key, sizeof(key));
57 | 
58 |         if (mehcached_get(0, table, key_hash, (const uint8_t *)&key, sizeof(key), (uint8_t *)&value, &value_len, NULL, false))
59 |             success_count++;
60 |         else
61 |         {
62 |             if (!first_failure)
63 |             {
64 |                 first_failure = true;
65 |                 first_failure_i = i;
66 |             }
67 |         }
68 |     }
69 | 
70 |     printf("first_failure: %zu (%.2f%%)\n", first_failure_i, 100. * (double)first_failure_i / (double)num_items);
71 |     printf("success_count: %zu (%.2f%%)\n", success_count, 100. * (double)success_count / (double)num_items);
72 | 
73 |     //mehcached_print_buckets(table);
74 |     mehcached_print_stats(table);
75 | 
76 |     mehcached_table_free(table);
77 | }
78 | 
79 | int
80 | main(int argc MEHCACHED_UNUSED, const char *argv[] MEHCACHED_UNUSED)
81 | {
82 | 	const size_t page_size = 1048576 * 2;
83 | 	const size_t num_numa_nodes = 2;
84 |     const size_t num_pages_to_try = 16384;
85 |     const size_t num_pages_to_reserve = 16384 - 2048;   // give 2048 pages to dpdk
86 | 
87 | 	mehcached_shm_init(page_size, num_numa_nodes, num_pages_to_try, num_pages_to_reserve);
88 | 
89 |     test_load();
90 | 
91 |     return EXIT_SUCCESS;
92 | }
93 | 
94 | 


--------------------------------------------------------------------------------
/src/mehcached.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Carnegie Mellon University
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #pragma once
16 | 
17 | #include "table.c"
18 | #include "alloc_pool.c"
19 | #include "alloc_malloc.c"
20 | #include "alloc_dynamic.c"
21 | 
22 | 


--------------------------------------------------------------------------------
/src/net_common.c:
--------------------------------------------------------------------------------
  1 | // Copyright 2014 Carnegie Mellon University
  2 | //
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //     http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | #include "net_common.h"
 16 | #include "util.h"
 17 | #include "stopwatch.h"
 18 | 
 19 | #include <stdio.h>
 20 | #include <string.h>
 21 | #include <assert.h>
 22 | 
 23 | #include <rte_eal.h>
 24 | #include <rte_lcore.h>
 25 | #include <rte_byteorder.h>
 26 | #include <rte_ethdev.h>
 27 | #include <rte_log.h>
 28 | #include <rte_debug.h>
 29 | 
// Size of one mbuf entry: 2048 bytes plus the rte_mbuf header and the packet headroom.
#define MEHCACHED_MBUF_ENTRY_SIZE (2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
// 4096 per (port, queue) pair -- presumably the number of mbufs in a pool; confirm at the point of use.
#define MEHCACHED_MBUF_SIZE (MEHCACHED_MAX_PORTS * MEHCACHED_MAX_QUEUES * 4096)     // TODO: need to divide by numa node count

// Capacity of the per-queue RX and TX mbuf arrays; a TX burst is issued once this many packets are buffered.
#define MEHCACHED_MAX_PKT_BURST (32)

// Threshold values copied into mehcached_rx_conf.rx_thresh.
#define MEHCACHED_RX_PTHRESH (8)
#define MEHCACHED_RX_HTHRESH (8)
#define MEHCACHED_RX_WTHRESH (4)

// Threshold values copied into mehcached_tx_conf.tx_thresh.
#define MEHCACHED_TX_PTHRESH (36)
#define MEHCACHED_TX_HTHRESH (0)
#define MEHCACHED_TX_WTHRESH (0)

// Default RX/TX descriptor counts and the variables initialized from them.
#define RTE_TEST_RX_DESC_DEFAULT (128)
#define RTE_TEST_TX_DESC_DEFAULT (512)
static uint16_t mehcached_num_rx_desc = RTE_TEST_RX_DESC_DEFAULT;
static uint16_t mehcached_num_tx_desc = RTE_TEST_TX_DESC_DEFAULT;
 47 | 
 48 | //#define MEHCACHED_USE_QUICK_SLEEP
 49 | //#define MEHCACHED_USE_DEEP_SLEEP
 50 | 
// Port-level Ethernet configuration: all RX offloads listed below are turned
// off, multi-queue modes are NONE for RX and TX, and flow director runs in
// perfect-match mode with the 64K packet-buffer allocation.
static const struct rte_eth_conf mehcached_port_conf = {
	.rxmode = {
        .max_rx_pkt_len = ETHER_MAX_LEN,
		.split_hdr_size = 0,
		.header_split   = 0, /**< Header Split disabled */
		.hw_ip_checksum = 0, /**< IP checksum offload disabled */
		.hw_vlan_filter = 0, /**< VLAN filtering disabled */
		.jumbo_frame    = 0, /**< Jumbo Frame Support disabled */
		.hw_strip_crc   = 0, /**< CRC stripping by hardware disabled */
		.mq_mode = ETH_MQ_RX_NONE,
	},
	.txmode = {
		.mq_mode = ETH_MQ_TX_NONE,
	},
	.fdir_conf = {
		//.mode =             RTE_FDIR_MODE_NONE,
		.mode =             RTE_FDIR_MODE_PERFECT,
		.pballoc =          RTE_FDIR_PBALLOC_64K,
		//.pballoc =          RTE_FDIR_PBALLOC_256K,
		// NOTE(review): status reporting is enabled only when NDEBUG is defined
		// (i.e. in non-debug builds) -- confirm the condition is not inverted.
#ifndef NDEBUG
		.status =           RTE_FDIR_NO_REPORT_STATUS,
#else
		.status =           RTE_FDIR_REPORT_STATUS_ALWAYS,
#endif
		.flexbytes_offset = 0,
		.drop_queue =       0,
	},
};
 79 | 
// RX queue configuration: threshold values come from the MEHCACHED_RX_*
// macros defined near the top of this file.
static const struct rte_eth_rxconf mehcached_rx_conf = {
	.rx_thresh = {
		.pthresh = MEHCACHED_RX_PTHRESH,
		.hthresh = MEHCACHED_RX_HTHRESH,
		.wthresh = MEHCACHED_RX_WTHRESH,
	},
	.rx_free_thresh = 32,	// for DPDK >= 1.3
	.rx_drop_en = 0,		// (does not seem to be used)
};
 89 | 
// TX queue configuration: threshold values come from the MEHCACHED_TX_*
// macros defined near the top of this file.  The queue flags differ by build:
// when MEHCACHED_USE_SOFT_FDIR is defined, ETH_TXQ_FLAGS_NOMULTMEMP is left
// out of the flag set.
static const struct rte_eth_txconf mehcached_tx_conf = {
	.tx_thresh = {
		.pthresh = MEHCACHED_TX_PTHRESH,
		.hthresh = MEHCACHED_TX_HTHRESH,
		.wthresh = MEHCACHED_TX_WTHRESH,
	},
	.tx_free_thresh = 0, /* Use PMD default values */
	.tx_rs_thresh = 0, /* Use PMD default values */
#ifndef MEHCACHED_USE_SOFT_FDIR
    .txq_flags = (ETH_TXQ_FLAGS_NOMULTSEGS | ETH_TXQ_FLAGS_NOREFCOUNT | ETH_TXQ_FLAGS_NOMULTMEMP | ETH_TXQ_FLAGS_NOOFFLOADS),
#else
    .txq_flags = (ETH_TXQ_FLAGS_NOMULTSEGS | ETH_TXQ_FLAGS_NOREFCOUNT | ETH_TXQ_FLAGS_NOOFFLOADS),
#endif
};
104 | 
105 | 
// Per-(queue, port) software state: a small RX buffer filled by burst
// receives, a TX buffer that is flushed as a burst, optional sleep
// bookkeeping, and traffic counters.
struct mehcached_queue_state {
	// mbufs returned by the most recent RX burst
	struct rte_mbuf *rx_mbufs[MEHCACHED_MAX_PKT_BURST];
	// number of valid entries in rx_mbufs
	uint16_t rx_length;
	// index of the next rx_mbufs entry to hand out; equal to rx_length when the buffer is drained
	uint16_t rx_next_to_use;

#ifdef MEHCACHED_USE_QUICK_SLEEP
	// remaining number of receive calls to skip before polling the NIC again
	uint16_t rx_quick_sleep;
	// count of consecutive empty RX bursts, capped at 1024; scales rx_quick_sleep
	uint16_t rx_full_quick_sleep_count;
#endif
#ifdef MEHCACHED_USE_DEEP_SLEEP
	// stopwatch time of the most recent RX burst
	uint64_t rx_last_seen;
	// stopwatch time until which RX polling is suppressed
	uint64_t rx_deep_sleep_until;
	// moving average (weights 7/8 old, 1/8 new) of the estimated time between full batches
	uint64_t rx_inter_batch_time;
#endif

	// packets queued for transmission; sent once MEHCACHED_MAX_PKT_BURST have accumulated
	struct rte_mbuf *tx_mbufs[MEHCACHED_MAX_PKT_BURST];
	// number of valid entries in tx_mbufs
	uint16_t tx_length;

	// number of RX bursts issued and total packets they returned
	uint64_t num_rx_burst;
	uint64_t num_rx_received;

	// number of TX bursts issued, packets accepted by them, and packets freed unsent
	uint64_t num_tx_burst;
	uint64_t num_tx_sent;
	uint64_t num_tx_dropped;
} __rte_cache_aligned;
131 | 
// Packet mbuf pools, indexed by the socket id returned by rte_socket_id().
static struct rte_mempool *mehcached_pktmbuf_pool[MEHCACHED_MAX_NUMA_NODES];

//static uint16_t mehcached_lcore_to_queue[MEHCACHED_MAX_LCORES];
//static struct ether_addr mehcached_eth_addr[MEHCACHED_MAX_PORTS];

// Queue state pointers, indexed as [queue * MEHCACHED_MAX_PORTS + port_id].
static struct mehcached_queue_state *mehcached_queue_states[MEHCACHED_MAX_QUEUES * MEHCACHED_MAX_PORTS];
138 | 
139 | struct rte_mbuf *
140 | mehcached_packet_alloc()
141 | {
142 | 	return rte_pktmbuf_alloc(mehcached_pktmbuf_pool[rte_socket_id()]);
143 | }
144 | 
// Releases a packet mbuf by passing it to rte_pktmbuf_free().
void
mehcached_packet_free(struct rte_mbuf *mbuf)
{
	rte_pktmbuf_free(mbuf);
}
150 | 
// Returns the next received packet for port_id on the calling lcore's queue,
// or NULL when none is available.
//
// Packets are handed out one at a time from a per-queue buffer.  When the
// buffer has been fully consumed, it is refilled with a single
// rte_eth_rx_burst() of up to MEHCACHED_MAX_PKT_BURST packets -- unless one
// of the optional sleep mechanisms (MEHCACHED_USE_QUICK_SLEEP /
// MEHCACHED_USE_DEEP_SLEEP) decides to skip polling, in which case NULL is
// returned without touching the NIC.
struct rte_mbuf *
mehcached_receive_packet(uint8_t port_id)
{
	uint32_t lcore = rte_lcore_id();
	// uint16_t queue = mehcached_lcore_to_queue[lcore];
	// assert(queue != (uint16_t)-1);
	// the lcore id is used directly as the queue index
	uint16_t queue = (uint16_t)lcore;
	struct mehcached_queue_state *state = mehcached_queue_states[queue * MEHCACHED_MAX_PORTS + port_id];

	// buffer drained: decide whether to poll the NIC for a new burst
	if (state->rx_next_to_use == state->rx_length)
	{
#ifdef MEHCACHED_USE_QUICK_SLEEP
		// quick sleep: skip this call and count down the remaining skips
		if (state->rx_quick_sleep > 0)
		{
			// struct rte_mbuf *t = mehcached_packet_alloc();
			// if (t == NULL)
			// 	printf("cannot alloc mbuf\n");
			// mehcached_packet_free(t);
			state->rx_quick_sleep--;
			return NULL;
		}
#endif

#ifdef MEHCACHED_USE_DEEP_SLEEP
		uint64_t now = mehcached_stopwatch_now();

		// too small value makes deep sleep ineffective
		// too large value may incorrectly penalize a queue with occasional underflows
		const uint64_t max_deep_sleep_time = mehcached_stopwatch_1_usec * 50;

		// still need to sleep?
		if (state->rx_deep_sleep_until - now <= max_deep_sleep_time)
		{
			// assumed invariant: rx_deep_sleep_until <= now + max_deep_sleep_time
			//   (when no overflow happens)
			// the condition in the if statement checks the sleep time correctly under this invariant
			return NULL;
		}
#endif

		// refill the buffer and update the RX counters
		state->rx_length = rte_eth_rx_burst(port_id, queue, state->rx_mbufs, MEHCACHED_MAX_PKT_BURST);
		state->num_rx_received += state->rx_length;
		state->rx_next_to_use = 0;
		state->num_rx_burst++;

#ifdef MEHCACHED_USE_QUICK_SLEEP
		// sleep if no enough RX packets were received
		// this helps reduce PCIe traffic when # of RX packets is imbalanced across queues used by the same core
		state->rx_quick_sleep = (uint16_t)(MEHCACHED_MAX_PKT_BURST - state->rx_length);
		if (state->rx_length != 0)
			state->rx_full_quick_sleep_count = 0;
		else
		{
			// empty burst: scale the sleep by the number of consecutive empty bursts (capped at 1024)
			if (state->rx_full_quick_sleep_count < 1024)
				state->rx_full_quick_sleep_count++;
			state->rx_quick_sleep = (uint16_t)(state->rx_quick_sleep * state->rx_full_quick_sleep_count);
		}

#endif

#ifdef MEHCACHED_USE_DEEP_SLEEP
		uint64_t to_sleep;
		uint64_t inter_batch_time;

		// adjust sleep time so that the next rx_burst can get MEHCACHED_MAX_PKT_BURST packets
		// note (state->rx_length + 1): this makes inter_batch_time slightly smaller than actual expectation
		// because we do not know whether there are additional subsequent batches
		inter_batch_time = (now - state->rx_last_seen) * MEHCACHED_MAX_PKT_BURST / (state->rx_length + 1);
		if (inter_batch_time > max_deep_sleep_time)
			inter_batch_time = max_deep_sleep_time;
		state->rx_last_seen = now;

		// moving average: 7/8 of the previous estimate plus 1/8 of the new sample
		state->rx_inter_batch_time = (state->rx_inter_batch_time * 7 + inter_batch_time * 1) / 8;

		// deep sleep to prevent excessive PCIe traffic when RX across cores is imbalanced
		state->rx_deep_sleep_until = now + state->rx_inter_batch_time;

		// for debugging batch size
		// if ((state->num_rx_burst & 0xffffUL) == 0)
		// {
		// 	printf("port = %zu, queue = %zu; average_batch size = %lf, inter batch time = %lf us\n", port, queue, (double)state->num_rx_received / (double)state->num_rx_burst, (double)state->rx_inter_batch_time / (double)mehcached_stopwatch_1_usec);
		// 	state->num_rx_received = 0;
		// 	state->num_rx_burst = 0;
		// }
#endif
	}

	// hand out the next buffered packet, if any
	if (state->rx_next_to_use < state->rx_length)
    {
#ifndef NDEBUG
        //printf("mehcached_receive_packet: lcore=%zu, port=%zu, queue=%zu\n", lcore, port, queue);
#endif
		return state->rx_mbufs[state->rx_next_to_use++];
    }
	else
		return NULL;
}
248 | 
249 | void
250 | mehcached_receive_packets(uint8_t port_id, struct rte_mbuf **mbufs, size_t *in_out_num_mbufs)
251 | {
252 | 	uint32_t lcore = rte_lcore_id();
253 | 	// uint16_t queue = mehcached_lcore_to_queue[lcore];
254 | 	// assert(queue != (uint16_t)-1);
255 | 	uint16_t queue = (uint16_t)lcore;
256 | 	struct mehcached_queue_state *state = mehcached_queue_states[queue * MEHCACHED_MAX_PORTS + port_id];
257 | 
258 | 	*in_out_num_mbufs = (size_t)rte_eth_rx_burst(port_id, queue, mbufs, (uint16_t)*in_out_num_mbufs);
259 | 	state->num_rx_received += *in_out_num_mbufs;
260 | 	state->num_rx_burst++;
261 | }
262 | 
263 | void
264 | mehcached_send_packet(uint8_t port_id, struct rte_mbuf *mbuf)
265 | {
266 | 	uint32_t lcore = rte_lcore_id();
267 | 	// uint16_t queue = mehcached_lcore_to_queue[lcore];
268 | 	// assert(queue != (uint16_t)-1);
269 | 	uint16_t queue = (uint16_t)lcore;
270 | 	struct mehcached_queue_state *state = mehcached_queue_states[queue * MEHCACHED_MAX_PORTS + port_id];
271 | 
272 | #ifndef NDEBUG
273 |     //printf("mehcached_send_packet: lcore=%zu, port=%zu, queue=%zu\n", lcore, port, queue);
274 | #endif
275 | 
276 | 	state->tx_mbufs[state->tx_length++] = mbuf;
277 | 	if (state->tx_length == MEHCACHED_MAX_PKT_BURST)
278 | 	{
279 | 		uint16_t count = rte_eth_tx_burst(port_id, queue, state->tx_mbufs, MEHCACHED_MAX_PKT_BURST);
280 | 		state->num_tx_sent += count;
281 | 		state->num_tx_dropped += (uint64_t)(MEHCACHED_MAX_PKT_BURST - count);
282 | 		for (; count < MEHCACHED_MAX_PKT_BURST; count++)
283 | 			rte_pktmbuf_free(state->tx_mbufs[count]);
284 | 		state->tx_length = 0;
285 | 		state->num_tx_burst++;
286 | 	}
287 | }
288 | 
289 | void
290 | mehcached_send_packet_flush(uint8_t port_id)
291 | {
292 | 	uint32_t lcore = rte_lcore_id();
293 | 	// uint16_t queue = mehcached_lcore_to_queue[lcore];
294 | 	// assert(queue != (uint16_t)-1);
295 | 	uint16_t queue = (uint16_t)lcore;
296 | 	struct mehcached_queue_state *state = mehcached_queue_states[queue * MEHCACHED_MAX_PORTS + port_id];
297 | 
298 | 	if (state->tx_length > 0)
299 | 	{
300 | 		uint16_t count = rte_eth_tx_burst(port_id, queue, state->tx_mbufs, state->tx_length);
301 | 		state->num_tx_sent += count;
302 | 		state->num_tx_dropped += (uint64_t)(state->tx_length - count);
303 | 		for (; count < state->tx_length; count++)
304 | 			rte_pktmbuf_free(state->tx_mbufs[count]);
305 | 		state->tx_length = 0;
306 | 		state->num_tx_burst++;
307 | 	}
308 | }
309 | 
// Reads the RX/TX counters of the calling lcore's queue on port_id.
// This is mehcached_get_stats_lcore() with the lcore fixed to the caller's own lcore id;
// the out_* pointers are forwarded unchanged.
void
mehcached_get_stats(uint8_t port_id, uint64_t *out_num_rx_burst, uint64_t *out_num_rx_received, uint64_t *out_num_tx_burst, uint64_t *out_num_tx_sent, uint64_t *out_num_tx_dropped)
{
	const uint32_t current_lcore = rte_lcore_id();

	mehcached_get_stats_lcore(
		port_id,
		current_lcore,
		out_num_rx_burst,
		out_num_rx_received,
		out_num_tx_burst,
		out_num_tx_sent,
		out_num_tx_dropped);
}
315 | 
316 | void
317 | mehcached_get_stats_lcore(uint8_t port_id, uint32_t lcore, uint64_t *out_num_rx_burst, uint64_t *out_num_rx_received, uint64_t *out_num_tx_burst, uint64_t *out_num_tx_sent, uint64_t *out_num_tx_dropped)
318 | {
319 | 	// uint16_t queue = mehcached_lcore_to_queue[lcore];
320 | 	// assert(queue != (uint16_t)-1);
321 | 	uint16_t queue = (uint16_t)lcore;
322 | 	struct mehcached_queue_state *state = mehcached_queue_states[queue * MEHCACHED_MAX_PORTS + port_id];
323 | 
324 | 	if (out_num_rx_burst)
325 | 		*out_num_rx_burst = state->num_rx_burst;
326 | 	if (out_num_rx_received)
327 | 		*out_num_rx_received = state->num_rx_received;
328 | 	if (out_num_tx_burst)
329 | 		*out_num_tx_burst = state->num_tx_burst;
330 | 	if (out_num_tx_sent)
331 | 		*out_num_tx_sent = state->num_tx_sent;
332 | 	if (out_num_tx_dropped)
333 | 		*out_num_tx_dropped = state->num_tx_dropped;
334 | 
335 |     //struct rte_eth_stats stats;
336 |     //rte_eth_stats_get(port, &stats);
337 |     //printf("port %zu i %lu o %lu ie %lu oe %lu\n", port, stats.ipackets, stats.opackets, stats.ierrors, stats.oerrors);
338 | }
339 | 
340 | struct rte_mbuf *
341 | mehcached_clone_packet(struct rte_mbuf *mbuf_src)
342 | {
343 | 	return rte_pktmbuf_clone(mbuf_src, mehcached_pktmbuf_pool[rte_socket_id()]);
344 | }
345 | 
346 | bool
347 | mehcached_init_network(uint64_t cpu_mask, uint64_t port_mask, uint8_t *out_num_ports)
348 | {
349 | 	int ret;
350 | 	size_t i;
351 | 
352 | 	size_t num_numa_nodes = 0;
353 | 	uint16_t num_queues = 0;
354 | 
355 | 	assert(rte_lcore_count() <= MEHCACHED_MAX_LCORES);
356 | 
357 | 	// count required queues
358 | 	for (i = 0; i < rte_lcore_count(); i++)
359 | 	{
360 | 		if ((cpu_mask & ((uint64_t)1 << i)) != 0)
361 | 			num_queues++;
362 | 	}
363 | 	assert(num_numa_nodes <= MEHCACHED_MAX_QUEUES);
364 | 
365 | 	// count numa nodes
366 | 	for (i = 0; i < rte_lcore_count(); i++)
367 | 	{
368 | 		uint32_t socket_id = (uint32_t)rte_lcore_to_socket_id((unsigned int)i);
369 | 		if (num_numa_nodes <= socket_id)
370 | 			num_numa_nodes = socket_id + 1;
371 | 	}
372 | 	assert(num_numa_nodes <= MEHCACHED_MAX_NUMA_NODES);
373 | 
374 | 	// initialize pktmbuf
375 | 	for (i = 0; i < num_numa_nodes; i++)
376 | 	{
377 | 		printf("allocating pktmbuf on node %zu... \n", i);
378 | 		char pool_name[64];
379 | 		snprintf(pool_name, sizeof(pool_name), "pktmbuf_pool%zu", i);
380 | 		// if this is not big enough, RX/TX performance may not be consistent, e.g., between CREW and CRCW experiments
381 | 		// the maximum cache size can be adjusted in DPDK's .config file: CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE
382 | 		const unsigned int cache_size = MEHCACHED_MAX_PORTS * 1024;
383 | 		mehcached_pktmbuf_pool[i] = rte_mempool_create(pool_name, MEHCACHED_MBUF_SIZE, MEHCACHED_MBUF_ENTRY_SIZE, cache_size, sizeof(struct rte_pktmbuf_pool_private), rte_pktmbuf_pool_init, NULL, rte_pktmbuf_init, NULL, (int)i, 0);
384 | 		if (mehcached_pktmbuf_pool[i] == NULL)
385 | 		{
386 | 			fprintf(stderr, "failed to allocate mbuf for numa node %zu\n", i);
387 | 			return false;
388 | 		}
389 | 	}
390 | 
391 | 	// initialize driver
392 | #ifdef RTE_LIBRTE_IXGBE_PMD
393 | 	printf("initializing PMD\n");
394 | 	if (rte_ixgbe_pmd_init() < 0)
395 | 	{
396 | 		fprintf(stderr, "failed to initialize ixgbe pmd\n");
397 | 		return false;
398 | 	}
399 | #endif
400 | 
401 | 	printf("probing PCI\n");
402 | 	if (rte_eal_pci_probe() < 0)
403 | 	{
404 | 		fprintf(stderr, "failed to probe PCI\n");
405 | 		return false;
406 | 	}
407 | 
408 | 	// TODO: initialize and set up timer for forced TX
409 | 
410 | 	// check port and queue limits
411 | 	uint8_t num_ports = rte_eth_dev_count();
412 | 	assert(num_ports <= MEHCACHED_MAX_PORTS);
413 | 	*out_num_ports = num_ports;
414 | 
415 | 	printf("checking queue limits\n");
416 | 	uint8_t port_id;
417 | 	for (port_id = 0; port_id < num_ports; port_id++)
418 | 	{
419 | 		if ((port_mask & ((uint64_t)1 << port_id)) == 0)
420 | 			continue;
421 | 
422 | 		struct rte_eth_dev_info dev_info;
423 | 		rte_eth_dev_info_get((uint8_t)port_id, &dev_info);
424 | 
425 | 		if (num_queues > dev_info.max_tx_queues || num_queues > dev_info.max_rx_queues)
426 | 		{
427 | 			fprintf(stderr, "device supports too few queues\n");
428 | 			return false;
429 | 		}
430 | 	}
431 | 
432 | 	// map queues to lcores
433 | 	uint32_t lcore = 0;
434 | 	// uint16_t queue = 0;
435 | // 	for (lcore = 0; lcore < rte_lcore_count(); lcore++)
436 | // 	{
437 | // 		if ((cpu_mask & ((uint64_t)1 << i)) == 0)
438 | // 		{
439 | // 			mehcached_lcore_to_queue[lcore] = (uint16_t)-1;
440 | // 			continue;
441 | // 		}
442 | 
443 | // 		mehcached_lcore_to_queue[lcore] = queue;
444 | // #ifndef NDEBUG
445 | // 		printf("queue %hhu mapped to lcore %hu\n", queue, lcore);
446 | // #endif
447 | // 		queue++;
448 | // 	}
449 | 
450 | 	// initialize ports
451 | 	for (port_id = 0; port_id < num_ports; port_id++)
452 | 	{
453 | 		if ((port_mask & ((uint64_t)1 << port_id)) == 0)
454 | 			continue;
455 | 
456 | 		printf("initializing port %hhu...\n", port_id);
457 | 
458 | 		// get mac address
459 | 		//rte_eth_macaddr_get((uint8_t)port, &mehcached_eth_addr[port]);
460 | 
461 | 		ret = rte_eth_dev_configure(port_id, num_queues, num_queues, &mehcached_port_conf);
462 | 		if (ret < 0)
463 | 		{
464 | 			fprintf(stderr, "failed to configure port %hhu (err=%d)\n", port_id, ret);
465 | 			return false;
466 | 		}
467 | 
468 | 		uint32_t lcore;
469 | 		for (lcore = 0; lcore < rte_lcore_count(); lcore++)
470 | 		{
471 | 			// uint16_t queue = mehcached_lcore_to_queue[lcore];
472 | 			// if (queue == (uint16_t)-1)
473 | 			// 	continue;
474 | 			uint16_t queue = (uint16_t)lcore;
475 | 
476 | 			size_t numa_node = rte_lcore_to_socket_id((unsigned int)lcore);
477 | 
478 | 			ret = rte_eth_rx_queue_setup(port_id, queue, (unsigned int)mehcached_num_rx_desc, (unsigned int)numa_node, &mehcached_rx_conf, mehcached_pktmbuf_pool[numa_node]);
479 | 			if (ret < 0)
480 | 			{
481 | 				fprintf(stderr, "failed to configure port %hhu rx_queue %hu (err=%d)\n", port_id, queue, ret);
482 | 				return false;
483 | 			}
484 | 
485 | 			ret = rte_eth_tx_queue_setup(port_id, queue, (unsigned int)mehcached_num_tx_desc, (unsigned int)numa_node, &mehcached_tx_conf);
486 | 			if (ret < 0)
487 | 			{
488 | 				fprintf(stderr, "failed to configure port %hhu tx_queue %hu (err=%d)\n", port_id, queue, ret);
489 | 				return false;
490 | 			}
491 | 		}
492 | 
493 | 		// start device
494 | 		ret = rte_eth_dev_start(port_id);
495 | 		if (ret < 0)
496 | 		{
497 | 			fprintf(stderr, "failed to start port %hhu (err=%d)\n", port_id, ret);
498 | 			return false;
499 | 		}
500 | 
501 | // 		// turn on promiscuous mode
502 | // #ifndef NDEBUG
503 | // 		printf("setting promiscuous mode on port %hhu...\n", port_id);
504 | // #endif
505 | // 		rte_eth_promiscuous_enable(port_id);
506 | 	}
507 | 
508 | 	// the following takes some time, but this ensures the device ready for full speed RX/TX when the initialization is done
509 | 	// without this, the initial packet transmission may be blocked
510 | 	for (port_id = 0; port_id < num_ports; port_id++)
511 | 	{
512 | 		if ((port_mask & ((uint64_t)1 << port_id)) == 0)
513 | 			continue;
514 | 
515 | 		printf("querying port %hhu... ", port_id);
516 | 		fflush(stdout);
517 | 
518 | 		struct rte_eth_link link;
519 | 		rte_eth_link_get(port_id, &link);
520 | 		if (!link.link_status)
521 | 		{
522 | 			printf("link down\n");
523 | 			return false;
524 | 		}
525 | 
526 | 		printf("%hu Gbps (%s)\n", link.link_speed / 1000, (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? ("full-duplex") : ("half-duplex"));
527 | 	}
528 | 
529 | 	memset(mehcached_queue_states, 0, sizeof(mehcached_queue_states));
530 | 	for (port_id = 0; port_id < num_ports; port_id++)
531 | 		for (lcore = 0; lcore < rte_lcore_count(); lcore++)
532 | 		{
533 | 			uint16_t queue = (uint16_t)lcore;
534 | 			mehcached_queue_states[queue * MEHCACHED_MAX_PORTS + port_id] = mehcached_eal_malloc_lcore(sizeof(struct mehcached_queue_state), lcore);
535 | 			memset(mehcached_queue_states[queue * MEHCACHED_MAX_PORTS + port_id], 0, sizeof(struct mehcached_queue_state));
536 | 		}
537 | 
538 | 	return true;
539 | }
540 | 
// Shuts down every port selected by port_mask (bit p set means port p).
// All selected ports are stopped first; only then are they closed, in a second pass.
void
mehcached_free_network(uint64_t port_mask)
{
	const uint8_t port_count = rte_eth_dev_count();
	uint8_t p;

	// pass 1: stop
	for (p = 0; p < port_count; p++)
	{
		const uint64_t port_bit = (uint64_t)1 << p;
		if ((port_mask & port_bit) != 0)
		{
			printf("stopping port %hhu...\n", p);
			rte_eth_dev_stop(p);
		}
	}

	// pass 2: close
	for (p = 0; p < port_count; p++)
	{
		const uint64_t port_bit = (uint64_t)1 << p;
		if ((port_mask & port_bit) != 0)
		{
			printf("closing port %hhu...\n", p);
			rte_eth_dev_close(p);
		}
	}
}
565 | 
566 | bool
567 | mehcached_set_dst_port_mask(uint8_t port_id, uint16_t l4_dst_port_mask)
568 | {
569 | 	struct rte_fdir_masks mask;
570 | 	memset(&mask, 0, sizeof(mask));
571 | 	mask.dst_port_mask = l4_dst_port_mask;	// this must be little-endian (host)
572 | 
573 | 	int ret = rte_eth_dev_fdir_set_masks(port_id, &mask);
574 | 	if (ret < 0)
575 | 	{
576 | 		fprintf(stderr, "failed to set perfect filter mask on port %hhu (err=%d)\n", port_id, ret);
577 | 		return false;
578 | 	}
579 | 
580 | 	return true;
581 | }
582 | 
583 | bool
584 | mehcached_set_dst_port_mapping(uint8_t port_id, uint16_t l4_dst_port, uint32_t lcore)
585 | {
586 | 	// uint16_t queue = mehcached_lcore_to_queue[lcore];
587 | 	// if (queue == (uint16_t)-1)
588 | 	// {
589 | 	// 	fprintf(stderr, "no queue on port %hhu exists for lcore %u\n", port_id, lcore);
590 | 	// 	return false;
591 | 	// }
592 | 	uint16_t queue = (uint16_t)lcore;
593 | 
594 | 	struct rte_fdir_filter filter;
595 | 	memset(&filter, 0, sizeof(filter));
596 | 	filter.iptype = RTE_FDIR_IPTYPE_IPV4;
597 | 	filter.l4type = RTE_FDIR_L4TYPE_UDP;
598 | 	filter.port_dst = rte_cpu_to_be_16((uint16_t)l4_dst_port);    // this must be big-endian
599 |     uint16_t soft_id = (uint16_t)l4_dst_port;	// will be unique on each port (with perfect filter)
600 | 
601 | 	int ret = rte_eth_dev_fdir_add_perfect_filter(port_id, &filter, soft_id, (uint8_t)queue, 0);
602 | 	if (ret < 0)
603 | 	{
604 | 		fprintf(stderr, "failed to add perfect filter entry on port %hhu (err=%d)\n", port_id, ret);
605 | 		return false;
606 | 	}
607 | 
608 | 	return true;
609 | }
610 | 


--------------------------------------------------------------------------------
/src/net_common.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Carnegie Mellon University
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #pragma once
16 | 
17 | #include "common.h"
18 | 
19 | #include <rte_mbuf.h>
20 | 
// Upper bounds checked by mehcached_init_network() (via assert) for the number of lcores,
// NUMA nodes, ports and queues. MEHCACHED_MAX_PORTS is also the row stride of the
// per-(queue, port) state table in net_common.c.
#define MEHCACHED_MAX_LCORES (16)
#define MEHCACHED_MAX_NUMA_NODES (2)

#define MEHCACHED_MAX_PORTS (8)
#define MEHCACHED_MAX_QUEUES (16)

// Presumably allocates a packet buffer -- definition not visible here; confirm in net_common.c.
struct rte_mbuf *
mehcached_packet_alloc();

// Presumably releases a packet buffer -- definition not visible here; confirm in net_common.c.
void
mehcached_packet_free(struct rte_mbuf *mbuf);

// Returns the next received packet for port_id, or NULL when none is available.
// NOTE(review): only the tail of the definition was reviewed; buffering/sleep behavior
// should be confirmed in net_common.c.
struct rte_mbuf *
mehcached_receive_packet(uint8_t port_id);

// Issues one RX burst on the calling lcore's queue of port_id.
// in_out_num_mbufs: in = capacity of mbufs, out = number of packets stored in mbufs.
void
mehcached_receive_packets(uint8_t port_id, struct rte_mbuf **mbufs, size_t *in_out_num_mbufs);

// Stages mbuf for transmission on the calling lcore's queue of port_id; a TX burst is
// issued once a full burst is staged. Packets the NIC does not accept are freed and
// counted as dropped.
void
mehcached_send_packet(uint8_t port_id, struct rte_mbuf *mbuf);

// Transmits whatever mehcached_send_packet() has staged for the calling lcore on port_id.
void
mehcached_send_packet_flush(uint8_t port_id);

// Reads the RX/TX counters of the calling lcore's queue on port_id.
// Any out_* pointer may be NULL to skip that counter.
void
mehcached_get_stats(uint8_t port_id, uint64_t *out_num_rx_burst, uint64_t *out_num_rx_received, uint64_t *out_num_tx_burst, uint64_t *out_num_tx_sent, uint64_t *out_num_tx_dropped);

// Same as mehcached_get_stats(), but for the queue of an explicitly given lcore.
void
mehcached_get_stats_lcore(uint8_t port_id, uint32_t lcore, uint64_t *out_num_rx_burst, uint64_t *out_num_rx_received, uint64_t *out_num_tx_burst, uint64_t *out_num_tx_sent, uint64_t *out_num_tx_dropped);

// Clones mbuf_src using the packet pool of the caller's NUMA socket.
struct rte_mbuf *
mehcached_clone_packet(struct rte_mbuf *mbuf_src);

// Creates the packet pools, probes PCI, then configures and starts every port selected
// by port_mask; cpu_mask determines how many queues are requested per port.
// out_num_ports receives the number of detected ports. Returns false on any failure.
bool
mehcached_init_network(uint64_t cpu_mask, uint64_t port_mask, uint8_t *out_num_ports);

// Stops and then closes every port selected by port_mask.
void
mehcached_free_network(uint64_t port_mask);

// Sets the flow director mask of port_id so that only the L4 destination port
// (masked by l4_dst_port_mask, host byte order) is matched. Returns false on failure.
bool
mehcached_set_dst_port_mask(uint8_t port_id, uint16_t l4_dst_port_mask);

// Adds a flow director perfect filter steering IPv4/UDP packets with destination port
// l4_dst_port on port_id to the queue of the given lcore. Returns false on failure.
bool
mehcached_set_dst_port_mapping(uint8_t port_id, uint16_t l4_dst_port, uint32_t lcore);
65 | 


--------------------------------------------------------------------------------
/src/netbench_analysis.c:
--------------------------------------------------------------------------------
  1 | // Copyright 2014 Carnegie Mellon University
  2 | //
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //     http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | #include <stdio.h>
 16 | #include <stdlib.h>
 17 | #include <string.h>
 18 | #include <pthread.h>
 19 | #include <assert.h>
 20 | #include <signal.h>
 21 | 
 22 | #include "mehcached.h"
 23 | #include "hash.h"
 24 | #include "zipf.h"
 25 | #include "stopwatch.h"
 26 | #include "netbench_config.h"
 27 | 
 28 | // uncomment this to use CREW instead of EREW
 29 | //#define MEHCACHED_LOAD_BALANCE_USE_CREW_PARTITIONS
 30 | 
 31 | static
 32 | uint64_t
 33 | mehcached_hash_key(uint64_t int_key)
 34 | {
 35 | 	return hash((const uint8_t *)&int_key, 8);
 36 | }
 37 | 
 38 | static
 39 | void
 40 | mehcached_print_array_uint64_t(uint64_t arr[], size_t num_elements)
 41 | {
 42 | 	size_t i;
 43 | 	for (i = 0; i < num_elements; i++)
 44 | 		printf("[%3zu]%lu ", i, arr[i]);
 45 | 	printf("\n");
 46 | }
 47 | 
 48 | static
 49 | void
 50 | mehcached_print_array_normalized(uint64_t arr[], size_t num_elements)
 51 | {
 52 | 	size_t i;
 53 | 	uint64_t max_elem = 0;
 54 | 	uint64_t min_elem = (uint64_t)-1;
 55 | 	uint64_t sum_elem = 0;
 56 | 	for (i = 0; i < num_elements; i++)
 57 | 	{
 58 | 		if (max_elem < arr[i])
 59 | 			max_elem = arr[i];
 60 | 		if (min_elem > arr[i])
 61 | 			min_elem = arr[i];
 62 | 		sum_elem += arr[i];
 63 | 	}
 64 | 	if (max_elem == 0)
 65 | 		max_elem = 1;	// to avoid divide by zero
 66 | 	for (i = 0; i < num_elements; i++)
 67 | 	{
 68 | 		printf("[%3zu]%lf", i, (double)arr[i] / (double)max_elem);
 69 | 		if (i % 8 != 7)
 70 | 			printf(" ");
 71 | 		else if (i != num_elements - 1)
 72 | 			printf("\n");
 73 | 	}
 74 | 	printf("\n");
 75 | 	printf("min = %lf\n", (double)min_elem / (double)max_elem);
 76 | 	printf("avg = %lf\n", (double)sum_elem / (double)max_elem / (double)num_elements);
 77 | }
 78 | 
 79 | static
 80 | void
 81 | mehcached_calc_thread_load(uint64_t out_thread_load[], const uint64_t partition_load[], const uint64_t hot_item_load[], const uint8_t partition_to_thread_org[], const uint8_t hot_item_to_thread_org[], const uint8_t partition_to_thread_new[], const uint8_t hot_item_to_thread_new[], uint8_t num_threads, uint64_t num_partitions, uint64_t num_hot_items, uint8_t num_numa_nodes, double get_ratio, bool isolated_server_numa_nodes)
 82 | {
 83 | 	uint16_t partition_id;
 84 | 	uint8_t thread_id;
 85 | 	uint64_t key;
 86 | 
 87 | #ifndef MEHCACHED_LOAD_BALANCE_USE_CREW_PARTITIONS
 88 | 	(void)partition_to_thread_org;
 89 | #endif
 90 | 
 91 | 	for (thread_id = 0; thread_id < num_threads; thread_id++)
 92 | 		out_thread_load[thread_id] = 0;
 93 | 
 94 | 	for (partition_id = 0; partition_id < num_partitions; partition_id++)
 95 | 	{
 96 | #ifndef MEHCACHED_LOAD_BALANCE_USE_CREW_PARTITIONS
 97 | 		// EREW
 98 | 		thread_id = partition_to_thread_new[partition_id];
 99 | 		out_thread_load[thread_id] += partition_load[partition_id];
100 | #else
101 | 		// CR
102 | 		uint8_t numa_node_id = (uint8_t)(partition_to_thread_org[partition_id] & 1);
103 | 		for (thread_id = 0; thread_id < num_threads; thread_id++)
104 | 		{
105 | 			if (isolated_server_numa_nodes)
106 | 			{
107 | 				if (thread_id % num_numa_nodes != numa_node_id)
108 | 					continue;
109 | 				out_thread_load[thread_id] += (uint64_t)((double)partition_load[partition_id] * get_ratio / (double)(num_threads / num_numa_nodes));
110 | 			}
111 | 			else
112 | 				out_thread_load[thread_id] += (uint64_t)((double)partition_load[partition_id] * get_ratio / (double)num_threads);
113 | 		}
114 | 		// EW
115 | 		thread_id = partition_to_thread_new[partition_id];
116 | 		out_thread_load[thread_id] += (uint64_t)((double)partition_load[partition_id] * (1. - get_ratio));
117 | #endif
118 | 	}
119 | 
120 | 	for (key = 0; key < num_hot_items; key++)
121 | 	{
122 | 		// CR
123 | 		uint8_t numa_node_id = (uint8_t)(hot_item_to_thread_org[key] & 1);
124 | 		for (thread_id = 0; thread_id < num_threads; thread_id++)
125 | 		{
126 | 			if (isolated_server_numa_nodes)
127 | 			{
128 | 				if (thread_id % num_numa_nodes != numa_node_id)
129 | 					continue;
130 | 				out_thread_load[thread_id] += (uint64_t)((double)hot_item_load[key] * get_ratio / (double)(num_threads / num_numa_nodes));
131 | 			}
132 | 			else
133 | 				out_thread_load[thread_id] += (uint64_t)((double)hot_item_load[key] * get_ratio / (double)num_threads);
134 | 		}
135 | 		// EW
136 | 		thread_id = hot_item_to_thread_new[key];
137 | 		out_thread_load[thread_id] += (uint64_t)((double)hot_item_load[key] * (1. - get_ratio));
138 | 	}
139 | }
140 | 
// Greedy load balancer: reassigns partitions and hot items to threads, one NUMA node at a
// time, never moving anything to a thread of another node (thread t is on node
// t % num_numa_nodes; an item's node is that of its original owner).
//
// For each node:
//   1. collect the partitions and hot items originally owned by the node
//   2. pre-charge the threads with the read share (get_ratio) of every concurrently
//      readable entry -- hot items always, partitions only when
//      MEHCACHED_LOAD_BALANCE_USE_CREW_PARTITIONS is defined -- and keep only the write
//      share as the entry's own load
//   3. sort the entries by remaining load, heaviest first
//   4. give each entry to the currently least-loaded thread of the node
//
// Results are written to out_partition_to_thread_new / out_hot_item_to_thread_new.
static
void
mehcached_load_balance(const uint64_t partition_load[], const uint64_t hot_item_load[], const uint8_t partition_to_thread_org[], uint8_t out_partition_to_thread_new[], const uint8_t hot_item_to_thread_org[], uint8_t out_hot_item_to_thread_new[], uint8_t num_threads, uint64_t num_partitions, uint64_t num_hot_items, uint8_t num_numa_nodes, double get_ratio, bool isolated_server_numa_nodes)
{
	// parallel arrays describing the candidate entries of the node being processed
	const uint64_t capacity = num_partitions + num_hot_items;
	size_t kinds[capacity];		// 0 = partition, 1 = hot item
	size_t ids[capacity];		// partition id or hot item key
	uint64_t loads[capacity];

	uint8_t node;
	for (node = 0; node < num_numa_nodes; node++)
	{
		uint64_t count = 0;
		uint64_t e;
		unsigned int t;

		// step 1: collect the entries that live on this node
		uint16_t p;
		for (p = 0; p < num_partitions; p++)
		{
			if (partition_to_thread_org[p] % num_numa_nodes == node)
			{
				kinds[count] = 0;
				ids[count] = p;
				loads[count] = partition_load[p];
				count++;
			}
		}

		uint64_t k;
		for (k = 0; k < num_hot_items; k++)
		{
			if (hot_item_to_thread_org[k] % num_numa_nodes == node)
			{
				kinds[count] = 1;
				ids[count] = k;
				loads[count] = hot_item_load[k];
				count++;
			}
		}

		uint64_t thread_load[num_threads];
		for (t = 0; t < num_threads; t++)
			thread_load[t] = 0;

		// step 2: spread the read share of concurrently readable entries (CREW)
		// reads are served by the node's threads only (isolated) or by every thread
		const double readers = isolated_server_numa_nodes ? (double)(num_threads / num_numa_nodes) : (double)num_threads;
		for (e = 0; e < count; e++)
		{
#ifndef MEHCACHED_LOAD_BALANCE_USE_CREW_PARTITIONS
			// EREW partitions keep their whole load with the owner
			if (kinds[e] != 1)
				continue;
#endif
			for (t = 0; t < num_threads; t++)
			{
				if (isolated_server_numa_nodes && t % num_numa_nodes != node)
					continue;
				thread_load[t] += (uint64_t)((double)loads[e] * get_ratio / readers);
			}
			// only the exclusive (write) share remains to be placed
			loads[e] = (uint64_t)((double)loads[e] * (1. - get_ratio));
		}

		// guarantee non-zero load to spread partitions across cores even when all access can be done by any core
		for (e = 0; e < count; e++)
			loads[e] = (loads[e] == 0) ? 1 : loads[e];

		// step 3: exchange sort, heaviest entry first
		uint64_t a;
		uint64_t b;
		for (a = 0; a < count; a++)
			for (b = a + 1; b < count; b++)
			{
				if (loads[a] >= loads[b])
					continue;

				const size_t kind_tmp = kinds[a];
				const size_t id_tmp = ids[a];
				const uint64_t load_tmp = loads[a];
				kinds[a] = kinds[b];
				ids[a] = ids[b];
				loads[a] = loads[b];
				kinds[b] = kind_tmp;
				ids[b] = id_tmp;
				loads[b] = load_tmp;
			}

		// step 4: best fit
		for (e = 0; e < count; e++)
		{
			// candidates are node, node + num_numa_nodes, ... (do not move anything across NUMA nodes);
			// the lowest-numbered thread wins ties
			uint8_t target = node;
			for (t = (unsigned int)node + num_numa_nodes; t < num_threads; t += num_numa_nodes)
			{
				if (thread_load[t] < thread_load[target])
					target = (uint8_t)t;
			}

			if (kinds[e] == 0)
				out_partition_to_thread_new[ids[e]] = target;
			else
				out_hot_item_to_thread_new[ids[e]] = target;
			thread_load[target] += loads[e];
		}
	}
}
276 | 
277 | static
278 | void
279 | mehcached_benchmark_analysis(uint64_t num_hot_items, double zipf_theta, double get_ratio, bool isolated_server_numa_nodes)
280 | {
281 | 	printf("num_hot_items = %lu\n", num_hot_items);
282 | 	printf("zipf_theta = %lf\n", zipf_theta);
283 | 	printf("get_ratio = %lf\n", get_ratio);
284 | 	printf("\n");
285 | 
286 | 	uint8_t num_numa_nodes = 2;
287 | 	uint8_t num_threads = 16;
288 | 	//uint16_t num_partitions = 64;
289 | 	uint16_t num_partitions = 16;
290 | 	uint64_t num_items = 192 * 1048576;
291 | 
292 | 	uint64_t partition_load[num_partitions];
293 | 	uint64_t hot_item_load[num_hot_items];
294 | 	memset(partition_load, 0, sizeof(partition_load));
295 | 	memset(hot_item_load, 0, sizeof(hot_item_load));
296 | 
297 | 	uint8_t partition_to_thread_org[num_partitions];
298 | 	uint8_t partition_to_thread_new[num_partitions];
299 | 	uint8_t hot_item_to_thread_org[num_hot_items];
300 | 	uint8_t hot_item_to_thread_new[num_hot_items];
301 | 
302 | 	uint16_t partition_id;
303 | 	uint64_t key;
304 | 	uint64_t key_hash;
305 | 
306 | 	uint64_t i;
307 | 	uint64_t num_samples = 1048576;
308 | 
309 | 	// measure load
310 | 	struct zipf_gen_state zipf_state;
311 | 	mehcached_zipf_init(&zipf_state, num_items, zipf_theta, 0);
312 | 
313 | 	for (i = 0; i < num_samples; i++)
314 | 	{
315 | 		key = mehcached_zipf_next(&zipf_state);
316 | 
317 | 		key_hash = mehcached_hash_key(key);
318 | 	    partition_id = (uint16_t)(key_hash >> 48) & (uint16_t)(num_partitions - 1);
319 | 
320 | 	    if (key < num_hot_items)
321 | 		    hot_item_load[key]++;
322 | 		else
323 | 		    partition_load[partition_id]++;
324 | 	}
325 | 
326 | 	// fix zero load (e.g., for single key workloads)
327 | 	for (partition_id = 0; partition_id < num_partitions; partition_id++)
328 | 		if (partition_load[partition_id] == 0)
329 | 			partition_load[partition_id] = 1;
330 | 	for (key = 0; key < num_hot_items; key++)
331 | 		if (hot_item_load[key] == 0)
332 | 			hot_item_load[key] = 1;
333 | 
334 | 	// initial mapping
335 | 	for (partition_id = 0; partition_id < num_partitions; partition_id++)
336 | 	{
337 | 		partition_to_thread_org[partition_id] = (uint8_t)(partition_id % num_threads);
338 | 		partition_to_thread_new[partition_id] = partition_to_thread_org[partition_id];
339 | 	}
340 | 
341 | 	for (key = 0; key < num_hot_items; key++)
342 | 	{
343 | 		key_hash = mehcached_hash_key(key);
344 | 	    partition_id = (uint16_t)(key_hash >> 48) & (uint16_t)(num_partitions - 1);
345 | 	    hot_item_to_thread_org[key] = partition_to_thread_org[partition_id];
346 | 	    hot_item_to_thread_new[key] = hot_item_to_thread_org[key];
347 | 	}
348 | 
349 | 	uint64_t thread_load[num_threads];
350 | 
351 | 	// printf("partition load\n");
352 | 	// mehcached_print_array_normalized(partition_load, num_partitions);
353 | 	// printf("\n");
354 | 	// printf("hot item load\n");
355 | 	// mehcached_print_array_normalized(hot_item_load, num_hot_items);
356 | 	// printf("\n");
357 | 
358 | 	printf("original thread load\n");
359 | 	mehcached_calc_thread_load(thread_load, partition_load, hot_item_load, partition_to_thread_org, hot_item_to_thread_org, partition_to_thread_new, hot_item_to_thread_new, num_threads, num_partitions, num_hot_items, num_numa_nodes, get_ratio, isolated_server_numa_nodes);
360 | 	mehcached_print_array_normalized(thread_load, num_threads);
361 | 	printf("\n");
362 | 
363 | 	printf("load-balanced thread load\n");
364 | 	mehcached_load_balance(partition_load, hot_item_load, partition_to_thread_org, partition_to_thread_new, hot_item_to_thread_org, hot_item_to_thread_new, num_threads, num_partitions, num_hot_items, num_numa_nodes, get_ratio, isolated_server_numa_nodes);
365 | 	mehcached_calc_thread_load(thread_load, partition_load, hot_item_load, partition_to_thread_org, hot_item_to_thread_org, partition_to_thread_new, hot_item_to_thread_new, num_threads, num_partitions, num_hot_items, num_numa_nodes, get_ratio, isolated_server_numa_nodes);
366 | 	mehcached_print_array_normalized(thread_load, num_threads);
367 | 	printf("\n");
368 | 
369 | 	printf("partition_to_thread: \n");
370 | 	for (partition_id = 0; partition_id < num_partitions; partition_id++)
371 | 	{
372 | 		printf("%hhu", partition_to_thread_new[partition_id]);
373 | 		if (partition_id != num_partitions - 1)
374 | 			printf(",");
375 | 	}
376 | 	printf("\n\n");
377 | 
378 | 	printf("hot_item_to_thread: \n");
379 | 	for (key = 0; key < num_hot_items; key++)
380 | 	{
381 | 		key_hash = mehcached_hash_key(key);
382 | 		printf("(0x%016lx,%hhu)", key_hash, hot_item_to_thread_new[key]);
383 | 		if (key != num_hot_items - 1)
384 | 			printf(",");
385 | 	}
386 | 	printf("\n\n");
387 | }
388 | 
// Entry point of the load-balance analysis tool.
// With four arguments (NUM-HOT-ITEMS ZIPF-THETA GET-RATIO ISOLATED-SERVER-NUMA-NODES) the
// analysis is run and EXIT_SUCCESS is returned. With fewer arguments the usage line is
// printed, the Zipf generator self-test is run for a fixed list of theta values, and
// EXIT_FAILURE is returned.
int
main(int argc, const char *argv[])
{
	if (argc >= 5)
	{
		const uint64_t num_hot_items = (uint64_t)atol(argv[1]);
		const double zipf_theta = atof(argv[2]);
		const double get_ratio = atof(argv[3]);
		const int isolated_flag = atoi(argv[4]);

		mehcached_benchmark_analysis(num_hot_items, zipf_theta, get_ratio, isolated_flag);

		return EXIT_SUCCESS;
	}

	printf("%s NUM-HOT-ITEMS ZIPF-THETA GET-RATIO ISOLATED-SERVER-NUMA-NODES\n", argv[0]);

	// theta values exercised by the self-test, in the order they are run
	static const double test_thetas[] = {
		0., 0.01, 0.1, 0.5, 0.9, 0.99, 0.992, 0.993, 0.994, 0.999,
		1., 10., 20., 30., 40., 50., 100.
	};
	size_t idx;
	for (idx = 0; idx < sizeof(test_thetas) / sizeof(test_thetas[0]); idx++)
		mehcached_test_zipf(test_thetas[idx]);

	return EXIT_FAILURE;
}
421 | 


--------------------------------------------------------------------------------
/src/netbench_config.c:
--------------------------------------------------------------------------------
  1 | // Copyright 2014 Carnegie Mellon University
  2 | //
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //     http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | #include "netbench_config.h"
 16 | #include <stdio.h>
 17 | #include <stdlib.h>
 18 | #include <string.h>
 19 | #include <assert.h>
 20 | 
 21 | struct mehcached_server_conf *
 22 | mehcached_get_server_conf(const char *filename, const char *server_name)
 23 | {
 24 | 	FILE *fp = fopen(filename, "r");
 25 | 	if (!fp)
 26 | 	{
 27 | 		fprintf(stderr, "cannot open %s\n", filename);
 28 | 		return NULL;
 29 | 	}
 30 | 
 31 | 	struct mehcached_server_conf *conf = malloc(sizeof(struct mehcached_server_conf));
 32 | 	memset(conf, 0, sizeof(struct mehcached_server_conf));
 33 | 
 34 | 	while (true)
 35 | 	{
 36 | 		char buf[4096];
 37 | 		int ret = fscanf(fp, "server,%[^,\n]\n", buf);
 38 | 		if (ret == EOF)
 39 | 			break;
 40 | 		if (strcmp(buf, server_name) != 0)
 41 | 		{
 42 | 			// skip
 43 | 			while (true)
 44 | 			{
 45 | 				if (fgets(buf, sizeof(buf), fp) == NULL)
 46 | 					break;
 47 | 				if (buf[0] == '\n')
 48 | 					break;
 49 | 			}
 50 | 			continue;
 51 | 		}
 52 | 
 53 | 		while (true)
 54 | 		{
 55 | 			if (fgets(buf, sizeof(buf), fp) == NULL)
 56 | 				break;
 57 | 
 58 | 			{
 59 | 				char ip_addr[4096];
 60 | 				char mac_addr[4096];
 61 | 				ret = sscanf(buf, "server_port,%[^,],%[^,\n]\n", mac_addr, ip_addr);
 62 | 				if (ret == 2)
 63 | 				{
 64 | 					size_t i;
 65 | 					char *p = mac_addr;
 66 | 					for (i = 0; i < 6; i++, p++)
 67 | 						conf->ports[conf->num_ports].mac_addr[i] = (uint8_t)strtoul(p, &p, 16);
 68 | 					p = ip_addr;
 69 | 					for (i = 0; i < 4; i++, p++)
 70 | 						conf->ports[conf->num_ports].ip_addr[i] = (uint8_t)strtoul(p, &p, 10);
 71 | 					conf->num_ports++;
 72 | 					assert(conf->num_ports <= MEHCACHED_MAX_PORTS);
 73 | 					continue;
 74 | 				}
 75 | 				else if (ret != 0)
 76 | 				{
 77 | 					fprintf(stderr, "parse error: %s (in %s)\n", buf, filename);
 78 | 					continue;
 79 | 				}
 80 | 			}
 81 | 			{
 82 | 				char port_ids[4096];
 83 | 				ret = sscanf(buf, "server_thread,%[^,\n]\n", port_ids);
 84 | 				if (ret == 1)
 85 | 				{
 86 | 					char *p = port_ids;
 87 | 					while (*p != 0)
 88 | 					{
 89 | 						conf->threads[conf->num_threads].port_ids[conf->threads[conf->num_threads].num_ports] = (uint8_t)strtoul(p, &p, 10);
 90 | 						conf->threads[conf->num_threads].num_ports++;
 91 | 						assert(conf->threads[conf->num_threads].num_ports <= MEHCACHED_MAX_PORTS);
 92 | 						if (*p != 0) p++;
 93 | 					}
 94 | 					conf->num_threads++;
 95 | 					assert(conf->num_threads <= MEHCACHED_MAX_THREADS);
 96 | 					continue;
 97 | 				}
 98 | 				else if (ret != 0)
 99 | 				{
100 | 					fprintf(stderr, "parse error: %s (in %s)\n", buf, filename);
101 | 					continue;
102 | 				}
103 | 			}
104 | 			{
105 | 				uint64_t num_items;
106 | 				uint64_t alloc_size;
107 | 				uint8_t concurrent_table_read;
108 | 				uint8_t concurrent_table_write;
109 | 				uint8_t concurrent_alloc_write;
110 | 				uint8_t thread_id;
111 | 				double mth_threshold;
112 | 				ret = sscanf(buf, "server_partition,%lu,%lu,%hhu,%hhu,%hhu,%hhu,%lf\n", &num_items, &alloc_size, &concurrent_table_read, &concurrent_table_write, &concurrent_alloc_write, &thread_id, &mth_threshold);
113 | 				if (ret == 7)
114 | 				{
115 | 					conf->partitions[conf->num_partitions].num_items = num_items;
116 | 					conf->partitions[conf->num_partitions].alloc_size = alloc_size;
117 | 					conf->partitions[conf->num_partitions].concurrent_table_read = concurrent_table_read;
118 | 					conf->partitions[conf->num_partitions].concurrent_table_write = concurrent_table_write;
119 | 					conf->partitions[conf->num_partitions].concurrent_alloc_write = concurrent_alloc_write;
120 | 					conf->partitions[conf->num_partitions].thread_id = thread_id;
121 | 					conf->partitions[conf->num_partitions].mth_threshold = mth_threshold;
122 | 					conf->num_partitions++;
123 | 					assert(conf->num_partitions <= MEHCACHED_MAX_PARTITIONS);
124 | 					continue;
125 | 				}
126 | 				else if (ret != 0)
127 | 				{
128 | 					fprintf(stderr, "parse error: %s (in %s)\n", buf, filename);
129 | 					continue;
130 | 				}
131 | 			}
132 | 			{
133 | 				uint64_t key_hash;
134 | 				uint8_t thread_id;
135 | 				ret = sscanf(buf, "server_hot_item,%lx,%hhu\n", &key_hash, &thread_id);
136 | 				if (ret == 2)
137 | 				{
138 | 					conf->hot_items[conf->num_hot_items].key_hash = key_hash;
139 | 					conf->hot_items[conf->num_hot_items].thread_id = thread_id;
140 | 					conf->num_hot_items++;
141 | 					assert(conf->num_hot_items <= MEHCACHED_MAX_HOT_ITEMS);
142 | 					continue;
143 | 				}
144 | 				else if (ret != 0)
145 | 				{
146 | 					fprintf(stderr, "parse error: %s (in %s)\n", buf, filename);
147 | 					continue;
148 | 				}
149 | 			}
150 | 			if (buf[0] == '\n')
151 | 				break;
152 | 			fprintf(stderr, "parse error: %s (in %s)\n", buf, filename);
153 | 		}
154 | 	}
155 | 
156 | 	fclose(fp);
157 | 	return conf;
158 | }
159 | 
160 | struct mehcached_client_conf *
161 | mehcached_get_client_conf(const char *filename, const char *client_name)
162 | {
163 | 	FILE *fp = fopen(filename, "r");
164 | 	if (!fp)
165 | 	{
166 | 		fprintf(stderr, "cannot open %s\n", filename);
167 | 		return NULL;
168 | 	}
169 | 
170 | 	struct mehcached_client_conf *conf = malloc(sizeof(struct mehcached_client_conf));
171 | 	memset(conf, 0, sizeof(struct mehcached_client_conf));
172 | 
173 | 	while (true)
174 | 	{
175 | 		char buf[4096];
176 | 		int ret = fscanf(fp, "client,%[^,\n]\n", buf);
177 | 		if (ret == EOF)
178 | 			break;
179 | 		if (strcmp(buf, client_name) != 0)
180 | 		{
181 | 			// skip
182 | 			while (true)
183 | 			{
184 | 				if (fgets(buf, sizeof(buf), fp) == NULL)
185 | 					break;
186 | 				if (buf[0] == '\n')
187 | 					break;
188 | 			}
189 | 			continue;
190 | 		}
191 | 
192 | 		while (true)
193 | 		{
194 | 			if (fgets(buf, sizeof(buf), fp) == NULL)
195 | 				break;
196 | 
197 | 			{
198 | 				char ip_addr[4096];
199 | 				char mac_addr[4096];
200 | 				ret = sscanf(buf, "client_port,%[^,],%[^,\n]\n", mac_addr, ip_addr);
201 | 				if (ret == 2)
202 | 				{
203 | 					size_t i;
204 | 					char *p = mac_addr;
205 | 					for (i = 0; i < 6; i++, p++)
206 | 						conf->ports[conf->num_ports].mac_addr[i] = (uint8_t)strtoul(p, &p, 16);
207 | 					p = ip_addr;
208 | 					for (i = 0; i < 4; i++, p++)
209 | 						conf->ports[conf->num_ports].ip_addr[i] = (uint8_t)strtoul(p, &p, 10);
210 | 					conf->num_ports++;
211 | 					assert(conf->num_ports <= MEHCACHED_MAX_PORTS);
212 | 					continue;
213 | 				}
214 | 				else if (ret != 0)
215 | 				{
216 | 					fprintf(stderr, "parse error: %s (in %s)\n", buf, filename);
217 | 					continue;
218 | 				}
219 | 			}
220 | 			{
221 | 				if (strcmp(buf, "client_thread,\n") == 0)
222 | 				{
223 | 					conf->num_threads++;
224 | 					assert(conf->num_threads <= MEHCACHED_MAX_THREADS);
225 | 					continue;
226 | 				}
227 | 			}
228 | 			if (buf[0] == '\n')
229 | 				break;
230 | 			fprintf(stderr, "parse error: %s (in %s)\n", buf, filename);
231 | 		}
232 | 	}
233 | 
234 | 	fclose(fp);
235 | 	return conf;
236 | }
237 | 
238 | struct mehcached_prepopulation_conf *
239 | mehcached_get_prepopulation_conf(const char *filename, const char *server_name)
240 | {
241 | 	FILE *fp = fopen(filename, "r");
242 | 	if (!fp)
243 | 	{
244 | 		fprintf(stderr, "cannot open %s\n", filename);
245 | 		return NULL;
246 | 	}
247 | 
248 | 	struct mehcached_prepopulation_conf *conf = malloc(sizeof(struct mehcached_prepopulation_conf));
249 | 	memset(conf, 0, sizeof(struct mehcached_prepopulation_conf));
250 | 
251 | 	while (true)
252 | 	{
253 | 		char buf[4096];
254 | 		int ret = fscanf(fp, "prepopulation,%[^,\n]\n", buf);
255 | 		if (ret == EOF)
256 | 			break;
257 | 		if (strcmp(buf, server_name) != 0)
258 | 		{
259 | 			// skip
260 | 			while (true)
261 | 			{
262 | 				if (fgets(buf, sizeof(buf), fp) == NULL)
263 | 					break;
264 | 				if (buf[0] == '\n')
265 | 					break;
266 | 			}
267 | 			continue;
268 | 		}
269 | 
270 | 		while (true)
271 | 		{
272 | 			if (fgets(buf, sizeof(buf), fp) == NULL)
273 | 				break;
274 | 
275 | 			{
276 | 				uint64_t num_items;
277 | 				size_t key_length;
278 | 				size_t value_length;
279 | 				int ret = sscanf(buf, "dataset,%lu,%zu,%zu\n", &num_items, &key_length, &value_length);
280 | 				if (ret == 3)
281 | 				{
282 | 					conf->num_items = num_items;
283 | 					conf->key_length = key_length;
284 | 					conf->value_length = value_length;
285 | 					continue;
286 | 				}
287 | 				else if (ret != 0)
288 | 				{
289 | 					fprintf(stderr, "parse error: %s (in %s)\n", buf, filename);
290 | 					continue;
291 | 				}
292 | 			}
293 | 
294 | 			if (buf[0] == '\n')
295 | 				break;
296 | 			fprintf(stderr, "parse error: %s (in %s)\n", buf, filename);
297 | 		}
298 | 	}
299 | 
300 | 	fclose(fp);
301 | 	return conf;
302 | }
303 | struct mehcached_workload_conf *
304 | mehcached_get_workload_conf(const char *filename, const char *client_name)
305 | {
306 | 	FILE *fp = fopen(filename, "r");
307 | 	if (!fp)
308 | 	{
309 | 		fprintf(stderr, "cannot open %s\n", filename);
310 | 		return NULL;
311 | 	}
312 | 
313 | 	struct mehcached_workload_conf *conf = malloc(sizeof(struct mehcached_workload_conf));
314 | 	memset(conf, 0, sizeof(struct mehcached_workload_conf));
315 | 
316 | 	while (true)
317 | 	{
318 | 		char buf[4096];
319 | 		int ret = fscanf(fp, "workload,%[^,\n]\n", buf);
320 | 		if (ret == EOF)
321 | 			break;
322 | 		if (strcmp(buf, client_name) != 0)
323 | 		{
324 | 			// skip
325 | 			while (true)
326 | 			{
327 | 				if (fgets(buf, sizeof(buf), fp) == NULL)
328 | 					break;
329 | 				if (buf[0] == '\n')
330 | 					break;
331 | 			}
332 | 			continue;
333 | 		}
334 | 
335 | 		while (true)
336 | 		{
337 | 			if (fgets(buf, sizeof(buf), fp) == NULL)
338 | 				break;
339 | 
340 | 			{
341 | 				char port_ids[4096];
342 | 				char server_name[4096];
343 | 				int8_t partition_mode;
344 | 				uint64_t num_items;
345 | 				size_t key_length;
346 | 				size_t value_length;
347 | 				double zipf_theta;
348 | 				double get_ratio;
349 | 				double put_ratio;
350 | 				double increment_ratio;
351 | 				uint8_t batch_size;
352 | 				uint64_t num_operations;
353 | 				double duration;
354 | 				int ret = sscanf(buf, "workload_thread,%[^,],%[^,],%hhd,%lu,%zu,%zu,%lf,%lf,%lf,%lf,%hhu,%lu,%lf\n", port_ids, server_name, &partition_mode, &num_items, &key_length, &value_length, &zipf_theta, &get_ratio, &put_ratio, &increment_ratio, &batch_size, &num_operations, &duration);
355 | 				if (ret == 13)
356 | 				{
357 | 					char *p = port_ids;
358 | 					while (*p != 0)
359 | 					{
360 | 						conf->threads[conf->num_threads].port_ids[conf->threads[conf->num_threads].num_ports] = (uint8_t)strtoul(p, &p, 10);
361 | 						conf->threads[conf->num_threads].num_ports++;
362 | 						assert(conf->threads[conf->num_threads].num_ports <= MEHCACHED_MAX_PORTS);
363 | 						if (*p != 0) p++;
364 | 					}
365 | 					strcpy(conf->threads[conf->num_threads].server_name, server_name);
366 | 					conf->threads[conf->num_threads].partition_mode = partition_mode;
367 | 					conf->threads[conf->num_threads].num_items = num_items;
368 | 					conf->threads[conf->num_threads].key_length = key_length;
369 | 					conf->threads[conf->num_threads].value_length = value_length;
370 | 					conf->threads[conf->num_threads].zipf_theta = zipf_theta;
371 | 					conf->threads[conf->num_threads].get_ratio = get_ratio;
372 | 					conf->threads[conf->num_threads].put_ratio = put_ratio;
373 | 					conf->threads[conf->num_threads].increment_ratio = increment_ratio;
374 | 					conf->threads[conf->num_threads].batch_size = batch_size;
375 | 					conf->threads[conf->num_threads].num_operations = num_operations;
376 | 					conf->threads[conf->num_threads].duration = duration;
377 | 					conf->num_threads++;
378 | 					assert(conf->num_threads <= MEHCACHED_MAX_THREADS);
379 | 					continue;
380 | 				}
381 | 				else if (ret != 0)
382 | 				{
383 | 					fprintf(stderr, "parse error: %s (in %s)\n", buf, filename);
384 | 					continue;
385 | 				}
386 | 			}
387 | 
388 | 			if (buf[0] == '\n')
389 | 				break;
390 | 			fprintf(stderr, "parse error: %s (in %s)\n", buf, filename);
391 | 		}
392 | 	}
393 | 
394 | 	fclose(fp);
395 | 	return conf;
396 | }
397 | 


--------------------------------------------------------------------------------
/src/netbench_config.h:
--------------------------------------------------------------------------------
  1 | // Copyright 2014 Carnegie Mellon University
  2 | //
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //     http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | #pragma once
 16 | 
 17 | #include "common.h"
 18 | #include "net_common.h"
 19 | 
// Capacities of the fixed-size arrays in the configuration structures below.
// NOTE(review): MEHCACHED_MAX_PORTS is used by the structures below while its
// definition here is commented out; presumably it is provided by
// net_common.h -- confirm.
//#define MEHCACHED_MAX_PORTS (8)
#define MEHCACHED_MAX_THREADS (16)
#define MEHCACHED_MAX_PARTITIONS (64)
#define MEHCACHED_MAX_WORKLOAD_THREADS (16)
#define MEHCACHED_MAX_HOT_ITEMS (64)
 25 | 
 26 | 
 27 | // common
// Addresses of one network port, shared by the server and client
// configurations.
struct mehcached_port_conf
{
	uint8_t mac_addr[6];	// MAC address, one byte per element
	uint8_t ip_addr[4];	// IP address, one byte per element
};
 33 | 
 34 | 
 35 | // server
// Ports assigned to one server thread.
struct mehcached_server_thread_conf
{
	uint8_t num_ports;	// number of valid entries in port_ids
	uint8_t port_ids[MEHCACHED_MAX_PORTS];	// port IDs listed on the thread's configuration line
};
 41 | 
// Settings of one server partition; the configuration parser copies the
// fields verbatim from a "server_partition" line, in this order.
struct mehcached_server_partition_conf
{
	uint64_t num_items;
	uint64_t alloc_size;
	// the three flags below are read through the MEHCACHED_CONCURRENT_* macros
	uint8_t concurrent_table_read;
	uint8_t concurrent_table_write;
	uint8_t concurrent_alloc_write;
	uint8_t thread_id;	// NOTE(review): presumably the thread that owns the partition -- confirm
	double mth_threshold;
};
 52 | 
// One hot item: a key hash paired with a thread ID, taken from a
// "server_hot_item" configuration line.
struct mehcached_server_hot_item_conf
{
	uint64_t key_hash;	// written in hexadecimal in the configuration file
	uint8_t thread_id;
};
 58 | 
// Complete configuration of one server.  Each num_* field counts the valid
// leading entries of the array declared right after it.
struct mehcached_server_conf
{
	uint8_t num_ports;
	struct mehcached_port_conf ports[MEHCACHED_MAX_PORTS];
	uint8_t num_threads;
	struct mehcached_server_thread_conf threads[MEHCACHED_MAX_THREADS];
	uint16_t num_partitions;
	struct mehcached_server_partition_conf partitions[MEHCACHED_MAX_PARTITIONS];
	uint8_t num_hot_items;
	struct mehcached_server_hot_item_conf hot_items[MEHCACHED_MAX_HOT_ITEMS];
};
 70 | 
// Accessors for the per-partition concurrency flags of a server configuration.
// server_conf is a pointer to struct mehcached_server_conf; partition_id
// indexes its partitions array and is not range-checked.
#define MEHCACHED_CONCURRENT_TABLE_READ(server_conf, partition_id) ((server_conf)->partitions[partition_id].concurrent_table_read)
#define MEHCACHED_CONCURRENT_TABLE_WRITE(server_conf, partition_id) ((server_conf)->partitions[partition_id].concurrent_table_write)
#define MEHCACHED_CONCURRENT_ALLOC_WRITE(server_conf, partition_id) ((server_conf)->partitions[partition_id].concurrent_alloc_write)
 74 | 
 75 | 
 76 | // client
// Complete configuration of one client.
struct mehcached_client_conf
{
	uint8_t num_ports;	// number of valid entries in ports
	struct mehcached_port_conf ports[MEHCACHED_MAX_PORTS];
	uint8_t num_threads;	// number of "client_thread," lines in the client's section
};
 83 | 
 84 | 
 85 | // prepopulation
// Dataset description taken from a "dataset" line of a prepopulation section.
struct mehcached_prepopulation_conf
{
	// TODO: support multiple datasets
	uint64_t num_items;
	size_t key_length;
	size_t value_length;
};
 93 | 
 94 | 
 95 | // workload
// Workload of one client thread, filled from a "workload_thread" line.
// Note that the member order differs from the field order of that line
// (batch_size is stored before the ratios).
struct mehcached_workload_thread_conf
{
	uint8_t num_ports;	// number of valid entries in port_ids
	uint8_t port_ids[MEHCACHED_MAX_PORTS];
	char server_name[64];	// NUL-terminated, so at most 63 characters
	int8_t partition_mode;
	uint64_t num_items;
	size_t key_length;
	size_t value_length;
	double zipf_theta;
	uint8_t batch_size;
	double get_ratio;
	double put_ratio;
	double increment_ratio;
	uint64_t num_operations;
	double duration;
};
113 | 
// All workload threads configured for one client.
struct mehcached_workload_conf
{
	uint8_t num_threads;	// number of valid entries in threads
	struct mehcached_workload_thread_conf threads[MEHCACHED_MAX_WORKLOAD_THREADS];
};
119 | 
120 | 
121 | // functions
// Each loader below reads `filename`, parses the sections whose header names
// the given server or client, and returns a malloc()-allocated structure;
// NULL is returned when the file cannot be opened.

// Parses the "server,<server_name>" sections.
struct mehcached_server_conf *
mehcached_get_server_conf(const char *filename, const char *server_name);

// Parses the "client,<client_name>" sections.
struct mehcached_client_conf *
mehcached_get_client_conf(const char *filename, const char *client_name);

// Parses the "prepopulation,<server_name>" sections.
struct mehcached_prepopulation_conf *
mehcached_get_prepopulation_conf(const char *filename, const char *server_name);

// Parses the "workload,<client_name>" sections.
struct mehcached_workload_conf *
mehcached_get_workload_conf(const char *filename, const char *client_name);
133 | 


--------------------------------------------------------------------------------
/src/netbench_hot_item_hash.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Carnegie Mellon University
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #pragma once
16 | 
17 | #include "common.h"
18 | 
// Placeholder lookup state: it has no members, and both functions in this
// header ignore it.
struct mehcached_hot_item_hash
{
};
22 | 
23 | static
24 | void
25 | mehcached_calc_hot_item_hash(struct mehcached_server_conf *server_conf, struct mehcached_hot_item_hash *hot_item_hash)
26 | {
27 | 	(void)server_conf;
28 | 	(void)hot_item_hash;
29 | }
30 | 
31 | static
32 | uint8_t
33 | mehcached_get_hot_item_id(struct mehcached_server_conf *server_conf, struct mehcached_hot_item_hash *hot_item_hash, uint64_t key_hash)
34 | {
35 | 	(void)server_conf;
36 | 	(void)hot_item_hash;
37 | 	(void)key_hash;
38 | 	return (uint8_t)-1;
39 | }
40 | 


--------------------------------------------------------------------------------
/src/perf_count/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Copyright 2014 Carnegie Mellon University
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
# Build script for the perf_count library.
cmake_minimum_required(VERSION 2.6)

# basic configuration: refuse to configure inside the source directory
if(CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_CURRENT_BINARY_DIR)
	message(FATAL_ERROR "Use out-of-source build only!")
endif(CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_CURRENT_BINARY_DIR)

project(PERF_COUNT)

# compiler flags: debug info, warnings, optimization, and the C dialect
# (perf_count.c's own error message refers to -std=gnu99)
add_definitions(-g -Wall -Wextra -Wconversion -Wsign-conversion)
add_definitions(-O3)
add_definitions(-std=gnu99)

# the library consists of a single source file
add_library(perf_count perf_count.c)
29 | 
30 | 


--------------------------------------------------------------------------------
/src/perf_count/perf_count.c:
--------------------------------------------------------------------------------
  1 | // Copyright 2014 Carnegie Mellon University
  2 | //
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //     http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | #include "perf_count.h"
 16 | 
 17 | #ifdef __cplusplus
 18 | extern "C" {
 19 | #endif
 20 | 
 21 | #ifdef __linux__
 22 | #include <linux/perf_event.h>
 23 | #include <sys/types.h>
 24 | #include <sys/syscall.h>
 25 | #endif
 26 | 
 27 | #include <unistd.h>
 28 | #include <stdlib.h>
 29 | #include <stdio.h>
 30 | #include <string.h>
 31 | 
// State behind a perf_count_t handle, allocated by perf_count_init() and
// released by perf_count_free().
struct perf_count_ctx
{
	size_t num_groups;	// 1, or the number of online processors in system-wide mode
	size_t num_events;	// number of requested counters
	enum PERF_COUNT_TYPE *types;	// num_events entries: the requested counter types
#ifdef __linux__
	struct perf_event_attr *events;	// num_events entries: attributes passed to perf_event_open
#endif
	int *fds;	// num_groups * num_events entries, indexed by group * num_events + event
	uint64_t *counters;	// num_events entries
};
 43 | 
// Display names of the supported counters.  The entries are listed in the same
// order as perf_count_mapping.
// NOTE(review): presumably indexed by enum PERF_COUNT_TYPE, like
// perf_count_mapping -- confirm against perf_count.h.
static const char *perf_count_name[] = 
{
	"CPUCycles",
	"Instructions",
	"CacheReferences",
	"CacheMisses",
	"BranchInstructions",
	"BranchMisses",
	"BUSCycles",
	"L1IReadAccess",
	"L1IReadMiss",
	"L1IWriteAccess",
	"L1IWriteMiss",
	"L1IPrefetchAccess",
	"L1IPrefetchMiss",
	"L1DReadAccess",
	"L1DReadMiss",
	"L1DWriteAccess",
	"L1DWriteMiss",
	"L1DPrefetchAccess",
	"L1DPrefetchMiss",
	"LLReadAccess",
	"LLReadMiss",
	"LLWriteAccess",
	"LLWriteMiss",
	"LLPrefetchAccess",
	"LLPrefetchMiss",
	"ITLBReadAccess",
	"ITLBReadMiss",
	"ITLBWriteAccess",
	"ITLBWriteMiss",
	"ITLBPrefetchAccess",
	"ITLBPrefetchMiss",
	"DTLBReadAccess",
	"DTLBReadMiss",
	"DTLBWriteAccess",
	"DTLBWriteMiss",
	"DTLBPrefetchAccess",
	"DTLBPrefetchMiss",
	"CPUClock",
	"TaskClock",
	"PageFaults",
	"ContextSwitches",
	"CPUMigrations",
	"PageFaultsMinor",
	"PageFaultsMajor",
	"AlignmentFaults",
	"EmulationFaults",
};
 93 | 
 94 | #ifdef __linux__
// Kernel event description for each counter type; perf_count_init() indexes
// this table with the enum PERF_COUNT_TYPE value of every requested counter.
// Hardware cache events encode the cache in bits 0-7 of config, the operation
// in bits 8-15 and the result (access or miss) in bits 16-23.
static const struct perf_event_attr perf_count_mapping[] =
{
	// generic hardware events
	{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES },
	{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
	{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES },
	{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES },
	{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
	{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES },
	{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BUS_CYCLES },
	// hardware cache events: L1I, L1D, LL, ITLB, DTLB x read/write/prefetch x access/miss
	{ .type = PERF_TYPE_HW_CACHE, .config = PERF_COUNT_HW_CACHE_L1I  | (PERF_COUNT_HW_CACHE_OP_READ << 8)     | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
	{ .type = PERF_TYPE_HW_CACHE, .config = PERF_COUNT_HW_CACHE_L1I  | (PERF_COUNT_HW_CACHE_OP_READ << 8)     | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
	{ .type = PERF_TYPE_HW_CACHE, .config = PERF_COUNT_HW_CACHE_L1I  | (PERF_COUNT_HW_CACHE_OP_WRITE << 8)    | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
	{ .type = PERF_TYPE_HW_CACHE, .config = PERF_COUNT_HW_CACHE_L1I  | (PERF_COUNT_HW_CACHE_OP_WRITE << 8)    | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
	{ .type = PERF_TYPE_HW_CACHE, .config = PERF_COUNT_HW_CACHE_L1I  | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
	{ .type = PERF_TYPE_HW_CACHE, .config = PERF_COUNT_HW_CACHE_L1I  | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
	{ .type = PERF_TYPE_HW_CACHE, .config = PERF_COUNT_HW_CACHE_L1D  | (PERF_COUNT_HW_CACHE_OP_READ << 8)     | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
	{ .type = PERF_TYPE_HW_CACHE, .config = PERF_COUNT_HW_CACHE_L1D  | (PERF_COUNT_HW_CACHE_OP_READ << 8)     | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
	{ .type = PERF_TYPE_HW_CACHE, .config = PERF_COUNT_HW_CACHE_L1D  | (PERF_COUNT_HW_CACHE_OP_WRITE << 8)    | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
	{ .type = PERF_TYPE_HW_CACHE, .config = PERF_COUNT_HW_CACHE_L1D  | (PERF_COUNT_HW_CACHE_OP_WRITE << 8)    | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
	{ .type = PERF_TYPE_HW_CACHE, .config = PERF_COUNT_HW_CACHE_L1D  | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
	{ .type = PERF_TYPE_HW_CACHE, .config = PERF_COUNT_HW_CACHE_L1D  | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
	{ .type = PERF_TYPE_HW_CACHE, .config = PERF_COUNT_HW_CACHE_LL   | (PERF_COUNT_HW_CACHE_OP_READ << 8)     | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
	{ .type = PERF_TYPE_HW_CACHE, .config = PERF_COUNT_HW_CACHE_LL   | (PERF_COUNT_HW_CACHE_OP_READ << 8)     | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
	{ .type = PERF_TYPE_HW_CACHE, .config = PERF_COUNT_HW_CACHE_LL   | (PERF_COUNT_HW_CACHE_OP_WRITE << 8)    | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
	{ .type = PERF_TYPE_HW_CACHE, .config = PERF_COUNT_HW_CACHE_LL   | (PERF_COUNT_HW_CACHE_OP_WRITE << 8)    | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
	{ .type = PERF_TYPE_HW_CACHE, .config = PERF_COUNT_HW_CACHE_LL   | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
	{ .type = PERF_TYPE_HW_CACHE, .config = PERF_COUNT_HW_CACHE_LL   | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
	{ .type = PERF_TYPE_HW_CACHE, .config = PERF_COUNT_HW_CACHE_ITLB | (PERF_COUNT_HW_CACHE_OP_READ << 8)     | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
	{ .type = PERF_TYPE_HW_CACHE, .config = PERF_COUNT_HW_CACHE_ITLB | (PERF_COUNT_HW_CACHE_OP_READ << 8)     | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
	{ .type = PERF_TYPE_HW_CACHE, .config = PERF_COUNT_HW_CACHE_ITLB | (PERF_COUNT_HW_CACHE_OP_WRITE << 8)    | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
	{ .type = PERF_TYPE_HW_CACHE, .config = PERF_COUNT_HW_CACHE_ITLB | (PERF_COUNT_HW_CACHE_OP_WRITE << 8)    | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
	{ .type = PERF_TYPE_HW_CACHE, .config = PERF_COUNT_HW_CACHE_ITLB | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
	{ .type = PERF_TYPE_HW_CACHE, .config = PERF_COUNT_HW_CACHE_ITLB | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
	{ .type = PERF_TYPE_HW_CACHE, .config = PERF_COUNT_HW_CACHE_DTLB | (PERF_COUNT_HW_CACHE_OP_READ << 8)     | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
	{ .type = PERF_TYPE_HW_CACHE, .config = PERF_COUNT_HW_CACHE_DTLB | (PERF_COUNT_HW_CACHE_OP_READ << 8)     | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
	{ .type = PERF_TYPE_HW_CACHE, .config = PERF_COUNT_HW_CACHE_DTLB | (PERF_COUNT_HW_CACHE_OP_WRITE << 8)    | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
	{ .type = PERF_TYPE_HW_CACHE, .config = PERF_COUNT_HW_CACHE_DTLB | (PERF_COUNT_HW_CACHE_OP_WRITE << 8)    | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
	{ .type = PERF_TYPE_HW_CACHE, .config = PERF_COUNT_HW_CACHE_DTLB | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
	{ .type = PERF_TYPE_HW_CACHE, .config = PERF_COUNT_HW_CACHE_DTLB | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
	// software events
	{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_CLOCK },
	{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
	{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS },
	{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES },
	{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS },
	{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS_MIN },
	{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS_MAJ },
	{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_ALIGNMENT_FAULTS },
	{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_EMULATION_FAULTS },
};
144 | #endif
145 | 
146 | #ifdef __linux__
// Thin wrapper around the perf_event_open system call.  It stores the
// structure size in attr->size before the call and returns the system call's
// result narrowed to int (a negative value signals failure).
static int
sys_perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu, int group_fd, unsigned long flags)
{
	long syscall_result;

	attr->size = sizeof(*attr);
	syscall_result = syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);

	return (int)syscall_result;
}
153 | #endif
154 | 
155 | perf_count_t
156 | perf_count_init(const enum PERF_COUNT_TYPE *perf_count_types, size_t num_events, int system_wide)
157 | {
158 | #ifdef __linux__
159 | 	if (perf_count_types == NULL)
160 | 		return NULL;
161 | 
162 | 	for (size_t event = 0; event < num_events; event++)
163 | 		if (perf_count_types[event] < 0 || perf_count_types[event] >= PERF_COUNT_TYPE_MAX)
164 | 			return NULL;
165 | 
166 | 	struct perf_count_ctx *ctx = (struct perf_count_ctx *)malloc(sizeof(struct perf_count_ctx));
167 | 	if (!ctx)
168 | 		return NULL;
169 | 
170 | 	if (system_wide)
171 | 		ctx->num_groups = (size_t)sysconf(_SC_NPROCESSORS_ONLN);
172 | 	else
173 | 		ctx->num_groups = 1;
174 | 	ctx->num_events = num_events;
175 | 
176 | 	ctx->types = (enum PERF_COUNT_TYPE *)calloc(sizeof(enum PERF_COUNT_TYPE), (size_t)ctx->num_events);
177 | 	if (!ctx->types)
178 | 	{
179 | 		free(ctx);
180 | 		return NULL;
181 | 	}
182 | 	ctx->events = (struct perf_event_attr *)calloc(sizeof(struct perf_event_attr), (size_t)ctx->num_events);
183 | 	if (!ctx->events)
184 | 	{
185 | 		free(ctx->types);
186 | 		free(ctx);
187 | 		return NULL;
188 | 	}
189 | 	ctx->fds = (int *)calloc(sizeof(int), (size_t)ctx->num_groups * (size_t)ctx->num_events);
190 | 	if (!ctx->fds)
191 | 	{
192 | 		free(ctx->events);
193 | 		free(ctx->types);
194 | 		free(ctx);
195 | 		return NULL;
196 | 	}
197 | 	ctx->counters = (uint64_t *)calloc(sizeof(uint64_t), (size_t)ctx->num_events);
198 | 	if (!ctx->counters)
199 | 	{
200 | 		free(ctx->fds);
201 | 		free(ctx->events);
202 | 		free(ctx->types);
203 | 		free(ctx);
204 | 		return NULL;
205 | 	}
206 | 
207 | 	for (size_t event = 0; event < ctx->num_events; event++)
208 | 	{
209 | 		ctx->types[event] = perf_count_types[event];
210 | 		ctx->events[event] = perf_count_mapping[perf_count_types[event]];
211 | 		ctx->events[event].read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING;
212 | 		if (!system_wide)
213 | 			ctx->events[event].inherit = 1;
214 | 	}
215 | 
216 | 	for (size_t group = 0; group < ctx->num_groups; group++)
217 | 		for (size_t event = 0; event < ctx->num_events; event++)
218 | 		{
219 | 			pid_t pid;
220 | 			int cpu;
221 | 
222 | 			if (system_wide)
223 | 			{
224 | 				pid = -1;
225 | 				// XXX: assuming the IDs of online cpus range from 0 to (num_cpus - 1)
226 | 				cpu = (int)group;
227 | 			}
228 | 			else
229 | 			{
230 | 				// this process
231 | 				pid = 0;
232 | 				cpu = -1;
233 | 			}
234 | 
235 | 			ctx->fds[group * ctx->num_events + event] = sys_perf_event_open(&ctx->events[event], pid, cpu, -1, 0);
236 | 			if (ctx->fds[group * ctx->num_events + event] < 0)
237 | 			{
238 | 				perror("perf_count: error while sys_perf_event_open()");
239 | 				break;
240 | 			}
241 | 		}
242 | 
243 | 	return ctx;
244 | #else
245 | 	return NULL;
246 | #endif
247 | }
248 | // Releases a context created by perf_count_init(): closes every counter fd, then frees all arrays and the context itself.
249 | void
250 | perf_count_free(perf_count_t perf_count)
251 | {
252 | #ifdef __linux__
253 | 	struct perf_count_ctx *ctx = (struct perf_count_ctx *)perf_count;
254 | 	if (!ctx)	// perf_count_init() returns NULL on failure; accept it like free(NULL) does
255 | 		return;
256 | 	// NOTE(review): fds is calloc()ed, so slots perf_count_init() never reached hold 0 and are closed as fd 0 -- confirm
257 | 	const size_t num_fds = ctx->num_groups * ctx->num_events;
258 | 	for (size_t i = 0; i < num_fds; i++)
259 | 		if (ctx->fds[i] >= 0)
260 | 			close(ctx->fds[i]);
261 | 
262 | 	free(ctx->counters);
263 | 	free(ctx->fds);
264 | 	free(ctx->events);
265 | 	free(ctx->types);
266 | 	free(ctx);
267 | #endif
268 | }
269 | // Reads all counters and folds the scaled totals into ctx->counters: added when additive is non-zero, subtracted otherwise.
270 | static void
271 | perf_count_accumulate(perf_count_t perf_count, int additive)
272 | {
273 | #ifdef __linux__
274 | 	struct perf_count_ctx *ctx = (struct perf_count_ctx *)perf_count;
275 | 	if (!ctx)	// perf_count_init() returns NULL on failure; there is nothing to read then
276 | 		return;
277 | 
278 | 	// every event is summed across its groups (one group per online CPU when system-wide)
279 | 	for (size_t event = 0; event < ctx->num_events; event++)
280 | 	{
281 | 		// Per-event sums over all groups, in the order read() delivers them for the
282 | 		// read_format set by perf_count_init(): value, time enabled, time running.
283 | 		uint64_t total[3] = {0, 0, 0};
284 | 
285 | 		for (size_t group = 0; group < ctx->num_groups; group++)
286 | 		{
287 | 			const int fd = ctx->fds[group * ctx->num_events + event];
288 | 			if (fd < 0)	// this counter could not be opened
289 | 				continue;
290 | 
291 | 			uint64_t sample[3] = {0, 0, 0};
292 | 			const ssize_t len = read(fd, sample, sizeof(sample));
293 | 			if (len < 0)
294 | 			{
295 | 				perror("perf_count: error while reading stats");
296 | 				break;	// give up on the remaining groups of this event
297 | 			}
298 | 			if ((size_t)len != sizeof(sample))
299 | 			{
300 | 				fprintf(stderr, "perf_count: invalid stats reading; did you really use -std=gnu99 when compiling?\n");
301 | 				break;
302 | 			}
303 | 
304 | 			total[0] += sample[0];
305 | 			total[1] += sample[1];
306 | 			total[2] += sample[2];
307 | 		}
308 | 
309 | 		// The counter was only running for total[2] of the total[1] time units it was
310 | 		// enabled (e.g. because of counter multiplexing), so extrapolate the value;
311 | 		// a zero running time means there is nothing to scale.
312 | 		if (total[2] != 0 && total[2] < total[1])
313 | 			total[0] = (uint64_t)((double)total[0] * (double)total[1] / (double)total[2] + 0.5);
314 | 
315 | 		if (!additive)
316 | 		{
317 | 			// start of an interval: keep the baseline as a negative offset
318 | 			ctx->counters[event] -= total[0];
319 | 			continue;
320 | 		}
321 | 
322 | 		// end of an interval: negative baseline + current reading = increment
323 | 		ctx->counters[event] += total[0];
324 | 		// due to the scaling, we may observe a negative increment; clamp it to zero
325 | 		if ((int64_t)ctx->counters[event] < 0)
326 | 			ctx->counters[event] = 0;
327 | 	}
328 | #endif
329 | }
330 | // Opens a measurement interval: subtracts the current (scaled) readings from the accumulated counters.
331 | void
332 | perf_count_start(perf_count_t perf_count)
333 | {
334 | 	perf_count_accumulate(perf_count, 0);
335 | }
336 | // Closes a measurement interval: adds the current (scaled) readings, leaving the start-to-stop difference accumulated.
337 | void
338 | perf_count_stop(perf_count_t perf_count)
339 | {
340 | 	perf_count_accumulate(perf_count, 1);
341 | }
342 | // Zeroes the accumulated count of every event; the open counters themselves are left untouched.
343 | void
344 | perf_count_reset(perf_count_t perf_count)
345 | {
346 | #ifdef __linux__
347 | 	struct perf_count_ctx *ctx = (struct perf_count_ctx *)perf_count;
348 | 	if (ctx)	// a NULL handle (failed perf_count_init()) has nothing to reset
349 | 		for (size_t event = 0; event < ctx->num_events; event++)
350 | 			ctx->counters[event] = 0;
351 | #endif
352 | }
353 | // Returns the accumulated count of the index-th event passed to perf_count_init(), or (uint64_t)-1 on failure.
354 | uint64_t
355 | perf_count_get_by_index(perf_count_t perf_count, size_t index)
356 | {
357 | #ifdef __linux__
358 | 	struct perf_count_ctx *ctx = (struct perf_count_ctx *)perf_count;
359 | 
360 | 	// a NULL handle (failed perf_count_init()) and an out-of-range index both report the error value
361 | 	if (!ctx || index >= ctx->num_events)
362 | 		return (uint64_t)-1;
363 | 	return ctx->counters[index];
364 | #else
365 | 	return (uint64_t)-1;
366 | #endif
367 | }
368 | // Returns the accumulated count of the first event of the given type, or (uint64_t)-1 if it is invalid or not counted.
369 | uint64_t
370 | perf_count_get_by_type(perf_count_t perf_count, enum PERF_COUNT_TYPE type)
371 | {
372 | #ifdef __linux__
373 | 	struct perf_count_ctx *ctx = (struct perf_count_ctx *)perf_count;
374 | 	// reject a NULL handle (failed perf_count_init()) as well as types outside [0, PERF_COUNT_TYPE_MAX)
375 | 	if (!ctx || type < 0 || type >= PERF_COUNT_TYPE_MAX)
376 | 		return (uint64_t)-1;
377 | 
378 | 	for (size_t event = 0; event < ctx->num_events; event++)
379 | 		if (ctx->types[event] == type)
380 | 			return ctx->counters[event];
381 | #endif
382 | 	// the type is not being counted by this context, or this is not a Linux build
383 | 	return (uint64_t)-1;
384 | }
385 | // Maps a counter type to its entry in perf_count_name[]; NULL for a type outside the valid range.
386 | const char *
387 | perf_count_name_by_type(enum PERF_COUNT_TYPE type)
388 | {
389 | #ifdef __linux__
390 | 	// only values in [0, PERF_COUNT_TYPE_MAX) may index the name table
391 | 	const int is_known = (type >= 0 && type < PERF_COUNT_TYPE_MAX);
392 | 	return is_known ? perf_count_name[type] : NULL;
393 | #else
394 | 	// the name table is only consulted on Linux builds
395 | 	return NULL;
396 | #endif
397 | }
398 | // Looks a counter type up by its exact name; PERF_COUNT_TYPE_INVALID if name is NULL or matches no entry.
399 | enum PERF_COUNT_TYPE
400 | perf_count_type_by_name(const char *name)
401 | {
402 | #ifdef __linux__
403 | 	// a NULL name never enters the loop, so it falls through to the invalid result
404 | 	for (size_t idx = 0; name != NULL && idx < PERF_COUNT_TYPE_MAX; idx++)
405 | 	{
406 | 		if (!strcmp(name, perf_count_name[idx]))
407 | 			return (enum PERF_COUNT_TYPE)idx;
408 | 	}
409 | #endif
410 | 	// no match, NULL name, or not a Linux build
411 | 	return PERF_COUNT_TYPE_INVALID;
412 | }
413 | 
414 | #ifdef __cplusplus
415 | }
416 | #endif
417 | 
418 | 


--------------------------------------------------------------------------------
/src/perf_count/perf_count.h:
--------------------------------------------------------------------------------
  1 | // Copyright 2014 Carnegie Mellon University
  2 | //
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //     http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | #ifndef __PERF_COUNT__
 16 | #define __PERF_COUNT__
 17 | 
 18 | #ifdef __cplusplus
 19 | extern "C" {
 20 | #endif
 21 | 
 22 | #include <stddef.h>
 23 | #include <stdint.h>
 24 | // Counter types accepted by perf_count_init(); the implementation uses the values directly as array indices.
 25 | enum PERF_COUNT_TYPE 
 26 | {
 27 | 	PERF_COUNT_TYPE_HW_CPU_CYCLES,
 28 | 	PERF_COUNT_TYPE_HW_INSTRUCTIONS,
 29 | 	PERF_COUNT_TYPE_HW_CACHE_REFERENCES,
 30 | 	PERF_COUNT_TYPE_HW_CACHE_MISSES,
 31 | 	PERF_COUNT_TYPE_HW_BRANCH_INSTRUCTIONS,
 32 | 	PERF_COUNT_TYPE_HW_BRANCH_MISSES,
 33 | 	PERF_COUNT_TYPE_HW_BUS_CYCLES,
 34 | 	PERF_COUNT_TYPE_HW_CACHE_L1I_READ_ACCESS,
 35 | 	PERF_COUNT_TYPE_HW_CACHE_L1I_READ_MISS,
 36 | 	PERF_COUNT_TYPE_HW_CACHE_L1I_WRITE_ACCESS,
 37 | 	PERF_COUNT_TYPE_HW_CACHE_L1I_WRITE_MISS,
 38 | 	PERF_COUNT_TYPE_HW_CACHE_L1I_PREFETCH_ACCESS,		// not working?
 39 | 	PERF_COUNT_TYPE_HW_CACHE_L1I_PREFETCH_MISS,			// not working?
 40 | 	PERF_COUNT_TYPE_HW_CACHE_L1D_READ_ACCESS,
 41 | 	PERF_COUNT_TYPE_HW_CACHE_L1D_READ_MISS,
 42 | 	PERF_COUNT_TYPE_HW_CACHE_L1D_WRITE_ACCESS,
 43 | 	PERF_COUNT_TYPE_HW_CACHE_L1D_WRITE_MISS,
 44 | 	PERF_COUNT_TYPE_HW_CACHE_L1D_PREFETCH_ACCESS,		// not working?
 45 | 	PERF_COUNT_TYPE_HW_CACHE_L1D_PREFETCH_MISS,			// not working?
 46 | 	PERF_COUNT_TYPE_HW_CACHE_LL_READ_ACCESS,
 47 | 	PERF_COUNT_TYPE_HW_CACHE_LL_READ_MISS,
 48 | 	PERF_COUNT_TYPE_HW_CACHE_LL_WRITE_ACCESS,
 49 | 	PERF_COUNT_TYPE_HW_CACHE_LL_WRITE_MISS,
 50 | 	PERF_COUNT_TYPE_HW_CACHE_LL_PREFETCH_ACCESS,		// not working?
 51 | 	PERF_COUNT_TYPE_HW_CACHE_LL_PREFETCH_MISS,			// not working?
 52 | 	PERF_COUNT_TYPE_HW_CACHE_ITLB_READ_ACCESS,
 53 | 	PERF_COUNT_TYPE_HW_CACHE_ITLB_READ_MISS,
 54 | 	PERF_COUNT_TYPE_HW_CACHE_ITLB_WRITE_ACCESS,
 55 | 	PERF_COUNT_TYPE_HW_CACHE_ITLB_WRITE_MISS,
 56 | 	PERF_COUNT_TYPE_HW_CACHE_ITLB_PREFETCH_ACCESS,		// not working?
 57 | 	PERF_COUNT_TYPE_HW_CACHE_ITLB_PREFETCH_MISS,		// not working?
 58 | 	PERF_COUNT_TYPE_HW_CACHE_DTLB_READ_ACCESS,
 59 | 	PERF_COUNT_TYPE_HW_CACHE_DTLB_READ_MISS,
 60 | 	PERF_COUNT_TYPE_HW_CACHE_DTLB_WRITE_ACCESS,
 61 | 	PERF_COUNT_TYPE_HW_CACHE_DTLB_WRITE_MISS,
 62 | 	PERF_COUNT_TYPE_HW_CACHE_DTLB_PREFETCH_ACCESS,		// not working?
 63 | 	PERF_COUNT_TYPE_HW_CACHE_DTLB_PREFETCH_MISS,		// not working?
 64 | 	PERF_COUNT_TYPE_SW_CPU_CLOCK,
 65 | 	PERF_COUNT_TYPE_SW_TASK_CLOCK,
 66 | 	PERF_COUNT_TYPE_SW_PAGE_FAULTS,
 67 | 	PERF_COUNT_TYPE_SW_CONTEXT_SWITCHES,
 68 | 	PERF_COUNT_TYPE_SW_CPU_MIGRATIONS,
 69 | 	PERF_COUNT_TYPE_SW_PAGE_FAULTS_MIN,
 70 | 	PERF_COUNT_TYPE_SW_PAGE_FAULTS_MAJ,
 71 | 	PERF_COUNT_TYPE_SW_ALIGNMENT_FAULTS,
 72 | 	PERF_COUNT_TYPE_SW_EMULATION_FAULTS,
 73 | 	// number of counter types above; serves as the exclusive upper bound when a type is validated
 74 | 	PERF_COUNT_TYPE_MAX,
 75 | 	// returned by perf_count_type_by_name() when no type matches
 76 | 	PERF_COUNT_TYPE_INVALID = -1,
 77 | };
 78 | // Opaque handle to a counter context; perf_count_init() returns NULL when it cannot create one.
 79 | typedef void *perf_count_t;
 80 | // Creates a context that counts the num_events event types listed in perf_count_types.
 81 | // system_wide would require CAP_SYS_ADMIN
 82 | perf_count_t perf_count_init(const enum PERF_COUNT_TYPE *perf_count_types, size_t num_events, int system_wide);
 83 | void perf_count_free(perf_count_t perf_count);
 84 | // start()/stop() bracket a measured interval whose counts accumulate in the context; reset() zeroes the accumulated counts.
 85 | void perf_count_start(perf_count_t perf_count);
 86 | void perf_count_stop(perf_count_t perf_count);
 87 | void perf_count_reset(perf_count_t perf_count);
 88 | // Accumulated count of one event; (uint64_t)-1 for an out-of-range index or a type that is invalid or not being counted.
 89 | uint64_t perf_count_get_by_index(perf_count_t perf_count, size_t index);
 90 | uint64_t perf_count_get_by_type(perf_count_t perf_count, enum PERF_COUNT_TYPE type);
 91 | // Name <-> type conversion; NULL and PERF_COUNT_TYPE_INVALID respectively when there is no match.
 92 | const char *perf_count_name_by_type(enum PERF_COUNT_TYPE type);
 93 | enum PERF_COUNT_TYPE perf_count_type_by_name(const char *name);
 94 | 
 95 | #ifdef __cplusplus
 96 | }
 97 | #endif
 98 | 
 99 | #endif
100 | 


--------------------------------------------------------------------------------
/src/proto.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Carnegie Mellon University
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #pragma once
16 | 
17 | #include "common.h"
18 | #include "table.h"
19 | 
20 | #include <rte_ether.h>
21 | #include <rte_ip.h>
22 | #include <rte_udp.h>
23 | 
24 | // override opaque to measure latency
25 | #define MEHCACHED_ENABLE_THROTTLING
26 | 
27 | // override expire time to measure end-to-end latency
28 | // use this only for full-RX latency measurement using core 0; this may lower throughput due to more I/O and processing on core0 and less responsive throttling
29 | //#define MEHCACHED_MEASURE_LATENCY
30 | 
31 | // (ETHER_MAX_LEN - ETHER_CRC_LEN - sizeof(struct mehcached_batch_packet)) / (sizeof(struct mehcached_request) + 8 + 8)
32 | #define MEHCACHED_MAX_BATCH_SIZE (36)
33 | 
34 | // use software flow director (slower); this does not disable hardware flow director on the server, but the client will send packets to all cores regardless of the concurrency mode
35 | //#define MEHCACHED_USE_SOFT_FDIR
36 | 
37 | // collect per-partition load
38 | #define MEHCACHED_COLLECT_PER_PARTITION_LOAD
39 | // Layout of one batch packet; each bare number is the byte offset of the field that follows it.
40 | struct mehcached_batch_packet
41 | {
42 | 	// 0
43 | 	uint8_t header[sizeof(struct ether_hdr) + sizeof(struct ipv4_hdr) + sizeof(struct udp_hdr)];	// raw Ethernet + IPv4 + UDP headers
44 | 	// 42
45 | 	uint8_t num_requests;	// presumably the number of requests carried in data -- confirm against the sender
46 | 	uint8_t reserved0;
47 | 	// 44
48 | 	uint32_t opaque;
49 | 	// 48
50 |     uint8_t data[0];	// zero-length trailing array: marks where the payload starts and adds nothing to sizeof
51 |     // batch
52 | };
53 | 


--------------------------------------------------------------------------------
/src/shm.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Carnegie Mellon University
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #pragma once
16 | 
17 | #include "common.h"
18 | 
19 | MEHCACHED_BEGIN
20 | // mehcached_shm_* interface; parameterless functions use (void) so that C compilers check calls against a real prototype.
21 | size_t
22 | mehcached_shm_adjust_size(size_t size);
23 | 
24 | void
25 | mehcached_shm_dump_page_info(void);
26 | 
27 | void
28 | mehcached_shm_init(size_t page_size, size_t num_numa_nodes, size_t num_pages_to_try, size_t num_pages_to_reserve);
29 | 
30 | void *
31 | mehcached_shm_find_free_address(size_t size);
32 | 
33 | size_t
34 | mehcached_shm_alloc(size_t length, size_t numa_node);
35 | 
36 | bool
37 | mehcached_shm_schedule_remove(size_t entry_id);
38 | 
39 | bool
40 | mehcached_shm_map(size_t entry_id, void *ptr, size_t offset, size_t length);
41 | 
42 | bool
43 | mehcached_shm_unmap(void *ptr);
44 | 
45 | size_t
46 | mehcached_shm_get_page_size(void);
47 | 
48 | size_t
49 | mehcached_shm_get_memuse(void);
50 | 
51 | void *
52 | mehcached_shm_malloc_contiguous(size_t size, size_t numa_node);
53 | 
54 | void *
55 | mehcached_shm_malloc_contiguous_local(size_t size);
56 | 
57 | void
58 | mehcached_shm_free_contiguous(void *ptr);
59 | 
60 | void *
61 | mehcached_shm_malloc_striped(size_t size);
62 | 
63 | void
64 | mehcached_shm_free_striped(void *ptr);
65 | 
66 | MEHCACHED_END
67 | 
68 | 


--------------------------------------------------------------------------------
/src/stopwatch.c:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Carnegie Mellon University
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "stopwatch.h"
16 | #include <stdio.h>
17 | #include <sys/time.h>
18 | // Calibration start point: wall-clock time and tick count captured by mehcached_stopwatch_init_start().
19 | static struct timeval mehcached_stopwatch_init_tv;
20 | static uint64_t mehcached_stopwatch_init_s;
21 | // Ticks per second / millisecond / microsecond; all stay 0 until mehcached_stopwatch_init_end() has run.
22 | uint64_t mehcached_stopwatch_1_sec = 0UL;
23 | uint64_t mehcached_stopwatch_1_msec = 0UL;
24 | uint64_t mehcached_stopwatch_1_usec = 0UL;
25 | // Starts calibration: records the current tick count, then the wall-clock time it will be compared against.
26 | void
27 | mehcached_stopwatch_init_start()
28 | {
29 | 	mehcached_stopwatch_init_s = mehcached_stopwatch_now();
30 | 	gettimeofday(&mehcached_stopwatch_init_tv, NULL);
31 | }
32 | // Finishes calibration: spins until at least 1 s of wall-clock time has passed since init_start(), then sets the tick rates.
33 | void
34 | mehcached_stopwatch_init_end()
35 | {
36 | 	const uint64_t usec_per_sec = 1000000UL;
37 | 
38 | 	for (;;)
39 | 	{
40 | 		struct timeval now_tv;
41 | 		gettimeofday(&now_tv, NULL);
42 | 		// elapsed wall-clock microseconds; the unsigned arithmetic wraps, so a negative tv_usec difference still sums correctly
43 | 		const uint64_t elapsed_usec = (uint64_t)(now_tv.tv_sec - mehcached_stopwatch_init_tv.tv_sec) * 1000000UL + (uint64_t)(now_tv.tv_usec - mehcached_stopwatch_init_tv.tv_usec);
44 | 		if (elapsed_usec < usec_per_sec)
45 | 			continue;	// keep polling until a full second has gone by
46 | 
47 | 		// ticks counted over elapsed_usec, rescaled to exactly one second
48 | 		const uint64_t now_ticks = mehcached_stopwatch_now();
49 | 		mehcached_stopwatch_1_sec = (now_ticks - mehcached_stopwatch_init_s) * usec_per_sec / elapsed_usec;
50 | 		mehcached_stopwatch_1_msec = mehcached_stopwatch_1_sec / 1000UL;
51 | 		mehcached_stopwatch_1_usec = mehcached_stopwatch_1_msec / 1000UL;
52 | 		return;
53 | 	}
54 | }
55 | 


--------------------------------------------------------------------------------
/src/stopwatch.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Carnegie Mellon University
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #pragma once
16 | 
17 | #include "common.h"
18 | #include <rte_cycles.h>
19 | // Tick rates measured by the calibration below; they are 0 until mehcached_stopwatch_init_end() has completed.
20 | extern uint64_t mehcached_stopwatch_1_sec;		// this should be used whenever possible for better accuracy than below
21 | extern uint64_t mehcached_stopwatch_1_msec;
22 | extern uint64_t mehcached_stopwatch_1_usec;
23 | // Calibration: call init_start() first, then init_end(); both are declared (void) so C checks calls against a prototype.
24 | void
25 | mehcached_stopwatch_init_start(void);
26 | 
27 | void
28 | mehcached_stopwatch_init_end(void);
29 | // Returns the current tick count as reported by rte_rdtsc().
30 | static
31 | uint64_t
32 | mehcached_stopwatch_now()
33 | {
34 | 	return rte_rdtsc();
35 | }
36 | // Converts the tick interval new_t - old_t to microseconds; mehcached_stopwatch_1_sec must already be calibrated (non-zero).
37 | static
38 | uint64_t
39 | mehcached_stopwatch_diff_in_us(uint64_t new_t, uint64_t old_t)
40 | {	// whole seconds and the sub-second remainder are scaled separately so that "* 1000000" cannot overflow on long intervals
41 | 	return (new_t - old_t) / mehcached_stopwatch_1_sec * 1000000UL + (new_t - old_t) % mehcached_stopwatch_1_sec * 1000000UL / mehcached_stopwatch_1_sec;
42 | }
43 | // Same interval as mehcached_stopwatch_diff_in_us(), converted to seconds as a double (so it has microsecond resolution).
44 | static
45 | double
46 | mehcached_stopwatch_diff_in_s(uint64_t new_t, uint64_t old_t)
47 | {
48 | 	return (double)mehcached_stopwatch_diff_in_us(new_t, old_t) * 0.000001;
49 | }


--------------------------------------------------------------------------------
/src/table.h:
--------------------------------------------------------------------------------
  1 | // Copyright 2014 Carnegie Mellon University
  2 | //
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //     http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | #pragma once
 16 | 
 17 | #include "common.h"
 18 | #include "alloc_pool.h"
 19 | #include "alloc_malloc.h"
 20 | #include "alloc_dynamic.h"
 21 | 
 22 | MEHCACHED_BEGIN
 23 | 
 24 | #define MEHCACHED_MAX_KEY_LENGTH (255)
 25 | #define MEHCACHED_MAX_VALUE_LENGTH (1048575)
 26 | 
 27 | 
 28 | #ifndef MEHCACHED_NO_EVICTION
 29 | // #define MEHCACHED_ITEMS_PER_BUCKET (7)
 30 | #define MEHCACHED_ITEMS_PER_BUCKET (15)
 31 | #else
 32 | #define MEHCACHED_ITEMS_PER_BUCKET (7)
 33 | // #define MEHCACHED_ITEMS_PER_BUCKET (15)
 34 | #endif
 35 | 
 36 | // do move-to-head when (item's distance from tail) >= (pool size) * mth_threshold
 37 | // 0.0: full LRU; 1.0: full FIFO
 38 | #define MEHCACHED_MTH_THRESHOLD_FIFO (1.0)
 39 | #define MEHCACHED_MTH_THRESHOLD_LRU (0.0)
 40 | 
 41 | #define MEHCACHED_SINGLE_ALLOC
 42 | 
 43 | #ifdef MEHCACHED_COLLECT_STATS
 44 | #define MEHCACHED_STAT_INC(table, name) do { __sync_add_and_fetch(&(table)->stats.name, 1); } while (0)
 45 | #define MEHCACHED_STAT_DEC(table, name) do { __sync_sub_and_fetch(&(table)->stats.name, 1); } while (0)
 46 | #else
 47 | #define MEHCACHED_STAT_INC(table, name) do { (void)table; } while (0)
 48 | #define MEHCACHED_STAT_DEC(table, name) do { (void)table; } while (0)
 49 | #endif
 50 | // Result codes; struct mehcached_request stores one of these in its uint8_t result field.
 51 | typedef enum _MEHCACHED_RESULT
 52 | {
 53 |     MEHCACHED_OK = 0,
 54 |     MEHCACHED_ERROR,
 55 |     MEHCACHED_FULL,
 56 |     MEHCACHED_EXIST,
 57 |     MEHCACHED_NOT_FOUND,
 58 |     MEHCACHED_PARTIAL_VALUE,
 59 |     MEHCACHED_NOT_PROCESSED,
 60 | } MEHCACHED_RESULT;
 61 | // One hash bucket: a version word, a link to an extra (overflow) bucket and MEHCACHED_ITEMS_PER_BUCKET packed item words.
 62 | struct mehcached_bucket
 63 | {
 64 |     uint32_t version;   // XXX: is uint32_t wide enough?
 65 |     uint32_t next_extra_bucket_index;   // 1-base; 0 = no extra bucket
 66 |     uint64_t item_vec[MEHCACHED_ITEMS_PER_BUCKET];
 67 |     // bit layout of one item_vec word, most significant field first:
 68 |     // 16: tag (1-base)
 69 |     //  8: alloc id
 70 |     // 40: item offset
 71 |     // item == 0: empty item
 72 |     // (when MEHCACHED_SINGLE_ALLOC is defined there is no alloc id field and the item offset uses the low 48 bits)
 73 |     #define MEHCACHED_TAG_MASK (((uint64_t)1 << 16) - 1)
 74 |     #define MEHCACHED_TAG(item_vec) ((item_vec) >> 48)
 75 | 
 76 | #ifndef MEHCACHED_SINGLE_ALLOC
 77 |     #define MEHCACHED_ALLOC_ID_MASK (((uint64_t)1 << 8) - 1)
 78 |     #define MEHCACHED_ALLOC_ID(item_vec) (((item_vec) >> 40) & MEHCACHED_ALLOC_ID_MASK)
 79 | #else
 80 |     #define MEHCACHED_ALLOC_ID(item_vec) (0LU)
 81 | #endif
 82 | 
 83 | #ifndef MEHCACHED_SINGLE_ALLOC
 84 |     #define MEHCACHED_ITEM_OFFSET_MASK (((uint64_t)1 << 40) - 1)
 85 | #else
 86 |     #define MEHCACHED_ITEM_OFFSET_MASK (((uint64_t)1 << 48) - 1)
 87 | #endif
 88 |     #define MEHCACHED_ITEM_OFFSET(item_vec) ((item_vec) & MEHCACHED_ITEM_OFFSET_MASK)
 89 |     // builds an item_vec word; the single-alloc variant ignores alloc_id and does not mask item_offset
 90 | #ifndef MEHCACHED_SINGLE_ALLOC
 91 |     #define MEHCACHED_ITEM_VEC(tag, alloc_id, item_offset) (((uint64_t)(tag) << 48) | ((uint64_t)(alloc_id) << 40) | (uint64_t)(item_offset))
 92 | #else
 93 |     #define MEHCACHED_ITEM_VEC(tag, alloc_id, item_offset) (((uint64_t)(tag) << 48) | (uint64_t)(item_offset))
 94 | #endif
 95 | };
 96 | // Header of a stored item; the key and value bytes presumably follow in data[] -- confirm against table.c.
 97 | struct mehcached_item
 98 | {
 99 |     struct mehcached_alloc_item alloc_item;
100 | 
101 |     uint32_t kv_length_vec; // key_length: 8, value_length: 24; kv_length_vec == 0: empty item
102 |     // the key length occupies the top 8 bits of kv_length_vec, the value length the low 24 bits
103 |     #define MEHCACHED_KEY_MASK (((uint32_t)1 << 8) - 1)
104 |     #define MEHCACHED_KEY_LENGTH(kv_length_vec) ((kv_length_vec) >> 24)
105 | 
106 |     #define MEHCACHED_VALUE_MASK (((uint32_t)1 << 24) - 1)
107 |     #define MEHCACHED_VALUE_LENGTH(kv_length_vec) ((kv_length_vec) & MEHCACHED_VALUE_MASK)
108 |     // packs both lengths into one word; neither argument is masked, so callers must pass in-range values
109 |     #define MEHCACHED_KV_LENGTH_VEC(key_length, value_length) (((uint32_t)(key_length) << 24) | (uint32_t)(value_length))
110 | 
111 |     // the rest is meaningful only when kv_length_vec != 0
112 |     uint32_t expire_time;
113 |     uint64_t key_hash;
114 |     uint8_t data[0];
115 | };
116 | 
117 | #define MEHCACHED_MAX_POOLS (16)
118 | // One table instance: the allocator selected at compile time, the bucket arrays and (optionally) operation statistics.
119 | struct mehcached_table
120 | {
121 | #ifdef MEHCACHED_ALLOC_POOL
122 |     struct mehcached_pool alloc[MEHCACHED_MAX_POOLS];
123 |     uint8_t alloc_id_mask;
124 |     uint64_t mth_threshold;    // NOTE(review): integer here, but mehcached_table_init() takes mth_threshold as a double -- confirm the conversion
125 | #endif
126 | #ifdef MEHCACHED_ALLOC_MALLOC
127 |     struct mehcached_malloc alloc;
128 | #endif
129 | #ifdef MEHCACHED_ALLOC_DYNAMIC
130 |     struct mehcached_dynamic alloc;
131 | #endif
132 | 
133 |     struct mehcached_bucket *buckets;
134 |     struct mehcached_bucket *extra_buckets; // = (buckets + num_buckets); extra_buckets[0] is not used because index 0 indicates "no more extra bucket"
135 | 
136 |     uint8_t concurrent_access_mode;
137 | 
138 |     uint32_t num_buckets;
139 |     uint32_t num_buckets_mask;  // presumably num_buckets - 1 for a power-of-two bucket count -- confirm in table.c
140 |     uint32_t num_extra_buckets;
141 |     // free list of extra buckets, aligned to a 64-byte boundary
142 |     struct
143 |     {
144 |         uint32_t lock;
145 |         uint32_t head;   // 1-base; 0 = no extra bucket
146 |     } extra_bucket_free_list MEHCACHED_ALIGNED(64);
147 | 
148 |     uint8_t rshift;
149 |     // counters updated through MEHCACHED_STAT_INC / MEHCACHED_STAT_DEC, present only with MEHCACHED_COLLECT_STATS
150 | #ifdef MEHCACHED_COLLECT_STATS
151 |     struct
152 |     {
153 |         size_t count;
154 |         size_t set_nooverwrite;
155 |         size_t set_new;
156 |         size_t set_inplace;
157 |         size_t set_evicted;
158 |         size_t get_found;
159 |         size_t get_notfound;
160 |         size_t test_found;
161 |         size_t test_notfound;
162 |         size_t delete_found;
163 |         size_t delete_notfound;
164 |         size_t cleanup;
165 |         size_t move_to_head_performed;
166 |         size_t move_to_head_skipped;
167 |         size_t move_to_head_failed;
168 |     } stats;
169 | #endif
170 | } MEHCACHED_ALIGNED(64);
171 | // State produced by mehcached_prefetch_table() (out parameter) and then handed to mehcached_prefetch_alloc().
172 | struct mehcached_prefetch_state
173 | {
174 |     struct mehcached_table *table;
175 |     struct mehcached_bucket *bucket;
176 |     uint64_t key_hash;
177 | };
178 | // Operation codes; struct mehcached_request carries one of these in its uint8_t operation field.
179 | typedef enum _MEHCACHED_OPERATION
180 | {
181 |     MEHCACHED_NOOP_READ = 0,
182 |     MEHCACHED_NOOP_WRITE,
183 |     MEHCACHED_ADD,
184 |     MEHCACHED_SET,
185 |     MEHCACHED_GET,
186 |     MEHCACHED_TEST,
187 |     MEHCACHED_DELETE,
188 |     MEHCACHED_INCREMENT,
189 | } MEHCACHED_OPERATION;
190 | // Fixed-size, 24-byte request header; each bare number is the byte offset of the field that follows it.
191 | struct mehcached_request
192 | {
193 |     // 0
194 |     uint8_t operation;  // of enum MEHCACHED_OPERATION type
195 |     uint8_t result;     // of enum MEHCACHED_RESULT type
196 |     // 2
197 |     uint16_t reserved0;
198 |     // 4
199 |     uint32_t kv_length_vec;    // same packing as MEHCACHED_KV_LENGTH_VEC, judging by the name -- confirm
200 |     // 8
201 |     uint64_t key_hash;
202 |     // 16
203 |     uint32_t expire_time;
204 |     // 20
205 |     uint32_t reserved1;
206 |     // 24
207 | };
208 | 
209 | static
210 | void
211 | mehcached_print_bucket(const struct mehcached_bucket *bucket);
212 | 
213 | static
214 | void
215 | mehcached_print_buckets(const struct mehcached_table *table);
216 | 
217 | static
218 | void
219 | mehcached_print_stats(const struct mehcached_table *table);
220 | 
221 | static
222 | void
223 | mehcached_reset_table_stats(struct mehcached_table *table);
224 | 
225 | static
226 | uint32_t
227 | mehcached_calc_bucket_index(const struct mehcached_table *table, uint64_t key_hash);
228 | 
229 | static
230 | uint16_t
231 | mehcached_calc_tag(uint64_t key_hash);
232 | 
233 | static
234 | void
235 | mehcached_set_item(struct mehcached_item *item, uint64_t key_hash, const uint8_t *key, uint32_t key_length, const uint8_t *value, uint32_t value_length, uint32_t expire_time);
236 | 
237 | static
238 | void
239 | mehcached_set_item_value(struct mehcached_item *item, const uint8_t *value, uint32_t value_length, uint32_t expire_time);
240 | 
241 | static
242 | bool
243 | mehcached_compare_keys(const uint8_t *key1, size_t key1_len, const uint8_t *key2, size_t key2_len);
244 | 
245 | static
246 | void
247 | mehcached_cleanup_all(uint8_t current_alloc_id, struct mehcached_table *table);
248 | 
249 | static
250 | void
251 | mehcached_prefetch_table(struct mehcached_table *table, uint64_t key_hash, struct mehcached_prefetch_state *out_prefetch_state);
252 | 
253 | static
254 | void
255 | mehcached_prefetch_alloc(struct mehcached_prefetch_state *in_out_prefetch_state);
256 | 
257 | static
258 | bool
259 | mehcached_get(uint8_t current_alloc_id, struct mehcached_table *table, uint64_t key_hash, const uint8_t *key, size_t key_length, uint8_t *out_value, size_t *in_out_value_length, uint32_t *out_expire_time, bool readonly);
260 | 
261 | static
262 | bool
263 | mehcached_test(uint8_t current_alloc_id, struct mehcached_table *table, uint64_t key_hash, const uint8_t *key, size_t key_length);
264 | 
265 | static
266 | bool
267 | mehcached_set(uint8_t current_alloc_id, struct mehcached_table *table, uint64_t key_hash, const uint8_t *key, size_t key_length, const uint8_t *value, size_t value_length, uint32_t expire_time, bool overwrite);
268 | 
269 | static
270 | bool
271 | mehcached_delete(uint8_t current_alloc_id, struct mehcached_table *table, uint64_t key_hash, const uint8_t *key, size_t key_length);
272 | 
273 | static
274 | bool
275 | mehcached_increment(uint8_t current_alloc_id, struct mehcached_table *table, uint64_t key_hash, const uint8_t *key, size_t key_length, uint64_t increment, uint64_t *out_new_value, uint32_t expire_time);
276 | 
277 | static
278 | void
279 | mehcached_process_batch(uint8_t current_alloc_id, struct mehcached_table *table, struct mehcached_request *requests, size_t num_requests, const uint8_t *in_data, uint8_t *out_data, size_t *out_data_length, bool readonly);
280 | 
281 | static
282 | void
283 | mehcached_table_reset(struct mehcached_table *table);
284 | 
285 | static
286 | void
287 | mehcached_table_init(struct mehcached_table *table, size_t num_buckets, size_t num_pools, size_t pool_size, bool concurrent_table_read, bool concurrent_table_write, bool concurrent_alloc_write, size_t table_numa_node, size_t alloc_numa_nodes[], double mth_threshold);
288 | 
289 | static
290 | void
291 | mehcached_table_free(struct mehcached_table *table);
292 | 
293 | MEHCACHED_END
294 | 
295 | 


--------------------------------------------------------------------------------
/src/test.c:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Carnegie Mellon University
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include <stdio.h>
16 | #include <assert.h>
17 | 
18 | #include "mehcached.h"
19 | #include "hash.h"
20 | // Smoke test: fills one bucket's worth of keys, overwrites every value, then reads all of them back and prints the results.
21 | void
22 | test_basic()
23 | {
24 |     printf("test_basic()\n");
25 | 
26 |     // one bucket, one pool of size 256, every concurrency option disabled
27 |     size_t numa_nodes[] = {(size_t)-1};
28 |     struct mehcached_table table_o;
29 |     struct mehcached_table *table = &table_o;
30 |     mehcached_table_init(table, 1, 1, 256, false, false, false, numa_nodes[0], numa_nodes, MEHCACHED_MTH_THRESHOLD_FIFO);
31 |     assert(table);
32 | 
33 |     // phase 1: add key i -> value i without overwriting
34 |     for (size_t i = 0; i < MEHCACHED_ITEMS_PER_BUCKET; i++)
35 |     {
36 |         const size_t key = i;
37 |         const size_t value = i;
38 |         const uint64_t key_hash = hash((const uint8_t *)&key, sizeof(key));
39 |         if (mehcached_set(0, table, key_hash, (const uint8_t *)&key, sizeof(key), (const uint8_t *)&value, sizeof(value), 0, false))
40 |             continue;
41 |         assert(false);
42 |     }
43 |     // phase 2: overwrite each key with value 100 + i
44 |     for (size_t i = 0; i < MEHCACHED_ITEMS_PER_BUCKET; i++)
45 |     {
46 |         const size_t key = i;
47 |         const size_t value = 100 + i;
48 |         const uint64_t key_hash = hash((const uint8_t *)&key, sizeof(key));
49 |         if (mehcached_set(0, table, key_hash, (const uint8_t *)&key, sizeof(key), (const uint8_t *)&value, sizeof(value), 0, true))
50 |             continue;
51 |         assert(false);
52 |     }
53 |     // phase 3: read every key back; the output buffer is shared by all iterations
54 |     size_t value = 0;
55 |     for (size_t i = 0; i < MEHCACHED_ITEMS_PER_BUCKET; i++)
56 |     {
57 |         const size_t key = i;
58 |         const uint64_t key_hash = hash((const uint8_t *)&key, sizeof(key));
59 |         size_t value_length = sizeof(value);
60 | 
61 |         if (mehcached_get(0, table, key_hash, (const uint8_t *)&key, sizeof(key), (uint8_t *)&value, &value_length, NULL, false))
62 |         {
63 |             assert(value_length == sizeof(value));
64 |             printf("get key = %zu, value = %zu\n", key, value);
65 |         }
66 |         else
67 |             printf("get key = %zu, value = <not found>\n", key);
68 |     }
69 | 
70 |     mehcached_print_stats(table);
71 | 
72 |     mehcached_table_free(table);
73 | }
74 | // Entry point: initializes the shm layer (2 MiB pages, 2 NUMA nodes), then runs test_basic().
75 | int
76 | main(int argc MEHCACHED_UNUSED, const char *argv[] MEHCACHED_UNUSED)
77 | {
78 |     const size_t page_size = 1048576 * 2;
79 |     const size_t num_numa_nodes = 2;
80 |     const size_t num_pages_to_try = 16384;
81 |     const size_t num_pages_to_reserve = num_pages_to_try - 2048;   // give 2048 pages to dpdk
82 | 
83 |     mehcached_shm_init(page_size, num_numa_nodes, num_pages_to_try, num_pages_to_reserve);
84 | 
85 |     test_basic();
86 | 
87 |     return EXIT_SUCCESS;
88 | }
89 | 
90 | 


--------------------------------------------------------------------------------
/src/util.c:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 Carnegie Mellon University
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include "util.h"
16 | 
17 | 


--------------------------------------------------------------------------------
/src/util.h:
--------------------------------------------------------------------------------
  1 | // Copyright 2014 Carnegie Mellon University
  2 | //
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //     http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | #pragma once
 16 | 
 17 | #include "common.h"
 18 | #include "shm.h"
 19 | 
 20 | #include <assert.h>
 21 | 
 22 | #include <stdlib.h>
 23 | #include <string.h>
 24 | 
 25 | #include <sys/time.h>
 26 | #include <sys/resource.h>
 27 | 
 28 | #ifdef USE_DPDK
 29 | #include <rte_memcpy.h>
 30 | #endif
 31 | 
 32 | #define MEHCACHED_ROUNDUP8(x) (((x) + 7UL) & (~7UL))
 33 | #define MEHCACHED_ROUNDUP64(x) (((x) + 63UL) & (~63UL))
 34 | #define MEHCACHED_ROUNDUP4K(x) (((x) + 4095UL) & (~4095UL))
 35 | #define MEHCACHED_ROUNDUP1M(x) (((x) + 1048575UL) & (~1048575UL))
 36 | #define MEHCACHED_ROUNDUP2M(x) (((x) + 2097151UL) & (~2097151UL))
 37 | 
 38 | MEHCACHED_BEGIN
 39 | 
 40 | static
 41 | size_t
 42 | mehcached_next_power_of_two(size_t v)
 43 | {
 44 |     size_t s = 0;
 45 |     while (((size_t)1 << s) < v)
 46 |         s++;
 47 |     return (size_t)1 << s;
 48 | }
 49 | 
 50 | static
 51 | void
 52 | memory_barrier()
 53 | {
 54 |     asm volatile("" ::: "memory");
 55 | }
 56 | 
 57 | static
 58 | void
 59 | mehcached_memcpy8(uint8_t *dest, const uint8_t *src, size_t length)
 60 | {
 61 |     length = MEHCACHED_ROUNDUP8(length);
 62 | #ifndef USE_DPDK
 63 |     switch (length >> 3)
 64 |     {
 65 |         case 0:
 66 |             break;
 67 |         case 1:
 68 |             *(uint64_t *)(dest + 0) = *(const uint64_t *)(src + 0);
 69 |             break;
 70 |         case 2:
 71 |             *(uint64_t *)(dest + 0) = *(const uint64_t *)(src + 0);
 72 |             *(uint64_t *)(dest + 8) = *(const uint64_t *)(src + 8);
 73 |             break;
 74 |         case 3:
 75 |             *(uint64_t *)(dest + 0) = *(const uint64_t *)(src + 0);
 76 |             *(uint64_t *)(dest + 8) = *(const uint64_t *)(src + 8);
 77 |             *(uint64_t *)(dest + 16) = *(const uint64_t *)(src + 16);
 78 |             break;
 79 |         case 4:
 80 |             *(uint64_t *)(dest + 0) = *(const uint64_t *)(src + 0);
 81 |             *(uint64_t *)(dest + 8) = *(const uint64_t *)(src + 8);
 82 |             *(uint64_t *)(dest + 16) = *(const uint64_t *)(src + 16);
 83 |             *(uint64_t *)(dest + 24) = *(const uint64_t *)(src + 24);
 84 |             break;
 85 |         default:
 86 |             memcpy(dest, src, length);
 87 |             break;
 88 |     }
 89 | #else
 90 |     rte_memcpy(dest, src, length);
 91 | #endif
 92 | }
 93 | 
 94 | static
 95 | bool
 96 | mehcached_memcmp8(const uint8_t *dest, const uint8_t *src, size_t length)
 97 | {
 98 |     length = MEHCACHED_ROUNDUP8(length);
 99 |     switch (length >> 3)
100 |     {
101 |         case 0:
102 |             return true;
103 |         case 1:
104 |             if (*(const uint64_t *)(dest + 0) != *(const uint64_t *)(src + 0))
105 |                 return false;
106 |             return true;
107 |         case 2:
108 |             if (*(const uint64_t *)(dest + 0) != *(const uint64_t *)(src + 0))
109 |                 return false;
110 |             if (*(const uint64_t *)(dest + 8) != *(const uint64_t *)(src + 8))
111 |                 return false;
112 |             return true;
113 |         case 3:
114 |             if (*(const uint64_t *)(dest + 0) != *(const uint64_t *)(src + 0))
115 |                 return false;
116 |             if (*(const uint64_t *)(dest + 8) != *(const uint64_t *)(src + 8))
117 |                 return false;
118 |             if (*(const uint64_t *)(dest + 16) != *(const uint64_t *)(src + 16))
119 |                 return false;
120 |             return true;
121 |         case 4:
122 |             if (*(const uint64_t *)(dest + 0) != *(const uint64_t *)(src + 0))
123 |                 return false;
124 |             if (*(const uint64_t *)(dest + 8) != *(const uint64_t *)(src + 8))
125 |                 return false;
126 |             if (*(const uint64_t *)(dest + 16) != *(const uint64_t *)(src + 16))
127 |                 return false;
128 |             if (*(const uint64_t *)(dest + 24) != *(const uint64_t *)(src + 24))
129 |                 return false;
130 |             return true;
131 |         default:
132 |             return memcmp(dest, src, length) == 0;
133 |     }
134 | }
135 | 
// Advances the 48-bit linear congruential generator held in *state and
// returns the upper 32 bits of the new state.
static
uint32_t
mehcached_rand(uint64_t *state)
{
    // same as Java's
    const uint64_t multiplier = 0x5deece66dUL;
    const uint64_t increment = 0xbUL;
    const uint64_t mask48 = (1UL << 48) - 1;

    const uint64_t next = (*state * multiplier + increment) & mask48;
    *state = next;
    return (uint32_t)(next >> (48 - 32));
}
144 | 
// Advances the 48-bit linear congruential generator held in *state and
// returns the new state scaled into the half-open interval [0, 1).
//
// The state is divided by 2^48 (not 2^48 - 1) so that the result can never
// be exactly 1.0; callers that compute (uint64_t)(n * u) therefore always
// obtain an index strictly below n.
static
double
mehcached_rand_d(uint64_t *state)
{
    // caution: this is maybe too non-random
    *state = (*state * 0x5deece66dUL + 0xbUL) & ((1UL << 48) - 1);
    return (double)*state / (double)(1UL << 48);
}
153 | 
// Returns an estimate of the memory used by this process, in bytes:
// getrusage()'s ru_maxrss for the calling process multiplied by 1024, plus
// whatever mehcached_shm_get_memuse() reports.
static
size_t
mehcached_get_memuse()
{
    struct rusage self_usage;
    const int rc = getrusage(RUSAGE_SELF, &self_usage);
    assert(rc == 0);
    (void)rc;   // keeps rc "used" when assert() is compiled out

    const size_t process_bytes = (size_t)self_usage.ru_maxrss * 1024;
    return process_bytes + mehcached_shm_get_memuse();
}
164 | 
165 | #include <rte_launch.h>
166 | #include <rte_eal.h>
167 | #include <rte_lcore.h>
168 | #include <rte_byteorder.h>
169 | #include <rte_log.h>
170 | #include <rte_malloc.h>
171 | #include <rte_debug.h>
172 | 
// Request/response record handed to mehcached_eal_malloc_lcore_internal().
// Use this for EAL-related memory allocation only (use mehcached_shm_malloc*
// instead for other cases).
struct mehcached_eal_malloc_arg
{
    size_t size;    // in: number of bytes to allocate
    void *ret;      // out: pointer returned by rte_malloc()
};
179 | 
180 | static
181 | int
182 | mehcached_eal_malloc_lcore_internal(void *arg)
183 | {
184 |     struct mehcached_eal_malloc_arg *malloc_arg = (struct mehcached_eal_malloc_arg *)arg;
185 |     malloc_arg->ret = rte_malloc(NULL, malloc_arg->size, 0);
186 |     return 0;
187 | }
188 | 
189 | static
190 | void *
191 | mehcached_eal_malloc_lcore(size_t size, size_t lcore)
192 | {
193 |     struct mehcached_eal_malloc_arg malloc_arg;
194 |     malloc_arg.size = size;
195 |     if (lcore == rte_lcore_id())
196 |         mehcached_eal_malloc_lcore_internal(&malloc_arg);
197 |     else
198 |     {
199 |         assert(rte_lcore_id() == rte_get_master_lcore());
200 |         rte_eal_remote_launch(mehcached_eal_malloc_lcore_internal, &malloc_arg, (unsigned int)lcore);
201 |         rte_eal_mp_wait_lcore();
202 |     }
203 |     return malloc_arg.ret;
204 | }
205 | 
206 | static
207 | void
208 | rte_eal_launch(lcore_function_t *f, void *arg, unsigned int core_id)
209 | {
210 |     if (core_id == rte_lcore_id())
211 |         f(arg);
212 |     else
213 |         rte_eal_remote_launch(f, arg, core_id);
214 | }
215 | 
216 | MEHCACHED_END
217 | 
218 | 


--------------------------------------------------------------------------------
/src/zipf.h:
--------------------------------------------------------------------------------
  1 | // Copyright 2014 Carnegie Mellon University
  2 | //
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //     http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | 
 15 | #pragma once
 16 | 
 17 | #include <stdlib.h>
 18 | #include <string.h>
 19 | #include <math.h>
 20 | #include <stdio.h>
 21 | #include "util.h"
 22 | 
 23 | struct zipf_gen_state
 24 | {
 25 | 	uint64_t n;			// number of items (input)
 26 | 	double theta;		// skewness (input) in (0, 1); or, 0 = uniform, 1 = always zero
 27 | 	double alpha;		// only depends on theta
 28 | 	double thres;		// only depends on theta
 29 | 	uint64_t last_n;	// last n used to calculate the following
 30 | 	double dbl_n;
 31 | 	double zetan;
 32 | 	double eta;
 33 | 	// unsigned short rand_state[3];		// prng state
 34 | 	uint64_t rand_state;
 35 | };
 36 | 
 37 | static
 38 | double
 39 | mehcached_pow_approx(double a, double b)
 40 | {
 41 | 	// from http://martin.ankerl.com/2012/01/25/optimized-approximative-pow-in-c-and-cpp/
 42 | 
 43 | 	// calculate approximation with fraction of the exponent
 44 | 	int e = (int)b;
 45 | 	union
 46 | 	{
 47 | 		double d;
 48 | 		int x[2];
 49 | 	} u = { a };
 50 | 	u.x[1] = (int)((b - (double)e) * (double)(u.x[1] - 1072632447) + 1072632447.);
 51 | 	u.x[0] = 0;
 52 | 
 53 | 	// exponentiation by squaring with the exponent's integer part
 54 | 	// double r = u.d makes everything much slower, not sure why
 55 | 	// TODO: use popcount?
 56 | 	double r = 1.;
 57 | 	while (e)
 58 | 	{
 59 | 		if (e & 1)
 60 | 			r *= a;
 61 | 		a *= a;
 62 | 		e >>= 1;
 63 | 	}
 64 | 
 65 | 	return r * u.d;
 66 | }
 67 | 
 68 | static
 69 | void
 70 | mehcached_zipf_init(struct zipf_gen_state *state, uint64_t n, double theta, uint64_t rand_seed)
 71 | {
 72 | 	assert(n > 0);
 73 | 	if (theta > 0.992 && theta < 1)
 74 | 		fprintf(stderr, "theta > 0.992 will be inaccurate due to approximation\n");
 75 | 	if (theta >= 1. && theta < 40.)
 76 | 	{
 77 | 		fprintf(stderr, "theta in [1., 40.) is not supported\n");
 78 | 		assert(false);
 79 | 	}
 80 | 	assert(theta == -1. || (theta >= 0. && theta < 1.) || theta >= 40.);
 81 | 	assert(rand_seed < (1UL << 48));
 82 | 	memset(state, 0, sizeof(struct zipf_gen_state));
 83 | 	state->n = n;
 84 | 	state->theta = theta;
 85 | 	if (theta == -1.)
 86 | 		rand_seed = rand_seed % n;
 87 | 	else if (theta > 0. && theta < 1.)
 88 | 	{
 89 | 		state->alpha = 1. / (1. - theta);
 90 | 		state->thres = 1. + mehcached_pow_approx(0.5, theta);
 91 | 	}
 92 | 	else
 93 | 	{
 94 | 		state->alpha = 0.;	// unused
 95 | 		state->thres = 0.;	// unused
 96 | 	}
 97 | 	state->last_n = 0;
 98 | 	state->zetan = 0.;
 99 | 	// state->rand_state[0] = (unsigned short)(rand_seed >> 0);
100 | 	// state->rand_state[1] = (unsigned short)(rand_seed >> 16);
101 | 	// state->rand_state[2] = (unsigned short)(rand_seed >> 32);
102 | 	state->rand_state = rand_seed;
103 | }
104 | 
105 | static
106 | void
107 | mehcached_zipf_init_copy(struct zipf_gen_state *state, const struct zipf_gen_state *src_state, uint64_t rand_seed)
108 | {
109 | 	assert(rand_seed < (1UL << 48));
110 | 	memcpy(state, src_state, sizeof(struct zipf_gen_state));
111 | 	// state->rand_state[0] = (unsigned short)(rand_seed >> 0);
112 | 	// state->rand_state[1] = (unsigned short)(rand_seed >> 16);
113 | 	// state->rand_state[2] = (unsigned short)(rand_seed >> 32);
114 | 	state->rand_state = rand_seed;
115 | }
116 | 
117 | static
118 | void
119 | mehcached_zipf_change_n(struct zipf_gen_state *state, uint64_t n)
120 | {
121 | 	state->n = n;
122 | }
123 | 
// Returns sum_{i=1..n} 1 / i^theta, using mehcached_pow_approx() for the
// powers.
//
// (last_n, last_sum) is a previously computed partial result; when
// last_n <= n the sum is extended from there, otherwise it is recomputed
// from scratch.
static
double
mehcached_zeta(uint64_t last_n, double last_sum, uint64_t n, double theta)
{
	uint64_t i;
	double sum;

	if (last_n > n)
	{
		// cached prefix covers too many terms; start over
		i = 0;
		sum = 0.;
	}
	else
	{
		i = last_n;
		sum = last_sum;
	}

	for (; i < n; i++)
		sum += 1. / mehcached_pow_approx((double)i + 1., theta);
	return sum;
}
140 | 
141 | static
142 | uint64_t
143 | mehcached_zipf_next(struct zipf_gen_state *state)
144 | {
145 | 	if (state->last_n != state->n)
146 | 	{
147 | 		if (state->theta > 0. && state->theta < 1.)
148 | 		{
149 | 			state->zetan = mehcached_zeta(state->last_n, state->zetan, state->n, state->theta);
150 | 			state->eta = (1. - mehcached_pow_approx(2. / (double)state->n, 1. - state->theta)) /
151 | 						 (1. - mehcached_zeta(0, 0., 2, state->theta) / state->zetan);
152 | 		}
153 | 		state->last_n = state->n;
154 | 		state->dbl_n = (double)state->n;
155 | 	}
156 | 
157 | 	if (state->theta == -1.)
158 | 	{
159 | 		uint64_t v = state->rand_state;
160 | 		if (++state->rand_state >= state->n)
161 | 			state->rand_state = 0;
162 | 		return v;
163 | 	}
164 | 	else if (state->theta == 0.)
165 | 	{
166 | 		double u = mehcached_rand_d(&state->rand_state);
167 | 		return (uint64_t)(state->dbl_n * u);
168 | 	}
169 | 	else if (state->theta >= 40.)
170 | 	{
171 | 		return 0UL;
172 | 	}
173 | 	else
174 | 	{
175 | 		// from J. Gray et al. Quickly generating billion-record synthetic databases. In SIGMOD, 1994.
176 | 
177 | 		// double u = erand48(state->rand_state);
178 | 		double u = mehcached_rand_d(&state->rand_state);
179 | 		double uz = u * state->zetan;
180 | 		if (uz < 1.)
181 | 			return 0UL;
182 | 		else if (uz < state->thres)
183 | 			return 1UL;
184 | 		else
185 | 			return (uint64_t)(state->dbl_n * mehcached_pow_approx(state->eta * (u - 1.) + 1., state->alpha));
186 | 	}
187 | }
188 | 
189 | static
190 | void
191 | mehcached_test_zipf(double theta)
192 | {
193 | 	double zetan = 0.;
194 | 	const uint64_t n = 1000000UL;
195 | 	uint64_t i;
196 | 
197 | 	for (i = 0; i < n; i++)
198 | 		zetan += 1. / pow((double)i + 1., theta);
199 | 
200 | 	struct zipf_gen_state state;
201 | 	if (theta < 1. || theta >= 40.)
202 | 		mehcached_zipf_init(&state, n, theta, 0);
203 | 
204 | 	uint64_t num_key0 = 0;
205 | 	const uint64_t num_samples = 10000000UL;
206 | 	if (theta < 1. || theta >= 40.)
207 | 	{
208 | 		for (i = 0; i < num_samples; i++)
209 | 			if (mehcached_zipf_next(&state) == 0)
210 | 				num_key0++;
211 | 	}
212 | 
213 | 	printf("theta = %lf; using pow(): %.10lf", theta, 1. / zetan);
214 | 	if (theta < 1. || theta >= 40.)
215 | 		printf(", using approx-pow(): %.10lf", (double)num_key0 / (double)num_samples);
216 | 	printf("\n");
217 | }
218 | 


--------------------------------------------------------------------------------