├── CMakeLists.txt ├── DEBBGED.md ├── Dockerfile ├── LICENSE ├── README.md ├── config.h ├── ctpl.h ├── gd.cc ├── grad.cc ├── grad.h ├── input.cc ├── input.h ├── jit.cc ├── jit.h ├── pro.log ├── rgd.cc ├── rgd.proto ├── rgdJit.h ├── rgd_op.h ├── test.h ├── util.cc ├── util.h ├── wheels ├── concurrentqueue │ └── queue.h ├── lockfreehash │ ├── cuckoo │ │ ├── Makefile │ │ ├── benchmark_lockfree_ht.h │ │ ├── benchmark_unordered_map.h │ │ ├── cycle_timer.h │ │ ├── hash_table.h │ │ ├── lockfree_hash_table.cpp │ │ ├── lockfree_hash_table.h │ │ ├── main.cpp │ │ └── thread_service.h │ └── lprobe │ │ ├── Makefile │ │ ├── alloc.h │ │ ├── benchmark_lprobe.h │ │ ├── benchmark_lprobe_ptr.h │ │ ├── block_allocator.h │ │ ├── concurrent_stack.h │ │ ├── cycle_timer.h │ │ ├── data.h │ │ ├── data_ptr.h │ │ ├── get_time.h │ │ ├── hash_table.h │ │ ├── main.cc │ │ ├── memory_size.h │ │ ├── monoid.h │ │ ├── parallel.h │ │ ├── seq.h │ │ ├── sequence_ops.h │ │ ├── thread_service.h │ │ ├── thread_service_ptr.h │ │ └── utilities.h └── threadpool │ ├── ThreadPool.h │ ├── ctpl.h │ └── threadpool_example.cpp ├── xxhash.cc └── xxhash.h /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.5.1) 2 | 3 | project(rgd C CXX) 4 | 5 | set(CMAKE_CXX_STANDARD 14) 6 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -g -std=c++14 -mcx16 -march=native -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free") 7 | # set(CMAKE_LINKER_FLAGS "${CMAKE_LINKER_FLAGS} -fsanitize=address") 8 | set(CMAKE_TEST_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin/test) 9 | 10 | find_package(LLVM 12 REQUIRED CONFIG) 11 | 12 | if (LLVM_FOUND) 13 | message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") 14 | message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") 15 | else() 16 | message(FATAL_ERROR "You haven't install LLVM !") 17 | endif() 18 | 19 | 20 | include_directories(${LLVM_INCLUDE_DIRS}) 21 | add_definitions(${LLVM_DEFINITIONS}) 
22 | 23 | # Proto file 24 | get_filename_component(rgd_proto "rgd.proto" ABSOLUTE) 25 | get_filename_component(rgd_proto_path "${rgd_proto}" PATH) 26 | 27 | add_custom_target(proto ALL DEPENDS ${rgd_proto_srcs} ${rgd_proto_hdrs}) 28 | # Generated sources 29 | set(rgd_proto_srcs "${CMAKE_CURRENT_BINARY_DIR}/rgd.pb.cc") 30 | set(rgd_proto_hdrs "${CMAKE_CURRENT_BINARY_DIR}/rgd.pb.h") 31 | add_custom_command( 32 | OUTPUT "${rgd_proto_srcs}" "${rgd_proto_hdrs}" 33 | COMMAND protoc 34 | ARGS --cpp_out "${CMAKE_CURRENT_BINARY_DIR}" 35 | -I "${rgd_proto_path}" 36 | "${rgd_proto}" 37 | DEPENDS "${rgd_proto}") 38 | 39 | # Include generated *.pb.h files 40 | include_directories("${CMAKE_CURRENT_BINARY_DIR}") 41 | 42 | add_library(gd 43 | gd.cc 44 | input.cc 45 | grad.cc 46 | ) 47 | 48 | add_executable(rgd 49 | rgd.cc 50 | jit.cc 51 | util.cc 52 | xxhash.cc 53 | ${rgd_proto_srcs}) 54 | 55 | target_link_libraries(rgd 56 | protobuf 57 | tcmalloc 58 | profiler 59 | gd 60 | LLVM 61 | pthread 62 | boost_system 63 | boost_filesystem) 64 | -------------------------------------------------------------------------------- /DEBBGED.md: -------------------------------------------------------------------------------- 1 | ## AST encoding 2 | 3 | Previsouly we only consider non-leaf parts of AST in the function cache. This becomes a problem when cache is used with tree-deduplication. 4 | 5 | Consider two expressions Sub(Add(A+B),A) and Sub(Add(A+B),B), where A, B are inputs. When the first query is processed, the JITTed function Sub(Add(arg0,arg1),arg0) will be saved in the cache. Because of tree-dedupliation, then second expression will be parsed as Sub(Add(arg0,arg1),arg1). But because only non-leaf are considered, the JITed function saved for the first function will be used, resulting a function mismatch. 6 | 7 | In the new design, we traverse the tree using the post-order. We assigns index for each inputs and each constant and then include the indices for comparing the AST tree. 
In this way, the first expression and second expression will be different functions in the cache. 8 | 9 | ## New handling of relational expressions for better perf. in solving nested branches 10 | 11 | In the context of nested branches, it is common to see the exactly opposite checks in the constraints. (e.g. Ult(a,b) and Uge (a,b)). In the previous design, Ult(a,b) and Uge(a,b) are compiled as different functions, resulting in a lot of cache misses. To solve this issue, we compile the same function for Ult(a,b) and Uge(a,b), which just outputs zext(a) and zext(b). And then we calculate the distance by invoking the JIT function. 12 | 13 | | Comparison | JITed function | 14 | |--------------------------------------------------------------|--------------------------| 15 | | ult(a,b) ugt(a,b) ule(a,b) uge(a,b) equal(a,b) distance(a,b) | Outputs zext(a), zext(b) | 16 | | slt(a,b) sgt(a,b) sle(a,b) sge(a,b) | Outputs sext(a), sext(b) | 17 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | 3 | RUN apt-get update 4 | RUN apt-get install -y cmake build-essential autoconf git libtool python3-setuptools libboost-all-dev 5 | RUN apt-get install -y golang 6 | RUN apt-get install -y clang-9 libclang-9-dev llvm-9-dev curl gperf libgmp-dev 7 | RUN apt-get install -y cmake bison flex libboost-all-dev python perl minisat 8 | 9 | #protobuf 10 | RUN git clone https://github.com/protocolbuffers/protobuf.git /protobuf && \ 11 | cd /protobuf && \ 12 | git submodule update --init --recursive && \ 13 | ./autogen.sh && \ 14 | ./configure && \ 15 | make -j && \ 16 | make install && \ 17 | ldconfig 18 | 19 | 20 | 21 | WORKDIR /src 22 | #gperf 23 | RUN cd /src && git clone https://github.com/gperftools/gperftools.git && cd gperftools && \ 24 | git checkout gperftools-2.9.1 && ./autogen.sh && ./configure && make -j && make install && ldconfig 25 | 26
| COPY . /src/jigsaw 27 | 28 | RUN cd /src/jigsaw/ && mkdir build && cd build && cmake .. && make rgd -j 29 | 30 | 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # JIGSAW 2 | 3 | **Build** 4 | ``` 5 | mkdir build 6 | cd build && cmake .. && make 7 | ``` 8 | 9 | **Using Docker** 10 | 11 | ``` 12 | docker build -t jigsaw-test .
13 | 14 | # copy constraints files to /out/readelf inside the container 15 | docker run jigsaw-test /src/jigsaw/build/rgd 1 0 /out/readelf 16 | ``` 17 | 18 | **Replay from constraints files** 19 | ``` 20 | Command: 21 | ./rgd num_of_threads pin_core_start test_dir 22 | 23 | Example: 24 | # solve objdump constraints using 8 cores, starting from core 0 25 | ./rgd 8 0 objdump 26 | ``` 27 | 28 | **Constraints Files** 29 | 30 | https://jigsaw.cs.ucr.edu 31 | 32 | 33 | -------------------------------------------------------------------------------- /config.h: -------------------------------------------------------------------------------- 1 | #ifndef CONFIG_H_ 2 | #define CONFIG_H_ 3 | #define MAX_NUM_MINIMAL_OPTIMA_ROUND 32 4 | #define MAX_EXEC_TIMES 1000 5 | #endif 6 | -------------------------------------------------------------------------------- /ctpl.h: -------------------------------------------------------------------------------- 1 | 2 | /********************************************************* 3 | * 4 | * Copyright (C) 2014 by Vitaliy Vitsentiy 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | * 18 | *********************************************************/ 19 | 20 | 21 | #ifndef __ctpl_thread_pool_H__ 22 | #define __ctpl_thread_pool_H__ 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | 34 | 35 | #ifndef _ctplThreadPoolLength_ 36 | #define _ctplThreadPoolLength_ 2000000 37 | #endif 38 | 39 | 40 | // thread pool to run user's functors with signature 41 | // ret func(int id, other_params) 42 | // where id is the index of the thread that runs the functor 43 | // ret is some return type 44 | 45 | 46 | namespace ctpl { 47 | 48 | class thread_pool { 49 | 50 | public: 51 | 52 | thread_pool() : q(_ctplThreadPoolLength_) { this->init(); } 53 | thread_pool(int nThreads, int pin_core_start, int queueSize = 2000000) : core_start_(pin_core_start), q(queueSize) { this->init(); this->resize(nThreads); } 54 | 55 | // the destructor waits for all the functions in the queue to be finished 56 | ~thread_pool() { 57 | this->stop(true); 58 | } 59 | 60 | // get the number of running threads in the pool 61 | int size() { return static_cast(this->threads.size()); } 62 | 63 | // number of idle threads 64 | int n_idle() { return this->nWaiting; } 65 | std::thread & get_thread(int i) { return *this->threads[i]; } 66 | 67 | // change the number of threads in the pool 68 | // should be called from one thread, otherwise be careful to not interleave, also with this->stop() 69 | // nThreads must be >= 0 70 | void resize(int nThreads) { 71 | if (!this->isStop && !this->isDone) { 72 | int oldNThreads = static_cast(this->threads.size()); 73 | if (oldNThreads <= nThreads) { // if the number of threads is increased 74 | this->threads.resize(nThreads); 75 | this->flags.resize(nThreads); 76 | 77 | for (int i = oldNThreads; i < nThreads; ++i) { 78 | this->flags[i] = std::make_shared>(false); 79 | this->set_thread(i); 80 | } 81 | } 82 | else { // the number of threads is decreased 83 | for (int i = 
oldNThreads - 1; i >= nThreads; --i) { 84 | *this->flags[i] = true; // this thread will finish 85 | this->threads[i]->detach(); 86 | } 87 | { 88 | // stop the detached threads that were waiting 89 | std::unique_lock lock(this->mutex); 90 | this->cv.notify_all(); 91 | } 92 | this->threads.resize(nThreads); // safe to delete because the threads are detached 93 | this->flags.resize(nThreads); // safe to delete because the threads have copies of shared_ptr of the flags, not originals 94 | } 95 | } 96 | } 97 | 98 | // empty the queue 99 | void clear_queue() { 100 | std::function * _f; 101 | while (this->q.pop(_f)) 102 | delete _f; // empty the queue 103 | } 104 | 105 | // pops a functional wraper to the original function 106 | std::function pop() { 107 | std::function * _f = nullptr; 108 | this->q.pop(_f); 109 | std::unique_ptr> func(_f); // at return, delete the function even if an exception occurred 110 | 111 | std::function f; 112 | if (_f) 113 | f = *_f; 114 | return f; 115 | } 116 | 117 | 118 | // wait for all computing threads to finish and stop all threads 119 | // may be called asyncronously to not pause the calling thread while waiting 120 | // if isWait == true, all the functions in the queue are run, otherwise the queue is cleared without running the functions 121 | void stop(bool isWait = false) { 122 | if (!isWait) { 123 | if (this->isStop) 124 | return; 125 | this->isStop = true; 126 | for (int i = 0, n = this->size(); i < n; ++i) { 127 | *this->flags[i] = true; // command the threads to stop 128 | } 129 | this->clear_queue(); // empty the queue 130 | } 131 | else { 132 | if (this->isDone || this->isStop) 133 | return; 134 | this->isDone = true; // give the waiting threads a command to finish 135 | } 136 | { 137 | std::unique_lock lock(this->mutex); 138 | this->cv.notify_all(); // stop all waiting threads 139 | } 140 | for (int i = 0; i < static_cast(this->threads.size()); ++i) { // wait for the computing threads to finish 141 | if 
(this->threads[i]->joinable()) 142 | this->threads[i]->join(); 143 | } 144 | // if there were no threads in the pool but some functors in the queue, the functors are not deleted by the threads 145 | // therefore delete them here 146 | this->clear_queue(); 147 | this->threads.clear(); 148 | this->flags.clear(); 149 | } 150 | 151 | template 152 | auto push(F && f, Rest&&... rest) ->std::future { 153 | auto pck = std::make_shared>( 154 | std::bind(std::forward(f), std::placeholders::_1, std::forward(rest)...) 155 | ); 156 | 157 | auto _f = new std::function([pck](int id) { 158 | (*pck)(id); 159 | }); 160 | this->q.push(_f); 161 | 162 | std::unique_lock lock(this->mutex); 163 | this->cv.notify_one(); 164 | 165 | return pck->get_future(); 166 | } 167 | 168 | // run the user's function that excepts argument int - id of the running thread. returned value is templatized 169 | // operator returns std::future, where the user can get the result and rethrow the catched exceptins 170 | template 171 | auto push(F && f) ->std::future { 172 | auto pck = std::make_shared>(std::forward(f)); 173 | 174 | auto _f = new std::function([pck](int id) { 175 | (*pck)(id); 176 | }); 177 | this->q.push(_f); 178 | 179 | std::unique_lock lock(this->mutex); 180 | this->cv.notify_one(); 181 | 182 | return pck->get_future(); 183 | } 184 | 185 | 186 | private: 187 | 188 | // deleted 189 | thread_pool(const thread_pool &);// = delete; 190 | thread_pool(thread_pool &&);// = delete; 191 | thread_pool & operator=(const thread_pool &);// = delete; 192 | thread_pool & operator=(thread_pool &&);// = delete; 193 | 194 | void set_thread(int i) { 195 | std::shared_ptr> flag(this->flags[i]); // a copy of the shared ptr to the flag 196 | auto f = [this, i, flag/* a copy of the shared ptr to the flag */]() { 197 | std::atomic & _flag = *flag; 198 | std::function * _f; 199 | bool isPop = this->q.pop(_f); 200 | while (true) { 201 | while (isPop) { // if there is anything in the queue 202 | std::unique_ptr> 
func(_f); // at return, delete the function even if an exception occurred 203 | (*_f)(i); 204 | 205 | if (_flag) 206 | return; // the thread is wanted to stop, return even if the queue is not empty yet 207 | else 208 | isPop = this->q.pop(_f); 209 | } 210 | 211 | // the queue is empty here, wait for the next command 212 | std::unique_lock lock(this->mutex); 213 | ++this->nWaiting; 214 | this->cv.wait(lock, [this, &_f, &isPop, &_flag](){ isPop = this->q.pop(_f); return isPop || this->isDone || _flag; }); 215 | --this->nWaiting; 216 | 217 | if (!isPop) 218 | return; // if the queue is empty and this->isDone == true or *flag then return 219 | } 220 | }; 221 | this->threads[i].reset(new std::thread(f)); // compiler may not support std::make_unique() 222 | CPU_ZERO(&cpuset); 223 | CPU_SET(i+core_start_,&cpuset); 224 | pthread_setaffinity_np(this->threads[i]->native_handle(),sizeof(cpu_set_t), &cpuset); 225 | } 226 | 227 | void init() { this->nWaiting = 0; this->isStop = false; this->isDone = false; } 228 | 229 | std::vector> threads; 230 | std::vector>> flags; 231 | mutable boost::lockfree::queue *> q; 232 | std::atomic isDone; 233 | std::atomic isStop; 234 | std::atomic nWaiting; // how many threads are waiting 235 | 236 | std::mutex mutex; 237 | std::condition_variable cv; 238 | cpu_set_t cpuset; 239 | int core_start_; 240 | }; 241 | 242 | } 243 | 244 | #endif // __ctpl_thread_pool_H__ 245 | 246 | 247 | -------------------------------------------------------------------------------- /grad.cc: -------------------------------------------------------------------------------- 1 | #include "grad.h" 2 | #include 3 | #include 4 | 5 | Grad::Grad(size_t size) : grads(size) { 6 | } 7 | 8 | std::vector& Grad::get_value() { 9 | return grads; 10 | } 11 | 12 | 13 | uint64_t Grad::max_val() { 14 | uint64_t ret = 0; 15 | for (auto gradu : grads) { 16 | //std::cout << "graud value is " << gradu.val < ret) 18 | ret = gradu.val; 19 | } 20 | return ret; 21 | } 22 | 23 | void 
Grad::normalize() { 24 | double max_grad = (double)max_val(); 25 | if (max_grad > 0.0) { 26 | for(auto &grad : grads) { 27 | grad.pct = 1.0 * ((double)grad.val / max_grad); 28 | } 29 | } 30 | } 31 | 32 | void Grad::clear() { 33 | // BUGFIX: iterate by reference — the old `for (auto gradu : grads)` mutated per-iteration copies, so clear() never reset the stored gradients 34 | for (auto &gradu : grads) { 35 | gradu.val = 0; 36 | gradu.pct = 0.0; 37 | } 38 | } 39 | 40 | size_t Grad::len() { 41 | return grads.size(); 42 | } 43 | 44 | 45 | uint64_t Grad::val_sum() { 46 | uint64_t ret = 0; 47 | // const ref: avoid copying each GradUnit per iteration (matches the by-reference style of normalize()) 48 | for (const auto &gradu : grads) { 49 | //FIXME: saturating_add 50 | ret += gradu.val; 51 | } 52 | return ret; 53 | } 54 | 55 | -------------------------------------------------------------------------------- /grad.h: -------------------------------------------------------------------------------- 1 | #ifndef GRAD_H 2 | #define GRAD_H 3 | #include 4 | #include 5 | #include 6 | class GradUnit { 7 | public: 8 | bool sign; 9 | uint64_t val; 10 | double pct; 11 | }; 12 | 13 | 14 | class Grad { 15 | private: 16 | std::vector grads; 17 | public: 18 | Grad(size_t size); 19 | std::vector& get_value(); 20 | uint64_t max_val(); 21 | void clear(); 22 | size_t len(); 23 | uint64_t val_sum(); 24 | void normalize(); 25 | }; 26 | #endif 27 | -------------------------------------------------------------------------------- /input.cc: -------------------------------------------------------------------------------- 1 | #include "input.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | void MutInput::update(size_t index, bool direction, uint64_t delta) 8 | { 9 | if (direction) 10 | value[index] += delta; 11 | else 12 | value[index] -= delta; 13 | } 14 | 15 | uint8_t MutInput::get_rand() 16 | { 17 | uint8_t r = (uint8_t)r_val; 18 | r_val >>= 8; 19 | r_idx++; 20 | if (r_idx == 4) { 21 | random_r(&r_d, &r_val); 22 | r_idx = 0; 23 | } 24 | return r; 25 | } 26 | 27 | void MutInput::assign(std::vector> &input) { 28 | for (int i = 0; i < size_; i++) { 29 | value[i] = input[i].second; 30 | //std::cout << "randomize " << i << " and assign value " <<
(int)value[i] << std::endl; 31 | } 32 | } 33 | 34 | void MutInput::flip(size_t index, size_t bit_index) { 35 | uint8_t val = value[index]; 36 | uint8_t mask = 1; 37 | mask = mask << bit_index; 38 | value[index] = val^mask; 39 | } 40 | 41 | void MutInput::set(const size_t index, uint8_t val) 42 | { 43 | value[index] = val; 44 | } 45 | 46 | uint64_t MutInput::len() { 47 | return size_; 48 | } 49 | 50 | uint64_t MutInput::val_len() { 51 | return size_; 52 | } 53 | 54 | MutInput& MutInput::operator=(const MutInput &other) 55 | { 56 | MutInput::copy(this, &other); 57 | return *this; 58 | } 59 | 60 | void MutInput::dump() { 61 | // printf("dumping input and value size is %lu\n",value.size()); 62 | // for(auto i : value) 63 | // printf("%d, ",i); 64 | // printf("\n"); 65 | } 66 | 67 | void MutInput::randomize() { 68 | for(int i=0;i 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | class InputMeta { 11 | public: 12 | bool sign; 13 | size_t offset; 14 | size_t size; 15 | }; 16 | 17 | 18 | class MutInput { 19 | public: 20 | // std::vector value; 21 | uint8_t* value; 22 | // std::vector meta; 23 | size_t size_; 24 | size_t get_size(); 25 | MutInput(size_t size); 26 | ~MutInput(); 27 | void dump(); 28 | uint64_t len(); 29 | uint64_t val_len(); 30 | void randomize(); 31 | //random 32 | char r_s[256]; 33 | struct random_data r_d; 34 | int32_t r_val; 35 | int32_t r_idx; 36 | uint8_t get_rand(); 37 | 38 | uint8_t get(const size_t i); 39 | void update(size_t index, bool direction, uint64_t delta); 40 | void flip(size_t index, size_t bit_index); 41 | void set(const size_t index, uint8_t value); 42 | void assign(std::vector> &input); 43 | MutInput& operator=(const MutInput &other); 44 | 45 | static void copy(MutInput *dst, const MutInput *src) 46 | { 47 | uint8_t *dst_value = dst->value; 48 | memcpy(dst, src, sizeof(MutInput)); 49 | if (!dst_value) 50 | dst->value = (uint8_t*)malloc(src->size_); 51 | else 52 | dst->value = dst_value; 53 | memcpy(dst->value, 
src->value, src->size_); 54 | } 55 | }; 56 | #endif 57 | -------------------------------------------------------------------------------- /jit.h: -------------------------------------------------------------------------------- 1 | #ifndef JIT_H_ 2 | #define JIT_H_ 3 | 4 | #include "rgd.pb.h" 5 | #include "test.h" 6 | 7 | int addFunction(const rgd::AstNode* node, 8 | std::map &local_map, 9 | uint64_t id, 10 | std::unordered_map &expr_cache); 11 | 12 | test_fn_type performJit(uint64_t id); 13 | 14 | #endif -------------------------------------------------------------------------------- /pro.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/R-Fuzz/jigsaw/5459da15d2e4a581a7e6e6f817c6f1b78d01527d/pro.log -------------------------------------------------------------------------------- /rgd.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package rgd; 4 | 5 | service RGD { 6 | rpc sendExpression (AstNode) returns (JitReply) {} 7 | rpc sendCmd (JitCmd) returns (JitReply) {} 8 | rpc sendCmdv2 (JitCmdv2) returns (JitReply) {} 9 | rpc startNewSession (ResetCmd) returns (JitReply) {} 10 | } 11 | 12 | message JitEmpty {} 13 | 14 | message JitCmdv2 { 15 | uint32 cmd = 1; //reset;solve;expression 16 | int32 test_value = 2; //this the value output by Z3 17 | repeated bytes expr_string = 3; // 18 | // repeated AstNode expr = 3; // 19 | string file_name = 4; 20 | uint64 bhash = 5; 21 | uint64 shash = 6; 22 | bool direction = 7; 23 | repeated AstNode expr = 8; 24 | } 25 | 26 | message ResetCmd { 27 | string file_name = 1; 28 | } 29 | 30 | message JitBranchContext { 31 | uint64 bhash = 1; 32 | uint64 shash = 2; 33 | uint32 order = 3; 34 | bool direction = 4; 35 | } 36 | 37 | message JitCmd { 38 | uint32 cmd = 1; //reset;solve;expression 39 | int32 test_value = 2; //this the value output by Z3 40 | } 41 | 42 | // AST node for symbolic expressions 
43 | message AstNode { 44 | uint32 kind = 1; 45 | uint32 boolvalue = 2; //used by bool expr 46 | uint32 bits = 3; 47 | string value = 4; //used by constant expr 48 | repeated AstNode children = 5; 49 | string name = 6; //used for debugging 50 | uint32 index = 7; //used by read expr for index and extract expr 51 | uint32 label = 8; //for expression dedup 52 | uint32 hash = 9; //for node dedup 53 | uint32 direction = 10; 54 | uint32 sessionid = 11; 55 | uint32 full = 12; 56 | } 57 | 58 | // The response message containing the JIT result status 59 | message JitReply { 60 | string message = 1; 61 | } 62 | 63 | message DupReply { 64 | bool dup = 1; 65 | } 66 | 67 | message BmReply { 68 | bytes bm = 1; 69 | } 70 | -------------------------------------------------------------------------------- /rgdJit.h: -------------------------------------------------------------------------------- 1 | #ifndef GRAD_JIT_H 2 | #define GRAD_JIT_H 3 | 4 | #include "llvm/ADT/StringRef.h" 5 | #include "llvm/ExecutionEngine/JITSymbol.h" 6 | #include "llvm/ExecutionEngine/Orc/CompileUtils.h" 7 | #include "llvm/ExecutionEngine/Orc/Core.h" 8 | #include "llvm/ExecutionEngine/Orc/ExecutionUtils.h" 9 | #include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" 10 | #include "llvm/ExecutionEngine/Orc/IRTransformLayer.h" 11 | #include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h" 12 | #include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h" 13 | #include "llvm/ExecutionEngine/Orc/TargetProcessControl.h" 14 | #include "llvm/ExecutionEngine/SectionMemoryManager.h" 15 | #include "llvm/IR/DataLayout.h" 16 | #include "llvm/IR/LLVMContext.h" 17 | #include "llvm/IR/LegacyPassManager.h" 18 | #include "llvm/Transforms/InstCombine/InstCombine.h" 19 | #include "llvm/Transforms/Scalar.h" 20 | #include "llvm/Transforms/Scalar/GVN.h" 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | namespace rgd { 30 | 31 | class GradJit { 32 | private: 33 |
llvm::orc::ExecutionSession ES; 34 | llvm::orc::RTDyldObjectLinkingLayer ObjectLayer; 35 | llvm::orc::IRCompileLayer CompileLayer; 36 | llvm::orc::IRTransformLayer OptimizeLayer; 37 | //std::unique_ptr TM; 38 | 39 | llvm::DataLayout DL; 40 | llvm::orc::MangleAndInterner Mangle; 41 | llvm::orc::JITDylib *MainJD; 42 | // llvm::orc::ThreadSafeContext Ctx; 43 | // std::unique_ptr CompileCallbackManager; 44 | // llvm::orc::CompileOnDemandLayer CODLayer; 45 | 46 | public: 47 | GradJit(llvm::orc::JITTargetMachineBuilder JTMB, llvm::DataLayout DL) 48 | : ObjectLayer(ES, 49 | []() { return std::make_unique(); }), 50 | // TM(llvm::EngineBuilder().selectTarget()), 51 | CompileLayer(ES, ObjectLayer, std::make_unique(std::move(JTMB))), 52 | OptimizeLayer(ES, CompileLayer, optimizeModule), 53 | DL(std::move(DL)), Mangle(ES, this->DL) 54 | // CompileCallbackManager( 55 | // llvm::orc::createLocalCompileCallbackManager(TM->getTargetTriple(), ES, 0)), 56 | // CODLayer(ES, OptimizeLayer, 57 | // [this](llvm::Function &F) { return std::set({&F}); }, 58 | // *CompileCallbackManager, 59 | // llvm::orc::createLocalIndirectStubsManagerBuilder( 60 | // TM->getTargetTriple())) 61 | // Ctx(std::make_unique()) 62 | { 63 | MainJD = &cantFail(ES.createJITDylib("main")); 64 | 65 | MainJD->addGenerator( 66 | cantFail(llvm::orc::DynamicLibrarySearchGenerator::GetForCurrentProcess( 67 | DL.getGlobalPrefix()))); 68 | } 69 | 70 | ~GradJit() { 71 | if (auto Err = ES.endSession()) 72 | ES.reportError(std::move(Err)); 73 | } 74 | 75 | const llvm::DataLayout &getDataLayout() const { return DL; } 76 | // llvm::LLVMContext &getContext() { return *Ctx.getContext(); } 77 | // llvm::orc::ThreadSafeContext &getTSC() {return Ctx;} 78 | 79 | static llvm::Expected> Create() { 80 | auto JTMB = llvm::orc::JITTargetMachineBuilder::detectHost(); 81 | 82 | if (!JTMB) 83 | return JTMB.takeError(); 84 | llvm::outs() << "Host triples: " << JTMB->getTargetTriple().str() << "\n"; 85 | 86 | auto DL = 
JTMB->getDefaultDataLayoutForTarget(); 87 | if (!DL) 88 | return DL.takeError(); 89 | 90 | return std::make_unique(std::move(*JTMB), std::move(*DL)); 91 | } 92 | 93 | llvm::Error addModule(std::unique_ptr M, 94 | std::unique_ptr ctx) { 95 | return OptimizeLayer.add(*MainJD, 96 | llvm::orc::ThreadSafeModule(std::move(M), std::move(ctx))); 97 | } 98 | 99 | llvm::Expected lookup(llvm::StringRef Name) { 100 | return ES.lookup({MainJD}, Mangle(Name.str())); 101 | } 102 | 103 | private: 104 | static llvm::orc::ThreadSafeModule 105 | optimizeModule(llvm::orc::ThreadSafeModule TSM, const llvm::orc::MaterializationResponsibility &R) { 106 | // Create a function pass manager. 107 | auto FPM = std::make_unique(TSM.getModuleUnlocked()); 108 | 109 | // Add some optimizations. 110 | FPM->add(llvm::createInstructionCombiningPass()); 111 | FPM->add(llvm::createReassociatePass()); 112 | FPM->add(llvm::createGVNPass()); 113 | FPM->add(llvm::createInstSimplifyLegacyPass()); 114 | // FPM->add(llvm::createCFGSimplificationPass()); 115 | FPM->doInitialization(); 116 | 117 | // Run the optimizations over all functions in the module being added to 118 | // the JIT. 
119 | for (auto &F : *TSM.getModuleUnlocked()) 120 | FPM->run(F); 121 | 122 | return TSM; 123 | } 124 | }; 125 | } 126 | 127 | #endif // LLVM_EXECUTIONENGINE_ORC_KALEIDOSCOPEJIT_H 128 | 129 | -------------------------------------------------------------------------------- /rgd_op.h: -------------------------------------------------------------------------------- 1 | #ifndef __IR__H_ 2 | #define __IR__H_ 3 | 4 | #include 5 | 6 | namespace rgd { 7 | enum Kind { 8 | Bool, // 0 9 | Constant, // 1 10 | Read, // 2 11 | Concat, // 3 12 | Extract, // 4 13 | 14 | ZExt, // 5 15 | SExt, // 6 16 | 17 | // Arithmetic 18 | Add, // 7 19 | Sub, // 8 20 | Mul, // 9 21 | UDiv, // 10 22 | SDiv, // 11 23 | URem, // 12 24 | SRem, // 13 25 | Neg, // 14 26 | 27 | // Bit 28 | Not, // 15 29 | And, // 16 30 | Or, // 17 31 | Xor, // 18 32 | Shl, // 19 33 | LShr, // 20 34 | AShr, // 21 35 | 36 | // Compare 37 | Equal, // 22 38 | Distinct, // 23 39 | Ult, // 24 40 | Ule, // 25 41 | Ugt, // 26 42 | Uge, // 27 43 | Slt, // 28 44 | Sle, // 29 45 | Sgt, // 30 46 | Sge, // 31 47 | 48 | // Logical 49 | LOr, // 32 50 | LAnd, // 33 51 | LNot, // 34 52 | 53 | // Special 54 | Ite, // 35 55 | Load, // 36 to be worked with TT-Fuzzer 56 | Memcmp, //37 57 | }; 58 | } 59 | 60 | static inline bool isRelational(uint32_t kind) { 61 | if (kind == rgd::Equal || kind == rgd::Distinct || kind == rgd::Ugt || kind == rgd::Ult 62 | || kind == rgd::Uge || kind == rgd::Ule || kind == rgd::Sgt || kind == rgd::Slt 63 | || kind == rgd::Sge || kind == rgd::Sle) 64 | return true; 65 | else 66 | return false; 67 | } 68 | 69 | #endif 70 | -------------------------------------------------------------------------------- /test.h: -------------------------------------------------------------------------------- 1 | #ifndef TEST_H_ 2 | #define TEST_H_ 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "grad.h" 11 | #include "input.h" 12 | 13 | // function under test 14 | // constraint: 0 
= equal, 1 = distinct, 2 = lt, 3 = le, 4 = gt, 5 = ge 15 | typedef void(*test_fn_type)(uint64_t*); 16 | 17 | //the first two slots of the arguments are reserved for the left and right operands 18 | static const int RET_OFFSET = 2; 19 | 20 | struct Constraint { 21 | // JIT'ed function for a comparison expression 22 | test_fn_type fn; 23 | // the relational operator 24 | uint32_t comparison; 25 | 26 | // During constraint collection, (symbolic) input bytes are recorded 27 | // as offsets from the beginning of the input. However, the JIT'ed 28 | // function consumes inputs as an input array. So, when building the 29 | // function, we need to map the offset to the idx in input array, 30 | // which is stored in local_map. 31 | std::map local_map; 32 | // if const {false, const value}, if symbolic {true, index in the inputs} 33 | // during local search, we use a single global array (to avoid memory 34 | // allocation and free) to prepare the inputs, so we need to know where 35 | // to load the input values into the input array.
36 | std::vector> input_args; 37 | // map the offset to iv (initial value) 38 | std::unordered_map inputs; 39 | // shape information about the input (e.g., 1, 2, 4, 8 bytes) 40 | std::unordered_map shapes; 41 | // number of constant in the input array 42 | uint32_t const_num; 43 | }; 44 | 45 | 46 | struct ConsMeta { 47 | // per-FUT arg mapping, so we can share the constraints 48 | std::vector> input_args; 49 | // input2state inference related 50 | bool i2s_feasible; 51 | uint64_t op1, op2; 52 | }; 53 | 54 | struct FUT { 55 | FUT(): scratch_args(nullptr), max_const_num(0) {} 56 | ~FUT() { if (scratch_args) free(scratch_args); } 57 | uint32_t num_exprs; 58 | // constraints, could be shared, strictly read-only 59 | std::vector> constraints; 60 | // per-FUT mutable metadata of constraints 61 | std::vector> consmeta; 62 | 63 | // inputs as pairs of 64 | std::vector> inputs; 65 | // shape information at each offset 66 | std::unordered_map shapes; 67 | // max number of constants in the input array 68 | uint32_t max_const_num; 69 | // record constraints that use a certain input byte 70 | std::unordered_map> cmap; 71 | // the input array used for all JIT'ed functions 72 | // all input bytes are extended to 64 bits 73 | uint64_t* scratch_args; 74 | 75 | // intermediate states for the search 76 | std::vector orig_distances; 77 | std::vector distances; 78 | 79 | // statistics 80 | uint64_t start; //start time 81 | bool stopped = false; 82 | int attempts = 0; 83 | int num_minimal_optima = 0; 84 | bool gsol = false; 85 | bool opti_hit = false; 86 | 87 | // solutions 88 | std::unordered_map *rgd_solution; 89 | std::unordered_map *opti_solution; 90 | std::unordered_map *hint_solution; 91 | 92 | void finalize() { 93 | // aggregate the contraints, map each input byte to a constraint to 94 | // an index in the "global" input array (i.e., the scratch_args) 95 | std::unordered_map sym_map; 96 | uint32_t gidx = 0; 97 | for (size_t i = 0; i < constraints.size(); i++) { 98 | 
std::unique_ptr cm = std::make_unique(); 99 | cm->input_args = constraints[i]->input_args; 100 | uint32_t last_offset = -1; 101 | cm->i2s_feasible = true; 102 | for (const auto& [offset, lidx] : constraints[i]->local_map) { 103 | auto gitr = sym_map.find(offset); 104 | if (gitr == sym_map.end()) { 105 | gidx = inputs.size(); 106 | sym_map[offset] = gidx; 107 | inputs.push_back(std::make_pair(offset, constraints[i]->inputs.at(offset))); 108 | shapes[offset] = constraints[i]->shapes.at(offset); 109 | } else { 110 | gidx = gitr->second; 111 | } 112 | auto slot = cmap.find(gidx); 113 | if (slot != cmap.end()) { 114 | slot->second.push_back(i); 115 | } else { 116 | cmap.emplace(std::make_pair(gidx, std::vector{i})); 117 | } 118 | // save the mapping between the local index (i.e., where the JIT'ed 119 | // function is going to read the input from) and the global index 120 | // (i.e., where the current value corresponding to the input byte 121 | // is stored in MutInput) 122 | cm->input_args[lidx].second = gidx; 123 | 124 | // check if the input bytes are consecutive 125 | // using std::map ensures that the offsets (keys) are sorted 126 | if (last_offset != -1 && last_offset + 1 != offset) { 127 | cm->i2s_feasible = false; 128 | } 129 | last_offset = offset; 130 | } 131 | // FIXME: only support up to 64-bit for now 132 | if (constraints[i]->local_map.size() > 8) { 133 | cm->i2s_feasible = false; 134 | } 135 | 136 | // update the number of required constants in the input array 137 | if (max_const_num < constraints[i]->const_num) 138 | max_const_num = constraints[i]->const_num; 139 | 140 | // insert the constraint metadata 141 | consmeta.push_back(std::move(cm)); 142 | } 143 | 144 | // allocate the input array, reserver 2 for comparison operands a,b 145 | scratch_args = (uint64_t*)aligned_alloc(sizeof(*scratch_args), 146 | (2 + inputs.size() + max_const_num + 1) * sizeof(*scratch_args)); 147 | orig_distances.resize(constraints.size(), 0); 148 | 
distances.resize(constraints.size(), 0); 149 | } 150 | 151 | void load_hint() { // load hint 152 | for (auto itr = inputs.begin(); itr != inputs.end(); itr++) { 153 | auto got = hint_solution->find(itr->first); 154 | if (got != hint_solution->end()) 155 | itr->second = got->second; 156 | } 157 | } 158 | 159 | }; 160 | 161 | #endif // TEST_H_ -------------------------------------------------------------------------------- /util.cc: -------------------------------------------------------------------------------- 1 | #include "rgd_op.h" 2 | #include 3 | #include 4 | #include 5 | #include "rgd.pb.h" 6 | #include 7 | #include 8 | #include 9 | 10 | using namespace google::protobuf::io; 11 | using namespace rgd; 12 | 13 | const uint64_t kUsToS = 1000000; 14 | 15 | uint64_t getTimeStamp() { 16 | struct timeval tv; 17 | gettimeofday(&tv, NULL); 18 | return tv.tv_sec * kUsToS + tv.tv_usec; 19 | } 20 | 21 | static std::string get_name(uint32_t kind) { 22 | switch (kind) { 23 | case rgd::Bool: return "bool"; 24 | case rgd::Constant: return "constant"; 25 | case rgd::Read: return "read"; 26 | case rgd::Concat: return "concat"; 27 | case rgd::Extract: return "extract"; 28 | 29 | case rgd::ZExt: return "zext"; 30 | case rgd::SExt: return "sext"; 31 | 32 | // Arithmetic 33 | case rgd::Add: return "add"; 34 | case rgd::Sub: return "sub"; 35 | case rgd::Mul: return "mul"; 36 | case rgd::UDiv: return "udiv"; 37 | case rgd::SDiv: return "sdiv"; 38 | case rgd::URem: return "urem"; 39 | case rgd::SRem: return "srem"; 40 | case rgd::Neg: return "neg"; 41 | 42 | // Bit 43 | case rgd::Not: return "not"; 44 | case rgd::And: return "and"; 45 | case rgd::Or: return "or"; 46 | case rgd::Xor: return "xor"; 47 | case rgd::Shl: return "shl"; 48 | case rgd::LShr: return "lshr"; 49 | case rgd::AShr: return "ashr"; 50 | 51 | // Compare 52 | case rgd::Equal: return "equal"; 53 | case rgd::Distinct: return "distinct"; 54 | case rgd::Ult: return "ult"; 55 | case rgd::Ule: return "ule"; 56 | case 
rgd::Ugt: return "ugt"; 57 | case rgd::Uge: return "uge"; 58 | case rgd::Slt: return "slt"; 59 | case rgd::Sle: return "sle"; 60 | case rgd::Sgt: return "sgt"; 61 | case rgd::Sge: return "sge"; 62 | 63 | // Logical 64 | case rgd::LOr: return "lor"; 65 | case rgd::LAnd: return "land"; 66 | case rgd::LNot: return "lnot"; 67 | 68 | // Special 69 | case rgd::Ite: return "ite"; 70 | case rgd::Memcmp: return "memcmp"; 71 | 72 | default: return "unknown"; 73 | } 74 | } 75 | 76 | static void do_print(const AstNode* req) { 77 | std::cerr << get_name(req->kind()) << "("; 78 | //std::cerr << req->name() << "("; 79 | std::cerr << "width=" << req->bits() << ","; 80 | //std::cerr << " hash=" << req->hash() << ","; 81 | std::cerr << " label=" << req->label() << ","; 82 | //std::cerr << " hash=" << req->hash() << ","; 83 | if (req->kind() == rgd::Bool) { 84 | std::cerr << req->value(); 85 | } 86 | if (req->kind() == rgd::Constant) { 87 | std::cerr << req->value() << ", "; 88 | // std::cerr << req->index(); 89 | } 90 | if (req->kind() == rgd::Memcmp) { 91 | std::cerr << req->value() << ", "; 92 | // std::cerr << req->index(); 93 | } 94 | if (req->kind() == rgd::Read || req->kind() == rgd::Extract) { 95 | std::cerr << req->index() << ", "; 96 | } 97 | for(int i = 0; i < req->children_size(); i++) { 98 | do_print(&req->children(i)); 99 | if (i != req->children_size() - 1) 100 | std::cerr << ", "; 101 | } 102 | std::cerr << ")"; 103 | } 104 | 105 | static void verbose_do_print(int depth,const AstNode* req) { 106 | for (int i = 0; i< depth;i++) 107 | std::cerr << "\t"; 108 | std::cerr << req->name() << "("; 109 | // std::cerr << "width=" << req->bits() << ","; 110 | // std::cerr << " hash=" << req->hash() << ","; 111 | std::cerr << "label=" << req->label() << ","; 112 | if (req->kind() == rgd::Bool) { 113 | std::cerr << req->value(); 114 | } 115 | if (req->kind() == rgd::Constant) { 116 | std::cerr << req->value(); 117 | } 118 | if (req->kind() == rgd::Read) { 119 | std::cerr << 
req->index(); 120 | } 121 | std::cerr << std::endl; 122 | for(int i = 0; i < req->children_size(); i++) { 123 | verbose_do_print(depth+1,&req->children(i)); 124 | if (i != req->children_size() - 1) 125 | std::cerr << ", "; 126 | } 127 | for(int i = 0; i< depth;i++) 128 | std::cerr << "\t"; 129 | std::cerr << std::endl; 130 | std::cerr << ")"; 131 | } 132 | 133 | void printExpression(const AstNode* req) { 134 | do_print(req); 135 | std::cerr << std::endl; 136 | } 137 | 138 | 139 | static bool writeDelimitedTo( 140 | const google::protobuf::MessageLite& message, 141 | google::protobuf::io::ZeroCopyOutputStream* rawOutput) { 142 | // We create a new coded stream for each message. Don't worry, this is fast. 143 | google::protobuf::io::CodedOutputStream output(rawOutput); 144 | 145 | // Write the size. 146 | const int size = message.ByteSizeLong(); 147 | output.WriteVarint32(size); 148 | 149 | uint8_t* buffer = output.GetDirectBufferForNBytesAndAdvance(size); 150 | if (buffer != NULL) { 151 | // Optimization: The message fits in one buffer, so use the faster 152 | // direct-to-array serialization path. 153 | message.SerializeWithCachedSizesToArray(buffer); 154 | } else { 155 | // Slightly-slower path when the message is multiple buffers. 
156 | message.SerializeWithCachedSizes(&output); 157 | if (output.HadError()) return false; 158 | } 159 | 160 | return true; 161 | } 162 | 163 | bool saveRequest( 164 | const google::protobuf::MessageLite& message, 165 | const char* path) { 166 | mode_t mode = S_IRUSR | S_IWUSR; 167 | int fd = open(path, O_CREAT | O_WRONLY | O_APPEND, mode); 168 | ZeroCopyOutputStream* rawOutput = new google::protobuf::io::FileOutputStream(fd); 169 | bool suc = writeDelimitedTo(message,rawOutput); 170 | delete rawOutput; 171 | sync(); 172 | close(fd); 173 | return suc; 174 | } 175 | 176 | -------------------------------------------------------------------------------- /util.h: -------------------------------------------------------------------------------- 1 | #ifndef UTIL_H_ 2 | #define UTIL_H_ 3 | #include "rgd.pb.h" 4 | using namespace rgd; 5 | bool saveRequest(const google::protobuf::MessageLite& message, 6 | const char* path); 7 | void printExpression(const AstNode* req); 8 | #endif 9 | -------------------------------------------------------------------------------- /wheels/lockfreehash/cuckoo/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | g++ main.cpp lockfree_hash_table.cpp -pthread -std=c++11 3 | clean: 4 | rm -rf a.out 5 | -------------------------------------------------------------------------------- /wheels/lockfreehash/cuckoo/benchmark_lockfree_ht.h: -------------------------------------------------------------------------------- 1 | #ifndef BENCHMARK_LOCKFREE_HT 2 | #define BENCHMARK_LOCKFREE_HT 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "cycle_timer.h" 13 | #include "lockfree_hash_table.h" 14 | #include "thread_service.h" 15 | 16 | #define NUM_ITERS 3 17 | #define MAX_THREADS 24 18 | 19 | #define C_NUM_ELEMS 500 20 | 21 | class BenchmarkLockFreeHT 22 | { 23 | public: 24 | BenchmarkLockFreeHT(int op_count, int capacity, 25 | int rweight, int 
idweight, 26 | int thread_count, 27 | double load_factor); 28 | 29 | void benchmark_correctness(); 30 | void benchmark_hp(); 31 | void benchmark_all(); 32 | void run(); 33 | 34 | private: 35 | int m_rweight; 36 | int m_idweight; 37 | 38 | int m_thread_count; 39 | int m_op_count; 40 | int m_capacity; 41 | double m_load_factor; 42 | }; 43 | 44 | BenchmarkLockFreeHT::BenchmarkLockFreeHT(int op_count, int capacity, 45 | int rweight, int idweight, 46 | int thread_count, double load_factor) 47 | { 48 | std::cout << "*** BENCHMARKING LockFreeHT ***" << std::endl; 49 | m_op_count = op_count; 50 | m_load_factor = load_factor; 51 | m_capacity = capacity; 52 | m_thread_count = thread_count; 53 | 54 | m_rweight = rweight; 55 | m_idweight = idweight; 56 | } 57 | 58 | void BenchmarkLockFreeHT::benchmark_correctness() 59 | { 60 | bool correct = true; 61 | 62 | Lockfree_hash_table ht(2 * C_NUM_ELEMS, m_thread_count); 63 | std::unordered_map map; 64 | map.reserve(2 * C_NUM_ELEMS); 65 | 66 | std::random_device rd; 67 | std::mt19937 mt(rd()); 68 | std::uniform_int_distribution rng; 69 | 70 | int elems[C_NUM_ELEMS]; 71 | for (int i = 0; i < C_NUM_ELEMS; i++) 72 | { 73 | int k = rng(mt); 74 | elems[i] = k; 75 | map[k] = k; 76 | } 77 | 78 | pthread_t workers[MAX_THREADS]; 79 | WorkerArgs args[MAX_THREADS]; 80 | 81 | for (int i = 0; i < 2; i++) 82 | { 83 | args[i].num_elems = C_NUM_ELEMS / 2; 84 | args[i].ht_p = (void*)&ht; 85 | args[i].elems = elems; 86 | args[i].start = i * (C_NUM_ELEMS / 2); 87 | args[i].tid = i; 88 | 89 | pthread_create(&workers[i], NULL, thread_insert, (void*)&args[i]); 90 | } 91 | 92 | for (int i = 0; i < 2; i++) 93 | { 94 | pthread_join(workers[i], NULL); 95 | } 96 | 97 | int count = 0; 98 | for (std::pair e : map) 99 | { 100 | std::pair r = ht.search(e.first, 0); 101 | if (!r.second || e.second != r.first) 102 | { 103 | 104 | std::cout << "\t" << "Expected value, Received value, Received result = " << e.second << " " << r.second << " "<< r.first << std::endl; 105 
| correct = false; 106 | count++; 107 | } 108 | } 109 | 110 | std::cout << "\t" << count << "/" << C_NUM_ELEMS << " errors" << std::endl; 111 | 112 | if (correct) 113 | std::cout << "\t" << "Correctness test passed" << std::endl; 114 | else 115 | std::cout << "\t" << "Correctness test failed" << std::endl; 116 | 117 | } 118 | 119 | void BenchmarkLockFreeHT::benchmark_hp() 120 | { 121 | Lockfree_hash_table ht(400000, m_thread_count); 122 | 123 | std::random_device rd; 124 | std::mt19937 mt(rd()); 125 | std::uniform_int_distribution rng; 126 | 127 | std::array weights; 128 | weights[0] = m_rweight; 129 | weights[1] = m_idweight; 130 | weights[2] = m_idweight; 131 | 132 | std::default_random_engine g; 133 | std::discrete_distribution drng(weights.begin(), weights.end()); 134 | 135 | int insert[200000]; 136 | for (int i = 0; i < 200000; i++) 137 | { 138 | int k = rng(mt); 139 | int v = rng(mt); 140 | insert[i] = k; 141 | ht.insert(k, v, 0); 142 | } 143 | 144 | pthread_t workers[MAX_THREADS]; 145 | WorkerArgs args[MAX_THREADS]; 146 | 147 | int num_elems = 200000 / m_thread_count; 148 | for (int i = 0; i < m_thread_count; i++) 149 | { 150 | args[i].num_elems = num_elems; 151 | args[i].ht_p = (void*)&ht; 152 | args[i].elems = insert; 153 | args[i].start = i * num_elems; 154 | args[i].tid = i; 155 | args[i].remove = i < (m_thread_count / 4); 156 | 157 | pthread_create(&workers[i], NULL, thread_remove, (void*)&args[i]); 158 | } 159 | 160 | for (int i = 0; i < m_thread_count; i++) 161 | { 162 | pthread_join(workers[i], NULL); 163 | } 164 | 165 | std::cout << "\t" << "Hazard Pointer test passed" << std::endl; 166 | 167 | } 168 | 169 | void BenchmarkLockFreeHT::benchmark_all() 170 | { 171 | Lockfree_hash_table ht(m_capacity, m_thread_count); 172 | 173 | std::random_device rd; 174 | std::mt19937 mt(rd()); 175 | std::uniform_int_distribution rng; 176 | 177 | std::array weights; 178 | weights[0] = m_rweight; 179 | weights[1] = m_idweight; 180 | weights[2] = m_idweight; 181 | 182 
| std::default_random_engine g; 183 | std::discrete_distribution drng(weights.begin(), weights.end()); 184 | 185 | // Warm-up table to load factor 186 | int num_warmup = static_cast(static_cast(m_capacity) * m_load_factor); 187 | for (int i = 0; i < num_warmup; i++) 188 | { 189 | int k = rng(mt); 190 | int v = rng(mt); 191 | 192 | ht.insert(k, v, 0); 193 | } 194 | 195 | // Run benchmark 196 | std::vector results; 197 | for (int iter = 0; iter < NUM_ITERS; iter++) 198 | { 199 | int num_elems = m_op_count / m_thread_count; 200 | pthread_t workers[MAX_THREADS]; 201 | WorkerArgs args[MAX_THREADS]; 202 | 203 | double start = CycleTimer::currentSeconds(); 204 | for (int i = 0; i < m_thread_count; i++) 205 | { 206 | args[i].num_elems = num_elems; 207 | args[i].rweight = m_rweight; 208 | args[i].iweight = m_idweight / 2; 209 | args[i].dweight = m_idweight / 2; 210 | args[i].ht_p = (void*)&ht; 211 | args[i].tid = i; 212 | pthread_create(&workers[i], NULL, thread_service, (void*)&args[i]); 213 | } 214 | 215 | for (int i = 0; i < m_thread_count; i++) 216 | { 217 | pthread_join(workers[i], NULL); 218 | } 219 | double time = CycleTimer::currentSeconds() - start; 220 | results.push_back(time); 221 | } 222 | 223 | // Publish Results 224 | double best_time = *std::min_element(results.begin(), results.end()); 225 | double avg_time = std::accumulate(results.begin(), results.end(), 0.0) / static_cast(results.size()); 226 | std::cout << "\t" << "Max Throughput: " << m_op_count / best_time / 1000.0 << " ops/ms" << std::endl; 227 | std::cout << "\t" << "Avg Throughput: " << m_op_count / avg_time / 1000.0 << " ops/ms" << std::endl; 228 | 229 | results.clear(); 230 | 231 | int* keys = new int[m_op_count]; 232 | 233 | for (int iter = 0; iter < NUM_ITERS; iter++) 234 | { 235 | int num_elems = m_op_count / m_thread_count; 236 | pthread_t workers[MAX_THREADS]; 237 | WorkerArgs args[MAX_THREADS]; 238 | 239 | double start = CycleTimer::currentSeconds(); 240 | for (int i = 0; i < m_thread_count; 
i++) 241 | { 242 | args[i].num_elems = num_elems; 243 | args[i].rweight = m_rweight; 244 | args[i].iweight = m_idweight / 2; 245 | args[i].dweight = m_idweight / 2; 246 | args[i].ht_p = (void*)&ht; 247 | args[i].tid = i; 248 | args[i].elems = keys; 249 | args[i].start = i * num_elems; 250 | pthread_create(&workers[i], NULL, thread_service_low_contention, (void*)&args[i]); 251 | } 252 | 253 | for (int i = 0; i < m_thread_count; i++) 254 | { 255 | pthread_join(workers[i], NULL); 256 | } 257 | double time = CycleTimer::currentSeconds() - start; 258 | results.push_back(time); 259 | } 260 | 261 | // Publish Results 262 | best_time = *std::min_element(results.begin(), results.end()); 263 | avg_time = std::accumulate(results.begin(), results.end(), 0.0) / static_cast(results.size()); 264 | std::cout << "\t" << "Max Throughput (Low): " << m_op_count / best_time / 1000.0 << " ops/ms" << std::endl; 265 | std::cout << "\t" << "Avg Throughput (Low): " << m_op_count / avg_time / 1000.0 << " ops/ms" << std::endl; 266 | 267 | results.clear(); 268 | 269 | for (int iter = 0; iter < NUM_ITERS; iter++) 270 | { 271 | int num_elems = m_op_count / m_thread_count; 272 | pthread_t workers[MAX_THREADS]; 273 | WorkerArgs args[MAX_THREADS]; 274 | 275 | double start = CycleTimer::currentSeconds(); 276 | for (int i = 0; i < m_thread_count; i++) 277 | { 278 | args[i].num_elems = num_elems; 279 | args[i].rweight = m_rweight; 280 | args[i].iweight = m_idweight / 2; 281 | args[i].dweight = m_idweight / 2; 282 | args[i].ht_p = (void*)&ht; 283 | args[i].tid = i; 284 | ht.insert(0, 0, 0); 285 | pthread_create(&workers[i], NULL, thread_service_high_contention, (void*)&args[i]); 286 | } 287 | 288 | for (int i = 0; i < m_thread_count; i++) 289 | { 290 | pthread_join(workers[i], NULL); 291 | } 292 | double time = CycleTimer::currentSeconds() - start; 293 | results.push_back(time); 294 | } 295 | 296 | // Publish Results 297 | best_time = *std::min_element(results.begin(), results.end()); 298 | avg_time = 
std::accumulate(results.begin(), results.end(), 0.0) / static_cast(results.size()); 299 | std::cout << "\t" << "Max Throughput (High): " << m_op_count / best_time / 1000.0 << " ops/ms" << std::endl; 300 | std::cout << "\t" << "Avg Throughput (High): " << m_op_count / avg_time / 1000.0 << " ops/ms" << std::endl; 301 | 302 | 303 | } 304 | 305 | void BenchmarkLockFreeHT::run() 306 | { 307 | benchmark_correctness(); 308 | benchmark_hp(); 309 | benchmark_all(); 310 | } 311 | 312 | #endif 313 | -------------------------------------------------------------------------------- /wheels/lockfreehash/cuckoo/benchmark_unordered_map.h: -------------------------------------------------------------------------------- 1 | #ifndef BENCHMARK_UNORDERED_MAP 2 | #define BENCHMARK_UNORDERED_MAP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "cycle_timer.h" 11 | 12 | #define NUM_ITERS 3 13 | 14 | class BenchmarkUnorderedMap 15 | { 16 | public: 17 | BenchmarkUnorderedMap(int op_count, int capacity, 18 | int rweight, int idweight, 19 | double load_factor); 20 | 21 | void benchmark_all(); 22 | void run(); 23 | private: 24 | int m_rweight; 25 | int m_idweight; 26 | 27 | int m_op_count; 28 | int m_capacity; 29 | double m_load_factor; 30 | }; 31 | 32 | BenchmarkUnorderedMap::BenchmarkUnorderedMap(int op_count, int capacity, 33 | int rweight, int idweight, 34 | double load_factor) 35 | { 36 | std::cout << "*** BENCHMARKING UnorderedMap ***" << std::endl; 37 | m_op_count = op_count; 38 | m_load_factor = load_factor; 39 | m_capacity = capacity; 40 | 41 | m_rweight = rweight; 42 | m_idweight = idweight; 43 | } 44 | 45 | void BenchmarkUnorderedMap::benchmark_all() 46 | { 47 | std::unordered_map map; 48 | map.reserve(m_capacity); 49 | 50 | std::random_device rd; 51 | std::mt19937 mt(rd()); 52 | std::uniform_int_distribution rng; 53 | 54 | std::array weights; 55 | weights[0] = m_rweight; 56 | weights[1] = m_idweight; 57 | weights[2] = m_idweight; 58 | 59 | 
std::default_random_engine g; 60 | std::discrete_distribution drng(weights.begin(), weights.end()); 61 | 62 | // Warm-up table to load factor 63 | int num_warmup = static_cast(static_cast(m_capacity) * m_load_factor); 64 | for (int i = 0; i < num_warmup; i++) 65 | { 66 | int k = rng(mt); 67 | int v = rng(mt); 68 | map[k] = v; 69 | } 70 | 71 | // Run benchmark (single-threaded) 72 | std::vector results; 73 | for (int iter = 0; iter < NUM_ITERS; iter++) 74 | { 75 | double start = CycleTimer::currentSeconds(); 76 | for (int i = 0; i < m_op_count; i++) 77 | { 78 | int k = rng(mt); 79 | int v = rng(mt); 80 | int a = drng(g); 81 | 82 | if (a == 0) 83 | map.find(k); 84 | else if (a == 1) 85 | map[k] = v; 86 | else 87 | map.erase(k); 88 | } 89 | double time = CycleTimer::currentSeconds() - start; 90 | results.push_back(time); 91 | } 92 | 93 | // Publish Results 94 | double best_time = *std::min_element(results.begin(), results.end()); 95 | double avg_time = std::accumulate(results.begin(), results.end(), 0.0) / static_cast(results.size()); 96 | std::cout << "\t" << "Max Throughput: " << static_cast(m_op_count) / best_time / 1000.0 << " ops/ms" << std::endl; 97 | std::cout << "\t" << "Avg Throughput: " << static_cast(m_op_count) / avg_time / 1000.0 << " ops/ms" << std::endl; 98 | 99 | results.clear(); 100 | int *keys = new int[m_op_count]; 101 | int s = 0; 102 | int e = 0; 103 | for (int iter = 0; iter < NUM_ITERS; iter++) 104 | { 105 | double start = CycleTimer::currentSeconds(); 106 | for (int i = 0; i < m_op_count; i++) 107 | { 108 | int k = rng(mt); 109 | int v = rng(mt); 110 | int a = drng(g); 111 | 112 | if (s == e || a == 1) { 113 | map[k] = v; 114 | keys[e++] = k; 115 | } else if (a == 0) { 116 | map.find(keys[k % (e - s) + s]); 117 | } else { 118 | map.erase(keys[s++]); 119 | } 120 | } 121 | double time = CycleTimer::currentSeconds() - start; 122 | results.push_back(time); 123 | } 124 | 125 | // Publish Results 126 | best_time = *std::min_element(results.begin(), 
results.end()); 127 | avg_time = std::accumulate(results.begin(), results.end(), 0.0) / static_cast(results.size()); 128 | std::cout << "\t" << "Max Throughput (Low): " << static_cast(m_op_count) / best_time / 1000.0 << " ops/ms" << std::endl; 129 | std::cout << "\t" << "Avg Throughput (Low): " << static_cast(m_op_count) / avg_time / 1000.0 << " ops/ms" << std::endl; 130 | 131 | results.clear(); 132 | for (int iter = 0; iter < NUM_ITERS; iter++) 133 | { 134 | double start = CycleTimer::currentSeconds(); 135 | map[0] = 0; 136 | for (int i = 0; i < m_op_count; i++) 137 | { 138 | int x = map[0]; 139 | } 140 | double time = CycleTimer::currentSeconds() - start; 141 | results.push_back(time); 142 | } 143 | 144 | // Publish Results 145 | best_time = *std::min_element(results.begin(), results.end()); 146 | avg_time = std::accumulate(results.begin(), results.end(), 0.0) / static_cast(results.size()); 147 | std::cout << "\t" << "Max Throughput (High): " << static_cast(m_op_count) / best_time / 1000.0 << " ops/ms" << std::endl; 148 | std::cout << "\t" << "Avg Throughput (High): " << static_cast(m_op_count) / avg_time / 1000.0 << " ops/ms" << std::endl; 149 | } 150 | 151 | void BenchmarkUnorderedMap::run() 152 | { 153 | benchmark_all(); 154 | } 155 | 156 | #endif 157 | -------------------------------------------------------------------------------- /wheels/lockfreehash/cuckoo/cycle_timer.h: -------------------------------------------------------------------------------- 1 | #ifndef _SYRAH_CYCLE_TIMER_H_ 2 | #define _SYRAH_CYCLE_TIMER_H_ 3 | 4 | #if defined(__APPLE__) 5 | #if defined(__x86_64__) 6 | #include 7 | #else 8 | #include 9 | #include 10 | #endif // __x86_64__ or not 11 | 12 | #include // fprintf 13 | #include // exit 14 | 15 | #elif _WIN32 16 | # include 17 | # include 18 | #else 19 | # include 20 | # include 21 | # include 22 | # include 23 | #endif 24 | 25 | 26 | // This uses the cycle counter of the processor. 
Different 27 | // processors in the system will have different values for this. If 28 | // you process moves across processors, then the delta time you 29 | // measure will likely be incorrect. This is mostly for fine 30 | // grained measurements where the process is likely to be on the 31 | // same processor. For more global things you should use the 32 | // Time interface. 33 | 34 | // Also note that if you processors' speeds change (i.e. processors 35 | // scaling) or if you are in a heterogenous environment, you will 36 | // likely get spurious results. 37 | class CycleTimer { 38 | public: 39 | typedef unsigned long long SysClock; 40 | 41 | ////////// 42 | // Return the current CPU time, in terms of clock ticks. 43 | // Time zero is at some arbitrary point in the past. 44 | static SysClock currentTicks() { 45 | #if defined(__APPLE__) && !defined(__x86_64__) 46 | return mach_absolute_time(); 47 | #elif defined(_WIN32) 48 | LARGE_INTEGER qwTime; 49 | QueryPerformanceCounter(&qwTime); 50 | return qwTime.QuadPart; 51 | #elif defined(__x86_64__) 52 | unsigned int a, d; 53 | asm volatile("rdtsc" : "=a" (a), "=d" (d)); 54 | return static_cast(a) | 55 | (static_cast(d) << 32); 56 | #elif defined(__ARM_NEON__) && 0 // mrc requires superuser. 57 | unsigned int val; 58 | asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(val)); 59 | return val; 60 | #else 61 | timespec spec; 62 | clock_gettime(CLOCK_THREAD_CPUTIME_ID, &spec); 63 | return CycleTimer::SysClock(static_cast(spec.tv_sec) * 1e9 + static_cast(spec.tv_nsec)); 64 | #endif 65 | } 66 | 67 | ////////// 68 | // Return the current CPU time, in terms of seconds. 69 | // This is slower than currentTicks(). Time zero is at 70 | // some arbitrary point in the past. 71 | static double currentSeconds() { 72 | return currentTicks() * secondsPerTick(); 73 | } 74 | 75 | ////////// 76 | // Return the conversion from seconds to ticks. 
77 | static double ticksPerSecond() { 78 | return 1.0/secondsPerTick(); 79 | } 80 | 81 | static const char* tickUnits() { 82 | #if defined(__APPLE__) && !defined(__x86_64__) 83 | return "ns"; 84 | #elif defined(__WIN32__) || defined(__x86_64__) 85 | return "cycles"; 86 | #else 87 | return "ns"; // clock_gettime 88 | #endif 89 | } 90 | 91 | ////////// 92 | // Return the conversion from ticks to seconds. 93 | static double secondsPerTick() { 94 | static bool initialized = false; 95 | static double secondsPerTick_val; 96 | if (initialized) return secondsPerTick_val; 97 | #if defined(__APPLE__) 98 | #ifdef __x86_64__ 99 | int args[] = {CTL_HW, HW_CPU_FREQ}; 100 | unsigned int Hz; 101 | size_t len = sizeof(Hz); 102 | if (sysctl(args, 2, &Hz, &len, NULL, 0) != 0) { 103 | fprintf(stderr, "Failed to initialize secondsPerTick_val!\n"); 104 | exit(-1); 105 | } 106 | secondsPerTick_val = 1.0 / (double) Hz; 107 | #else 108 | mach_timebase_info_data_t time_info; 109 | mach_timebase_info(&time_info); 110 | 111 | // Scales to nanoseconds without 1e-9f 112 | secondsPerTick_val = (1e-9*static_cast(time_info.numer))/ 113 | static_cast(time_info.denom); 114 | #endif // x86_64 or not 115 | #elif defined(_WIN32) 116 | LARGE_INTEGER qwTicksPerSec; 117 | QueryPerformanceFrequency(&qwTicksPerSec); 118 | secondsPerTick_val = 1.0/static_cast(qwTicksPerSec.QuadPart); 119 | #else 120 | FILE *fp = fopen("/proc/cpuinfo","r"); 121 | char input[1024]; 122 | if (!fp) { 123 | fprintf(stderr, "CycleTimer::resetScale failed: couldn't find /proc/cpuinfo."); 124 | exit(-1); 125 | } 126 | // In case we don't find it, e.g. 
on the N900 127 | secondsPerTick_val = 1e-9; 128 | while (!feof(fp) && fgets(input, 1024, fp)) { 129 | // NOTE(boulos): Because reading cpuinfo depends on dynamic 130 | // frequency scaling it's better to read the @ sign first 131 | float GHz, MHz; 132 | if (strstr(input, "model name")) { 133 | char* at_sign = strstr(input, "@"); 134 | if (at_sign) { 135 | char* after_at = at_sign + 1; 136 | char* GHz_str = strstr(after_at, "GHz"); 137 | char* MHz_str = strstr(after_at, "MHz"); 138 | if (GHz_str) { 139 | *GHz_str = '\0'; 140 | if (1 == sscanf(after_at, "%f", &GHz)) { 141 | //printf("GHz = %f\n", GHz); 142 | secondsPerTick_val = 1e-9f / GHz; 143 | break; 144 | } 145 | } else if (MHz_str) { 146 | *MHz_str = '\0'; 147 | if (1 == sscanf(after_at, "%f", &MHz)) { 148 | //printf("MHz = %f\n", MHz); 149 | secondsPerTick_val = 1e-6f / MHz; 150 | break; 151 | } 152 | } 153 | } 154 | } else if (1 == sscanf(input, "cpu MHz : %f", &MHz)) { 155 | //printf("MHz = %f\n", MHz); 156 | secondsPerTick_val = 1e-6f / MHz; 157 | break; 158 | } 159 | } 160 | fclose(fp); 161 | #endif 162 | 163 | initialized = true; 164 | return secondsPerTick_val; 165 | } 166 | 167 | ////////// 168 | // Return the conversion from ticks to milliseconds. 
169 | static double msPerTick() { 170 | return secondsPerTick() * 1000.0; 171 | } 172 | 173 | private: 174 | CycleTimer(); 175 | }; 176 | 177 | #endif // #ifndef _SYRAH_CYCLE_TIMER_H_ 178 | -------------------------------------------------------------------------------- /wheels/lockfreehash/cuckoo/hash_table.h: -------------------------------------------------------------------------------- 1 | #ifndef HASH_TABLE 2 | #define HASH_TABLE 3 | 4 | #include 5 | 6 | struct Hash_table { 7 | virtual std::pair search(int key) = 0; 8 | virtual void insert(int key, int val) = 0; 9 | virtual void remove(int key) = 0; 10 | }; 11 | 12 | #endif 13 | -------------------------------------------------------------------------------- /wheels/lockfreehash/cuckoo/lockfree_hash_table.h: -------------------------------------------------------------------------------- 1 | #ifndef LOCKFREE_HASH_TABLE 2 | #define LOCKFREE_HASH_TABLE 3 | 4 | #define MAX_BUF 256 5 | 6 | #include "hash_table.h" 7 | #include 8 | #include 9 | 10 | struct Hash_entry { 11 | int key; 12 | int val; 13 | }; 14 | 15 | // Alternate count_ptr definition using unused bits 16 | typedef Hash_entry* Count_ptr; 17 | 18 | enum Find_result { FIRST, SECOND, NIL }; 19 | 20 | struct Lockfree_hash_table { 21 | Lockfree_hash_table(int capacity, int thread_count); 22 | ~Lockfree_hash_table(); 23 | 24 | std::pair search(int key, int tid); 25 | void insert(int key, int val, int tid); 26 | void remove(int key, int tid); 27 | 28 | private: 29 | Count_ptr *table[2]; 30 | int size1; 31 | int size2; 32 | 33 | std::vector> rlist; 34 | std::vector rcount; 35 | std::vector> hp_rec; 36 | 37 | int hash1(int key); 38 | int hash2(int key); 39 | bool check_counter(int ts1, int ts2, int ts1x, int ts2x); 40 | Find_result find(int key, Count_ptr &ptr1, Count_ptr &ptr2, int tid); 41 | bool relocate(int which, int index, int tid); 42 | void help_relocate(int which, int index, bool initiator, int tid); 43 | void del_dup(int idx1, Count_ptr ptr1, int 
idx2, Count_ptr ptr2, int tid); 44 | 45 | void retire_node(Hash_entry* node, int tid); 46 | void scan(int tid); 47 | }; 48 | #endif 49 | -------------------------------------------------------------------------------- /wheels/lockfreehash/cuckoo/main.cpp: -------------------------------------------------------------------------------- 1 | #include "benchmark_unordered_map.h" 2 | #include "benchmark_lockfree_ht.h" 3 | //#include "benchmark_tbb.h" 4 | 5 | #include "thread_service.h" 6 | #include "cycle_timer.h" 7 | 8 | #include 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #define DEFAULT_OP_COUNT 2000000 16 | #define DEFAULT_THREAD_COUNT 24 17 | #define DEFAULT_READ_PERCENT 90 18 | #define DEFAULT_LOAD_FACTOR 40 19 | #define CAPACITY 8000016 20 | 21 | int main(int argc, char *argv[]) 22 | { 23 | char c; 24 | int op_count = DEFAULT_OP_COUNT; 25 | int num_threads = DEFAULT_THREAD_COUNT; 26 | int read_percent = DEFAULT_READ_PERCENT; 27 | int load_factor = DEFAULT_LOAD_FACTOR; 28 | 29 | char *out_file = NULL; 30 | 31 | // Parse cmd args 32 | while ((c = getopt(argc, argv, "n:t:or:hl:")) != -1) 33 | { 34 | switch (c) 35 | { 36 | case 'n': 37 | op_count = atoi(optarg); 38 | break; 39 | case 't': 40 | printf("Here"); 41 | num_threads = atoi(optarg); 42 | break; 43 | case 'o': 44 | out_file = optarg; 45 | break; 46 | case 'r': 47 | read_percent = atoi(optarg); 48 | break; 49 | case 'l': 50 | load_factor = atoi(optarg); 51 | break; 52 | case 'h': 53 | printf("Options: \n" 54 | "-n num_elements \n" 55 | "-t num_threads \n" 56 | "-l load_factor \n" 57 | "-r read_percent \n" 58 | "-o output_file \n"); 59 | break; 60 | default: 61 | break; 62 | } 63 | } 64 | 65 | int rweight = read_percent; 66 | int idweight = 100 - read_percent; 67 | double lfactor = load_factor / 100.0; 68 | 69 | printf("%d", num_threads); 70 | 71 | // Run tests 72 | std::cout << "*** STARTING Benchmark ***" << std::endl; 73 | std::cout << "Parameters: " << std::endl; 74 | std::cout << 
"\t" << "op_count : " << op_count << std::endl; 75 | std::cout << "\t" << "num_threads : " << num_threads << std::endl; 76 | std::cout << "\t" << "load_factor : " << load_factor << "%" << std::endl; 77 | std::cout << "\t" << "read_percent : " << read_percent << "%" << std::endl; 78 | 79 | BenchmarkUnorderedMap benchmark_unordered_map(op_count, CAPACITY, rweight, idweight, lfactor); 80 | benchmark_unordered_map.run(); 81 | 82 | // BenchmarkTBB benchmark_tbb(op_count, CAPACITY, rweight, idweight, num_threads, lfactor); 83 | // benchmark_tbb.run(); 84 | 85 | BenchmarkLockFreeHT benchmark_lockfree_ht(op_count, CAPACITY, rweight, idweight, num_threads, lfactor); 86 | benchmark_lockfree_ht.run(); 87 | 88 | } 89 | -------------------------------------------------------------------------------- /wheels/lockfreehash/cuckoo/thread_service.h: -------------------------------------------------------------------------------- 1 | #ifndef THREAD_SERVICE 2 | #define THREAD_SERVICE 3 | 4 | #include 5 | #include 6 | 7 | struct WorkerArgs 8 | { 9 | int num_elems; 10 | // R/I/D weights, normalized to 100 11 | int rweight; 12 | int iweight; 13 | int dweight; 14 | void* ht_p; 15 | 16 | bool remove; 17 | int tid; 18 | int start; 19 | int* elems; 20 | }; 21 | 22 | template 23 | void* thread_service(void* threadArgs) 24 | { 25 | WorkerArgs* args = static_cast(threadArgs); 26 | 27 | std::random_device rd; 28 | std::mt19937 mt(rd()); 29 | std::uniform_int_distribution rng; 30 | 31 | std::array weights; 32 | weights[0] = args->rweight; 33 | weights[1] = args->iweight; 34 | weights[2] = args->dweight; 35 | 36 | std::default_random_engine g; 37 | std::discrete_distribution drng(weights.begin(), weights.end()); 38 | 39 | int tid = args->tid; 40 | int num_elems = args->num_elems; 41 | T* ht_p = static_cast(args->ht_p); 42 | 43 | for (int i = 0; i < num_elems; i++) 44 | { 45 | // Key, Value pair 46 | int k = rng(mt); 47 | int v = rng(mt); 48 | // Action : 0 -> Search, 1 -> Insert, 2 -> Remove 49 | 
int a = drng(g); 50 | 51 | if (a == 0) 52 | ht_p->search(k, tid); 53 | else if (a == 1) 54 | ht_p->insert(k, v, tid); 55 | else 56 | ht_p->remove(k, tid); 57 | } 58 | } 59 | 60 | template 61 | void* thread_service_low_contention(void* threadArgs) 62 | { 63 | WorkerArgs* args = static_cast(threadArgs); 64 | 65 | std::random_device rd; 66 | std::mt19937 mt(rd()); 67 | std::uniform_int_distribution rng; 68 | 69 | std::array weights; 70 | weights[0] = args->rweight; 71 | weights[1] = args->iweight; 72 | weights[2] = args->dweight; 73 | 74 | std::default_random_engine g; 75 | std::discrete_distribution drng(weights.begin(), weights.end()); 76 | 77 | int tid = args->tid; 78 | int num_elems = args->num_elems; 79 | T* ht_p = static_cast(args->ht_p); 80 | 81 | int *keys = (args->elems + args->start); 82 | 83 | int start = 0; 84 | int end = 0; 85 | for (int i = 0; i < num_elems; i++) 86 | { 87 | // Action : 0 -> Search, 1 -> Insert, 2 -> Remove 88 | int a = drng(g); 89 | 90 | if (start == end || a == 1) 91 | { 92 | int k = rng(mt) % num_elems + tid * num_elems; 93 | keys[end++] = k; 94 | ht_p->insert(k, k, tid); 95 | } 96 | else if (a == 0) 97 | { 98 | int k = rng(mt) % (end - start) + start; 99 | ht_p->search(k, tid); 100 | } 101 | else 102 | { 103 | int k = keys[start++]; 104 | ht_p->remove(k, tid); 105 | } 106 | } 107 | } 108 | 109 | template 110 | void* thread_service_high_contention(void* threadArgs) 111 | { 112 | WorkerArgs* args = static_cast(threadArgs); 113 | 114 | std::random_device rd; 115 | std::mt19937 mt(rd()); 116 | std::uniform_int_distribution rng; 117 | 118 | std::array weights; 119 | weights[0] = args->rweight; 120 | weights[1] = args->iweight; 121 | weights[2] = args->dweight; 122 | 123 | std::default_random_engine g; 124 | std::discrete_distribution drng(weights.begin(), weights.end()); 125 | 126 | int tid = args->tid; 127 | int num_elems = args->num_elems; 128 | T* ht_p = static_cast(args->ht_p); 129 | 130 | for (int i = 0; i < num_elems; i++) 131 | { 
132 | ht_p->search(0, tid); 133 | } 134 | } 135 | 136 | template 137 | void* thread_insert(void* threadArgs) 138 | { 139 | WorkerArgs* args = static_cast(threadArgs); 140 | int* elems = args->elems; 141 | T* ht_p = static_cast(args->ht_p); 142 | int start = args->start; 143 | int num_elems = args->num_elems; 144 | int tid = args->tid; 145 | 146 | for (int i = start; i < start + num_elems; i++) 147 | { 148 | ht_p->insert(elems[i], elems[i], tid); 149 | } 150 | 151 | } 152 | 153 | template 154 | void* thread_remove(void* threadArgs) 155 | { 156 | WorkerArgs* args = static_cast(threadArgs); 157 | int* elems = args->elems; 158 | T* ht_p = static_cast(args->ht_p); 159 | int start = args->start; 160 | int num_elems = args->num_elems; 161 | int tid = args->tid; 162 | bool remove = args->remove; 163 | 164 | std::random_device rd; 165 | std::mt19937 mt(rd()); 166 | std::uniform_int_distribution rng(0, 200000 - 1); 167 | 168 | for (int i = start; i < start + num_elems; i++) 169 | { 170 | if (remove) 171 | ht_p->remove(elems[i], tid); 172 | else 173 | ht_p->search(elems[rng(mt)], tid); 174 | } 175 | 176 | } 177 | 178 | #endif 179 | -------------------------------------------------------------------------------- /wheels/lockfreehash/lprobe/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | g++ main.cc -std=c++14 -mcx16 -march=native -pthread 3 | clean: 4 | rm -rf a.out 5 | -------------------------------------------------------------------------------- /wheels/lockfreehash/lprobe/alloc.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | namespace pbbs { 4 | void* my_alloc(size_t); 5 | void my_free(void*); 6 | } 7 | 8 | #include 9 | #include 10 | #include 11 | #include "utilities.h" 12 | #include "concurrent_stack.h" 13 | #include "utilities.h" 14 | #include "block_allocator.h" 15 | #include "memory_size.h" 16 | #include "get_time.h" 17 | 18 | namespace pbbs { 19 | 20 | 
#if defined(__APPLE__) // a little behind the times 21 | void* aligned_alloc(size_t, size_t n) {return malloc(n);} 22 | #endif 23 | 24 | 25 | // **************************************** 26 | // pool_allocator 27 | // **************************************** 28 | 29 | // Allocates headerless blocks from pools of different sizes. 30 | // A vector of pool sizes is given to the constructor. 31 | // Sizes must be at least 8, and must increase. 32 | // For pools of small blocks (below large_threshold) each thread keeps a 33 | // thread local list of elements from each pool using the 34 | // block_allocator. 35 | // For pools of large blocks there is only one shared pool for each. 36 | struct pool_allocator { 37 | 38 | private: 39 | static const size_t large_align = 64; 40 | static const size_t large_threshold = (1 << 20); 41 | size_t num_buckets; 42 | size_t num_small; 43 | size_t max_small; 44 | size_t max_size; 45 | std::atomic large_allocated{0}; 46 | 47 | concurrent_stack* large_buckets; 48 | struct block_allocator *small_allocators; 49 | std::vector sizes; 50 | 51 | void* allocate_large(size_t n) { 52 | 53 | size_t bucket = num_small; 54 | size_t alloc_size; 55 | 56 | if (n <= max_size) { 57 | while (n > sizes[bucket]) bucket++; 58 | maybe r = large_buckets[bucket-num_small].pop(); 59 | if (r) return *r; 60 | alloc_size = sizes[bucket]; 61 | } else alloc_size = n; 62 | 63 | void* a = (void*) aligned_alloc(large_align, alloc_size); 64 | if (a == NULL) throw std::bad_alloc(); 65 | 66 | large_allocated += n; 67 | return a; 68 | } 69 | 70 | void deallocate_large(void* ptr, size_t n) { 71 | if (n > max_size) { 72 | free(ptr); 73 | large_allocated -= n; 74 | } else { 75 | size_t bucket = num_small; 76 | while (n > sizes[bucket]) bucket++; 77 | large_buckets[bucket-num_small].push(ptr); 78 | } 79 | } 80 | 81 | const size_t small_alloc_block_size = (1 << 20); 82 | 83 | public: 84 | ~pool_allocator() { 85 | for (size_t i=0; i < num_small; i++) 86 | 
small_allocators[i].~block_allocator(); 87 | free(small_allocators); 88 | clear(); 89 | delete[] large_buckets; 90 | } 91 | 92 | pool_allocator() {} 93 | 94 | pool_allocator(std::vector const &sizes) : sizes(sizes) { 95 | timer t; 96 | num_buckets = sizes.size(); 97 | max_size = sizes[num_buckets-1]; 98 | num_small = 0; 99 | while (sizes[num_small] < large_threshold && num_small < num_buckets) 100 | num_small++; 101 | max_small = (num_small > 0) ? sizes[num_small - 1] : 0; 102 | 103 | large_buckets = new concurrent_stack[num_buckets-num_small]; 104 | 105 | small_allocators = (struct block_allocator*) 106 | malloc(num_buckets * sizeof(struct block_allocator)); 107 | size_t prev_bucket_size = 0; 108 | 109 | for (size_t i = 0; i < num_small; i++) { 110 | size_t bucket_size = sizes[i]; 111 | if (bucket_size < 8) 112 | throw std::invalid_argument("for small_allocator, bucket sizes must be at least 8"); 113 | if (!(bucket_size > prev_bucket_size)) 114 | throw std::invalid_argument("for small_allocator, bucket sizes must increase"); 115 | prev_bucket_size = bucket_size; 116 | new (static_cast(std::addressof(small_allocators[i]))) 117 | block_allocator(bucket_size, 0, small_alloc_block_size - 64); 118 | } 119 | } 120 | 121 | void* allocate(size_t n) { 122 | if (n > max_small) return allocate_large(n); 123 | size_t bucket = 0; 124 | while (n > sizes[bucket]) bucket++; 125 | return small_allocators[bucket].alloc(); 126 | } 127 | 128 | void deallocate(void* ptr, size_t n) { 129 | if (n > max_small) deallocate_large(ptr, n); 130 | else { 131 | size_t bucket = 0; 132 | while (n > sizes[bucket]) bucket++; 133 | small_allocators[bucket].free(ptr); 134 | } 135 | } 136 | 137 | // allocate, touch, and free to make sure space for small blocks is paged in 138 | void reserve(size_t bytes) { 139 | size_t bc = bytes/small_alloc_block_size; 140 | std::vector h(bc); 141 | parallel_for(0, bc, [&] (size_t i) { 142 | h[i] = allocate(small_alloc_block_size); 143 | }, 1); 144 | parallel_for(0, 
bc, [&] (size_t i) { 145 | for (size_t j=0; j < small_alloc_block_size; j += (1 << 12)) 146 | ((char*) h[i])[j] = 0; 147 | }, 1); 148 | for (size_t i=0; i < bc; i++) 149 | deallocate(h[i], small_alloc_block_size); 150 | } 151 | 152 | void print_stats() { 153 | size_t total_a = 0; 154 | size_t total_u = 0; 155 | for (size_t i = 0; i < num_small; i++) { 156 | size_t bucket_size = sizes[i]; 157 | size_t allocated = small_allocators[i].num_allocated_blocks(); 158 | size_t used = small_allocators[i].num_used_blocks(); 159 | total_a += allocated * bucket_size; 160 | total_u += used * bucket_size; 161 | cout << "size = " << bucket_size << ", allocated = " << allocated 162 | << ", used = " << used << endl; 163 | } 164 | cout << "Large allocated = " << large_allocated << endl; 165 | cout << "Total bytes allocated = " << total_a + large_allocated << endl; 166 | cout << "Total bytes used = " << total_u << endl; 167 | } 168 | 169 | void clear() { 170 | for (size_t i = num_small; i < num_buckets; i++) { 171 | maybe r = large_buckets[i-num_small].pop(); 172 | while (r) { 173 | large_allocated -= sizes[i]; 174 | free(*r); 175 | r = large_buckets[i-num_small].pop(); 176 | } 177 | } 178 | } 179 | }; 180 | 181 | // **************************************** 182 | // default_allocator (uses powers of two as pool sizes) 183 | // **************************************** 184 | 185 | // these are bucket sizes used by the default allocator. 
186 | std::vector default_sizes() { 187 | size_t log_min_size = 4; 188 | size_t log_max_size = pbbs::log2_up(getMemorySize()/64); 189 | 190 | std::vector sizes; 191 | for (size_t i = log_min_size; i <= log_max_size; i++) 192 | sizes.push_back(1 << i); 193 | return sizes; 194 | } 195 | 196 | pool_allocator default_allocator(default_sizes()); 197 | 198 | // **************************************** 199 | // Following Matches the c++ Allocator specification (minimally) 200 | // https://en.cppreference.com/w/cpp/named_req/Allocator 201 | // Can therefore be used for containers, e.g.: 202 | // std::vector> 203 | // **************************************** 204 | 205 | template 206 | struct allocator { 207 | using value_type = T; 208 | T* allocate(size_t n) { 209 | return (T*) default_allocator.allocate(n * sizeof(T)); 210 | } 211 | void deallocate(T* ptr, size_t n) { 212 | default_allocator.deallocate((void*) ptr, n * sizeof(T)); 213 | } 214 | 215 | allocator() = default; 216 | template constexpr allocator(const allocator&) {} 217 | }; 218 | 219 | template 220 | bool operator==(const allocator&, const allocator&) { return true; } 221 | template 222 | bool operator!=(const allocator&, const allocator&) { return false; } 223 | 224 | // **************************************** 225 | // Static allocator for single items of a given type, e.g. 
226 | // using long_allocator = type_allocator; 227 | // long* foo = long_allocator::alloc(); 228 | // *foo = (long) 23; 229 | // long_allocator::free(foo); 230 | // Uses block allocator, and is headerless 231 | // **************************************** 232 | 233 | template 234 | class type_allocator { 235 | public: 236 | static constexpr size_t default_alloc_size = 0; 237 | static block_allocator allocator; 238 | static const bool initialized{true}; 239 | static T* alloc() { return (T*) allocator.alloc();} 240 | static void free(T* ptr) {allocator.free((void*) ptr);} 241 | 242 | // for backward compatibility 243 | //static void init(size_t _alloc_size = 0, size_t _list_size=0) {}; 244 | static void init(size_t, size_t) {}; 245 | static void init() {}; 246 | static void reserve(size_t n = default_alloc_size) { 247 | allocator.reserve(n); 248 | } 249 | static void finish() {allocator.clear(); 250 | } 251 | static size_t block_size () {return allocator.block_size();} 252 | static size_t num_allocated_blocks() {return allocator.num_allocated_blocks();} 253 | static size_t num_used_blocks() {return allocator.num_used_blocks();} 254 | static size_t num_used_bytes() {return num_used_blocks() * block_size();} 255 | static void print_stats() {allocator.print_stats();} 256 | }; 257 | 258 | template 259 | block_allocator type_allocator::allocator = block_allocator(sizeof(T)); 260 | 261 | // **************************************** 262 | // my_alloc and my_free (add size tags) 263 | // **************************************** 264 | // ifdefed to either use malloc or the pbbs allocator 265 | // **************************************** 266 | 267 | #ifdef USEMALLOC 268 | 269 | #include 270 | 271 | struct __mallopt { 272 | __mallopt() { 273 | mallopt(M_MMAP_MAX,0); 274 | mallopt(M_TRIM_THRESHOLD,-1); 275 | } 276 | }; 277 | 278 | __mallopt __mallopt_var; 279 | 280 | inline void* my_alloc(size_t i) {return malloc(i);} 281 | inline void my_free(void* p) {free(p);} 282 | void 
allocator_clear() {} 283 | void allocator_reserve(size_t bytes) {} 284 | 285 | #else 286 | 287 | constexpr size_t size_offset = 1; // in size_t sized words 288 | 289 | // needs to be at least size_offset * size_offset(size_t) 290 | inline size_t header_size(size_t n) { // in bytes 291 | return (n >= 1024) ? 64 : (n & 15) ? 8 : (n & 63) ? 16 : 64; 292 | } 293 | 294 | // allocates and tags with a header (8, 16 or 64 bytes) that contains the size 295 | void* my_alloc(size_t n) { 296 | size_t hsize = header_size(n); 297 | void* ptr; 298 | ptr = default_allocator.allocate(n + hsize); 299 | void* r = (void*) (((char*) ptr) + hsize); 300 | *(((size_t*) r)-size_offset) = n; // puts size in header 301 | return r; 302 | } 303 | 304 | // reads the size, offsets the header and frees 305 | void my_free(void *ptr) { 306 | size_t n = *(((size_t*) ptr)-size_offset); 307 | size_t hsize = header_size(n); 308 | if (hsize > (1ul << 48)) { 309 | cout << "corrupted header in my_free" << endl; 310 | throw std::bad_alloc(); 311 | } 312 | default_allocator.deallocate((void*) (((char*) ptr) - hsize), n + hsize); 313 | } 314 | 315 | void allocator_clear() { 316 | default_allocator.clear(); 317 | } 318 | 319 | void allocator_reserve(size_t bytes) { 320 | default_allocator.reserve(bytes); 321 | } 322 | #endif 323 | 324 | // **************************************** 325 | // common across allocators (key routines used by sequences) 326 | // **************************************** 327 | 328 | // Does not initialize the array 329 | template 330 | E* new_array_no_init(size_t n) { 331 | return (E*) my_alloc(n * sizeof(E)); 332 | } 333 | 334 | // Initializes in parallel 335 | template 336 | E* new_array(size_t n) { 337 | E* r = new_array_no_init(n); 338 | if (!std::is_trivially_default_constructible::value) 339 | parallel_for(0, n, [&] (size_t i) { 340 | new ((void*) (r+i)) E;}); 341 | return r; 342 | } 343 | 344 | inline void free_array(void* a) { 345 | my_free(a); 346 | } 347 | 348 | // Destructs 
in parallel 349 | template 350 | void delete_array(E* A, size_t n) { 351 | // C++14 -- supported by gnu C++11 352 | if (!std::is_trivially_destructible::value) 353 | parallel_for(0, n, [&] (size_t i) { 354 | A[i].~E();}); 355 | else if (std::is_pointer::value) 356 | parallel_for(0, n, [&] (size_t i) { 357 | if (A[i] != nullptr) delete A[i];}); 358 | my_free(A); 359 | } 360 | } 361 | -------------------------------------------------------------------------------- /wheels/lockfreehash/lprobe/benchmark_lprobe.h: -------------------------------------------------------------------------------- 1 | #ifndef BENCHMARK_LOCKFREE_HT 2 | #define BENCHMARK_LOCKFREE_HT 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include "cycle_timer.h" 14 | #include "hash_table.h" 15 | #include "thread_service.h" 16 | 17 | #define NUM_ITERS 3 18 | #define MAX_THREADS 24 19 | 20 | #define C_NUM_ELEMS 76800*24 21 | #include "data.h" 22 | using namespace pbbs; 23 | 24 | 25 | 26 | class BenchmarkLockFreeHT 27 | { 28 | public: 29 | BenchmarkLockFreeHT(int op_count, int capacity, 30 | int rweight, int idweight, 31 | int thread_count, 32 | double load_factor); 33 | 34 | void benchmark_correctness(); 35 | void benchmark_hp(); 36 | void benchmark_all(); 37 | void run(); 38 | 39 | private: 40 | int m_rweight; 41 | int m_idweight; 42 | 43 | int m_thread_count; 44 | int m_op_count; 45 | int m_capacity; 46 | double m_load_factor; 47 | }; 48 | 49 | BenchmarkLockFreeHT::BenchmarkLockFreeHT(int op_count, int capacity, 50 | int rweight, int idweight, 51 | int thread_count, double load_factor) 52 | { 53 | std::cout << "*** BENCHMARKING LockFreeHT ***" << std::endl; 54 | m_op_count = op_count; 55 | m_load_factor = load_factor; 56 | m_capacity = capacity; 57 | m_thread_count = thread_count; 58 | 59 | m_rweight = rweight; 60 | m_idweight = idweight; 61 | } 62 | 63 | void BenchmarkLockFreeHT::benchmark_correctness() 64 | { 65 | bool correct 
= true; 66 | 67 | //Lockfree_hash_table ht(2 * C_NUM_ELEMS, m_thread_count); 68 | Table ht(2*C_NUM_ELEMS, hashKV(), 1.3); 69 | std::unordered_map map; 70 | map.reserve(2 * C_NUM_ELEMS); 71 | 72 | std::random_device rd; 73 | std::mt19937 mt(rd()); 74 | std::uniform_int_distribution rng; 75 | 76 | int elems[C_NUM_ELEMS]; 77 | for (int i = 0; i < C_NUM_ELEMS; i++) 78 | { 79 | //int k = rng(mt); 80 | int k = 100; 81 | elems[i] = k; 82 | map[k] = k; 83 | } 84 | //adding err 85 | //elems[5*C_NUM_ELEMS/24 + 34] = elems[6*C_NUM_ELEMS/24 + 49]; 86 | //elems[22*C_NUM_ELEMS/24 + 199] = elems[9*C_NUM_ELEMS/24 + 347]; 87 | //elems[21*C_NUM_ELEMS/24 + 199] = elems[9*C_NUM_ELEMS/24 + 347]; 88 | //elems[19*C_NUM_ELEMS/24 + 199] = elems[9*C_NUM_ELEMS/24 + 347]; 89 | for (int i=0;i<23;i++) 90 | for (int j=0;j<20;j++) 91 | elems[i*C_NUM_ELEMS/24 + 34+j] = 101+i*20+j; 92 | 93 | pthread_t workers[MAX_THREADS]; 94 | WorkerArgs args[MAX_THREADS]; 95 | 96 | for (int i = 0; i < 24; i++) 97 | { 98 | args[i].num_elems = C_NUM_ELEMS / 24; 99 | args[i].ht_p = (void*)&ht; 100 | args[i].elems = elems; 101 | args[i].start = i * (C_NUM_ELEMS / 24); 102 | args[i].tid = i; 103 | 104 | pthread_create(&workers[i], NULL, thread_checkmiss>, (void*)&args[i]); 105 | } 106 | 107 | for (int i = 0; i < 24; i++) 108 | { 109 | pthread_join(workers[i], NULL); 110 | } 111 | 112 | std::cout << "hash table count is " << ht.count() << std::endl; 113 | std::cout << "miss is " << miss << std::endl; 114 | assert(miss==461); 115 | 116 | int count = 0; 117 | for (std::pair e : map) 118 | { 119 | //std::pair r = ht.search(e.first, 0); 120 | struct KV res = ht.find(e.first); 121 | std::pair r; 122 | if (res.k == -1) 123 | r = {-1,false}; 124 | else 125 | r = {res.v,true}; 126 | if (!r.second || e.second != r.first) 127 | { 128 | 129 | std::cout << "\t" << "Expected value, Received value, Received result = " << e.second << " " << r.second << " "<< r.first << std::endl; 130 | correct = false; 131 | count++; 132 | } 133 | } 
134 | 135 | std::cout << "\t" << count << "/" << C_NUM_ELEMS << " errors" << std::endl; 136 | 137 | if (correct) 138 | std::cout << "\t" << "Correctness test passed" << std::endl; 139 | else 140 | std::cout << "\t" << "Correctness test failed" << std::endl; 141 | 142 | } 143 | 144 | void BenchmarkLockFreeHT::benchmark_hp() 145 | { 146 | //Lockfree_hash_table ht(400000, m_thread_count); 147 | Table ht(400000, hashKV(), 1.3); 148 | 149 | std::random_device rd; 150 | std::mt19937 mt(rd()); 151 | std::uniform_int_distribution rng; 152 | 153 | std::array weights; 154 | weights[0] = m_rweight; 155 | weights[1] = m_idweight; 156 | weights[2] = m_idweight; 157 | 158 | std::default_random_engine g; 159 | std::discrete_distribution drng(weights.begin(), weights.end()); 160 | 161 | int insert[200000]; 162 | for (int i = 0; i < 200000; i++) 163 | { 164 | int k = rng(mt); 165 | int v = rng(mt); 166 | insert[i] = k; 167 | //ht.insert(k, v, 0); 168 | ht.insert({k,v}); 169 | } 170 | 171 | pthread_t workers[MAX_THREADS]; 172 | WorkerArgs args[MAX_THREADS]; 173 | 174 | int num_elems = 200000 / m_thread_count; 175 | for (int i = 0; i < m_thread_count; i++) 176 | { 177 | args[i].num_elems = num_elems; 178 | args[i].ht_p = (void*)&ht; 179 | args[i].elems = insert; 180 | args[i].start = i * num_elems; 181 | args[i].tid = i; 182 | args[i].remove = i < (m_thread_count / 4); 183 | 184 | pthread_create(&workers[i], NULL, thread_remove>, (void*)&args[i]); 185 | } 186 | 187 | for (int i = 0; i < m_thread_count; i++) 188 | { 189 | pthread_join(workers[i], NULL); 190 | } 191 | 192 | std::cout << "\t" << "Hazard Pointer test passed" << std::endl; 193 | 194 | } 195 | 196 | void BenchmarkLockFreeHT::benchmark_all() 197 | { 198 | // Lockfree_hash_table ht(m_capacity, m_thread_count); 199 | Table ht(m_capacity, hashKV(), 1.3); 200 | 201 | std::random_device rd; 202 | std::mt19937 mt(rd()); 203 | std::uniform_int_distribution rng; 204 | 205 | std::array weights; 206 | weights[0] = m_rweight; 207 | 
weights[1] = m_idweight; 208 | weights[2] = m_idweight; 209 | 210 | std::default_random_engine g; 211 | std::discrete_distribution drng(weights.begin(), weights.end()); 212 | 213 | // Warm-up table to load factor 214 | int num_warmup = static_cast(static_cast(m_capacity) * m_load_factor); 215 | for (int i = 0; i < num_warmup; i++) 216 | { 217 | int k = rng(mt); 218 | int v = rng(mt); 219 | 220 | //ht.insert(k, v, 0); 221 | ht.insert({k,v}); 222 | } 223 | 224 | // Run benchmark 225 | std::vector results; 226 | for (int iter = 0; iter < NUM_ITERS; iter++) 227 | { 228 | int num_elems = m_op_count / m_thread_count; 229 | pthread_t workers[MAX_THREADS]; 230 | WorkerArgs args[MAX_THREADS]; 231 | 232 | double start = CycleTimer::currentSeconds(); 233 | for (int i = 0; i < m_thread_count; i++) 234 | { 235 | args[i].num_elems = num_elems; 236 | args[i].rweight = m_rweight; 237 | args[i].iweight = m_idweight / 2; 238 | args[i].dweight = m_idweight / 2; 239 | args[i].ht_p = (void*)&ht; 240 | args[i].tid = i; 241 | pthread_create(&workers[i], NULL, thread_service>, (void*)&args[i]); 242 | } 243 | 244 | for (int i = 0; i < m_thread_count; i++) 245 | { 246 | pthread_join(workers[i], NULL); 247 | } 248 | double time = CycleTimer::currentSeconds() - start; 249 | results.push_back(time); 250 | } 251 | 252 | // Publish Results 253 | double best_time = *std::min_element(results.begin(), results.end()); 254 | double avg_time = std::accumulate(results.begin(), results.end(), 0.0) / static_cast(results.size()); 255 | std::cout << "\t" << "Max Throughput: " << m_op_count / best_time / 1000.0 << " ops/ms" << std::endl; 256 | std::cout << "\t" << "Avg Throughput: " << m_op_count / avg_time / 1000.0 << " ops/ms" << std::endl; 257 | 258 | results.clear(); 259 | 260 | int* keys = new int[m_op_count]; 261 | 262 | for (int iter = 0; iter < NUM_ITERS; iter++) 263 | { 264 | int num_elems = m_op_count / m_thread_count; 265 | pthread_t workers[MAX_THREADS]; 266 | WorkerArgs args[MAX_THREADS]; 267 | 
268 | double start = CycleTimer::currentSeconds(); 269 | for (int i = 0; i < m_thread_count; i++) 270 | { 271 | args[i].num_elems = num_elems; 272 | args[i].rweight = m_rweight; 273 | args[i].iweight = m_idweight / 2; 274 | args[i].dweight = m_idweight / 2; 275 | args[i].ht_p = (void*)&ht; 276 | args[i].tid = i; 277 | args[i].elems = keys; 278 | args[i].start = i * num_elems; 279 | pthread_create(&workers[i], NULL, thread_service_low_contention>, (void*)&args[i]); 280 | } 281 | 282 | for (int i = 0; i < m_thread_count; i++) 283 | { 284 | pthread_join(workers[i], NULL); 285 | } 286 | double time = CycleTimer::currentSeconds() - start; 287 | results.push_back(time); 288 | } 289 | 290 | // Publish Results 291 | best_time = *std::min_element(results.begin(), results.end()); 292 | avg_time = std::accumulate(results.begin(), results.end(), 0.0) / static_cast(results.size()); 293 | std::cout << "\t" << "Max Throughput (Low): " << m_op_count / best_time / 1000.0 << " ops/ms" << std::endl; 294 | std::cout << "\t" << "Avg Throughput (Low): " << m_op_count / avg_time / 1000.0 << " ops/ms" << std::endl; 295 | 296 | results.clear(); 297 | 298 | for (int iter = 0; iter < NUM_ITERS; iter++) 299 | { 300 | int num_elems = m_op_count / m_thread_count; 301 | pthread_t workers[MAX_THREADS]; 302 | WorkerArgs args[MAX_THREADS]; 303 | 304 | double start = CycleTimer::currentSeconds(); 305 | for (int i = 0; i < m_thread_count; i++) 306 | { 307 | args[i].num_elems = num_elems; 308 | args[i].rweight = m_rweight; 309 | args[i].iweight = m_idweight / 2; 310 | args[i].dweight = m_idweight / 2; 311 | args[i].ht_p = (void*)&ht; 312 | args[i].tid = i; 313 | //ht.insert(0, 0, 0); 314 | ht.insert({0,0}); 315 | pthread_create(&workers[i], NULL, thread_service_high_contention>, (void*)&args[i]); 316 | } 317 | 318 | for (int i = 0; i < m_thread_count; i++) 319 | { 320 | pthread_join(workers[i], NULL); 321 | } 322 | double time = CycleTimer::currentSeconds() - start; 323 | results.push_back(time); 324 
| } 325 | 326 | // Publish Results 327 | best_time = *std::min_element(results.begin(), results.end()); 328 | avg_time = std::accumulate(results.begin(), results.end(), 0.0) / static_cast(results.size()); 329 | std::cout << "\t" << "Max Throughput (High): " << m_op_count / best_time / 1000.0 << " ops/ms" << std::endl; 330 | std::cout << "\t" << "Avg Throughput (High): " << m_op_count / avg_time / 1000.0 << " ops/ms" << std::endl; 331 | 332 | 333 | } 334 | 335 | void BenchmarkLockFreeHT::run() 336 | { 337 | benchmark_correctness(); 338 | // benchmark_hp(); 339 | // benchmark_all(); 340 | } 341 | 342 | #endif 343 | -------------------------------------------------------------------------------- /wheels/lockfreehash/lprobe/benchmark_lprobe_ptr.h: -------------------------------------------------------------------------------- 1 | #ifndef BENCHMARK_LOCKFREE_HT 2 | #define BENCHMARK_LOCKFREE_HT 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "assert.h" 12 | 13 | #include "cycle_timer.h" 14 | #include "hash_table.h" 15 | #include "thread_service_ptr.h" 16 | 17 | #define NUM_ITERS 3 18 | #define MAX_THREADS 24 19 | 20 | #define C_NUM_ELEMS 76800*24 21 | #include "data_ptr.h" 22 | using namespace pbbs; 23 | 24 | 25 | 26 | class BenchmarkLockFreeHT 27 | { 28 | public: 29 | BenchmarkLockFreeHT(int op_count, int capacity, 30 | int rweight, int idweight, 31 | int thread_count, 32 | double load_factor); 33 | 34 | void benchmark_correctness(); 35 | void benchmark_hp(); 36 | void benchmark_all(); 37 | void run(); 38 | 39 | private: 40 | int m_rweight; 41 | int m_idweight; 42 | 43 | int m_thread_count; 44 | int m_op_count; 45 | int m_capacity; 46 | double m_load_factor; 47 | }; 48 | 49 | BenchmarkLockFreeHT::BenchmarkLockFreeHT(int op_count, int capacity, 50 | int rweight, int idweight, 51 | int thread_count, double load_factor) 52 | { 53 | std::cout << "*** BENCHMARKING LockFreeHT ***" << std::endl; 54 | m_op_count = 
op_count; 55 | m_load_factor = load_factor; 56 | m_capacity = capacity; 57 | m_thread_count = thread_count; 58 | 59 | m_rweight = rweight; 60 | m_idweight = idweight; 61 | } 62 | 63 | void BenchmarkLockFreeHT::benchmark_correctness() 64 | { 65 | bool correct = true; 66 | 67 | //Lockfree_hash_table ht(2 * C_NUM_ELEMS, m_thread_count); 68 | Table ht(2*C_NUM_ELEMS, hashKV(), 1.3); 69 | std::unordered_map map; 70 | map.reserve(2 * C_NUM_ELEMS); 71 | 72 | std::random_device rd; 73 | std::mt19937 mt(rd()); 74 | std::uniform_int_distribution rng; 75 | 76 | int elems[C_NUM_ELEMS]; 77 | for (int i = 0; i < C_NUM_ELEMS; i++) 78 | { 79 | //int k = rng(mt); 80 | int k = 100; 81 | elems[i] = k; 82 | map[k] = k; 83 | } 84 | //adding err 85 | //elems[5*C_NUM_ELEMS/24 + 34] = elems[6*C_NUM_ELEMS/24 + 49]; 86 | //elems[22*C_NUM_ELEMS/24 + 199] = elems[9*C_NUM_ELEMS/24 + 347]; 87 | //elems[21*C_NUM_ELEMS/24 + 199] = elems[9*C_NUM_ELEMS/24 + 347]; 88 | //elems[19*C_NUM_ELEMS/24 + 199] = elems[9*C_NUM_ELEMS/24 + 347]; 89 | for (int i=0;i<23;i++) 90 | for (int j=0;j<20;j++) 91 | elems[i*C_NUM_ELEMS/24 + 34+j] = 101+i*20+j; 92 | 93 | pthread_t workers[MAX_THREADS]; 94 | WorkerArgs args[MAX_THREADS]; 95 | 96 | for (int i = 0; i < 24; i++) 97 | { 98 | args[i].num_elems = C_NUM_ELEMS / 24; 99 | args[i].ht_p = (void*)&ht; 100 | args[i].elems = elems; 101 | args[i].start = i * (C_NUM_ELEMS / 24); 102 | args[i].tid = i; 103 | 104 | pthread_create(&workers[i], NULL, thread_checkmiss>, (void*)&args[i]); 105 | } 106 | 107 | for (int i = 0; i < 24; i++) 108 | { 109 | pthread_join(workers[i], NULL); 110 | } 111 | 112 | 113 | std::cout << "hash table count is " << ht.count() << std::endl; 114 | std::cout << "miss is " << miss << std::endl; 115 | assert(miss==461); 116 | int count = 0; 117 | for (std::pair e : map) 118 | { 119 | //std::pair r = ht.search(e.first, 0); 120 | struct KV *res = ht.find(e.first); 121 | std::pair r; 122 | if (res==nullptr || res->k == -1) 123 | r = {-1,false}; 124 | else 
125 | r = {res->v,true}; 126 | if (!r.second || e.second != r.first) 127 | { 128 | 129 | std::cout << "\t" << "Expected value, Received value, Received result = " << e.second << " " << r.second << " "<< r.first << std::endl; 130 | correct = false; 131 | count++; 132 | } 133 | } 134 | 135 | std::cout << "\t" << count << "/" << C_NUM_ELEMS << " errors" << std::endl; 136 | 137 | if (correct) 138 | std::cout << "\t" << "Correctness test passed" << std::endl; 139 | else 140 | std::cout << "\t" << "Correctness test failed" << std::endl; 141 | 142 | } 143 | 144 | void BenchmarkLockFreeHT::benchmark_hp() 145 | { 146 | //Lockfree_hash_table ht(400000, m_thread_count); 147 | Table ht(400000, hashKV(), 1.3); 148 | 149 | std::random_device rd; 150 | std::mt19937 mt(rd()); 151 | std::uniform_int_distribution rng; 152 | 153 | std::array weights; 154 | weights[0] = m_rweight; 155 | weights[1] = m_idweight; 156 | weights[2] = m_idweight; 157 | 158 | std::default_random_engine g; 159 | std::discrete_distribution drng(weights.begin(), weights.end()); 160 | 161 | int insert[200000]; 162 | for (int i = 0; i < 200000; i++) 163 | { 164 | int k = rng(mt); 165 | int v = rng(mt); 166 | insert[i] = k; 167 | //ht.insert(k, v, 0); 168 | ht.insert(new struct KV(k,v)); 169 | } 170 | 171 | pthread_t workers[MAX_THREADS]; 172 | WorkerArgs args[MAX_THREADS]; 173 | 174 | int num_elems = 200000 / m_thread_count; 175 | for (int i = 0; i < m_thread_count; i++) 176 | { 177 | args[i].num_elems = num_elems; 178 | args[i].ht_p = (void*)&ht; 179 | args[i].elems = insert; 180 | args[i].start = i * num_elems; 181 | args[i].tid = i; 182 | args[i].remove = i < (m_thread_count / 4); 183 | 184 | pthread_create(&workers[i], NULL, thread_remove>, (void*)&args[i]); 185 | } 186 | 187 | for (int i = 0; i < m_thread_count; i++) 188 | { 189 | pthread_join(workers[i], NULL); 190 | } 191 | 192 | std::cout << "\t" << "Hazard Pointer test passed" << std::endl; 193 | 194 | } 195 | 196 | void 
BenchmarkLockFreeHT::benchmark_all() 197 | { 198 | // Lockfree_hash_table ht(m_capacity, m_thread_count); 199 | Table ht(m_capacity, hashKV(), 1.3); 200 | 201 | std::random_device rd; 202 | std::mt19937 mt(rd()); 203 | std::uniform_int_distribution rng; 204 | 205 | std::array weights; 206 | weights[0] = m_rweight; 207 | weights[1] = m_idweight; 208 | weights[2] = m_idweight; 209 | 210 | std::default_random_engine g; 211 | std::discrete_distribution drng(weights.begin(), weights.end()); 212 | 213 | // Warm-up table to load factor 214 | int num_warmup = static_cast(static_cast(m_capacity) * m_load_factor); 215 | for (int i = 0; i < num_warmup; i++) 216 | { 217 | int k = rng(mt); 218 | int v = rng(mt); 219 | 220 | //ht.insert(k, v, 0); 221 | ht.insert(new struct KV(k,v)); 222 | } 223 | 224 | // Run benchmark 225 | std::vector results; 226 | for (int iter = 0; iter < NUM_ITERS; iter++) 227 | { 228 | int num_elems = m_op_count / m_thread_count; 229 | pthread_t workers[MAX_THREADS]; 230 | WorkerArgs args[MAX_THREADS]; 231 | 232 | double start = CycleTimer::currentSeconds(); 233 | for (int i = 0; i < m_thread_count; i++) 234 | { 235 | args[i].num_elems = num_elems; 236 | args[i].rweight = m_rweight; 237 | args[i].iweight = m_idweight / 2; 238 | args[i].dweight = m_idweight / 2; 239 | args[i].ht_p = (void*)&ht; 240 | args[i].tid = i; 241 | pthread_create(&workers[i], NULL, thread_service>, (void*)&args[i]); 242 | } 243 | 244 | for (int i = 0; i < m_thread_count; i++) 245 | { 246 | pthread_join(workers[i], NULL); 247 | } 248 | double time = CycleTimer::currentSeconds() - start; 249 | results.push_back(time); 250 | } 251 | 252 | // Publish Results 253 | double best_time = *std::min_element(results.begin(), results.end()); 254 | double avg_time = std::accumulate(results.begin(), results.end(), 0.0) / static_cast(results.size()); 255 | std::cout << "\t" << "Max Throughput: " << m_op_count / best_time / 1000.0 << " ops/ms" << std::endl; 256 | std::cout << "\t" << "Avg 
Throughput: " << m_op_count / avg_time / 1000.0 << " ops/ms" << std::endl; 257 | 258 | results.clear(); 259 | 260 | int* keys = new int[m_op_count]; 261 | 262 | for (int iter = 0; iter < NUM_ITERS; iter++) 263 | { 264 | int num_elems = m_op_count / m_thread_count; 265 | pthread_t workers[MAX_THREADS]; 266 | WorkerArgs args[MAX_THREADS]; 267 | 268 | double start = CycleTimer::currentSeconds(); 269 | for (int i = 0; i < m_thread_count; i++) 270 | { 271 | args[i].num_elems = num_elems; 272 | args[i].rweight = m_rweight; 273 | args[i].iweight = m_idweight / 2; 274 | args[i].dweight = m_idweight / 2; 275 | args[i].ht_p = (void*)&ht; 276 | args[i].tid = i; 277 | args[i].elems = keys; 278 | args[i].start = i * num_elems; 279 | pthread_create(&workers[i], NULL, thread_service_low_contention>, (void*)&args[i]); 280 | } 281 | 282 | for (int i = 0; i < m_thread_count; i++) 283 | { 284 | pthread_join(workers[i], NULL); 285 | } 286 | double time = CycleTimer::currentSeconds() - start; 287 | results.push_back(time); 288 | } 289 | 290 | // Publish Results 291 | best_time = *std::min_element(results.begin(), results.end()); 292 | avg_time = std::accumulate(results.begin(), results.end(), 0.0) / static_cast(results.size()); 293 | std::cout << "\t" << "Max Throughput (Low): " << m_op_count / best_time / 1000.0 << " ops/ms" << std::endl; 294 | std::cout << "\t" << "Avg Throughput (Low): " << m_op_count / avg_time / 1000.0 << " ops/ms" << std::endl; 295 | 296 | results.clear(); 297 | 298 | for (int iter = 0; iter < NUM_ITERS; iter++) 299 | { 300 | int num_elems = m_op_count / m_thread_count; 301 | pthread_t workers[MAX_THREADS]; 302 | WorkerArgs args[MAX_THREADS]; 303 | 304 | double start = CycleTimer::currentSeconds(); 305 | for (int i = 0; i < m_thread_count; i++) 306 | { 307 | args[i].num_elems = num_elems; 308 | args[i].rweight = m_rweight; 309 | args[i].iweight = m_idweight / 2; 310 | args[i].dweight = m_idweight / 2; 311 | args[i].ht_p = (void*)&ht; 312 | args[i].tid = i; 313 | 
//ht.insert(0, 0, 0); 314 | ht.insert(new struct KV(0,0)); 315 | pthread_create(&workers[i], NULL, thread_service_high_contention>, (void*)&args[i]); 316 | } 317 | 318 | for (int i = 0; i < m_thread_count; i++) 319 | { 320 | pthread_join(workers[i], NULL); 321 | } 322 | double time = CycleTimer::currentSeconds() - start; 323 | results.push_back(time); 324 | } 325 | 326 | // Publish Results 327 | best_time = *std::min_element(results.begin(), results.end()); 328 | avg_time = std::accumulate(results.begin(), results.end(), 0.0) / static_cast(results.size()); 329 | std::cout << "\t" << "Max Throughput (High): " << m_op_count / best_time / 1000.0 << " ops/ms" << std::endl; 330 | std::cout << "\t" << "Avg Throughput (High): " << m_op_count / avg_time / 1000.0 << " ops/ms" << std::endl; 331 | 332 | 333 | } 334 | 335 | void BenchmarkLockFreeHT::run() 336 | { 337 | benchmark_correctness(); 338 | // benchmark_hp(); 339 | // benchmark_all(); 340 | } 341 | 342 | #endif 343 | -------------------------------------------------------------------------------- /wheels/lockfreehash/lprobe/block_allocator.h: -------------------------------------------------------------------------------- 1 | // This code is part of the Problem Based Benchmark Suite (PBBS) 2 | // Copyright (c) 2016 Guy Blelloch, Daniel Ferizovic, and the PBBS team 3 | // 4 | // Permission is hereby granted, free of charge, to any person obtaining a 5 | // copy of this software and associated documentation files (the 6 | // "Software"), to deal in the Software without restriction, including 7 | // without limitation the rights (to use, copy, modify, merge, publish, 8 | // distribute, sublicense, and/or sell copies of the Software, and to 9 | // permit persons to whom the Software is furnished to do so, subject to 10 | // the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included 13 | // in all copies or substantial portions of the Software. 
14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 16 | // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 | // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 19 | // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 20 | // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 21 | // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 22 | 23 | // A concurrent allocator for any fixed type T 24 | // Keeps a local pool per processor 25 | // Grabs list_size elements from a global pool if empty, and 26 | // Returns list_size elements to the global pool when local pool=2*list_size 27 | // Keeps track of number of allocated elements. 28 | // Probably more efficient than a general purpose allocator 29 | 30 | #pragma once 31 | 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include "concurrent_stack.h" 37 | #include "utilities.h" 38 | #include "memory_size.h" 39 | 40 | struct block_allocator { 41 | private: 42 | 43 | static const size_t default_list_bytes = (1 << 22) - 64; // in bytes 44 | static const size_t pad_size = 256; 45 | 46 | struct block { 47 | block* next; 48 | }; 49 | 50 | using block_p = block*; 51 | 52 | struct alignas(64) thread_list { 53 | size_t sz; 54 | block_p head; 55 | block_p mid; 56 | char cache_line[pad_size]; 57 | thread_list() : sz(0), head(NULL) {}; 58 | }; 59 | 60 | bool initialized{false}; 61 | block_p initialize_list(block_p); 62 | block_p get_list(); 63 | concurrent_stack pool_roots; 64 | concurrent_stack global_stack; 65 | thread_list* local_lists; 66 | 67 | size_t list_length; 68 | size_t max_blocks; 69 | size_t block_size_; 70 | //std::atomic blocks_allocated; 71 | size_t blocks_allocated; 72 | char* allocate_blocks(size_t num_blocks); 73 | 74 | public: 75 | static int thread_count; 76 | void* alloc(); 77 | void free(void*); 78 | void 
reserve(size_t n); 79 | void clear(); 80 | void print_stats(); 81 | size_t block_size () {return block_size_;} 82 | size_t num_allocated_blocks() {return blocks_allocated;} 83 | size_t num_used_blocks(); 84 | 85 | ~block_allocator(); 86 | block_allocator(size_t block_size, 87 | size_t reserved_blocks = 0, 88 | size_t list_length_ = 0, 89 | size_t max_blocks_ = 0); 90 | block_allocator() {}; 91 | }; 92 | 93 | int block_allocator::thread_count = num_workers(); 94 | 95 | // Allocate a new list of list_length elements 96 | 97 | auto block_allocator::initialize_list(block_p start) -> block_p { 98 | parallel_for (0, list_length - 1, [&] (size_t i) { 99 | block_p p = (block_p) (((char*) start) + i * block_size_); 100 | p->next = (block_p) (((char*) p) + block_size_); 101 | }, 1000, true); 102 | block_p last = (block_p) (((char*) start) + (list_length-1) * block_size_); 103 | last->next = NULL; 104 | return start; 105 | } 106 | 107 | size_t block_allocator::num_used_blocks() { 108 | size_t free_blocks = global_stack.size()*list_length; 109 | for (int i = 0; i < thread_count; ++i) 110 | free_blocks += local_lists[i].sz; 111 | return blocks_allocated - free_blocks; 112 | } 113 | 114 | auto block_allocator::allocate_blocks(size_t num_blocks) -> char* { 115 | //char* start = (char*) aligned_alloc(pad_size, 116 | //num_blocks * block_size_+ pad_size); 117 | char* start = (char*) pbbs::my_alloc(num_blocks * block_size_); 118 | if (start == NULL) { 119 | fprintf(stderr, "Cannot allocate space in block_allocator"); 120 | exit(1); } 121 | 122 | pbbs::fetch_and_add(&blocks_allocated, num_blocks); // atomic 123 | 124 | if (blocks_allocated > max_blocks) { 125 | fprintf(stderr, "Too many blocks in block_allocator, change max_blocks"); 126 | exit(1); } 127 | 128 | pool_roots.push(start); // keep track so can free later 129 | return start; 130 | } 131 | 132 | // Either grab a list from the global pool, or if there is none 133 | // then allocate a new list 134 | auto 
block_allocator::get_list() -> block_p { 135 | maybe rem = global_stack.pop(); 136 | if (rem) return *rem; 137 | block_p start = (block_p) allocate_blocks(list_length); 138 | return initialize_list(start); 139 | } 140 | 141 | // Allocate n elements across however many lists are needed (rounded up) 142 | void block_allocator::reserve(size_t n) { 143 | size_t num_lists = thread_count + ceil(n / (double)list_length); 144 | char* start = allocate_blocks(list_length*num_lists); 145 | parallel_for(0, num_lists, [&] (size_t i) { 146 | block_p offset = (block_p) (start + i * list_length * block_size_); 147 | global_stack.push(initialize_list(offset)); 148 | }); 149 | } 150 | 151 | void block_allocator::print_stats() { 152 | size_t used = num_used_blocks(); 153 | size_t allocated = num_allocated_blocks(); 154 | size_t size = block_size(); 155 | std::cout << "Used: " << used << ", allocated: " << allocated 156 | << ", block size: " << size 157 | << ", bytes: " << size*allocated << std::endl; 158 | } 159 | 160 | block_allocator::block_allocator(size_t block_size, 161 | size_t reserved_blocks, 162 | size_t list_length_, 163 | size_t max_blocks_) { 164 | blocks_allocated = 0; 165 | block_size_ = block_size; 166 | if (list_length_ == 0) 167 | list_length = default_list_bytes / block_size; 168 | else list_length = list_length_ / block_size; 169 | if (max_blocks_ == 0) 170 | max_blocks = (3*getMemorySize()/block_size)/4; 171 | else max_blocks = max_blocks_; 172 | 173 | reserve(reserved_blocks); 174 | 175 | // all local lists start out empty 176 | local_lists = new thread_list[thread_count]; 177 | initialized = true; 178 | } 179 | 180 | void block_allocator::clear() { 181 | if (num_used_blocks() > 0) 182 | cout << "Warning: not clearing memory pool, block_size=" << block_size() 183 | << " : allocated blocks remain" << endl; 184 | else { 185 | // clear lists 186 | for (int i = 0; i < thread_count; ++i) 187 | local_lists[i].sz = 0; 188 | 189 | // throw away all allocated memory 190 | 
maybe x; 191 | while ((x = pool_roots.pop())) pbbs::my_free(*x); //std::free(*x); 192 | pool_roots.clear(); 193 | global_stack.clear(); 194 | blocks_allocated = 0; 195 | } 196 | } 197 | 198 | block_allocator::~block_allocator() { 199 | clear(); 200 | delete[] local_lists; 201 | } 202 | 203 | void block_allocator::free(void* ptr) { 204 | block_p new_node = (block_p) ptr; 205 | int id = worker_id(); 206 | 207 | if (local_lists[id].sz == list_length+1) { 208 | local_lists[id].mid = local_lists[id].head; 209 | } else if (local_lists[id].sz == 2*list_length) { 210 | global_stack.push(local_lists[id].mid->next); 211 | local_lists[id].mid->next = NULL; 212 | local_lists[id].sz = list_length; 213 | } 214 | new_node->next = local_lists[id].head; 215 | local_lists[id].head = new_node; 216 | local_lists[id].sz++; 217 | } 218 | 219 | inline void* block_allocator::alloc() { 220 | int id = worker_id(); 221 | 222 | if (local_lists[id].sz == 0) { 223 | local_lists[id].head = get_list(); 224 | local_lists[id].sz = list_length; 225 | } 226 | 227 | local_lists[id].sz--; 228 | block_p p = local_lists[id].head; 229 | local_lists[id].head = local_lists[id].head->next; 230 | 231 | return (void*) p; 232 | } 233 | 234 | -------------------------------------------------------------------------------- /wheels/lockfreehash/lprobe/concurrent_stack.h: -------------------------------------------------------------------------------- 1 | // This code is part of the Problem Based Benchmark Suite (PBBS) 2 | // Copyright (c) 2016 Guy Blelloch, Daniel Ferizovic, and the PBBS team 3 | // 4 | // Permission is hereby granted, free of charge, to any person obtaining a 5 | // copy of this software and associated documentation files (the 6 | // "Software"), to deal in the Software without restriction, including 7 | // without limitation the rights (to use, copy, modify, merge, publish, 8 | // distribute, sublicense, and/or sell copies of the Software, and to 9 | // permit persons to whom the Software is 
furnished to do so, subject to
// the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

// Lock free, linearizable implementation of a concurrent stack
// supporting:
//    push
//    pop
//    size
// Works for elements of any type T
// It requires memory proportional to the largest it has been
// This can be cleared, but only when no one else is using it.
31 | // Requires 128-bit-compare-and-swap 32 | // Counter could overflow "in theory", but would require over 500 years even 33 | // if updated every nanosecond (and must be updated sequentially) 34 | 35 | #pragma once 36 | #include 37 | #include 38 | #include 39 | #include "utilities.h" 40 | 41 | template 42 | class concurrent_stack { 43 | 44 | struct Node { 45 | T value; 46 | Node* next; 47 | size_t length; 48 | }; 49 | 50 | class alignas(64) prim_concurrent_stack { 51 | struct nodeAndCounter { 52 | Node* node; 53 | uint64_t counter; 54 | }; 55 | 56 | union CAS_t { 57 | __uint128_t x; 58 | nodeAndCounter NC; 59 | }; 60 | CAS_t head; 61 | 62 | size_t length(Node* n) { 63 | if (n == NULL) return 0; 64 | else return n->length; 65 | } 66 | 67 | public: 68 | prim_concurrent_stack() { 69 | head.NC.node = NULL; 70 | head.NC.counter = 0; 71 | std::atomic_thread_fence(std::memory_order_seq_cst); 72 | } 73 | 74 | size_t size() { 75 | return length(head.NC.node);} 76 | 77 | void push(Node* newNode){ 78 | CAS_t oldHead, newHead; 79 | do { 80 | oldHead = head; 81 | newNode->next = oldHead.NC.node; 82 | newNode->length = length(oldHead.NC.node) + 1; 83 | //std::atomic_thread_fence(std::memory_order_release); 84 | std::atomic_thread_fence(std::memory_order_seq_cst); 85 | newHead.NC.node = newNode; 86 | newHead.NC.counter = oldHead.NC.counter + 1; 87 | } while (!__sync_bool_compare_and_swap_16(&head.x,oldHead.x, newHead.x)); 88 | } 89 | Node* pop() { 90 | Node* result; 91 | CAS_t oldHead, newHead; 92 | do { 93 | oldHead = head; 94 | result = oldHead.NC.node; 95 | if (result == NULL) return result; 96 | newHead.NC.node = result->next; 97 | newHead.NC.counter = oldHead.NC.counter + 1; 98 | } while (!__sync_bool_compare_and_swap_16(&head.x,oldHead.x, newHead.x)); 99 | 100 | return result; 101 | } 102 | };// __attribute__((aligned(16))); 103 | 104 | prim_concurrent_stack a; 105 | prim_concurrent_stack b; 106 | 107 | public: 108 | 109 | size_t size() { return a.size();} 110 | 111 | 
void push(T v) { 112 | Node* x = b.pop(); 113 | if (!x) x = (Node*) malloc(sizeof(Node)); 114 | x->value = v; 115 | a.push(x); 116 | } 117 | 118 | maybe pop() { 119 | Node* x = a.pop(); 120 | if (!x) return maybe(); 121 | T r = x->value; 122 | b.push(x); 123 | return maybe(r); 124 | } 125 | 126 | // assumes no push or pop in progress 127 | void clear() { 128 | Node* x; 129 | while ((x = a.pop())) free(x); 130 | while ((x = b.pop())) free(x); 131 | } 132 | 133 | concurrent_stack() {} 134 | ~concurrent_stack() { clear();} 135 | }; 136 | -------------------------------------------------------------------------------- /wheels/lockfreehash/lprobe/cycle_timer.h: -------------------------------------------------------------------------------- 1 | #ifndef _SYRAH_CYCLE_TIMER_H_ 2 | #define _SYRAH_CYCLE_TIMER_H_ 3 | 4 | #if defined(__APPLE__) 5 | #if defined(__x86_64__) 6 | #include 7 | #else 8 | #include 9 | #include 10 | #endif // __x86_64__ or not 11 | 12 | #include // fprintf 13 | #include // exit 14 | 15 | #elif _WIN32 16 | # include 17 | # include 18 | #else 19 | # include 20 | # include 21 | # include 22 | # include 23 | #endif 24 | 25 | 26 | // This uses the cycle counter of the processor. Different 27 | // processors in the system will have different values for this. If 28 | // you process moves across processors, then the delta time you 29 | // measure will likely be incorrect. This is mostly for fine 30 | // grained measurements where the process is likely to be on the 31 | // same processor. For more global things you should use the 32 | // Time interface. 33 | 34 | // Also note that if you processors' speeds change (i.e. processors 35 | // scaling) or if you are in a heterogenous environment, you will 36 | // likely get spurious results. 37 | class CycleTimer { 38 | public: 39 | typedef unsigned long long SysClock; 40 | 41 | ////////// 42 | // Return the current CPU time, in terms of clock ticks. 43 | // Time zero is at some arbitrary point in the past. 
44 | static SysClock currentTicks() { 45 | #if defined(__APPLE__) && !defined(__x86_64__) 46 | return mach_absolute_time(); 47 | #elif defined(_WIN32) 48 | LARGE_INTEGER qwTime; 49 | QueryPerformanceCounter(&qwTime); 50 | return qwTime.QuadPart; 51 | #elif defined(__x86_64__) 52 | unsigned int a, d; 53 | asm volatile("rdtsc" : "=a" (a), "=d" (d)); 54 | return static_cast(a) | 55 | (static_cast(d) << 32); 56 | #elif defined(__ARM_NEON__) && 0 // mrc requires superuser. 57 | unsigned int val; 58 | asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(val)); 59 | return val; 60 | #else 61 | timespec spec; 62 | clock_gettime(CLOCK_THREAD_CPUTIME_ID, &spec); 63 | return CycleTimer::SysClock(static_cast(spec.tv_sec) * 1e9 + static_cast(spec.tv_nsec)); 64 | #endif 65 | } 66 | 67 | ////////// 68 | // Return the current CPU time, in terms of seconds. 69 | // This is slower than currentTicks(). Time zero is at 70 | // some arbitrary point in the past. 71 | static double currentSeconds() { 72 | return currentTicks() * secondsPerTick(); 73 | } 74 | 75 | ////////// 76 | // Return the conversion from seconds to ticks. 77 | static double ticksPerSecond() { 78 | return 1.0/secondsPerTick(); 79 | } 80 | 81 | static const char* tickUnits() { 82 | #if defined(__APPLE__) && !defined(__x86_64__) 83 | return "ns"; 84 | #elif defined(__WIN32__) || defined(__x86_64__) 85 | return "cycles"; 86 | #else 87 | return "ns"; // clock_gettime 88 | #endif 89 | } 90 | 91 | ////////// 92 | // Return the conversion from ticks to seconds. 
93 | static double secondsPerTick() { 94 | static bool initialized = false; 95 | static double secondsPerTick_val; 96 | if (initialized) return secondsPerTick_val; 97 | #if defined(__APPLE__) 98 | #ifdef __x86_64__ 99 | int args[] = {CTL_HW, HW_CPU_FREQ}; 100 | unsigned int Hz; 101 | size_t len = sizeof(Hz); 102 | if (sysctl(args, 2, &Hz, &len, NULL, 0) != 0) { 103 | fprintf(stderr, "Failed to initialize secondsPerTick_val!\n"); 104 | exit(-1); 105 | } 106 | secondsPerTick_val = 1.0 / (double) Hz; 107 | #else 108 | mach_timebase_info_data_t time_info; 109 | mach_timebase_info(&time_info); 110 | 111 | // Scales to nanoseconds without 1e-9f 112 | secondsPerTick_val = (1e-9*static_cast(time_info.numer))/ 113 | static_cast(time_info.denom); 114 | #endif // x86_64 or not 115 | #elif defined(_WIN32) 116 | LARGE_INTEGER qwTicksPerSec; 117 | QueryPerformanceFrequency(&qwTicksPerSec); 118 | secondsPerTick_val = 1.0/static_cast(qwTicksPerSec.QuadPart); 119 | #else 120 | FILE *fp = fopen("/proc/cpuinfo","r"); 121 | char input[1024]; 122 | if (!fp) { 123 | fprintf(stderr, "CycleTimer::resetScale failed: couldn't find /proc/cpuinfo."); 124 | exit(-1); 125 | } 126 | // In case we don't find it, e.g. 
on the N900 127 | secondsPerTick_val = 1e-9; 128 | while (!feof(fp) && fgets(input, 1024, fp)) { 129 | // NOTE(boulos): Because reading cpuinfo depends on dynamic 130 | // frequency scaling it's better to read the @ sign first 131 | float GHz, MHz; 132 | if (strstr(input, "model name")) { 133 | char* at_sign = strstr(input, "@"); 134 | if (at_sign) { 135 | char* after_at = at_sign + 1; 136 | char* GHz_str = strstr(after_at, "GHz"); 137 | char* MHz_str = strstr(after_at, "MHz"); 138 | if (GHz_str) { 139 | *GHz_str = '\0'; 140 | if (1 == sscanf(after_at, "%f", &GHz)) { 141 | //printf("GHz = %f\n", GHz); 142 | secondsPerTick_val = 1e-9f / GHz; 143 | break; 144 | } 145 | } else if (MHz_str) { 146 | *MHz_str = '\0'; 147 | if (1 == sscanf(after_at, "%f", &MHz)) { 148 | //printf("MHz = %f\n", MHz); 149 | secondsPerTick_val = 1e-6f / MHz; 150 | break; 151 | } 152 | } 153 | } 154 | } else if (1 == sscanf(input, "cpu MHz : %f", &MHz)) { 155 | //printf("MHz = %f\n", MHz); 156 | secondsPerTick_val = 1e-6f / MHz; 157 | break; 158 | } 159 | } 160 | fclose(fp); 161 | #endif 162 | 163 | initialized = true; 164 | return secondsPerTick_val; 165 | } 166 | 167 | ////////// 168 | // Return the conversion from ticks to milliseconds. 
169 | static double msPerTick() { 170 | return secondsPerTick() * 1000.0; 171 | } 172 | 173 | private: 174 | CycleTimer(); 175 | }; 176 | 177 | #endif // #ifndef _SYRAH_CYCLE_TIMER_H_ 178 | -------------------------------------------------------------------------------- /wheels/lockfreehash/lprobe/data.h: -------------------------------------------------------------------------------- 1 | #ifndef DATA_ELEMENT_ 2 | #define DATA_ELEMENT_ 3 | #include "utilities.h" 4 | using namespace pbbs; 5 | struct KV { 6 | int k; 7 | int v; 8 | bool operator== (struct KV other) { return k == other.k && v == other.v ;} 9 | bool operator!= (struct KV other) { return k != other.k || v != other.v ;} 10 | KV(int ak, int av) {k=ak;v=av;} 11 | }; 12 | 13 | struct hashKV { 14 | using eType = struct KV; 15 | using kType = int; 16 | eType empty() {return {-1,-1};} 17 | kType getKey(eType v) {return v.k;} 18 | //int hash(kType v) {return v * 999029;} //hash64_2(v);} 19 | int hash(kType v) {return hash64_2(v);} 20 | //int cmp(kType v, kType b) {return (v > b) ? 1 : ((v == b) ? 0 : -1);} 21 | int cmp(kType v, kType b) {return (v == b) ? 
0 : -1;} 22 | bool replaceQ(eType, eType) {return 0;} 23 | eType update(eType v, eType) {return v;} 24 | bool cas(eType* p, eType o, eType n) {return 25 | atomic_compare_and_swap(p, o, n);} 26 | }; 27 | #endif 28 | -------------------------------------------------------------------------------- /wheels/lockfreehash/lprobe/data_ptr.h: -------------------------------------------------------------------------------- 1 | #ifndef DATA_ELEMENT_ 2 | #define DATA_ELEMENT_ 3 | #include "utilities.h" 4 | using namespace pbbs; 5 | struct KV { 6 | int k; 7 | int v; 8 | //bool operator== (struct KV other) { return k == other.k && v == other.v ;} 9 | //bool operator!= (struct KV other) { return k != other.k || v != other.v ;} 10 | KV(int ak, int av) {k=ak;v=av;} 11 | }; 12 | 13 | struct hashKV { 14 | using eType = struct KV*; 15 | using kType = int; 16 | //eType empty() {return new struct KV(-1,-1);} 17 | eType empty() {return nullptr;} 18 | kType getKey(eType v) {return v->k;} 19 | int hash(kType v) {return v * 999029;} //hash64_2(v);} 20 | //int hash(kType v) {return hash64_2(v);} 21 | //int cmp(kType v, kType b) {return (v > b) ? 1 : ((v == b) ? 0 : -1);} 22 | int cmp(kType v, kType b) {return (v == b) ? 
0 : -1;} 23 | bool replaceQ(eType, eType) {return 0;} 24 | eType update(eType v, eType) {return v;} 25 | bool cas(eType* p, eType o, eType n) {return 26 | atomic_compare_and_swap(p, o, n);} 27 | }; 28 | #endif 29 | -------------------------------------------------------------------------------- /wheels/lockfreehash/lprobe/get_time.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | struct timer { 10 | double total_time; 11 | double last_time; 12 | bool on; 13 | std::string name; 14 | struct timezone tzp; 15 | 16 | timer(std::string name = "PBBS time", bool _start = true) 17 | : total_time(0.0), on(false), name(name), tzp({0,0}) { 18 | if (_start) start(); 19 | } 20 | 21 | double get_time() { 22 | timeval now; 23 | gettimeofday(&now, &tzp); 24 | return ((double) now.tv_sec) + ((double) now.tv_usec)/1000000.; 25 | } 26 | 27 | void start () { 28 | on = 1; 29 | last_time = get_time(); 30 | } 31 | 32 | double stop () { 33 | on = 0; 34 | double d = (get_time()-last_time); 35 | total_time += d; 36 | return d; 37 | } 38 | 39 | void reset() { 40 | total_time=0.0; 41 | on=0; 42 | } 43 | 44 | double get_total() { 45 | if (on) return total_time + get_time() - last_time; 46 | else return total_time; 47 | } 48 | 49 | double get_next() { 50 | if (!on) return 0.0; 51 | double t = get_time(); 52 | double td = t - last_time; 53 | total_time += td; 54 | last_time = t; 55 | return td; 56 | } 57 | 58 | void report(double time, std::string str) { 59 | std::ios::fmtflags cout_settings = std::cout.flags(); 60 | std::cout.precision(4); 61 | std::cout << std::fixed; 62 | std::cout << name << ": "; 63 | if (str.length() > 0) 64 | std::cout << str << ": "; 65 | std::cout << time << std::endl; 66 | std::cout.flags(cout_settings); 67 | } 68 | 69 | void total() { 70 | report(get_total(),"total"); 71 | total_time = 0.0; 72 | } 73 | 74 | void reportTotal(std::string str) { 
75 | report(get_total(), str); 76 | } 77 | 78 | void next(std::string str) { 79 | if (on) report(get_next(), str); 80 | } 81 | }; 82 | 83 | static timer _tm; 84 | #define startTime() _tm.start(); 85 | #define nextTime(_string) _tm.next(_string); 86 | -------------------------------------------------------------------------------- /wheels/lockfreehash/lprobe/hash_table.h: -------------------------------------------------------------------------------- 1 | // This code is part of the Problem Based Benchmark Suite (PBBS) 2 | // Copyright (c) 2010 Guy Blelloch and the PBBS team 3 | // 4 | // Permission is hereby granted, free of charge, to any person obtaining a 5 | // copy of this software and associated documentation files (the 6 | // "Software"), to deal in the Software without restriction, including 7 | // without limitation the rights (to use, copy, modify, merge, publish, 8 | // distribute, sublicense, and/or sell copies of the Software, and to 9 | // permit persons to whom the Software is furnished to do so, subject to 10 | // the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included 13 | // in all copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 16 | // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 | // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 19 | // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 20 | // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 21 | // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
22 | #pragma once 23 | #include "utilities.h" 24 | #include "sequence_ops.h" 25 | 26 | namespace pbbs { 27 | 28 | // A "history independent" hash table that supports insertion, and searching 29 | // It is described in the paper 30 | // Julian Shun and Guy E. Blelloch 31 | // Phase-concurrent hash tables for determinism 32 | // SPAA 2014: 96-107 33 | // Insertions can happen in parallel 34 | // Searches can happen in parallel 35 | // Deletion can happen in parallel 36 | // but insertions cannot happen in parallel with searches or deletions 37 | // and searches cannot happen in parallel with deletions 38 | // i.e. each of the three types of operations have to happen in phase 39 | template 40 | class Table { 41 | private: 42 | using eType = typename HASH::eType; 43 | using kType = typename HASH::kType; 44 | size_t m; 45 | eType empty; 46 | HASH hashStruct; 47 | eType* TA; 48 | using index = long; 49 | 50 | static void clear(eType* A, size_t n, eType v) { 51 | auto f = [&] (size_t i) { 52 | assign_uninitialized(A[i], v);}; 53 | parallel_for(0, n, f, granularity(n)); 54 | } 55 | 56 | struct notEmptyF { 57 | eType e; notEmptyF(eType _e) : e(_e) {} 58 | int operator() (eType a) {return e != a;}}; 59 | 60 | index hashToRange(index h) {return (int) h % (uint) m;} 61 | index firstIndex(kType v) {return hashToRange(hashStruct.hash(v));} 62 | index incrementIndex(index h) {return (h + 1 == (long) m) ? 0 : h+1;} 63 | index decrementIndex(index h) {return (h == 0) ? m-1 : h-1;} 64 | bool lessIndex(index a, index b) {return (a < b) ? (2*(b-a) < m) : (2*(a-b) > m);} 65 | bool lessEqIndex(index a, index b) {return a==b || lessIndex(a,b);} 66 | 67 | public: 68 | // Size is the maximum number of values the hash table will hold. 69 | // Overfilling the table could put it into an infinite loop. 
70 | Table(size_t size, HASH hashF, float load = 1.5) : 71 | m(((size_t) 100.0 + load * size)), 72 | empty(hashF.empty()), 73 | hashStruct(hashF), 74 | TA(new_array_no_init(m)) { 75 | clear(TA, m, empty); } 76 | 77 | ~Table() { delete_array(TA, m);}; 78 | 79 | // prioritized linear probing 80 | // a new key will bump an existing key up if it has a higher priority 81 | // an equal key will replace an old key if replaceQ(new,old) is true 82 | // returns 0 if not inserted (i.e. equal and replaceQ false) and 1 otherwise 83 | bool insert(eType v) { 84 | index i = firstIndex(hashStruct.getKey(v)); 85 | while (true) { 86 | eType c = TA[i]; 87 | if (c == empty) { 88 | if (hashStruct.cas(&TA[i],c,v)) return true; 89 | } else { 90 | int cmp = hashStruct.cmp(hashStruct.getKey(v),hashStruct.getKey(c)); 91 | if (cmp == 0) { 92 | if (!hashStruct.replaceQ(v,c)) return false; 93 | else if (hashStruct.cas(&TA[i],c,v)) return true; 94 | } else if (cmp < 0) 95 | i = incrementIndex(i); 96 | else if (hashStruct.cas(&TA[i],c,v)) { 97 | v = c; 98 | i = incrementIndex(i); 99 | } 100 | } 101 | } 102 | } 103 | 104 | // prioritized linear probing 105 | // a new key will bump an existing key up if it has a higher priority 106 | // an equal key will replace an old key if replaceQ(new,old) is true 107 | // returns 0 if not inserted (i.e. 
equal and replaceQ false) and 1 otherwise 108 | bool update(eType v) { 109 | index i = firstIndex(hashStruct.getKey(v)); 110 | while (true) { 111 | eType c = TA[i]; 112 | if (c == empty) { 113 | if (hashStruct.cas(&TA[i],c,v)) return true; 114 | } else { 115 | int cmp = hashStruct.cmp(hashStruct.getKey(v),hashStruct.getKey(c)); 116 | if (cmp == 0) { 117 | if (!hashStruct.replaceQ(v,c)) return false; 118 | else { 119 | eType new_val = hashStruct.update(c,v); 120 | if (hashStruct.cas(&TA[i],c,new_val)) return true; 121 | } 122 | } else if (cmp < 0) 123 | i = incrementIndex(i); 124 | else if (hashStruct.cas(&TA[i],c,v)) { 125 | v = c; 126 | i = incrementIndex(i); 127 | } 128 | } 129 | } 130 | } 131 | 132 | bool deleteVal(kType v) { 133 | index i = firstIndex(v); 134 | int cmp; 135 | 136 | // find first element less than or equal to v in priority order 137 | index j = i; 138 | eType c = TA[j]; 139 | 140 | if (c == empty) return true; 141 | 142 | // find first location with priority less or equal to v's priority 143 | while ((cmp = (c==empty) ? 1 : hashStruct.cmp(v, hashStruct.getKey(c))) < 0) { 144 | j = incrementIndex(j); 145 | c = TA[j]; 146 | } 147 | while (true) { 148 | // Invariants: 149 | // v is the key that needs to be deleted 150 | // j is our current index into TA 151 | // if v appears in TA, then at least one copy must appear at or before j 152 | // c = TA[j] at some previous time (could now be changed) 153 | // i = h(v) 154 | // cmp = compare v to key of c (positive if greater, 0 equal, negative less) 155 | if (cmp != 0) { 156 | // v does not match key of c, need to move down one and exit if 157 | // moving before h(v) 158 | if (j == i) return true; 159 | j = decrementIndex(j); 160 | c = TA[j]; 161 | cmp = (c == empty) ? 1 : hashStruct.cmp(v, hashStruct.getKey(c)); 162 | } else { // found v at location j (at least at some prior time) 163 | 164 | // Find next available element to fill location j. 
165 | // This is a little tricky since we need to skip over elements for 166 | // which the hash index is greater than j, and need to account for 167 | // things being moved downwards by others as we search. 168 | // Makes use of the fact that values in a cell can only decrease 169 | // during a delete phase as elements are moved from the right to left. 170 | index jj = incrementIndex(j); 171 | eType x = TA[jj]; 172 | while (x != empty && lessIndex(j, firstIndex(hashStruct.getKey(x)))) { 173 | jj = incrementIndex(jj); 174 | x = TA[jj]; 175 | } 176 | index jjj = decrementIndex(jj); 177 | while (jjj != j) { 178 | eType y = TA[jjj]; 179 | if (y == empty || !lessIndex(j, firstIndex(hashStruct.getKey(y)))) { 180 | x = y; 181 | jj = jjj; 182 | } 183 | jjj = decrementIndex(jjj); 184 | } 185 | 186 | // try to copy the the replacement element into j 187 | if (hashStruct.cas(&TA[j],c,x)) { 188 | // swap was successful 189 | // if the replacement element was empty, we are done 190 | if (x == empty) return true; 191 | 192 | // Otherwise there are now two copies of the replacement element x 193 | // delete one copy (probably the original) by starting to look at jj. 194 | // Note that others can come along in the meantime and delete 195 | // one or both of them, but that is fine. 196 | v = hashStruct.getKey(x); 197 | j = jj; 198 | i = firstIndex(v); 199 | } 200 | c = TA[j]; 201 | cmp = (c == empty) ? 1 : hashStruct.cmp(v, hashStruct.getKey(c)); 202 | } 203 | } 204 | } 205 | 206 | // Returns the value if an equal value is found in the table 207 | // otherwise returns the "empty" element. 
208 | // due to prioritization, can quit early if v is greater than cell 209 | eType find(kType v) { 210 | index h = firstIndex(v); 211 | eType c = TA[h]; 212 | while (true) { 213 | if (c == empty) {return empty;} 214 | int cmp = hashStruct.cmp(v,hashStruct.getKey(c)); 215 | if (cmp >= 0) { 216 | /*Ju we disable >0 case, because the +1 is not defined for our JitRequest*/ 217 | if (cmp > 0) return empty; 218 | else return c; 219 | //return c; 220 | } 221 | h = incrementIndex(h); 222 | c = TA[h]; 223 | } 224 | } 225 | 226 | // returns the number of entries 227 | size_t count() { 228 | auto is_full = [&] (size_t i) -> size_t { 229 | return (TA[i] == empty) ? 0 : 1;}; 230 | return reduce(delayed_seq(m, is_full), addm()); 231 | } 232 | 233 | // returns all the current entries compacted into a sequence 234 | sequence entries() { 235 | return filter(range(TA, TA+m), 236 | [&] (eType v) {return v != empty;}); 237 | } 238 | 239 | index findIndex(kType v) { 240 | index h = firstIndex(v); 241 | eType c = TA[h]; 242 | while (true) { 243 | if (c == empty) return -1; 244 | int cmp = hashStruct.cmp(v,hashStruct.getKey(c)); 245 | if (cmp >= 0) { 246 | if (cmp > 0) return -1; 247 | else return h; 248 | } 249 | h = incrementIndex(h); 250 | c = TA[h]; 251 | } 252 | } 253 | 254 | sequence get_index() { 255 | auto is_full = [&] (const size_t i) -> int { 256 | if (TA[i] != empty) return 1; else return 0;}; 257 | sequence x(m, is_full); 258 | scan_inplace(x.slice(), addm()); 259 | return x; 260 | } 261 | 262 | // prints the current entries along with the index they are stored at 263 | void print() { 264 | cout << "vals = "; 265 | for (size_t i=0; i < m; i++) 266 | if (TA[i] != empty) 267 | cout << i << ":" << TA[i] << ","; 268 | cout << endl; 269 | } 270 | }; 271 | 272 | template 273 | sequence remove_duplicates(sequence const &S, H const &hash, size_t m=0) { 274 | timer t("remove duplicates", false); 275 | if (m==0) m = S.size(); 276 | Table T(m, hash, 1.3); 277 | t.next("build table"); 
278 | parallel_for(0, S.size(), [&] (size_t i) { T.insert(S[i]);}); 279 | t.next("insert"); 280 | sequence result = T.entries(); 281 | t.next("entries"); 282 | return result; 283 | } 284 | 285 | // T must be some integer type 286 | template 287 | struct hashInt { 288 | using eType = T; 289 | using kType = T; 290 | eType empty() {return -1;} 291 | kType getKey(eType v) {return v;} 292 | T hash(kType v) {return v * 999029;} //hash64_2(v);} 293 | int cmp(kType v, kType b) {return (v > b) ? 1 : ((v == b) ? 0 : -1);} 294 | bool replaceQ(eType, eType) {return 0;} 295 | eType update(eType v, eType) {return v;} 296 | bool cas(eType* p, eType o, eType n) {return 297 | atomic_compare_and_swap(p, o, n);} 298 | }; 299 | 300 | // works for non-negative integers (uses -1 to mark cell as empty) 301 | template 302 | sequence remove_duplicates(sequence const &A) { 303 | return remove_duplicates(A, hashInt()); 304 | } 305 | 306 | } 307 | -------------------------------------------------------------------------------- /wheels/lockfreehash/lprobe/main.cc: -------------------------------------------------------------------------------- 1 | #include "hash_table.h" 2 | #include "benchmark_lprobe.h" 3 | #include "data.h" 4 | using namespace pbbs; 5 | //#define DEFAULT_OP_COUNT 2000 6 | //#define DEFAULT_THREAD_COUNT 2 7 | //#define DEFAULT_READ_PERCENT 90 8 | //#define DEFAULT_LOAD_FACTOR 40 9 | //#define CAPACITY 8000016 10 | //#define CAPACITY 800000 11 | 12 | #define DEFAULT_OP_COUNT 2000000 13 | #define DEFAULT_THREAD_COUNT 24 14 | #define DEFAULT_READ_PERCENT 90 15 | #define DEFAULT_LOAD_FACTOR 40 16 | #define CAPACITY 8000016 17 | 18 | 19 | int main() { 20 | 21 | int op_count = DEFAULT_OP_COUNT; 22 | int num_threads = DEFAULT_THREAD_COUNT; 23 | int read_percent = DEFAULT_READ_PERCENT; 24 | int load_factor = DEFAULT_LOAD_FACTOR; 25 | 26 | int rweight = read_percent; 27 | int idweight = 100 - read_percent; 28 | /* 29 | Table T(100000, hashKV(), 1.3); 30 | T.insert({1,2}); 31 | 
T.insert({2,45}); 32 | struct KV res = T.find(2); 33 | std::cout << "return value is " << res.v << std::endl; 34 | */ 35 | 36 | BenchmarkLockFreeHT benchmark_lockfree_ht(op_count, CAPACITY, rweight, idweight, num_threads, 0.3); 37 | benchmark_lockfree_ht.run(); 38 | 39 | return 0; 40 | } 41 | -------------------------------------------------------------------------------- /wheels/lockfreehash/lprobe/memory_size.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Author: David Robert Nadeau 3 | * Site: http://NadeauSoftware.com/ 4 | * License: Creative Commons Attribution 3.0 Unported License 5 | * http://creativecommons.org/licenses/by/3.0/deed.en_US 6 | */ 7 | 8 | #pragma once 9 | 10 | #if defined(_WIN32) 11 | #include 12 | 13 | #elif defined(__unix__) || defined(__unix) || defined(unix) || (defined(__APPLE__) && defined(__MACH__)) 14 | #include 15 | #include 16 | #include 17 | #if defined(BSD) 18 | #include 19 | #endif 20 | 21 | #else 22 | #error "Unable to define getMemorySize( ) for an unknown OS." 23 | #endif 24 | 25 | 26 | 27 | /** 28 | * Returns the size of physical memory (RAM) in bytes. 29 | */ 30 | static size_t getMemorySize( ) 31 | { 32 | #if defined(_WIN32) && (defined(__CYGWIN__) || defined(__CYGWIN32__)) 33 | /* Cygwin under Windows. ------------------------------------ */ 34 | /* New 64-bit MEMORYSTATUSEX isn't available. Use old 32.bit */ 35 | MEMORYSTATUS status; 36 | status.dwLength = sizeof(status); 37 | GlobalMemoryStatus( &status ); 38 | return (size_t)status.dwTotalPhys; 39 | 40 | #elif defined(_WIN32) 41 | /* Windows. 
------------------------------------------------- */ 42 | /* Use new 64-bit MEMORYSTATUSEX, not old 32-bit MEMORYSTATUS */ 43 | MEMORYSTATUSEX status; 44 | status.dwLength = sizeof(status); 45 | GlobalMemoryStatusEx( &status ); 46 | return (size_t)status.ullTotalPhys; 47 | 48 | #elif defined(__unix__) || defined(__unix) || defined(unix) || (defined(__APPLE__) && defined(__MACH__)) 49 | /* UNIX variants. ------------------------------------------- */ 50 | /* Prefer sysctl() over sysconf() except sysctl() HW_REALMEM and HW_PHYSMEM */ 51 | 52 | #if defined(CTL_HW) && (defined(HW_MEMSIZE) || defined(HW_PHYSMEM64)) 53 | int mib[2]; 54 | mib[0] = CTL_HW; 55 | #if defined(HW_MEMSIZE) 56 | mib[1] = HW_MEMSIZE;/* OSX. --------------------- */ 57 | #elif defined(HW_PHYSMEM64) 58 | mib[1] = HW_PHYSMEM64;/* NetBSD, OpenBSD. --------- */ 59 | #endif 60 | int64_t size = 0;/* 64-bit */ 61 | size_t len = sizeof( size ); 62 | if ( sysctl( mib, 2, &size, &len, NULL, 0 ) == 0 ) 63 | return (size_t)size; 64 | return 0L;/* Failed? */ 65 | 66 | #elif defined(_SC_AIX_REALMEM) 67 | /* AIX. ----------------------------------------------------- */ 68 | return (size_t)sysconf( _SC_AIX_REALMEM ) * (size_t)1024L; 69 | 70 | #elif defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE) 71 | /* FreeBSD, Linux, OpenBSD, and Solaris. -------------------- */ 72 | return (size_t)sysconf( _SC_PHYS_PAGES ) * 73 | (size_t)sysconf( _SC_PAGESIZE ); 74 | 75 | #elif defined(_SC_PHYS_PAGES) && defined(_SC_PAGE_SIZE) 76 | /* Legacy. -------------------------------------------------- */ 77 | return (size_t)sysconf( _SC_PHYS_PAGES ) * 78 | (size_t)sysconf( _SC_PAGE_SIZE ); 79 | 80 | #elif defined(CTL_HW) && (defined(HW_PHYSMEM) || defined(HW_REALMEM)) 81 | /* DragonFly BSD, FreeBSD, NetBSD, OpenBSD, and OSX. -------- */ 82 | int mib[2]; 83 | mib[0] = CTL_HW; 84 | #if defined(HW_REALMEM) 85 | mib[1] = HW_REALMEM;/* FreeBSD. ----------------- */ 86 | #elif defined(HW_PHYSMEM) /* FIX: was HW_PYSMEM (typo); the branch could never be taken, leaving mib[1] uninitialized before sysctl() */ 87 | mib[1] = HW_PHYSMEM;/* Others. 
------------------ */ 88 | #endif 89 | unsigned int size = 0;/* 32-bit */ 90 | size_t len = sizeof( size ); 91 | if ( sysctl( mib, 2, &size, &len, NULL, 0 ) == 0 ) 92 | return (size_t)size; 93 | return 0L;/* Failed? */ 94 | #endif /* sysctl and sysconf variants */ 95 | 96 | #else 97 | return 0L;/* Unknown OS. */ 98 | #endif 99 | } 100 | -------------------------------------------------------------------------------- /wheels/lockfreehash/lprobe/monoid.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | 6 | // Definition of various monoids 7 | // each consists of: 8 | // T : type of the values 9 | // static T identity() : returns identity for the monoid 10 | // static T add(T, T) : adds two elements, must be associative 11 | 12 | namespace pbbs { 13 | 14 | template 15 | struct monoid { 16 | using T = TT; 17 | F f; 18 | TT identity; 19 | monoid(F f, TT id) : f(f), identity(id) {} 20 | }; 21 | 22 | template 23 | monoid make_monoid (F f, T id) { 24 | return monoid(f, id); 25 | } 26 | 27 | template 28 | auto pair_monoid (M1 m1, M2 m2) { 29 | using P = std::pair; 30 | auto f = [&] (P a, P b) { 31 | return P(m1.f(a.first, b.first), m2.f(a.second, b.second));}; 32 | return make_monoid(f, P(m1.identity, m2.identity)); 33 | } 34 | 35 | template 36 | auto array_monoid (M m) { 37 | using Ar = std::array; 38 | auto f = [&] (Ar a, Ar b) { 39 | Ar r; 40 | for (size_t i=0; i < n; i++) 41 | r[i] = m.f(a[i], b[i]); 42 | return r; 43 | }; 44 | Ar id; 45 | for (size_t i=0; i < n; i++) id[i] = m.identity; 46 | return make_monoid(f, id); 47 | } 48 | 49 | template 50 | struct addm { 51 | using T = TT; 52 | addm() : identity(0) {} 53 | T identity; 54 | static T f(T a, T b) {return a + b;} 55 | }; 56 | 57 | template 58 | T lowest() {return std::numeric_limits::lowest();} 59 | 60 | template 61 | T highest() {return std::numeric_limits::max();} 62 | 63 | template 64 | struct maxm{ 65 | using T = TT; 66 
| maxm() : identity(lowest()) {} 67 | T identity; 68 | static T f(T a, T b) {return std::max(a,b);} 69 | }; 70 | 71 | template 72 | struct maxm> { 73 | using T = std::pair; 74 | maxm() : identity(std::make_pair(lowest(), lowest())) {} 75 | T identity; 76 | static T f(T a, T b) {return std::max(a,b);} 77 | }; 78 | 79 | template 80 | struct minm { 81 | using T = TT; 82 | minm() : identity(highest()) {} 83 | T identity; 84 | static T f(T a, T b) {return std::min(a,b);} 85 | }; 86 | 87 | template 88 | struct minm> { 89 | using T = std::pair; 90 | minm() : identity(std::make_pair(highest(), highest())) {} 91 | T identity; 92 | static T f(T a, T b) {return std::min(a,b);} /* FIX: was std::max — a min-monoid (identity = highest()) must combine with std::min, as the unspecialized minm does; std::max was copy-pasted from maxm<pair> */ 93 | }; 94 | 95 | template 96 | struct xorm { 97 | using T = TT; 98 | xorm() : identity(0) {} 99 | T identity; 100 | static T f(T a, T b) {return a ^ b;} 101 | }; 102 | 103 | template 104 | struct minmaxm { 105 | using T = std::pair; 106 | minmaxm() : identity(T(highest(), lowest())) {} 107 | T identity; 108 | static T f(T a, T b) {return T(std::min(a.first,b.first), 109 | std::max(a.second,b.second));} 110 | }; 111 | 112 | template 113 | struct Add { 114 | using T = TT; 115 | static T identity() {return (T) 0;} 116 | static T add(T a, T b) {return a + b;} 117 | }; 118 | 119 | template 120 | struct Max { 121 | using T = TT; 122 | static T identity() { 123 | return (T) std::numeric_limits::min();} 124 | static T add(T a, T b) {return std::max(a,b);} 125 | }; 126 | 127 | template 128 | struct Min { 129 | using T = TT; 130 | static T identity() { 131 | return (T) std::numeric_limits::max();} 132 | static T add(T a, T b) {return std::min(a,b);} 133 | }; 134 | 135 | template 136 | struct Add_Pair { 137 | using T = std::pair; 138 | static T identity() {return T(A1::identity(), A2::identity());} 139 | static T add(T a, T b) { 140 | return T(A1::add(a.first,b.first), A2::add(a.second,b.second));} 141 | }; 142 | 143 | template 144 | struct Add_Array { 145 | using S = std::tuple_size; 146 | using T = std::array; 
147 | static T identity() { 148 | T r; 149 | for (size_t i=0; i < S::value; i++) 150 | r[i] = 0; 151 | return r; 152 | } 153 | static T add(T a, T b) { 154 | T r; 155 | for (size_t i=0; i < S::value; i++) 156 | r[i] = a[i] + b[i]; 157 | return r; 158 | } 159 | }; 160 | 161 | template 162 | struct Add_Nested_Array { 163 | using T = AT; 164 | using S = std::tuple_size; 165 | using SS = std::tuple_size; 166 | static T identity() { 167 | T r; 168 | for (size_t i=0; i < S::value; i++) 169 | for (size_t j=0; j < SS::value; j++) r[i][j] = 0; 170 | return r; 171 | } 172 | static T add(T a, T b) { 173 | T r; 174 | for (size_t i=0; i < S::value; i++) 175 | for (size_t j=0; j < SS::value; j++) 176 | r[i][j] = a[i][j] + b[i][j]; 177 | return r; 178 | } 179 | }; 180 | 181 | } 182 | -------------------------------------------------------------------------------- /wheels/lockfreehash/lprobe/parallel.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | //*************************************** 4 | // All the pbbs library uses only four functions for 5 | // accessing parallelism. 6 | // These can be implemented on top of any scheduler. 7 | //*************************************** 8 | // number of threads available from OS 9 | //template <> 10 | static int num_workers(); 11 | 12 | // id of running thread, should be numbered from [0...num-workers) 13 | static int worker_id(); 14 | 15 | // the granularity of a simple loop (e.g. adding one to each element 16 | // of an array) to reasonably hide cost of scheduler 17 | // #define PAR_GRANULARITY 2000 18 | 19 | // parallel loop from start (inclusive) to end (exclusive) running 20 | // function f. 21 | // f should map long to void. 
22 | // granularity is the number of iterations to run sequentially 23 | // if 0 (default) then the scheduler will decide 24 | // conservative uses a safer scheduler 25 | template 26 | static void parallel_for(long start, long end, F f, 27 | long granularity = 0, 28 | bool conservative = false); 29 | 30 | // runs the thunks left and right in parallel. 31 | // both left and write should map void to void 32 | // conservative uses a safer scheduler 33 | template 34 | static void par_do(Lf left, Rf right, bool conservative=false); 35 | 36 | //*************************************** 37 | 38 | // cilkplus 39 | #if defined(CILK) 40 | #include 41 | #include 42 | #include 43 | #include 44 | #define PAR_GRANULARITY 2000 45 | 46 | inline int num_workers() {return __cilkrts_get_nworkers();} 47 | inline int worker_id() {return __cilkrts_get_worker_number();} 48 | inline void set_num_workers(int) { 49 | throw std::runtime_error("don't know how to set worker count!"); 50 | } 51 | 52 | // Not sure this still works 53 | //__cilkrts_end_cilk(); 54 | // std::stringstream ss; ss << n; 55 | // if (0 != __cilkrts_set_param("nworkers", ss.str().c_str())) 56 | 57 | 58 | template 59 | inline void par_do(Lf left, Rf right, bool) { 60 | cilk_spawn right(); 61 | left(); 62 | cilk_sync; 63 | } 64 | 65 | template 66 | inline void parallel_for(long start, long end, F f, 67 | long granularity, 68 | bool) { 69 | if (granularity == 0) 70 | cilk_for(long i=start; i 85 | #define PAR_GRANULARITY 200000 86 | 87 | inline int num_workers() { return omp_get_max_threads(); } 88 | inline int worker_id() { return omp_get_thread_num(); } 89 | inline void set_num_workers(int n) { omp_set_num_threads(n); } 90 | 91 | template 92 | inline void parallel_for(long start, long end, F f, 93 | long granularity, 94 | bool conservative) { 95 | _Pragma("omp parallel for") 96 | for(long i=start; i 102 | inline void par_do(Lf left, Rf right, bool conservative) { 103 | if (!in_par_do) { 104 | in_par_do = true; // at top 
level start up tasking 105 | #pragma omp parallel 106 | #pragma omp single 107 | #pragma omp task 108 | left(); 109 | #pragma omp task 110 | right(); 111 | #pragma omp taskwait 112 | in_par_do = false; 113 | } else { // already started 114 | #pragma omp task 115 | left(); 116 | #pragma omp task 117 | right(); 118 | #pragma omp taskwait 119 | } 120 | } 121 | 122 | template 123 | inline void parallel_run(Job job, int num_threads=0) { 124 | job(); 125 | } 126 | 127 | // Guy's scheduler (ABP) 128 | #elif defined(HOMEGROWN) 129 | #include "scheduler.h" 130 | 131 | #ifdef NOTMAIN 132 | extern fork_join_scheduler fj; 133 | #else 134 | fork_join_scheduler fj; 135 | #endif 136 | 137 | // Calls fj.destroy() before the program exits 138 | inline void destroy_fj() { 139 | fj.destroy(); 140 | } 141 | 142 | struct __atexit {__atexit() {std::atexit(destroy_fj);}}; 143 | static __atexit __atexit_var; 144 | 145 | #define PAR_GRANULARITY 512 146 | 147 | inline int num_workers() { 148 | return fj.num_workers(); 149 | } 150 | 151 | inline int worker_id() { 152 | return fj.worker_id(); 153 | } 154 | 155 | inline void set_num_workers(int n) { 156 | fj.set_num_workers(n); 157 | } 158 | 159 | template 160 | inline void parallel_for(long start, long end, F f, 161 | long granularity, 162 | bool conservative) { 163 | if (end > start) 164 | fj.parfor(start, end, f, granularity, conservative); 165 | } 166 | 167 | template 168 | inline void par_do(Lf left, Rf right, bool conservative) { 169 | return fj.pardo(left, right, conservative); 170 | } 171 | 172 | template 173 | inline void parallel_run(Job job, int) { 174 | job(); 175 | } 176 | 177 | // c++ 178 | #else 179 | 180 | inline int num_workers() { return 1;} 181 | inline int worker_id() { return 0;} 182 | inline void set_num_workers(int) { ; } 183 | #define PAR_GRANULARITY 1000 184 | 185 | template 186 | inline void parallel_for(long start, long end, F f, 187 | long, // granularity, 188 | bool) { // conservative) { 189 | for (long i=start; i 
195 | inline void par_do(Lf left, Rf right, bool) { // conservative) { 196 | left(); right(); 197 | } 198 | 199 | template 200 | inline void parallel_run(Job job, int) { // num_threads=0) { 201 | job(); 202 | } 203 | 204 | #endif 205 | -------------------------------------------------------------------------------- /wheels/lockfreehash/lprobe/seq.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "utilities.h" 4 | #include "alloc.h" 5 | #include 6 | #include 7 | 8 | #ifdef CONCEPTS 9 | template 10 | concept bool Seq = 11 | requires(T t, size_t u) { 12 | typename T::value_type; 13 | { t.size() } -> size_t; 14 | { t.slice() }; 15 | { t[u] }; 16 | }; 17 | 18 | template 19 | concept bool Range = 20 | Seq && requires(T t, size_t u) { 21 | { t[u] } -> typename T::value_type&; 22 | typename T::iterator; 23 | }; 24 | #define SEQ Seq 25 | #define RANGE Range 26 | #else 27 | #define SEQ typename 28 | #define RANGE typename 29 | #endif 30 | 31 | namespace pbbs { 32 | 33 | constexpr bool report_copy = false; 34 | constexpr bool bounds_check = false; 35 | 36 | template 37 | struct range { 38 | public: 39 | using value_type = typename std::iterator_traits::value_type; 40 | using iterator = Iterator; 41 | range() {}; 42 | range(iterator s, iterator e) : s(s), e(e) {}; 43 | value_type& operator[] (const size_t i) const {return s[i];} 44 | range slice(size_t ss, size_t ee) const { 45 | return range(s + ss, s + ee); } 46 | range slice() const {return range(s,e);}; 47 | size_t size() const { return e - s;} 48 | iterator begin() const {return s;} 49 | iterator end() const {return e;} 50 | 51 | range> 52 | rslice(size_t ss, size_t ee) const { 53 | auto i = std::make_reverse_iterator(e); 54 | return range(i + ss, i + ee); 55 | } 56 | range> 57 | rslice() const {return rslice(0, std::distance(s,e));}; 58 | 59 | private: 60 | iterator s; 61 | iterator e; 62 | }; 63 | 64 | template 65 | range make_range(Iter s, Iter e) { 66 
| return range(s,e); 67 | } 68 | 69 | template 70 | struct delayed_sequence { 71 | using value_type = T; 72 | delayed_sequence(size_t n, F _f) : f(_f), s(0), e(n) {}; 73 | delayed_sequence(size_t n, value_type v) : f([&] (size_t) {return v;}), s(0), e(n) {}; 74 | delayed_sequence(size_t s, size_t e, F _f) : f(_f), s(s), e(e) {}; 75 | const value_type operator[] (size_t i) const {return (f)(i+s);} 76 | delayed_sequence slice(size_t ss, size_t ee) const { 77 | return delayed_sequence(s+ss,s+ee,f); } 78 | delayed_sequence slice() const { 79 | return delayed_sequence(s,e,f); } 80 | size_t size() const { return e - s;} 81 | private: 82 | F f; 83 | const size_t s, e; 84 | }; 85 | 86 | // used so second template argument can be inferred 87 | template 88 | delayed_sequence delayed_seq (size_t n, F f) { 89 | return delayed_sequence(n,f); 90 | } 91 | 92 | template 93 | auto dseq (size_t n, F f) -> delayed_sequence 94 | { 95 | using T = decltype(f(0)); 96 | return delayed_sequence(n,f); 97 | } 98 | 99 | template > 100 | struct sequence { 101 | public: 102 | using value_type = T; 103 | //using iterator = T*; 104 | 105 | sequence() { empty(); } 106 | 107 | // copy constructor 108 | sequence(const sequence& a) { 109 | if (report_copy && !a.is_small()) 110 | cout << "copy constructor: len: " << a.size() 111 | << " element size: " << sizeof(value_type) << endl; 112 | if (a.is_small()) val = a.val; 113 | else copy_from(a.val.large.s, a.val.large.n); 114 | } 115 | 116 | // move constructor 117 | sequence(sequence&& a) { 118 | val = a.val; a.empty();} 119 | 120 | // // copy assignment 121 | // sequence& operator = (const sequence& a) { 122 | // if (report_copy && !a.is_small()) 123 | // cout << "copy assignment: len: " << a.size() 124 | // << " element size: " << sizeof(T) << endl; 125 | // if (this != &a) { 126 | // clear(); 127 | // if (a.is_small()) val = a.val; 128 | // else copy_from(a.val.large.s, a.val.large.n);} 129 | // return *this; 130 | // } 131 | 132 | // //move 
assignment 133 | // sequence& operator = (sequence&& a) { 134 | // if (this != &a) {clear(); val = a.val; a.empty();} 135 | // return *this; 136 | // } 137 | 138 | // unified copy/move assignment using the copy and swap idiom 139 | // now safer for exceptions 140 | sequence& operator = (sequence a) { 141 | swap(a); 142 | return *this; 143 | } 144 | 145 | // constructs a sequence of length sz 146 | // with each element default constructed 147 | sequence(const size_t sz) { 148 | alloc(sz);} 149 | 150 | // constructs a sequence of length sz initialized with v 151 | sequence(const size_t sz, value_type v) { 152 | T* start = alloc_no_init(sz); 153 | parallel_for(0, sz, [=] (size_t i) { 154 | assign_uninitialized(start[i], (value_type) v);}, 300); 155 | }; 156 | 157 | // constructs a sequence by applying f to indices [0, ..., sz-1] 158 | template 159 | sequence(const size_t sz, Func f, size_t granularity=300) { 160 | value_type* start = alloc_no_init(sz); 161 | parallel_for(0, sz, [&] (size_t i) { 162 | assign_uninitialized(start[i], f(i));}, granularity); 163 | }; 164 | 165 | // construct a sequence from initializer list 166 | sequence(std::initializer_list l) { 167 | size_t sz = l.end() - l.begin(); 168 | value_type* start = alloc(sz); 169 | size_t i = 0; 170 | for (value_type a : l) start[i++] = a; 171 | } 172 | 173 | // constructs from a range 174 | template 175 | sequence(range const &a) { 176 | copy_from(a.begin(), a.size()); 177 | } 178 | 179 | // constructs from a delayed sequence 180 | template 181 | sequence(delayed_sequence const &a) { 182 | copy_from(a, a.size()); 183 | } 184 | 185 | // uninitialized sequence of length sz 186 | // dangerous if non primitive types and not immediately initialized 187 | static sequence no_init(const size_t sz) { 188 | sequence r; 189 | r.alloc_no_init(sz); 190 | return r; 191 | }; 192 | 193 | // Constructs a sequence by taking ownership of an 194 | // allocated value_type array. 
195 | // Only use if a is allocated by the same allocator as 196 | // the sequence since the sequence delete will destruct it. 197 | sequence(value_type* a, const size_t sz) { 198 | set(a, sz); 199 | // cout << "dangerous: " << size(); 200 | }; 201 | 202 | // Copies a Seq type 203 | // Uses enable_if to avoid matching on integer argument, which creates 204 | // a sequece of the specified length 205 | //template ::value>> 206 | //sequence(Seq const &a) { 207 | // copy_from(a.begin(), a.size()); 208 | //} 209 | 210 | ~sequence() { clear();} 211 | 212 | range slice(size_t ss, size_t ee) const { 213 | return range(begin() + ss, begin() + ee); 214 | } 215 | 216 | range> 217 | rslice(size_t ss, size_t ee) const { 218 | auto iter = std::make_reverse_iterator(begin() + size()); 219 | return range(iter + ss, iter + ee); 220 | } 221 | 222 | range> 223 | rslice() const {return rslice(0, size());}; 224 | 225 | range slice() const { 226 | return range(begin(), begin() + size()); 227 | } 228 | 229 | // gives up ownership, returning an array of the elements 230 | // only use if will be freed by same allocator as sequence 231 | value_type* to_array() { 232 | value_type* r = begin(); empty(); return r;} 233 | 234 | // frees the memory assuming elements are already destructed, 235 | // and sets pointer to Null (empty()); 236 | void clear_no_destruct() { 237 | if (size() != 0 && !is_small()) 238 | //pbbs::free_array(val.large.s); 239 | Allocator().deallocate(val.large.s, val.large.n); 240 | empty(); 241 | } 242 | 243 | // destructs the sequence 244 | void clear() { 245 | delete_elements(); 246 | clear_no_destruct(); 247 | } 248 | 249 | value_type& operator[] (const size_t i) const { 250 | if (bounds_check && i >= size()) 251 | throw std::out_of_range("in sequence access: length = " 252 | + std::to_string(size()) 253 | + " index = " + std::to_string(i)); 254 | return begin()[i]; 255 | } 256 | 257 | value_type& get(const size_t i) const { 258 | return begin()[i]; 259 | } 260 | 261 | 
void swap(sequence& b) { 262 | std::swap(val.large.s, b.val.large.s); 263 | std::swap(val.large.n, b.val.large.n); 264 | } 265 | 266 | size_t size() const { 267 | if (is_small()) return val.small[flag_loc]; 268 | return val.large.n;} 269 | 270 | value_type* begin() const { 271 | if (is_small()) return (value_type*) &val.small; 272 | return val.large.s;} 273 | 274 | value_type* end() const {return begin() + size();} 275 | 276 | private: 277 | 278 | struct lg { value_type *s; size_t n; }; 279 | static constexpr size_t lg_size = sizeof(lg); 280 | static constexpr size_t T_size = sizeof(value_type); 281 | static constexpr size_t max_sso_size = 8; 282 | static constexpr size_t flag_loc = 15; 283 | // For future use in c++20 284 | // --- (std::endian::native == std::endian::big) ? 8 : 15; 285 | 286 | // Uses short string optimization (SSO). 287 | // Applied if T_size <= max_sso_size 288 | // Stores flag in byte 15 (flag_loc) of the small array 289 | // It assumes the machine is little_endian so this is 290 | // the high order bits of the size field (n) 291 | union { 292 | lg large; 293 | char small[lg_size]; // for SSO 294 | } val; 295 | 296 | // sets start and size 297 | void set(value_type* start, size_t sz) { 298 | val.large.n = sz; 299 | val.large.s = start; 300 | } 301 | 302 | // marks as empty 303 | void empty() {set(NULL, 0);} 304 | 305 | // is a given size small 306 | inline bool is_small(size_t sz) const { 307 | return ((T_size <= max_sso_size) && 308 | sz < (lg_size/T_size) && 309 | sz > 0); } 310 | 311 | // am I small 312 | inline bool is_small() const { 313 | //return is_small(val.small[flag_loc]); 314 | if (T_size <= max_sso_size) { 315 | size_t sz = val.small[flag_loc]; 316 | return (sz > 0 && sz < (lg_size/T_size)); 317 | } 318 | return false; 319 | } 320 | 321 | void initialize_elements() { 322 | if (!std::is_trivially_default_constructible::value) 323 | parallel_for(0, size(), [&] (size_t i) { 324 | new ((void*) (begin()+i)) value_type;}); 325 | } 326 | 
327 | void delete_elements() { 328 | if (!std::is_trivially_destructible::value) 329 | parallel_for(0, size(), [&] (size_t i) { 330 | (begin()+i)->~value_type();}); 331 | } 332 | 333 | // allocate and set size without initialization 334 | value_type* alloc_no_init(size_t sz) { 335 | if (is_small(sz)) { 336 | val.small[flag_loc] = sz; 337 | return (value_type*) &val.small; 338 | } else { 339 | //T* loc = (sz == 0) ? NULL : pbbs::new_array_no_init(sz); 340 | value_type* loc = (sz == 0) ? NULL : Allocator().allocate(sz); 341 | set(loc, sz); 342 | return loc; 343 | } 344 | } 345 | 346 | // allocate and set size with initialization 347 | value_type* alloc(size_t sz) { 348 | value_type* loc = alloc_no_init(sz); 349 | initialize_elements(); 350 | return loc; 351 | } 352 | 353 | // Allocates and copies sequence from random access iterator 354 | // Only used if not short string optimized. 355 | template 356 | void copy_from(Iter a, size_t sz) { 357 | value_type* start = alloc_no_init(sz); 358 | parallel_for(0, sz, [&] (size_t i) { 359 | assign_uninitialized(start[i], a[i]);}, 1000); 360 | } 361 | 362 | }; 363 | 364 | template 365 | bool slice_eq(range a, range b) { 366 | return a.begin() == b.begin();} 367 | 368 | template 369 | bool slice_eq(SeqA, SeqB) { return false;} 370 | 371 | template 372 | auto to_sequence(Seq const &s) -> sequence { 373 | using T = typename Seq::value_type; 374 | return sequence(s.size(), [&] (size_t i) { 375 | return s[i];}); 376 | } 377 | 378 | template 379 | auto seq (size_t n, F f) -> sequence 380 | { 381 | return sequence(n,f); 382 | } 383 | 384 | std::ostream& operator<<(std::ostream& os, sequence const &s) 385 | { 386 | // pad with a zero 387 | sequence out(s.size()+1, [&] (size_t i) { 388 | return i == s.size() ? 
0 : s[i];}); 389 | os << out.begin(); 390 | return os; 391 | } 392 | } 393 | 394 | -------------------------------------------------------------------------------- /wheels/lockfreehash/lprobe/thread_service.h: -------------------------------------------------------------------------------- 1 | #ifndef THREAD_SERVICE 2 | #define THREAD_SERVICE 3 | 4 | #include 5 | #include 6 | #include "data.h" 7 | 8 | 9 | std::atomic miss(0); 10 | 11 | struct WorkerArgs 12 | { 13 | int num_elems; 14 | // R/I/D weights, normalized to 100 15 | int rweight; 16 | int iweight; 17 | int dweight; 18 | void* ht_p; 19 | 20 | bool remove; 21 | int tid; 22 | int start; 23 | int* elems; 24 | }; 25 | 26 | template 27 | void* thread_service(void* threadArgs) 28 | { 29 | WorkerArgs* args = static_cast(threadArgs); 30 | 31 | std::random_device rd; 32 | std::mt19937 mt(rd()); 33 | std::uniform_int_distribution rng; 34 | 35 | std::array weights; 36 | weights[0] = args->rweight; 37 | weights[1] = args->iweight; 38 | weights[2] = args->dweight; 39 | 40 | std::default_random_engine g; 41 | std::discrete_distribution drng(weights.begin(), weights.end()); 42 | 43 | int tid = args->tid; 44 | int num_elems = args->num_elems; 45 | T* ht_p = static_cast(args->ht_p); 46 | 47 | for (int i = 0; i < num_elems; i++) 48 | { 49 | // Key, Value pair 50 | int k = rng(mt); 51 | int v = rng(mt); 52 | // Action : 0 -> Search, 1 -> Insert, 2 -> Remove 53 | int a = drng(g); 54 | 55 | if (a == 0) 56 | ht_p->find(k); 57 | else if (a == 1) 58 | ht_p->insert({k, v}); 59 | else 60 | ht_p->deleteVal(k); 61 | } 62 | } 63 | 64 | template 65 | void* thread_service_low_contention(void* threadArgs) 66 | { 67 | WorkerArgs* args = static_cast(threadArgs); 68 | 69 | std::random_device rd; 70 | std::mt19937 mt(rd()); 71 | std::uniform_int_distribution rng; 72 | 73 | std::array weights; 74 | weights[0] = args->rweight; 75 | weights[1] = args->iweight; 76 | weights[2] = args->dweight; 77 | 78 | std::default_random_engine g; 79 | 
std::discrete_distribution drng(weights.begin(), weights.end()); 80 | 81 | int tid = args->tid; 82 | int num_elems = args->num_elems; 83 | T* ht_p = static_cast(args->ht_p); 84 | 85 | int *keys = (args->elems + args->start); 86 | 87 | int start = 0; 88 | int end = 0; 89 | for (int i = 0; i < num_elems; i++) 90 | { 91 | // Action : 0 -> Search, 1 -> Insert, 2 -> Remove 92 | int a = drng(g); 93 | 94 | if (start == end || a == 1) 95 | { 96 | int k = rng(mt) % num_elems + tid * num_elems; 97 | keys[end++] = k; 98 | ht_p->insert({k, k}); 99 | } 100 | else if (a == 0) 101 | { 102 | int k = rng(mt) % (end - start) + start; 103 | ht_p->find(k); 104 | } 105 | else 106 | { 107 | int k = keys[start++]; 108 | ht_p->deleteVal(k); 109 | } 110 | } 111 | } 112 | 113 | template 114 | void* thread_service_high_contention(void* threadArgs) 115 | { 116 | WorkerArgs* args = static_cast(threadArgs); 117 | 118 | std::random_device rd; 119 | std::mt19937 mt(rd()); 120 | std::uniform_int_distribution rng; 121 | 122 | std::array weights; 123 | weights[0] = args->rweight; 124 | weights[1] = args->iweight; 125 | weights[2] = args->dweight; 126 | 127 | std::default_random_engine g; 128 | std::discrete_distribution drng(weights.begin(), weights.end()); 129 | 130 | int tid = args->tid; 131 | int num_elems = args->num_elems; 132 | T* ht_p = static_cast(args->ht_p); 133 | 134 | for (int i = 0; i < num_elems; i++) 135 | { 136 | ht_p->find(0); 137 | } 138 | } 139 | 140 | template 141 | void* thread_checkmiss(void* threadArgs) 142 | { 143 | WorkerArgs* args = static_cast(threadArgs); 144 | int* elems = args->elems; 145 | T* ht_p = static_cast(args->ht_p); 146 | int start = args->start; 147 | int num_elems = args->num_elems; 148 | int tid = args->tid; 149 | 150 | for (int i = start; i < start + num_elems; i++) 151 | { 152 | #if 0 153 | struct KV res = ht_p->find(elems[i]); 154 | if (res.k == -1) { 155 | ++miss; 156 | ht_p->insert({elems[i], elems[i]}); 157 | printf("miss! 
key is %d\n", elems[i]); 158 | } 159 | #endif 160 | bool res = ht_p->insert({elems[i], elems[i]}); 161 | if (res) { 162 | ++miss; 163 | printf("miss!\n"); 164 | } 165 | 166 | } 167 | 168 | } 169 | 170 | 171 | template 172 | void* thread_insert(void* threadArgs) 173 | { 174 | WorkerArgs* args = static_cast(threadArgs); 175 | int* elems = args->elems; 176 | T* ht_p = static_cast(args->ht_p); 177 | int start = args->start; 178 | int num_elems = args->num_elems; 179 | int tid = args->tid; 180 | 181 | for (int i = start; i < start + num_elems; i++) 182 | { 183 | ht_p->insert({elems[i], elems[i]}); 184 | } 185 | 186 | } 187 | 188 | template 189 | void* thread_remove(void* threadArgs) 190 | { 191 | WorkerArgs* args = static_cast(threadArgs); 192 | int* elems = args->elems; 193 | T* ht_p = static_cast(args->ht_p); 194 | int start = args->start; 195 | int num_elems = args->num_elems; 196 | int tid = args->tid; 197 | bool remove = args->remove; 198 | 199 | std::random_device rd; 200 | std::mt19937 mt(rd()); 201 | std::uniform_int_distribution rng(0, 200000 - 1); 202 | 203 | for (int i = start; i < start + num_elems; i++) 204 | { 205 | if (remove) 206 | ht_p->deleteVal(elems[i]); 207 | else 208 | ht_p->find(elems[rng(mt)]); 209 | } 210 | 211 | } 212 | 213 | #endif 214 | -------------------------------------------------------------------------------- /wheels/lockfreehash/lprobe/thread_service_ptr.h: -------------------------------------------------------------------------------- 1 | #ifndef THREAD_SERVICE 2 | #define THREAD_SERVICE 3 | 4 | #include 5 | #include 6 | #include "data_ptr.h" 7 | 8 | std::atomic miss(0); 9 | 10 | struct WorkerArgs 11 | { 12 | int num_elems; 13 | // R/I/D weights, normalized to 100 14 | int rweight; 15 | int iweight; 16 | int dweight; 17 | void* ht_p; 18 | 19 | bool remove; 20 | int tid; 21 | int start; 22 | int* elems; 23 | }; 24 | 25 | template 26 | void* thread_service(void* threadArgs) 27 | { 28 | WorkerArgs* args = static_cast(threadArgs); 29 | 
30 | std::random_device rd; 31 | std::mt19937 mt(rd()); 32 | std::uniform_int_distribution rng; 33 | 34 | std::array weights; 35 | weights[0] = args->rweight; 36 | weights[1] = args->iweight; 37 | weights[2] = args->dweight; 38 | 39 | std::default_random_engine g; 40 | std::discrete_distribution drng(weights.begin(), weights.end()); 41 | 42 | int tid = args->tid; 43 | int num_elems = args->num_elems; 44 | T* ht_p = static_cast(args->ht_p); 45 | 46 | for (int i = 0; i < num_elems; i++) 47 | { 48 | // Key, Value pair 49 | int k = rng(mt); 50 | int v = rng(mt); 51 | // Action : 0 -> Search, 1 -> Insert, 2 -> Remove 52 | int a = drng(g); 53 | 54 | if (a == 0) 55 | ht_p->find(k); 56 | else if (a == 1) 57 | ht_p->insert(new struct KV (k, v)); 58 | else 59 | ht_p->deleteVal(k); 60 | } 61 | } 62 | 63 | template 64 | void* thread_service_low_contention(void* threadArgs) 65 | { 66 | WorkerArgs* args = static_cast(threadArgs); 67 | 68 | std::random_device rd; 69 | std::mt19937 mt(rd()); 70 | std::uniform_int_distribution rng; 71 | 72 | std::array weights; 73 | weights[0] = args->rweight; 74 | weights[1] = args->iweight; 75 | weights[2] = args->dweight; 76 | 77 | std::default_random_engine g; 78 | std::discrete_distribution drng(weights.begin(), weights.end()); 79 | 80 | int tid = args->tid; 81 | int num_elems = args->num_elems; 82 | T* ht_p = static_cast(args->ht_p); 83 | 84 | int *keys = (args->elems + args->start); 85 | 86 | int start = 0; 87 | int end = 0; 88 | for (int i = 0; i < num_elems; i++) 89 | { 90 | // Action : 0 -> Search, 1 -> Insert, 2 -> Remove 91 | int a = drng(g); 92 | 93 | if (start == end || a == 1) 94 | { 95 | int k = rng(mt) % num_elems + tid * num_elems; 96 | keys[end++] = k; 97 | ht_p->insert(new struct KV(k, k)); 98 | } 99 | else if (a == 0) 100 | { 101 | int k = rng(mt) % (end - start) + start; 102 | ht_p->find(k); 103 | } 104 | else 105 | { 106 | int k = keys[start++]; 107 | ht_p->deleteVal(k); 108 | } 109 | } 110 | } 111 | 112 | template 113 | 
void* thread_service_high_contention(void* threadArgs) 114 | { 115 | WorkerArgs* args = static_cast(threadArgs); 116 | 117 | std::random_device rd; 118 | std::mt19937 mt(rd()); 119 | std::uniform_int_distribution rng; 120 | 121 | std::array weights; 122 | weights[0] = args->rweight; 123 | weights[1] = args->iweight; 124 | weights[2] = args->dweight; 125 | 126 | std::default_random_engine g; 127 | std::discrete_distribution drng(weights.begin(), weights.end()); 128 | 129 | int tid = args->tid; 130 | int num_elems = args->num_elems; 131 | T* ht_p = static_cast(args->ht_p); 132 | 133 | for (int i = 0; i < num_elems; i++) 134 | { 135 | ht_p->find(0); 136 | } 137 | } 138 | 139 | template 140 | void* thread_checkmiss(void* threadArgs) 141 | { 142 | WorkerArgs* args = static_cast(threadArgs); 143 | int* elems = args->elems; 144 | T* ht_p = static_cast(args->ht_p); 145 | int start = args->start; 146 | int num_elems = args->num_elems; 147 | int tid = args->tid; 148 | 149 | for (int i = start; i < start + num_elems; i++) 150 | { 151 | #if 1 152 | struct KV* res = ht_p->find(elems[i]); 153 | if (res == nullptr || res->k == -1) { 154 | ++miss; 155 | ht_p->insert(new struct KV (elems[i], elems[i])); 156 | printf("miss!! key is %d\n",elems[i]); 157 | } 158 | #endif 159 | #if 0 160 | bool res = ht_p->insert(new struct KV(elems[i], elems[i])); 161 | if (res) { 162 | ++miss; 163 | printf("miss! 
key is %d\n",elems[i]); 164 | } 165 | #endif 166 | } 167 | 168 | } 169 | 170 | 171 | template 172 | void* thread_insert(void* threadArgs) 173 | { 174 | WorkerArgs* args = static_cast(threadArgs); 175 | int* elems = args->elems; 176 | T* ht_p = static_cast(args->ht_p); 177 | int start = args->start; 178 | int num_elems = args->num_elems; 179 | int tid = args->tid; 180 | 181 | for (int i = start; i < start + num_elems; i++) 182 | { 183 | ht_p->insert({elems[i], elems[i]}); 184 | } 185 | 186 | } 187 | 188 | template 189 | void* thread_remove(void* threadArgs) 190 | { 191 | WorkerArgs* args = static_cast(threadArgs); 192 | int* elems = args->elems; 193 | T* ht_p = static_cast(args->ht_p); 194 | int start = args->start; 195 | int num_elems = args->num_elems; 196 | int tid = args->tid; 197 | bool remove = args->remove; 198 | 199 | std::random_device rd; 200 | std::mt19937 mt(rd()); 201 | std::uniform_int_distribution rng(0, 200000 - 1); 202 | 203 | for (int i = start; i < start + num_elems; i++) 204 | { 205 | if (remove) 206 | ht_p->deleteVal(elems[i]); 207 | else 208 | ht_p->find(elems[rng(mt)]); 209 | } 210 | 211 | } 212 | 213 | #endif 214 | -------------------------------------------------------------------------------- /wheels/lockfreehash/lprobe/utilities.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include "parallel.h" 13 | 14 | using std::cout; 15 | using std::endl; 16 | 17 | template 18 | static void par_do_if(bool do_parallel, Lf left, Rf right, bool cons=false) { 19 | if (do_parallel) par_do(left, right, cons); 20 | else {left(); right();} 21 | } 22 | 23 | template 24 | inline void par_do3(Lf left, Mf mid, Rf right) { 25 | auto left_mid = [&] () {par_do(left,mid);}; 26 | par_do(left_mid, right); 27 | } 28 | 29 | template 30 | static void par_do3_if(bool do_parallel, Lf left, Mf 
mid, Rf right) { 31 | if (do_parallel) par_do3(left, mid, right); 32 | else {left(); mid(); right();} 33 | } 34 | 35 | namespace pbbs { 36 | template 37 | size_t log2_up(T); 38 | } 39 | 40 | template 41 | struct maybe { 42 | T value; 43 | bool valid; 44 | 45 | maybe(T v, bool u) : value(v) { 46 | valid = u; 47 | } 48 | maybe(T v) : value(v) { 49 | valid = true; 50 | } 51 | maybe() { 52 | valid = false; 53 | } 54 | 55 | bool operator !() const { 56 | return !valid; 57 | } 58 | operator bool() const { 59 | return valid; 60 | }; 61 | T& operator * () { 62 | return value; 63 | } 64 | }; 65 | 66 | namespace pbbs { 67 | 68 | struct empty {}; 69 | 70 | typedef uint32_t flags; 71 | const flags no_flag = 0; 72 | const flags fl_sequential = 1; 73 | const flags fl_debug = 2; 74 | const flags fl_time = 4; 75 | const flags fl_conservative = 8; 76 | const flags fl_inplace = 16; 77 | 78 | template 79 | inline void assign_uninitialized(T& a, const T& b) { 80 | new (static_cast(std::addressof(a))) T(b); 81 | } 82 | 83 | template 84 | inline void assign_uninitialized(T& a, T&& b) { 85 | new (static_cast(std::addressof(a))) T(std::move(b)); 86 | } 87 | 88 | template 89 | inline void move_uninitialized(T& a, const T b) { 90 | new (static_cast(std::addressof(a))) T(std::move(b)); 91 | } 92 | 93 | template 94 | inline void copy_memory(T& a, const T &b) { 95 | std::memcpy(&a, &b, sizeof(T)); 96 | } 97 | 98 | enum _copy_type { _assign, _move, _copy}; 99 | 100 | template<_copy_type copy_type, typename T> 101 | inline void copy_val(T& a, const T &b) { 102 | switch (copy_type) { 103 | case _assign: assign_uninitialized(a, b); break; 104 | case _move: move_uninitialized(a, b); break; 105 | case _copy: copy_memory(a,b); break; 106 | } 107 | } 108 | 109 | // a 32-bit hash function 110 | inline uint32_t hash32(uint32_t a) { 111 | a = (a+0x7ed55d16) + (a<<12); 112 | a = (a^0xc761c23c) ^ (a>>19); 113 | a = (a+0x165667b1) + (a<<5); 114 | a = (a+0xd3a2646c) ^ (a<<9); 115 | a = (a+0xfd7046c5) + 
(a<<3); 116 | a = (a^0xb55a4f09) ^ (a>>16); 117 | return a; 118 | } 119 | 120 | inline uint32_t hash32_2(uint32_t a) { 121 | uint32_t z = (a + 0x6D2B79F5UL); 122 | z = (z ^ (z >> 15)) * (z | 1UL); 123 | z ^= z + (z ^ (z >> 7)) * (z | 61UL); 124 | return z ^ (z >> 14); 125 | } 126 | 127 | inline uint32_t hash32_3(uint32_t a) { 128 | uint32_t z = a + 0x9e3779b9; 129 | z ^= z >> 15; // 16 for murmur3 130 | z *= 0x85ebca6b; 131 | z ^= z >> 13; 132 | z *= 0xc2b2ae3d; // 0xc2b2ae35 for murmur3 133 | return z ^= z >> 16; 134 | } 135 | 136 | 137 | // from numerical recipes 138 | inline uint64_t hash64(uint64_t u ) 139 | { 140 | uint64_t v = u * 3935559000370003845ul + 2691343689449507681ul; 141 | v ^= v >> 21; 142 | v ^= v << 37; 143 | v ^= v >> 4; 144 | v *= 4768777513237032717ul; 145 | v ^= v << 20; 146 | v ^= v >> 41; 147 | v ^= v << 5; 148 | return v; 149 | } 150 | 151 | // a slightly cheaper, but possibly not as good version 152 | // based on splitmix64 153 | inline uint64_t hash64_2(uint64_t x) { 154 | x = (x ^ (x >> 30)) * UINT64_C(0xbf58476d1ce4e5b9); 155 | x = (x ^ (x >> 27)) * UINT64_C(0x94d049bb133111eb); 156 | x = x ^ (x >> 31); 157 | return x; 158 | } 159 | 160 | 161 | template 162 | inline bool atomic_compare_and_swap(ET* a, ET oldval, ET newval) { 163 | static_assert(sizeof(ET) <= 8, "Bad CAS length"); 164 | if (sizeof(ET) == 1) { 165 | uint8_t r_oval, r_nval; 166 | std::memcpy(&r_oval, &oldval, 1); 167 | std::memcpy(&r_nval, &newval, 1); 168 | return __sync_bool_compare_and_swap(reinterpret_cast(a), r_oval, r_nval); 169 | } else if (sizeof(ET) == 4) { 170 | uint32_t r_oval, r_nval; 171 | std::memcpy(&r_oval, &oldval, 4); 172 | std::memcpy(&r_nval, &newval, 4); 173 | return __sync_bool_compare_and_swap(reinterpret_cast(a), r_oval, r_nval); 174 | } else { // if (sizeof(ET) == 8) { 175 | uint64_t r_oval, r_nval; 176 | std::memcpy(&r_oval, &oldval, 8); 177 | std::memcpy(&r_nval, &newval, 8); 178 | return __sync_bool_compare_and_swap(reinterpret_cast(a), r_oval, 
r_nval); 179 | } 180 | } 181 | 182 | template 183 | inline E fetch_and_add(E *a, EV b) { 184 | volatile E newV, oldV; 185 | do {oldV = *a; newV = oldV + b;} 186 | while (!atomic_compare_and_swap(a, oldV, newV)); 187 | return oldV; 188 | } 189 | 190 | template 191 | inline void write_add(E *a, EV b) { 192 | //volatile E newV, oldV; 193 | E newV, oldV; 194 | do {oldV = *a; newV = oldV + b;} 195 | while (!atomic_compare_and_swap(a, oldV, newV)); 196 | } 197 | 198 | template 199 | inline void write_add(std::atomic *a, EV b) { 200 | //volatile E newV, oldV; 201 | E newV, oldV; 202 | do {oldV = a->load(); newV = oldV + b;} 203 | while (!std::atomic_compare_exchange_strong(a, &oldV, newV)); 204 | } 205 | 206 | template 207 | inline bool write_min(ET *a, ET b, F less) { 208 | ET c; bool r=0; 209 | do c = *a; 210 | while (less(b,c) && !(r=atomic_compare_and_swap(a,c,b))); 211 | return r; 212 | } 213 | 214 | template 215 | inline bool write_min(std::atomic *a, ET b, F less) { 216 | ET c; bool r=0; 217 | do c = a->load(); 218 | while (less(b,c) && !(r=std::atomic_compare_exchange_strong(a, &c, b))); 219 | return r; 220 | } 221 | 222 | template 223 | inline bool write_max(ET *a, ET b, F less) { 224 | ET c; bool r=0; 225 | do c = *a; 226 | while (less(c,b) && !(r=atomic_compare_and_swap(a,c,b))); 227 | return r; 228 | } 229 | 230 | template 231 | inline bool write_max(std::atomic *a, ET b, F less) { 232 | ET c; bool r=0; 233 | do c = a->load(); 234 | while (less(c,b) && !(r=std::atomic_compare_exchange_strong(a, &c, b))); 235 | return r; 236 | } 237 | 238 | // returns the log base 2 rounded up (works on ints or longs or unsigned versions) 239 | template 240 | size_t log2_up(T i) { 241 | size_t a=0; 242 | T b=i-1; 243 | while (b > 0) {b = b >> 1; a++;} 244 | return a; 245 | } 246 | 247 | inline size_t granularity(size_t n) { 248 | return (n > 100) ? 
ceil(pow(n,0.5)) : 100; 249 | } 250 | } 251 | -------------------------------------------------------------------------------- /wheels/threadpool/ThreadPool.h: -------------------------------------------------------------------------------- 1 | #ifndef THREAD_POOL_H 2 | #define THREAD_POOL_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | class ThreadPool { 15 | public: 16 | ThreadPool(size_t); 17 | template 18 | auto enqueue(F&& f, Args&&... args) 19 | -> std::future::type>; 20 | ~ThreadPool(); 21 | private: 22 | // need to keep track of threads so we can join them 23 | std::vector< std::thread > workers; 24 | // the task queue 25 | std::queue< std::function > tasks; 26 | 27 | // synchronization 28 | std::mutex queue_mutex; 29 | std::condition_variable condition; 30 | bool stop; 31 | }; 32 | 33 | // the constructor just launches some amount of workers 34 | inline ThreadPool::ThreadPool(size_t threads) 35 | : stop(false) 36 | { 37 | for(size_t i = 0;i task; 44 | 45 | { 46 | std::unique_lock lock(this->queue_mutex); 47 | this->condition.wait(lock, 48 | [this]{ return this->stop || !this->tasks.empty(); }); 49 | if(this->stop && this->tasks.empty()) 50 | return; 51 | task = std::move(this->tasks.front()); 52 | this->tasks.pop(); 53 | } 54 | 55 | task(); 56 | } 57 | } 58 | ); 59 | } 60 | 61 | // add new work item to the pool 62 | template 63 | auto ThreadPool::enqueue(F&& f, Args&&... args) 64 | -> std::future::type> 65 | { 66 | using return_type = typename std::result_of::type; 67 | 68 | auto task = std::make_shared< std::packaged_task >( 69 | std::bind(std::forward(f), std::forward(args)...) 
70 | ); 71 | 72 | std::future res = task->get_future(); 73 | { 74 | std::unique_lock lock(queue_mutex); 75 | 76 | // don't allow enqueueing after stopping the pool 77 | if(stop) 78 | throw std::runtime_error("enqueue on stopped ThreadPool"); 79 | 80 | tasks.emplace([task](){ (*task)(); }); 81 | } 82 | condition.notify_one(); 83 | return res; 84 | } 85 | 86 | // the destructor joins all threads 87 | inline ThreadPool::~ThreadPool() 88 | { 89 | { 90 | std::unique_lock lock(queue_mutex); 91 | stop = true; 92 | } 93 | condition.notify_all(); 94 | for(std::thread &worker: workers) 95 | worker.join(); 96 | } 97 | 98 | #endif 99 | -------------------------------------------------------------------------------- /wheels/threadpool/ctpl.h: -------------------------------------------------------------------------------- 1 | 2 | /********************************************************* 3 | * 4 | * Copyright (C) 2014 by Vitaliy Vitsentiy 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | * 18 | *********************************************************/ 19 | 20 | 21 | #ifndef __ctpl_thread_pool_H__ 22 | #define __ctpl_thread_pool_H__ 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | 34 | 35 | #ifndef _ctplThreadPoolLength_ 36 | #define _ctplThreadPoolLength_ 1000000 37 | #endif 38 | 39 | 40 | // thread pool to run user's functors with signature 41 | // ret func(int id, other_params) 42 | // where id is the index of the thread that runs the functor 43 | // ret is some return type 44 | 45 | 46 | namespace ctpl { 47 | 48 | class thread_pool { 49 | 50 | public: 51 | 52 | thread_pool() : q(_ctplThreadPoolLength_) { this->init(); } 53 | thread_pool(int nThreads, int queueSize = _ctplThreadPoolLength_) : q(queueSize) { this->init(); this->resize(nThreads); } 54 | 55 | // the destructor waits for all the functions in the queue to be finished 56 | ~thread_pool() { 57 | this->stop(true); 58 | } 59 | 60 | // get the number of running threads in the pool 61 | int size() { return static_cast(this->threads.size()); } 62 | 63 | // number of idle threads 64 | int n_idle() { return this->nWaiting; } 65 | std::thread & get_thread(int i) { return *this->threads[i]; } 66 | 67 | // change the number of threads in the pool 68 | // should be called from one thread, otherwise be careful to not interleave, also with this->stop() 69 | // nThreads must be >= 0 70 | void resize(int nThreads) { 71 | if (!this->isStop && !this->isDone) { 72 | int oldNThreads = static_cast(this->threads.size()); 73 | if (oldNThreads <= nThreads) { // if the number of threads is increased 74 | this->threads.resize(nThreads); 75 | this->flags.resize(nThreads); 76 | 77 | for (int i = oldNThreads; i < nThreads; ++i) { 78 | this->flags[i] = std::make_shared>(false); 79 | this->set_thread(i); 80 | } 81 | } 82 | else { // the number of threads is decreased 83 | for (int i = oldNThreads - 1; i >= nThreads; --i) { 
84 | *this->flags[i] = true; // this thread will finish 85 | this->threads[i]->detach(); 86 | } 87 | { 88 | // stop the detached threads that were waiting 89 | std::unique_lock lock(this->mutex); 90 | this->cv.notify_all(); 91 | } 92 | this->threads.resize(nThreads); // safe to delete because the threads are detached 93 | this->flags.resize(nThreads); // safe to delete because the threads have copies of shared_ptr of the flags, not originals 94 | } 95 | } 96 | } 97 | 98 | // empty the queue 99 | void clear_queue() { 100 | std::function * _f; 101 | while (this->q.pop(_f)) 102 | delete _f; // empty the queue 103 | } 104 | 105 | // pops a functional wraper to the original function 106 | std::function pop() { 107 | std::function * _f = nullptr; 108 | this->q.pop(_f); 109 | std::unique_ptr> func(_f); // at return, delete the function even if an exception occurred 110 | 111 | std::function f; 112 | if (_f) 113 | f = *_f; 114 | return f; 115 | } 116 | 117 | 118 | // wait for all computing threads to finish and stop all threads 119 | // may be called asyncronously to not pause the calling thread while waiting 120 | // if isWait == true, all the functions in the queue are run, otherwise the queue is cleared without running the functions 121 | void stop(bool isWait = false) { 122 | if (!isWait) { 123 | if (this->isStop) 124 | return; 125 | this->isStop = true; 126 | for (int i = 0, n = this->size(); i < n; ++i) { 127 | *this->flags[i] = true; // command the threads to stop 128 | } 129 | this->clear_queue(); // empty the queue 130 | } 131 | else { 132 | if (this->isDone || this->isStop) 133 | return; 134 | this->isDone = true; // give the waiting threads a command to finish 135 | } 136 | { 137 | std::unique_lock lock(this->mutex); 138 | this->cv.notify_all(); // stop all waiting threads 139 | } 140 | for (int i = 0; i < static_cast(this->threads.size()); ++i) { // wait for the computing threads to finish 141 | if (this->threads[i]->joinable()) 142 | this->threads[i]->join(); 
143 | } 144 | // if there were no threads in the pool but some functors in the queue, the functors are not deleted by the threads 145 | // therefore delete them here 146 | this->clear_queue(); 147 | this->threads.clear(); 148 | this->flags.clear(); 149 | } 150 | 151 | template 152 | auto push(F && f, Rest&&... rest) ->std::future { 153 | auto pck = std::make_shared>( 154 | std::bind(std::forward(f), std::placeholders::_1, std::forward(rest)...) 155 | ); 156 | 157 | auto _f = new std::function([pck](int id) { 158 | (*pck)(id); 159 | }); 160 | this->q.push(_f); 161 | 162 | std::unique_lock lock(this->mutex); 163 | this->cv.notify_one(); 164 | 165 | return pck->get_future(); 166 | } 167 | 168 | // run the user's function that excepts argument int - id of the running thread. returned value is templatized 169 | // operator returns std::future, where the user can get the result and rethrow the catched exceptins 170 | template 171 | auto push(F && f) ->std::future { 172 | auto pck = std::make_shared>(std::forward(f)); 173 | 174 | auto _f = new std::function([pck](int id) { 175 | (*pck)(id); 176 | }); 177 | this->q.push(_f); 178 | 179 | std::unique_lock lock(this->mutex); 180 | this->cv.notify_one(); 181 | 182 | return pck->get_future(); 183 | } 184 | 185 | 186 | private: 187 | 188 | // deleted 189 | thread_pool(const thread_pool &);// = delete; 190 | thread_pool(thread_pool &&);// = delete; 191 | thread_pool & operator=(const thread_pool &);// = delete; 192 | thread_pool & operator=(thread_pool &&);// = delete; 193 | 194 | void set_thread(int i) { 195 | std::shared_ptr> flag(this->flags[i]); // a copy of the shared ptr to the flag 196 | auto f = [this, i, flag/* a copy of the shared ptr to the flag */]() { 197 | std::atomic & _flag = *flag; 198 | std::function * _f; 199 | bool isPop = this->q.pop(_f); 200 | while (true) { 201 | while (isPop) { // if there is anything in the queue 202 | std::unique_ptr> func(_f); // at return, delete the function even if an exception 
occurred 203 | (*_f)(i); 204 | 205 | if (_flag) 206 | return; // the thread is wanted to stop, return even if the queue is not empty yet 207 | else 208 | isPop = this->q.pop(_f); 209 | } 210 | 211 | // the queue is empty here, wait for the next command 212 | std::unique_lock lock(this->mutex); 213 | ++this->nWaiting; 214 | this->cv.wait(lock, [this, &_f, &isPop, &_flag](){ isPop = this->q.pop(_f); return isPop || this->isDone || _flag; }); 215 | --this->nWaiting; 216 | 217 | if (!isPop) 218 | return; // if the queue is empty and this->isDone == true or *flag then return 219 | } 220 | }; 221 | this->threads[i].reset(new std::thread(f)); // compiler may not support std::make_unique() 222 | } 223 | 224 | void init() { this->nWaiting = 0; this->isStop = false; this->isDone = false; } 225 | 226 | std::vector> threads; 227 | std::vector>> flags; 228 | mutable boost::lockfree::queue *> q; 229 | std::atomic isDone; 230 | std::atomic isStop; 231 | std::atomic nWaiting; // how many threads are waiting 232 | 233 | std::mutex mutex; 234 | std::condition_variable cv; 235 | }; 236 | 237 | } 238 | 239 | #endif // __ctpl_thread_pool_H__ 240 | 241 | 242 | -------------------------------------------------------------------------------- /wheels/threadpool/threadpool_example.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "ctpl.h" 6 | #include 7 | #include 8 | #include /* mmap() is defined in this header */ 9 | #include 10 | #include 11 | #include 12 | 13 | 14 | struct SArg { 15 | int x; 16 | int y; 17 | }; 18 | 19 | 20 | 21 | ctpl::thread_pool* pool; 22 | ctpl::thread_pool* spool; 23 | 24 | int some_function(int id, struct SArg *arg) { 25 | std::cout << "hello " << arg->x << std::endl; 26 | std::this_thread::sleep_for(std::chrono::seconds(1)); 27 | std::cout << "world " << arg->y << std::endl; 28 | return arg->y; 29 | } 30 | 31 | static void generate_input(uint64_t fid) { 32 | char 
path[1000]; 33 | std::string __output_dir = "/hyper/fuzz/tmp"; 34 | std::string output_file = std::string(__output_dir) + "/" + 35 | std::to_string(fid) + "-id"; 36 | //std::string input_file = std::string(__output_dir) + "/" + taint_file; 37 | std::string input_file = "/home/cju/e2e/filter_des/0-id"; 38 | //std::cout << "out file is " << output_file << std::endl; 39 | struct stat statbuf; 40 | void *src, *dst; 41 | int fdout, fdin; 42 | int mode = 0x777; 43 | 44 | /* open the input file */ 45 | if ((fdin = open (input_file.c_str(), O_RDONLY)) < 0) 46 | { 47 | //assert(false && "can't open file for reading"); 48 | printf("cannot open input file!\n"); 49 | return; 50 | } 51 | 52 | /* open/create the output file */ 53 | if ((fdout = open (output_file.c_str(), O_RDWR | O_CREAT | O_TRUNC, mode)) < 0)//edited here 54 | { 55 | //assert(false && "can't create file for writing"); 56 | return; 57 | } 58 | 59 | /* find size of input file */ 60 | if (fstat (fdin,&statbuf) < 0) 61 | { 62 | //assert (false && "fstat error"); 63 | close(fdin); 64 | return; 65 | } 66 | 67 | /* mmap the input file */ 68 | if ((src = mmap (0, statbuf.st_size, PROT_READ, MAP_SHARED, fdin, 0)) 69 | == (caddr_t) -1) { 70 | close(fdin); 71 | return; 72 | } 73 | 74 | dst = malloc(statbuf.st_size); 75 | 76 | /* this copies the input file to the output file */ 77 | memcpy (dst, src, statbuf.st_size); 78 | for (int i=0;i<4;i++) { 79 | ((uint8_t*)dst)[i] = i; 80 | //printf("generate_input index is %u and value is %u\n", it->first,(uint32_t)it->second); 81 | } 82 | 83 | if (write(fdout, dst, statbuf.st_size) < 0) { 84 | return; 85 | } 86 | 87 | close(fdin); 88 | close(fdout); 89 | free(dst); 90 | } 91 | 92 | std::atomic id(0); 93 | std::atomic count(0); 94 | 95 | void addAll(int i) { 96 | generate_input(++id); 97 | count++; 98 | std::cout << "task count is " << count << std::endl; 99 | //uint64_t sum = 0; 100 | //std::this_thread::sleep_for (std::chrono::milliseconds(10)); 101 | /* 102 | for(int 
i=0;i<10000000;i++) { 103 | sum += i; 104 | } 105 | */ 106 | // return sum; 107 | } 108 | 109 | void task(int i) { 110 | spool->push(addAll); 111 | } 112 | 113 | 114 | 115 | int main(int argc, char** argv) 116 | { 117 | int num_of_threads = 0; 118 | if (sscanf (argv[1], "%i", &num_of_threads) != 1) { 119 | fprintf(stderr, "error - not an integer"); 120 | } 121 | pool = new ctpl::thread_pool(num_of_threads); 122 | spool = new ctpl::thread_pool(num_of_threads); 123 | std::vector< std::future > results; 124 | 125 | for(int i = 0; i < 10000; ++i) { 126 | pool->push(task); 127 | } 128 | 129 | //std::cout <<"check results" << std::endl; 130 | //for(auto && result: results) 131 | // std::cout << result.get() << ' '; 132 | //std::cout << std::endl; 133 | //delete pool; 134 | spool->stop(true); 135 | pool->stop(true); 136 | 137 | return 0; 138 | } 139 | --------------------------------------------------------------------------------