├── CMakeLists.txt ├── DEBBGED.md ├── Dockerfile ├── LICENSE ├── README.md ├── config.h ├── ctpl.h ├── gd.cc ├── grad.cc ├── grad.h ├── input.cc ├── input.h ├── jit.cc ├── jit.h ├── pro.log ├── rgd.cc ├── rgd.proto ├── rgdJit.h ├── rgd_op.h ├── test.h ├── util.cc ├── util.h ├── wheels ├── concurrentqueue │ └── queue.h ├── lockfreehash │ ├── cuckoo │ │ ├── Makefile │ │ ├── benchmark_lockfree_ht.h │ │ ├── benchmark_unordered_map.h │ │ ├── cycle_timer.h │ │ ├── hash_table.h │ │ ├── lockfree_hash_table.cpp │ │ ├── lockfree_hash_table.h │ │ ├── main.cpp │ │ └── thread_service.h │ └── lprobe │ │ ├── Makefile │ │ ├── alloc.h │ │ ├── benchmark_lprobe.h │ │ ├── benchmark_lprobe_ptr.h │ │ ├── block_allocator.h │ │ ├── concurrent_stack.h │ │ ├── cycle_timer.h │ │ ├── data.h │ │ ├── data_ptr.h │ │ ├── get_time.h │ │ ├── hash_table.h │ │ ├── main.cc │ │ ├── memory_size.h │ │ ├── monoid.h │ │ ├── parallel.h │ │ ├── seq.h │ │ ├── sequence_ops.h │ │ ├── thread_service.h │ │ ├── thread_service_ptr.h │ │ └── utilities.h └── threadpool │ ├── ThreadPool.h │ ├── ctpl.h │ └── threadpool_example.cpp ├── xxhash.cc └── xxhash.h /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.5.1) 2 | 3 | project(rgd C CXX) 4 | 5 | set(CMAKE_CXX_STANDARD 14) 6 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -g -std=c++14 -mcx16 -march=native -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free") 7 | # set(CMAKE_LINKER_FLAGS "${CMAKE_LINKER_FLAGS} -fsanitize=address") 8 | set(CMAKE_TEST_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin/test) 9 | 10 | find_package(LLVM 12 REQUIRED CONFIG) 11 | 12 | if (LLVM_FOUND) 13 | message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") 14 | message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") 15 | else() 16 | message(FATAL_ERROR "You haven't install LLVM !") 17 | endif() 18 | 19 | 20 | include_directories(${LLVM_INCLUDE_DIRS}) 21 | add_definitions(${LLVM_DEFINITIONS}) 
22 | 23 | # Proto file 24 | get_filename_component(rgd_proto "rgd.proto" ABSOLUTE) 25 | get_filename_component(rgd_proto_path "${rgd_proto}" PATH) 26 | 27 | add_custom_target(proto ALL DEPENDS ${rgd_proto_srcs} ${rgd_proto_hdrs}) 28 | # Generated sources 29 | set(rgd_proto_srcs "${CMAKE_CURRENT_BINARY_DIR}/rgd.pb.cc") 30 | set(rgd_proto_hdrs "${CMAKE_CURRENT_BINARY_DIR}/rgd.pb.h") 31 | add_custom_command( 32 | OUTPUT "${rgd_proto_srcs}" "${rgd_proto_hdrs}" 33 | COMMAND protoc 34 | ARGS --cpp_out "${CMAKE_CURRENT_BINARY_DIR}" 35 | -I "${rgd_proto_path}" 36 | "${rgd_proto}" 37 | DEPENDS "${rgd_proto}") 38 | 39 | # Include generated *.pb.h files 40 | include_directories("${CMAKE_CURRENT_BINARY_DIR}") 41 | 42 | add_library(gd 43 | gd.cc 44 | input.cc 45 | grad.cc 46 | ) 47 | 48 | add_executable(rgd 49 | rgd.cc 50 | jit.cc 51 | util.cc 52 | xxhash.cc 53 | ${rgd_proto_srcs}) 54 | 55 | target_link_libraries(rgd 56 | protobuf 57 | tcmalloc 58 | profiler 59 | gd 60 | LLVM 61 | pthread 62 | boost_system 63 | boost_filesystem) 64 | -------------------------------------------------------------------------------- /DEBBGED.md: -------------------------------------------------------------------------------- 1 | ## AST encoding 2 | 3 | Previsouly we only consider non-leaf parts of AST in the function cache. This becomes a problem when cache is used with tree-deduplication. 4 | 5 | Consider two expressions Sub(Add(A+B),A) and Sub(Add(A+B),B), where A, B are inputs. When the first query is processed, the JITTed function Sub(Add(arg0,arg1),arg0) will be saved in the cache. Because of tree-dedupliation, then second expression will be parsed as Sub(Add(arg0,arg1),arg1). But because only non-leaf are considered, the JITed function saved for the first function will be used, resulting a function mismatch. 6 | 7 | In the new design, we traverse the tree using the post-order. We assigns index for each inputs and each constant and then include the indices for comparing the AST tree. 
In this way, the first expression and second expression will be different functions in the cache. 8 | 9 | ## New handling of relational expressions for better perf. in solving nested branches 10 | 11 | In the context of nested branches, it is common to see the exactly opposite checks in the constraints. (e.g. Ult(a,b) and Uge (a,b)). In the previous design, Ult(a,b) and Uge(a,b) are compiled as different functions, resulting in a lot of cache misses. To solve this issue, we compile the same function for Ult(a,b) and Uge(a,b), which just outputs zext(a) and zext(b). And then we calculate the distance by invoking the JIT function. 12 | 13 | | Comparison | JITed function | 14 | |--------------------------------------------------------------|--------------------------| 15 | | ult(a,b) ugt(a,b) ule(a,b) uge(a,b) equal(a,b) distance(a,b) | Outputs zext(a), zext(b) | 16 | | slt(a,b) sgt(a,b) sle(a,b) sge(a,b) | Outputs sext(a), sext(b) | 17 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | 3 | RUN apt-get update 4 | RUN apt-get install -y cmake build-essential autoconf git libtool python3-setuptools libboost-all-dev 5 | RUN apt-get install -y golang 6 | RUN apt-get install -y clang-9 libclang-9-dev llvm-9-dev curl gperf libgmp-dev 7 | RUN apt-get install -y cmake bison flex libboost-all-dev python perl minisat 8 | 9 | #protobuf 10 | RUN git clone https://github.com/protocolbuffers/protobuf.git /protobuf && \ 11 | cd /protobuf && \ 12 | git submodule update --init --recursive && \ 13 | ./autogen.sh && \ 14 | ./configure && \ 15 | make -j && \ 16 | make install && \ 17 | ldconfig 18 | 19 | 20 | 21 | WORKDIR /src 22 | #gperf 23 | RUN cd /src && git clone https://github.com/gperftools/gperftools.git && cd gperftools && \ 24 | git checkout gperftools-2.9.1 && ./autogen.sh && ./configure && make -j && make install && ldconfig 25 | 26
| COPY . /src/jigsaw 27 | 28 | RUN cd /src/jigsaw/ && mkdir build && cd build && cmake .. && make rgd -j 29 | 30 | 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # JIGSAW 2 | 3 | **Build** 4 | ``` 5 | mkdir build 6 | cd build && cmake .. && make 7 | ``` 8 | 9 | **Using Docker** 10 | 11 | ``` 12 | docker build -t jigsaw-test .
13 | 14 | # copy constraints files to /out/readelf inside the container 15 | docker run jigsaw-test /src/jigsaw/build/rgd 1 0 /out/readelf 16 | ``` 17 | 18 | **Replay from constraints files** 19 | ``` 20 | Command: 21 | ./rgd num_of_threads pin_core_start test_dir 22 | 23 | Example: 24 | # solve objdump constraints using 8 cores, starting from core 0 25 | ./rgd 8 0 objdump 26 | ``` 27 | 28 | **Constraints Files** 29 | 30 | https://jigsaw.cs.ucr.edu 31 | 32 | 33 | -------------------------------------------------------------------------------- /config.h: -------------------------------------------------------------------------------- 1 | #ifndef CONFIG_H_ 2 | #define CONFIG_H_ 3 | #define MAX_NUM_MINIMAL_OPTIMA_ROUND 32 4 | #define MAX_EXEC_TIMES 1000 5 | #endif 6 | -------------------------------------------------------------------------------- /ctpl.h: -------------------------------------------------------------------------------- 1 | 2 | /********************************************************* 3 | * 4 | * Copyright (C) 2014 by Vitaliy Vitsentiy 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | * 18 | *********************************************************/ 19 | 20 | 21 | #ifndef __ctpl_thread_pool_H__ 22 | #define __ctpl_thread_pool_H__ 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | 34 | 35 | #ifndef _ctplThreadPoolLength_ 36 | #define _ctplThreadPoolLength_ 2000000 37 | #endif 38 | 39 | 40 | // thread pool to run user's functors with signature 41 | // ret func(int id, other_params) 42 | // where id is the index of the thread that runs the functor 43 | // ret is some return type 44 | 45 | 46 | namespace ctpl { 47 | 48 | class thread_pool { 49 | 50 | public: 51 | 52 | thread_pool() : q(_ctplThreadPoolLength_) { this->init(); } 53 | thread_pool(int nThreads, int pin_core_start, int queueSize = 2000000) : core_start_(pin_core_start), q(queueSize) { this->init(); this->resize(nThreads); } 54 | 55 | // the destructor waits for all the functions in the queue to be finished 56 | ~thread_pool() { 57 | this->stop(true); 58 | } 59 | 60 | // get the number of running threads in the pool 61 | int size() { return static_cast(this->threads.size()); } 62 | 63 | // number of idle threads 64 | int n_idle() { return this->nWaiting; } 65 | std::thread & get_thread(int i) { return *this->threads[i]; } 66 | 67 | // change the number of threads in the pool 68 | // should be called from one thread, otherwise be careful to not interleave, also with this->stop() 69 | // nThreads must be >= 0 70 | void resize(int nThreads) { 71 | if (!this->isStop && !this->isDone) { 72 | int oldNThreads = static_cast(this->threads.size()); 73 | if (oldNThreads <= nThreads) { // if the number of threads is increased 74 | this->threads.resize(nThreads); 75 | this->flags.resize(nThreads); 76 | 77 | for (int i = oldNThreads; i < nThreads; ++i) { 78 | this->flags[i] = std::make_shared>(false); 79 | this->set_thread(i); 80 | } 81 | } 82 | else { // the number of threads is decreased 83 | for (int i = 
oldNThreads - 1; i >= nThreads; --i) { 84 | *this->flags[i] = true; // this thread will finish 85 | this->threads[i]->detach(); 86 | } 87 | { 88 | // stop the detached threads that were waiting 89 | std::unique_lock lock(this->mutex); 90 | this->cv.notify_all(); 91 | } 92 | this->threads.resize(nThreads); // safe to delete because the threads are detached 93 | this->flags.resize(nThreads); // safe to delete because the threads have copies of shared_ptr of the flags, not originals 94 | } 95 | } 96 | } 97 | 98 | // empty the queue 99 | void clear_queue() { 100 | std::function * _f; 101 | while (this->q.pop(_f)) 102 | delete _f; // empty the queue 103 | } 104 | 105 | // pops a functional wraper to the original function 106 | std::function pop() { 107 | std::function * _f = nullptr; 108 | this->q.pop(_f); 109 | std::unique_ptr> func(_f); // at return, delete the function even if an exception occurred 110 | 111 | std::function f; 112 | if (_f) 113 | f = *_f; 114 | return f; 115 | } 116 | 117 | 118 | // wait for all computing threads to finish and stop all threads 119 | // may be called asyncronously to not pause the calling thread while waiting 120 | // if isWait == true, all the functions in the queue are run, otherwise the queue is cleared without running the functions 121 | void stop(bool isWait = false) { 122 | if (!isWait) { 123 | if (this->isStop) 124 | return; 125 | this->isStop = true; 126 | for (int i = 0, n = this->size(); i < n; ++i) { 127 | *this->flags[i] = true; // command the threads to stop 128 | } 129 | this->clear_queue(); // empty the queue 130 | } 131 | else { 132 | if (this->isDone || this->isStop) 133 | return; 134 | this->isDone = true; // give the waiting threads a command to finish 135 | } 136 | { 137 | std::unique_lock lock(this->mutex); 138 | this->cv.notify_all(); // stop all waiting threads 139 | } 140 | for (int i = 0; i < static_cast(this->threads.size()); ++i) { // wait for the computing threads to finish 141 | if 
(this->threads[i]->joinable()) 142 | this->threads[i]->join(); 143 | } 144 | // if there were no threads in the pool but some functors in the queue, the functors are not deleted by the threads 145 | // therefore delete them here 146 | this->clear_queue(); 147 | this->threads.clear(); 148 | this->flags.clear(); 149 | } 150 | 151 | template 152 | auto push(F && f, Rest&&... rest) ->std::future { 153 | auto pck = std::make_shared>( 154 | std::bind(std::forward(f), std::placeholders::_1, std::forward(rest)...) 155 | ); 156 | 157 | auto _f = new std::function([pck](int id) { 158 | (*pck)(id); 159 | }); 160 | this->q.push(_f); 161 | 162 | std::unique_lock lock(this->mutex); 163 | this->cv.notify_one(); 164 | 165 | return pck->get_future(); 166 | } 167 | 168 | // run the user's function that excepts argument int - id of the running thread. returned value is templatized 169 | // operator returns std::future, where the user can get the result and rethrow the catched exceptins 170 | template 171 | auto push(F && f) ->std::future { 172 | auto pck = std::make_shared>(std::forward(f)); 173 | 174 | auto _f = new std::function([pck](int id) { 175 | (*pck)(id); 176 | }); 177 | this->q.push(_f); 178 | 179 | std::unique_lock lock(this->mutex); 180 | this->cv.notify_one(); 181 | 182 | return pck->get_future(); 183 | } 184 | 185 | 186 | private: 187 | 188 | // deleted 189 | thread_pool(const thread_pool &);// = delete; 190 | thread_pool(thread_pool &&);// = delete; 191 | thread_pool & operator=(const thread_pool &);// = delete; 192 | thread_pool & operator=(thread_pool &&);// = delete; 193 | 194 | void set_thread(int i) { 195 | std::shared_ptr> flag(this->flags[i]); // a copy of the shared ptr to the flag 196 | auto f = [this, i, flag/* a copy of the shared ptr to the flag */]() { 197 | std::atomic & _flag = *flag; 198 | std::function * _f; 199 | bool isPop = this->q.pop(_f); 200 | while (true) { 201 | while (isPop) { // if there is anything in the queue 202 | std::unique_ptr> 
func(_f); // at return, delete the function even if an exception occurred 203 | (*_f)(i); 204 | 205 | if (_flag) 206 | return; // the thread is wanted to stop, return even if the queue is not empty yet 207 | else 208 | isPop = this->q.pop(_f); 209 | } 210 | 211 | // the queue is empty here, wait for the next command 212 | std::unique_lock lock(this->mutex); 213 | ++this->nWaiting; 214 | this->cv.wait(lock, [this, &_f, &isPop, &_flag](){ isPop = this->q.pop(_f); return isPop || this->isDone || _flag; }); 215 | --this->nWaiting; 216 | 217 | if (!isPop) 218 | return; // if the queue is empty and this->isDone == true or *flag then return 219 | } 220 | }; 221 | this->threads[i].reset(new std::thread(f)); // compiler may not support std::make_unique() 222 | CPU_ZERO(&cpuset); 223 | CPU_SET(i+core_start_,&cpuset); 224 | pthread_setaffinity_np(this->threads[i]->native_handle(),sizeof(cpu_set_t), &cpuset); 225 | } 226 | 227 | void init() { this->nWaiting = 0; this->isStop = false; this->isDone = false; } 228 | 229 | std::vector> threads; 230 | std::vector>> flags; 231 | mutable boost::lockfree::queue *> q; 232 | std::atomic isDone; 233 | std::atomic isStop; 234 | std::atomic nWaiting; // how many threads are waiting 235 | 236 | std::mutex mutex; 237 | std::condition_variable cv; 238 | cpu_set_t cpuset; 239 | int core_start_; 240 | }; 241 | 242 | } 243 | 244 | #endif // __ctpl_thread_pool_H__ 245 | 246 | 247 | -------------------------------------------------------------------------------- /grad.cc: -------------------------------------------------------------------------------- 1 | #include "grad.h" 2 | #include 3 | #include 4 | 5 | Grad::Grad(size_t size) : grads(size) { 6 | } 7 | 8 | std::vector& Grad::get_value() { 9 | return grads; 10 | } 11 | 12 | 13 | uint64_t Grad::max_val() { 14 | uint64_t ret = 0; 15 | for (auto gradu : grads) { 16 | //std::cout << "graud value is " << gradu.val < ret) 18 | ret = gradu.val; 19 | } 20 | return ret; 21 | } 22 | 23 | void 
Grad::normalize() { 24 | double max_grad = (double)max_val(); 25 | if (max_grad > 0.0) { 26 | for(auto &grad : grads) { 27 | grad.pct = 1.0 * ((double)grad.val / max_grad); 28 | } 29 | } 30 | } 31 | 32 | void Grad::clear() { 33 | // BUGFIX: iterate by reference — the old `for (auto gradu : grads)` mutated per-iteration copies, so clear() never reset the stored gradients 34 | for (auto &gradu : grads) { 35 | gradu.val = 0; 36 | gradu.pct = 0.0; 37 | } 38 | } 39 | 40 | size_t Grad::len() { 41 | return grads.size(); 42 | } 43 | 44 | 45 | uint64_t Grad::val_sum() { 46 | uint64_t ret = 0; 47 | // const ref: avoid copying each GradUnit per iteration (matches the by-reference style of normalize()) 48 | for (const auto &gradu : grads) { 49 | //FIXME: saturating_add 50 | ret += gradu.val; 51 | } 52 | return ret; 53 | } 54 | 55 | -------------------------------------------------------------------------------- /grad.h: -------------------------------------------------------------------------------- 1 | #ifndef GRAD_H 2 | #define GRAD_H 3 | #include 4 | #include 5 | #include 6 | class GradUnit { 7 | public: 8 | bool sign; 9 | uint64_t val; 10 | double pct; 11 | }; 12 | 13 | 14 | class Grad { 15 | private: 16 | std::vector grads; 17 | public: 18 | Grad(size_t size); 19 | std::vector& get_value(); 20 | uint64_t max_val(); 21 | void clear(); 22 | size_t len(); 23 | uint64_t val_sum(); 24 | void normalize(); 25 | }; 26 | #endif 27 | -------------------------------------------------------------------------------- /input.cc: -------------------------------------------------------------------------------- 1 | #include "input.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | void MutInput::update(size_t index, bool direction, uint64_t delta) 8 | { 9 | if (direction) 10 | value[index] += delta; 11 | else 12 | value[index] -= delta; 13 | } 14 | 15 | uint8_t MutInput::get_rand() 16 | { 17 | uint8_t r = (uint8_t)r_val; 18 | r_val >>= 8; 19 | r_idx++; 20 | if (r_idx == 4) { 21 | random_r(&r_d, &r_val); 22 | r_idx = 0; 23 | } 24 | return r; 25 | } 26 | 27 | void MutInput::assign(std::vector> &input) { 28 | for (int i = 0; i < size_; i++) { 29 | value[i] = input[i].second; 30 | //std::cout << "randomize " << i << " and assign value " <<
(int)value[i] << std::endl; 31 | } 32 | } 33 | 34 | void MutInput::flip(size_t index, size_t bit_index) { 35 | uint8_t val = value[index]; 36 | uint8_t mask = 1; 37 | mask = mask << bit_index; 38 | value[index] = val^mask; 39 | } 40 | 41 | void MutInput::set(const size_t index, uint8_t val) 42 | { 43 | value[index] = val; 44 | } 45 | 46 | uint64_t MutInput::len() { 47 | return size_; 48 | } 49 | 50 | uint64_t MutInput::val_len() { 51 | return size_; 52 | } 53 | 54 | MutInput& MutInput::operator=(const MutInput &other) 55 | { 56 | MutInput::copy(this, &other); 57 | return *this; 58 | } 59 | 60 | void MutInput::dump() { 61 | // printf("dumping input and value size is %lu\n",value.size()); 62 | // for(auto i : value) 63 | // printf("%d, ",i); 64 | // printf("\n"); 65 | } 66 | 67 | void MutInput::randomize() { 68 | for(int i=0;i 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | class InputMeta { 11 | public: 12 | bool sign; 13 | size_t offset; 14 | size_t size; 15 | }; 16 | 17 | 18 | class MutInput { 19 | public: 20 | // std::vector value; 21 | uint8_t* value; 22 | // std::vector meta; 23 | size_t size_; 24 | size_t get_size(); 25 | MutInput(size_t size); 26 | ~MutInput(); 27 | void dump(); 28 | uint64_t len(); 29 | uint64_t val_len(); 30 | void randomize(); 31 | //random 32 | char r_s[256]; 33 | struct random_data r_d; 34 | int32_t r_val; 35 | int32_t r_idx; 36 | uint8_t get_rand(); 37 | 38 | uint8_t get(const size_t i); 39 | void update(size_t index, bool direction, uint64_t delta); 40 | void flip(size_t index, size_t bit_index); 41 | void set(const size_t index, uint8_t value); 42 | void assign(std::vector> &input); 43 | MutInput& operator=(const MutInput &other); 44 | 45 | static void copy(MutInput *dst, const MutInput *src) 46 | { 47 | uint8_t *dst_value = dst->value; 48 | memcpy(dst, src, sizeof(MutInput)); 49 | if (!dst_value) 50 | dst->value = (uint8_t*)malloc(src->size_); 51 | else 52 | dst->value = dst_value; 53 | memcpy(dst->value, 
src->value, src->size_); 54 | } 55 | }; 56 | #endif 57 | -------------------------------------------------------------------------------- /jit.h: -------------------------------------------------------------------------------- 1 | #ifndef JIT_H_ 2 | #define JIT_H_ 3 | 4 | #include "rgd.pb.h" 5 | #include "test.h" 6 | 7 | int addFunction(const rgd::AstNode* node, 8 | std::map &local_map, 9 | uint64_t id, 10 | std::unordered_map &expr_cache); 11 | 12 | test_fn_type performJit(uint64_t id); 13 | 14 | #endif -------------------------------------------------------------------------------- /pro.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/R-Fuzz/jigsaw/5459da15d2e4a581a7e6e6f817c6f1b78d01527d/pro.log -------------------------------------------------------------------------------- /rgd.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package rgd; 4 | 5 | service RGD { 6 | rpc sendExpression (AstNode) returns (JitReply) {} 7 | rpc sendCmd (JitCmd) returns (JitReply) {} 8 | rpc sendCmdv2 (JitCmdv2) returns (JitReply) {} 9 | rpc startNewSession (ResetCmd) returns (JitReply) {} 10 | } 11 | 12 | message JitEmpty {} 13 | 14 | message JitCmdv2 { 15 | uint32 cmd = 1; //reset;solve;expression 16 | int32 test_value = 2; //this the value output by Z3 17 | repeated bytes expr_string = 3; // 18 | // repeated AstNode expr = 3; // 19 | string file_name = 4; 20 | uint64 bhash = 5; 21 | uint64 shash = 6; 22 | bool direction = 7; 23 | repeated AstNode expr = 8; 24 | } 25 | 26 | message ResetCmd { 27 | string file_name = 1; 28 | } 29 | 30 | message JitBranchContext { 31 | uint64 bhash = 1; 32 | uint64 shash = 2; 33 | uint32 order = 3; 34 | bool direction = 4; 35 | } 36 | 37 | message JitCmd { 38 | uint32 cmd = 1; //reset;solve;expression 39 | int32 test_value = 2; //this the value output by Z3 40 | } 41 | 42 | // AST node for symbolic expressions 
43 | message AstNode { 44 | uint32 kind = 1; 45 | uint32 boolvalue = 2; //used by bool expr 46 | uint32 bits = 3; 47 | string value = 4; //used by constant expr 48 | repeated AstNode children = 5; 49 | string name = 6; //used for debugging 50 | uint32 index = 7; //used by read expr for index and extract expr 51 | uint32 label = 8; //for expression dedup 52 | uint32 hash = 9; //for node dedup 53 | uint32 direction = 10; 54 | uint32 sessionid = 11; 55 | uint32 full = 12; 56 | } 57 | 58 | // The response message containing the JIT result status 59 | message JitReply { 60 | string message = 1; 61 | } 62 | 63 | message DupReply { 64 | bool dup = 1; 65 | } 66 | 67 | message BmReply { 68 | bytes bm = 1; 69 | } 70 | -------------------------------------------------------------------------------- /rgdJit.h: -------------------------------------------------------------------------------- 1 | #ifndef GRAD_JIT_H 2 | #define GRAD_JIT_H 3 | 4 | #include "llvm/ADT/StringRef.h" 5 | #include "llvm/ExecutionEngine/JITSymbol.h" 6 | #include "llvm/ExecutionEngine/Orc/CompileUtils.h" 7 | #include "llvm/ExecutionEngine/Orc/Core.h" 8 | #include "llvm/ExecutionEngine/Orc/ExecutionUtils.h" 9 | #include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" 10 | #include "llvm/ExecutionEngine/Orc/IRTransformLayer.h" 11 | #include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h" 12 | #include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h" 13 | #include "llvm/ExecutionEngine/Orc/TargetProcessControl.h" 14 | #include "llvm/ExecutionEngine/SectionMemoryManager.h" 15 | #include "llvm/IR/DataLayout.h" 16 | #include "llvm/IR/LLVMContext.h" 17 | #include "llvm/IR/LegacyPassManager.h" 18 | #include "llvm/Transforms/InstCombine/InstCombine.h" 19 | #include "llvm/Transforms/Scalar.h" 20 | #include "llvm/Transforms/Scalar/GVN.h" 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | namespace rgd { 30 | 31 | class GradJit { 32 | private: 33 |
llvm::orc::ExecutionSession ES; 34 | llvm::orc::RTDyldObjectLinkingLayer ObjectLayer; 35 | llvm::orc::IRCompileLayer CompileLayer; 36 | llvm::orc::IRTransformLayer OptimizeLayer; 37 | //std::unique_ptr TM; 38 | 39 | llvm::DataLayout DL; 40 | llvm::orc::MangleAndInterner Mangle; 41 | llvm::orc::JITDylib *MainJD; 42 | // llvm::orc::ThreadSafeContext Ctx; 43 | // std::unique_ptr CompileCallbackManager; 44 | // llvm::orc::CompileOnDemandLayer CODLayer; 45 | 46 | public: 47 | GradJit(llvm::orc::JITTargetMachineBuilder JTMB, llvm::DataLayout DL) 48 | : ObjectLayer(ES, 49 | []() { return std::make_unique(); }), 50 | // TM(llvm::EngineBuilder().selectTarget()), 51 | CompileLayer(ES, ObjectLayer, std::make_unique(std::move(JTMB))), 52 | OptimizeLayer(ES, CompileLayer, optimizeModule), 53 | DL(std::move(DL)), Mangle(ES, this->DL) 54 | // CompileCallbackManager( 55 | // llvm::orc::createLocalCompileCallbackManager(TM->getTargetTriple(), ES, 0)), 56 | // CODLayer(ES, OptimizeLayer, 57 | // [this](llvm::Function &F) { return std::set({&F}); }, 58 | // *CompileCallbackManager, 59 | // llvm::orc::createLocalIndirectStubsManagerBuilder( 60 | // TM->getTargetTriple())) 61 | // Ctx(std::make_unique()) 62 | { 63 | MainJD = &cantFail(ES.createJITDylib("main")); 64 | 65 | MainJD->addGenerator( 66 | cantFail(llvm::orc::DynamicLibrarySearchGenerator::GetForCurrentProcess( 67 | DL.getGlobalPrefix()))); 68 | } 69 | 70 | ~GradJit() { 71 | if (auto Err = ES.endSession()) 72 | ES.reportError(std::move(Err)); 73 | } 74 | 75 | const llvm::DataLayout &getDataLayout() const { return DL; } 76 | // llvm::LLVMContext &getContext() { return *Ctx.getContext(); } 77 | // llvm::orc::ThreadSafeContext &getTSC() {return Ctx;} 78 | 79 | static llvm::Expected> Create() { 80 | auto JTMB = llvm::orc::JITTargetMachineBuilder::detectHost(); 81 | 82 | if (!JTMB) 83 | return JTMB.takeError(); 84 | llvm::outs() << "Host triples: " << JTMB->getTargetTriple().str() << "\n"; 85 | 86 | auto DL = 
JTMB->getDefaultDataLayoutForTarget(); 87 | if (!DL) 88 | return DL.takeError(); 89 | 90 | return std::make_unique(std::move(*JTMB), std::move(*DL)); 91 | } 92 | 93 | llvm::Error addModule(std::unique_ptr M, 94 | std::unique_ptr ctx) { 95 | return OptimizeLayer.add(*MainJD, 96 | llvm::orc::ThreadSafeModule(std::move(M), std::move(ctx))); 97 | } 98 | 99 | llvm::Expected lookup(llvm::StringRef Name) { 100 | return ES.lookup({MainJD}, Mangle(Name.str())); 101 | } 102 | 103 | private: 104 | static llvm::orc::ThreadSafeModule 105 | optimizeModule(llvm::orc::ThreadSafeModule TSM, const llvm::orc::MaterializationResponsibility &R) { 106 | // Create a function pass manager. 107 | auto FPM = std::make_unique(TSM.getModuleUnlocked()); 108 | 109 | // Add some optimizations. 110 | FPM->add(llvm::createInstructionCombiningPass()); 111 | FPM->add(llvm::createReassociatePass()); 112 | FPM->add(llvm::createGVNPass()); 113 | FPM->add(llvm::createInstSimplifyLegacyPass()); 114 | // FPM->add(llvm::createCFGSimplificationPass()); 115 | FPM->doInitialization(); 116 | 117 | // Run the optimizations over all functions in the module being added to 118 | // the JIT. 
119 | for (auto &F : *TSM.getModuleUnlocked()) 120 | FPM->run(F); 121 | 122 | return TSM; 123 | } 124 | }; 125 | } 126 | 127 | #endif // LLVM_EXECUTIONENGINE_ORC_KALEIDOSCOPEJIT_H 128 | 129 | -------------------------------------------------------------------------------- /rgd_op.h: -------------------------------------------------------------------------------- 1 | #ifndef __IR__H_ 2 | #define __IR__H_ 3 | 4 | #include 5 | 6 | namespace rgd { 7 | enum Kind { 8 | Bool, // 0 9 | Constant, // 1 10 | Read, // 2 11 | Concat, // 3 12 | Extract, // 4 13 | 14 | ZExt, // 5 15 | SExt, // 6 16 | 17 | // Arithmetic 18 | Add, // 7 19 | Sub, // 8 20 | Mul, // 9 21 | UDiv, // 10 22 | SDiv, // 11 23 | URem, // 12 24 | SRem, // 13 25 | Neg, // 14 26 | 27 | // Bit 28 | Not, // 15 29 | And, // 16 30 | Or, // 17 31 | Xor, // 18 32 | Shl, // 19 33 | LShr, // 20 34 | AShr, // 21 35 | 36 | // Compare 37 | Equal, // 22 38 | Distinct, // 23 39 | Ult, // 24 40 | Ule, // 25 41 | Ugt, // 26 42 | Uge, // 27 43 | Slt, // 28 44 | Sle, // 29 45 | Sgt, // 30 46 | Sge, // 31 47 | 48 | // Logical 49 | LOr, // 32 50 | LAnd, // 33 51 | LNot, // 34 52 | 53 | // Special 54 | Ite, // 35 55 | Load, // 36 to be worked with TT-Fuzzer 56 | Memcmp, //37 57 | }; 58 | } 59 | 60 | static inline bool isRelational(uint32_t kind) { 61 | if (kind == rgd::Equal || kind == rgd::Distinct || kind == rgd::Ugt || kind == rgd::Ult 62 | || kind == rgd::Uge || kind == rgd::Ule || kind == rgd::Sgt || kind == rgd::Slt 63 | || kind == rgd::Sge || kind == rgd::Sle) 64 | return true; 65 | else 66 | return false; 67 | } 68 | 69 | #endif 70 | -------------------------------------------------------------------------------- /test.h: -------------------------------------------------------------------------------- 1 | #ifndef TEST_H_ 2 | #define TEST_H_ 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "grad.h" 11 | #include "input.h" 12 | 13 | // function under test 14 | // constraint: 0 
= equal, 1 = distinct, 2 = lt, 3 = le, 4 = gt, 5 = ge 15 | typedef void(*test_fn_type)(uint64_t*); 16 | 17 | //the first two slots of the arguments are reserved for the left and right operands 18 | static const int RET_OFFSET = 2; 19 | 20 | struct Constraint { 21 | // JIT'ed function for a comparison expression 22 | test_fn_type fn; 23 | // the relational operator 24 | uint32_t comparison; 25 | 26 | // During constraint collection, (symbolic) input bytes are recorded 27 | // as offsets from the beginning of the input. However, the JIT'ed 28 | // function consumes inputs as an input array. So, when building the 29 | // function, we need to map the offset to the idx in input array, 30 | // which is stored in local_map. 31 | std::map local_map; 32 | // if const {false, const value}, if symbolic {true, index in the inputs} 33 | // during local search, we use a single global array (to avoid memory 34 | // allocation and free) to prepare the inputs, so we need to know where 35 | // to load the input values into the input array.
36 | std::vector> input_args; 37 | // map the offset to iv (initial value) 38 | std::unordered_map inputs; 39 | // shape information about the input (e.g., 1, 2, 4, 8 bytes) 40 | std::unordered_map shapes; 41 | // number of constant in the input array 42 | uint32_t const_num; 43 | }; 44 | 45 | 46 | struct ConsMeta { 47 | // per-FUT arg mapping, so we can share the constraints 48 | std::vector> input_args; 49 | // input2state inference related 50 | bool i2s_feasible; 51 | uint64_t op1, op2; 52 | }; 53 | 54 | struct FUT { 55 | FUT(): scratch_args(nullptr), max_const_num(0) {} 56 | ~FUT() { if (scratch_args) free(scratch_args); } 57 | uint32_t num_exprs; 58 | // constraints, could be shared, strictly read-only 59 | std::vector> constraints; 60 | // per-FUT mutable metadata of constraints 61 | std::vector> consmeta; 62 | 63 | // inputs as pairs of 64 | std::vector> inputs; 65 | // shape information at each offset 66 | std::unordered_map shapes; 67 | // max number of constants in the input array 68 | uint32_t max_const_num; 69 | // record constraints that use a certain input byte 70 | std::unordered_map> cmap; 71 | // the input array used for all JIT'ed functions 72 | // all input bytes are extended to 64 bits 73 | uint64_t* scratch_args; 74 | 75 | // intermediate states for the search 76 | std::vector orig_distances; 77 | std::vector distances; 78 | 79 | // statistics 80 | uint64_t start; //start time 81 | bool stopped = false; 82 | int attempts = 0; 83 | int num_minimal_optima = 0; 84 | bool gsol = false; 85 | bool opti_hit = false; 86 | 87 | // solutions 88 | std::unordered_map *rgd_solution; 89 | std::unordered_map *opti_solution; 90 | std::unordered_map *hint_solution; 91 | 92 | void finalize() { 93 | // aggregate the contraints, map each input byte to a constraint to 94 | // an index in the "global" input array (i.e., the scratch_args) 95 | std::unordered_map sym_map; 96 | uint32_t gidx = 0; 97 | for (size_t i = 0; i < constraints.size(); i++) { 98 | 
std::unique_ptr cm = std::make_unique(); 99 | cm->input_args = constraints[i]->input_args; 100 | uint32_t last_offset = -1; 101 | cm->i2s_feasible = true; 102 | for (const auto& [offset, lidx] : constraints[i]->local_map) { 103 | auto gitr = sym_map.find(offset); 104 | if (gitr == sym_map.end()) { 105 | gidx = inputs.size(); 106 | sym_map[offset] = gidx; 107 | inputs.push_back(std::make_pair(offset, constraints[i]->inputs.at(offset))); 108 | shapes[offset] = constraints[i]->shapes.at(offset); 109 | } else { 110 | gidx = gitr->second; 111 | } 112 | auto slot = cmap.find(gidx); 113 | if (slot != cmap.end()) { 114 | slot->second.push_back(i); 115 | } else { 116 | cmap.emplace(std::make_pair(gidx, std::vector{i})); 117 | } 118 | // save the mapping between the local index (i.e., where the JIT'ed 119 | // function is going to read the input from) and the global index 120 | // (i.e., where the current value corresponding to the input byte 121 | // is stored in MutInput) 122 | cm->input_args[lidx].second = gidx; 123 | 124 | // check if the input bytes are consecutive 125 | // using std::map ensures that the offsets (keys) are sorted 126 | if (last_offset != -1 && last_offset + 1 != offset) { 127 | cm->i2s_feasible = false; 128 | } 129 | last_offset = offset; 130 | } 131 | // FIXME: only support up to 64-bit for now 132 | if (constraints[i]->local_map.size() > 8) { 133 | cm->i2s_feasible = false; 134 | } 135 | 136 | // update the number of required constants in the input array 137 | if (max_const_num < constraints[i]->const_num) 138 | max_const_num = constraints[i]->const_num; 139 | 140 | // insert the constraint metadata 141 | consmeta.push_back(std::move(cm)); 142 | } 143 | 144 | // allocate the input array, reserver 2 for comparison operands a,b 145 | scratch_args = (uint64_t*)aligned_alloc(sizeof(*scratch_args), 146 | (2 + inputs.size() + max_const_num + 1) * sizeof(*scratch_args)); 147 | orig_distances.resize(constraints.size(), 0); 148 | 
distances.resize(constraints.size(), 0); 149 | } 150 | 151 | void load_hint() { // load hint 152 | for (auto itr = inputs.begin(); itr != inputs.end(); itr++) { 153 | auto got = hint_solution->find(itr->first); 154 | if (got != hint_solution->end()) 155 | itr->second = got->second; 156 | } 157 | } 158 | 159 | }; 160 | 161 | #endif // TEST_H_ -------------------------------------------------------------------------------- /util.cc: -------------------------------------------------------------------------------- 1 | #include "rgd_op.h" 2 | #include 3 | #include 4 | #include 5 | #include "rgd.pb.h" 6 | #include 7 | #include 8 | #include 9 | 10 | using namespace google::protobuf::io; 11 | using namespace rgd; 12 | 13 | const uint64_t kUsToS = 1000000; 14 | 15 | uint64_t getTimeStamp() { 16 | struct timeval tv; 17 | gettimeofday(&tv, NULL); 18 | return tv.tv_sec * kUsToS + tv.tv_usec; 19 | } 20 | 21 | static std::string get_name(uint32_t kind) { 22 | switch (kind) { 23 | case rgd::Bool: return "bool"; 24 | case rgd::Constant: return "constant"; 25 | case rgd::Read: return "read"; 26 | case rgd::Concat: return "concat"; 27 | case rgd::Extract: return "extract"; 28 | 29 | case rgd::ZExt: return "zext"; 30 | case rgd::SExt: return "sext"; 31 | 32 | // Arithmetic 33 | case rgd::Add: return "add"; 34 | case rgd::Sub: return "sub"; 35 | case rgd::Mul: return "mul"; 36 | case rgd::UDiv: return "udiv"; 37 | case rgd::SDiv: return "sdiv"; 38 | case rgd::URem: return "urem"; 39 | case rgd::SRem: return "srem"; 40 | case rgd::Neg: return "neg"; 41 | 42 | // Bit 43 | case rgd::Not: return "not"; 44 | case rgd::And: return "and"; 45 | case rgd::Or: return "or"; 46 | case rgd::Xor: return "xor"; 47 | case rgd::Shl: return "shl"; 48 | case rgd::LShr: return "lshr"; 49 | case rgd::AShr: return "ashr"; 50 | 51 | // Compare 52 | case rgd::Equal: return "equal"; 53 | case rgd::Distinct: return "distinct"; 54 | case rgd::Ult: return "ult"; 55 | case rgd::Ule: return "ule"; 56 | case 
rgd::Ugt: return "ugt"; 57 | case rgd::Uge: return "uge"; 58 | case rgd::Slt: return "slt"; 59 | case rgd::Sle: return "sle"; 60 | case rgd::Sgt: return "sgt"; 61 | case rgd::Sge: return "sge"; 62 | 63 | // Logical 64 | case rgd::LOr: return "lor"; 65 | case rgd::LAnd: return "land"; 66 | case rgd::LNot: return "lnot"; 67 | 68 | // Special 69 | case rgd::Ite: return "ite"; 70 | case rgd::Memcmp: return "memcmp"; 71 | 72 | default: return "unknown"; 73 | } 74 | } 75 | 76 | static void do_print(const AstNode* req) { 77 | std::cerr << get_name(req->kind()) << "("; 78 | //std::cerr << req->name() << "("; 79 | std::cerr << "width=" << req->bits() << ","; 80 | //std::cerr << " hash=" << req->hash() << ","; 81 | std::cerr << " label=" << req->label() << ","; 82 | //std::cerr << " hash=" << req->hash() << ","; 83 | if (req->kind() == rgd::Bool) { 84 | std::cerr << req->value(); 85 | } 86 | if (req->kind() == rgd::Constant) { 87 | std::cerr << req->value() << ", "; 88 | // std::cerr << req->index(); 89 | } 90 | if (req->kind() == rgd::Memcmp) { 91 | std::cerr << req->value() << ", "; 92 | // std::cerr << req->index(); 93 | } 94 | if (req->kind() == rgd::Read || req->kind() == rgd::Extract) { 95 | std::cerr << req->index() << ", "; 96 | } 97 | for(int i = 0; i < req->children_size(); i++) { 98 | do_print(&req->children(i)); 99 | if (i != req->children_size() - 1) 100 | std::cerr << ", "; 101 | } 102 | std::cerr << ")"; 103 | } 104 | 105 | static void verbose_do_print(int depth,const AstNode* req) { 106 | for (int i = 0; i< depth;i++) 107 | std::cerr << "\t"; 108 | std::cerr << req->name() << "("; 109 | // std::cerr << "width=" << req->bits() << ","; 110 | // std::cerr << " hash=" << req->hash() << ","; 111 | std::cerr << "label=" << req->label() << ","; 112 | if (req->kind() == rgd::Bool) { 113 | std::cerr << req->value(); 114 | } 115 | if (req->kind() == rgd::Constant) { 116 | std::cerr << req->value(); 117 | } 118 | if (req->kind() == rgd::Read) { 119 | std::cerr << 
req->index(); 120 | } 121 | std::cerr << std::endl; 122 | for(int i = 0; i < req->children_size(); i++) { 123 | verbose_do_print(depth+1,&req->children(i)); 124 | if (i != req->children_size() - 1) 125 | std::cerr << ", "; 126 | } 127 | for(int i = 0; i< depth;i++) 128 | std::cerr << "\t"; 129 | std::cerr << std::endl; 130 | std::cerr << ")"; 131 | } 132 | 133 | void printExpression(const AstNode* req) { 134 | do_print(req); 135 | std::cerr << std::endl; 136 | } 137 | 138 | 139 | static bool writeDelimitedTo( 140 | const google::protobuf::MessageLite& message, 141 | google::protobuf::io::ZeroCopyOutputStream* rawOutput) { 142 | // We create a new coded stream for each message. Don't worry, this is fast. 143 | google::protobuf::io::CodedOutputStream output(rawOutput); 144 | 145 | // Write the size. 146 | const int size = message.ByteSizeLong(); 147 | output.WriteVarint32(size); 148 | 149 | uint8_t* buffer = output.GetDirectBufferForNBytesAndAdvance(size); 150 | if (buffer != NULL) { 151 | // Optimization: The message fits in one buffer, so use the faster 152 | // direct-to-array serialization path. 153 | message.SerializeWithCachedSizesToArray(buffer); 154 | } else { 155 | // Slightly-slower path when the message is multiple buffers. 
156 | message.SerializeWithCachedSizes(&output); 157 | if (output.HadError()) return false; 158 | } 159 | 160 | return true; 161 | } 162 | 163 | bool saveRequest( 164 | const google::protobuf::MessageLite& message, 165 | const char* path) { 166 | mode_t mode = S_IRUSR | S_IWUSR; 167 | int fd = open(path, O_CREAT | O_WRONLY | O_APPEND, mode); 168 | ZeroCopyOutputStream* rawOutput = new google::protobuf::io::FileOutputStream(fd); 169 | bool suc = writeDelimitedTo(message,rawOutput); 170 | delete rawOutput; 171 | sync(); 172 | close(fd); 173 | return suc; 174 | } 175 | 176 | -------------------------------------------------------------------------------- /util.h: -------------------------------------------------------------------------------- 1 | #ifndef UTIL_H_ 2 | #define UTIL_H_ 3 | #include "rgd.pb.h" 4 | using namespace rgd; 5 | bool saveRequest(const google::protobuf::MessageLite& message, 6 | const char* path); 7 | void printExpression(const AstNode* req); 8 | #endif 9 | -------------------------------------------------------------------------------- /wheels/lockfreehash/cuckoo/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | g++ main.cpp lockfree_hash_table.cpp -pthread -std=c++11 3 | clean: 4 | rm -rf a.out 5 | -------------------------------------------------------------------------------- /wheels/lockfreehash/cuckoo/benchmark_lockfree_ht.h: -------------------------------------------------------------------------------- 1 | #ifndef BENCHMARK_LOCKFREE_HT 2 | #define BENCHMARK_LOCKFREE_HT 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "cycle_timer.h" 13 | #include "lockfree_hash_table.h" 14 | #include "thread_service.h" 15 | 16 | #define NUM_ITERS 3 17 | #define MAX_THREADS 24 18 | 19 | #define C_NUM_ELEMS 500 20 | 21 | class BenchmarkLockFreeHT 22 | { 23 | public: 24 | BenchmarkLockFreeHT(int op_count, int capacity, 25 | int rweight, int 
idweight, 26 | int thread_count, 27 | double load_factor); 28 | 29 | void benchmark_correctness(); 30 | void benchmark_hp(); 31 | void benchmark_all(); 32 | void run(); 33 | 34 | private: 35 | int m_rweight; 36 | int m_idweight; 37 | 38 | int m_thread_count; 39 | int m_op_count; 40 | int m_capacity; 41 | double m_load_factor; 42 | }; 43 | 44 | BenchmarkLockFreeHT::BenchmarkLockFreeHT(int op_count, int capacity, 45 | int rweight, int idweight, 46 | int thread_count, double load_factor) 47 | { 48 | std::cout << "*** BENCHMARKING LockFreeHT ***" << std::endl; 49 | m_op_count = op_count; 50 | m_load_factor = load_factor; 51 | m_capacity = capacity; 52 | m_thread_count = thread_count; 53 | 54 | m_rweight = rweight; 55 | m_idweight = idweight; 56 | } 57 | 58 | void BenchmarkLockFreeHT::benchmark_correctness() 59 | { 60 | bool correct = true; 61 | 62 | Lockfree_hash_table ht(2 * C_NUM_ELEMS, m_thread_count); 63 | std::unordered_map map; 64 | map.reserve(2 * C_NUM_ELEMS); 65 | 66 | std::random_device rd; 67 | std::mt19937 mt(rd()); 68 | std::uniform_int_distribution rng; 69 | 70 | int elems[C_NUM_ELEMS]; 71 | for (int i = 0; i < C_NUM_ELEMS; i++) 72 | { 73 | int k = rng(mt); 74 | elems[i] = k; 75 | map[k] = k; 76 | } 77 | 78 | pthread_t workers[MAX_THREADS]; 79 | WorkerArgs args[MAX_THREADS]; 80 | 81 | for (int i = 0; i < 2; i++) 82 | { 83 | args[i].num_elems = C_NUM_ELEMS / 2; 84 | args[i].ht_p = (void*)&ht; 85 | args[i].elems = elems; 86 | args[i].start = i * (C_NUM_ELEMS / 2); 87 | args[i].tid = i; 88 | 89 | pthread_create(&workers[i], NULL, thread_insert, (void*)&args[i]); 90 | } 91 | 92 | for (int i = 0; i < 2; i++) 93 | { 94 | pthread_join(workers[i], NULL); 95 | } 96 | 97 | int count = 0; 98 | for (std::pair e : map) 99 | { 100 | std::pair r = ht.search(e.first, 0); 101 | if (!r.second || e.second != r.first) 102 | { 103 | 104 | std::cout << "\t" << "Expected value, Received value, Received result = " << e.second << " " << r.second << " "<< r.first << std::endl; 105 
| correct = false; 106 | count++; 107 | } 108 | } 109 | 110 | std::cout << "\t" << count << "/" << C_NUM_ELEMS << " errors" << std::endl; 111 | 112 | if (correct) 113 | std::cout << "\t" << "Correctness test passed" << std::endl; 114 | else 115 | std::cout << "\t" << "Correctness test failed" << std::endl; 116 | 117 | } 118 | 119 | void BenchmarkLockFreeHT::benchmark_hp() 120 | { 121 | Lockfree_hash_table ht(400000, m_thread_count); 122 | 123 | std::random_device rd; 124 | std::mt19937 mt(rd()); 125 | std::uniform_int_distribution rng; 126 | 127 | std::array weights; 128 | weights[0] = m_rweight; 129 | weights[1] = m_idweight; 130 | weights[2] = m_idweight; 131 | 132 | std::default_random_engine g; 133 | std::discrete_distribution drng(weights.begin(), weights.end()); 134 | 135 | int insert[200000]; 136 | for (int i = 0; i < 200000; i++) 137 | { 138 | int k = rng(mt); 139 | int v = rng(mt); 140 | insert[i] = k; 141 | ht.insert(k, v, 0); 142 | } 143 | 144 | pthread_t workers[MAX_THREADS]; 145 | WorkerArgs args[MAX_THREADS]; 146 | 147 | int num_elems = 200000 / m_thread_count; 148 | for (int i = 0; i < m_thread_count; i++) 149 | { 150 | args[i].num_elems = num_elems; 151 | args[i].ht_p = (void*)&ht; 152 | args[i].elems = insert; 153 | args[i].start = i * num_elems; 154 | args[i].tid = i; 155 | args[i].remove = i < (m_thread_count / 4); 156 | 157 | pthread_create(&workers[i], NULL, thread_remove, (void*)&args[i]); 158 | } 159 | 160 | for (int i = 0; i < m_thread_count; i++) 161 | { 162 | pthread_join(workers[i], NULL); 163 | } 164 | 165 | std::cout << "\t" << "Hazard Pointer test passed" << std::endl; 166 | 167 | } 168 | 169 | void BenchmarkLockFreeHT::benchmark_all() 170 | { 171 | Lockfree_hash_table ht(m_capacity, m_thread_count); 172 | 173 | std::random_device rd; 174 | std::mt19937 mt(rd()); 175 | std::uniform_int_distribution rng; 176 | 177 | std::array weights; 178 | weights[0] = m_rweight; 179 | weights[1] = m_idweight; 180 | weights[2] = m_idweight; 181 | 182 
| std::default_random_engine g; 183 | std::discrete_distribution drng(weights.begin(), weights.end()); 184 | 185 | // Warm-up table to load factor 186 | int num_warmup = static_cast(static_cast(m_capacity) * m_load_factor); 187 | for (int i = 0; i < num_warmup; i++) 188 | { 189 | int k = rng(mt); 190 | int v = rng(mt); 191 | 192 | ht.insert(k, v, 0); 193 | } 194 | 195 | // Run benchmark 196 | std::vector results; 197 | for (int iter = 0; iter < NUM_ITERS; iter++) 198 | { 199 | int num_elems = m_op_count / m_thread_count; 200 | pthread_t workers[MAX_THREADS]; 201 | WorkerArgs args[MAX_THREADS]; 202 | 203 | double start = CycleTimer::currentSeconds(); 204 | for (int i = 0; i < m_thread_count; i++) 205 | { 206 | args[i].num_elems = num_elems; 207 | args[i].rweight = m_rweight; 208 | args[i].iweight = m_idweight / 2; 209 | args[i].dweight = m_idweight / 2; 210 | args[i].ht_p = (void*)&ht; 211 | args[i].tid = i; 212 | pthread_create(&workers[i], NULL, thread_service, (void*)&args[i]); 213 | } 214 | 215 | for (int i = 0; i < m_thread_count; i++) 216 | { 217 | pthread_join(workers[i], NULL); 218 | } 219 | double time = CycleTimer::currentSeconds() - start; 220 | results.push_back(time); 221 | } 222 | 223 | // Publish Results 224 | double best_time = *std::min_element(results.begin(), results.end()); 225 | double avg_time = std::accumulate(results.begin(), results.end(), 0.0) / static_cast(results.size()); 226 | std::cout << "\t" << "Max Throughput: " << m_op_count / best_time / 1000.0 << " ops/ms" << std::endl; 227 | std::cout << "\t" << "Avg Throughput: " << m_op_count / avg_time / 1000.0 << " ops/ms" << std::endl; 228 | 229 | results.clear(); 230 | 231 | int* keys = new int[m_op_count]; 232 | 233 | for (int iter = 0; iter < NUM_ITERS; iter++) 234 | { 235 | int num_elems = m_op_count / m_thread_count; 236 | pthread_t workers[MAX_THREADS]; 237 | WorkerArgs args[MAX_THREADS]; 238 | 239 | double start = CycleTimer::currentSeconds(); 240 | for (int i = 0; i < m_thread_count; 
i++) 241 | { 242 | args[i].num_elems = num_elems; 243 | args[i].rweight = m_rweight; 244 | args[i].iweight = m_idweight / 2; 245 | args[i].dweight = m_idweight / 2; 246 | args[i].ht_p = (void*)&ht; 247 | args[i].tid = i; 248 | args[i].elems = keys; 249 | args[i].start = i * num_elems; 250 | pthread_create(&workers[i], NULL, thread_service_low_contention, (void*)&args[i]); 251 | } 252 | 253 | for (int i = 0; i < m_thread_count; i++) 254 | { 255 | pthread_join(workers[i], NULL); 256 | } 257 | double time = CycleTimer::currentSeconds() - start; 258 | results.push_back(time); 259 | } 260 | 261 | // Publish Results 262 | best_time = *std::min_element(results.begin(), results.end()); 263 | avg_time = std::accumulate(results.begin(), results.end(), 0.0) / static_cast(results.size()); 264 | std::cout << "\t" << "Max Throughput (Low): " << m_op_count / best_time / 1000.0 << " ops/ms" << std::endl; 265 | std::cout << "\t" << "Avg Throughput (Low): " << m_op_count / avg_time / 1000.0 << " ops/ms" << std::endl; 266 | 267 | results.clear(); 268 | 269 | for (int iter = 0; iter < NUM_ITERS; iter++) 270 | { 271 | int num_elems = m_op_count / m_thread_count; 272 | pthread_t workers[MAX_THREADS]; 273 | WorkerArgs args[MAX_THREADS]; 274 | 275 | double start = CycleTimer::currentSeconds(); 276 | for (int i = 0; i < m_thread_count; i++) 277 | { 278 | args[i].num_elems = num_elems; 279 | args[i].rweight = m_rweight; 280 | args[i].iweight = m_idweight / 2; 281 | args[i].dweight = m_idweight / 2; 282 | args[i].ht_p = (void*)&ht; 283 | args[i].tid = i; 284 | ht.insert(0, 0, 0); 285 | pthread_create(&workers[i], NULL, thread_service_high_contention, (void*)&args[i]); 286 | } 287 | 288 | for (int i = 0; i < m_thread_count; i++) 289 | { 290 | pthread_join(workers[i], NULL); 291 | } 292 | double time = CycleTimer::currentSeconds() - start; 293 | results.push_back(time); 294 | } 295 | 296 | // Publish Results 297 | best_time = *std::min_element(results.begin(), results.end()); 298 | avg_time = 
std::accumulate(results.begin(), results.end(), 0.0) / static_cast(results.size()); 299 | std::cout << "\t" << "Max Throughput (High): " << m_op_count / best_time / 1000.0 << " ops/ms" << std::endl; 300 | std::cout << "\t" << "Avg Throughput (High): " << m_op_count / avg_time / 1000.0 << " ops/ms" << std::endl; 301 | 302 | 303 | } 304 | 305 | void BenchmarkLockFreeHT::run() 306 | { 307 | benchmark_correctness(); 308 | benchmark_hp(); 309 | benchmark_all(); 310 | } 311 | 312 | #endif 313 | -------------------------------------------------------------------------------- /wheels/lockfreehash/cuckoo/benchmark_unordered_map.h: -------------------------------------------------------------------------------- 1 | #ifndef BENCHMARK_UNORDERED_MAP 2 | #define BENCHMARK_UNORDERED_MAP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "cycle_timer.h" 11 | 12 | #define NUM_ITERS 3 13 | 14 | class BenchmarkUnorderedMap 15 | { 16 | public: 17 | BenchmarkUnorderedMap(int op_count, int capacity, 18 | int rweight, int idweight, 19 | double load_factor); 20 | 21 | void benchmark_all(); 22 | void run(); 23 | private: 24 | int m_rweight; 25 | int m_idweight; 26 | 27 | int m_op_count; 28 | int m_capacity; 29 | double m_load_factor; 30 | }; 31 | 32 | BenchmarkUnorderedMap::BenchmarkUnorderedMap(int op_count, int capacity, 33 | int rweight, int idweight, 34 | double load_factor) 35 | { 36 | std::cout << "*** BENCHMARKING UnorderedMap ***" << std::endl; 37 | m_op_count = op_count; 38 | m_load_factor = load_factor; 39 | m_capacity = capacity; 40 | 41 | m_rweight = rweight; 42 | m_idweight = idweight; 43 | } 44 | 45 | void BenchmarkUnorderedMap::benchmark_all() 46 | { 47 | std::unordered_map map; 48 | map.reserve(m_capacity); 49 | 50 | std::random_device rd; 51 | std::mt19937 mt(rd()); 52 | std::uniform_int_distribution rng; 53 | 54 | std::array weights; 55 | weights[0] = m_rweight; 56 | weights[1] = m_idweight; 57 | weights[2] = m_idweight; 58 | 59 | 
std::default_random_engine g; 60 | std::discrete_distribution drng(weights.begin(), weights.end()); 61 | 62 | // Warm-up table to load factor 63 | int num_warmup = static_cast(static_cast(m_capacity) * m_load_factor); 64 | for (int i = 0; i < num_warmup; i++) 65 | { 66 | int k = rng(mt); 67 | int v = rng(mt); 68 | map[k] = v; 69 | } 70 | 71 | // Run benchmark (single-threaded) 72 | std::vector results; 73 | for (int iter = 0; iter < NUM_ITERS; iter++) 74 | { 75 | double start = CycleTimer::currentSeconds(); 76 | for (int i = 0; i < m_op_count; i++) 77 | { 78 | int k = rng(mt); 79 | int v = rng(mt); 80 | int a = drng(g); 81 | 82 | if (a == 0) 83 | map.find(k); 84 | else if (a == 1) 85 | map[k] = v; 86 | else 87 | map.erase(k); 88 | } 89 | double time = CycleTimer::currentSeconds() - start; 90 | results.push_back(time); 91 | } 92 | 93 | // Publish Results 94 | double best_time = *std::min_element(results.begin(), results.end()); 95 | double avg_time = std::accumulate(results.begin(), results.end(), 0.0) / static_cast(results.size()); 96 | std::cout << "\t" << "Max Throughput: " << static_cast(m_op_count) / best_time / 1000.0 << " ops/ms" << std::endl; 97 | std::cout << "\t" << "Avg Throughput: " << static_cast(m_op_count) / avg_time / 1000.0 << " ops/ms" << std::endl; 98 | 99 | results.clear(); 100 | int *keys = new int[m_op_count]; 101 | int s = 0; 102 | int e = 0; 103 | for (int iter = 0; iter < NUM_ITERS; iter++) 104 | { 105 | double start = CycleTimer::currentSeconds(); 106 | for (int i = 0; i < m_op_count; i++) 107 | { 108 | int k = rng(mt); 109 | int v = rng(mt); 110 | int a = drng(g); 111 | 112 | if (s == e || a == 1) { 113 | map[k] = v; 114 | keys[e++] = k; 115 | } else if (a == 0) { 116 | map.find(keys[k % (e - s) + s]); 117 | } else { 118 | map.erase(keys[s++]); 119 | } 120 | } 121 | double time = CycleTimer::currentSeconds() - start; 122 | results.push_back(time); 123 | } 124 | 125 | // Publish Results 126 | best_time = *std::min_element(results.begin(), 
results.end()); 127 | avg_time = std::accumulate(results.begin(), results.end(), 0.0) / static_cast(results.size()); 128 | std::cout << "\t" << "Max Throughput (Low): " << static_cast(m_op_count) / best_time / 1000.0 << " ops/ms" << std::endl; 129 | std::cout << "\t" << "Avg Throughput (Low): " << static_cast(m_op_count) / avg_time / 1000.0 << " ops/ms" << std::endl; 130 | 131 | results.clear(); 132 | for (int iter = 0; iter < NUM_ITERS; iter++) 133 | { 134 | double start = CycleTimer::currentSeconds(); 135 | map[0] = 0; 136 | for (int i = 0; i < m_op_count; i++) 137 | { 138 | int x = map[0]; 139 | } 140 | double time = CycleTimer::currentSeconds() - start; 141 | results.push_back(time); 142 | } 143 | 144 | // Publish Results 145 | best_time = *std::min_element(results.begin(), results.end()); 146 | avg_time = std::accumulate(results.begin(), results.end(), 0.0) / static_cast(results.size()); 147 | std::cout << "\t" << "Max Throughput (High): " << static_cast(m_op_count) / best_time / 1000.0 << " ops/ms" << std::endl; 148 | std::cout << "\t" << "Avg Throughput (High): " << static_cast(m_op_count) / avg_time / 1000.0 << " ops/ms" << std::endl; 149 | } 150 | 151 | void BenchmarkUnorderedMap::run() 152 | { 153 | benchmark_all(); 154 | } 155 | 156 | #endif 157 | -------------------------------------------------------------------------------- /wheels/lockfreehash/cuckoo/cycle_timer.h: -------------------------------------------------------------------------------- 1 | #ifndef _SYRAH_CYCLE_TIMER_H_ 2 | #define _SYRAH_CYCLE_TIMER_H_ 3 | 4 | #if defined(__APPLE__) 5 | #if defined(__x86_64__) 6 | #include 7 | #else 8 | #include 9 | #include 10 | #endif // __x86_64__ or not 11 | 12 | #include // fprintf 13 | #include // exit 14 | 15 | #elif _WIN32 16 | # include 17 | # include 18 | #else 19 | # include 20 | # include 21 | # include 22 | # include 23 | #endif 24 | 25 | 26 | // This uses the cycle counter of the processor. 
Different 27 | // processors in the system will have different values for this. If 28 | // you process moves across processors, then the delta time you 29 | // measure will likely be incorrect. This is mostly for fine 30 | // grained measurements where the process is likely to be on the 31 | // same processor. For more global things you should use the 32 | // Time interface. 33 | 34 | // Also note that if you processors' speeds change (i.e. processors 35 | // scaling) or if you are in a heterogenous environment, you will 36 | // likely get spurious results. 37 | class CycleTimer { 38 | public: 39 | typedef unsigned long long SysClock; 40 | 41 | ////////// 42 | // Return the current CPU time, in terms of clock ticks. 43 | // Time zero is at some arbitrary point in the past. 44 | static SysClock currentTicks() { 45 | #if defined(__APPLE__) && !defined(__x86_64__) 46 | return mach_absolute_time(); 47 | #elif defined(_WIN32) 48 | LARGE_INTEGER qwTime; 49 | QueryPerformanceCounter(&qwTime); 50 | return qwTime.QuadPart; 51 | #elif defined(__x86_64__) 52 | unsigned int a, d; 53 | asm volatile("rdtsc" : "=a" (a), "=d" (d)); 54 | return static_cast(a) | 55 | (static_cast(d) << 32); 56 | #elif defined(__ARM_NEON__) && 0 // mrc requires superuser. 57 | unsigned int val; 58 | asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(val)); 59 | return val; 60 | #else 61 | timespec spec; 62 | clock_gettime(CLOCK_THREAD_CPUTIME_ID, &spec); 63 | return CycleTimer::SysClock(static_cast(spec.tv_sec) * 1e9 + static_cast(spec.tv_nsec)); 64 | #endif 65 | } 66 | 67 | ////////// 68 | // Return the current CPU time, in terms of seconds. 69 | // This is slower than currentTicks(). Time zero is at 70 | // some arbitrary point in the past. 71 | static double currentSeconds() { 72 | return currentTicks() * secondsPerTick(); 73 | } 74 | 75 | ////////// 76 | // Return the conversion from seconds to ticks. 
77 | static double ticksPerSecond() { 78 | return 1.0/secondsPerTick(); 79 | } 80 | 81 | static const char* tickUnits() { 82 | #if defined(__APPLE__) && !defined(__x86_64__) 83 | return "ns"; 84 | #elif defined(__WIN32__) || defined(__x86_64__) 85 | return "cycles"; 86 | #else 87 | return "ns"; // clock_gettime 88 | #endif 89 | } 90 | 91 | ////////// 92 | // Return the conversion from ticks to seconds. 93 | static double secondsPerTick() { 94 | static bool initialized = false; 95 | static double secondsPerTick_val; 96 | if (initialized) return secondsPerTick_val; 97 | #if defined(__APPLE__) 98 | #ifdef __x86_64__ 99 | int args[] = {CTL_HW, HW_CPU_FREQ}; 100 | unsigned int Hz; 101 | size_t len = sizeof(Hz); 102 | if (sysctl(args, 2, &Hz, &len, NULL, 0) != 0) { 103 | fprintf(stderr, "Failed to initialize secondsPerTick_val!\n"); 104 | exit(-1); 105 | } 106 | secondsPerTick_val = 1.0 / (double) Hz; 107 | #else 108 | mach_timebase_info_data_t time_info; 109 | mach_timebase_info(&time_info); 110 | 111 | // Scales to nanoseconds without 1e-9f 112 | secondsPerTick_val = (1e-9*static_cast(time_info.numer))/ 113 | static_cast(time_info.denom); 114 | #endif // x86_64 or not 115 | #elif defined(_WIN32) 116 | LARGE_INTEGER qwTicksPerSec; 117 | QueryPerformanceFrequency(&qwTicksPerSec); 118 | secondsPerTick_val = 1.0/static_cast(qwTicksPerSec.QuadPart); 119 | #else 120 | FILE *fp = fopen("/proc/cpuinfo","r"); 121 | char input[1024]; 122 | if (!fp) { 123 | fprintf(stderr, "CycleTimer::resetScale failed: couldn't find /proc/cpuinfo."); 124 | exit(-1); 125 | } 126 | // In case we don't find it, e.g. 
on the N900 127 | secondsPerTick_val = 1e-9; 128 | while (!feof(fp) && fgets(input, 1024, fp)) { 129 | // NOTE(boulos): Because reading cpuinfo depends on dynamic 130 | // frequency scaling it's better to read the @ sign first 131 | float GHz, MHz; 132 | if (strstr(input, "model name")) { 133 | char* at_sign = strstr(input, "@"); 134 | if (at_sign) { 135 | char* after_at = at_sign + 1; 136 | char* GHz_str = strstr(after_at, "GHz"); 137 | char* MHz_str = strstr(after_at, "MHz"); 138 | if (GHz_str) { 139 | *GHz_str = '\0'; 140 | if (1 == sscanf(after_at, "%f", &GHz)) { 141 | //printf("GHz = %f\n", GHz); 142 | secondsPerTick_val = 1e-9f / GHz; 143 | break; 144 | } 145 | } else if (MHz_str) { 146 | *MHz_str = '\0'; 147 | if (1 == sscanf(after_at, "%f", &MHz)) { 148 | //printf("MHz = %f\n", MHz); 149 | secondsPerTick_val = 1e-6f / MHz; 150 | break; 151 | } 152 | } 153 | } 154 | } else if (1 == sscanf(input, "cpu MHz : %f", &MHz)) { 155 | //printf("MHz = %f\n", MHz); 156 | secondsPerTick_val = 1e-6f / MHz; 157 | break; 158 | } 159 | } 160 | fclose(fp); 161 | #endif 162 | 163 | initialized = true; 164 | return secondsPerTick_val; 165 | } 166 | 167 | ////////// 168 | // Return the conversion from ticks to milliseconds. 
169 | static double msPerTick() { 170 | return secondsPerTick() * 1000.0; 171 | } 172 | 173 | private: 174 | CycleTimer(); 175 | }; 176 | 177 | #endif // #ifndef _SYRAH_CYCLE_TIMER_H_ 178 | -------------------------------------------------------------------------------- /wheels/lockfreehash/cuckoo/hash_table.h: -------------------------------------------------------------------------------- 1 | #ifndef HASH_TABLE 2 | #define HASH_TABLE 3 | 4 | #include 5 | 6 | struct Hash_table { 7 | virtual std::pair search(int key) = 0; 8 | virtual void insert(int key, int val) = 0; 9 | virtual void remove(int key) = 0; 10 | }; 11 | 12 | #endif 13 | -------------------------------------------------------------------------------- /wheels/lockfreehash/cuckoo/lockfree_hash_table.h: -------------------------------------------------------------------------------- 1 | #ifndef LOCKFREE_HASH_TABLE 2 | #define LOCKFREE_HASH_TABLE 3 | 4 | #define MAX_BUF 256 5 | 6 | #include "hash_table.h" 7 | #include 8 | #include 9 | 10 | struct Hash_entry { 11 | int key; 12 | int val; 13 | }; 14 | 15 | // Alternate count_ptr definition using unused bits 16 | typedef Hash_entry* Count_ptr; 17 | 18 | enum Find_result { FIRST, SECOND, NIL }; 19 | 20 | struct Lockfree_hash_table { 21 | Lockfree_hash_table(int capacity, int thread_count); 22 | ~Lockfree_hash_table(); 23 | 24 | std::pair search(int key, int tid); 25 | void insert(int key, int val, int tid); 26 | void remove(int key, int tid); 27 | 28 | private: 29 | Count_ptr *table[2]; 30 | int size1; 31 | int size2; 32 | 33 | std::vector> rlist; 34 | std::vector rcount; 35 | std::vector> hp_rec; 36 | 37 | int hash1(int key); 38 | int hash2(int key); 39 | bool check_counter(int ts1, int ts2, int ts1x, int ts2x); 40 | Find_result find(int key, Count_ptr &ptr1, Count_ptr &ptr2, int tid); 41 | bool relocate(int which, int index, int tid); 42 | void help_relocate(int which, int index, bool initiator, int tid); 43 | void del_dup(int idx1, Count_ptr ptr1, int 
idx2, Count_ptr ptr2, int tid); 44 | 45 | void retire_node(Hash_entry* node, int tid); 46 | void scan(int tid); 47 | }; 48 | #endif 49 | -------------------------------------------------------------------------------- /wheels/lockfreehash/cuckoo/main.cpp: -------------------------------------------------------------------------------- 1 | #include "benchmark_unordered_map.h" 2 | #include "benchmark_lockfree_ht.h" 3 | //#include "benchmark_tbb.h" 4 | 5 | #include "thread_service.h" 6 | #include "cycle_timer.h" 7 | 8 | #include 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #define DEFAULT_OP_COUNT 2000000 16 | #define DEFAULT_THREAD_COUNT 24 17 | #define DEFAULT_READ_PERCENT 90 18 | #define DEFAULT_LOAD_FACTOR 40 19 | #define CAPACITY 8000016 20 | 21 | int main(int argc, char *argv[]) 22 | { 23 | char c; 24 | int op_count = DEFAULT_OP_COUNT; 25 | int num_threads = DEFAULT_THREAD_COUNT; 26 | int read_percent = DEFAULT_READ_PERCENT; 27 | int load_factor = DEFAULT_LOAD_FACTOR; 28 | 29 | char *out_file = NULL; 30 | 31 | // Parse cmd args 32 | while ((c = getopt(argc, argv, "n:t:or:hl:")) != -1) 33 | { 34 | switch (c) 35 | { 36 | case 'n': 37 | op_count = atoi(optarg); 38 | break; 39 | case 't': 40 | printf("Here"); 41 | num_threads = atoi(optarg); 42 | break; 43 | case 'o': 44 | out_file = optarg; 45 | break; 46 | case 'r': 47 | read_percent = atoi(optarg); 48 | break; 49 | case 'l': 50 | load_factor = atoi(optarg); 51 | break; 52 | case 'h': 53 | printf("Options: \n" 54 | "-n num_elements \n" 55 | "-t num_threads \n" 56 | "-l load_factor \n" 57 | "-r read_percent \n" 58 | "-o output_file \n"); 59 | break; 60 | default: 61 | break; 62 | } 63 | } 64 | 65 | int rweight = read_percent; 66 | int idweight = 100 - read_percent; 67 | double lfactor = load_factor / 100.0; 68 | 69 | printf("%d", num_threads); 70 | 71 | // Run tests 72 | std::cout << "*** STARTING Benchmark ***" << std::endl; 73 | std::cout << "Parameters: " << std::endl; 74 | std::cout << 
"\t" << "op_count : " << op_count << std::endl; 75 | std::cout << "\t" << "num_threads : " << num_threads << std::endl; 76 | std::cout << "\t" << "load_factor : " << load_factor << "%" << std::endl; 77 | std::cout << "\t" << "read_percent : " << read_percent << "%" << std::endl; 78 | 79 | BenchmarkUnorderedMap benchmark_unordered_map(op_count, CAPACITY, rweight, idweight, lfactor); 80 | benchmark_unordered_map.run(); 81 | 82 | // BenchmarkTBB benchmark_tbb(op_count, CAPACITY, rweight, idweight, num_threads, lfactor); 83 | // benchmark_tbb.run(); 84 | 85 | BenchmarkLockFreeHT benchmark_lockfree_ht(op_count, CAPACITY, rweight, idweight, num_threads, lfactor); 86 | benchmark_lockfree_ht.run(); 87 | 88 | } 89 | -------------------------------------------------------------------------------- /wheels/lockfreehash/cuckoo/thread_service.h: -------------------------------------------------------------------------------- 1 | #ifndef THREAD_SERVICE 2 | #define THREAD_SERVICE 3 | 4 | #include 5 | #include 6 | 7 | struct WorkerArgs 8 | { 9 | int num_elems; 10 | // R/I/D weights, normalized to 100 11 | int rweight; 12 | int iweight; 13 | int dweight; 14 | void* ht_p; 15 | 16 | bool remove; 17 | int tid; 18 | int start; 19 | int* elems; 20 | }; 21 | 22 | template 23 | void* thread_service(void* threadArgs) 24 | { 25 | WorkerArgs* args = static_cast(threadArgs); 26 | 27 | std::random_device rd; 28 | std::mt19937 mt(rd()); 29 | std::uniform_int_distribution rng; 30 | 31 | std::array weights; 32 | weights[0] = args->rweight; 33 | weights[1] = args->iweight; 34 | weights[2] = args->dweight; 35 | 36 | std::default_random_engine g; 37 | std::discrete_distribution drng(weights.begin(), weights.end()); 38 | 39 | int tid = args->tid; 40 | int num_elems = args->num_elems; 41 | T* ht_p = static_cast(args->ht_p); 42 | 43 | for (int i = 0; i < num_elems; i++) 44 | { 45 | // Key, Value pair 46 | int k = rng(mt); 47 | int v = rng(mt); 48 | // Action : 0 -> Search, 1 -> Insert, 2 -> Remove 49 | 
int a = drng(g); 50 | 51 | if (a == 0) 52 | ht_p->search(k, tid); 53 | else if (a == 1) 54 | ht_p->insert(k, v, tid); 55 | else 56 | ht_p->remove(k, tid); 57 | } 58 | } 59 | 60 | template 61 | void* thread_service_low_contention(void* threadArgs) 62 | { 63 | WorkerArgs* args = static_cast(threadArgs); 64 | 65 | std::random_device rd; 66 | std::mt19937 mt(rd()); 67 | std::uniform_int_distribution rng; 68 | 69 | std::array weights; 70 | weights[0] = args->rweight; 71 | weights[1] = args->iweight; 72 | weights[2] = args->dweight; 73 | 74 | std::default_random_engine g; 75 | std::discrete_distribution drng(weights.begin(), weights.end()); 76 | 77 | int tid = args->tid; 78 | int num_elems = args->num_elems; 79 | T* ht_p = static_cast(args->ht_p); 80 | 81 | int *keys = (args->elems + args->start); 82 | 83 | int start = 0; 84 | int end = 0; 85 | for (int i = 0; i < num_elems; i++) 86 | { 87 | // Action : 0 -> Search, 1 -> Insert, 2 -> Remove 88 | int a = drng(g); 89 | 90 | if (start == end || a == 1) 91 | { 92 | int k = rng(mt) % num_elems + tid * num_elems; 93 | keys[end++] = k; 94 | ht_p->insert(k, k, tid); 95 | } 96 | else if (a == 0) 97 | { 98 | int k = rng(mt) % (end - start) + start; 99 | ht_p->search(k, tid); 100 | } 101 | else 102 | { 103 | int k = keys[start++]; 104 | ht_p->remove(k, tid); 105 | } 106 | } 107 | } 108 | 109 | template 110 | void* thread_service_high_contention(void* threadArgs) 111 | { 112 | WorkerArgs* args = static_cast(threadArgs); 113 | 114 | std::random_device rd; 115 | std::mt19937 mt(rd()); 116 | std::uniform_int_distribution rng; 117 | 118 | std::array weights; 119 | weights[0] = args->rweight; 120 | weights[1] = args->iweight; 121 | weights[2] = args->dweight; 122 | 123 | std::default_random_engine g; 124 | std::discrete_distribution drng(weights.begin(), weights.end()); 125 | 126 | int tid = args->tid; 127 | int num_elems = args->num_elems; 128 | T* ht_p = static_cast(args->ht_p); 129 | 130 | for (int i = 0; i < num_elems; i++) 131 | { 
132 | ht_p->search(0, tid); 133 | } 134 | } 135 | 136 | template 137 | void* thread_insert(void* threadArgs) 138 | { 139 | WorkerArgs* args = static_cast(threadArgs); 140 | int* elems = args->elems; 141 | T* ht_p = static_cast(args->ht_p); 142 | int start = args->start; 143 | int num_elems = args->num_elems; 144 | int tid = args->tid; 145 | 146 | for (int i = start; i < start + num_elems; i++) 147 | { 148 | ht_p->insert(elems[i], elems[i], tid); 149 | } 150 | 151 | } 152 | 153 | template 154 | void* thread_remove(void* threadArgs) 155 | { 156 | WorkerArgs* args = static_cast(threadArgs); 157 | int* elems = args->elems; 158 | T* ht_p = static_cast(args->ht_p); 159 | int start = args->start; 160 | int num_elems = args->num_elems; 161 | int tid = args->tid; 162 | bool remove = args->remove; 163 | 164 | std::random_device rd; 165 | std::mt19937 mt(rd()); 166 | std::uniform_int_distribution rng(0, 200000 - 1); 167 | 168 | for (int i = start; i < start + num_elems; i++) 169 | { 170 | if (remove) 171 | ht_p->remove(elems[i], tid); 172 | else 173 | ht_p->search(elems[rng(mt)], tid); 174 | } 175 | 176 | } 177 | 178 | #endif 179 | -------------------------------------------------------------------------------- /wheels/lockfreehash/lprobe/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | g++ main.cc -std=c++14 -mcx16 -march=native -pthread 3 | clean: 4 | rm -rf a.out 5 | -------------------------------------------------------------------------------- /wheels/lockfreehash/lprobe/alloc.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | namespace pbbs { 4 | void* my_alloc(size_t); 5 | void my_free(void*); 6 | } 7 | 8 | #include 9 | #include 10 | #include 11 | #include "utilities.h" 12 | #include "concurrent_stack.h" 13 | #include "utilities.h" 14 | #include "block_allocator.h" 15 | #include "memory_size.h" 16 | #include "get_time.h" 17 | 18 | namespace pbbs { 19 | 20 | 
#if defined(__APPLE__) // a little behind the times 21 | void* aligned_alloc(size_t, size_t n) {return malloc(n);} 22 | #endif 23 | 24 | 25 | // **************************************** 26 | // pool_allocator 27 | // **************************************** 28 | 29 | // Allocates headerless blocks from pools of different sizes. 30 | // A vector of pool sizes is given to the constructor. 31 | // Sizes must be at least 8, and must increase. 32 | // For pools of small blocks (below large_threshold) each thread keeps a 33 | // thread local list of elements from each pool using the 34 | // block_allocator. 35 | // For pools of large blocks there is only one shared pool for each. 36 | struct pool_allocator { 37 | 38 | private: 39 | static const size_t large_align = 64; 40 | static const size_t large_threshold = (1 << 20); 41 | size_t num_buckets; 42 | size_t num_small; 43 | size_t max_small; 44 | size_t max_size; 45 | std::atomic large_allocated{0}; 46 | 47 | concurrent_stack* large_buckets; 48 | struct block_allocator *small_allocators; 49 | std::vector sizes; 50 | 51 | void* allocate_large(size_t n) { 52 | 53 | size_t bucket = num_small; 54 | size_t alloc_size; 55 | 56 | if (n <= max_size) { 57 | while (n > sizes[bucket]) bucket++; 58 | maybe r = large_buckets[bucket-num_small].pop(); 59 | if (r) return *r; 60 | alloc_size = sizes[bucket]; 61 | } else alloc_size = n; 62 | 63 | void* a = (void*) aligned_alloc(large_align, alloc_size); 64 | if (a == NULL) throw std::bad_alloc(); 65 | 66 | large_allocated += n; 67 | return a; 68 | } 69 | 70 | void deallocate_large(void* ptr, size_t n) { 71 | if (n > max_size) { 72 | free(ptr); 73 | large_allocated -= n; 74 | } else { 75 | size_t bucket = num_small; 76 | while (n > sizes[bucket]) bucket++; 77 | large_buckets[bucket-num_small].push(ptr); 78 | } 79 | } 80 | 81 | const size_t small_alloc_block_size = (1 << 20); 82 | 83 | public: 84 | ~pool_allocator() { 85 | for (size_t i=0; i < num_small; i++) 86 | 
small_allocators[i].~block_allocator(); 87 | free(small_allocators); 88 | clear(); 89 | delete[] large_buckets; 90 | } 91 | 92 | pool_allocator() {} 93 | 94 | pool_allocator(std::vector const &sizes) : sizes(sizes) { 95 | timer t; 96 | num_buckets = sizes.size(); 97 | max_size = sizes[num_buckets-1]; 98 | num_small = 0; 99 | while (sizes[num_small] < large_threshold && num_small < num_buckets) 100 | num_small++; 101 | max_small = (num_small > 0) ? sizes[num_small - 1] : 0; 102 | 103 | large_buckets = new concurrent_stack[num_buckets-num_small]; 104 | 105 | small_allocators = (struct block_allocator*) 106 | malloc(num_buckets * sizeof(struct block_allocator)); 107 | size_t prev_bucket_size = 0; 108 | 109 | for (size_t i = 0; i < num_small; i++) { 110 | size_t bucket_size = sizes[i]; 111 | if (bucket_size < 8) 112 | throw std::invalid_argument("for small_allocator, bucket sizes must be at least 8"); 113 | if (!(bucket_size > prev_bucket_size)) 114 | throw std::invalid_argument("for small_allocator, bucket sizes must increase"); 115 | prev_bucket_size = bucket_size; 116 | new (static_cast(std::addressof(small_allocators[i]))) 117 | block_allocator(bucket_size, 0, small_alloc_block_size - 64); 118 | } 119 | } 120 | 121 | void* allocate(size_t n) { 122 | if (n > max_small) return allocate_large(n); 123 | size_t bucket = 0; 124 | while (n > sizes[bucket]) bucket++; 125 | return small_allocators[bucket].alloc(); 126 | } 127 | 128 | void deallocate(void* ptr, size_t n) { 129 | if (n > max_small) deallocate_large(ptr, n); 130 | else { 131 | size_t bucket = 0; 132 | while (n > sizes[bucket]) bucket++; 133 | small_allocators[bucket].free(ptr); 134 | } 135 | } 136 | 137 | // allocate, touch, and free to make sure space for small blocks is paged in 138 | void reserve(size_t bytes) { 139 | size_t bc = bytes/small_alloc_block_size; 140 | std::vector h(bc); 141 | parallel_for(0, bc, [&] (size_t i) { 142 | h[i] = allocate(small_alloc_block_size); 143 | }, 1); 144 | parallel_for(0, 
bc, [&] (size_t i) { 145 | for (size_t j=0; j < small_alloc_block_size; j += (1 << 12)) 146 | ((char*) h[i])[j] = 0; 147 | }, 1); 148 | for (size_t i=0; i < bc; i++) 149 | deallocate(h[i], small_alloc_block_size); 150 | } 151 | 152 | void print_stats() { 153 | size_t total_a = 0; 154 | size_t total_u = 0; 155 | for (size_t i = 0; i < num_small; i++) { 156 | size_t bucket_size = sizes[i]; 157 | size_t allocated = small_allocators[i].num_allocated_blocks(); 158 | size_t used = small_allocators[i].num_used_blocks(); 159 | total_a += allocated * bucket_size; 160 | total_u += used * bucket_size; 161 | cout << "size = " << bucket_size << ", allocated = " << allocated 162 | << ", used = " << used << endl; 163 | } 164 | cout << "Large allocated = " << large_allocated << endl; 165 | cout << "Total bytes allocated = " << total_a + large_allocated << endl; 166 | cout << "Total bytes used = " << total_u << endl; 167 | } 168 | 169 | void clear() { 170 | for (size_t i = num_small; i < num_buckets; i++) { 171 | maybe r = large_buckets[i-num_small].pop(); 172 | while (r) { 173 | large_allocated -= sizes[i]; 174 | free(*r); 175 | r = large_buckets[i-num_small].pop(); 176 | } 177 | } 178 | } 179 | }; 180 | 181 | // **************************************** 182 | // default_allocator (uses powers of two as pool sizes) 183 | // **************************************** 184 | 185 | // these are bucket sizes used by the default allocator. 
186 | std::vector default_sizes() { 187 | size_t log_min_size = 4; 188 | size_t log_max_size = pbbs::log2_up(getMemorySize()/64); 189 | 190 | std::vector sizes; 191 | for (size_t i = log_min_size; i <= log_max_size; i++) 192 | sizes.push_back(1 << i); 193 | return sizes; 194 | } 195 | 196 | pool_allocator default_allocator(default_sizes()); 197 | 198 | // **************************************** 199 | // Following Matches the c++ Allocator specification (minimally) 200 | // https://en.cppreference.com/w/cpp/named_req/Allocator 201 | // Can therefore be used for containers, e.g.: 202 | // std::vector> 203 | // **************************************** 204 | 205 | template 206 | struct allocator { 207 | using value_type = T; 208 | T* allocate(size_t n) { 209 | return (T*) default_allocator.allocate(n * sizeof(T)); 210 | } 211 | void deallocate(T* ptr, size_t n) { 212 | default_allocator.deallocate((void*) ptr, n * sizeof(T)); 213 | } 214 | 215 | allocator() = default; 216 | template constexpr allocator(const allocator&) {} 217 | }; 218 | 219 | template 220 | bool operator==(const allocator&, const allocator&) { return true; } 221 | template 222 | bool operator!=(const allocator&, const allocator&) { return false; } 223 | 224 | // **************************************** 225 | // Static allocator for single items of a given type, e.g. 
226 | // using long_allocator = type_allocator; 227 | // long* foo = long_allocator::alloc(); 228 | // *foo = (long) 23; 229 | // long_allocator::free(foo); 230 | // Uses block allocator, and is headerless 231 | // **************************************** 232 | 233 | template 234 | class type_allocator { 235 | public: 236 | static constexpr size_t default_alloc_size = 0; 237 | static block_allocator allocator; 238 | static const bool initialized{true}; 239 | static T* alloc() { return (T*) allocator.alloc();} 240 | static void free(T* ptr) {allocator.free((void*) ptr);} 241 | 242 | // for backward compatibility 243 | //static void init(size_t _alloc_size = 0, size_t _list_size=0) {}; 244 | static void init(size_t, size_t) {}; 245 | static void init() {}; 246 | static void reserve(size_t n = default_alloc_size) { 247 | allocator.reserve(n); 248 | } 249 | static void finish() {allocator.clear(); 250 | } 251 | static size_t block_size () {return allocator.block_size();} 252 | static size_t num_allocated_blocks() {return allocator.num_allocated_blocks();} 253 | static size_t num_used_blocks() {return allocator.num_used_blocks();} 254 | static size_t num_used_bytes() {return num_used_blocks() * block_size();} 255 | static void print_stats() {allocator.print_stats();} 256 | }; 257 | 258 | template 259 | block_allocator type_allocator::allocator = block_allocator(sizeof(T)); 260 | 261 | // **************************************** 262 | // my_alloc and my_free (add size tags) 263 | // **************************************** 264 | // ifdefed to either use malloc or the pbbs allocator 265 | // **************************************** 266 | 267 | #ifdef USEMALLOC 268 | 269 | #include 270 | 271 | struct __mallopt { 272 | __mallopt() { 273 | mallopt(M_MMAP_MAX,0); 274 | mallopt(M_TRIM_THRESHOLD,-1); 275 | } 276 | }; 277 | 278 | __mallopt __mallopt_var; 279 | 280 | inline void* my_alloc(size_t i) {return malloc(i);} 281 | inline void my_free(void* p) {free(p);} 282 | void 
allocator_clear() {} 283 | void allocator_reserve(size_t bytes) {} 284 | 285 | #else 286 | 287 | constexpr size_t size_offset = 1; // in size_t sized words 288 | 289 | // needs to be at least size_offset * size_offset(size_t) 290 | inline size_t header_size(size_t n) { // in bytes 291 | return (n >= 1024) ? 64 : (n & 15) ? 8 : (n & 63) ? 16 : 64; 292 | } 293 | 294 | // allocates and tags with a header (8, 16 or 64 bytes) that contains the size 295 | void* my_alloc(size_t n) { 296 | size_t hsize = header_size(n); 297 | void* ptr; 298 | ptr = default_allocator.allocate(n + hsize); 299 | void* r = (void*) (((char*) ptr) + hsize); 300 | *(((size_t*) r)-size_offset) = n; // puts size in header 301 | return r; 302 | } 303 | 304 | // reads the size, offsets the header and frees 305 | void my_free(void *ptr) { 306 | size_t n = *(((size_t*) ptr)-size_offset); 307 | size_t hsize = header_size(n); 308 | if (hsize > (1ul << 48)) { 309 | cout << "corrupted header in my_free" << endl; 310 | throw std::bad_alloc(); 311 | } 312 | default_allocator.deallocate((void*) (((char*) ptr) - hsize), n + hsize); 313 | } 314 | 315 | void allocator_clear() { 316 | default_allocator.clear(); 317 | } 318 | 319 | void allocator_reserve(size_t bytes) { 320 | default_allocator.reserve(bytes); 321 | } 322 | #endif 323 | 324 | // **************************************** 325 | // common across allocators (key routines used by sequences) 326 | // **************************************** 327 | 328 | // Does not initialize the array 329 | template 330 | E* new_array_no_init(size_t n) { 331 | return (E*) my_alloc(n * sizeof(E)); 332 | } 333 | 334 | // Initializes in parallel 335 | template 336 | E* new_array(size_t n) { 337 | E* r = new_array_no_init(n); 338 | if (!std::is_trivially_default_constructible::value) 339 | parallel_for(0, n, [&] (size_t i) { 340 | new ((void*) (r+i)) E;}); 341 | return r; 342 | } 343 | 344 | inline void free_array(void* a) { 345 | my_free(a); 346 | } 347 | 348 | // Destructs 
in parallel 349 | template 350 | void delete_array(E* A, size_t n) { 351 | // C++14 -- supported by gnu C++11 352 | if (!std::is_trivially_destructible::value) 353 | parallel_for(0, n, [&] (size_t i) { 354 | A[i].~E();}); 355 | else if (std::is_pointer::value) 356 | parallel_for(0, n, [&] (size_t i) { 357 | if (A[i] != nullptr) delete A[i];}); 358 | my_free(A); 359 | } 360 | } 361 | -------------------------------------------------------------------------------- /wheels/lockfreehash/lprobe/benchmark_lprobe.h: -------------------------------------------------------------------------------- 1 | #ifndef BENCHMARK_LOCKFREE_HT 2 | #define BENCHMARK_LOCKFREE_HT 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include "cycle_timer.h" 14 | #include "hash_table.h" 15 | #include "thread_service.h" 16 | 17 | #define NUM_ITERS 3 18 | #define MAX_THREADS 24 19 | 20 | #define C_NUM_ELEMS 76800*24 21 | #include "data.h" 22 | using namespace pbbs; 23 | 24 | 25 | 26 | class BenchmarkLockFreeHT 27 | { 28 | public: 29 | BenchmarkLockFreeHT(int op_count, int capacity, 30 | int rweight, int idweight, 31 | int thread_count, 32 | double load_factor); 33 | 34 | void benchmark_correctness(); 35 | void benchmark_hp(); 36 | void benchmark_all(); 37 | void run(); 38 | 39 | private: 40 | int m_rweight; 41 | int m_idweight; 42 | 43 | int m_thread_count; 44 | int m_op_count; 45 | int m_capacity; 46 | double m_load_factor; 47 | }; 48 | 49 | BenchmarkLockFreeHT::BenchmarkLockFreeHT(int op_count, int capacity, 50 | int rweight, int idweight, 51 | int thread_count, double load_factor) 52 | { 53 | std::cout << "*** BENCHMARKING LockFreeHT ***" << std::endl; 54 | m_op_count = op_count; 55 | m_load_factor = load_factor; 56 | m_capacity = capacity; 57 | m_thread_count = thread_count; 58 | 59 | m_rweight = rweight; 60 | m_idweight = idweight; 61 | } 62 | 63 | void BenchmarkLockFreeHT::benchmark_correctness() 64 | { 65 | bool correct 
= true; 66 | 67 | //Lockfree_hash_table ht(2 * C_NUM_ELEMS, m_thread_count); 68 | Table ht(2*C_NUM_ELEMS, hashKV(), 1.3); 69 | std::unordered_map map; 70 | map.reserve(2 * C_NUM_ELEMS); 71 | 72 | std::random_device rd; 73 | std::mt19937 mt(rd()); 74 | std::uniform_int_distribution rng; 75 | 76 | int elems[C_NUM_ELEMS]; 77 | for (int i = 0; i < C_NUM_ELEMS; i++) 78 | { 79 | //int k = rng(mt); 80 | int k = 100; 81 | elems[i] = k; 82 | map[k] = k; 83 | } 84 | //adding err 85 | //elems[5*C_NUM_ELEMS/24 + 34] = elems[6*C_NUM_ELEMS/24 + 49]; 86 | //elems[22*C_NUM_ELEMS/24 + 199] = elems[9*C_NUM_ELEMS/24 + 347]; 87 | //elems[21*C_NUM_ELEMS/24 + 199] = elems[9*C_NUM_ELEMS/24 + 347]; 88 | //elems[19*C_NUM_ELEMS/24 + 199] = elems[9*C_NUM_ELEMS/24 + 347]; 89 | for (int i=0;i<23;i++) 90 | for (int j=0;j<20;j++) 91 | elems[i*C_NUM_ELEMS/24 + 34+j] = 101+i*20+j; 92 | 93 | pthread_t workers[MAX_THREADS]; 94 | WorkerArgs args[MAX_THREADS]; 95 | 96 | for (int i = 0; i < 24; i++) 97 | { 98 | args[i].num_elems = C_NUM_ELEMS / 24; 99 | args[i].ht_p = (void*)&ht; 100 | args[i].elems = elems; 101 | args[i].start = i * (C_NUM_ELEMS / 24); 102 | args[i].tid = i; 103 | 104 | pthread_create(&workers[i], NULL, thread_checkmiss>, (void*)&args[i]); 105 | } 106 | 107 | for (int i = 0; i < 24; i++) 108 | { 109 | pthread_join(workers[i], NULL); 110 | } 111 | 112 | std::cout << "hash table count is " << ht.count() << std::endl; 113 | std::cout << "miss is " << miss << std::endl; 114 | assert(miss==461); 115 | 116 | int count = 0; 117 | for (std::pair e : map) 118 | { 119 | //std::pair r = ht.search(e.first, 0); 120 | struct KV res = ht.find(e.first); 121 | std::pair r; 122 | if (res.k == -1) 123 | r = {-1,false}; 124 | else 125 | r = {res.v,true}; 126 | if (!r.second || e.second != r.first) 127 | { 128 | 129 | std::cout << "\t" << "Expected value, Received value, Received result = " << e.second << " " << r.second << " "<< r.first << std::endl; 130 | correct = false; 131 | count++; 132 | } 133 | } 
134 | 135 | std::cout << "\t" << count << "/" << C_NUM_ELEMS << " errors" << std::endl; 136 | 137 | if (correct) 138 | std::cout << "\t" << "Correctness test passed" << std::endl; 139 | else 140 | std::cout << "\t" << "Correctness test failed" << std::endl; 141 | 142 | } 143 | 144 | void BenchmarkLockFreeHT::benchmark_hp() 145 | { 146 | //Lockfree_hash_table ht(400000, m_thread_count); 147 | Table ht(400000, hashKV(), 1.3); 148 | 149 | std::random_device rd; 150 | std::mt19937 mt(rd()); 151 | std::uniform_int_distribution rng; 152 | 153 | std::array weights; 154 | weights[0] = m_rweight; 155 | weights[1] = m_idweight; 156 | weights[2] = m_idweight; 157 | 158 | std::default_random_engine g; 159 | std::discrete_distribution drng(weights.begin(), weights.end()); 160 | 161 | int insert[200000]; 162 | for (int i = 0; i < 200000; i++) 163 | { 164 | int k = rng(mt); 165 | int v = rng(mt); 166 | insert[i] = k; 167 | //ht.insert(k, v, 0); 168 | ht.insert({k,v}); 169 | } 170 | 171 | pthread_t workers[MAX_THREADS]; 172 | WorkerArgs args[MAX_THREADS]; 173 | 174 | int num_elems = 200000 / m_thread_count; 175 | for (int i = 0; i < m_thread_count; i++) 176 | { 177 | args[i].num_elems = num_elems; 178 | args[i].ht_p = (void*)&ht; 179 | args[i].elems = insert; 180 | args[i].start = i * num_elems; 181 | args[i].tid = i; 182 | args[i].remove = i < (m_thread_count / 4); 183 | 184 | pthread_create(&workers[i], NULL, thread_remove>, (void*)&args[i]); 185 | } 186 | 187 | for (int i = 0; i < m_thread_count; i++) 188 | { 189 | pthread_join(workers[i], NULL); 190 | } 191 | 192 | std::cout << "\t" << "Hazard Pointer test passed" << std::endl; 193 | 194 | } 195 | 196 | void BenchmarkLockFreeHT::benchmark_all() 197 | { 198 | // Lockfree_hash_table ht(m_capacity, m_thread_count); 199 | Table ht(m_capacity, hashKV(), 1.3); 200 | 201 | std::random_device rd; 202 | std::mt19937 mt(rd()); 203 | std::uniform_int_distribution rng; 204 | 205 | std::array weights; 206 | weights[0] = m_rweight; 207 | 
weights[1] = m_idweight; 208 | weights[2] = m_idweight; 209 | 210 | std::default_random_engine g; 211 | std::discrete_distribution drng(weights.begin(), weights.end()); 212 | 213 | // Warm-up table to load factor 214 | int num_warmup = static_cast(static_cast(m_capacity) * m_load_factor); 215 | for (int i = 0; i < num_warmup; i++) 216 | { 217 | int k = rng(mt); 218 | int v = rng(mt); 219 | 220 | //ht.insert(k, v, 0); 221 | ht.insert({k,v}); 222 | } 223 | 224 | // Run benchmark 225 | std::vector results; 226 | for (int iter = 0; iter < NUM_ITERS; iter++) 227 | { 228 | int num_elems = m_op_count / m_thread_count; 229 | pthread_t workers[MAX_THREADS]; 230 | WorkerArgs args[MAX_THREADS]; 231 | 232 | double start = CycleTimer::currentSeconds(); 233 | for (int i = 0; i < m_thread_count; i++) 234 | { 235 | args[i].num_elems = num_elems; 236 | args[i].rweight = m_rweight; 237 | args[i].iweight = m_idweight / 2; 238 | args[i].dweight = m_idweight / 2; 239 | args[i].ht_p = (void*)&ht; 240 | args[i].tid = i; 241 | pthread_create(&workers[i], NULL, thread_service>, (void*)&args[i]); 242 | } 243 | 244 | for (int i = 0; i < m_thread_count; i++) 245 | { 246 | pthread_join(workers[i], NULL); 247 | } 248 | double time = CycleTimer::currentSeconds() - start; 249 | results.push_back(time); 250 | } 251 | 252 | // Publish Results 253 | double best_time = *std::min_element(results.begin(), results.end()); 254 | double avg_time = std::accumulate(results.begin(), results.end(), 0.0) / static_cast(results.size()); 255 | std::cout << "\t" << "Max Throughput: " << m_op_count / best_time / 1000.0 << " ops/ms" << std::endl; 256 | std::cout << "\t" << "Avg Throughput: " << m_op_count / avg_time / 1000.0 << " ops/ms" << std::endl; 257 | 258 | results.clear(); 259 | 260 | int* keys = new int[m_op_count]; 261 | 262 | for (int iter = 0; iter < NUM_ITERS; iter++) 263 | { 264 | int num_elems = m_op_count / m_thread_count; 265 | pthread_t workers[MAX_THREADS]; 266 | WorkerArgs args[MAX_THREADS]; 267 | 
268 | double start = CycleTimer::currentSeconds(); 269 | for (int i = 0; i < m_thread_count; i++) 270 | { 271 | args[i].num_elems = num_elems; 272 | args[i].rweight = m_rweight; 273 | args[i].iweight = m_idweight / 2; 274 | args[i].dweight = m_idweight / 2; 275 | args[i].ht_p = (void*)&ht; 276 | args[i].tid = i; 277 | args[i].elems = keys; 278 | args[i].start = i * num_elems; 279 | pthread_create(&workers[i], NULL, thread_service_low_contention>, (void*)&args[i]); 280 | } 281 | 282 | for (int i = 0; i < m_thread_count; i++) 283 | { 284 | pthread_join(workers[i], NULL); 285 | } 286 | double time = CycleTimer::currentSeconds() - start; 287 | results.push_back(time); 288 | } 289 | 290 | // Publish Results 291 | best_time = *std::min_element(results.begin(), results.end()); 292 | avg_time = std::accumulate(results.begin(), results.end(), 0.0) / static_cast(results.size()); 293 | std::cout << "\t" << "Max Throughput (Low): " << m_op_count / best_time / 1000.0 << " ops/ms" << std::endl; 294 | std::cout << "\t" << "Avg Throughput (Low): " << m_op_count / avg_time / 1000.0 << " ops/ms" << std::endl; 295 | 296 | results.clear(); 297 | 298 | for (int iter = 0; iter < NUM_ITERS; iter++) 299 | { 300 | int num_elems = m_op_count / m_thread_count; 301 | pthread_t workers[MAX_THREADS]; 302 | WorkerArgs args[MAX_THREADS]; 303 | 304 | double start = CycleTimer::currentSeconds(); 305 | for (int i = 0; i < m_thread_count; i++) 306 | { 307 | args[i].num_elems = num_elems; 308 | args[i].rweight = m_rweight; 309 | args[i].iweight = m_idweight / 2; 310 | args[i].dweight = m_idweight / 2; 311 | args[i].ht_p = (void*)&ht; 312 | args[i].tid = i; 313 | //ht.insert(0, 0, 0); 314 | ht.insert({0,0}); 315 | pthread_create(&workers[i], NULL, thread_service_high_contention>, (void*)&args[i]); 316 | } 317 | 318 | for (int i = 0; i < m_thread_count; i++) 319 | { 320 | pthread_join(workers[i], NULL); 321 | } 322 | double time = CycleTimer::currentSeconds() - start; 323 | results.push_back(time); 324 
| } 325 | 326 | // Publish Results 327 | best_time = *std::min_element(results.begin(), results.end()); 328 | avg_time = std::accumulate(results.begin(), results.end(), 0.0) / static_cast(results.size()); 329 | std::cout << "\t" << "Max Throughput (High): " << m_op_count / best_time / 1000.0 << " ops/ms" << std::endl; 330 | std::cout << "\t" << "Avg Throughput (High): " << m_op_count / avg_time / 1000.0 << " ops/ms" << std::endl; 331 | 332 | 333 | } 334 | 335 | void BenchmarkLockFreeHT::run() 336 | { 337 | benchmark_correctness(); 338 | // benchmark_hp(); 339 | // benchmark_all(); 340 | } 341 | 342 | #endif 343 | -------------------------------------------------------------------------------- /wheels/lockfreehash/lprobe/benchmark_lprobe_ptr.h: -------------------------------------------------------------------------------- 1 | #ifndef BENCHMARK_LOCKFREE_HT 2 | #define BENCHMARK_LOCKFREE_HT 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "assert.h" 12 | 13 | #include "cycle_timer.h" 14 | #include "hash_table.h" 15 | #include "thread_service_ptr.h" 16 | 17 | #define NUM_ITERS 3 18 | #define MAX_THREADS 24 19 | 20 | #define C_NUM_ELEMS 76800*24 21 | #include "data_ptr.h" 22 | using namespace pbbs; 23 | 24 | 25 | 26 | class BenchmarkLockFreeHT 27 | { 28 | public: 29 | BenchmarkLockFreeHT(int op_count, int capacity, 30 | int rweight, int idweight, 31 | int thread_count, 32 | double load_factor); 33 | 34 | void benchmark_correctness(); 35 | void benchmark_hp(); 36 | void benchmark_all(); 37 | void run(); 38 | 39 | private: 40 | int m_rweight; 41 | int m_idweight; 42 | 43 | int m_thread_count; 44 | int m_op_count; 45 | int m_capacity; 46 | double m_load_factor; 47 | }; 48 | 49 | BenchmarkLockFreeHT::BenchmarkLockFreeHT(int op_count, int capacity, 50 | int rweight, int idweight, 51 | int thread_count, double load_factor) 52 | { 53 | std::cout << "*** BENCHMARKING LockFreeHT ***" << std::endl; 54 | m_op_count = 
op_count; 55 | m_load_factor = load_factor; 56 | m_capacity = capacity; 57 | m_thread_count = thread_count; 58 | 59 | m_rweight = rweight; 60 | m_idweight = idweight; 61 | } 62 | 63 | void BenchmarkLockFreeHT::benchmark_correctness() 64 | { 65 | bool correct = true; 66 | 67 | //Lockfree_hash_table ht(2 * C_NUM_ELEMS, m_thread_count); 68 | Table ht(2*C_NUM_ELEMS, hashKV(), 1.3); 69 | std::unordered_map map; 70 | map.reserve(2 * C_NUM_ELEMS); 71 | 72 | std::random_device rd; 73 | std::mt19937 mt(rd()); 74 | std::uniform_int_distribution rng; 75 | 76 | int elems[C_NUM_ELEMS]; 77 | for (int i = 0; i < C_NUM_ELEMS; i++) 78 | { 79 | //int k = rng(mt); 80 | int k = 100; 81 | elems[i] = k; 82 | map[k] = k; 83 | } 84 | //adding err 85 | //elems[5*C_NUM_ELEMS/24 + 34] = elems[6*C_NUM_ELEMS/24 + 49]; 86 | //elems[22*C_NUM_ELEMS/24 + 199] = elems[9*C_NUM_ELEMS/24 + 347]; 87 | //elems[21*C_NUM_ELEMS/24 + 199] = elems[9*C_NUM_ELEMS/24 + 347]; 88 | //elems[19*C_NUM_ELEMS/24 + 199] = elems[9*C_NUM_ELEMS/24 + 347]; 89 | for (int i=0;i<23;i++) 90 | for (int j=0;j<20;j++) 91 | elems[i*C_NUM_ELEMS/24 + 34+j] = 101+i*20+j; 92 | 93 | pthread_t workers[MAX_THREADS]; 94 | WorkerArgs args[MAX_THREADS]; 95 | 96 | for (int i = 0; i < 24; i++) 97 | { 98 | args[i].num_elems = C_NUM_ELEMS / 24; 99 | args[i].ht_p = (void*)&ht; 100 | args[i].elems = elems; 101 | args[i].start = i * (C_NUM_ELEMS / 24); 102 | args[i].tid = i; 103 | 104 | pthread_create(&workers[i], NULL, thread_checkmiss>, (void*)&args[i]); 105 | } 106 | 107 | for (int i = 0; i < 24; i++) 108 | { 109 | pthread_join(workers[i], NULL); 110 | } 111 | 112 | 113 | std::cout << "hash table count is " << ht.count() << std::endl; 114 | std::cout << "miss is " << miss << std::endl; 115 | assert(miss==461); 116 | int count = 0; 117 | for (std::pair e : map) 118 | { 119 | //std::pair r = ht.search(e.first, 0); 120 | struct KV *res = ht.find(e.first); 121 | std::pair r; 122 | if (res==nullptr || res->k == -1) 123 | r = {-1,false}; 124 | else 
125 | r = {res->v,true}; 126 | if (!r.second || e.second != r.first) 127 | { 128 | 129 | std::cout << "\t" << "Expected value, Received value, Received result = " << e.second << " " << r.second << " "<< r.first << std::endl; 130 | correct = false; 131 | count++; 132 | } 133 | } 134 | 135 | std::cout << "\t" << count << "/" << C_NUM_ELEMS << " errors" << std::endl; 136 | 137 | if (correct) 138 | std::cout << "\t" << "Correctness test passed" << std::endl; 139 | else 140 | std::cout << "\t" << "Correctness test failed" << std::endl; 141 | 142 | } 143 | 144 | void BenchmarkLockFreeHT::benchmark_hp() 145 | { 146 | //Lockfree_hash_table ht(400000, m_thread_count); 147 | Table ht(400000, hashKV(), 1.3); 148 | 149 | std::random_device rd; 150 | std::mt19937 mt(rd()); 151 | std::uniform_int_distribution rng; 152 | 153 | std::array weights; 154 | weights[0] = m_rweight; 155 | weights[1] = m_idweight; 156 | weights[2] = m_idweight; 157 | 158 | std::default_random_engine g; 159 | std::discrete_distribution drng(weights.begin(), weights.end()); 160 | 161 | int insert[200000]; 162 | for (int i = 0; i < 200000; i++) 163 | { 164 | int k = rng(mt); 165 | int v = rng(mt); 166 | insert[i] = k; 167 | //ht.insert(k, v, 0); 168 | ht.insert(new struct KV(k,v)); 169 | } 170 | 171 | pthread_t workers[MAX_THREADS]; 172 | WorkerArgs args[MAX_THREADS]; 173 | 174 | int num_elems = 200000 / m_thread_count; 175 | for (int i = 0; i < m_thread_count; i++) 176 | { 177 | args[i].num_elems = num_elems; 178 | args[i].ht_p = (void*)&ht; 179 | args[i].elems = insert; 180 | args[i].start = i * num_elems; 181 | args[i].tid = i; 182 | args[i].remove = i < (m_thread_count / 4); 183 | 184 | pthread_create(&workers[i], NULL, thread_remove>, (void*)&args[i]); 185 | } 186 | 187 | for (int i = 0; i < m_thread_count; i++) 188 | { 189 | pthread_join(workers[i], NULL); 190 | } 191 | 192 | std::cout << "\t" << "Hazard Pointer test passed" << std::endl; 193 | 194 | } 195 | 196 | void 
BenchmarkLockFreeHT::benchmark_all() 197 | { 198 | // Lockfree_hash_table ht(m_capacity, m_thread_count); 199 | Table ht(m_capacity, hashKV(), 1.3); 200 | 201 | std::random_device rd; 202 | std::mt19937 mt(rd()); 203 | std::uniform_int_distribution rng; 204 | 205 | std::array weights; 206 | weights[0] = m_rweight; 207 | weights[1] = m_idweight; 208 | weights[2] = m_idweight; 209 | 210 | std::default_random_engine g; 211 | std::discrete_distribution drng(weights.begin(), weights.end()); 212 | 213 | // Warm-up table to load factor 214 | int num_warmup = static_cast(static_cast(m_capacity) * m_load_factor); 215 | for (int i = 0; i < num_warmup; i++) 216 | { 217 | int k = rng(mt); 218 | int v = rng(mt); 219 | 220 | //ht.insert(k, v, 0); 221 | ht.insert(new struct KV(k,v)); 222 | } 223 | 224 | // Run benchmark 225 | std::vector results; 226 | for (int iter = 0; iter < NUM_ITERS; iter++) 227 | { 228 | int num_elems = m_op_count / m_thread_count; 229 | pthread_t workers[MAX_THREADS]; 230 | WorkerArgs args[MAX_THREADS]; 231 | 232 | double start = CycleTimer::currentSeconds(); 233 | for (int i = 0; i < m_thread_count; i++) 234 | { 235 | args[i].num_elems = num_elems; 236 | args[i].rweight = m_rweight; 237 | args[i].iweight = m_idweight / 2; 238 | args[i].dweight = m_idweight / 2; 239 | args[i].ht_p = (void*)&ht; 240 | args[i].tid = i; 241 | pthread_create(&workers[i], NULL, thread_service>, (void*)&args[i]); 242 | } 243 | 244 | for (int i = 0; i < m_thread_count; i++) 245 | { 246 | pthread_join(workers[i], NULL); 247 | } 248 | double time = CycleTimer::currentSeconds() - start; 249 | results.push_back(time); 250 | } 251 | 252 | // Publish Results 253 | double best_time = *std::min_element(results.begin(), results.end()); 254 | double avg_time = std::accumulate(results.begin(), results.end(), 0.0) / static_cast(results.size()); 255 | std::cout << "\t" << "Max Throughput: " << m_op_count / best_time / 1000.0 << " ops/ms" << std::endl; 256 | std::cout << "\t" << "Avg 
Throughput: " << m_op_count / avg_time / 1000.0 << " ops/ms" << std::endl; 257 | 258 | results.clear(); 259 | 260 | int* keys = new int[m_op_count]; 261 | 262 | for (int iter = 0; iter < NUM_ITERS; iter++) 263 | { 264 | int num_elems = m_op_count / m_thread_count; 265 | pthread_t workers[MAX_THREADS]; 266 | WorkerArgs args[MAX_THREADS]; 267 | 268 | double start = CycleTimer::currentSeconds(); 269 | for (int i = 0; i < m_thread_count; i++) 270 | { 271 | args[i].num_elems = num_elems; 272 | args[i].rweight = m_rweight; 273 | args[i].iweight = m_idweight / 2; 274 | args[i].dweight = m_idweight / 2; 275 | args[i].ht_p = (void*)&ht; 276 | args[i].tid = i; 277 | args[i].elems = keys; 278 | args[i].start = i * num_elems; 279 | pthread_create(&workers[i], NULL, thread_service_low_contention>, (void*)&args[i]); 280 | } 281 | 282 | for (int i = 0; i < m_thread_count; i++) 283 | { 284 | pthread_join(workers[i], NULL); 285 | } 286 | double time = CycleTimer::currentSeconds() - start; 287 | results.push_back(time); 288 | } 289 | 290 | // Publish Results 291 | best_time = *std::min_element(results.begin(), results.end()); 292 | avg_time = std::accumulate(results.begin(), results.end(), 0.0) / static_cast(results.size()); 293 | std::cout << "\t" << "Max Throughput (Low): " << m_op_count / best_time / 1000.0 << " ops/ms" << std::endl; 294 | std::cout << "\t" << "Avg Throughput (Low): " << m_op_count / avg_time / 1000.0 << " ops/ms" << std::endl; 295 | 296 | results.clear(); 297 | 298 | for (int iter = 0; iter < NUM_ITERS; iter++) 299 | { 300 | int num_elems = m_op_count / m_thread_count; 301 | pthread_t workers[MAX_THREADS]; 302 | WorkerArgs args[MAX_THREADS]; 303 | 304 | double start = CycleTimer::currentSeconds(); 305 | for (int i = 0; i < m_thread_count; i++) 306 | { 307 | args[i].num_elems = num_elems; 308 | args[i].rweight = m_rweight; 309 | args[i].iweight = m_idweight / 2; 310 | args[i].dweight = m_idweight / 2; 311 | args[i].ht_p = (void*)&ht; 312 | args[i].tid = i; 313 | 
//ht.insert(0, 0, 0); 314 | ht.insert(new struct KV(0,0)); 315 | pthread_create(&workers[i], NULL, thread_service_high_contention>, (void*)&args[i]); 316 | } 317 | 318 | for (int i = 0; i < m_thread_count; i++) 319 | { 320 | pthread_join(workers[i], NULL); 321 | } 322 | double time = CycleTimer::currentSeconds() - start; 323 | results.push_back(time); 324 | } 325 | 326 | // Publish Results 327 | best_time = *std::min_element(results.begin(), results.end()); 328 | avg_time = std::accumulate(results.begin(), results.end(), 0.0) / static_cast(results.size()); 329 | std::cout << "\t" << "Max Throughput (High): " << m_op_count / best_time / 1000.0 << " ops/ms" << std::endl; 330 | std::cout << "\t" << "Avg Throughput (High): " << m_op_count / avg_time / 1000.0 << " ops/ms" << std::endl; 331 | 332 | 333 | } 334 | 335 | void BenchmarkLockFreeHT::run() 336 | { 337 | benchmark_correctness(); 338 | // benchmark_hp(); 339 | // benchmark_all(); 340 | } 341 | 342 | #endif 343 | -------------------------------------------------------------------------------- /wheels/lockfreehash/lprobe/block_allocator.h: -------------------------------------------------------------------------------- 1 | // This code is part of the Problem Based Benchmark Suite (PBBS) 2 | // Copyright (c) 2016 Guy Blelloch, Daniel Ferizovic, and the PBBS team 3 | // 4 | // Permission is hereby granted, free of charge, to any person obtaining a 5 | // copy of this software and associated documentation files (the 6 | // "Software"), to deal in the Software without restriction, including 7 | // without limitation the rights (to use, copy, modify, merge, publish, 8 | // distribute, sublicense, and/or sell copies of the Software, and to 9 | // permit persons to whom the Software is furnished to do so, subject to 10 | // the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included 13 | // in all copies or substantial portions of the Software. 
14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 16 | // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 | // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 19 | // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 20 | // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 21 | // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 22 | 23 | // A concurrent allocator for any fixed type T 24 | // Keeps a local pool per processor 25 | // Grabs list_size elements from a global pool if empty, and 26 | // Returns list_size elements to the global pool when local pool=2*list_size 27 | // Keeps track of number of allocated elements. 28 | // Probably more efficient than a general purpose allocator 29 | 30 | #pragma once 31 | 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include "concurrent_stack.h" 37 | #include "utilities.h" 38 | #include "memory_size.h" 39 | 40 | struct block_allocator { 41 | private: 42 | 43 | static const size_t default_list_bytes = (1 << 22) - 64; // in bytes 44 | static const size_t pad_size = 256; 45 | 46 | struct block { 47 | block* next; 48 | }; 49 | 50 | using block_p = block*; 51 | 52 | struct alignas(64) thread_list { 53 | size_t sz; 54 | block_p head; 55 | block_p mid; 56 | char cache_line[pad_size]; 57 | thread_list() : sz(0), head(NULL) {}; 58 | }; 59 | 60 | bool initialized{false}; 61 | block_p initialize_list(block_p); 62 | block_p get_list(); 63 | concurrent_stack pool_roots; 64 | concurrent_stack global_stack; 65 | thread_list* local_lists; 66 | 67 | size_t list_length; 68 | size_t max_blocks; 69 | size_t block_size_; 70 | //std::atomic blocks_allocated; 71 | size_t blocks_allocated; 72 | char* allocate_blocks(size_t num_blocks); 73 | 74 | public: 75 | static int thread_count; 76 | void* alloc(); 77 | void free(void*); 78 | void 
reserve(size_t n); 79 | void clear(); 80 | void print_stats(); 81 | size_t block_size () {return block_size_;} 82 | size_t num_allocated_blocks() {return blocks_allocated;} 83 | size_t num_used_blocks(); 84 | 85 | ~block_allocator(); 86 | block_allocator(size_t block_size, 87 | size_t reserved_blocks = 0, 88 | size_t list_length_ = 0, 89 | size_t max_blocks_ = 0); 90 | block_allocator() {}; 91 | }; 92 | 93 | int block_allocator::thread_count = num_workers(); 94 | 95 | // Allocate a new list of list_length elements 96 | 97 | auto block_allocator::initialize_list(block_p start) -> block_p { 98 | parallel_for (0, list_length - 1, [&] (size_t i) { 99 | block_p p = (block_p) (((char*) start) + i * block_size_); 100 | p->next = (block_p) (((char*) p) + block_size_); 101 | }, 1000, true); 102 | block_p last = (block_p) (((char*) start) + (list_length-1) * block_size_); 103 | last->next = NULL; 104 | return start; 105 | } 106 | 107 | size_t block_allocator::num_used_blocks() { 108 | size_t free_blocks = global_stack.size()*list_length; 109 | for (int i = 0; i < thread_count; ++i) 110 | free_blocks += local_lists[i].sz; 111 | return blocks_allocated - free_blocks; 112 | } 113 | 114 | auto block_allocator::allocate_blocks(size_t num_blocks) -> char* { 115 | //char* start = (char*) aligned_alloc(pad_size, 116 | //num_blocks * block_size_+ pad_size); 117 | char* start = (char*) pbbs::my_alloc(num_blocks * block_size_); 118 | if (start == NULL) { 119 | fprintf(stderr, "Cannot allocate space in block_allocator"); 120 | exit(1); } 121 | 122 | pbbs::fetch_and_add(&blocks_allocated, num_blocks); // atomic 123 | 124 | if (blocks_allocated > max_blocks) { 125 | fprintf(stderr, "Too many blocks in block_allocator, change max_blocks"); 126 | exit(1); } 127 | 128 | pool_roots.push(start); // keep track so can free later 129 | return start; 130 | } 131 | 132 | // Either grab a list from the global pool, or if there is none 133 | // then allocate a new list 134 | auto 
block_allocator::get_list() -> block_p { 135 | maybe rem = global_stack.pop(); 136 | if (rem) return *rem; 137 | block_p start = (block_p) allocate_blocks(list_length); 138 | return initialize_list(start); 139 | } 140 | 141 | // Allocate n elements across however many lists are needed (rounded up) 142 | void block_allocator::reserve(size_t n) { 143 | size_t num_lists = thread_count + ceil(n / (double)list_length); 144 | char* start = allocate_blocks(list_length*num_lists); 145 | parallel_for(0, num_lists, [&] (size_t i) { 146 | block_p offset = (block_p) (start + i * list_length * block_size_); 147 | global_stack.push(initialize_list(offset)); 148 | }); 149 | } 150 | 151 | void block_allocator::print_stats() { 152 | size_t used = num_used_blocks(); 153 | size_t allocated = num_allocated_blocks(); 154 | size_t size = block_size(); 155 | std::cout << "Used: " << used << ", allocated: " << allocated 156 | << ", block size: " << size 157 | << ", bytes: " << size*allocated << std::endl; 158 | } 159 | 160 | block_allocator::block_allocator(size_t block_size, 161 | size_t reserved_blocks, 162 | size_t list_length_, 163 | size_t max_blocks_) { 164 | blocks_allocated = 0; 165 | block_size_ = block_size; 166 | if (list_length_ == 0) 167 | list_length = default_list_bytes / block_size; 168 | else list_length = list_length_ / block_size; 169 | if (max_blocks_ == 0) 170 | max_blocks = (3*getMemorySize()/block_size)/4; 171 | else max_blocks = max_blocks_; 172 | 173 | reserve(reserved_blocks); 174 | 175 | // all local lists start out empty 176 | local_lists = new thread_list[thread_count]; 177 | initialized = true; 178 | } 179 | 180 | void block_allocator::clear() { 181 | if (num_used_blocks() > 0) 182 | cout << "Warning: not clearing memory pool, block_size=" << block_size() 183 | << " : allocated blocks remain" << endl; 184 | else { 185 | // clear lists 186 | for (int i = 0; i < thread_count; ++i) 187 | local_lists[i].sz = 0; 188 | 189 | // throw away all allocated memory 190 | 
maybe x; 191 | while ((x = pool_roots.pop())) pbbs::my_free(*x); //std::free(*x); 192 | pool_roots.clear(); 193 | global_stack.clear(); 194 | blocks_allocated = 0; 195 | } 196 | } 197 | 198 | block_allocator::~block_allocator() { 199 | clear(); 200 | delete[] local_lists; 201 | } 202 | 203 | void block_allocator::free(void* ptr) { 204 | block_p new_node = (block_p) ptr; 205 | int id = worker_id(); 206 | 207 | if (local_lists[id].sz == list_length+1) { 208 | local_lists[id].mid = local_lists[id].head; 209 | } else if (local_lists[id].sz == 2*list_length) { 210 | global_stack.push(local_lists[id].mid->next); 211 | local_lists[id].mid->next = NULL; 212 | local_lists[id].sz = list_length; 213 | } 214 | new_node->next = local_lists[id].head; 215 | local_lists[id].head = new_node; 216 | local_lists[id].sz++; 217 | } 218 | 219 | inline void* block_allocator::alloc() { 220 | int id = worker_id(); 221 | 222 | if (local_lists[id].sz == 0) { 223 | local_lists[id].head = get_list(); 224 | local_lists[id].sz = list_length; 225 | } 226 | 227 | local_lists[id].sz--; 228 | block_p p = local_lists[id].head; 229 | local_lists[id].head = local_lists[id].head->next; 230 | 231 | return (void*) p; 232 | } 233 | 234 | -------------------------------------------------------------------------------- /wheels/lockfreehash/lprobe/concurrent_stack.h: -------------------------------------------------------------------------------- 1 | // This code is part of the Problem Based Benchmark Suite (PBBS) 2 | // Copyright (c) 2016 Guy Blelloch, Daniel Ferizovic, and the PBBS team 3 | // 4 | // Permission is hereby granted, free of charge, to any person obtaining a 5 | // copy of this software and associated documentation files (the 6 | // "Software"), to deal in the Software without restriction, including 7 | // without limitation the rights (to use, copy, modify, merge, publish, 8 | // distribute, sublicense, and/or sell copies of the Software, and to 9 | // permit persons to whom the Software is 
furnished to do so, subject to
// the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

// Lock free, linearizable implementation of a concurrent stack
// supporting:
//    push
//    pop
//    size
// Works for elements of any type T
// It requires memory proportional to the largest it has been
// This can be cleared, but only when no one else is using it.
31 | // Requires 128-bit-compare-and-swap 32 | // Counter could overflow "in theory", but would require over 500 years even 33 | // if updated every nanosecond (and must be updated sequentially) 34 | 35 | #pragma once 36 | #include 37 | #include 38 | #include 39 | #include "utilities.h" 40 | 41 | template 42 | class concurrent_stack { 43 | 44 | struct Node { 45 | T value; 46 | Node* next; 47 | size_t length; 48 | }; 49 | 50 | class alignas(64) prim_concurrent_stack { 51 | struct nodeAndCounter { 52 | Node* node; 53 | uint64_t counter; 54 | }; 55 | 56 | union CAS_t { 57 | __uint128_t x; 58 | nodeAndCounter NC; 59 | }; 60 | CAS_t head; 61 | 62 | size_t length(Node* n) { 63 | if (n == NULL) return 0; 64 | else return n->length; 65 | } 66 | 67 | public: 68 | prim_concurrent_stack() { 69 | head.NC.node = NULL; 70 | head.NC.counter = 0; 71 | std::atomic_thread_fence(std::memory_order_seq_cst); 72 | } 73 | 74 | size_t size() { 75 | return length(head.NC.node);} 76 | 77 | void push(Node* newNode){ 78 | CAS_t oldHead, newHead; 79 | do { 80 | oldHead = head; 81 | newNode->next = oldHead.NC.node; 82 | newNode->length = length(oldHead.NC.node) + 1; 83 | //std::atomic_thread_fence(std::memory_order_release); 84 | std::atomic_thread_fence(std::memory_order_seq_cst); 85 | newHead.NC.node = newNode; 86 | newHead.NC.counter = oldHead.NC.counter + 1; 87 | } while (!__sync_bool_compare_and_swap_16(&head.x,oldHead.x, newHead.x)); 88 | } 89 | Node* pop() { 90 | Node* result; 91 | CAS_t oldHead, newHead; 92 | do { 93 | oldHead = head; 94 | result = oldHead.NC.node; 95 | if (result == NULL) return result; 96 | newHead.NC.node = result->next; 97 | newHead.NC.counter = oldHead.NC.counter + 1; 98 | } while (!__sync_bool_compare_and_swap_16(&head.x,oldHead.x, newHead.x)); 99 | 100 | return result; 101 | } 102 | };// __attribute__((aligned(16))); 103 | 104 | prim_concurrent_stack a; 105 | prim_concurrent_stack b; 106 | 107 | public: 108 | 109 | size_t size() { return a.size();} 110 | 111 | 
void push(T v) { 112 | Node* x = b.pop(); 113 | if (!x) x = (Node*) malloc(sizeof(Node)); 114 | x->value = v; 115 | a.push(x); 116 | } 117 | 118 | maybe pop() { 119 | Node* x = a.pop(); 120 | if (!x) return maybe(); 121 | T r = x->value; 122 | b.push(x); 123 | return maybe(r); 124 | } 125 | 126 | // assumes no push or pop in progress 127 | void clear() { 128 | Node* x; 129 | while ((x = a.pop())) free(x); 130 | while ((x = b.pop())) free(x); 131 | } 132 | 133 | concurrent_stack() {} 134 | ~concurrent_stack() { clear();} 135 | }; 136 | -------------------------------------------------------------------------------- /wheels/lockfreehash/lprobe/cycle_timer.h: -------------------------------------------------------------------------------- 1 | #ifndef _SYRAH_CYCLE_TIMER_H_ 2 | #define _SYRAH_CYCLE_TIMER_H_ 3 | 4 | #if defined(__APPLE__) 5 | #if defined(__x86_64__) 6 | #include 7 | #else 8 | #include 9 | #include 10 | #endif // __x86_64__ or not 11 | 12 | #include // fprintf 13 | #include // exit 14 | 15 | #elif _WIN32 16 | # include 17 | # include 18 | #else 19 | # include 20 | # include 21 | # include 22 | # include 23 | #endif 24 | 25 | 26 | // This uses the cycle counter of the processor. Different 27 | // processors in the system will have different values for this. If 28 | // you process moves across processors, then the delta time you 29 | // measure will likely be incorrect. This is mostly for fine 30 | // grained measurements where the process is likely to be on the 31 | // same processor. For more global things you should use the 32 | // Time interface. 33 | 34 | // Also note that if you processors' speeds change (i.e. processors 35 | // scaling) or if you are in a heterogenous environment, you will 36 | // likely get spurious results. 37 | class CycleTimer { 38 | public: 39 | typedef unsigned long long SysClock; 40 | 41 | ////////// 42 | // Return the current CPU time, in terms of clock ticks. 43 | // Time zero is at some arbitrary point in the past. 
44 | static SysClock currentTicks() { 45 | #if defined(__APPLE__) && !defined(__x86_64__) 46 | return mach_absolute_time(); 47 | #elif defined(_WIN32) 48 | LARGE_INTEGER qwTime; 49 | QueryPerformanceCounter(&qwTime); 50 | return qwTime.QuadPart; 51 | #elif defined(__x86_64__) 52 | unsigned int a, d; 53 | asm volatile("rdtsc" : "=a" (a), "=d" (d)); 54 | return static_cast(a) | 55 | (static_cast(d) << 32); 56 | #elif defined(__ARM_NEON__) && 0 // mrc requires superuser. 57 | unsigned int val; 58 | asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(val)); 59 | return val; 60 | #else 61 | timespec spec; 62 | clock_gettime(CLOCK_THREAD_CPUTIME_ID, &spec); 63 | return CycleTimer::SysClock(static_cast(spec.tv_sec) * 1e9 + static_cast(spec.tv_nsec)); 64 | #endif 65 | } 66 | 67 | ////////// 68 | // Return the current CPU time, in terms of seconds. 69 | // This is slower than currentTicks(). Time zero is at 70 | // some arbitrary point in the past. 71 | static double currentSeconds() { 72 | return currentTicks() * secondsPerTick(); 73 | } 74 | 75 | ////////// 76 | // Return the conversion from seconds to ticks. 77 | static double ticksPerSecond() { 78 | return 1.0/secondsPerTick(); 79 | } 80 | 81 | static const char* tickUnits() { 82 | #if defined(__APPLE__) && !defined(__x86_64__) 83 | return "ns"; 84 | #elif defined(__WIN32__) || defined(__x86_64__) 85 | return "cycles"; 86 | #else 87 | return "ns"; // clock_gettime 88 | #endif 89 | } 90 | 91 | ////////// 92 | // Return the conversion from ticks to seconds. 
93 | static double secondsPerTick() { 94 | static bool initialized = false; 95 | static double secondsPerTick_val; 96 | if (initialized) return secondsPerTick_val; 97 | #if defined(__APPLE__) 98 | #ifdef __x86_64__ 99 | int args[] = {CTL_HW, HW_CPU_FREQ}; 100 | unsigned int Hz; 101 | size_t len = sizeof(Hz); 102 | if (sysctl(args, 2, &Hz, &len, NULL, 0) != 0) { 103 | fprintf(stderr, "Failed to initialize secondsPerTick_val!\n"); 104 | exit(-1); 105 | } 106 | secondsPerTick_val = 1.0 / (double) Hz; 107 | #else 108 | mach_timebase_info_data_t time_info; 109 | mach_timebase_info(&time_info); 110 | 111 | // Scales to nanoseconds without 1e-9f 112 | secondsPerTick_val = (1e-9*static_cast(time_info.numer))/ 113 | static_cast(time_info.denom); 114 | #endif // x86_64 or not 115 | #elif defined(_WIN32) 116 | LARGE_INTEGER qwTicksPerSec; 117 | QueryPerformanceFrequency(&qwTicksPerSec); 118 | secondsPerTick_val = 1.0/static_cast(qwTicksPerSec.QuadPart); 119 | #else 120 | FILE *fp = fopen("/proc/cpuinfo","r"); 121 | char input[1024]; 122 | if (!fp) { 123 | fprintf(stderr, "CycleTimer::resetScale failed: couldn't find /proc/cpuinfo."); 124 | exit(-1); 125 | } 126 | // In case we don't find it, e.g. 
on the N900 127 | secondsPerTick_val = 1e-9; 128 | while (!feof(fp) && fgets(input, 1024, fp)) { 129 | // NOTE(boulos): Because reading cpuinfo depends on dynamic 130 | // frequency scaling it's better to read the @ sign first 131 | float GHz, MHz; 132 | if (strstr(input, "model name")) { 133 | char* at_sign = strstr(input, "@"); 134 | if (at_sign) { 135 | char* after_at = at_sign + 1; 136 | char* GHz_str = strstr(after_at, "GHz"); 137 | char* MHz_str = strstr(after_at, "MHz"); 138 | if (GHz_str) { 139 | *GHz_str = '\0'; 140 | if (1 == sscanf(after_at, "%f", &GHz)) { 141 | //printf("GHz = %f\n", GHz); 142 | secondsPerTick_val = 1e-9f / GHz; 143 | break; 144 | } 145 | } else if (MHz_str) { 146 | *MHz_str = '\0'; 147 | if (1 == sscanf(after_at, "%f", &MHz)) { 148 | //printf("MHz = %f\n", MHz); 149 | secondsPerTick_val = 1e-6f / MHz; 150 | break; 151 | } 152 | } 153 | } 154 | } else if (1 == sscanf(input, "cpu MHz : %f", &MHz)) { 155 | //printf("MHz = %f\n", MHz); 156 | secondsPerTick_val = 1e-6f / MHz; 157 | break; 158 | } 159 | } 160 | fclose(fp); 161 | #endif 162 | 163 | initialized = true; 164 | return secondsPerTick_val; 165 | } 166 | 167 | ////////// 168 | // Return the conversion from ticks to milliseconds. 
169 | static double msPerTick() { 170 | return secondsPerTick() * 1000.0; 171 | } 172 | 173 | private: 174 | CycleTimer(); 175 | }; 176 | 177 | #endif // #ifndef _SYRAH_CYCLE_TIMER_H_ 178 | -------------------------------------------------------------------------------- /wheels/lockfreehash/lprobe/data.h: -------------------------------------------------------------------------------- 1 | #ifndef DATA_ELEMENT_ 2 | #define DATA_ELEMENT_ 3 | #include "utilities.h" 4 | using namespace pbbs; 5 | struct KV { 6 | int k; 7 | int v; 8 | bool operator== (struct KV other) { return k == other.k && v == other.v ;} 9 | bool operator!= (struct KV other) { return k != other.k || v != other.v ;} 10 | KV(int ak, int av) {k=ak;v=av;} 11 | }; 12 | 13 | struct hashKV { 14 | using eType = struct KV; 15 | using kType = int; 16 | eType empty() {return {-1,-1};} 17 | kType getKey(eType v) {return v.k;} 18 | //int hash(kType v) {return v * 999029;} //hash64_2(v);} 19 | int hash(kType v) {return hash64_2(v);} 20 | //int cmp(kType v, kType b) {return (v > b) ? 1 : ((v == b) ? 0 : -1);} 21 | int cmp(kType v, kType b) {return (v == b) ? 
0 : -1;} 22 | bool replaceQ(eType, eType) {return 0;} 23 | eType update(eType v, eType) {return v;} 24 | bool cas(eType* p, eType o, eType n) {return 25 | atomic_compare_and_swap(p, o, n);} 26 | }; 27 | #endif 28 | -------------------------------------------------------------------------------- /wheels/lockfreehash/lprobe/data_ptr.h: -------------------------------------------------------------------------------- 1 | #ifndef DATA_ELEMENT_ 2 | #define DATA_ELEMENT_ 3 | #include "utilities.h" 4 | using namespace pbbs; 5 | struct KV { 6 | int k; 7 | int v; 8 | //bool operator== (struct KV other) { return k == other.k && v == other.v ;} 9 | //bool operator!= (struct KV other) { return k != other.k || v != other.v ;} 10 | KV(int ak, int av) {k=ak;v=av;} 11 | }; 12 | 13 | struct hashKV { 14 | using eType = struct KV*; 15 | using kType = int; 16 | //eType empty() {return new struct KV(-1,-1);} 17 | eType empty() {return nullptr;} 18 | kType getKey(eType v) {return v->k;} 19 | int hash(kType v) {return v * 999029;} //hash64_2(v);} 20 | //int hash(kType v) {return hash64_2(v);} 21 | //int cmp(kType v, kType b) {return (v > b) ? 1 : ((v == b) ? 0 : -1);} 22 | int cmp(kType v, kType b) {return (v == b) ? 
0 : -1;} 23 | bool replaceQ(eType, eType) {return 0;} 24 | eType update(eType v, eType) {return v;} 25 | bool cas(eType* p, eType o, eType n) {return 26 | atomic_compare_and_swap(p, o, n);} 27 | }; 28 | #endif 29 | -------------------------------------------------------------------------------- /wheels/lockfreehash/lprobe/get_time.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | struct timer { 10 | double total_time; 11 | double last_time; 12 | bool on; 13 | std::string name; 14 | struct timezone tzp; 15 | 16 | timer(std::string name = "PBBS time", bool _start = true) 17 | : total_time(0.0), on(false), name(name), tzp({0,0}) { 18 | if (_start) start(); 19 | } 20 | 21 | double get_time() { 22 | timeval now; 23 | gettimeofday(&now, &tzp); 24 | return ((double) now.tv_sec) + ((double) now.tv_usec)/1000000.; 25 | } 26 | 27 | void start () { 28 | on = 1; 29 | last_time = get_time(); 30 | } 31 | 32 | double stop () { 33 | on = 0; 34 | double d = (get_time()-last_time); 35 | total_time += d; 36 | return d; 37 | } 38 | 39 | void reset() { 40 | total_time=0.0; 41 | on=0; 42 | } 43 | 44 | double get_total() { 45 | if (on) return total_time + get_time() - last_time; 46 | else return total_time; 47 | } 48 | 49 | double get_next() { 50 | if (!on) return 0.0; 51 | double t = get_time(); 52 | double td = t - last_time; 53 | total_time += td; 54 | last_time = t; 55 | return td; 56 | } 57 | 58 | void report(double time, std::string str) { 59 | std::ios::fmtflags cout_settings = std::cout.flags(); 60 | std::cout.precision(4); 61 | std::cout << std::fixed; 62 | std::cout << name << ": "; 63 | if (str.length() > 0) 64 | std::cout << str << ": "; 65 | std::cout << time << std::endl; 66 | std::cout.flags(cout_settings); 67 | } 68 | 69 | void total() { 70 | report(get_total(),"total"); 71 | total_time = 0.0; 72 | } 73 | 74 | void reportTotal(std::string str) { 
75 | report(get_total(), str); 76 | } 77 | 78 | void next(std::string str) { 79 | if (on) report(get_next(), str); 80 | } 81 | }; 82 | 83 | static timer _tm; 84 | #define startTime() _tm.start(); 85 | #define nextTime(_string) _tm.next(_string); 86 | -------------------------------------------------------------------------------- /wheels/lockfreehash/lprobe/hash_table.h: -------------------------------------------------------------------------------- 1 | // This code is part of the Problem Based Benchmark Suite (PBBS) 2 | // Copyright (c) 2010 Guy Blelloch and the PBBS team 3 | // 4 | // Permission is hereby granted, free of charge, to any person obtaining a 5 | // copy of this software and associated documentation files (the 6 | // "Software"), to deal in the Software without restriction, including 7 | // without limitation the rights (to use, copy, modify, merge, publish, 8 | // distribute, sublicense, and/or sell copies of the Software, and to 9 | // permit persons to whom the Software is furnished to do so, subject to 10 | // the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included 13 | // in all copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 16 | // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 | // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 19 | // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 20 | // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 21 | // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
22 | #pragma once 23 | #include "utilities.h" 24 | #include "sequence_ops.h" 25 | 26 | namespace pbbs { 27 | 28 | // A "history independent" hash table that supports insertion, and searching 29 | // It is described in the paper 30 | // Julian Shun and Guy E. Blelloch 31 | // Phase-concurrent hash tables for determinism 32 | // SPAA 2014: 96-107 33 | // Insertions can happen in parallel 34 | // Searches can happen in parallel 35 | // Deletion can happen in parallel 36 | // but insertions cannot happen in parallel with searches or deletions 37 | // and searches cannot happen in parallel with deletions 38 | // i.e. each of the three types of operations have to happen in phase 39 | template 40 | class Table { 41 | private: 42 | using eType = typename HASH::eType; 43 | using kType = typename HASH::kType; 44 | size_t m; 45 | eType empty; 46 | HASH hashStruct; 47 | eType* TA; 48 | using index = long; 49 | 50 | static void clear(eType* A, size_t n, eType v) { 51 | auto f = [&] (size_t i) { 52 | assign_uninitialized(A[i], v);}; 53 | parallel_for(0, n, f, granularity(n)); 54 | } 55 | 56 | struct notEmptyF { 57 | eType e; notEmptyF(eType _e) : e(_e) {} 58 | int operator() (eType a) {return e != a;}}; 59 | 60 | index hashToRange(index h) {return (int) h % (uint) m;} 61 | index firstIndex(kType v) {return hashToRange(hashStruct.hash(v));} 62 | index incrementIndex(index h) {return (h + 1 == (long) m) ? 0 : h+1;} 63 | index decrementIndex(index h) {return (h == 0) ? m-1 : h-1;} 64 | bool lessIndex(index a, index b) {return (a < b) ? (2*(b-a) < m) : (2*(a-b) > m);} 65 | bool lessEqIndex(index a, index b) {return a==b || lessIndex(a,b);} 66 | 67 | public: 68 | // Size is the maximum number of values the hash table will hold. 69 | // Overfilling the table could put it into an infinite loop. 
70 | Table(size_t size, HASH hashF, float load = 1.5) : 71 | m(((size_t) 100.0 + load * size)), 72 | empty(hashF.empty()), 73 | hashStruct(hashF), 74 | TA(new_array_no_init(m)) { 75 | clear(TA, m, empty); } 76 | 77 | ~Table() { delete_array(TA, m);}; 78 | 79 | // prioritized linear probing 80 | // a new key will bump an existing key up if it has a higher priority 81 | // an equal key will replace an old key if replaceQ(new,old) is true 82 | // returns 0 if not inserted (i.e. equal and replaceQ false) and 1 otherwise 83 | bool insert(eType v) { 84 | index i = firstIndex(hashStruct.getKey(v)); 85 | while (true) { 86 | eType c = TA[i]; 87 | if (c == empty) { 88 | if (hashStruct.cas(&TA[i],c,v)) return true; 89 | } else { 90 | int cmp = hashStruct.cmp(hashStruct.getKey(v),hashStruct.getKey(c)); 91 | if (cmp == 0) { 92 | if (!hashStruct.replaceQ(v,c)) return false; 93 | else if (hashStruct.cas(&TA[i],c,v)) return true; 94 | } else if (cmp < 0) 95 | i = incrementIndex(i); 96 | else if (hashStruct.cas(&TA[i],c,v)) { 97 | v = c; 98 | i = incrementIndex(i); 99 | } 100 | } 101 | } 102 | } 103 | 104 | // prioritized linear probing 105 | // a new key will bump an existing key up if it has a higher priority 106 | // an equal key will replace an old key if replaceQ(new,old) is true 107 | // returns 0 if not inserted (i.e. 
equal and replaceQ false) and 1 otherwise 108 | bool update(eType v) { 109 | index i = firstIndex(hashStruct.getKey(v)); 110 | while (true) { 111 | eType c = TA[i]; 112 | if (c == empty) { 113 | if (hashStruct.cas(&TA[i],c,v)) return true; 114 | } else { 115 | int cmp = hashStruct.cmp(hashStruct.getKey(v),hashStruct.getKey(c)); 116 | if (cmp == 0) { 117 | if (!hashStruct.replaceQ(v,c)) return false; 118 | else { 119 | eType new_val = hashStruct.update(c,v); 120 | if (hashStruct.cas(&TA[i],c,new_val)) return true; 121 | } 122 | } else if (cmp < 0) 123 | i = incrementIndex(i); 124 | else if (hashStruct.cas(&TA[i],c,v)) { 125 | v = c; 126 | i = incrementIndex(i); 127 | } 128 | } 129 | } 130 | } 131 | 132 | bool deleteVal(kType v) { 133 | index i = firstIndex(v); 134 | int cmp; 135 | 136 | // find first element less than or equal to v in priority order 137 | index j = i; 138 | eType c = TA[j]; 139 | 140 | if (c == empty) return true; 141 | 142 | // find first location with priority less or equal to v's priority 143 | while ((cmp = (c==empty) ? 1 : hashStruct.cmp(v, hashStruct.getKey(c))) < 0) { 144 | j = incrementIndex(j); 145 | c = TA[j]; 146 | } 147 | while (true) { 148 | // Invariants: 149 | // v is the key that needs to be deleted 150 | // j is our current index into TA 151 | // if v appears in TA, then at least one copy must appear at or before j 152 | // c = TA[j] at some previous time (could now be changed) 153 | // i = h(v) 154 | // cmp = compare v to key of c (positive if greater, 0 equal, negative less) 155 | if (cmp != 0) { 156 | // v does not match key of c, need to move down one and exit if 157 | // moving before h(v) 158 | if (j == i) return true; 159 | j = decrementIndex(j); 160 | c = TA[j]; 161 | cmp = (c == empty) ? 1 : hashStruct.cmp(v, hashStruct.getKey(c)); 162 | } else { // found v at location j (at least at some prior time) 163 | 164 | // Find next available element to fill location j. 
165 | // This is a little tricky since we need to skip over elements for 166 | // which the hash index is greater than j, and need to account for 167 | // things being moved downwards by others as we search. 168 | // Makes use of the fact that values in a cell can only decrease 169 | // during a delete phase as elements are moved from the right to left. 170 | index jj = incrementIndex(j); 171 | eType x = TA[jj]; 172 | while (x != empty && lessIndex(j, firstIndex(hashStruct.getKey(x)))) { 173 | jj = incrementIndex(jj); 174 | x = TA[jj]; 175 | } 176 | index jjj = decrementIndex(jj); 177 | while (jjj != j) { 178 | eType y = TA[jjj]; 179 | if (y == empty || !lessIndex(j, firstIndex(hashStruct.getKey(y)))) { 180 | x = y; 181 | jj = jjj; 182 | } 183 | jjj = decrementIndex(jjj); 184 | } 185 | 186 | // try to copy the the replacement element into j 187 | if (hashStruct.cas(&TA[j],c,x)) { 188 | // swap was successful 189 | // if the replacement element was empty, we are done 190 | if (x == empty) return true; 191 | 192 | // Otherwise there are now two copies of the replacement element x 193 | // delete one copy (probably the original) by starting to look at jj. 194 | // Note that others can come along in the meantime and delete 195 | // one or both of them, but that is fine. 196 | v = hashStruct.getKey(x); 197 | j = jj; 198 | i = firstIndex(v); 199 | } 200 | c = TA[j]; 201 | cmp = (c == empty) ? 1 : hashStruct.cmp(v, hashStruct.getKey(c)); 202 | } 203 | } 204 | } 205 | 206 | // Returns the value if an equal value is found in the table 207 | // otherwise returns the "empty" element. 
208 | // due to prioritization, can quit early if v is greater than cell 209 | eType find(kType v) { 210 | index h = firstIndex(v); 211 | eType c = TA[h]; 212 | while (true) { 213 | if (c == empty) {return empty;} 214 | int cmp = hashStruct.cmp(v,hashStruct.getKey(c)); 215 | if (cmp >= 0) { 216 | /*Ju we disable >0 case, because the +1 is not defined for our JitRequest*/ 217 | if (cmp > 0) return empty; 218 | else return c; 219 | //return c; 220 | } 221 | h = incrementIndex(h); 222 | c = TA[h]; 223 | } 224 | } 225 | 226 | // returns the number of entries 227 | size_t count() { 228 | auto is_full = [&] (size_t i) -> size_t { 229 | return (TA[i] == empty) ? 0 : 1;}; 230 | return reduce(delayed_seq(m, is_full), addm()); 231 | } 232 | 233 | // returns all the current entries compacted into a sequence 234 | sequence entries() { 235 | return filter(range(TA, TA+m), 236 | [&] (eType v) {return v != empty;}); 237 | } 238 | 239 | index findIndex(kType v) { 240 | index h = firstIndex(v); 241 | eType c = TA[h]; 242 | while (true) { 243 | if (c == empty) return -1; 244 | int cmp = hashStruct.cmp(v,hashStruct.getKey(c)); 245 | if (cmp >= 0) { 246 | if (cmp > 0) return -1; 247 | else return h; 248 | } 249 | h = incrementIndex(h); 250 | c = TA[h]; 251 | } 252 | } 253 | 254 | sequence get_index() { 255 | auto is_full = [&] (const size_t i) -> int { 256 | if (TA[i] != empty) return 1; else return 0;}; 257 | sequence x(m, is_full); 258 | scan_inplace(x.slice(), addm()); 259 | return x; 260 | } 261 | 262 | // prints the current entries along with the index they are stored at 263 | void print() { 264 | cout << "vals = "; 265 | for (size_t i=0; i < m; i++) 266 | if (TA[i] != empty) 267 | cout << i << ":" << TA[i] << ","; 268 | cout << endl; 269 | } 270 | }; 271 | 272 | template 273 | sequence remove_duplicates(sequence const &S, H const &hash, size_t m=0) { 274 | timer t("remove duplicates", false); 275 | if (m==0) m = S.size(); 276 | Table T(m, hash, 1.3); 277 | t.next("build table"); 
278 | parallel_for(0, S.size(), [&] (size_t i) { T.insert(S[i]);}); 279 | t.next("insert"); 280 | sequence result = T.entries(); 281 | t.next("entries"); 282 | return result; 283 | } 284 | 285 | // T must be some integer type 286 | template 287 | struct hashInt { 288 | using eType = T; 289 | using kType = T; 290 | eType empty() {return -1;} 291 | kType getKey(eType v) {return v;} 292 | T hash(kType v) {return v * 999029;} //hash64_2(v);} 293 | int cmp(kType v, kType b) {return (v > b) ? 1 : ((v == b) ? 0 : -1);} 294 | bool replaceQ(eType, eType) {return 0;} 295 | eType update(eType v, eType) {return v;} 296 | bool cas(eType* p, eType o, eType n) {return 297 | atomic_compare_and_swap(p, o, n);} 298 | }; 299 | 300 | // works for non-negative integers (uses -1 to mark cell as empty) 301 | template 302 | sequence remove_duplicates(sequence const &A) { 303 | return remove_duplicates(A, hashInt()); 304 | } 305 | 306 | } 307 | -------------------------------------------------------------------------------- /wheels/lockfreehash/lprobe/main.cc: -------------------------------------------------------------------------------- 1 | #include "hash_table.h" 2 | #include "benchmark_lprobe.h" 3 | #include "data.h" 4 | using namespace pbbs; 5 | //#define DEFAULT_OP_COUNT 2000 6 | //#define DEFAULT_THREAD_COUNT 2 7 | //#define DEFAULT_READ_PERCENT 90 8 | //#define DEFAULT_LOAD_FACTOR 40 9 | //#define CAPACITY 8000016 10 | //#define CAPACITY 800000 11 | 12 | #define DEFAULT_OP_COUNT 2000000 13 | #define DEFAULT_THREAD_COUNT 24 14 | #define DEFAULT_READ_PERCENT 90 15 | #define DEFAULT_LOAD_FACTOR 40 16 | #define CAPACITY 8000016 17 | 18 | 19 | int main() { 20 | 21 | int op_count = DEFAULT_OP_COUNT; 22 | int num_threads = DEFAULT_THREAD_COUNT; 23 | int read_percent = DEFAULT_READ_PERCENT; 24 | int load_factor = DEFAULT_LOAD_FACTOR; 25 | 26 | int rweight = read_percent; 27 | int idweight = 100 - read_percent; 28 | /* 29 | Table T(100000, hashKV(), 1.3); 30 | T.insert({1,2}); 31 | 
T.insert({2,45}); 32 | struct KV res = T.find(2); 33 | std::cout << "return value is " << res.v << std::endl; 34 | */ 35 | 36 | BenchmarkLockFreeHT benchmark_lockfree_ht(op_count, CAPACITY, rweight, idweight, num_threads, 0.3); 37 | benchmark_lockfree_ht.run(); 38 | 39 | return 0; 40 | } 41 | -------------------------------------------------------------------------------- /wheels/lockfreehash/lprobe/memory_size.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Author: David Robert Nadeau 3 | * Site: http://NadeauSoftware.com/ 4 | * License: Creative Commons Attribution 3.0 Unported License 5 | * http://creativecommons.org/licenses/by/3.0/deed.en_US 6 | */ 7 | 8 | #pragma once 9 | 10 | #if defined(_WIN32) 11 | #include 12 | 13 | #elif defined(__unix__) || defined(__unix) || defined(unix) || (defined(__APPLE__) && defined(__MACH__)) 14 | #include 15 | #include 16 | #include 17 | #if defined(BSD) 18 | #include 19 | #endif 20 | 21 | #else 22 | #error "Unable to define getMemorySize( ) for an unknown OS." 23 | #endif 24 | 25 | 26 | 27 | /** 28 | * Returns the size of physical memory (RAM) in bytes. 29 | */ 30 | static size_t getMemorySize( ) 31 | { 32 | #if defined(_WIN32) && (defined(__CYGWIN__) || defined(__CYGWIN32__)) 33 | /* Cygwin under Windows. ------------------------------------ */ 34 | /* New 64-bit MEMORYSTATUSEX isn't available. Use old 32.bit */ 35 | MEMORYSTATUS status; 36 | status.dwLength = sizeof(status); 37 | GlobalMemoryStatus( &status ); 38 | return (size_t)status.dwTotalPhys; 39 | 40 | #elif defined(_WIN32) 41 | /* Windows. 
------------------------------------------------- */ 42 | /* Use new 64-bit MEMORYSTATUSEX, not old 32-bit MEMORYSTATUS */ 43 | MEMORYSTATUSEX status; 44 | status.dwLength = sizeof(status); 45 | GlobalMemoryStatusEx( &status ); 46 | return (size_t)status.ullTotalPhys; 47 | 48 | #elif defined(__unix__) || defined(__unix) || defined(unix) || (defined(__APPLE__) && defined(__MACH__)) 49 | /* UNIX variants. ------------------------------------------- */ 50 | /* Prefer sysctl() over sysconf() except sysctl() HW_REALMEM and HW_PHYSMEM */ 51 | 52 | #if defined(CTL_HW) && (defined(HW_MEMSIZE) || defined(HW_PHYSMEM64)) 53 | int mib[2]; 54 | mib[0] = CTL_HW; 55 | #if defined(HW_MEMSIZE) 56 | mib[1] = HW_MEMSIZE;/* OSX. --------------------- */ 57 | #elif defined(HW_PHYSMEM64) 58 | mib[1] = HW_PHYSMEM64;/* NetBSD, OpenBSD. --------- */ 59 | #endif 60 | int64_t size = 0;/* 64-bit */ 61 | size_t len = sizeof( size ); 62 | if ( sysctl( mib, 2, &size, &len, NULL, 0 ) == 0 ) 63 | return (size_t)size; 64 | return 0L;/* Failed? */ 65 | 66 | #elif defined(_SC_AIX_REALMEM) 67 | /* AIX. ----------------------------------------------------- */ 68 | return (size_t)sysconf( _SC_AIX_REALMEM ) * (size_t)1024L; 69 | 70 | #elif defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE) 71 | /* FreeBSD, Linux, OpenBSD, and Solaris. -------------------- */ 72 | return (size_t)sysconf( _SC_PHYS_PAGES ) * 73 | (size_t)sysconf( _SC_PAGESIZE ); 74 | 75 | #elif defined(_SC_PHYS_PAGES) && defined(_SC_PAGE_SIZE) 76 | /* Legacy. -------------------------------------------------- */ 77 | return (size_t)sysconf( _SC_PHYS_PAGES ) * 78 | (size_t)sysconf( _SC_PAGE_SIZE ); 79 | 80 | #elif defined(CTL_HW) && (defined(HW_PHYSMEM) || defined(HW_REALMEM)) 81 | /* DragonFly BSD, FreeBSD, NetBSD, OpenBSD, and OSX. -------- */ 82 | int mib[2]; 83 | mib[0] = CTL_HW; 84 | #if defined(HW_REALMEM) 85 | mib[1] = HW_REALMEM;/* FreeBSD. ----------------- */ 86 | #elif defined(HW_PHYSMEM) /* FIX: was HW_PYSMEM (typo); the branch could never be taken, leaving mib[1] uninitialized before sysctl() */ 87 | mib[1] = HW_PHYSMEM;/* Others. 
------------------ */ 88 | #endif 89 | unsigned int size = 0;/* 32-bit */ 90 | size_t len = sizeof( size ); 91 | if ( sysctl( mib, 2, &size, &len, NULL, 0 ) == 0 ) 92 | return (size_t)size; 93 | return 0L;/* Failed? */ 94 | #endif /* sysctl and sysconf variants */ 95 | 96 | #else 97 | return 0L;/* Unknown OS. */ 98 | #endif 99 | } 100 | -------------------------------------------------------------------------------- /wheels/lockfreehash/lprobe/monoid.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | 6 | // Definition of various monoids 7 | // each consists of: 8 | // T : type of the values 9 | // static T identity() : returns identity for the monoid 10 | // static T add(T, T) : adds two elements, must be associative 11 | 12 | namespace pbbs { 13 | 14 | template 15 | struct monoid { 16 | using T = TT; 17 | F f; 18 | TT identity; 19 | monoid(F f, TT id) : f(f), identity(id) {} 20 | }; 21 | 22 | template 23 | monoid make_monoid (F f, T id) { 24 | return monoid(f, id); 25 | } 26 | 27 | template 28 | auto pair_monoid (M1 m1, M2 m2) { 29 | using P = std::pair; 30 | auto f = [&] (P a, P b) { 31 | return P(m1.f(a.first, b.first), m2.f(a.second, b.second));}; 32 | return make_monoid(f, P(m1.identity, m2.identity)); 33 | } 34 | 35 | template 36 | auto array_monoid (M m) { 37 | using Ar = std::array; 38 | auto f = [&] (Ar a, Ar b) { 39 | Ar r; 40 | for (size_t i=0; i < n; i++) 41 | r[i] = m.f(a[i], b[i]); 42 | return r; 43 | }; 44 | Ar id; 45 | for (size_t i=0; i < n; i++) id[i] = m.identity; 46 | return make_monoid(f, id); 47 | } 48 | 49 | template 50 | struct addm { 51 | using T = TT; 52 | addm() : identity(0) {} 53 | T identity; 54 | static T f(T a, T b) {return a + b;} 55 | }; 56 | 57 | template 58 | T lowest() {return std::numeric_limits::lowest();} 59 | 60 | template 61 | T highest() {return std::numeric_limits::max();} 62 | 63 | template 64 | struct maxm{ 65 | using T = TT; 66 
| maxm() : identity(lowest()) {} 67 | T identity; 68 | static T f(T a, T b) {return std::max(a,b);} 69 | }; 70 | 71 | template 72 | struct maxm> { 73 | using T = std::pair; 74 | maxm() : identity(std::make_pair(lowest(), lowest())) {} 75 | T identity; 76 | static T f(T a, T b) {return std::max(a,b);} 77 | }; 78 | 79 | template 80 | struct minm { 81 | using T = TT; 82 | minm() : identity(highest()) {} 83 | T identity; 84 | static T f(T a, T b) {return std::min(a,b);} 85 | }; 86 | 87 | template 88 | struct minm> { 89 | using T = std::pair; 90 | minm() : identity(std::make_pair(highest(), highest())) {} 91 | T identity; 92 | static T f(T a, T b) {return std::min(a,b);} /* FIX: was std::max — a min-monoid (identity = highest()) must combine with std::min, as the unspecialized minm does; std::max was copy-pasted from maxm<pair> */ 93 | }; 94 | 95 | template 96 | struct xorm { 97 | using T = TT; 98 | xorm() : identity(0) {} 99 | T identity; 100 | static T f(T a, T b) {return a ^ b;} 101 | }; 102 | 103 | template 104 | struct minmaxm { 105 | using T = std::pair; 106 | minmaxm() : identity(T(highest(), lowest())) {} 107 | T identity; 108 | static T f(T a, T b) {return T(std::min(a.first,b.first), 109 | std::max(a.second,b.second));} 110 | }; 111 | 112 | template 113 | struct Add { 114 | using T = TT; 115 | static T identity() {return (T) 0;} 116 | static T add(T a, T b) {return a + b;} 117 | }; 118 | 119 | template 120 | struct Max { 121 | using T = TT; 122 | static T identity() { 123 | return (T) std::numeric_limits::min();} 124 | static T add(T a, T b) {return std::max(a,b);} 125 | }; 126 | 127 | template 128 | struct Min { 129 | using T = TT; 130 | static T identity() { 131 | return (T) std::numeric_limits::max();} 132 | static T add(T a, T b) {return std::min(a,b);} 133 | }; 134 | 135 | template 136 | struct Add_Pair { 137 | using T = std::pair; 138 | static T identity() {return T(A1::identity(), A2::identity());} 139 | static T add(T a, T b) { 140 | return T(A1::add(a.first,b.first), A2::add(a.second,b.second));} 141 | }; 142 | 143 | template 144 | struct Add_Array { 145 | using S = std::tuple_size; 146 | using T = std::array; 
147 | static T identity() { 148 | T r; 149 | for (size_t i=0; i < S::value; i++) 150 | r[i] = 0; 151 | return r; 152 | } 153 | static T add(T a, T b) { 154 | T r; 155 | for (size_t i=0; i < S::value; i++) 156 | r[i] = a[i] + b[i]; 157 | return r; 158 | } 159 | }; 160 | 161 | template 162 | struct Add_Nested_Array { 163 | using T = AT; 164 | using S = std::tuple_size; 165 | using SS = std::tuple_size; 166 | static T identity() { 167 | T r; 168 | for (size_t i=0; i < S::value; i++) 169 | for (size_t j=0; j < SS::value; j++) r[i][j] = 0; 170 | return r; 171 | } 172 | static T add(T a, T b) { 173 | T r; 174 | for (size_t i=0; i < S::value; i++) 175 | for (size_t j=0; j < SS::value; j++) 176 | r[i][j] = a[i][j] + b[i][j]; 177 | return r; 178 | } 179 | }; 180 | 181 | } 182 | -------------------------------------------------------------------------------- /wheels/lockfreehash/lprobe/parallel.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | //*************************************** 4 | // All the pbbs library uses only four functions for 5 | // accessing parallelism. 6 | // These can be implemented on top of any scheduler. 7 | //*************************************** 8 | // number of threads available from OS 9 | //template <> 10 | static int num_workers(); 11 | 12 | // id of running thread, should be numbered from [0...num-workers) 13 | static int worker_id(); 14 | 15 | // the granularity of a simple loop (e.g. adding one to each element 16 | // of an array) to reasonably hide cost of scheduler 17 | // #define PAR_GRANULARITY 2000 18 | 19 | // parallel loop from start (inclusive) to end (exclusive) running 20 | // function f. 21 | // f should map long to void. 
22 | // granularity is the number of iterations to run sequentially 23 | // if 0 (default) then the scheduler will decide 24 | // conservative uses a safer scheduler 25 | template 26 | static void parallel_for(long start, long end, F f, 27 | long granularity = 0, 28 | bool conservative = false); 29 | 30 | // runs the thunks left and right in parallel. 31 | // both left and write should map void to void 32 | // conservative uses a safer scheduler 33 | template 34 | static void par_do(Lf left, Rf right, bool conservative=false); 35 | 36 | //*************************************** 37 | 38 | // cilkplus 39 | #if defined(CILK) 40 | #include 41 | #include 42 | #include 43 | #include 44 | #define PAR_GRANULARITY 2000 45 | 46 | inline int num_workers() {return __cilkrts_get_nworkers();} 47 | inline int worker_id() {return __cilkrts_get_worker_number();} 48 | inline void set_num_workers(int) { 49 | throw std::runtime_error("don't know how to set worker count!"); 50 | } 51 | 52 | // Not sure this still works 53 | //__cilkrts_end_cilk(); 54 | // std::stringstream ss; ss << n; 55 | // if (0 != __cilkrts_set_param("nworkers", ss.str().c_str())) 56 | 57 | 58 | template 59 | inline void par_do(Lf left, Rf right, bool) { 60 | cilk_spawn right(); 61 | left(); 62 | cilk_sync; 63 | } 64 | 65 | template 66 | inline void parallel_for(long start, long end, F f, 67 | long granularity, 68 | bool) { 69 | if (granularity == 0) 70 | cilk_for(long i=start; i 85 | #define PAR_GRANULARITY 200000 86 | 87 | inline int num_workers() { return omp_get_max_threads(); } 88 | inline int worker_id() { return omp_get_thread_num(); } 89 | inline void set_num_workers(int n) { omp_set_num_threads(n); } 90 | 91 | template 92 | inline void parallel_for(long start, long end, F f, 93 | long granularity, 94 | bool conservative) { 95 | _Pragma("omp parallel for") 96 | for(long i=start; i 102 | inline void par_do(Lf left, Rf right, bool conservative) { 103 | if (!in_par_do) { 104 | in_par_do = true; // at top 
level start up tasking 105 | #pragma omp parallel 106 | #pragma omp single 107 | #pragma omp task 108 | left(); 109 | #pragma omp task 110 | right(); 111 | #pragma omp taskwait 112 | in_par_do = false; 113 | } else { // already started 114 | #pragma omp task 115 | left(); 116 | #pragma omp task 117 | right(); 118 | #pragma omp taskwait 119 | } 120 | } 121 | 122 | template 123 | inline void parallel_run(Job job, int num_threads=0) { 124 | job(); 125 | } 126 | 127 | // Guy's scheduler (ABP) 128 | #elif defined(HOMEGROWN) 129 | #include "scheduler.h" 130 | 131 | #ifdef NOTMAIN 132 | extern fork_join_scheduler fj; 133 | #else 134 | fork_join_scheduler fj; 135 | #endif 136 | 137 | // Calls fj.destroy() before the program exits 138 | inline void destroy_fj() { 139 | fj.destroy(); 140 | } 141 | 142 | struct __atexit {__atexit() {std::atexit(destroy_fj);}}; 143 | static __atexit __atexit_var; 144 | 145 | #define PAR_GRANULARITY 512 146 | 147 | inline int num_workers() { 148 | return fj.num_workers(); 149 | } 150 | 151 | inline int worker_id() { 152 | return fj.worker_id(); 153 | } 154 | 155 | inline void set_num_workers(int n) { 156 | fj.set_num_workers(n); 157 | } 158 | 159 | template 160 | inline void parallel_for(long start, long end, F f, 161 | long granularity, 162 | bool conservative) { 163 | if (end > start) 164 | fj.parfor(start, end, f, granularity, conservative); 165 | } 166 | 167 | template 168 | inline void par_do(Lf left, Rf right, bool conservative) { 169 | return fj.pardo(left, right, conservative); 170 | } 171 | 172 | template 173 | inline void parallel_run(Job job, int) { 174 | job(); 175 | } 176 | 177 | // c++ 178 | #else 179 | 180 | inline int num_workers() { return 1;} 181 | inline int worker_id() { return 0;} 182 | inline void set_num_workers(int) { ; } 183 | #define PAR_GRANULARITY 1000 184 | 185 | template 186 | inline void parallel_for(long start, long end, F f, 187 | long, // granularity, 188 | bool) { // conservative) { 189 | for (long i=start; i 
195 | inline void par_do(Lf left, Rf right, bool) { // conservative) { 196 | left(); right(); 197 | } 198 | 199 | template 200 | inline void parallel_run(Job job, int) { // num_threads=0) { 201 | job(); 202 | } 203 | 204 | #endif 205 | -------------------------------------------------------------------------------- /wheels/lockfreehash/lprobe/seq.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "utilities.h" 4 | #include "alloc.h" 5 | #include 6 | #include 7 | 8 | #ifdef CONCEPTS 9 | template 10 | concept bool Seq = 11 | requires(T t, size_t u) { 12 | typename T::value_type; 13 | { t.size() } -> size_t; 14 | { t.slice() }; 15 | { t[u] }; 16 | }; 17 | 18 | template 19 | concept bool Range = 20 | Seq && requires(T t, size_t u) { 21 | { t[u] } -> typename T::value_type&; 22 | typename T::iterator; 23 | }; 24 | #define SEQ Seq 25 | #define RANGE Range 26 | #else 27 | #define SEQ typename 28 | #define RANGE typename 29 | #endif 30 | 31 | namespace pbbs { 32 | 33 | constexpr bool report_copy = false; 34 | constexpr bool bounds_check = false; 35 | 36 | template 37 | struct range { 38 | public: 39 | using value_type = typename std::iterator_traits::value_type; 40 | using iterator = Iterator; 41 | range() {}; 42 | range(iterator s, iterator e) : s(s), e(e) {}; 43 | value_type& operator[] (const size_t i) const {return s[i];} 44 | range slice(size_t ss, size_t ee) const { 45 | return range(s + ss, s + ee); } 46 | range slice() const {return range(s,e);}; 47 | size_t size() const { return e - s;} 48 | iterator begin() const {return s;} 49 | iterator end() const {return e;} 50 | 51 | range> 52 | rslice(size_t ss, size_t ee) const { 53 | auto i = std::make_reverse_iterator(e); 54 | return range(i + ss, i + ee); 55 | } 56 | range> 57 | rslice() const {return rslice(0, std::distance(s,e));}; 58 | 59 | private: 60 | iterator s; 61 | iterator e; 62 | }; 63 | 64 | template 65 | range make_range(Iter s, Iter e) { 66 
| return range(s,e); 67 | } 68 | 69 | template 70 | struct delayed_sequence { 71 | using value_type = T; 72 | delayed_sequence(size_t n, F _f) : f(_f), s(0), e(n) {}; 73 | delayed_sequence(size_t n, value_type v) : f([&] (size_t) {return v;}), s(0), e(n) {}; 74 | delayed_sequence(size_t s, size_t e, F _f) : f(_f), s(s), e(e) {}; 75 | const value_type operator[] (size_t i) const {return (f)(i+s);} 76 | delayed_sequence slice(size_t ss, size_t ee) const { 77 | return delayed_sequence(s+ss,s+ee,f); } 78 | delayed_sequence slice() const { 79 | return delayed_sequence(s,e,f); } 80 | size_t size() const { return e - s;} 81 | private: 82 | F f; 83 | const size_t s, e; 84 | }; 85 | 86 | // used so second template argument can be inferred 87 | template 88 | delayed_sequence delayed_seq (size_t n, F f) { 89 | return delayed_sequence(n,f); 90 | } 91 | 92 | template 93 | auto dseq (size_t n, F f) -> delayed_sequence 94 | { 95 | using T = decltype(f(0)); 96 | return delayed_sequence(n,f); 97 | } 98 | 99 | template > 100 | struct sequence { 101 | public: 102 | using value_type = T; 103 | //using iterator = T*; 104 | 105 | sequence() { empty(); } 106 | 107 | // copy constructor 108 | sequence(const sequence& a) { 109 | if (report_copy && !a.is_small()) 110 | cout << "copy constructor: len: " << a.size() 111 | << " element size: " << sizeof(value_type) << endl; 112 | if (a.is_small()) val = a.val; 113 | else copy_from(a.val.large.s, a.val.large.n); 114 | } 115 | 116 | // move constructor 117 | sequence(sequence&& a) { 118 | val = a.val; a.empty();} 119 | 120 | // // copy assignment 121 | // sequence& operator = (const sequence& a) { 122 | // if (report_copy && !a.is_small()) 123 | // cout << "copy assignment: len: " << a.size() 124 | // << " element size: " << sizeof(T) << endl; 125 | // if (this != &a) { 126 | // clear(); 127 | // if (a.is_small()) val = a.val; 128 | // else copy_from(a.val.large.s, a.val.large.n);} 129 | // return *this; 130 | // } 131 | 132 | // //move 
assignment 133 | // sequence& operator = (sequence&& a) { 134 | // if (this != &a) {clear(); val = a.val; a.empty();} 135 | // return *this; 136 | // } 137 | 138 | // unified copy/move assignment using the copy and swap idiom 139 | // now safer for exceptions 140 | sequence& operator = (sequence a) { 141 | swap(a); 142 | return *this; 143 | } 144 | 145 | // constructs a sequence of length sz 146 | // with each element default constructed 147 | sequence(const size_t sz) { 148 | alloc(sz);} 149 | 150 | // constructs a sequence of length sz initialized with v 151 | sequence(const size_t sz, value_type v) { 152 | T* start = alloc_no_init(sz); 153 | parallel_for(0, sz, [=] (size_t i) { 154 | assign_uninitialized(start[i], (value_type) v);}, 300); 155 | }; 156 | 157 | // constructs a sequence by applying f to indices [0, ..., sz-1] 158 | template 159 | sequence(const size_t sz, Func f, size_t granularity=300) { 160 | value_type* start = alloc_no_init(sz); 161 | parallel_for(0, sz, [&] (size_t i) { 162 | assign_uninitialized(start[i], f(i));}, granularity); 163 | }; 164 | 165 | // construct a sequence from initializer list 166 | sequence(std::initializer_list l) { 167 | size_t sz = l.end() - l.begin(); 168 | value_type* start = alloc(sz); 169 | size_t i = 0; 170 | for (value_type a : l) start[i++] = a; 171 | } 172 | 173 | // constructs from a range 174 | template 175 | sequence(range const &a) { 176 | copy_from(a.begin(), a.size()); 177 | } 178 | 179 | // constructs from a delayed sequence 180 | template 181 | sequence(delayed_sequence const &a) { 182 | copy_from(a, a.size()); 183 | } 184 | 185 | // uninitialized sequence of length sz 186 | // dangerous if non primitive types and not immediately initialized 187 | static sequence no_init(const size_t sz) { 188 | sequence r; 189 | r.alloc_no_init(sz); 190 | return r; 191 | }; 192 | 193 | // Constructs a sequence by taking ownership of an 194 | // allocated value_type array. 
195 | // Only use if a is allocated by the same allocator as 196 | // the sequence since the sequence delete will destruct it. 197 | sequence(value_type* a, const size_t sz) { 198 | set(a, sz); 199 | // cout << "dangerous: " << size(); 200 | }; 201 | 202 | // Copies a Seq type 203 | // Uses enable_if to avoid matching on integer argument, which creates 204 | // a sequece of the specified length 205 | //template ::value>> 206 | //sequence(Seq const &a) { 207 | // copy_from(a.begin(), a.size()); 208 | //} 209 | 210 | ~sequence() { clear();} 211 | 212 | range slice(size_t ss, size_t ee) const { 213 | return range(begin() + ss, begin() + ee); 214 | } 215 | 216 | range> 217 | rslice(size_t ss, size_t ee) const { 218 | auto iter = std::make_reverse_iterator(begin() + size()); 219 | return range(iter + ss, iter + ee); 220 | } 221 | 222 | range> 223 | rslice() const {return rslice(0, size());}; 224 | 225 | range slice() const { 226 | return range(begin(), begin() + size()); 227 | } 228 | 229 | // gives up ownership, returning an array of the elements 230 | // only use if will be freed by same allocator as sequence 231 | value_type* to_array() { 232 | value_type* r = begin(); empty(); return r;} 233 | 234 | // frees the memory assuming elements are already destructed, 235 | // and sets pointer to Null (empty()); 236 | void clear_no_destruct() { 237 | if (size() != 0 && !is_small()) 238 | //pbbs::free_array(val.large.s); 239 | Allocator().deallocate(val.large.s, val.large.n); 240 | empty(); 241 | } 242 | 243 | // destructs the sequence 244 | void clear() { 245 | delete_elements(); 246 | clear_no_destruct(); 247 | } 248 | 249 | value_type& operator[] (const size_t i) const { 250 | if (bounds_check && i >= size()) 251 | throw std::out_of_range("in sequence access: length = " 252 | + std::to_string(size()) 253 | + " index = " + std::to_string(i)); 254 | return begin()[i]; 255 | } 256 | 257 | value_type& get(const size_t i) const { 258 | return begin()[i]; 259 | } 260 | 261 | 
void swap(sequence& b) { 262 | std::swap(val.large.s, b.val.large.s); 263 | std::swap(val.large.n, b.val.large.n); 264 | } 265 | 266 | size_t size() const { 267 | if (is_small()) return val.small[flag_loc]; 268 | return val.large.n;} 269 | 270 | value_type* begin() const { 271 | if (is_small()) return (value_type*) &val.small; 272 | return val.large.s;} 273 | 274 | value_type* end() const {return begin() + size();} 275 | 276 | private: 277 | 278 | struct lg { value_type *s; size_t n; }; 279 | static constexpr size_t lg_size = sizeof(lg); 280 | static constexpr size_t T_size = sizeof(value_type); 281 | static constexpr size_t max_sso_size = 8; 282 | static constexpr size_t flag_loc = 15; 283 | // For future use in c++20 284 | // --- (std::endian::native == std::endian::big) ? 8 : 15; 285 | 286 | // Uses short string optimization (SSO). 287 | // Applied if T_size <= max_sso_size 288 | // Stores flag in byte 15 (flag_loc) of the small array 289 | // It assumes the machine is little_endian so this is 290 | // the high order bits of the size field (n) 291 | union { 292 | lg large; 293 | char small[lg_size]; // for SSO 294 | } val; 295 | 296 | // sets start and size 297 | void set(value_type* start, size_t sz) { 298 | val.large.n = sz; 299 | val.large.s = start; 300 | } 301 | 302 | // marks as empty 303 | void empty() {set(NULL, 0);} 304 | 305 | // is a given size small 306 | inline bool is_small(size_t sz) const { 307 | return ((T_size <= max_sso_size) && 308 | sz < (lg_size/T_size) && 309 | sz > 0); } 310 | 311 | // am I small 312 | inline bool is_small() const { 313 | //return is_small(val.small[flag_loc]); 314 | if (T_size <= max_sso_size) { 315 | size_t sz = val.small[flag_loc]; 316 | return (sz > 0 && sz < (lg_size/T_size)); 317 | } 318 | return false; 319 | } 320 | 321 | void initialize_elements() { 322 | if (!std::is_trivially_default_constructible::value) 323 | parallel_for(0, size(), [&] (size_t i) { 324 | new ((void*) (begin()+i)) value_type;}); 325 | } 326 | 
327 | void delete_elements() { 328 | if (!std::is_trivially_destructible::value) 329 | parallel_for(0, size(), [&] (size_t i) { 330 | (begin()+i)->~value_type();}); 331 | } 332 | 333 | // allocate and set size without initialization 334 | value_type* alloc_no_init(size_t sz) { 335 | if (is_small(sz)) { 336 | val.small[flag_loc] = sz; 337 | return (value_type*) &val.small; 338 | } else { 339 | //T* loc = (sz == 0) ? NULL : pbbs::new_array_no_init(sz); 340 | value_type* loc = (sz == 0) ? NULL : Allocator().allocate(sz); 341 | set(loc, sz); 342 | return loc; 343 | } 344 | } 345 | 346 | // allocate and set size with initialization 347 | value_type* alloc(size_t sz) { 348 | value_type* loc = alloc_no_init(sz); 349 | initialize_elements(); 350 | return loc; 351 | } 352 | 353 | // Allocates and copies sequence from random access iterator 354 | // Only used if not short string optimized. 355 | template 356 | void copy_from(Iter a, size_t sz) { 357 | value_type* start = alloc_no_init(sz); 358 | parallel_for(0, sz, [&] (size_t i) { 359 | assign_uninitialized(start[i], a[i]);}, 1000); 360 | } 361 | 362 | }; 363 | 364 | template 365 | bool slice_eq(range a, range b) { 366 | return a.begin() == b.begin();} 367 | 368 | template 369 | bool slice_eq(SeqA, SeqB) { return false;} 370 | 371 | template 372 | auto to_sequence(Seq const &s) -> sequence { 373 | using T = typename Seq::value_type; 374 | return sequence(s.size(), [&] (size_t i) { 375 | return s[i];}); 376 | } 377 | 378 | template 379 | auto seq (size_t n, F f) -> sequence 380 | { 381 | return sequence(n,f); 382 | } 383 | 384 | std::ostream& operator<<(std::ostream& os, sequence const &s) 385 | { 386 | // pad with a zero 387 | sequence out(s.size()+1, [&] (size_t i) { 388 | return i == s.size() ? 
0 : s[i];}); 389 | os << out.begin(); 390 | return os; 391 | } 392 | } 393 | 394 | -------------------------------------------------------------------------------- /wheels/lockfreehash/lprobe/thread_service.h: -------------------------------------------------------------------------------- 1 | #ifndef THREAD_SERVICE 2 | #define THREAD_SERVICE 3 | 4 | #include 5 | #include 6 | #include "data.h" 7 | 8 | 9 | std::atomic miss(0); 10 | 11 | struct WorkerArgs 12 | { 13 | int num_elems; 14 | // R/I/D weights, normalized to 100 15 | int rweight; 16 | int iweight; 17 | int dweight; 18 | void* ht_p; 19 | 20 | bool remove; 21 | int tid; 22 | int start; 23 | int* elems; 24 | }; 25 | 26 | template 27 | void* thread_service(void* threadArgs) 28 | { 29 | WorkerArgs* args = static_cast(threadArgs); 30 | 31 | std::random_device rd; 32 | std::mt19937 mt(rd()); 33 | std::uniform_int_distribution rng; 34 | 35 | std::array weights; 36 | weights[0] = args->rweight; 37 | weights[1] = args->iweight; 38 | weights[2] = args->dweight; 39 | 40 | std::default_random_engine g; 41 | std::discrete_distribution drng(weights.begin(), weights.end()); 42 | 43 | int tid = args->tid; 44 | int num_elems = args->num_elems; 45 | T* ht_p = static_cast(args->ht_p); 46 | 47 | for (int i = 0; i < num_elems; i++) 48 | { 49 | // Key, Value pair 50 | int k = rng(mt); 51 | int v = rng(mt); 52 | // Action : 0 -> Search, 1 -> Insert, 2 -> Remove 53 | int a = drng(g); 54 | 55 | if (a == 0) 56 | ht_p->find(k); 57 | else if (a == 1) 58 | ht_p->insert({k, v}); 59 | else 60 | ht_p->deleteVal(k); 61 | } 62 | } 63 | 64 | template 65 | void* thread_service_low_contention(void* threadArgs) 66 | { 67 | WorkerArgs* args = static_cast(threadArgs); 68 | 69 | std::random_device rd; 70 | std::mt19937 mt(rd()); 71 | std::uniform_int_distribution rng; 72 | 73 | std::array weights; 74 | weights[0] = args->rweight; 75 | weights[1] = args->iweight; 76 | weights[2] = args->dweight; 77 | 78 | std::default_random_engine g; 79 | 
std::discrete_distribution drng(weights.begin(), weights.end()); 80 | 81 | int tid = args->tid; 82 | int num_elems = args->num_elems; 83 | T* ht_p = static_cast(args->ht_p); 84 | 85 | int *keys = (args->elems + args->start); 86 | 87 | int start = 0; 88 | int end = 0; 89 | for (int i = 0; i < num_elems; i++) 90 | { 91 | // Action : 0 -> Search, 1 -> Insert, 2 -> Remove 92 | int a = drng(g); 93 | 94 | if (start == end || a == 1) 95 | { 96 | int k = rng(mt) % num_elems + tid * num_elems; 97 | keys[end++] = k; 98 | ht_p->insert({k, k}); 99 | } 100 | else if (a == 0) 101 | { 102 | int k = rng(mt) % (end - start) + start; 103 | ht_p->find(k); 104 | } 105 | else 106 | { 107 | int k = keys[start++]; 108 | ht_p->deleteVal(k); 109 | } 110 | } 111 | } 112 | 113 | template 114 | void* thread_service_high_contention(void* threadArgs) 115 | { 116 | WorkerArgs* args = static_cast(threadArgs); 117 | 118 | std::random_device rd; 119 | std::mt19937 mt(rd()); 120 | std::uniform_int_distribution rng; 121 | 122 | std::array weights; 123 | weights[0] = args->rweight; 124 | weights[1] = args->iweight; 125 | weights[2] = args->dweight; 126 | 127 | std::default_random_engine g; 128 | std::discrete_distribution drng(weights.begin(), weights.end()); 129 | 130 | int tid = args->tid; 131 | int num_elems = args->num_elems; 132 | T* ht_p = static_cast(args->ht_p); 133 | 134 | for (int i = 0; i < num_elems; i++) 135 | { 136 | ht_p->find(0); 137 | } 138 | } 139 | 140 | template 141 | void* thread_checkmiss(void* threadArgs) 142 | { 143 | WorkerArgs* args = static_cast(threadArgs); 144 | int* elems = args->elems; 145 | T* ht_p = static_cast(args->ht_p); 146 | int start = args->start; 147 | int num_elems = args->num_elems; 148 | int tid = args->tid; 149 | 150 | for (int i = start; i < start + num_elems; i++) 151 | { 152 | #if 0 153 | struct KV res = ht_p->find(elems[i]); 154 | if (res.k == -1) { 155 | ++miss; 156 | ht_p->insert({elems[i], elems[i]}); 157 | printf("miss! 
key is %d\n", elems[i]); 158 | } 159 | #endif 160 | bool res = ht_p->insert({elems[i], elems[i]}); 161 | if (res) { 162 | ++miss; 163 | printf("miss!\n"); 164 | } 165 | 166 | } 167 | 168 | } 169 | 170 | 171 | template 172 | void* thread_insert(void* threadArgs) 173 | { 174 | WorkerArgs* args = static_cast(threadArgs); 175 | int* elems = args->elems; 176 | T* ht_p = static_cast(args->ht_p); 177 | int start = args->start; 178 | int num_elems = args->num_elems; 179 | int tid = args->tid; 180 | 181 | for (int i = start; i < start + num_elems; i++) 182 | { 183 | ht_p->insert({elems[i], elems[i]}); 184 | } 185 | 186 | } 187 | 188 | template 189 | void* thread_remove(void* threadArgs) 190 | { 191 | WorkerArgs* args = static_cast(threadArgs); 192 | int* elems = args->elems; 193 | T* ht_p = static_cast(args->ht_p); 194 | int start = args->start; 195 | int num_elems = args->num_elems; 196 | int tid = args->tid; 197 | bool remove = args->remove; 198 | 199 | std::random_device rd; 200 | std::mt19937 mt(rd()); 201 | std::uniform_int_distribution rng(0, 200000 - 1); 202 | 203 | for (int i = start; i < start + num_elems; i++) 204 | { 205 | if (remove) 206 | ht_p->deleteVal(elems[i]); 207 | else 208 | ht_p->find(elems[rng(mt)]); 209 | } 210 | 211 | } 212 | 213 | #endif 214 | -------------------------------------------------------------------------------- /wheels/lockfreehash/lprobe/thread_service_ptr.h: -------------------------------------------------------------------------------- 1 | #ifndef THREAD_SERVICE 2 | #define THREAD_SERVICE 3 | 4 | #include 5 | #include 6 | #include "data_ptr.h" 7 | 8 | std::atomic miss(0); 9 | 10 | struct WorkerArgs 11 | { 12 | int num_elems; 13 | // R/I/D weights, normalized to 100 14 | int rweight; 15 | int iweight; 16 | int dweight; 17 | void* ht_p; 18 | 19 | bool remove; 20 | int tid; 21 | int start; 22 | int* elems; 23 | }; 24 | 25 | template 26 | void* thread_service(void* threadArgs) 27 | { 28 | WorkerArgs* args = static_cast(threadArgs); 29 | 
30 | std::random_device rd; 31 | std::mt19937 mt(rd()); 32 | std::uniform_int_distribution rng; 33 | 34 | std::array weights; 35 | weights[0] = args->rweight; 36 | weights[1] = args->iweight; 37 | weights[2] = args->dweight; 38 | 39 | std::default_random_engine g; 40 | std::discrete_distribution drng(weights.begin(), weights.end()); 41 | 42 | int tid = args->tid; 43 | int num_elems = args->num_elems; 44 | T* ht_p = static_cast(args->ht_p); 45 | 46 | for (int i = 0; i < num_elems; i++) 47 | { 48 | // Key, Value pair 49 | int k = rng(mt); 50 | int v = rng(mt); 51 | // Action : 0 -> Search, 1 -> Insert, 2 -> Remove 52 | int a = drng(g); 53 | 54 | if (a == 0) 55 | ht_p->find(k); 56 | else if (a == 1) 57 | ht_p->insert(new struct KV (k, v)); 58 | else 59 | ht_p->deleteVal(k); 60 | } 61 | } 62 | 63 | template 64 | void* thread_service_low_contention(void* threadArgs) 65 | { 66 | WorkerArgs* args = static_cast(threadArgs); 67 | 68 | std::random_device rd; 69 | std::mt19937 mt(rd()); 70 | std::uniform_int_distribution rng; 71 | 72 | std::array weights; 73 | weights[0] = args->rweight; 74 | weights[1] = args->iweight; 75 | weights[2] = args->dweight; 76 | 77 | std::default_random_engine g; 78 | std::discrete_distribution drng(weights.begin(), weights.end()); 79 | 80 | int tid = args->tid; 81 | int num_elems = args->num_elems; 82 | T* ht_p = static_cast(args->ht_p); 83 | 84 | int *keys = (args->elems + args->start); 85 | 86 | int start = 0; 87 | int end = 0; 88 | for (int i = 0; i < num_elems; i++) 89 | { 90 | // Action : 0 -> Search, 1 -> Insert, 2 -> Remove 91 | int a = drng(g); 92 | 93 | if (start == end || a == 1) 94 | { 95 | int k = rng(mt) % num_elems + tid * num_elems; 96 | keys[end++] = k; 97 | ht_p->insert(new struct KV(k, k)); 98 | } 99 | else if (a == 0) 100 | { 101 | int k = rng(mt) % (end - start) + start; 102 | ht_p->find(k); 103 | } 104 | else 105 | { 106 | int k = keys[start++]; 107 | ht_p->deleteVal(k); 108 | } 109 | } 110 | } 111 | 112 | template 113 | 
void* thread_service_high_contention(void* threadArgs) 114 | { 115 | WorkerArgs* args = static_cast(threadArgs); 116 | 117 | std::random_device rd; 118 | std::mt19937 mt(rd()); 119 | std::uniform_int_distribution rng; 120 | 121 | std::array weights; 122 | weights[0] = args->rweight; 123 | weights[1] = args->iweight; 124 | weights[2] = args->dweight; 125 | 126 | std::default_random_engine g; 127 | std::discrete_distribution drng(weights.begin(), weights.end()); 128 | 129 | int tid = args->tid; 130 | int num_elems = args->num_elems; 131 | T* ht_p = static_cast(args->ht_p); 132 | 133 | for (int i = 0; i < num_elems; i++) 134 | { 135 | ht_p->find(0); 136 | } 137 | } 138 | 139 | template 140 | void* thread_checkmiss(void* threadArgs) 141 | { 142 | WorkerArgs* args = static_cast(threadArgs); 143 | int* elems = args->elems; 144 | T* ht_p = static_cast(args->ht_p); 145 | int start = args->start; 146 | int num_elems = args->num_elems; 147 | int tid = args->tid; 148 | 149 | for (int i = start; i < start + num_elems; i++) 150 | { 151 | #if 1 152 | struct KV* res = ht_p->find(elems[i]); 153 | if (res == nullptr || res->k == -1) { 154 | ++miss; 155 | ht_p->insert(new struct KV (elems[i], elems[i])); 156 | printf("miss!! key is %d\n",elems[i]); 157 | } 158 | #endif 159 | #if 0 160 | bool res = ht_p->insert(new struct KV(elems[i], elems[i])); 161 | if (res) { 162 | ++miss; 163 | printf("miss! 
key is %d\n",elems[i]); 164 | } 165 | #endif 166 | } 167 | 168 | } 169 | 170 | 171 | template 172 | void* thread_insert(void* threadArgs) 173 | { 174 | WorkerArgs* args = static_cast(threadArgs); 175 | int* elems = args->elems; 176 | T* ht_p = static_cast(args->ht_p); 177 | int start = args->start; 178 | int num_elems = args->num_elems; 179 | int tid = args->tid; 180 | 181 | for (int i = start; i < start + num_elems; i++) 182 | { 183 | ht_p->insert({elems[i], elems[i]}); 184 | } 185 | 186 | } 187 | 188 | template 189 | void* thread_remove(void* threadArgs) 190 | { 191 | WorkerArgs* args = static_cast(threadArgs); 192 | int* elems = args->elems; 193 | T* ht_p = static_cast(args->ht_p); 194 | int start = args->start; 195 | int num_elems = args->num_elems; 196 | int tid = args->tid; 197 | bool remove = args->remove; 198 | 199 | std::random_device rd; 200 | std::mt19937 mt(rd()); 201 | std::uniform_int_distribution rng(0, 200000 - 1); 202 | 203 | for (int i = start; i < start + num_elems; i++) 204 | { 205 | if (remove) 206 | ht_p->deleteVal(elems[i]); 207 | else 208 | ht_p->find(elems[rng(mt)]); 209 | } 210 | 211 | } 212 | 213 | #endif 214 | -------------------------------------------------------------------------------- /wheels/lockfreehash/lprobe/utilities.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include "parallel.h" 13 | 14 | using std::cout; 15 | using std::endl; 16 | 17 | template 18 | static void par_do_if(bool do_parallel, Lf left, Rf right, bool cons=false) { 19 | if (do_parallel) par_do(left, right, cons); 20 | else {left(); right();} 21 | } 22 | 23 | template 24 | inline void par_do3(Lf left, Mf mid, Rf right) { 25 | auto left_mid = [&] () {par_do(left,mid);}; 26 | par_do(left_mid, right); 27 | } 28 | 29 | template 30 | static void par_do3_if(bool do_parallel, Lf left, Mf 
mid, Rf right) { 31 | if (do_parallel) par_do3(left, mid, right); 32 | else {left(); mid(); right();} 33 | } 34 | 35 | namespace pbbs { 36 | template 37 | size_t log2_up(T); 38 | } 39 | 40 | template 41 | struct maybe { 42 | T value; 43 | bool valid; 44 | 45 | maybe(T v, bool u) : value(v) { 46 | valid = u; 47 | } 48 | maybe(T v) : value(v) { 49 | valid = true; 50 | } 51 | maybe() { 52 | valid = false; 53 | } 54 | 55 | bool operator !() const { 56 | return !valid; 57 | } 58 | operator bool() const { 59 | return valid; 60 | }; 61 | T& operator * () { 62 | return value; 63 | } 64 | }; 65 | 66 | namespace pbbs { 67 | 68 | struct empty {}; 69 | 70 | typedef uint32_t flags; 71 | const flags no_flag = 0; 72 | const flags fl_sequential = 1; 73 | const flags fl_debug = 2; 74 | const flags fl_time = 4; 75 | const flags fl_conservative = 8; 76 | const flags fl_inplace = 16; 77 | 78 | template 79 | inline void assign_uninitialized(T& a, const T& b) { 80 | new (static_cast(std::addressof(a))) T(b); 81 | } 82 | 83 | template 84 | inline void assign_uninitialized(T& a, T&& b) { 85 | new (static_cast(std::addressof(a))) T(std::move(b)); 86 | } 87 | 88 | template 89 | inline void move_uninitialized(T& a, const T b) { 90 | new (static_cast(std::addressof(a))) T(std::move(b)); 91 | } 92 | 93 | template 94 | inline void copy_memory(T& a, const T &b) { 95 | std::memcpy(&a, &b, sizeof(T)); 96 | } 97 | 98 | enum _copy_type { _assign, _move, _copy}; 99 | 100 | template<_copy_type copy_type, typename T> 101 | inline void copy_val(T& a, const T &b) { 102 | switch (copy_type) { 103 | case _assign: assign_uninitialized(a, b); break; 104 | case _move: move_uninitialized(a, b); break; 105 | case _copy: copy_memory(a,b); break; 106 | } 107 | } 108 | 109 | // a 32-bit hash function 110 | inline uint32_t hash32(uint32_t a) { 111 | a = (a+0x7ed55d16) + (a<<12); 112 | a = (a^0xc761c23c) ^ (a>>19); 113 | a = (a+0x165667b1) + (a<<5); 114 | a = (a+0xd3a2646c) ^ (a<<9); 115 | a = (a+0xfd7046c5) + 
(a<<3); 116 | a = (a^0xb55a4f09) ^ (a>>16); 117 | return a; 118 | } 119 | 120 | inline uint32_t hash32_2(uint32_t a) { 121 | uint32_t z = (a + 0x6D2B79F5UL); 122 | z = (z ^ (z >> 15)) * (z | 1UL); 123 | z ^= z + (z ^ (z >> 7)) * (z | 61UL); 124 | return z ^ (z >> 14); 125 | } 126 | 127 | inline uint32_t hash32_3(uint32_t a) { 128 | uint32_t z = a + 0x9e3779b9; 129 | z ^= z >> 15; // 16 for murmur3 130 | z *= 0x85ebca6b; 131 | z ^= z >> 13; 132 | z *= 0xc2b2ae3d; // 0xc2b2ae35 for murmur3 133 | return z ^= z >> 16; 134 | } 135 | 136 | 137 | // from numerical recipes 138 | inline uint64_t hash64(uint64_t u ) 139 | { 140 | uint64_t v = u * 3935559000370003845ul + 2691343689449507681ul; 141 | v ^= v >> 21; 142 | v ^= v << 37; 143 | v ^= v >> 4; 144 | v *= 4768777513237032717ul; 145 | v ^= v << 20; 146 | v ^= v >> 41; 147 | v ^= v << 5; 148 | return v; 149 | } 150 | 151 | // a slightly cheaper, but possibly not as good version 152 | // based on splitmix64 153 | inline uint64_t hash64_2(uint64_t x) { 154 | x = (x ^ (x >> 30)) * UINT64_C(0xbf58476d1ce4e5b9); 155 | x = (x ^ (x >> 27)) * UINT64_C(0x94d049bb133111eb); 156 | x = x ^ (x >> 31); 157 | return x; 158 | } 159 | 160 | 161 | template 162 | inline bool atomic_compare_and_swap(ET* a, ET oldval, ET newval) { 163 | static_assert(sizeof(ET) <= 8, "Bad CAS length"); 164 | if (sizeof(ET) == 1) { 165 | uint8_t r_oval, r_nval; 166 | std::memcpy(&r_oval, &oldval, 1); 167 | std::memcpy(&r_nval, &newval, 1); 168 | return __sync_bool_compare_and_swap(reinterpret_cast(a), r_oval, r_nval); 169 | } else if (sizeof(ET) == 4) { 170 | uint32_t r_oval, r_nval; 171 | std::memcpy(&r_oval, &oldval, 4); 172 | std::memcpy(&r_nval, &newval, 4); 173 | return __sync_bool_compare_and_swap(reinterpret_cast(a), r_oval, r_nval); 174 | } else { // if (sizeof(ET) == 8) { 175 | uint64_t r_oval, r_nval; 176 | std::memcpy(&r_oval, &oldval, 8); 177 | std::memcpy(&r_nval, &newval, 8); 178 | return __sync_bool_compare_and_swap(reinterpret_cast(a), r_oval, 
r_nval); 179 | } 180 | } 181 | 182 | template 183 | inline E fetch_and_add(E *a, EV b) { 184 | volatile E newV, oldV; 185 | do {oldV = *a; newV = oldV + b;} 186 | while (!atomic_compare_and_swap(a, oldV, newV)); 187 | return oldV; 188 | } 189 | 190 | template 191 | inline void write_add(E *a, EV b) { 192 | //volatile E newV, oldV; 193 | E newV, oldV; 194 | do {oldV = *a; newV = oldV + b;} 195 | while (!atomic_compare_and_swap(a, oldV, newV)); 196 | } 197 | 198 | template 199 | inline void write_add(std::atomic *a, EV b) { 200 | //volatile E newV, oldV; 201 | E newV, oldV; 202 | do {oldV = a->load(); newV = oldV + b;} 203 | while (!std::atomic_compare_exchange_strong(a, &oldV, newV)); 204 | } 205 | 206 | template 207 | inline bool write_min(ET *a, ET b, F less) { 208 | ET c; bool r=0; 209 | do c = *a; 210 | while (less(b,c) && !(r=atomic_compare_and_swap(a,c,b))); 211 | return r; 212 | } 213 | 214 | template 215 | inline bool write_min(std::atomic *a, ET b, F less) { 216 | ET c; bool r=0; 217 | do c = a->load(); 218 | while (less(b,c) && !(r=std::atomic_compare_exchange_strong(a, &c, b))); 219 | return r; 220 | } 221 | 222 | template 223 | inline bool write_max(ET *a, ET b, F less) { 224 | ET c; bool r=0; 225 | do c = *a; 226 | while (less(c,b) && !(r=atomic_compare_and_swap(a,c,b))); 227 | return r; 228 | } 229 | 230 | template 231 | inline bool write_max(std::atomic *a, ET b, F less) { 232 | ET c; bool r=0; 233 | do c = a->load(); 234 | while (less(c,b) && !(r=std::atomic_compare_exchange_strong(a, &c, b))); 235 | return r; 236 | } 237 | 238 | // returns the log base 2 rounded up (works on ints or longs or unsigned versions) 239 | template 240 | size_t log2_up(T i) { 241 | size_t a=0; 242 | T b=i-1; 243 | while (b > 0) {b = b >> 1; a++;} 244 | return a; 245 | } 246 | 247 | inline size_t granularity(size_t n) { 248 | return (n > 100) ? 
ceil(pow(n,0.5)) : 100; 249 | } 250 | } 251 | -------------------------------------------------------------------------------- /wheels/threadpool/ThreadPool.h: -------------------------------------------------------------------------------- 1 | #ifndef THREAD_POOL_H 2 | #define THREAD_POOL_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | class ThreadPool { 15 | public: 16 | ThreadPool(size_t); 17 | template 18 | auto enqueue(F&& f, Args&&... args) 19 | -> std::future::type>; 20 | ~ThreadPool(); 21 | private: 22 | // need to keep track of threads so we can join them 23 | std::vector< std::thread > workers; 24 | // the task queue 25 | std::queue< std::function > tasks; 26 | 27 | // synchronization 28 | std::mutex queue_mutex; 29 | std::condition_variable condition; 30 | bool stop; 31 | }; 32 | 33 | // the constructor just launches some amount of workers 34 | inline ThreadPool::ThreadPool(size_t threads) 35 | : stop(false) 36 | { 37 | for(size_t i = 0;i task; 44 | 45 | { 46 | std::unique_lock lock(this->queue_mutex); 47 | this->condition.wait(lock, 48 | [this]{ return this->stop || !this->tasks.empty(); }); 49 | if(this->stop && this->tasks.empty()) 50 | return; 51 | task = std::move(this->tasks.front()); 52 | this->tasks.pop(); 53 | } 54 | 55 | task(); 56 | } 57 | } 58 | ); 59 | } 60 | 61 | // add new work item to the pool 62 | template 63 | auto ThreadPool::enqueue(F&& f, Args&&... args) 64 | -> std::future::type> 65 | { 66 | using return_type = typename std::result_of::type; 67 | 68 | auto task = std::make_shared< std::packaged_task >( 69 | std::bind(std::forward(f), std::forward(args)...) 
70 | ); 71 | 72 | std::future res = task->get_future(); 73 | { 74 | std::unique_lock lock(queue_mutex); 75 | 76 | // don't allow enqueueing after stopping the pool 77 | if(stop) 78 | throw std::runtime_error("enqueue on stopped ThreadPool"); 79 | 80 | tasks.emplace([task](){ (*task)(); }); 81 | } 82 | condition.notify_one(); 83 | return res; 84 | } 85 | 86 | // the destructor joins all threads 87 | inline ThreadPool::~ThreadPool() 88 | { 89 | { 90 | std::unique_lock lock(queue_mutex); 91 | stop = true; 92 | } 93 | condition.notify_all(); 94 | for(std::thread &worker: workers) 95 | worker.join(); 96 | } 97 | 98 | #endif 99 | -------------------------------------------------------------------------------- /wheels/threadpool/ctpl.h: -------------------------------------------------------------------------------- 1 | 2 | /********************************************************* 3 | * 4 | * Copyright (C) 2014 by Vitaliy Vitsentiy 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | * 18 | *********************************************************/ 19 | 20 | 21 | #ifndef __ctpl_thread_pool_H__ 22 | #define __ctpl_thread_pool_H__ 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | 34 | 35 | #ifndef _ctplThreadPoolLength_ 36 | #define _ctplThreadPoolLength_ 1000000 37 | #endif 38 | 39 | 40 | // thread pool to run user's functors with signature 41 | // ret func(int id, other_params) 42 | // where id is the index of the thread that runs the functor 43 | // ret is some return type 44 | 45 | 46 | namespace ctpl { 47 | 48 | class thread_pool { 49 | 50 | public: 51 | 52 | thread_pool() : q(_ctplThreadPoolLength_) { this->init(); } 53 | thread_pool(int nThreads, int queueSize = _ctplThreadPoolLength_) : q(queueSize) { this->init(); this->resize(nThreads); } 54 | 55 | // the destructor waits for all the functions in the queue to be finished 56 | ~thread_pool() { 57 | this->stop(true); 58 | } 59 | 60 | // get the number of running threads in the pool 61 | int size() { return static_cast(this->threads.size()); } 62 | 63 | // number of idle threads 64 | int n_idle() { return this->nWaiting; } 65 | std::thread & get_thread(int i) { return *this->threads[i]; } 66 | 67 | // change the number of threads in the pool 68 | // should be called from one thread, otherwise be careful to not interleave, also with this->stop() 69 | // nThreads must be >= 0 70 | void resize(int nThreads) { 71 | if (!this->isStop && !this->isDone) { 72 | int oldNThreads = static_cast(this->threads.size()); 73 | if (oldNThreads <= nThreads) { // if the number of threads is increased 74 | this->threads.resize(nThreads); 75 | this->flags.resize(nThreads); 76 | 77 | for (int i = oldNThreads; i < nThreads; ++i) { 78 | this->flags[i] = std::make_shared>(false); 79 | this->set_thread(i); 80 | } 81 | } 82 | else { // the number of threads is decreased 83 | for (int i = oldNThreads - 1; i >= nThreads; --i) { 
84 | *this->flags[i] = true; // this thread will finish 85 | this->threads[i]->detach(); 86 | } 87 | { 88 | // stop the detached threads that were waiting 89 | std::unique_lock lock(this->mutex); 90 | this->cv.notify_all(); 91 | } 92 | this->threads.resize(nThreads); // safe to delete because the threads are detached 93 | this->flags.resize(nThreads); // safe to delete because the threads have copies of shared_ptr of the flags, not originals 94 | } 95 | } 96 | } 97 | 98 | // empty the queue 99 | void clear_queue() { 100 | std::function * _f; 101 | while (this->q.pop(_f)) 102 | delete _f; // empty the queue 103 | } 104 | 105 | // pops a functional wraper to the original function 106 | std::function pop() { 107 | std::function * _f = nullptr; 108 | this->q.pop(_f); 109 | std::unique_ptr> func(_f); // at return, delete the function even if an exception occurred 110 | 111 | std::function f; 112 | if (_f) 113 | f = *_f; 114 | return f; 115 | } 116 | 117 | 118 | // wait for all computing threads to finish and stop all threads 119 | // may be called asyncronously to not pause the calling thread while waiting 120 | // if isWait == true, all the functions in the queue are run, otherwise the queue is cleared without running the functions 121 | void stop(bool isWait = false) { 122 | if (!isWait) { 123 | if (this->isStop) 124 | return; 125 | this->isStop = true; 126 | for (int i = 0, n = this->size(); i < n; ++i) { 127 | *this->flags[i] = true; // command the threads to stop 128 | } 129 | this->clear_queue(); // empty the queue 130 | } 131 | else { 132 | if (this->isDone || this->isStop) 133 | return; 134 | this->isDone = true; // give the waiting threads a command to finish 135 | } 136 | { 137 | std::unique_lock lock(this->mutex); 138 | this->cv.notify_all(); // stop all waiting threads 139 | } 140 | for (int i = 0; i < static_cast(this->threads.size()); ++i) { // wait for the computing threads to finish 141 | if (this->threads[i]->joinable()) 142 | this->threads[i]->join(); 
143 | } 144 | // if there were no threads in the pool but some functors in the queue, the functors are not deleted by the threads 145 | // therefore delete them here 146 | this->clear_queue(); 147 | this->threads.clear(); 148 | this->flags.clear(); 149 | } 150 | 151 | template 152 | auto push(F && f, Rest&&... rest) ->std::future { 153 | auto pck = std::make_shared>( 154 | std::bind(std::forward(f), std::placeholders::_1, std::forward(rest)...) 155 | ); 156 | 157 | auto _f = new std::function([pck](int id) { 158 | (*pck)(id); 159 | }); 160 | this->q.push(_f); 161 | 162 | std::unique_lock lock(this->mutex); 163 | this->cv.notify_one(); 164 | 165 | return pck->get_future(); 166 | } 167 | 168 | // run the user's function that excepts argument int - id of the running thread. returned value is templatized 169 | // operator returns std::future, where the user can get the result and rethrow the catched exceptins 170 | template 171 | auto push(F && f) ->std::future { 172 | auto pck = std::make_shared>(std::forward(f)); 173 | 174 | auto _f = new std::function([pck](int id) { 175 | (*pck)(id); 176 | }); 177 | this->q.push(_f); 178 | 179 | std::unique_lock lock(this->mutex); 180 | this->cv.notify_one(); 181 | 182 | return pck->get_future(); 183 | } 184 | 185 | 186 | private: 187 | 188 | // deleted 189 | thread_pool(const thread_pool &);// = delete; 190 | thread_pool(thread_pool &&);// = delete; 191 | thread_pool & operator=(const thread_pool &);// = delete; 192 | thread_pool & operator=(thread_pool &&);// = delete; 193 | 194 | void set_thread(int i) { 195 | std::shared_ptr> flag(this->flags[i]); // a copy of the shared ptr to the flag 196 | auto f = [this, i, flag/* a copy of the shared ptr to the flag */]() { 197 | std::atomic & _flag = *flag; 198 | std::function * _f; 199 | bool isPop = this->q.pop(_f); 200 | while (true) { 201 | while (isPop) { // if there is anything in the queue 202 | std::unique_ptr> func(_f); // at return, delete the function even if an exception 
occurred 203 | (*_f)(i); 204 | 205 | if (_flag) 206 | return; // the thread is wanted to stop, return even if the queue is not empty yet 207 | else 208 | isPop = this->q.pop(_f); 209 | } 210 | 211 | // the queue is empty here, wait for the next command 212 | std::unique_lock lock(this->mutex); 213 | ++this->nWaiting; 214 | this->cv.wait(lock, [this, &_f, &isPop, &_flag](){ isPop = this->q.pop(_f); return isPop || this->isDone || _flag; }); 215 | --this->nWaiting; 216 | 217 | if (!isPop) 218 | return; // if the queue is empty and this->isDone == true or *flag then return 219 | } 220 | }; 221 | this->threads[i].reset(new std::thread(f)); // compiler may not support std::make_unique() 222 | } 223 | 224 | void init() { this->nWaiting = 0; this->isStop = false; this->isDone = false; } 225 | 226 | std::vector> threads; 227 | std::vector>> flags; 228 | mutable boost::lockfree::queue *> q; 229 | std::atomic isDone; 230 | std::atomic isStop; 231 | std::atomic nWaiting; // how many threads are waiting 232 | 233 | std::mutex mutex; 234 | std::condition_variable cv; 235 | }; 236 | 237 | } 238 | 239 | #endif // __ctpl_thread_pool_H__ 240 | 241 | 242 | -------------------------------------------------------------------------------- /wheels/threadpool/threadpool_example.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "ctpl.h" 6 | #include 7 | #include 8 | #include /* mmap() is defined in this header */ 9 | #include 10 | #include 11 | #include 12 | 13 | 14 | struct SArg { 15 | int x; 16 | int y; 17 | }; 18 | 19 | 20 | 21 | ctpl::thread_pool* pool; 22 | ctpl::thread_pool* spool; 23 | 24 | int some_function(int id, struct SArg *arg) { 25 | std::cout << "hello " << arg->x << std::endl; 26 | std::this_thread::sleep_for(std::chrono::seconds(1)); 27 | std::cout << "world " << arg->y << std::endl; 28 | return arg->y; 29 | } 30 | 31 | static void generate_input(uint64_t fid) { 32 | char 
path[1000]; 33 | std::string __output_dir = "/hyper/fuzz/tmp"; 34 | std::string output_file = std::string(__output_dir) + "/" + 35 | std::to_string(fid) + "-id"; 36 | //std::string input_file = std::string(__output_dir) + "/" + taint_file; 37 | std::string input_file = "/home/cju/e2e/filter_des/0-id"; 38 | //std::cout << "out file is " << output_file << std::endl; 39 | struct stat statbuf; 40 | void *src, *dst; 41 | int fdout, fdin; 42 | int mode = 0x777; 43 | 44 | /* open the input file */ 45 | if ((fdin = open (input_file.c_str(), O_RDONLY)) < 0) 46 | { 47 | //assert(false && "can't open file for reading"); 48 | printf("cannot open input file!\n"); 49 | return; 50 | } 51 | 52 | /* open/create the output file */ 53 | if ((fdout = open (output_file.c_str(), O_RDWR | O_CREAT | O_TRUNC, mode)) < 0)//edited here 54 | { 55 | //assert(false && "can't create file for writing"); 56 | return; 57 | } 58 | 59 | /* find size of input file */ 60 | if (fstat (fdin,&statbuf) < 0) 61 | { 62 | //assert (false && "fstat error"); 63 | close(fdin); 64 | return; 65 | } 66 | 67 | /* mmap the input file */ 68 | if ((src = mmap (0, statbuf.st_size, PROT_READ, MAP_SHARED, fdin, 0)) 69 | == (caddr_t) -1) { 70 | close(fdin); 71 | return; 72 | } 73 | 74 | dst = malloc(statbuf.st_size); 75 | 76 | /* this copies the input file to the output file */ 77 | memcpy (dst, src, statbuf.st_size); 78 | for (int i=0;i<4;i++) { 79 | ((uint8_t*)dst)[i] = i; 80 | //printf("generate_input index is %u and value is %u\n", it->first,(uint32_t)it->second); 81 | } 82 | 83 | if (write(fdout, dst, statbuf.st_size) < 0) { 84 | return; 85 | } 86 | 87 | close(fdin); 88 | close(fdout); 89 | free(dst); 90 | } 91 | 92 | std::atomic id(0); 93 | std::atomic count(0); 94 | 95 | void addAll(int i) { 96 | generate_input(++id); 97 | count++; 98 | std::cout << "task count is " << count << std::endl; 99 | //uint64_t sum = 0; 100 | //std::this_thread::sleep_for (std::chrono::milliseconds(10)); 101 | /* 102 | for(int 
i=0;i<10000000;i++) { 103 | sum += i; 104 | } 105 | */ 106 | // return sum; 107 | } 108 | 109 | void task(int i) { 110 | spool->push(addAll); 111 | } 112 | 113 | 114 | 115 | int main(int argc, char** argv) 116 | { 117 | int num_of_threads = 0; 118 | if (sscanf (argv[1], "%i", &num_of_threads) != 1) { 119 | fprintf(stderr, "error - not an integer"); 120 | } 121 | pool = new ctpl::thread_pool(num_of_threads); 122 | spool = new ctpl::thread_pool(num_of_threads); 123 | std::vector< std::future > results; 124 | 125 | for(int i = 0; i < 10000; ++i) { 126 | pool->push(task); 127 | } 128 | 129 | //std::cout <<"check results" << std::endl; 130 | //for(auto && result: results) 131 | // std::cout << result.get() << ' '; 132 | //std::cout << std::endl; 133 | //delete pool; 134 | spool->stop(true); 135 | pool->stop(true); 136 | 137 | return 0; 138 | } 139 | --------------------------------------------------------------------------------