├── .gitignore
├── .gitmodules
├── CMakeLists.txt
├── Dockerfile
├── LICENSE.md
├── README.md
├── example
    ├── CMakeLists.txt
    ├── example1.cpp
    └── hybrid_timer.h
├── src
    ├── CMakeLists.txt
    ├── avx-utility.h
    ├── bitvector.cpp
    ├── bitvector.h
    ├── bitvector_block.cpp
    ├── bitvector_block.h
    ├── bitvector_iterator.cpp
    ├── bitvector_iterator.h
    ├── byteslice_column_block.cpp
    ├── byteslice_column_block.h
    ├── column.cpp
    ├── column.h
    ├── column_block.h
    ├── macros.h
    ├── naive_column_block.cpp
    ├── naive_column_block.h
    ├── param.h
    ├── sequential_binary_file.cpp
    ├── sequential_binary_file.h
    ├── types.cpp
    └── types.h
└── tests
    ├── CMakeLists.txt
    ├── avx-utility_test.cpp
    ├── bitvector_block_test.cpp
    ├── bitvector_iterator_test.cpp
    ├── bitvector_test.cpp
    ├── byteslice_column_block_test.cpp
    └── column_test.cpp


/.gitignore:
--------------------------------------------------------------------------------
 1 | *
 2 | !*/
 3 | !*.cpp
 4 | !*.h
 5 | !*.c
 6 | !*.cc
 7 | !*.gp
 8 | !*.sh
 9 | !*.awk
10 | !CMakeLists.txt
11 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "third-party/googletest"]
2 | 	path = third-party/googletest
3 | 	url = https://github.com/google/googletest.git
4 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 2.8 FATAL_ERROR)
 2 | 
 3 | project (ByteSlice)
 4 | 
 5 | # Flags every translation unit needs: warnings as errors, AVX2, 64-bit, C++11, OpenMP.
 6 | set(warnings "-Wall -Werror")
 7 | set(misc "-mavx2 -m64 -std=c++11 -fopenmp")
 8 | 
 9 | # Default to a Debug build when the user did not choose a build type.
10 | if(NOT CMAKE_BUILD_TYPE)
11 |     set(CMAKE_BUILD_TYPE Debug CACHE STRING "Default build type: Debug." FORCE)
12 | endif()
13 | # First configure only: seed the cached flags. User flags (CXXFLAGS, -DCMAKE_CXX_FLAGS) are kept and come last.
14 | if(NOT CONFIGURED_ONCE)
15 |     set(CMAKE_CXX_FLAGS "${warnings} ${misc} ${CMAKE_CXX_FLAGS}"
16 |         CACHE STRING "Flags used by the compiler during all build types." FORCE)
17 |     set(CMAKE_CXX_FLAGS_DEBUG "-g -ggdb -O0"
18 |         CACHE STRING "Flags used by the compiler during debug build type." FORCE)
19 |     set(CMAKE_CXX_FLAGS_RELEASE "-O3 -D NDEBUG"
20 |         CACHE STRING "Flags used by the compiler during release build type." FORCE)
21 |     set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -ggdb -D NDEBUG"
22 |         CACHE STRING "Flags used by the compiler during relwithdebinfo build type." FORCE)
23 | endif()
24 | 
25 | # Sources include headers as "src/NAME.h", so this project's root is the include root.
26 | include_directories("${PROJECT_SOURCE_DIR}")
27 | add_subdirectory(src)
28 | add_subdirectory(example)
29 | 
30 | 
31 | #############################
32 | ##  Set up test
33 | ############################
34 | enable_testing()
35 | set(testlog "tests/tests.log")
36 | add_custom_target(check-build)   # aggregate target; nothing is attached to it in this file
37 | add_custom_target(check
38 |     COMMAND ${CMAKE_CTEST_COMMAND}
39 |         --output-on-failure --output-log ${testlog}
40 |     COMMENT "Test log is written to ${testlog}"
41 |     VERBATIM
42 | )
43 | add_dependencies(check check-build)
44 | add_subdirectory(third-party/googletest/googletest EXCLUDE_FROM_ALL)
45 | add_subdirectory(tests EXCLUDE_FROM_ALL)
46 | 
47 | # Remember that the first configure has happened so the cached flags above are not reset again.
48 | set(CONFIGURED_ONCE TRUE CACHE INTERNAL "A flag showing that CMake has configured at least once.")
49 | 
50 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM ubuntu:trusty
 2 | LABEL maintainer="Ziqiang Feng <zf@cs.cmu.edu>"
 3 | 
 4 | RUN apt-get update && apt-get install -y \
 5 |     git \
 6 |     cmake \
 7 |     g++ \
 8 |     && apt-get clean \
 9 |     && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
10 | 
11 | COPY . /root/ByteSlice
12 | WORKDIR /root/ByteSlice
13 | RUN mkdir release
14 | RUN cd release && cmake -DCMAKE_BUILD_TYPE=release .. && make -j4
15 | # Default command: run the example scan with -s 16000000 (exec form, so signals reach the process directly).
16 | CMD ["/root/ByteSlice/release/example/example1", "-s", "16000000"]
17 | 
18 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
  1 | 
  2 |                                  Apache License
  3 |                            Version 2.0, January 2004
  4 |                         http://www.apache.org/licenses/
  5 | 
  6 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  7 | 
  8 |    1. Definitions.
  9 | 
 10 |       "License" shall mean the terms and conditions for use, reproduction,
 11 |       and distribution as defined by Sections 1 through 9 of this document.
 12 | 
 13 |       "Licensor" shall mean the copyright owner or entity authorized by
 14 |       the copyright owner that is granting the License.
 15 | 
 16 |       "Legal Entity" shall mean the union of the acting entity and all
 17 |       other entities that control, are controlled by, or are under common
 18 |       control with that entity. For the purposes of this definition,
 19 |       "control" means (i) the power, direct or indirect, to cause the
 20 |       direction or management of such entity, whether by contract or
 21 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 22 |       outstanding shares, or (iii) beneficial ownership of such entity.
 23 | 
 24 |       "You" (or "Your") shall mean an individual or Legal Entity
 25 |       exercising permissions granted by this License.
 26 | 
 27 |       "Source" form shall mean the preferred form for making modifications,
 28 |       including but not limited to software source code, documentation
 29 |       source, and configuration files.
 30 | 
 31 |       "Object" form shall mean any form resulting from mechanical
 32 |       transformation or translation of a Source form, including but
 33 |       not limited to compiled object code, generated documentation,
 34 |       and conversions to other media types.
 35 | 
 36 |       "Work" shall mean the work of authorship, whether in Source or
 37 |       Object form, made available under the License, as indicated by a
 38 |       copyright notice that is included in or attached to the work
 39 |       (an example is provided in the Appendix below).
 40 | 
 41 |       "Derivative Works" shall mean any work, whether in Source or Object
 42 |       form, that is based on (or derived from) the Work and for which the
 43 |       editorial revisions, annotations, elaborations, or other modifications
 44 |       represent, as a whole, an original work of authorship. For the purposes
 45 |       of this License, Derivative Works shall not include works that remain
 46 |       separable from, or merely link (or bind by name) to the interfaces of,
 47 |       the Work and Derivative Works thereof.
 48 | 
 49 |       "Contribution" shall mean any work of authorship, including
 50 |       the original version of the Work and any modifications or additions
 51 |       to that Work or Derivative Works thereof, that is intentionally
 52 |       submitted to Licensor for inclusion in the Work by the copyright owner
 53 |       or by an individual or Legal Entity authorized to submit on behalf of
 54 |       the copyright owner. For the purposes of this definition, "submitted"
 55 |       means any form of electronic, verbal, or written communication sent
 56 |       to the Licensor or its representatives, including but not limited to
 57 |       communication on electronic mailing lists, source code control systems,
 58 |       and issue tracking systems that are managed by, or on behalf of, the
 59 |       Licensor for the purpose of discussing and improving the Work, but
 60 |       excluding communication that is conspicuously marked or otherwise
 61 |       designated in writing by the copyright owner as "Not a Contribution."
 62 | 
 63 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 64 |       on behalf of whom a Contribution has been received by Licensor and
 65 |       subsequently incorporated within the Work.
 66 | 
 67 |    2. Grant of Copyright License. Subject to the terms and conditions of
 68 |       this License, each Contributor hereby grants to You a perpetual,
 69 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 70 |       copyright license to reproduce, prepare Derivative Works of,
 71 |       publicly display, publicly perform, sublicense, and distribute the
 72 |       Work and such Derivative Works in Source or Object form.
 73 | 
 74 |    3. Grant of Patent License. Subject to the terms and conditions of
 75 |       this License, each Contributor hereby grants to You a perpetual,
 76 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 77 |       (except as stated in this section) patent license to make, have made,
 78 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 79 |       where such license applies only to those patent claims licensable
 80 |       by such Contributor that are necessarily infringed by their
 81 |       Contribution(s) alone or by combination of their Contribution(s)
 82 |       with the Work to which such Contribution(s) was submitted. If You
 83 |       institute patent litigation against any entity (including a
 84 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 85 |       or a Contribution incorporated within the Work constitutes direct
 86 |       or contributory patent infringement, then any patent licenses
 87 |       granted to You under this License for that Work shall terminate
 88 |       as of the date such litigation is filed.
 89 | 
 90 |    4. Redistribution. You may reproduce and distribute copies of the
 91 |       Work or Derivative Works thereof in any medium, with or without
 92 |       modifications, and in Source or Object form, provided that You
 93 |       meet the following conditions:
 94 | 
 95 |       (a) You must give any other recipients of the Work or
 96 |           Derivative Works a copy of this License; and
 97 | 
 98 |       (b) You must cause any modified files to carry prominent notices
 99 |           stating that You changed the files; and
100 | 
101 |       (c) You must retain, in the Source form of any Derivative Works
102 |           that You distribute, all copyright, patent, trademark, and
103 |           attribution notices from the Source form of the Work,
104 |           excluding those notices that do not pertain to any part of
105 |           the Derivative Works; and
106 | 
107 |       (d) If the Work includes a "NOTICE" text file as part of its
108 |           distribution, then any Derivative Works that You distribute must
109 |           include a readable copy of the attribution notices contained
110 |           within such NOTICE file, excluding those notices that do not
111 |           pertain to any part of the Derivative Works, in at least one
112 |           of the following places: within a NOTICE text file distributed
113 |           as part of the Derivative Works; within the Source form or
114 |           documentation, if provided along with the Derivative Works; or,
115 |           within a display generated by the Derivative Works, if and
116 |           wherever such third-party notices normally appear. The contents
117 |           of the NOTICE file are for informational purposes only and
118 |           do not modify the License. You may add Your own attribution
119 |           notices within Derivative Works that You distribute, alongside
120 |           or as an addendum to the NOTICE text from the Work, provided
121 |           that such additional attribution notices cannot be construed
122 |           as modifying the License.
123 | 
124 |       You may add Your own copyright statement to Your modifications and
125 |       may provide additional or different license terms and conditions
126 |       for use, reproduction, or distribution of Your modifications, or
127 |       for any such Derivative Works as a whole, provided Your use,
128 |       reproduction, and distribution of the Work otherwise complies with
129 |       the conditions stated in this License.
130 | 
131 |    5. Submission of Contributions. Unless You explicitly state otherwise,
132 |       any Contribution intentionally submitted for inclusion in the Work
133 |       by You to the Licensor shall be under the terms and conditions of
134 |       this License, without any additional terms or conditions.
135 |       Notwithstanding the above, nothing herein shall supersede or modify
136 |       the terms of any separate license agreement you may have executed
137 |       with Licensor regarding such Contributions.
138 | 
139 |    6. Trademarks. This License does not grant permission to use the trade
140 |       names, trademarks, service marks, or product names of the Licensor,
141 |       except as required for reasonable and customary use in describing the
142 |       origin of the Work and reproducing the content of the NOTICE file.
143 | 
144 |    7. Disclaimer of Warranty. Unless required by applicable law or
145 |       agreed to in writing, Licensor provides the Work (and each
146 |       Contributor provides its Contributions) on an "AS IS" BASIS,
147 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 |       implied, including, without limitation, any warranties or conditions
149 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 |       PARTICULAR PURPOSE. You are solely responsible for determining the
151 |       appropriateness of using or redistributing the Work and assume any
152 |       risks associated with Your exercise of permissions under this License.
153 | 
154 |    8. Limitation of Liability. In no event and under no legal theory,
155 |       whether in tort (including negligence), contract, or otherwise,
156 |       unless required by applicable law (such as deliberate and grossly
157 |       negligent acts) or agreed to in writing, shall any Contributor be
158 |       liable to You for damages, including any direct, indirect, special,
159 |       incidental, or consequential damages of any character arising as a
160 |       result of this License or out of the use or inability to use the
161 |       Work (including but not limited to damages for loss of goodwill,
162 |       work stoppage, computer failure or malfunction, or any and all
163 |       other commercial damages or losses), even if such Contributor
164 |       has been advised of the possibility of such damages.
165 | 
166 |    9. Accepting Warranty or Additional Liability. While redistributing
167 |       the Work or Derivative Works thereof, You may choose to offer,
168 |       and charge a fee for, acceptance of support, warranty, indemnity,
169 |       or other liability obligations and/or rights consistent with this
170 |       License. However, in accepting such obligations, You may act only
171 |       on Your own behalf and on Your sole responsibility, not on behalf
172 |       of any other Contributor, and only if You agree to indemnify,
173 |       defend, and hold each Contributor harmless for any liability
174 |       incurred by, or claims asserted against, such Contributor by reason
175 |       of your accepting any such warranty or additional liability.
176 | 
177 |    END OF TERMS AND CONDITIONS
178 | 
179 |    APPENDIX: How to apply the Apache License to your work.
180 | 
181 |       To apply the Apache License to your work, attach the following
182 |       boilerplate notice, with the fields enclosed by brackets "[]"
183 |       replaced with your own identifying information. (Don't include
184 |       the brackets!)  The text should be enclosed in the appropriate
185 |       comment syntax for the file format. We also recommend that a
186 |       file or class name and description of purpose be included on the
187 |       same "printed page" as the copyright notice for easier
188 |       identification within third-party archives.
189 | 
190 |    Copyright [yyyy] [name of copyright owner]
191 | 
192 |    Licensed under the Apache License, Version 2.0 (the "License");
193 |    you may not use this file except in compliance with the License.
194 |    You may obtain a copy of the License at
195 | 
196 |        http://www.apache.org/licenses/LICENSE-2.0
197 | 
198 |    Unless required by applicable law or agreed to in writing, software
199 |    distributed under the License is distributed on an "AS IS" BASIS,
200 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 |    See the License for the specific language governing permissions and
202 |    limitations under the License.
203 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | **ByteSlice** is a main-memory data format for fixed length unsigned
  2 | integers, and attributes that can be encoded as such (e.g., age, datetime). 
  3 | It is primarily designed for highly efficient
  4 | ordinal comparison based *scan* and *lookup* in **column-store databases**. The basic idea is to
  5 | chop column values into multiple bytes and store the bytes at different
  6 | contiguous memory spaces.
  7 | 
  8 | The implementation heavily utilizes Single-Instruction-Multiple-Data
  9 | (**SIMD**) instruction sets on modern CPUs to achieve bare-metal speed
 10 | processing. The scan algorithms are optimized to reduce the number of
 11 | instructions, memory footprint, branch mis-predictions and other
 12 | performance-critical factors.
 13 | 
 14 | 
 15 | # Using the library
 16 | 
 17 | A quick glimpse:
 18 | 
 19 | ```c++
 20 | // Create a column of two million 12-bit values in ByteSlice format
 21 | Column* column = new Column(ColumnType::kByteSlicePadRight, 12, 2*1024*1024);
 22 | // Prepare a bit vector to store scan results
 23 | BitVector* bitvector = new BitVector(column);
 24 | // Execute scan on the column with predicate value < 3
 25 | column->Scan(Comparator::kLess,
 26 |             3,
 27 |             bitvector,
 28 |             Bitwise::kSet);
 29 | 
 30 | ```
 31 | 
 32 | 
 33 | # Build from source
 34 | 
 35 | ## Clone
 36 | 
 37 | ```bash
 38 | git clone --recursive https://github.com/fzqneo/ByteSlice.git
 39 | ```
 40 | 
 41 | Or this after cloning without `--recursive`:
 42 | 
 43 | ```bash
 44 | git submodule update --init --recursive
 45 | ```
 46 | 
 47 | 
 48 | ## Build
 49 | 
 50 | You need [CMake](https://cmake.org/) to generate build scripts. The Makefile generator has been tested.
 51 | 
 52 | To generate debug build:
 53 | 
 54 | ```bash
 55 | mkdir debug
 56 | cd debug
 57 | cmake -DCMAKE_BUILD_TYPE=debug ..
 58 | make -j4
 59 | ```
 60 | 
 61 | To generate release build:
 62 | 
 63 | ```bash
 64 | mkdir release
 65 | cd release
 66 | cmake -DCMAKE_BUILD_TYPE=release ..
 67 | make -j4
 68 | ```
 69 | 
 70 | NOTE: The default build type is `debug`, which may not give optimal
 71 | performance.
 72 | 
 73 | 
 74 | # Running examples
 75 | 
 76 | Example programs are in the `example/` directory.
 77 | 
 78 | ```bash
 79 | example/example1 -s 10000000
 80 | ```
 81 | 
 82 | To see a full list of options:
 83 | 
 84 | ```bash
 85 | example/example1 -h
 86 | ```
 87 | 
 88 | NOTE: The source code of the example program showcases how to use the library.
 89 | 
 90 | 
 91 | # Multithreading
 92 | 
 93 | Multithreading is controlled by OpenMP environment variables (assuming
 94 | you use GCC):
 95 | 
 96 | ```bash
 97 | OMP_NUM_THREADS=2 ./example/example1
 98 | ```
 99 | 
100 | NOTE: The default number of threads depends on the system, which is
101 | usually the number of cores. You may also want to set the thread
102 | affinity via `GOMP_CPU_AFFINITY` (assuming you use GCC).
103 | 
104 | 
105 | # Running tests
106 | 
107 | ```bash
108 | make check
109 | ```
110 | 
111 | To build the tests without running them:
112 | 
113 | ```bash
114 | make check-build
115 | ```
116 | 
117 | 
118 | #  Documentation (work in progress)
119 | 
120 | You need doxygen to generate the documentation in HTML and LaTeX.
121 | 
122 | ```bash
123 |  doxygen
124 | ```
125 | 
126 | 
127 | # File structure
128 | 
129 | + `example/` - Example programs
130 | 
131 | + `third-party/` - Third-party libraries
132 | 
133 | + `src/` - ByteSlice library source files
134 | 
135 | + `tests/` - Unit tests written in GoogleTest framework
136 | 
137 | 
138 | # Run examples in Docker
139 | 
140 | A compiled release-build is contained in the Docker image [zf01/byteslice](https://hub.docker.com/r/zf01/byteslice/).
141 | You need to install [Docker](https://www.docker.com/).
142 | 
143 | Run with default parameters:
144 | ```bash
145 | docker run --rm zf01/byteslice
146 | ```
147 | 
148 | Run with custom parameters:
149 | ```bash
150 | docker run --rm -it zf01/byteslice /bin/bash
151 | OMP_NUM_THREADS=1 /root/ByteSlice/release/example/example1 -s 16000000 -b 17
152 | ```
153 | 
154 | ## Build Docker image from source
155 | 
156 | ```bash
157 | # Run inside the project directory
158 | docker build -t byteslice .
159 | ```
160 | 
161 | 
162 | # Citing this work
163 | 
164 | Ziqiang Feng, Eric Lo, Ben Kao, and Wenjian Xu. "**Byteslice: Pushing
165 | the envelop of main memory data processing with a new storage layout.**"
166 | In Proceedings of the 2015 ACM SIGMOD International Conference on
167 | Management of Data, pp. 31-46. ACM, 2015.
168 | 
169 | Download: http://dl.acm.org/citation.cfm?id=2747642
170 | 
171 | BibTex:
172 | ```
173 | @inproceedings{Feng:2015:BPE:2723372.2747642,
174 |  author = {Feng, Ziqiang and Lo, Eric and Kao, Ben and Xu, Wenjian},
175 |  title = {ByteSlice: Pushing the Envelop of Main Memory Data Processing with a New Storage Layout},
176 |  booktitle = {Proceedings of the 2015 ACM SIGMOD International Conference on Management of Data},
177 |  series = {SIGMOD '15},
178 |  year = {2015},
179 |  isbn = {978-1-4503-2758-9},
180 |  location = {Melbourne, Victoria, Australia},
181 |  pages = {31--46},
182 |  numpages = {16},
183 |  url = {http://doi.acm.org/10.1145/2723372.2747642},
184 |  doi = {10.1145/2723372.2747642},
185 |  acmid = {2747642},
186 |  publisher = {ACM},
187 |  address = {New York, NY, USA},
188 |  keywords = {column store, main memory, olap, simd, storage layout},
189 | } 
190 | ```
191 | 
192 | 
193 | # Contact
194 | 
195 | Ziqiang Feng ( zf at cs dot cmu dot edu )
196 | 
197 | 
198 | # Platform requirements
199 | 
200 | 1. C++ compiler supporting C++11, OpenMP and AVX2
201 | 2. CPU with AVX2 instruction set extension
202 | 
203 | 
204 | # Tested platform
205 | 
206 | This package has been tested with the following configuration:
207 | 
208 | - Linux 3.13.0-66-generic (64-bit)
209 | - Intel(R) Core(TM) i7-4770 CPU @ 3.40GHz
210 | - g++ 4.9.3
211 | 
212 | 
213 | # Known issues
214 | 
215 | 1. `posix_memalign()` is used in some files, causing compilation failure
216 |    on Windows.
217 | 


--------------------------------------------------------------------------------
/example/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Example programs to build; each NAME listed here is compiled from NAME.cpp.
 2 | list(APPEND examples example1)
 3 | 
 4 | # One executable per example, linked against the byteslice-core library.
 5 | foreach(example_name ${examples})
 6 |     add_executable(${example_name} "${example_name}.cpp")
 7 |     target_link_libraries(${example_name} byteslice-core)
 8 | endforeach()
 9 | 
10 | 


--------------------------------------------------------------------------------
/example/example1.cpp:
--------------------------------------------------------------------------------
  1 | #include    <iostream>
  2 | #include    <unistd.h>
  3 | #include    <string>
  4 | #include    <cstdlib>
  5 | #include    <ctime>
  6 | #include    <omp.h>
  7 | #include    <map>
  8 | #include    <random>
  9 | #include    <functional>
 10 | 
 11 | #include "src/bitvector.h"
 12 | #include "src/column.h"
 13 | #include "src/types.h"
 14 | 
 15 | #include "hybrid_timer.h"
 16 | 
 17 | using namespace byteslice;
 18 | 
 19 | std::map<std::string, ColumnType> ctypeMap = {   // short names accepted by the -t option
 20 |     {"bs",  ColumnType::kByteSlicePadRight},
 21 |     {"na",  ColumnType::kNaive}
 22 | };
 23 | 
 24 | struct arg_t {  // options + defaults; named (not a typedef'd unnamed struct) so the initializers stay well-formed
 25 |     ColumnType  coltype = ColumnType::kByteSlicePadRight;   // -t: column type
 26 |     size_t      size    = 16*1024*1024;                     // -s: number of rows
 27 |     size_t      nbits   = 12;                               // -b: bit width of each value
 28 |     double      selectivity = 0.1;                          // -y: selectivity of the scan predicate
 29 |     size_t      repeat  = 3;                                // -r: number of times the scan is repeated
 30 | };
 31 | 
 32 | 
 33 | void parse_arg(arg_t &arg, int &argc, char** &argv);
 34 | void print_arg(const arg_t& arg);
 35 |     
 36 | int main(int argc, char* argv[]){
 37 |     arg_t arg;
 38 |     parse_arg(arg, argc, argv);
 39 |     // Allocate the column and the bit vector that receives the scan result.
 40 |     std::cout << "[INFO ] Creating column ..." << std::endl;
 41 |     Column* column = new Column(arg.coltype, arg.nbits, arg.size);
 42 |     std::cout << "[INFO ] Creating bit vector ..." << std::endl;
 43 |     BitVector* bitvector = new BitVector(column);
 44 |     // Fill every tuple with a random value masked down to its low nbits bits.
 45 |     std::cout << "[INFO ] Populating column with random values ..." << std::endl;
 46 |     auto dice = std::bind(std::uniform_int_distribution<WordUnit>(
 47 |                             std::numeric_limits<WordUnit>::min(),
 48 |                             std::numeric_limits<WordUnit>::max()),
 49 |                             std::default_random_engine(std::time(0)));
 50 |     WordUnit mask = (arg.nbits < 64) ? (1ULL << arg.nbits) - 1 : ~0ULL;  // a shift by >= 64 is undefined
 51 |     for(size_t i=0; i < arg.size; i++){
 52 |         column->SetTuple(i, dice() & mask);
 53 |     }
 54 |     // Time `repeat` scans of the predicate "value < mask * selectivity".
 55 |     std::cout << "[INFO ] omp_max_threads = " << omp_get_max_threads() << std::endl;
 56 |     std::cout << "[INFO ] Executing scan ..." << std::endl;
 57 |     HybridTimer t1;
 58 |     t1.Start();
 59 |     for(size_t r = 0; r < arg.repeat; r++){
 60 |         column->Scan(Comparator::kLess,
 61 |                     static_cast<WordUnit>(mask*arg.selectivity),
 62 |                     bitvector,
 63 |                     Bitwise::kSet);
 64 |     }
 65 |     t1.Stop();
 66 |     // Per-scan averages, divided in floating point: no truncation, no integer divide-by-zero when repeat is 0.
 67 |     std::cout << "Wall time (sec), CPU cost (cycle/value)" << std::endl;
 68 |     std::cout << t1.GetSeconds()/arg.repeat << ", "
 69 |                 << static_cast<double>(t1.GetNumCycles())/arg.repeat/arg.size
 70 |                 << std::endl;
 71 |                 
 72 |     std::cout << "[INFO ] Releasing memory ..." << std::endl;
 73 |     delete column;
 74 |     delete bitvector;
 75 | }
 76 | 
 77 | 
 78 | void parse_arg(arg_t &arg, int &argc, char** &argv){
 79 |     // Converts an option value to a strictly positive integer. atoi() would silently
 80 |     // turn garbage into 0 and negative input into a huge size_t, so reject both here.
 81 |     auto positive = [](char opt, const char* text) -> size_t {
 82 |         char* end = nullptr;
 83 |         const long long value = std::strtoll(text, &end, 10);
 84 |         if(end == text || *end != '\0' || value <= 0){
 85 |             std::cerr << "Invalid value for -" << opt << ": " << text << std::endl;
 86 |             exit(1);
 87 |         }
 88 |         return static_cast<size_t>(value);
 89 |     };
 90 |     int c;
 91 |     while((c = getopt(argc, argv, "t:s:b:y:r:h")) != -1){
 92 |         switch(c){
 93 |             case 'h':   // print the usage line and quit successfully
 94 |                 std::cout << "Usage: " << argv[0]
 95 |                 << " [-t <column type = na|bs>]" << " [-s <size (number of rows)>]"
 96 |                 << " [-b <bit width>]" << " [-y <selectivity>]" << " [-r <repeat>]"
 97 |                 << std::endl;
 98 |                 exit(0);
 99 |             case 't': { // column type, looked up once by its short name in ctypeMap
100 |                 const auto found = ctypeMap.find(optarg);
101 |                 if(found == ctypeMap.end()){
102 |                     std::cerr << "Unknown column type: " << optarg << std::endl;
103 |                     exit(1);
104 |                 }
105 |                 arg.coltype = found->second;
106 |                 break;
107 |             }
108 |             case 's': arg.size   = positive('s', optarg); break;
109 |             case 'b': arg.nbits  = positive('b', optarg); break;
110 |             case 'r': arg.repeat = positive('r', optarg); break;
111 |             case 'y':
112 |                 arg.selectivity = atof(optarg);
113 |                 break;
114 |         }
115 |     }
116 |     
117 |     print_arg(arg);   
118 | }
119 | 
120 | void print_arg(const arg_t& arg){
121 |     // Echo the effective options, one "[INFO ] name = value" line per option.
122 |     std::cout << "[INFO ] column type = "  << arg.coltype      << std::endl;
123 |     std::cout << "[INFO ] table size = "   << arg.size         << std::endl;
124 |     std::cout << "[INFO ] bit width = "    << arg.nbits        << std::endl;
125 |     std::cout << "[INFO ] selectivity = "  << arg.selectivity  << std::endl;
126 |     std::cout << "[INFO ] repeat = "       << arg.repeat       << std::endl;
127 | }
128 | 


--------------------------------------------------------------------------------
/example/hybrid_timer.h:
--------------------------------------------------------------------------------
 1 | #ifndef _HYBRID_TIMER_H_
 2 | #define _HYBRID_TIMER_H_
 3 | 
 4 | #include <sys/time.h>
 5 | 
 6 | /*
 7 |    A hybrid timer that reports both the number of CPU cycles and wall clock time (in seconds).
 8 |    It uses the RDTSC instruction to obtain CPU cycles,
 9 |    and uses gettimeofday() to calculate elapsed seconds.
10 | */
11 | 
12 | typedef unsigned long long cycle_t;
13 | 
14 | static unsigned long long rdtsc(void);
15 | 
16 | class HybridTimer{
17 | public:
18 |     HybridTimer() : cycles_(0), seconds_(0.0), time_() {}  // zeroed, so the getters are defined before Stop()
19 |     // Records the starting tick count and wall-clock time.
20 |     void Start();
21 |     // Replaces the recorded start with the elapsed ticks and computes the elapsed seconds.
22 |     void Stop();
23 |     // Elapsed ticks between Start() and Stop(); only meaningful after Stop().
24 |     cycle_t GetNumCycles() const{ return cycles_; }
25 |     // Elapsed wall-clock seconds between Start() and Stop(); only meaningful after Stop().
26 |     double GetSeconds() const{ return seconds_; }
27 | 
28 | private:
29 |     cycle_t cycles_;        // start tick count after Start(), elapsed ticks after Stop()
30 |     double seconds_;        // elapsed wall-clock seconds, written by Stop()
31 |     struct timeval time_;   // wall-clock time captured by Start()
32 | };
33 | 
34 | inline void HybridTimer::Start(){
35 |     cycles_ = rdtsc();              // starting tick count
36 |     gettimeofday(&time_, NULL);     // starting wall-clock time
37 | }
38 | 
39 | inline void HybridTimer::Stop(){
40 |     cycles_ = rdtsc() - cycles_;    // cycles_ held the start count; it now holds the elapsed ticks
41 |     // Elapsed wall-clock time: whole-second difference plus the microsecond difference scaled to seconds.
42 |     struct timeval endtime;
43 |     gettimeofday(&endtime, NULL);
44 |     seconds_ = 1.0*(endtime.tv_sec - time_.tv_sec) + 1.0*(endtime.tv_usec - time_.tv_usec)/1000000;
45 | }
46 | 
47 | 
48 | // Getting CPU cycles with the RDTSC instruction; one rdtsc() definition is selected per target architecture.
49 | #if defined(__i386__)
50 | // 32-bit x86: bytes 0x0f 0x31 encode RDTSC; the "=A" constraint binds the 64-bit result to the EDX:EAX pair.
51 | static __inline__ unsigned long long rdtsc(void)
52 | {
53 |   unsigned long long int x;
54 |      __asm__ volatile (".byte 0x0f, 0x31" : "=A" (x));
55 |      return x;
56 | }
57 | #elif defined(__x86_64__)
58 | // x86-64: RDTSC leaves the low 32 bits in EAX ("=a") and the high 32 bits in EDX ("=d"); combine them.
59 | static __inline__ unsigned long long rdtsc(void)
60 | {
61 |   unsigned hi, lo;
62 |   __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
63 |   return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 );
64 | }
65 | 
66 | #elif defined(__powerpc__)
67 | // PowerPC: read upper, lower, then upper again; loop back to label 0 if the upper half changed in between.
68 | static __inline__ unsigned long long rdtsc(void)
69 | {
70 |   unsigned long long int result=0;
71 |   unsigned long int upper, lower,tmp;
72 |   __asm__ volatile(
73 |                 "0:                  \n"
74 |                 "\tmftbu   %0           \n"
75 |                 "\tmftb    %1           \n"
76 |                 "\tmftbu   %2           \n"
77 |                 "\tcmpw    %2,%0        \n"
78 |                 "\tbne     0b         \n"
79 |                 : "=r"(upper),"=r"(lower),"=r"(tmp)
80 |                 );
81 |   result = upper;
82 |   result = result<<32;
83 |   result = result|lower;
84 | 
85 |   return(result);
86 | }
87 | 
88 | #else
89 | // No implementation for this architecture: stop the build here.
90 | #error "No tick counter is available!"
91 | 
92 | #endif
93 | 
94 | #endif	//_HYBRID_TIMER_H_
95 | 
96 | 


--------------------------------------------------------------------------------
/src/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | list(APPEND byteslice-core_sources
 2 |     bitvector_block.cpp
 3 |     bitvector_iterator.cpp
 4 |     bitvector.cpp
 5 |     byteslice_column_block.cpp
 6 |     column.cpp
 7 |     naive_column_block.cpp
 8 |     sequential_binary_file.cpp
 9 |     types.cpp
10 |     )
11 | # Static library built from the sources above; the example programs link against it.
12 | add_library(byteslice-core STATIC ${byteslice-core_sources})
13 | 


--------------------------------------------------------------------------------
/src/avx-utility.h:
--------------------------------------------------------------------------------
  1 | /*******************************************************************************
  2 |  * Copyright (c) 2015
  3 |  * The Hong Kong Polytechnic University, Database Group
  4 |  *
  5 |  * Author: Ziqiang Feng (cszqfeng AT comp DOT polyu.edu.hk)
  6 |  *
  7 |  * See file LICENSE.md for details.
  8 |  *******************************************************************************/
  9 | #ifndef AVX_UTILITY_H
 10 | #define AVX_UTILITY_H
 11 | 
 12 | #include    <cstdint>
 13 | #include    <x86intrin.h>
 14 | 
 15 | namespace byteslice{
 16 | 
 17 | 
 18 | /* T should be uint8/16/32/64_t */
 19 | 
 20 | template <typename T>
 21 | inline T FLIP(T value){
 22 |     constexpr T kTopBit = static_cast<T>(static_cast<T>(1) << (8*sizeof(T) - 1));    //most significant bit of T
 23 |     return static_cast<T>(kTopBit ^ value);    //toggle only that bit, leave the rest untouched
 24 | }
 25 | 
 26 | // Compare less: lane-wise (a < b), lane width chosen by sizeof(T); implemented as cmpgt with swapped operands
 27 | template <typename T>
 28 | inline __m256i avx_cmplt(const __m256i &a, const __m256i &b){
 29 |     switch(sizeof(T)){    // no default: a sizeof(T) other than 1/2/4/8 leaves the switch without returning
 30 |         case 1:    // the cmpgt_epi* intrinsics compare lanes as signed integers
 31 |             return _mm256_cmpgt_epi8(b, a);
 32 |         case 2:
 33 |             return _mm256_cmpgt_epi16(b, a);
 34 |         case 4:
 35 |             return _mm256_cmpgt_epi32(b, a);
 36 |         case 8:
 37 |             return _mm256_cmpgt_epi64(b, a);
 38 |     }
 39 | }
 40 | 
 41 | // Compare greater: lane-wise (a > b), lane width chosen by sizeof(T)
 42 | template <typename T>
 43 | inline __m256i avx_cmpgt(const __m256i &a, const __m256i &b){
 44 |     switch(sizeof(T)){    // no default: a sizeof(T) other than 1/2/4/8 leaves the switch without returning
 45 |         case 1:    // the cmpgt_epi* intrinsics compare lanes as signed integers
 46 |             return _mm256_cmpgt_epi8(a, b);
 47 |         case 2:
 48 |             return _mm256_cmpgt_epi16(a, b);
 49 |         case 4:
 50 |             return _mm256_cmpgt_epi32(a, b);
 51 |         case 8:
 52 |             return _mm256_cmpgt_epi64(a, b);
 53 |     }
 54 | }
 55 | 
 56 | // Compare equal: lane-wise (a == b), lane width chosen by sizeof(T)
 57 | template <typename T>
 58 | inline __m256i avx_cmpeq(const __m256i &a, const __m256i &b){
 59 |     switch(sizeof(T)){    // no default: a sizeof(T) other than 1/2/4/8 leaves the switch without returning
 60 |         case 1:    // operands are passed as (b, a); equality is symmetric, so the order does not matter
 61 |             return _mm256_cmpeq_epi8(b, a);
 62 |         case 2:
 63 |             return _mm256_cmpeq_epi16(b, a);
 64 |         case 4:
 65 |             return _mm256_cmpeq_epi32(b, a);
 66 |         case 8:
 67 |             return _mm256_cmpeq_epi64(b, a);
 68 |     }
 69 | }
 70 | 
 71 | // Set1: broadcast the value a into every sizeof(T)-byte lane of a 256-bit register
 72 | template <typename T>
 73 | inline __m256i avx_set1(T a){
 74 |     switch(sizeof(T)){    // no default: a sizeof(T) other than 1/2/4/8 leaves the switch without returning
 75 |         case 1:    // a is cast to the signed integer type of the matching width before the broadcast
 76 |             return _mm256_set1_epi8(static_cast<int8_t>(a));
 77 |         case 2:
 78 |             return _mm256_set1_epi16(static_cast<int16_t>(a));
 79 |         case 4:
 80 |             return _mm256_set1_epi32(static_cast<int32_t>(a));
 81 |         case 8:
 82 |             return _mm256_set1_epi64x(static_cast<int64_t>(a));
 83 |     }
 84 | }
 85 | 
 86 | // Zero: a 256-bit value with every bit cleared
 87 | inline __m256i avx_zero(){
 88 |     return _mm256_setzero_si256();
 89 | }
 90 | 
 91 | // All ones: every 64-bit lane holds -1, so all 256 bits are set
 92 | inline __m256i avx_ones(){
 93 |     return _mm256_set1_epi64x(-1ULL);
 94 | }
 95 | 
 96 | // Bitwise AND of two 256-bit values
 97 | inline __m256i avx_and(const __m256i &a, const __m256i &b){
 98 |     return _mm256_and_si256(a, b);
 99 | }
100 | 
101 | // Bitwise OR of two 256-bit values
102 | inline __m256i avx_or(const __m256i &a, const __m256i &b){
103 |     return _mm256_or_si256(a, b);
104 | }
105 | 
106 | // Bitwise XOR of two 256-bit values
107 | inline __m256i avx_xor(const __m256i &a, const __m256i &b){
108 |     return _mm256_xor_si256(a, b);
109 | }
110 | 
111 | // Bitwise NOT, computed as (a XOR all-ones)
112 | inline __m256i avx_not(const __m256i &a){
113 |     return _mm256_xor_si256(a, avx_ones());
114 | }
115 | 
116 | // Bitwise (NOT a) AND b -- the complement applies to the FIRST argument
117 | inline __m256i avx_andnot(const __m256i &a, const __m256i &b){
118 |     return _mm256_andnot_si256(a, b);
119 | }
120 | 
121 | // Test is zero: true iff all 256 bits of a are 0 (testz(a, a) yields 1 exactly when a AND a is zero)
122 | inline bool avx_iszero(const __m256i &a){
123 |     return _mm256_testz_si256(a, a);
124 | }
125 | 
126 | 
127 | }   // namespace
128 | 
129 | #endif  //AVX_UTILITY_H
130 | 


--------------------------------------------------------------------------------
/src/bitvector.cpp:
--------------------------------------------------------------------------------
  1 | /*******************************************************************************
  2 |  * Copyright (c) 2015
  3 |  * The Hong Kong Polytechnic University, Database Group
  4 |  *
  5 |  * Author: Ziqiang Feng (cszqfeng AT comp DOT polyu.edu.hk)
  6 |  *
  7 |  * See file LICENSE.md for details.
  8 |  *******************************************************************************/
  9 | #include "bitvector.h"
 10 | 
 11 | #include    <algorithm>
 12 | #include    <omp.h>
 13 | 
 14 | namespace byteslice{
 15 | 
 16 | BitVector::BitVector(const Column* column):    //build a bit vector with one bit per tuple of the column
 17 |     BitVector(column->GetNumTuples()){    //delegates to the size_t constructor; column is dereferenced unconditionally
 18 | }
 19 | 
 20 | BitVector::BitVector(size_t num):
 21 |     num_(num){
 22 |     // One block per kNumTuplesPerBlock tuples; the last block holds whatever remains.
 23 |     for(size_t first=0; first < num_; first += kNumTuplesPerBlock){
 24 |         const size_t remaining = num_ - first;
 25 |         blocks_.push_back(new BitVectorBlock(std::min(kNumTuplesPerBlock, remaining)));
 26 |     }
 27 |     // Start with every bit set.
 28 |     SetOnes();
 29 | }
 30 | 
 31 | 
 32 | BitVector::~BitVector(){
 33 |     // Release the blocks from the back of the vector towards the front.
 34 |     for(size_t remaining = blocks_.size(); remaining > 0; remaining--){
 35 |         delete blocks_[remaining - 1];
 36 |     }
 37 | }
 38 | 
 39 | void BitVector::And(const BitVector* bitvector){
 40 |     assert(num_ == bitvector->num_);
 41 |     // Combine block by block; each iteration touches only its own block of this vector.
 42 | #   pragma omp parallel for schedule(dynamic)
 43 |     for(size_t block_id=0; block_id < blocks_.size(); block_id++){
 44 |         blocks_[block_id]->And(bitvector->blocks_[block_id]);
 45 |     }
 46 | }
 47 | 
 48 | void BitVector::Or(const BitVector* bitvector){
 49 |     assert(num_ == bitvector->num_);
 50 |     // Combine block by block; each iteration touches only its own block of this vector.
 51 | #   pragma omp parallel for schedule(dynamic)
 52 |     for(size_t block_id=0; block_id < blocks_.size(); block_id++){
 53 |         blocks_[block_id]->Or(bitvector->blocks_[block_id]);
 54 |     }
 55 | }
 56 | 
 57 | 
 58 | void BitVector::SetOnes(){
 59 | #   pragma omp parallel for schedule(dynamic)
 60 |     for(size_t block_id=0; block_id < GetNumBlocks(); block_id++){
 61 |         GetBVBlock(block_id)->SetOnes();    //each block sets its own bits
 62 |     }
 63 | }
 64 | 
 65 | void BitVector::SetZeros(){
 66 | #   pragma omp parallel for schedule(dynamic)
 67 |     for(size_t block_id=0; block_id < GetNumBlocks(); block_id++){
 68 |         GetBVBlock(block_id)->SetZeros();    //each block clears its own bits
 69 |     }
 70 | }
 71 | 
 72 | size_t BitVector::CountOnes() const{
 73 |     size_t total = 0;    //sum of the per-block counts
 74 | #   pragma omp parallel for schedule(dynamic) reduction(+: total)
 75 |     for(size_t block_id=0; block_id < GetNumBlocks(); block_id++){
 76 |         total += GetBVBlock(block_id)->CountOnes();
 77 |     }
 78 |     return total;
 79 | }
 80 | 
 81 | bool BitVector::GetBit(size_t pos){
 82 |     // Find the block that holds pos, then ask it for the bit at the in-block offset.
 83 |     BitVectorBlock* const block = blocks_[pos / kNumTuplesPerBlock];
 84 |     return block->GetBit(pos % kNumTuplesPerBlock);
 85 | }
 86 | 
 87 | void BitVector::SetBit(size_t pos){
 88 |     // Find the block that holds pos, then set the bit at the in-block offset.
 89 |     BitVectorBlock* const block = blocks_[pos / kNumTuplesPerBlock];
 90 |     block->SetBit(pos % kNumTuplesPerBlock);
 91 | }
 92 | 
 93 | void BitVector::UnsetBit(size_t pos){
 94 |     // Find the block that holds pos, then clear the bit at the in-block offset.
 95 |     BitVectorBlock* const block = blocks_[pos / kNumTuplesPerBlock];
 96 |     block->UnsetBit(pos % kNumTuplesPerBlock);
 97 | }
 98 | 
 99 | }   // namespace
100 | 


--------------------------------------------------------------------------------
/src/bitvector.h:
--------------------------------------------------------------------------------
 1 | /*******************************************************************************
 2 |  * Copyright (c) 2015
 3 |  * The Hong Kong Polytechnic University, Database Group
 4 |  *
 5 |  * Author: Ziqiang Feng (cszqfeng AT comp DOT polyu.edu.hk)
 6 |  *
 7 |  * See file LICENSE.md for details.
 8 |  *******************************************************************************/
 9 | #ifndef BITVECTOR_H
10 | #define BITVECTOR_H
11 | 
12 | #include    <vector>
13 | 
14 | #include "../src/bitvector_block.h"
15 | #include "../src/column.h"
16 | #include "../src/types.h"
17 | 
18 | namespace byteslice{
19 | 
20 | /**
21 |     Notice: BitVector is created based on a column. We don't resize BitVectors.
22 | */
23 | 
24 | class Column;
25 | 
26 | class BitVector{
27 | /*
28 |    The bit vector is stored as a sequence of heap-allocated blocks.
29 |    Blocks are 32-byte aligned so they can be used with 256-bit AVX instructions.
30 | */
31 | public:
32 |     BitVector(const Column* column);    //one bit per tuple of the column
33 |     BitVector(size_t num);              //num bits, all initially set
34 |     ~BitVector();                       //deletes every block
35 |     BitVector(const BitVector&) = delete;   //owns raw block pointers; a copy would delete them twice
36 |     void SetOnes();
37 |     void SetZeros();
38 |     size_t CountOnes() const;
39 | 
40 |     //bitwise combination (the other vector must have the same number of bits)
41 |     void And(const BitVector* bitvector);
42 |     void Or(const BitVector* bitvector);
43 | 
44 |     //bit manipulation (pos is not range-checked)
45 |     bool GetBit(size_t pos);
46 |     void SetBit(size_t pos);
47 |     void UnsetBit(size_t pos);
48 | 
49 |     //accessors
50 |     size_t num() const;
51 |     size_t GetNumBlocks() const;
52 |     BitVectorBlock* GetBVBlock(size_t id) const;
53 | 
54 | private:
55 |     std::vector<BitVectorBlock*> blocks_;   //owned; released in the destructor
56 |     const size_t num_;                      //total number of bits
57 | 
58 | };
59 | 
60 | inline size_t BitVector::num() const{   //number of bits this vector was created with
61 |     return num_;
62 | }
63 | 
64 | inline size_t BitVector::GetNumBlocks() const{   //number of blocks backing the vector
65 |     return blocks_.size();
66 | }
67 | 
68 | 
69 | inline BitVectorBlock* BitVector::GetBVBlock(size_t id) const{   //block at index id; id is not range-checked
70 |     return blocks_[id];
71 | }
72 | 
73 | }   // namespace
74 | 
75 | #endif  //BITVECTOR_H
76 | 


--------------------------------------------------------------------------------
/src/bitvector_block.cpp:
--------------------------------------------------------------------------------
  1 | /*******************************************************************************
  2 |  * Copyright (c) 2015
  3 |  * The Hong Kong Polytechnic University, Database Group
  4 |  *
  5 |  * Author: Ziqiang Feng (cszqfeng AT comp DOT polyu.edu.hk)
  6 |  *
  7 |  * See file LICENSE.md for details.
  8 |  *******************************************************************************/
  9 | #include "bitvector_block.h"
 10 | 
 11 | #include	<cassert>
 12 | #include    <cstdlib>
 13 | #include    <cstring>
 14 | 
 15 | namespace byteslice{
 16 | 
 17 | BitVectorBlock::BitVectorBlock(size_t num):
 18 |     num_(num), num_word_units_(CEIL(num, kNumAvxBits)*(kNumAvxBits/kNumWordBits)){
 19 |     assert(num_ <= kNumTuplesPerBlock);
 20 |     // Always allocate a full block's storage, 32-byte aligned.
 21 |     const int err = posix_memalign((void**)&data_, 32, sizeof(WordUnit)*CEIL(kNumTuplesPerBlock, kNumWordBits));
 22 |     if(0 != err){ std::abort(); }   // allocation failed: stop here instead of writing through an invalid pointer
 23 |     SetOnes();
 24 | }
 25 | 
 26 | BitVectorBlock::~BitVectorBlock(){
 27 |     free(data_);    //data_ comes from posix_memalign in the constructor, so it is released with free()
 28 | }
 29 | 
 30 | bool BitVectorBlock::GetBit(size_t pos){
 31 |     // Bit pos lives in word pos/kNumWordBits, at bit index pos%kNumWordBits.
 32 |     const WordUnit word = data_[pos / kNumWordBits];
 33 |     const size_t shift = pos % kNumWordBits;
 34 |     return 0 != ((word >> shift) & 1ULL);
 35 | }
 36 | 
 37 | void BitVectorBlock::SetBit(size_t pos){
 38 |     // Turn on the single bit that represents position pos.
 39 |     const WordUnit bit = 1ULL << (pos % kNumWordBits);
 40 |     WordUnit& word = data_[pos / kNumWordBits];
 41 |     word |= bit;
 42 | }
 43 | 
 44 | void BitVectorBlock::UnsetBit(size_t pos){
 45 |     // Turn off the single bit that represents position pos.
 46 |     const WordUnit bit = 1ULL << (pos % kNumWordBits);
 47 |     WordUnit& word = data_[pos / kNumWordBits];
 48 |     word &= ~bit;
 49 | }
 50 | 
 51 | void BitVectorBlock::SetOnes(){
 52 |     memset(data_, 0xff, num_word_units_*sizeof(WordUnit));    //fill every word in use with 1s
 53 |     ClearTail();    //then zero the padding bits past the last position
 54 | }
 55 | 
 56 | void BitVectorBlock::SetZeros(){
 57 |     memset(data_, 0x0, num_word_units_*sizeof(WordUnit));    //clear every word in use
 58 | }
 59 | 
 60 | size_t BitVectorBlock::CountOnes(){
 61 |     // Sum the population count of every word unit in use by this block.
 62 |     size_t ones = 0;
 63 |     for(size_t word_id=0; word_id != num_word_units_; ++word_id){
 64 |         ones += POPCNT64(data_[word_id]);
 65 |     }
 66 |     return ones;
 67 | }
 68 | 
 69 | void BitVectorBlock::And(const BitVectorBlock* block){
 70 |     for(size_t word_id=0; word_id != num_word_units_; ++word_id){
 71 |         data_[word_id] = data_[word_id] & block->GetWordUnit(word_id);
 72 |     }
 73 |     ClearTail();    //keep the padding bits at zero
 74 | }
 75 | 
 76 | void BitVectorBlock::Or(const BitVectorBlock* block){
 77 |     for(size_t word_id=0; word_id != num_word_units_; ++word_id){
 78 |         data_[word_id] = data_[word_id] | block->GetWordUnit(word_id);
 79 |     }
 80 |     ClearTail();    //keep the padding bits at zero
 81 | }
 82 | 
 83 | void BitVectorBlock::Set(const BitVectorBlock* block){
 84 |     for(size_t word_id=0; word_id != num_word_units_; ++word_id){
 85 |         SetWordUnit(block->GetWordUnit(word_id), word_id);    //copy word by word
 86 |     }
 87 |     ClearTail();    //keep the padding bits at zero
 88 | }
 89 | 
 90 | 
 91 | void BitVectorBlock::ClearTail(){
 92 |     //Zero the unused bits between position num_ and the end of the last AVX-sized group (may touch up to 4 WordUnit)
 93 |     size_t num_empty = kNumAvxBits - (num_ % kNumAvxBits);    //unused bit positions in the last group
 94 |     if(kNumAvxBits != num_empty){    //equal means num_ is a multiple of kNumAvxBits: nothing to clear
 95 |         size_t i = num_word_units_ - 1;    //start from the last word and walk backwards
 96 |         while(num_empty >= kNumWordBits){    //words that are entirely unused
 97 |             data_[i] = 0;
 98 |             num_empty -= kNumWordBits;
 99 |             i--;
100 |         }
101 |         data_[i] &= (-1ULL >> num_empty);    //partially used word: keep the low bits, drop the top num_empty bits
102 |     }
103 | 
104 | }
105 | 
106 | 
107 | }
108 | 


--------------------------------------------------------------------------------
/src/bitvector_block.h:
--------------------------------------------------------------------------------
 1 | /*******************************************************************************
 2 |  * Copyright (c) 2015
 3 |  * The Hong Kong Polytechnic University, Database Group
 4 |  *
 5 |  * Author: Ziqiang Feng (cszqfeng AT comp DOT polyu.edu.hk)
 6 |  *
 7 |  * See file LICENSE.md for details.
 8 |  *******************************************************************************/
 9 | #ifndef _BITVECTOR_BLOCK_H_
10 | #define _BITVECTOR_BLOCK_H_
11 | 
12 | #include "../src/macros.h"
13 | #include "../src/param.h"
14 | #include "../src/types.h"
15 | 
16 | namespace byteslice{
17 | 
18 | /**
19 |     Warning: 
20 |     Pay attention to the bit sequence:
21 |     Every 64 tuples are placed in one uint64_t unit.
22 |     Within the word unit, the tuple with the SMALLER id is
23 |     placed at the LOWER (less significant) bits.
24 |     DO NOT interpret the bit vector with different
25 |     data types, otherwise you may fall into the
26 |     big-/little-endian pitfall.
27 | */
28 | class BitVectorBlock{
29 | /*
30 |    The bit vector block is guaranteed to be 32-byte aligned
31 |    and the number of WordUnit in use is guaranteed to be
32 |    a multiple of the number of WordUnit in one AVX register
33 | */
34 | public:
35 |     BitVectorBlock(size_t num);     //num bits, all initially set; num must not exceed kNumTuplesPerBlock
36 |     ~BitVectorBlock();
37 |     void SetOnes();
38 |     void SetZeros();
39 |     size_t CountOnes();
40 |     void ClearTail();               //zero the padding bits past the last position
41 |     void And(const BitVectorBlock* block);
42 |     void Or(const BitVectorBlock* block);
43 |     void Set(const BitVectorBlock* block);
44 | 
45 |     //bit manipulation (pos is not range-checked)
46 |     bool GetBit(size_t pos);
47 |     void SetBit(size_t pos);
48 |     void UnsetBit(size_t pos);
49 | 
50 |     //mutators
51 |     void SetWordUnit(WordUnit word, size_t pos);
52 |     void SetAvxUnit(AvxUnit avxunit, size_t start_word_pos);
53 | 
54 |     //accessors
55 |     WordUnit GetWordUnit(size_t pos) const;
56 |     AvxUnit GetAvxUnit(size_t start_word_pos) const;
57 |     size_t num() const;
58 |     size_t num_word_units() const;
59 |     BitVectorBlock(const BitVectorBlock&) = delete;    //owns data_; a copy would free it twice
60 |     BitVectorBlock& operator=(const BitVectorBlock&) = delete;
61 | private:
62 |     WordUnit* data_ = NULL;         //aligned storage, released in the destructor
63 |     size_t num_;                    //number of bits represented
64 |     size_t num_word_units_;         //number of WordUnit in use
65 | 
66 | };
67 | 
68 | //mutators
69 | inline void BitVectorBlock::SetWordUnit(WordUnit word, size_t pos){   //overwrite the word at index pos (not range-checked)
70 |     data_[pos] = word;
71 | }
72 | inline void BitVectorBlock::SetAvxUnit(AvxUnit avxunit, size_t start_word_pos){   //store 256 bits starting at word index start_word_pos
73 |     _mm256_storeu_si256((__m256i*)(data_+start_word_pos), avxunit);   //unaligned-store form, so any word index is accepted
74 | }
75 | 
76 | //accessors
77 | inline WordUnit BitVectorBlock::GetWordUnit(size_t pos) const{   //word at index pos (not range-checked)
78 |     return data_[pos];
79 | }
80 | 
81 | inline AvxUnit BitVectorBlock::GetAvxUnit(size_t start_word_pos) const{   //load 256 bits starting at word index start_word_pos
82 |     return _mm256_lddqu_si256((__m256i*)(data_+start_word_pos));   //unaligned-load form, so any word index is accepted
83 | }
84 | 
85 | inline size_t BitVectorBlock::num() const{   //number of bits this block represents
86 |     return num_;
87 | }
88 | 
89 | inline size_t BitVectorBlock::num_word_units() const{   //number of WordUnit in use by this block
90 |     return num_word_units_;
91 | }
92 | 
93 | 
94 | }   // namespace
95 | 
96 | #endif  //BITVECTOR_BLOCK_H
97 | 


--------------------------------------------------------------------------------
/src/bitvector_iterator.cpp:
--------------------------------------------------------------------------------
 1 | /*******************************************************************************
 2 |  * Copyright (c) 2015
 3 |  * The Hong Kong Polytechnic University, Database Group
 4 |  *
 5 |  * Author: Ziqiang Feng (cszqfeng AT comp DOT polyu.edu.hk)
 6 |  *
 7 |  * See file LICENSE.md for details.
 8 |  *******************************************************************************/
 9 | #include "bitvector_iterator.h"
10 | 
11 | namespace byteslice{
12 | 
13 | BitVectorIterator::BitVectorIterator(const BitVector *bitvector):   //position the iterator before the first set bit
14 |     bitvector_(bitvector),
15 |     cur_block_(bitvector_->GetBVBlock(0)){   //block 0 is fetched unconditionally, so the bit vector must have at least one block
16 | }
17 | 
18 | 
19 | BitVectorIterator::~BitVectorIterator(){   //nothing is released here; the bit vector is left untouched
20 | }
21 | 
22 | 
23 | }   // namespace
24 | 


--------------------------------------------------------------------------------
/src/bitvector_iterator.h:
--------------------------------------------------------------------------------
 1 | /*******************************************************************************
 2 |  * Copyright (c) 2015
 3 |  * The Hong Kong Polytechnic University, Database Group
 4 |  *
 5 |  * Author: Ziqiang Feng (cszqfeng AT comp DOT polyu.edu.hk)
 6 |  *
 7 |  * See file LICENSE.md for details.
 8 |  *******************************************************************************/
 9 | #ifndef BITVECTOR_ITERATOR_H
10 | #define BITVECTOR_ITERATOR_H
11 | 
12 | #include "../src/bitvector.h"
13 | 
14 | namespace byteslice{
15 | 
16 | /**
17 |   Given a BitVector, extract the positions corresponding to the 1's
18 | */
19 | 
20 | class BitVector;
21 | class BitVectorBlock;
22 | 
23 | class BitVectorIterator{
24 | public:
25 |     BitVectorIterator(const BitVector *bitvector);   //the bit vector is only referenced, not copied
26 |     ~BitVectorIterator();
27 |     bool Next();    //Move the cursor to the next 1, return true if next exists
28 |     size_t GetPosition();   //Return the position of the cursor; call only after Next() returned true
29 | 
30 | private:
31 |     const BitVector *bitvector_;
32 |     size_t stack_[kNumWordBits];   //positions of the set bits of the current word; smallest position on top
33 |     size_t stack_top_ = 1;  //*top* is the available position to push in new item; starts at 1 so the first Next() empties the stack
34 | 
35 |     //These cursors mark the word unit that is TO BE CONSIDERED, i.e., NOT considered yet.
36 |     size_t cur_block_id_ = 0;
37 |     size_t cur_word_id_ = 0;
38 |     BitVectorBlock* cur_block_ = NULL;
39 |     size_t block_offset_ = 0;   //sum of num() over the blocks already passed
40 | 
41 | };
42 | 
43 | inline size_t BitVectorIterator::GetPosition(){   //top of the stack; that slot is unwritten before the first successful Next()
44 |     return stack_[stack_top_ - 1];
45 | }
46 | 
47 | inline bool BitVectorIterator::Next(){
48 |     if(0 == stack_top_){ return false; }    //already exhausted: stay exhausted instead of underflowing stack_top_
49 |     stack_top_--;   //pop the position returned by the previous call
50 |     if(0 == stack_top_){    //need to do heavy work only when stack is empty
51 |         WordUnit word = 0;
52 |         do{
53 |             //first, make sure the cursor is valid
54 |             //advance the cursor if appropriate
55 |             if(cur_word_id_ >= cur_block_->num_word_units()){
56 |                 //all words in this block are exhausted, proceed to next block
57 |                 cur_word_id_ = 0;
58 |                 cur_block_id_++;
59 |                 block_offset_ += cur_block_->num();
60 |                 //it's possible that we pass the last block
61 |                 if(cur_block_id_ >= bitvector_->GetNumBlocks()){ //all BV blocks are exhausted
62 |                     return false;   //stack_top_ is 0 here, which marks the iterator as exhausted
63 |                 }
64 |                 cur_block_ = bitvector_->GetBVBlock(cur_block_id_);
65 |             }
66 |             word = cur_block_->GetWordUnit(cur_word_id_);
67 |             cur_word_id_++;
68 |         }
69 |         while(0 == word);   //skip words without any set bit
70 | 
71 |         //Extract positions from this non-zero bit-vector word
72 |         //Attention: because we are using a *stack*,
73 |         //we must push larger positions first
74 |         size_t offset = block_offset_ + (cur_word_id_-1)*kNumWordBits;
75 |         for(size_t bit = kNumWordBits - 1; bit < kNumWordBits; bit--){   //ends when bit wraps around below 0
76 |             //standard technique to reduce branch miss:
77 |             stack_[stack_top_] = offset + bit;   //always write the candidate ...
78 |             stack_top_ += ((word >> bit) & 1ULL);   //... but keep it only if the bit is set
79 |         }
80 |         //word is non-zero, so at least one position was pushed
81 |     }
82 | 
83 |     return true;
84 | }
85 | 
86 | 
87 | }   // namespace
88 | 
89 | #endif //BITVECTOR_ITERATOR_H
90 | 


--------------------------------------------------------------------------------
/src/byteslice_column_block.cpp:
--------------------------------------------------------------------------------
  1 | /*******************************************************************************
  2 |  * Copyright (c) 2015
  3 |  * The Hong Kong Polytechnic University, Database Group
  4 |  *
  5 |  * Author: Ziqiang Feng (cszqfeng AT comp DOT polyu.edu.hk)
  6 |  *
  7 |  * See file LICENSE.md for details.
  8 |  *******************************************************************************/
  9 | #include "byteslice_column_block.h"
 10 | 
 11 | #include	<cassert>
 12 | #include    <cstdlib>
 13 | #include    <cstring>
 14 | 
 15 | #include "avx-utility.h"
 16 | 
 17 | namespace byteslice{
 18 |     
 19 | #ifdef      NEARLYSTOP
 20 | #warning    "Early-stop is disabled in ByteSliceColumnBlock!"
 21 | #endif
 22 | 
 23 | static constexpr size_t kPrefetchDistance = 512*2;
 24 | 
 25 | template <size_t BIT_WIDTH, Direction PDIRECTION>
 26 | ByteSliceColumnBlock<BIT_WIDTH, PDIRECTION>::ByteSliceColumnBlock(size_t num):
 27 |     ColumnBlock(
 28 |             PDIRECTION==Direction::kLeft ? 
 29 |                 ColumnType::kByteSlicePadLeft:ColumnType::kByteSlicePadRight, 
 30 |             BIT_WIDTH, 
 31 |             num)    
 32 | {
 33 |     //allocate one zero-filled, 32-byte aligned buffer per byte slice
 34 |     assert(num <= kNumTuplesPerBlock);
 35 |     for(size_t i=0; i < kNumBytesPerCode; i++){
 36 |         const int err = posix_memalign((void**)&data_[i], 32, kMemSizePerByteSlice);
 37 |         if(0 != err){ std::abort(); }   //allocation failed: stop here instead of writing through an invalid pointer
 38 |         memset(data_[i], 0x0, kMemSizePerByteSlice);
 39 |     }
 40 | 
 41 | }
 42 | 
 43 | template <size_t BIT_WIDTH, Direction PDIRECTION>
 44 | ByteSliceColumnBlock<BIT_WIDTH, PDIRECTION>::~ByteSliceColumnBlock(){
 45 |     for(size_t slice=0; slice != kNumBytesPerCode; ++slice){
 46 |         free(data_[slice]);    //one buffer per byte slice
 47 |     }
 48 | }
 49 | 
 50 | template <size_t BIT_WIDTH, Direction PDIRECTION>
 51 | bool ByteSliceColumnBlock<BIT_WIDTH, PDIRECTION>::Resize(size_t num){
 52 |     if(num > kNumTuplesPerBlock){ return false; }   //a block never holds more than kNumTuplesPerBlock tuples
 53 |     num_tuples_ = num; return true;   //only the tuple count changes; the buffers are left as they are
 54 | }
 55 | 
 56 | template <size_t BIT_WIDTH, Direction PDIRECTION>
 57 | void ByteSliceColumnBlock<BIT_WIDTH, PDIRECTION>::
 58 |                     SerToFile(SequentialWriteBinaryFile &file) const{
 59 |     file.Append(&num_tuples_, sizeof(num_tuples_));    //tuple count first ...
 60 |     for(size_t slice=0; slice != kNumBytesPerCode; ++slice){
 61 |         file.Append(data_[slice], kMemSizePerByteSlice);    //... then every byte slice in full
 62 |     }
 63 | }
 64 | 
 65 | template <size_t BIT_WIDTH, Direction PDIRECTION>
 66 | void ByteSliceColumnBlock<BIT_WIDTH, PDIRECTION>::
 67 |                     DeserFromFile(const SequentialReadBinaryFile &file){
 68 |     file.Read(&num_tuples_, sizeof(num_tuples_));    //tuple count first ...
 69 |     for(size_t slice=0; slice != kNumBytesPerCode; ++slice){
 70 |         file.Read(data_[slice], kMemSizePerByteSlice);    //... then every byte slice in full
 71 |     }
 72 | }
 73 | 
 74 | 
 75 | //Scan against literal
 76 | template <size_t BIT_WIDTH, Direction PDIRECTION>
 77 | void ByteSliceColumnBlock<BIT_WIDTH, PDIRECTION>::Scan(Comparator comparator,
 78 |         WordUnit literal, BitVectorBlock* bvblock, Bitwise bit_opt) const{
 79 |     assert(bvblock->num() == num_tuples_);
 80 |     switch(comparator){    //turn the runtime comparator into a template argument
 81 |         case Comparator::kLess:
 82 |             ScanHelper1<Comparator::kLess>(literal, bvblock, bit_opt); break;
 83 |         case Comparator::kGreater:
 84 |             ScanHelper1<Comparator::kGreater>(literal, bvblock, bit_opt); break;
 85 |         case Comparator::kLessEqual:
 86 |             ScanHelper1<Comparator::kLessEqual>(literal, bvblock, bit_opt); break;
 87 |         case Comparator::kGreaterEqual:
 88 |             ScanHelper1<Comparator::kGreaterEqual>(literal, bvblock, bit_opt); break;
 89 |         case Comparator::kEqual:
 90 |             ScanHelper1<Comparator::kEqual>(literal, bvblock, bit_opt); break;
 91 |         case Comparator::kInequal:
 92 |             ScanHelper1<Comparator::kInequal>(literal, bvblock, bit_opt); break;
 93 |     }
 94 | }
 95 | 
 96 | template <size_t BIT_WIDTH, Direction PDIRECTION>
 97 | template <Comparator CMP>
 98 | void ByteSliceColumnBlock<BIT_WIDTH, PDIRECTION>::ScanHelper1(WordUnit literal,
 99 |                                     BitVectorBlock* bvblock, Bitwise bit_opt) const{
100 |     //turn the runtime bitwise mode into a template argument
101 |     if(Bitwise::kSet == bit_opt){
102 |         ScanHelper2<CMP, Bitwise::kSet>(literal, bvblock);
103 |     }else if(Bitwise::kAnd == bit_opt){
104 |         ScanHelper2<CMP, Bitwise::kAnd>(literal, bvblock);
105 |     }else if(Bitwise::kOr == bit_opt){
106 |         ScanHelper2<CMP, Bitwise::kOr>(literal, bvblock);
107 |     }
108 | }
109 | 
110 | template <size_t BIT_WIDTH, Direction PDIRECTION>
111 | template <Comparator CMP, Bitwise OPT>
112 | void ByteSliceColumnBlock<BIT_WIDTH, PDIRECTION>::ScanHelper2(WordUnit literal,
113 |                                             BitVectorBlock* bvblock) const {
114 |     //Prepare byte-slices of literal: one broadcast register per byte, most significant byte first
115 |     AvxUnit mask_literal[kNumBytesPerCode];
116 |     literal &= kCodeMask;
117 |     if(Direction::kRight == PDIRECTION){
118 |         literal <<= kNumPaddingBits;
119 |     }
120 |     for(size_t byte_id=0; byte_id < kNumBytesPerCode; byte_id++){
121 |          ByteUnit byte = FLIP(static_cast<ByteUnit>(literal >> 8*(kNumBytesPerCode - 1 - byte_id)));
122 |          mask_literal[byte_id] = avx_set1<ByteUnit>(byte);
123 |     }
124 |     
125 |     //for every kNumWordBits (64) tuples: produce one word of the result bit vector
126 |     for(size_t offset = 0, bv_word_id = 0; offset < num_tuples_; offset += kNumWordBits, bv_word_id++){
127 |         WordUnit bitvector_word = WordUnit(0);
128 |         //need several iteration of AVX scan (each one covers kNumAvxBits/8 tuples, one byte per tuple)
129 |         for(size_t i=0; i < kNumWordBits; i += kNumAvxBits/8){
130 |             AvxUnit m_less = avx_zero();
131 |             AvxUnit m_greater = avx_zero();
132 |             AvxUnit m_equal; 
133 |             int input_mask;    //left unset for kSet; every read below is guarded by an OPT check
134 | 
135 |             switch(OPT){
136 |                 case Bitwise::kSet:
137 |                     m_equal = avx_ones();
138 |                     break;
139 |                 case Bitwise::kAnd:    //only tuples whose current bit is 1 can end up as 1
140 |                     input_mask = static_cast<int>(bvblock->GetWordUnit(bv_word_id) >> i);
141 |                     m_equal = avx_ones();
142 |                     break;
143 |                 case Bitwise::kOr:    //only tuples whose current bit is 0 can change
144 |                     input_mask = ~static_cast<int>(bvblock->GetWordUnit(bv_word_id) >> i);
145 |                     m_equal = avx_ones();
146 |                     break;
147 |             }
148 | 
149 |             if(
150 | #ifndef         NEARLYSTOP
151 |                 (OPT==Bitwise::kSet) ||  0 != input_mask
152 | #else           
153 |                 true
154 | #endif
155 |               ){
156 |                 __builtin_prefetch(data_[0] + offset + i + kPrefetchDistance);
157 |                 ScanKernel2<CMP, 0>(
158 |                         _mm256_lddqu_si256(reinterpret_cast<__m256i*>(data_[0]+offset+i)),
159 |                         mask_literal[0],
160 |                         m_less,
161 |                         m_greater,
162 |                         m_equal);
163 |                 if(kNumBytesPerCode > 1
164 | #ifndef                 NEARLYSTOP
165 |                         && ((OPT==Bitwise::kSet && !avx_iszero(m_equal))
166 |                             || (OPT!=Bitwise::kSet && 0!=(input_mask & _mm256_movemask_epi8(m_equal))))
167 | #endif
168 |                   ){
169 |                     __builtin_prefetch(data_[1] + offset + i + kPrefetchDistance);
170 |                     ScanKernel2<CMP, 1>(
171 |                             _mm256_lddqu_si256(reinterpret_cast<__m256i*>(data_[1]+offset+i)),
172 |                             mask_literal[1],
173 |                             m_less,
174 |                             m_greater,
175 |                             m_equal);
176 |                     if(kNumBytesPerCode > 2
177 | #ifndef                     NEARLYSTOP
178 |                             && ((OPT==Bitwise::kSet && !avx_iszero(m_equal)) 
179 |                                 || (OPT!=Bitwise::kSet && 0!=(input_mask & _mm256_movemask_epi8(m_equal))))
180 | #endif
181 |                       ){
182 |                         ScanKernel2<CMP, 2>(    //note: no prefetch is issued for byte slices 2 and 3
183 |                                 _mm256_lddqu_si256(reinterpret_cast<__m256i*>(data_[2]+offset+i)),
184 |                                 mask_literal[2],
185 |                                 m_less,
186 |                                 m_greater,
187 |                                 m_equal);
188 |                         if(kNumBytesPerCode > 3
189 | #ifndef                         NEARLYSTOP
190 |                                 && ((OPT==Bitwise::kSet && !avx_iszero(m_equal)) 
191 |                                     || (OPT!=Bitwise::kSet && 0!=(input_mask & _mm256_movemask_epi8(m_equal))))
192 | #endif
193 |                           ){
194 |                             ScanKernel2<CMP, 3>(
195 |                                     _mm256_lddqu_si256(reinterpret_cast<__m256i*>(data_[3]+offset+i)),
196 |                                     mask_literal[3],
197 |                                     m_less,
198 |                                     m_greater,
199 |                                     m_equal);
200 |                         }
201 |                     }
202 |                 }
203 |             }
204 | 
205 |             AvxUnit m_result;    //combine the three masks according to the comparator
206 |             switch(CMP){
207 |                 case Comparator::kLessEqual:
208 |                     m_result = avx_or(m_less, m_equal);
209 |                     break;
210 |                 case Comparator::kLess:
211 |                     m_result = m_less;
212 |                     break;
213 |                 case Comparator::kGreaterEqual:
214 |                     m_result = avx_or(m_greater, m_equal);
215 |                     break;
216 |                 case Comparator::kGreater:
217 |                     m_result = m_greater;
218 |                     break;
219 |                 case Comparator::kEqual:
220 |                     m_result = m_equal;
221 |                     break;
222 |                 case Comparator::kInequal:
223 |                     m_result = avx_not(m_equal);
224 |                     break;
225 |             }
226 |             //move mask: collapse the byte lanes into one bit per tuple
227 |             uint32_t mmask = _mm256_movemask_epi8(m_result);
228 |             //save in temporary bit vector, at the bit offset of this iteration
229 |             bitvector_word |= (static_cast<WordUnit>(mmask) << i);
230 |         }
231 |         //put result bitvector into bitvector block
232 |         //(bv_word_id always equals offset / kNumWordBits)
233 |         WordUnit x = bitvector_word;
234 |         switch(OPT){
235 |             case Bitwise::kSet:    //overwrite the existing word
236 |                 break;
237 |             case Bitwise::kAnd:
238 |                 x &= bvblock->GetWordUnit(bv_word_id);
239 |                 break;
240 |             case Bitwise::kOr:
241 |                 x |= bvblock->GetWordUnit(bv_word_id);
242 |                 break;
243 |         }
244 |         bvblock->SetWordUnit(x, bv_word_id);
245 |     }
246 |     bvblock->ClearTail();
247 | }
248 | 
249 | //Scan against other block
250 | template <size_t BIT_WIDTH, Direction PDIRECTION>
251 | void ByteSliceColumnBlock<BIT_WIDTH, PDIRECTION>::Scan(Comparator comparator,
252 |         const ColumnBlock* other_block, BitVectorBlock* bvblock, Bitwise bit_opt) const{
253 |     //both blocks and the bit vector block must describe the same tuples in the same layout
254 |     assert(bvblock->num() == num_tuples_);
255 |     assert(other_block->num_tuples() == num_tuples_);
256 |     assert(other_block->type() == type_);
257 |     assert(other_block->bit_width() == bit_width_);
258 |     //the asserts above are the only check behind this downcast
259 |     typedef const ByteSliceColumnBlock<BIT_WIDTH, PDIRECTION>* SameLayoutBlock;
260 |     SameLayoutBlock peer = static_cast<SameLayoutBlock>(other_block);
261 |     
262 |     //turn the runtime comparator into a template argument
263 |     switch(comparator){
264 |         case Comparator::kLess:
265 |             ScanHelper1<Comparator::kLess>(peer, bvblock, bit_opt); break;
266 |         case Comparator::kGreater:
267 |             ScanHelper1<Comparator::kGreater>(peer, bvblock, bit_opt); break;
268 |         case Comparator::kLessEqual:
269 |             ScanHelper1<Comparator::kLessEqual>(peer, bvblock, bit_opt); break;
270 |         case Comparator::kGreaterEqual:
271 |             ScanHelper1<Comparator::kGreaterEqual>(peer, bvblock, bit_opt); break;
272 |         case Comparator::kEqual:
273 |             ScanHelper1<Comparator::kEqual>(peer, bvblock, bit_opt); break;
274 |         case Comparator::kInequal:
275 |             ScanHelper1<Comparator::kInequal>(peer, bvblock, bit_opt); break;
276 |     }
277 | }
278 | 
279 | template <size_t BIT_WIDTH, Direction PDIRECTION>
280 | template <Comparator CMP>
281 | void ByteSliceColumnBlock<BIT_WIDTH, PDIRECTION>::ScanHelper1(
282 |                             const ByteSliceColumnBlock<BIT_WIDTH, PDIRECTION>* other_block,
283 |                             BitVectorBlock* bvblock, 
284 |                             Bitwise bit_opt) const {
285 |     switch(bit_opt){
286 |         case Bitwise::kSet:
287 |             return ScanHelper2<CMP, Bitwise::kSet>(other_block, bvblock);
288 |         case Bitwise::kAnd:
289 |             return ScanHelper2<CMP, Bitwise::kAnd>(other_block, bvblock);
290 |         case Bitwise::kOr:
291 |             return ScanHelper2<CMP, Bitwise::kOr>(other_block, bvblock);
292 |     }
293 | }
294 | 
// Block-vs-block scan, specialised at compile time on the comparator CMP and
// on the combine mode OPT.
//
// Each outer iteration covers kNumWordBits tuples and produces one word of the
// bit vector. Each inner iteration loads kNumAvxBits/8 bytes of every needed
// byte slice from both blocks, compares them lane by lane, and contributes the
// resulting movemask at bit position i of the word being assembled.
//
// Byte slices are visited from data_[0] onwards. A later slice is only
// examined while at least one lane that still matters is undecided, i.e. is
// still flagged in m_equal (early stop).
template <size_t BIT_WIDTH, Direction PDIRECTION>
template <Comparator CMP, Bitwise OPT>
void ByteSliceColumnBlock<BIT_WIDTH, PDIRECTION>::ScanHelper2(
                            const ByteSliceColumnBlock<BIT_WIDTH, PDIRECTION>* other_block,
                            BitVectorBlock* bvblock) const {

    //for every kNumWordBits (64) tuples
    for(size_t offset = 0, bv_word_id = 0; offset < num_tuples_; offset += kNumWordBits, bv_word_id++){
        WordUnit bitvector_word = WordUnit(0);
        //several AVX iterations are needed to fill one bit-vector word
        for(size_t i=0; i < kNumWordBits; i += kNumAvxBits/8){
            AvxUnit m_less = avx_zero();
            AvxUnit m_greater = avx_zero();
            AvxUnit m_equal; 
            //input_mask selects the lanes whose outcome can still change the
            //final word; it starts out as "all lanes"
            int input_mask = static_cast<int>(-1ULL);

            switch(OPT){
                case Bitwise::kSet:
                    m_equal = avx_ones();
                    break;
                case Bitwise::kAnd:
                    //only lanes whose existing bit is 1 can survive the AND
                    input_mask = static_cast<int>(bvblock->GetWordUnit(bv_word_id) >> i);
                    m_equal = avx_ones();
                    break;
                case Bitwise::kOr:
                    //only lanes whose existing bit is 0 can be changed by the OR
                    input_mask = ~static_cast<int>(bvblock->GetWordUnit(bv_word_id) >> i);
                    m_equal = avx_ones();
                    break;
            }

            //skip the comparison entirely when no lane of this chunk matters;
            //the masks keep their initial values and the combine step below
            //leaves the existing bits of this chunk unchanged
            if((OPT==Bitwise::kSet) ||  0 != input_mask){
                //prefetch 1024 bytes ahead in the first slice of both blocks
                __builtin_prefetch(data_[0] + offset + i + 1024);
                __builtin_prefetch(other_block->data_[0] + offset + i + 1024);
                ScanKernel2<CMP, 0>(
                        _mm256_lddqu_si256(reinterpret_cast<__m256i*>(data_[0]+offset+i)),
                        _mm256_lddqu_si256(reinterpret_cast<__m256i*>(other_block->data_[0]+offset+i)),
                        m_less,
                        m_greater,
                        m_equal);
                //early stop: go on to the next slice only if the code has one
                //and some relevant lane is still equal so far
                if(kNumBytesPerCode > 1 && 
                        ((OPT==Bitwise::kSet && !avx_iszero(m_equal)) 
                        || (OPT!=Bitwise::kSet && 0!=(input_mask & _mm256_movemask_epi8(m_equal))))){
                    __builtin_prefetch(data_[1] + offset + i + 1024);
                    __builtin_prefetch(other_block->data_[1] + offset + i + 1024);
                    ScanKernel2<CMP, 1>(
                            _mm256_lddqu_si256(reinterpret_cast<__m256i*>(data_[1]+offset+i)),
                            _mm256_lddqu_si256(reinterpret_cast<__m256i*>(other_block->data_[1]+offset+i)),
                            m_less,
                            m_greater,
                            m_equal);
                    //same early-stop test before the third slice (no prefetch here)
                    if(kNumBytesPerCode > 2 && 
                            ((OPT==Bitwise::kSet && !avx_iszero(m_equal)) 
                            || (OPT!=Bitwise::kSet && 0!=(input_mask & _mm256_movemask_epi8(m_equal))))){
                        ScanKernel2<CMP, 2>(
                                _mm256_lddqu_si256(reinterpret_cast<__m256i*>(data_[2]+offset+i)),
                                _mm256_lddqu_si256(reinterpret_cast<__m256i*>(other_block->data_[2]+offset+i)),
                                m_less,
                                m_greater,
                                m_equal);
                        //same early-stop test before the fourth slice
                        if(kNumBytesPerCode > 3 && 
                                ((OPT==Bitwise::kSet && !avx_iszero(m_equal)) 
                                || (OPT!=Bitwise::kSet && 0!=(input_mask & _mm256_movemask_epi8(m_equal))))){
                            ScanKernel2<CMP, 3>(
                                    _mm256_lddqu_si256(reinterpret_cast<__m256i*>(data_[3]+offset+i)),
                                    _mm256_lddqu_si256(reinterpret_cast<__m256i*>(other_block->data_[3]+offset+i)),
                                    m_less,
                                    m_greater,
                                    m_equal);
                        }
                    }
                }
            }


            //derive the per-lane answer for CMP from the accumulated masks
            AvxUnit m_result;
            switch(CMP){
                case Comparator::kLessEqual:
                    m_result = avx_or(m_less, m_equal);
                    break;
                case Comparator::kLess:
                    m_result = m_less;
                    break;
                case Comparator::kGreaterEqual:
                    m_result = avx_or(m_greater, m_equal);
                    break;
                case Comparator::kGreater:
                    m_result = m_greater;
                    break;
                case Comparator::kEqual:
                    m_result = m_equal;
                    break;
                case Comparator::kInequal:
                    m_result = avx_not(m_equal);
                    break;
            }
            //move mask: one bit per byte lane
            uint32_t mmask = _mm256_movemask_epi8(m_result);
            //save in temporary bit vector word at this chunk's bit position
            bitvector_word |= (static_cast<WordUnit>(mmask) << i);
        }
        //put result bitvector into bitvector block, combined with the existing
        //word as requested by OPT
        WordUnit x = bitvector_word;
        switch(OPT){
            case Bitwise::kSet:
                break;
            case Bitwise::kAnd:
                x &= bvblock->GetWordUnit(bv_word_id);
                break;
            case Bitwise::kOr:
                x |= bvblock->GetWordUnit(bv_word_id);
                break;
        }
        bvblock->SetWordUnit(x, bv_word_id);
    }
    //NOTE(review): presumably resets the bits past the last tuple in the final
    //word -- confirm against BitVectorBlock::ClearTail
    bvblock->ClearTail();
    
}
412 | 
413 | 
414 | //Scan Kernel
415 | template <size_t BIT_WIDTH, Direction PDIRECTION>
416 | template <Comparator CMP>
417 | inline void ByteSliceColumnBlock<BIT_WIDTH, PDIRECTION>::ScanKernel
418 |                                                         (const AvxUnit &byteslice1,
419 |                                                          const AvxUnit &byteslice2,
420 |                                                          AvxUnit &mask_less,
421 |                                                          AvxUnit &mask_greater,
422 |                                                          AvxUnit &mask_equal) const {
423 |     switch(CMP){
424 |         case Comparator::kEqual:
425 |         case Comparator::kInequal:
426 |             mask_equal = 
427 |                 avx_and(mask_equal, avx_cmpeq<ByteUnit>(byteslice1, byteslice2));
428 |             break;
429 |         case Comparator::kLess:
430 |         case Comparator::kLessEqual:
431 |             mask_less = 
432 |                 avx_or(mask_less, avx_and(mask_equal, avx_cmplt<ByteUnit>(byteslice1, byteslice2)));
433 |             mask_equal = 
434 |                 avx_and(mask_equal, avx_cmpeq<ByteUnit>(byteslice1, byteslice2));
435 |             break;
436 |         case Comparator::kGreater:
437 |         case Comparator::kGreaterEqual:
438 |             mask_greater =
439 |                 avx_or(mask_greater, avx_and(mask_equal, avx_cmpgt<ByteUnit>(byteslice1, byteslice2)));
440 |             mask_equal = 
441 |                 avx_and(mask_equal, avx_cmpeq<ByteUnit>(byteslice1, byteslice2));
442 |             break;
443 |     }
444 | }
445 | 
446 | //Scan Kernel2 --- Optimized on Scan Kernel
447 | //to remove unnecessary equal comparison for last byte slice
448 | template <size_t BIT_WIDTH, Direction PDIRECTION>
449 | template <Comparator CMP, size_t BYTE_ID>
450 | inline void ByteSliceColumnBlock<BIT_WIDTH, PDIRECTION>::ScanKernel2
451 |                                                         (const AvxUnit &byteslice1,
452 |                                                          const AvxUnit &byteslice2,
453 |                                                          AvxUnit &mask_less,
454 |                                                          AvxUnit &mask_greater,
455 |                                                          AvxUnit &mask_equal) const {
456 | 
457 |     //internal ByteSlice --- not last BS                                                        
458 |     if(BYTE_ID < kNumBytesPerCode - 1){ 
459 |         switch(CMP){
460 |             case Comparator::kEqual:
461 |             case Comparator::kInequal:
462 |                 mask_equal = 
463 |                     avx_and(mask_equal, avx_cmpeq<ByteUnit>(byteslice1, byteslice2));
464 |                 break;
465 |             case Comparator::kLess:
466 |             case Comparator::kLessEqual:
467 |                 mask_less = 
468 |                     avx_or(mask_less, avx_and(mask_equal, avx_cmplt<ByteUnit>(byteslice1, byteslice2)));
469 |                 mask_equal = 
470 |                     avx_and(mask_equal, avx_cmpeq<ByteUnit>(byteslice1, byteslice2));
471 |                 break;
472 |             case Comparator::kGreater:
473 |             case Comparator::kGreaterEqual:
474 |                 mask_greater =
475 |                     avx_or(mask_greater, avx_and(mask_equal, avx_cmpgt<ByteUnit>(byteslice1, byteslice2)));
476 |                 mask_equal = 
477 |                     avx_and(mask_equal, avx_cmpeq<ByteUnit>(byteslice1, byteslice2));
478 |                 break;
479 |         }
480 |     }
481 |     //last BS: no need to compute mask_equal for some comparisons
482 |     else if(BYTE_ID == kNumBytesPerCode - 1){   
483 |         switch(CMP){
484 |             case Comparator::kEqual:
485 |             case Comparator::kInequal:
486 |                 mask_equal = 
487 |                     avx_and(mask_equal, avx_cmpeq<ByteUnit>(byteslice1, byteslice2));
488 |                 break;
489 |             case Comparator::kLessEqual:
490 |                 mask_less = 
491 |                     avx_or(mask_less, avx_and(mask_equal, avx_cmplt<ByteUnit>(byteslice1, byteslice2)));
492 |                 mask_equal = 
493 |                     avx_and(mask_equal, avx_cmpeq<ByteUnit>(byteslice1, byteslice2));
494 |                 break;
495 |             case Comparator::kLess:
496 |                 mask_less = 
497 |                     avx_or(mask_less, avx_and(mask_equal, avx_cmplt<ByteUnit>(byteslice1, byteslice2)));
498 |                 break;
499 |             case Comparator::kGreaterEqual:
500 |                 mask_greater =
501 |                     avx_or(mask_greater, avx_and(mask_equal, avx_cmpgt<ByteUnit>(byteslice1, byteslice2)));
502 |                 mask_equal = 
503 |                     avx_and(mask_equal, avx_cmpeq<ByteUnit>(byteslice1, byteslice2));
504 |                 break;
505 |             case Comparator::kGreater:
506 |                 mask_greater =
507 |                     avx_or(mask_greater, avx_and(mask_equal, avx_cmpgt<ByteUnit>(byteslice1, byteslice2)));
508 |                 break;
509 |         }
510 |     }
511 |     //otherwise, do nothing
512 | 
513 | }
514 | 
515 | 
516 | template <size_t BIT_WIDTH, Direction PDIRECTION>
517 | void ByteSliceColumnBlock<BIT_WIDTH, PDIRECTION>::BulkLoadArray(const WordUnit* codes,
518 |                                                         size_t num, size_t start_pos){
519 |     assert(start_pos + num <= num_tuples_);
520 |     for(size_t i = 0; i < num; i++){
521 |         SetTuple(start_pos+i, codes[i]);
522 |     }
523 | }
524 | 
525 | 
526 | 
//explicit instantiation (not specialization) of the class template for every
//supported bit width, 1 through 32
//padding direction: the template default (right)
template class ByteSliceColumnBlock<1>;
template class ByteSliceColumnBlock<2>;
template class ByteSliceColumnBlock<3>;
template class ByteSliceColumnBlock<4>;
template class ByteSliceColumnBlock<5>;
template class ByteSliceColumnBlock<6>;
template class ByteSliceColumnBlock<7>;
template class ByteSliceColumnBlock<8>;
template class ByteSliceColumnBlock<9>;
template class ByteSliceColumnBlock<10>;
template class ByteSliceColumnBlock<11>;
template class ByteSliceColumnBlock<12>;
template class ByteSliceColumnBlock<13>;
template class ByteSliceColumnBlock<14>;
template class ByteSliceColumnBlock<15>;
template class ByteSliceColumnBlock<16>;
template class ByteSliceColumnBlock<17>;
template class ByteSliceColumnBlock<18>;
template class ByteSliceColumnBlock<19>;
template class ByteSliceColumnBlock<20>;
template class ByteSliceColumnBlock<21>;
template class ByteSliceColumnBlock<22>;
template class ByteSliceColumnBlock<23>;
template class ByteSliceColumnBlock<24>;
template class ByteSliceColumnBlock<25>;
template class ByteSliceColumnBlock<26>;
template class ByteSliceColumnBlock<27>;
template class ByteSliceColumnBlock<28>;
template class ByteSliceColumnBlock<29>;
template class ByteSliceColumnBlock<30>;
template class ByteSliceColumnBlock<31>;
template class ByteSliceColumnBlock<32>;
561 | 
562 | }   // namespace
563 | 


--------------------------------------------------------------------------------
/src/byteslice_column_block.h:
--------------------------------------------------------------------------------
  1 | /*******************************************************************************
  2 |  * Copyright (c) 2015
  3 |  * The Hong Kong Polytechnic University, Database Group
  4 |  *
  5 |  * Author: Ziqiang Feng (cszqfeng AT comp DOT polyu.edu.hk)
  6 |  *
  7 |  * See file LICENSE.md for details.
  8 |  *******************************************************************************/
  9 | #ifndef BYTESLICE_COLUMN_BLOCK_H
 10 | #define BYTESLICE_COLUMN_BLOCK_H
 11 | 
 12 | #include "../src/avx-utility.h"
 13 | #include "../src/column_block.h"
 14 | 
 15 | namespace byteslice{
 16 | 
 17 | 
 18 | /**
 19 | Warning:
 20 |     Bytes are FLIPPED in internal storage to preserve order.
 21 | */
 22 | 
// Size in bytes of one byte slice: one ByteUnit per tuple of a block, with the
// tuple count expressed in whole groups of kNumAvxBits/8 bytes.
// NOTE(review): assumes CEIL(a, b) is ceiling division, i.e. the tuple count is
// rounded up to a whole number of AVX registers -- confirm in macros.h.
static constexpr size_t kMemSizePerByteSlice = 
    sizeof(ByteUnit)*CEIL(kNumTuplesPerBlock, kNumAvxBits/8)*(kNumAvxBits/8);
 25 | 
/**
 * Column block that stores each code as up to four separate byte slices.
 *
 * BIT_WIDTH is the width of a code in bits; PDIRECTION chooses on which side
 * the code is padded up to a whole number of bytes (right by default).
 * data_[0] holds the most significant byte of every code, data_[1] the next
 * one, and so on (see GetTuple / SetTuple).
 */
template <size_t BIT_WIDTH, Direction PDIRECTION = Direction::kRight>
class ByteSliceColumnBlock: public ColumnBlock{
public:
    ByteSliceColumnBlock(size_t num=kNumTuplesPerBlock);
    virtual ~ByteSliceColumnBlock();

    // Read / write the code stored at position `pos` of this block.
    WordUnit GetTuple(size_t pos) const override;
    void SetTuple(size_t pos, WordUnit value) override;

    // Compare every tuple against a literal, or against the tuple at the same
    // position of `other_block`, and merge the outcome into `bvblock` as
    // selected by `bit_opt`.
    void Scan(Comparator comparator, WordUnit literal, BitVectorBlock* bvblock,
            Bitwise bit_opt = Bitwise::kSet) const override;
    void Scan(Comparator comparator, const ColumnBlock* other_block,
            BitVectorBlock* bvblock, Bitwise bit_opt = Bitwise::kSet) const override;

    // Store `num` codes from `codes` at positions start_pos, start_pos+1, ...
    void BulkLoadArray(const WordUnit* codes, size_t num, size_t start_pos = 0) override;

    void SerToFile(SequentialWriteBinaryFile &file) const override;
    void DeserFromFile(const SequentialReadBinaryFile &file) override;
    bool Resize(size_t size) override;

    Direction GetPadDirection();
    
private:
    //Scan Helper: literal
    //ScanHelper1 fixes the combine mode, ScanHelper2 runs the actual scan
    template <Comparator CMP>
    void ScanHelper1(WordUnit literal, BitVectorBlock* bvblock, Bitwise bit_opt) const;
    template <Comparator CMP, Bitwise OPT>
    void ScanHelper2(WordUnit literal, BitVectorBlock* bvblock) const;

    //Scan Helper: other block
    template <Comparator CMP>
    void ScanHelper1(const ByteSliceColumnBlock<BIT_WIDTH, PDIRECTION>* other_block,
                            BitVectorBlock* bvblock, Bitwise bit_opt) const;
    template <Comparator CMP, Bitwise OPT>
    void ScanHelper2(const ByteSliceColumnBlock<BIT_WIDTH, PDIRECTION>* other_block,
                            BitVectorBlock* bvblock) const;


    //Scan Kernel
    //Both fold one pair of byte slices into the running less/greater/equal
    //masks; ScanKernel2 additionally knows which slice (BYTE_ID) it handles.
    template <Comparator CMP>
    inline void ScanKernel(const AvxUnit &byteslice1, const AvxUnit &byteslice2,
            AvxUnit &mask_less, AvxUnit &mask_greater, AvxUnit &mask_equal) const;
    template <Comparator CMP, size_t BYTE_ID>
    inline void ScanKernel2(const AvxUnit &byteslice1, const AvxUnit &byteslice2,
            AvxUnit &mask_less, AvxUnit &mask_greater, AvxUnit &mask_equal) const;

    // Number of byte slices per code.
    // NOTE(review): assumes CEIL(a, b) is ceiling division -- confirm in macros.h.
    static constexpr size_t kNumBytesPerCode = CEIL(BIT_WIDTH, 8);
    // Bits added to bring a code up to kNumBytesPerCode whole bytes.
    static constexpr size_t kNumPaddingBits = kNumBytesPerCode * 8 - BIT_WIDTH;
    static constexpr Direction kPadDirection = PDIRECTION;
    // Mask with the low BIT_WIDTH bits set.
    static constexpr WordUnit kCodeMask = (1ULL << BIT_WIDTH) - 1;

    // One pointer per byte slice; GetTuple / SetTuple touch only the first
    // kNumBytesPerCode entries.
    ByteUnit* data_[4];


};
 81 | 
 82 | template <size_t BIT_WIDTH, Direction PDIRECTION>
 83 | inline WordUnit ByteSliceColumnBlock<BIT_WIDTH, PDIRECTION>::GetTuple(size_t pos) const{
 84 |     WordUnit ret = 0ULL;
 85 |     switch(kNumBytesPerCode){
 86 |         case 4:
 87 |             ret = (static_cast<WordUnit>(FLIP(data_[0][pos])) << 24) |
 88 |                     (static_cast<WordUnit>(FLIP(data_[1][pos])) << 16) |
 89 |                     (static_cast<WordUnit>(FLIP(data_[2][pos])) << 8) |
 90 |                     static_cast<WordUnit>(FLIP(data_[3][pos]));
 91 |             break;
 92 |         case 3:
 93 |             ret = (static_cast<WordUnit>(FLIP(data_[0][pos])) << 16) |
 94 |                     (static_cast<WordUnit>(FLIP(data_[1][pos])) << 8) |
 95 |                     static_cast<WordUnit>(FLIP(data_[2][pos]));
 96 |             break;
 97 |         case 2:
 98 |             ret = (static_cast<WordUnit>(FLIP(data_[0][pos])) << 8) |
 99 |                     static_cast<WordUnit>(FLIP(data_[1][pos]));
100 |             break;
101 |         case 1:
102 |             ret = static_cast<WordUnit>(FLIP(data_[0][pos]));
103 |             break;
104 |     }
105 |     switch(PDIRECTION){
106 |         case Direction::kRight:
107 |             ret >>= kNumPaddingBits;
108 |             break;
109 |         case Direction::kLeft:
110 |             break;
111 |     }
112 |     return ret;
113 | }
114 | 
115 | template <size_t BIT_WIDTH, Direction PDIRECTION>
116 | inline void ByteSliceColumnBlock<BIT_WIDTH, PDIRECTION>::SetTuple(size_t pos, WordUnit value){
117 |     switch(PDIRECTION){
118 |         case Direction::kRight:
119 |             value <<= kNumPaddingBits;
120 |             break;
121 |         case Direction::kLeft:
122 |             break;
123 |     }
124 | 
125 |     switch(kNumBytesPerCode){
126 |         case 4:
127 |             data_[0][pos] = FLIP(static_cast<ByteUnit>(value >> 24));
128 |             data_[1][pos] = FLIP(static_cast<ByteUnit>(value >> 16));
129 |             data_[2][pos] = FLIP(static_cast<ByteUnit>(value >> 8));
130 |             data_[3][pos] = FLIP(static_cast<ByteUnit>(value));
131 |             break;
132 |         case 3:
133 |             data_[0][pos] = FLIP(static_cast<ByteUnit>(value >> 16));
134 |             data_[1][pos] = FLIP(static_cast<ByteUnit>(value >> 8));
135 |             data_[2][pos] = FLIP(static_cast<ByteUnit>(value));
136 |             break;
137 |         case 2:
138 |             data_[0][pos] = FLIP(static_cast<ByteUnit>(value >> 8));
139 |             data_[1][pos] = FLIP(static_cast<ByteUnit>(value));
140 |             break;
141 |         case 1:
142 |             data_[0][pos] = FLIP(static_cast<ByteUnit>(value));
143 |             break;
144 |     }
145 | }
146 | 
147 | }   // namespace
148 | #endif
149 | 


--------------------------------------------------------------------------------
/src/column.cpp:
--------------------------------------------------------------------------------
  1 | /*******************************************************************************
  2 |  * Copyright (c) 2015
  3 |  * The Hong Kong Polytechnic University, Database Group
  4 |  *
  5 |  * Author: Ziqiang Feng (cszqfeng AT comp DOT polyu.edu.hk)
  6 |  *
  7 |  * See file LICENSE.md for details.
  8 |  *******************************************************************************/
  9 | #include 	"column.h"
 10 | 
 11 | #include    <algorithm>
 12 | #include    <fstream>
 13 | #include    <iostream>
 14 | #include    <omp.h>
 15 | 
 16 | #include 	"byteslice_column_block.h"
 17 | #include 	"naive_column_block.h"
 18 | 
 19 | namespace byteslice {
 20 | 
 21 | Column::Column(ColumnType type, size_t bit_width, size_t num) :
 22 | 		type_(type), bit_width_(bit_width), num_tuples_(num) {
 23 | 
 24 | 	for (size_t count = 0; count < num; count += kNumTuplesPerBlock) {
 25 | 		ColumnBlock* new_block = CreateNewBlock();
 26 | 		new_block->Resize(std::min(kNumTuplesPerBlock, num - count));
 27 | 		blocks_.push_back(new_block);
 28 | 	}
 29 | }
 30 | 
// Releases every block owned by this column (delegates to Destroy()).
Column::~Column() {
	Destroy();
}
 34 | 
 35 | void Column::Destroy() {
 36 | 	while (!blocks_.empty()) {
 37 | 		delete blocks_.back();
 38 | 		blocks_.pop_back();
 39 | 	}
 40 | }
 41 | 
 42 | WordUnit Column::GetTuple(size_t id) const {
 43 | 	assert(id < num_tuples_);
 44 | 	size_t block_id = id / kNumTuplesPerBlock;
 45 | 	size_t pos_in_block = id % kNumTuplesPerBlock;
 46 | 	return blocks_[block_id]->GetTuple(pos_in_block);
 47 | }
 48 | 
 49 | void Column::SetTuple(size_t id, WordUnit value) {
 50 | 	size_t block_id = id / kNumTuplesPerBlock;
 51 | 	size_t pos_in_block = id % kNumTuplesPerBlock;
 52 | 	blocks_[block_id]->SetTuple(pos_in_block, value);
 53 | }
 54 | 
 55 | size_t Column::LoadTextFile(std::string filepath) {
 56 | 	std::ifstream infile;
 57 | 	infile.open(filepath, std::ifstream::in);
 58 | 	if (!infile.good()) {
 59 | 		std::cerr << "Can't open file: " << filepath << std::endl;
 60 | 		return -1;
 61 | 	}
 62 | 	WordUnit val;
 63 | 	size_t id = 0;
 64 | 	for (id = 0; (id < GetNumTuples()) && (infile >> val); id++) {
 65 | 		SetTuple(id, val);
 66 | 	}
 67 | 	infile.close();
 68 | 	return id;
 69 | }
 70 | 
 71 | void Column::Resize(size_t num) {
 72 | 	num_tuples_ = num;
 73 | 	const size_t new_num_blocks = CEIL(num, kNumTuplesPerBlock);
 74 | 	const size_t old_num_blocks = blocks_.size();
 75 | 	if (new_num_blocks > old_num_blocks) {    // need to add blocks
 76 | 		// fill up the last block
 77 | 		blocks_[old_num_blocks - 1]->Resize(kNumTuplesPerBlock);
 78 | 		// append new blocks
 79 | 		for (size_t bid = old_num_blocks; bid < new_num_blocks; bid++) {
 80 | 			ColumnBlock* new_block = CreateNewBlock();
 81 | 			new_block->Resize(kNumTuplesPerBlock);
 82 | 			blocks_.push_back(new_block);
 83 | 		}
 84 | 	} else if (new_num_blocks < old_num_blocks) {   // need to remove blocks
 85 | 		for (size_t bid = old_num_blocks - 1; bid > new_num_blocks; bid--) {
 86 | 			delete blocks_.back();
 87 | 			blocks_.pop_back();
 88 | 		}
 89 | 	}
 90 | 	// now the number of block is desired
 91 | 	// correct the size of the last block
 92 | 	size_t num_tuples_last_block = num % kNumTuplesPerBlock;
 93 | 	if (0 < num_tuples_last_block) {
 94 | 		blocks_.back()->Resize(num_tuples_last_block);
 95 | 	}
 96 | 
 97 | 	assert(blocks_.size() == new_num_blocks);
 98 | }
 99 | 
100 | void Column::SerToFile(SequentialWriteBinaryFile &file) const {
101 | 	for (auto block : blocks_) {
102 | 		block->SerToFile(file);
103 | 	}
104 | }
105 | 
106 | void Column::DeserFromFile(const SequentialReadBinaryFile &file) {
107 | 	for (auto block : blocks_) {
108 | 		block->DeserFromFile(file);
109 | 	}
110 | }
111 | 
112 | void Column::BulkLoadArray(const WordUnit* codes, size_t num, size_t pos) {
113 | 	assert(pos + num <= num_tuples_);
114 | 	size_t block_id = pos / kNumTuplesPerBlock;
115 | 	size_t pos_in_block = pos % kNumTuplesPerBlock;
116 | 	size_t num_remain_tuples = num;
117 | 	const WordUnit* data_ptr = codes;
118 | 	while (num_remain_tuples > 0) {
119 | 		size_t size = std::min(blocks_[block_id]->num_tuples() - pos_in_block,
120 | 				num_remain_tuples);
121 | 		blocks_[block_id]->BulkLoadArray(data_ptr, size, pos_in_block);
122 | 		data_ptr += size;
123 | 		num_remain_tuples -= size;
124 | 		pos_in_block = 0;
125 | 		block_id++;
126 | 	}
127 | }
128 | 
129 | void Column::Scan(Comparator comparator, WordUnit literal, BitVector* bitvector,
130 | 		Bitwise bit_opt) const {
131 | 
132 | 	assert(num_tuples_ == bitvector->num());
133 | 
134 | #pragma omp parallel for schedule(dynamic)
135 | 	for (size_t block_id = 0; block_id < blocks_.size(); block_id++) {
136 | 
137 | 		blocks_[block_id]->Scan(comparator, literal,
138 | 				bitvector->GetBVBlock(block_id), bit_opt);
139 | 	}
140 | }
141 | 
142 | void Column::Scan(Comparator comparator, const Column* other_column,
143 | 		BitVector* bitvector, Bitwise bit_opt) const {
144 | 	assert(num_tuples_ == bitvector->num());
145 | 	assert(type_ == other_column->GetType());
146 | 	assert(bit_width_ == other_column->GetBitWidth());
147 | 	assert(num_tuples_ == other_column->GetNumTuples());
148 | 
149 | #pragma omp parallel for schedule(dynamic)
150 | 	for (size_t block_id = 0; block_id < blocks_.size(); block_id++) {
151 | 		blocks_[block_id]->Scan(comparator, other_column->blocks_[block_id],
152 | 				bitvector->GetBVBlock(block_id), bit_opt);
153 | 	}
154 | 
155 | }
156 | 
157 | ColumnBlock* Column::CreateNewBlock() const {
158 | 	assert(0 < bit_width_ && 32 >= bit_width_);
159 | 	if (!(0 < bit_width_ && 32 >= bit_width_)) {
160 | 		std::cerr << "[FATAL] Incorrect bit width: " << bit_width_ << std::endl;
161 | 		exit(1);
162 | 	}
163 | 
164 | 	switch (type_) {
165 | 	case ColumnType::kNaive:
166 | 		switch (CEIL(bit_width_, 8)) {
167 | 		case 1:
168 | 			return new NaiveColumnBlock<uint8_t>();
169 | 		case 2:
170 | 			return new NaiveColumnBlock<uint16_t>();
171 | 		case 3:
172 | 		case 4:
173 | 			return new NaiveColumnBlock<uint32_t>();
174 | 		}
175 | 		break;
176 | 	case ColumnType::kByteSlicePadRight:
177 | 		switch (bit_width_) {
178 | 		case 1:
179 | 			return new ByteSliceColumnBlock<1>();
180 | 		case 2:
181 | 			return new ByteSliceColumnBlock<2>();
182 | 		case 3:
183 | 			return new ByteSliceColumnBlock<3>();
184 | 		case 4:
185 | 			return new ByteSliceColumnBlock<4>();
186 | 		case 5:
187 | 			return new ByteSliceColumnBlock<5>();
188 | 		case 6:
189 | 			return new ByteSliceColumnBlock<6>();
190 | 		case 7:
191 | 			return new ByteSliceColumnBlock<7>();
192 | 		case 8:
193 | 			return new ByteSliceColumnBlock<8>();
194 | 		case 9:
195 | 			return new ByteSliceColumnBlock<9>();
196 | 		case 10:
197 | 			return new ByteSliceColumnBlock<10>();
198 | 		case 11:
199 | 			return new ByteSliceColumnBlock<11>();
200 | 		case 12:
201 | 			return new ByteSliceColumnBlock<12>();
202 | 		case 13:
203 | 			return new ByteSliceColumnBlock<13>();
204 | 		case 14:
205 | 			return new ByteSliceColumnBlock<14>();
206 | 		case 15:
207 | 			return new ByteSliceColumnBlock<15>();
208 | 		case 16:
209 | 			return new ByteSliceColumnBlock<16>();
210 | 		case 17:
211 | 			return new ByteSliceColumnBlock<17>();
212 | 		case 18:
213 | 			return new ByteSliceColumnBlock<18>();
214 | 		case 19:
215 | 			return new ByteSliceColumnBlock<19>();
216 | 		case 20:
217 | 			return new ByteSliceColumnBlock<20>();
218 | 		case 21:
219 | 			return new ByteSliceColumnBlock<21>();
220 | 		case 22:
221 | 			return new ByteSliceColumnBlock<22>();
222 | 		case 23:
223 | 			return new ByteSliceColumnBlock<23>();
224 | 		case 24:
225 | 			return new ByteSliceColumnBlock<24>();
226 | 		case 25:
227 | 			return new ByteSliceColumnBlock<25>();
228 | 		case 26:
229 | 			return new ByteSliceColumnBlock<26>();
230 | 		case 27:
231 | 			return new ByteSliceColumnBlock<27>();
232 | 		case 28:
233 | 			return new ByteSliceColumnBlock<28>();
234 | 		case 29:
235 | 			return new ByteSliceColumnBlock<29>();
236 | 		case 30:
237 | 			return new ByteSliceColumnBlock<30>();
238 | 		case 31:
239 | 			return new ByteSliceColumnBlock<31>();
240 | 		case 32:
241 | 			return new ByteSliceColumnBlock<32>();
242 | 		}
243 | 		break;
244 | 	default:
245 | 		std::cerr << "[FATAL] Unknown column type." << std::endl;
246 | 		exit(1);
247 | 	}
248 | 
249 | 	return nullptr;
250 | }
251 | 
252 | }   // namespace
253 | 


--------------------------------------------------------------------------------
/src/column.h:
--------------------------------------------------------------------------------
 1 | /*******************************************************************************
 2 |  * Copyright (c) 2015
 3 |  * The Hong Kong Polytechnic University, Database Group
 4 |  *
 5 |  * Author: Ziqiang Feng (cszqfeng AT comp DOT polyu.edu.hk)
 6 |  *
 7 |  * See file LICENSE.md for details.
 8 |  *******************************************************************************/
 9 | #ifndef COLUMN_H
10 | #define COLUMN_H
11 | 
12 | 
13 | #include    <string>
14 | #include    <vector>
15 | 
16 | #include 	"bitvector.h"
17 | #include 	"column_block.h"
18 | #include 	"param.h"
19 | #include 	"sequential_binary_file.h"
20 | #include 	"types.h"
21 | 
22 | namespace byteslice{
23 | 
24 | class BitVector;
25 | 
/**
 * A column of fixed-width codes, stored as a sequence of ColumnBlock objects
 * that the column owns through raw pointers.
 *
 * NOTE(review): no copy constructor or copy assignment is declared although
 * the destructor deletes the blocks; copying a Column would share and later
 * double-delete them. Pass columns by pointer or reference.
 */
class Column{
public:
    // Creates a column of `num` tuples with the given storage type and code
    // width in bits.
    Column(ColumnType type, size_t bit_width, size_t num=0);
    ~Column();
    // Deletes all blocks; the destructor calls this as well.
    void Destroy();    

    // Access a single tuple by its position in the column.
    WordUnit GetTuple(size_t id) const;
    void SetTuple(size_t id, WordUnit value);
    // Changes the number of tuples, adding or removing blocks as needed.
    void Resize(size_t num);

    // Write all blocks to / read all existing blocks from a binary file.
    void SerToFile(SequentialWriteBinaryFile &file) const;
    void DeserFromFile(const SequentialReadBinaryFile &file);

    /**
     * @brief Load the column from a projection file in text format.
     * One integer per line.
     * @return Number of tuples loaded; -1 converted to size_t if the file
     * cannot be opened.
     */
    size_t LoadTextFile(std::string filepath);

    /**
     * @brief Load the column from a C-array.
     * Copies `num` codes starting at tuple position `pos`.
     */
    void BulkLoadArray(const WordUnit* codes, size_t num, size_t pos=0);

    // Compare every tuple against a literal, or against the tuple at the same
    // position of `other_column`, and merge the outcome into `bitvector` as
    // selected by `bit_opt`.
    void Scan(Comparator comparator, WordUnit literal,
            BitVector* bitvector, Bitwise bit_opt = Bitwise::kSet) const;
    void Scan(Comparator comparator, const Column* other_column, 
            BitVector* bitvector, Bitwise bit_opt = Bitwise::kSet) const;

    // Allocates a block matching this column's type and bit width; the caller
    // takes ownership of the returned pointer.
    ColumnBlock* CreateNewBlock() const;

    size_t GetNumTuples() const { return num_tuples_;}
    size_t GetBitWidth() const { return bit_width_;}
    ColumnType GetType() const { return type_;}
    size_t GetNumBlocks() const { return blocks_.size();}
    // No bounds check: block_id must be smaller than GetNumBlocks().
    ColumnBlock* GetBlock(size_t block_id) const {return blocks_[block_id];}

private:
    ColumnType type_;                   // storage layout of the blocks
    size_t bit_width_;                  // width of one code in bits
    size_t num_tuples_;                 // number of tuples in the column
    std::vector<ColumnBlock*> blocks_;  // owned blocks, in tuple order
};
69 | 
70 | 
71 | }   // namespace
72 | 
73 | #endif  //COLUMN_H
74 | 


--------------------------------------------------------------------------------
/src/column_block.h:
--------------------------------------------------------------------------------
 1 | /*******************************************************************************
 2 |  * Copyright (c) 2015
 3 |  * The Hong Kong Polytechnic University, Database Group
 4 |  *
 5 |  * Author: Ziqiang Feng (cszqfeng AT comp DOT polyu.edu.hk)
 6 |  *
 7 |  * See file LICENSE.md for details.
 8 |  *******************************************************************************/
 9 | #ifndef     COLUMN_BLOCK_H
10 | #define     COLUMN_BLOCK_H
11 | 
12 | #include "../src/bitvector_block.h"
13 | #include "../src/macros.h"
14 | #include "../src/param.h"
15 | #include "../src/sequential_binary_file.h"
16 | #include "../src/types.h"
17 | 
18 | namespace byteslice{
19 | 
20 | class ColumnBlock{   // Abstract interface implemented by every physical block layout.
21 | public:
22 |     // Virtual so that a concrete block can be deleted through a ColumnBlock*.
23 |     virtual ~ColumnBlock(){}
24 | 
25 |     virtual WordUnit GetTuple(size_t pos_in_block) const = 0;
26 |     virtual void SetTuple(size_t pos_in_block, WordUnit value) = 0;
27 |     virtual void Scan(Comparator comparator, WordUnit literal, BitVectorBlock* bv_block, Bitwise bit_opt=Bitwise::kSet) const = 0;
28 |     virtual void Scan(Comparator comparator, const ColumnBlock* column_block, BitVectorBlock* bv_block, Bitwise bit_opt=Bitwise::kSet) const = 0;
29 |     virtual void BulkLoadArray(const WordUnit* codes, size_t num, size_t start_pos=0) = 0;
30 |     virtual void SerToFile(SequentialWriteBinaryFile &file) const = 0;
31 |     virtual void DeserFromFile(const SequentialReadBinaryFile &file) = 0;
32 |     virtual bool Resize(size_t size) = 0;
33 | 
34 |     //accessors
35 |     ColumnType type() const;
36 |     size_t bit_width() const;
37 |     size_t num_tuples() const;
38 | 
39 | protected:
40 |     // Only concrete subclasses construct the base; type and bit width never change afterwards.
41 |     ColumnBlock(ColumnType type, size_t bit_width, size_t num):
42 |         type_(type), bit_width_(bit_width), num_tuples_(num){
43 |     }
44 |     const ColumnType type_;
45 |     const size_t bit_width_;
46 |     size_t num_tuples_;      // logical tuple count; the only mutable piece of base state
47 | 
48 | 
49 | };
50 | // Layout tag given to the constructor.
51 | inline ColumnType ColumnBlock::type() const{
52 |     return this->type_;
53 | }
54 | // Bit width given to the constructor.
55 | inline size_t ColumnBlock::bit_width() const{
56 |     return this->bit_width_;
57 | }
58 | // Current logical tuple count of the block.
59 | inline size_t ColumnBlock::num_tuples() const{
60 |     return this->num_tuples_;
61 | }
62 | 
63 | }
64 | 
65 | #endif  //COLUMN_BLOCK_H
66 | 


--------------------------------------------------------------------------------
/src/macros.h:
--------------------------------------------------------------------------------
 1 | /*******************************************************************************
 2 |  * Copyright (c) 2015
 3 |  * The Hong Kong Polytechnic University, Database Group
 4 |  *
 5 |  * Author: Ziqiang Feng (cszqfeng AT comp DOT polyu.edu.hk)
 6 |  *
 7 |  * See file LICENSE.md for details.
 8 |  *******************************************************************************/
 9 | #ifndef     MACROS_H
10 | #define     MACROS_H
11 | 
12 | #include    <x86intrin.h>
13 | #include	<cassert>
14 | // Integer ceiling of X / Y for X >= 0 and Y > 0; CEIL(0, Y) is 0. Note that Y is evaluated twice.
15 | #define CEIL(X,Y) (((X) + (Y) - 1) / (Y))
16 | // Number of set bits in a 64-bit value, via the POPCNT intrinsic.
17 | #define POPCNT64(X) (_mm_popcnt_u64(X))
18 | 
19 | #endif  // MACROS_H
20 | 


--------------------------------------------------------------------------------
/src/naive_column_block.cpp:
--------------------------------------------------------------------------------
  1 | /*******************************************************************************
  2 |  * Copyright (c) 2015
  3 |  * The Hong Kong Polytechnic University, Database Group
  4 |  *
  5 |  * Author: Ziqiang Feng (cszqfeng AT comp DOT polyu.edu.hk)
  6 |  *
  7 |  * See file LICENSE.md for details.
  8 |  *******************************************************************************/
  9 | #include "naive_column_block.h"
 10 | 
 11 | #include	<cassert>
 12 | #include    <cstring>
 13 | 
 14 | namespace byteslice{
 15 | // Builds a block with a full-capacity, zero-filled array; `num` only sets the logical tuple count.
 16 | template <typename DTYPE>
 17 | NaiveColumnBlock<DTYPE>::NaiveColumnBlock(size_t num)
 18 |     : ColumnBlock(ColumnType::kNaive, 8 * sizeof(DTYPE), num),
 19 |       data_(new DTYPE[kNumTuplesPerBlock]())   // the trailing () value-initializes every slot to zero
 20 | {
 21 | }
 22 | // Releases the tuple array allocated by the constructor.
 23 | template <typename DTYPE>
 24 | NaiveColumnBlock<DTYPE>::~NaiveColumnBlock(){
 25 |     delete[] this->data_;
 26 | }
 27 | // Sets the logical tuple count; returns false and changes nothing when num exceeds the block capacity.
 28 | template <typename DTYPE>
 29 | bool NaiveColumnBlock<DTYPE>::Resize(size_t num){
 30 |     if(num <= kNumTuplesPerBlock) num_tuples_ = num;   // data_ only has kNumTuplesPerBlock slots
 31 |     return num_tuples_ == num;                          // false when the request was rejected
 32 | }
 33 | // Appends the tuple count, then the whole fixed-capacity array, to `file`.
 34 | template <typename DTYPE>
 35 | void NaiveColumnBlock<DTYPE>::SerToFile(SequentialWriteBinaryFile &file) const{
 36 |     file.Append(&this->num_tuples_, sizeof(size_t));
 37 |     file.Append(this->data_, kNumTuplesPerBlock * sizeof(DTYPE));
 38 | }
 39 | // Reads back what SerToFile wrote: the tuple count, then the whole fixed-capacity array.
 40 | template <typename DTYPE>
 41 | void NaiveColumnBlock<DTYPE>::DeserFromFile(const SequentialReadBinaryFile &file){
 42 |     file.Read(&this->num_tuples_, sizeof(size_t));
 43 |     file.Read(this->data_, kNumTuplesPerBlock * sizeof(DTYPE));
 44 | }
 45 | 
 46 | // Scan against a literal: lift the runtime comparator into a template argument of ScanHelper1.
 47 | template <typename DTYPE>
 48 | void NaiveColumnBlock<DTYPE>::Scan(Comparator comparator, WordUnit literal,
 49 |         BitVectorBlock* bv_block, Bitwise bit_opt) const{
 50 |     assert(bv_block->num() == num_tuples_);   // the result vector must cover exactly this block
 51 |     switch(comparator){
 52 |         case Comparator::kEqual:
 53 |             ScanHelper1<Comparator::kEqual>(literal, bv_block, bit_opt); break;
 54 |         case Comparator::kInequal:
 55 |             ScanHelper1<Comparator::kInequal>(literal, bv_block, bit_opt); break;
 56 |         case Comparator::kLess:
 57 |             ScanHelper1<Comparator::kLess>(literal, bv_block, bit_opt); break;
 58 |         case Comparator::kGreater:
 59 |             ScanHelper1<Comparator::kGreater>(literal, bv_block, bit_opt); break;
 60 |         case Comparator::kLessEqual:
 61 |             ScanHelper1<Comparator::kLessEqual>(literal, bv_block, bit_opt); break;
 62 |         case Comparator::kGreaterEqual:
 63 |             ScanHelper1<Comparator::kGreaterEqual>(literal, bv_block, bit_opt); break;
 64 |     }
 65 |     // A comparator value outside the enum matches no case and leaves bv_block untouched.
 66 | }
 67 | // Second dispatch step for literal scans: lift the runtime bit operation into a template argument.
 68 | template <typename DTYPE>
 69 | template <Comparator CMP>
 70 | void NaiveColumnBlock<DTYPE>::ScanHelper1(WordUnit literal, BitVectorBlock* bv_block,
 71 |         Bitwise bit_opt) const{
 72 |     if(Bitwise::kSet == bit_opt){
 73 |         ScanHelper2<CMP, Bitwise::kSet>(literal, bv_block);
 74 |     }else if(Bitwise::kAnd == bit_opt){
 75 |         ScanHelper2<CMP, Bitwise::kAnd>(literal, bv_block);
 76 |     }else if(Bitwise::kOr == bit_opt){
 77 |         ScanHelper2<CMP, Bitwise::kOr>(literal, bv_block);
 78 |     }
 79 |     // Any other value does nothing, exactly like a switch without a default label.
 80 | }
 81 | // Literal scan kernel: evaluates CMP for every tuple and merges the packed result into bv_block per OPT.
 82 | template <typename DTYPE>
 83 | template <Comparator CMP, Bitwise OPT>
 84 | void NaiveColumnBlock<DTYPE>::ScanHelper2(WordUnit literal, BitVectorBlock* bv_block) const{
 85 |     // Compare at full WordUnit width: narrowing the literal to DTYPE would wrap an
 86 |     // out-of-range value (e.g. 256 -> 0 for uint8_t) and silently change the predicate.
 87 |     const WordUnit lit = literal;
 88 |     for(size_t offset = 0; offset < num_tuples_; offset += kNumWordBits){
 89 |         WordUnit word = 0;
 90 |         for(size_t i = 0; i < kNumWordBits; i++){
 91 |             size_t pos = offset + i;
 92 |             if(pos >= num_tuples_){   // the last word of the block may be partial
 93 |                 break;
 94 |             }
 95 |             const WordUnit val = data_[pos];   // widen the stored code instead of narrowing the literal
 96 |             WordUnit bit = 0;
 97 |             switch(CMP){
 98 |                 case Comparator::kLess:
 99 |                     bit = (val < lit);
100 |                     break;
101 |                 case Comparator::kGreater:
102 |                     bit = (val > lit);
103 |                     break;
104 |                 case Comparator::kLessEqual:
105 |                     bit = (val <= lit);
106 |                     break;
107 |                 case Comparator::kGreaterEqual:
108 |                     bit = (val >= lit);
109 |                     break;
110 |                 case Comparator::kEqual:
111 |                     bit = (val == lit);
112 |                     break;
113 |                 case Comparator::kInequal:
114 |                     bit = (val != lit);
115 |                     break;
116 |             }
117 |             // Tuple i of this word lands in bit i (least significant bit first).
118 |             word |= (bit << i);
119 |         }
120 |         // Merge the packed word into the bit vector according to OPT.
121 |         size_t bv_word_id = offset / kNumWordBits;
122 |         WordUnit x = word;
123 |         switch(OPT){
124 |             case Bitwise::kSet:
125 |                 x = word;
126 |                 break;
127 |             case Bitwise::kAnd:
128 |                 x = bv_block->GetWordUnit(bv_word_id);
129 |                 x &= word;
130 |                 break;
131 |             case Bitwise::kOr:
132 |                 x = bv_block->GetWordUnit(bv_word_id);
133 |                 x |= word;
134 |                 break;
135 |         }
136 |         bv_block->SetWordUnit(x, bv_word_id);
137 |     }
138 | }
139 | 
140 | // Scan against another column block: lift the runtime comparator into a template argument.
141 | template <typename DTYPE>
142 | void NaiveColumnBlock<DTYPE>::Scan(Comparator comparator, const ColumnBlock* column_block,
143 |         BitVectorBlock* bv_block, Bitwise bit_opt) const{
144 |     assert(column_block->type() == type_);               // same layout
145 |     assert(column_block->num_tuples() == num_tuples_);   // same length
146 |     assert(column_block->bit_width() == bit_width_);     // same code width
147 | 
148 |     switch(comparator){
149 |         case Comparator::kEqual:
150 |             ScanHelper1<Comparator::kEqual>(column_block, bv_block, bit_opt); break;
151 |         case Comparator::kInequal:
152 |             ScanHelper1<Comparator::kInequal>(column_block, bv_block, bit_opt); break;
153 |         case Comparator::kLess:
154 |             ScanHelper1<Comparator::kLess>(column_block, bv_block, bit_opt); break;
155 |         case Comparator::kGreater:
156 |             ScanHelper1<Comparator::kGreater>(column_block, bv_block, bit_opt); break;
157 |         case Comparator::kLessEqual:
158 |             ScanHelper1<Comparator::kLessEqual>(column_block, bv_block, bit_opt); break;
159 |         case Comparator::kGreaterEqual:
160 |             ScanHelper1<Comparator::kGreaterEqual>(column_block, bv_block, bit_opt); break;
161 |     }
162 | }
163 | // Second dispatch step for column-vs-column scans: lift the bit operation into a template argument.
164 | template <typename DTYPE>
165 | template <Comparator CMP>
166 | void NaiveColumnBlock<DTYPE>::ScanHelper1(const ColumnBlock* colblock,
167 |         BitVectorBlock* bvblock, Bitwise bit_opt) const{
168 |     if(Bitwise::kSet == bit_opt){
169 |         ScanHelper2<CMP, Bitwise::kSet>(colblock, bvblock);
170 |     }else if(Bitwise::kAnd == bit_opt){
171 |         ScanHelper2<CMP, Bitwise::kAnd>(colblock, bvblock);
172 |     }else if(Bitwise::kOr == bit_opt){
173 |         ScanHelper2<CMP, Bitwise::kOr>(colblock, bvblock);
174 |     }
175 |     // Any other value does nothing, exactly like a switch without a default label.
176 | }
177 | // Column-vs-column scan kernel: one result bit per tuple, packed LSB-first, one word at a time.
178 | template <typename DTYPE>
179 | template <Comparator CMP, Bitwise OPT>
180 | void NaiveColumnBlock<DTYPE>::ScanHelper2(const ColumnBlock* colblock,
181 |                                         BitVectorBlock* bvblock) const{
182 |     // colblock supplies the right-hand operand of every comparison (read through GetTuple);
183 |     // bvblock receives the packed result words, combined with its previous content per OPT.
184 |     // word_id always equals base / kNumWordBits because base advances one word per iteration.
185 |     size_t word_id = 0;
186 |     for(size_t base = 0; base < num_tuples_; base += kNumWordBits, ++word_id){
187 |         // The last word may be partial: stop at the end of the block.
188 |         const size_t stop = (base + kNumWordBits < num_tuples_) ? base + kNumWordBits : num_tuples_;
189 | 
190 |         WordUnit packed = 0;
191 |         for(size_t pos = base; pos < stop; ++pos){
192 |             const DTYPE lhs = data_[pos];
193 |             const DTYPE rhs = static_cast<DTYPE>(colblock->GetTuple(pos));
194 | 
195 |             bool hit = false;
196 |             switch(CMP){
197 |                 case Comparator::kEqual:
198 |                     hit = (lhs == rhs);
199 |                     break;
200 |                 case Comparator::kInequal:
201 |                     hit = (lhs != rhs);
202 |                     break;
203 |                 case Comparator::kLess:
204 |                     hit = (lhs < rhs);
205 |                     break;
206 |                 case Comparator::kGreater:
207 |                     hit = (lhs > rhs);
208 |                     break;
209 |                 case Comparator::kLessEqual:
210 |                     hit = (lhs <= rhs);
211 |                     break;
212 |                 case Comparator::kGreaterEqual:
213 |                     hit = (lhs >= rhs);
214 |                     break;
215 |             }
216 |             // The tuple at offset k within the word owns bit k.
217 |             packed |= (static_cast<WordUnit>(hit) << (pos - base));
218 |         }
219 | 
220 |         // kSet stores the fresh word as is; kAnd / kOr fold it into what is already there.
221 |         WordUnit merged = packed;
222 |         switch(OPT){
223 |             case Bitwise::kSet:
224 |                 break;
225 |             case Bitwise::kAnd:
226 |                 merged = bvblock->GetWordUnit(word_id) & packed;
227 |                 break;
228 |             case Bitwise::kOr:
229 |                 merged = bvblock->GetWordUnit(word_id) | packed;
230 |                 break;
231 |         }
232 |         bvblock->SetWordUnit(merged, word_id);
233 |     }
234 | }
235 | // Copies `num` codes into the block starting at slot `start_pos`, narrowing each one to DTYPE.
236 | template <typename DTYPE>
237 | void NaiveColumnBlock<DTYPE>::BulkLoadArray(const WordUnit* codes, size_t num, size_t start_pos){
238 |     assert(start_pos + num <= num_tuples_);   // the target range must lie inside the logical size
239 |     DTYPE* const dst = data_ + start_pos;
240 |     for(size_t k = 0; k != num; ++k){
241 |         dst[k] = static_cast<DTYPE>(codes[k]);
242 |     }
243 | }
244 | 
245 | // Explicit instantiations: the member templates above are defined in this .cpp, so these are the element types provided here.
246 | template class NaiveColumnBlock<uint8_t>;
247 | template class NaiveColumnBlock<uint16_t>;
248 | template class NaiveColumnBlock<uint32_t>;
249 | template class NaiveColumnBlock<uint64_t>;
250 | 
251 | }   // namespace byteslice
252 | 


--------------------------------------------------------------------------------
/src/naive_column_block.h:
--------------------------------------------------------------------------------
 1 | /*******************************************************************************
 2 |  * Copyright (c) 2015
 3 |  * The Hong Kong Polytechnic University, Database Group
 4 |  *
 5 |  * Author: Ziqiang Feng (cszqfeng AT comp DOT polyu.edu.hk)
 6 |  *
 7 |  * See file LICENSE.md for details.
 8 |  *******************************************************************************/
 9 | #ifndef NAIVE_COLUMN_BLOCK_H
10 | #define NAIVE_COLUMN_BLOCK_H
11 | 
12 | #include "../src/column_block.h"
13 | 
14 | namespace byteslice{
15 | 
16 | /**
17 |   Store data in a naive array: one DTYPE element per tuple.
18 | */
19 | template <typename DTYPE>
20 | class NaiveColumnBlock: public ColumnBlock{
21 | public:
22 |     NaiveColumnBlock(size_t num=kNumTuplesPerBlock);
23 |     virtual ~NaiveColumnBlock();
24 |     // The block owns data_ and frees it in the destructor, so a shallow copy would double-free it.
25 |     NaiveColumnBlock(const NaiveColumnBlock&) = delete;
26 |     NaiveColumnBlock& operator=(const NaiveColumnBlock&) = delete;
27 | 
28 |     WordUnit GetTuple(size_t pos_in_block) const override;
29 |     void SetTuple(size_t pos_in_block, WordUnit value) override;
30 |     void Scan(Comparator comparator, WordUnit literal, BitVectorBlock* bv_block,
31 |             Bitwise bit_opt=Bitwise::kSet) const override;
32 |     void Scan(Comparator comparator, const ColumnBlock* column_block,
33 |             BitVectorBlock* bv_block, Bitwise bit_opt=Bitwise::kSet) const override;
34 |     void BulkLoadArray(const WordUnit* codes, size_t num, size_t start_pos=0) override;
35 |     void SerToFile(SequentialWriteBinaryFile &file) const override;
36 |     void DeserFromFile(const SequentialReadBinaryFile &file) override;
37 |     bool Resize(size_t size) override;
38 | private:
39 |     DTYPE* data_;   // heap array owned by this block
40 |     //scan helper: against a given literal
41 |     template <Comparator CMP>
42 |     void ScanHelper1(WordUnit literal, BitVectorBlock* bv_block, Bitwise bit_opt) const;
43 |     template <Comparator CMP, Bitwise OPT>
44 |     void ScanHelper2(WordUnit literal, BitVectorBlock* bv_block) const;
45 |     //scan helper: against another column_block
46 |     template <Comparator CMP>
47 |     void ScanHelper1(const ColumnBlock* colblock, BitVectorBlock* bvblock, Bitwise bit_opt) const;
48 |     template <Comparator CMP, Bitwise OPT>
49 |     void ScanHelper2(const ColumnBlock* colblock, BitVectorBlock* bvblock) const;
50 | 
51 | };
52 | // Returns the code stored at pos_in_block, widened to WordUnit. No bounds check.
53 | template <typename DTYPE>
54 | inline WordUnit NaiveColumnBlock<DTYPE>::GetTuple(size_t pos_in_block) const{
55 |     return static_cast<WordUnit>(*(data_ + pos_in_block));
56 | }
57 | // Stores value at pos_in_block, narrowed to DTYPE. No bounds check.
58 | template <typename DTYPE>
59 | inline void NaiveColumnBlock<DTYPE>::SetTuple(size_t pos_in_block, WordUnit value){
60 |     *(data_ + pos_in_block) = static_cast<DTYPE>(value);
61 | }
62 | 
63 | 
64 | }   // namespace
65 | #endif  //NAIVE_COLUMN_BLOCK_H
66 | 


--------------------------------------------------------------------------------
/src/param.h:
--------------------------------------------------------------------------------
 1 | /*******************************************************************************
 2 |  * Copyright (c) 2015
 3 |  * The Hong Kong Polytechnic University, Database Group
 4 |  *
 5 |  * Author: Ziqiang Feng (cszqfeng AT comp DOT polyu.edu.hk)
 6 |  *
 7 |  * See file LICENSE.md for details.
 8 |  *******************************************************************************/
 9 | #ifndef     PARAM_H
10 | #define     PARAM_H
11 | 
12 | namespace byteslice{
13 | // Number of tuples per block: 2^20 (1M).
14 | constexpr size_t kNumTuplesPerBlock = size_t(1) << 20;
15 | 
16 | }   // namespace
17 | 
18 | #endif  // PARAM_H
19 | 


--------------------------------------------------------------------------------
/src/sequential_binary_file.cpp:
--------------------------------------------------------------------------------
 1 | /*******************************************************************************
 2 |  * Copyright (c) 2015
 3 |  * The Hong Kong Polytechnic University, Database Group
 4 |  *
 5 |  * Author: Ziqiang Feng (cszqfeng AT comp DOT polyu.edu.hk)
 6 |  *
 7 |  * See file LICENSE.md for details.
 8 |  *******************************************************************************/
 9 | #include "sequential_binary_file.h"
10 | 
11 | #include    <cassert>
12 | #include	<fstream>
13 | #include    <iostream>
14 | 
15 | 
16 | namespace byteslice{
17 | // Opens `filename` for binary reading; refuses when this object already holds a stream.
18 | bool SequentialReadBinaryFile::Open(const std::string filename){
19 |     if(file_ != NULL){   // one stream per object
20 |         std::cerr << "Already opened file: " << filename_ << std::endl;
21 |         return false;
22 |     }
23 |     filename_ = filename;   // remembered before fopen so the failure message can name it
24 |     file_ = fopen(filename_.c_str(), "rb");
25 |     if(file_ != NULL){
26 |         return true;
27 |     }
28 |     std::cerr << "Can't open file: " << filename_ << std::endl;
29 |     return false;
30 | }
31 | 
32 | bool SequentialReadBinaryFile::Close(){
33 |     if(NULL == file_){
34 |         std::cerr << "No file opened." << std::endl;
35 |         return false;
36 |     }
37 |     if(0 != fclose(file_)){
38 |         std::cerr << "Error happens when closing file: " << filename_ << std::endl;
39 |         return false;
40 |     }
41 |     return true;
42 | }
43 | // Reports the stream's end-of-file indicator (feof).
44 | bool SequentialReadBinaryFile::IsEnd(){
45 |     return 0 != feof(file_);
46 | }
47 | // Reads up to `size` bytes into `buf`; returns the number of bytes actually read.
48 | size_t SequentialReadBinaryFile::Read(void* buf, size_t size) const{
49 |     // The element size is one byte, so fread's item count is a byte count.
50 |     return fread(buf, 1, size, file_);
51 | }
52 | 
53 | // Opens `filename` for binary writing ("wb"); refuses when a stream is already held.
54 | bool SequentialWriteBinaryFile::Open(const std::string filename){
55 |     if(file_ != NULL){   // one stream per object
56 |         std::cerr << "Already opened file: " << filename_ << std::endl;
57 |         return false;
58 |     }
59 |     filename_ = filename;   // remembered before fopen so the failure message can name it
60 |     file_ = fopen(filename_.c_str(), "wb");
61 |     if(file_ != NULL){
62 |         return true;
63 |     }
64 |     std::cerr << "Can't open file: " << filename_ << std::endl;
65 |     return false;
66 | }
67 | 
68 | bool SequentialWriteBinaryFile::Close(){
69 |     if(NULL == file_){
70 |         std::cerr << "No file opened." << std::endl;
71 |         return false;
72 |     }
73 |     if(0 != fclose(file_)){
74 |         std::cerr << "Error happens when closing file: " << filename_ << std::endl;
75 |         return false;
76 |     }
77 |     return true;
78 | }
79 | // Writes `size` bytes from `data` at the current position; returns the number of bytes written.
80 | size_t SequentialWriteBinaryFile::Append(const void* data, size_t size){
81 |     // The element size is one byte, so fwrite's item count is a byte count.
82 |     return fwrite(data, 1, size, file_);
83 | }
84 | // Flushes buffered output to the file; returns true on success.
85 | bool SequentialWriteBinaryFile::Flush(){
86 |     // fflush() reports success with 0.
87 |     const int status = fflush(file_);
88 |     if(0 == status){
89 |         return true;
90 |     }
91 |     return false;
92 | }
93 | 
94 | }   // namespace
95 | 


--------------------------------------------------------------------------------
/src/sequential_binary_file.h:
--------------------------------------------------------------------------------
 1 | /*******************************************************************************
 2 |  * Copyright (c) 2015
 3 |  * The Hong Kong Polytechnic University, Database Group
 4 |  *
 5 |  * Author: Ziqiang Feng (cszqfeng AT comp DOT polyu.edu.hk)
 6 |  *
 7 |  * See file LICENSE.md for details.
 8 |  *******************************************************************************/
 9 | #ifndef	SEQUENTIAL_BINARY_FILE_H
10 | #define	SEQUENTIAL_BINARY_FILE_H
11 | 
12 | #include    <cstdio>
13 | #include    <string>
14 | 
15 | namespace byteslice{
16 | // Thin wrapper over a C FILE* used for sequential binary reading.
17 | class SequentialReadBinaryFile{
18 | public:
19 |     bool Open(const std::string filename);       // false if a file is already open or it cannot be opened
20 |     bool Close();                                // false if nothing is open or closing fails
21 |     size_t Read(void* buf, size_t size) const;   // returns the number of bytes actually read
22 |     bool IsEnd();                                // end-of-file indicator of the stream
23 |     // NOTE(review): no destructor is declared here, so an open file is only released by Close().
24 | private:
25 |     FILE* file_ = NULL;      // NULL while no file is open
26 |     std::string filename_;   // name passed to the last Open(); used in diagnostics
27 | 
28 | };
29 | // Thin wrapper over a C FILE* used for sequential binary writing.
30 | class SequentialWriteBinaryFile{
31 | public:
32 |     bool Open(const std::string filename);          // false if a file is already open or it cannot be opened
33 |     bool Close();                                   // false if nothing is open or closing fails
34 |     size_t Append(const void* data, size_t size);   // returns the number of bytes actually written
35 |     bool Flush();                                   // true when fflush succeeds
36 |     // NOTE(review): no destructor is declared here, so an open file is only released by Close().
37 | private:
38 |     FILE* file_ = NULL;      // NULL while no file is open
39 |     std::string filename_;   // name passed to the last Open(); used in diagnostics
40 | 
41 | };
42 | 
43 | }   // namespace
44 | 
45 | #endif	// SEQUENTIAL_BINARY_FILE_H
46 | 


--------------------------------------------------------------------------------
/src/types.cpp:
--------------------------------------------------------------------------------
 1 | /*******************************************************************************
 2 |  * Copyright (c) 2015
 3 |  * The Hong Kong Polytechnic University, Database Group
 4 |  *
 5 |  * Author: Ziqiang Feng (cszqfeng AT comp DOT polyu.edu.hk)
 6 |  *
 7 |  * See file LICENSE.md for details.
 8 |  *******************************************************************************/
 9 | #include "types.h"
10 | 
11 | #include "column.h"
12 | 
13 | namespace byteslice{
14 | // Debug printer for ColumnType: writes the enumerator name without its leading 'k'.
15 | std::ostream& operator<< (std::ostream &out, ColumnType type){
16 |     const char* label = nullptr;   // stays null for a value outside the enum
17 |     if(ColumnType::kNaive == type){
18 |         label = "Naive";
19 |     }else if(ColumnType::kByteSlicePadRight == type){
20 |         label = "ByteSlicePadRight";
21 |     }else if(ColumnType::kByteSlicePadLeft == type){
22 |         label = "ByteSlicePadLeft";
23 |     }
24 |     if(label != nullptr){   // unknown values print nothing and leave the stream untouched
25 |         out << label;
26 |     }
27 |     return out;
28 | }
29 | // Debug printer for Comparator: writes the enumerator name without its leading 'k'.
30 | std::ostream& operator<< (std::ostream &out, Comparator comp){
31 |     // Pick the text first, then write it once.
32 |     const char* label = nullptr;
33 |     if(Comparator::kEqual == comp){
34 |         label = "Equal";
35 |     }else if(Comparator::kInequal == comp){
36 |         label = "Inequal";
37 |     }else if(Comparator::kLess == comp){
38 |         label = "Less";
39 |     }else if(Comparator::kGreater == comp){
40 |         label = "Greater";
41 |     }else if(Comparator::kLessEqual == comp){
42 |         label = "LessEqual";
43 |     }else if(Comparator::kGreaterEqual == comp){
44 |         label = "GreaterEqual";
45 |     }
46 | 
47 |     // A value outside the enum prints nothing and leaves the stream untouched.
48 |     if(label != nullptr){
49 |         out << label;
50 |     }
51 |     return out;
52 | }
53 | 
54 | 
55 | }   // namespace
56 | 


--------------------------------------------------------------------------------
/src/types.h:
--------------------------------------------------------------------------------
 1 | /*******************************************************************************
 2 |  * Copyright (c) 2015
 3 |  * The Hong Kong Polytechnic University, Database Group
 4 |  *
 5 |  * Author: Ziqiang Feng (cszqfeng AT comp DOT polyu.edu.hk)
 6 |  *
 7 |  * See file LICENSE.md for details.
 8 |  *******************************************************************************/
 9 | #ifndef TYPES_H
10 | #define TYPES_H
11 | 
12 | #include    <x86intrin.h>
13 | #include    <cstdint>
14 | #include    <iostream>
15 | 
16 | namespace byteslice{
17 | // Basic storage units used by the library.
18 | using WordUnit = uint64_t;   // 64-bit machine word
19 | using ByteUnit = uint8_t;    // single byte
20 | using AvxUnit  = __m256i;    // 256-bit AVX integer register
21 | // Widths of the units above, in bits.
22 | constexpr size_t kNumWordBits = sizeof(WordUnit) * 8;
23 | constexpr size_t kNumAvxBits = sizeof(AvxUnit) * 8;
24 | // Physical layout of a column's blocks.
25 | enum class ColumnType{
26 |     kNaive,               // plain array of codes (the tag NaiveColumnBlock passes to its base)
27 |     kByteSlicePadRight,   // NOTE(review): ByteSlice layout variant; padding semantics are not visible here, confirm
28 |     kByteSlicePadLeft     // NOTE(review): ByteSlice layout variant; padding semantics are not visible here, confirm
29 | };
30 | 
31 | // How a scan combines its result with the bit vector it writes into.
32 | enum class Bitwise{
33 |     kSet,   // overwrite the existing word with the result
34 |     kAnd,   // AND the result into the existing word
35 |     kOr     // OR the result into the existing word
36 | };
37 | // Scan predicate; the stored tuple is the left operand, the literal / other column the right one.
38 | enum class Comparator{
39 |     kEqual,          // tuple == operand
40 |     kInequal,        // tuple != operand
41 |     kLess,           // tuple <  operand
42 |     kGreater,        // tuple >  operand
43 |     kLessEqual,      // tuple <= operand
44 |     kGreaterEqual    // tuple >= operand
45 | };
46 | // NOTE(review): a left/right selector; its users are not visible in this part of the sources, confirm intent.
47 | enum class Direction{
48 |     kLeft,
49 |     kRight
50 | };
51 | 
52 | 
53 | // For debug use: stream the enumerator's name without its leading 'k' (defined in types.cpp).
54 | std::ostream& operator<< (std::ostream &out, ColumnType type);
55 | std::ostream& operator<< (std::ostream &out, Comparator comp);
56 | 
57 | }   // namespace
58 | 
59 | #endif //TYPES_H
60 | 


--------------------------------------------------------------------------------
/tests/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Unit tests: one googletest executable per <name>.cpp, registered with CTest and
 2 | # attached to the check-build target so that building it builds every test.
 3 | set(test_list
 4 |         avx-utility_test
 5 |         bitvector_block_test
 6 |         bitvector_iterator_test
 7 |         bitvector_test
 8 |         byteslice_column_block_test
 9 |         column_test
10 |     )
11 | 
12 | # To run under valgrind, find_program(MEMCHECK_CMD valgrind) and prefix the test command with it.
13 | foreach(tt ${test_list})
14 |     add_executable(${tt} "${tt}.cpp")
15 |     # Target-scoped (needs CMake >= 2.8.12): googletest headers reach the test targets only.
16 |     target_include_directories(${tt} PRIVATE "${gtest_SOURCE_DIR}/include" "${gtest_SOURCE_DIR}")
17 |     target_link_libraries(${tt} PRIVATE byteslice-core gtest gtest_main)
18 |     add_test(NAME ${tt} COMMAND ${tt} --gtest_color=yes)
19 |     add_dependencies(check-build ${tt})
20 | endforeach()
21 | 
22 | 
23 | 
24 | 


--------------------------------------------------------------------------------
/tests/avx-utility_test.cpp:
--------------------------------------------------------------------------------
 1 | /*******************************************************************************
 2 |  * Copyright (c) 2015
 3 |  * The Hong Kong Polytechnic University, Database Group
 4 |  *
 5 |  * Author: Ziqiang Feng (cszqfeng AT comp.polyu.edu.hk)
 6 |  *
 7 |  * See file LICENSE.md for details.
 8 |  *******************************************************************************/
 9 | 
10 | #include    <cstdlib>
11 | #include    <cstdint>
12 | #include    <cstdio>
13 | 
14 | #include    "gtest/gtest.h"
15 | 
16 | #include 	"src/avx-utility.h"
17 | 
18 | namespace byteslice{
19 | 
20 | // Fixture for the AVX helper tests; reseeds rand() from the clock before every test.
21 | class AvxUtilityTest: public ::testing::Test{
22 | public:
23 |     void SetUp() override{
24 |         const std::time_t now = std::time(0);
25 |         std::srand(now);
26 |     }
27 |     void TearDown() override{}
28 | };
29 | // Checks that FLIP keeps both ordering and equality when the flipped bytes are compared as signed.
30 | TEST_F(AvxUtilityTest, FlipTest){
31 |     // byte
32 |     for(size_t i = 0; i < 20; i++){
33 |         unsigned char a = std::rand()%256;
34 |         unsigned char b = std::rand()%256;
35 |         signed char x = static_cast<signed char>(FLIP<unsigned char>(a));
36 |         signed char y = static_cast<signed char>(FLIP<unsigned char>(b));
37 |         EXPECT_TRUE((a<b) == (x<y));     // unsigned order must match signed order after the flip
38 |         EXPECT_TRUE((a==b) == (x==y));   // compare against the flipped values, not a with itself
39 |     }
40 | }
41 | // avx_set1 must broadcast the scalar into every lane; lanes are inspected through raw pointers.
42 | TEST_F(AvxUtilityTest, Set1Test){
43 |     // byte: four broadcast bytes read back as one int
44 |     {
45 |         unsigned char seed = 0x3e;
46 |         __m256i reg = avx_set1<unsigned char>(seed);
47 |         int *as_int = reinterpret_cast<int*>(&reg);
48 |         EXPECT_EQ(0x3e3e3e3e, *as_int);
49 |     }
50 | 
51 |     // double byte: check one 16-bit lane, then the two bytes of lane 0
52 |     {
53 |         uint16_t seed = 0x12ae;
54 |         __m256i reg = avx_set1<uint16_t>(seed);
55 |         uint16_t *as_u16 = reinterpret_cast<uint16_t*>(&reg);
56 |         EXPECT_EQ(0x12ae, as_u16[1]);
57 |         uint8_t *as_u8 = reinterpret_cast<uint8_t*>(&reg);
58 |         EXPECT_EQ(0xae, as_u8[0]);   // the test expects the low byte first
59 |         EXPECT_EQ(0x12, as_u8[1]);
60 |     }
61 | 
62 | }
63 | // avx_cmplt / avx_cmpgt / avx_cmpeq on 32-bit lanes, checked through the per-lane sign-bit mask.
64 | TEST_F(AvxUtilityTest, CompareTest){
65 |     // int32: _mm256_set_epi32 lists the highest lane first, so lane 0 holds 0x01 and lane 7 holds 0xef
66 |     __m256i lanes = _mm256_set_epi32(0xef, 0xcd, 0xab, 0x89, 0x67, 0x45, 0x23, 0x01);
67 |     __m256i pivot = avx_set1<int>(0x89);
68 | 
69 |     // lanes 0-3 are below the pivot
70 |     __m256i below = avx_cmplt<int>(lanes, pivot);
71 |     int mask = _mm256_movemask_ps(_mm256_castsi256_ps(below));
72 |     EXPECT_EQ(0x0f, mask);
73 | 
74 |     // lanes 5-7 are above the pivot
75 |     __m256i above = avx_cmpgt<int>(lanes, pivot);
76 |     mask = _mm256_movemask_ps(_mm256_castsi256_ps(above));
77 |     EXPECT_EQ(0xe0, mask);
78 |     // lane 4 equals the pivot
79 |     __m256i same = avx_cmpeq<int>(lanes, pivot);
80 |     mask = _mm256_movemask_ps(_mm256_castsi256_ps(same));
81 |     EXPECT_EQ(0x10, mask);
82 | }
83 | 
84 | }   // namespace
85 | 


--------------------------------------------------------------------------------
/tests/bitvector_block_test.cpp:
--------------------------------------------------------------------------------
  1 | /*******************************************************************************
  2 |  * Copyright (c) 2015
  3 |  * The Hong Kong Polytechnic University, Database Group
  4 |  *
  5 |  * Author: Ziqiang Feng (cszqfeng AT comp.polyu.edu.hk)
  6 |  *
  7 |  * See file LICENSE.md for details.
  8 |  *******************************************************************************/
  9 | #include "../src/bitvector_block.h"
 10 | 
 11 | #include    <iostream>
 12 | 
 13 | #include "../src/macros.h"
 14 | #include "../src/param.h"
 15 | #include "../src/types.h"
 16 | #include    "gtest/gtest.h"
 17 | 
 18 | 
 19 | namespace byteslice{
 20 | 
 21 | // Fixture for the BitVectorBlock tests; there is no shared state to set up or tear down.
 22 | class BitVectorBlockTest: public ::testing::Test{
 23 | public:
 24 |     void SetUp() override{
 25 |     }
 26 | 
 27 |     void TearDown() override{
 28 |     }
 29 | };
 30 | // ClearTail() must zero the bits of the last word that lie beyond the block's length.
 31 | TEST_F(BitVectorBlockTest, ClearTail){
 32 |     // 326 bits = 5 full words (320 bits) + 6 bits in the sixth word
 33 |     BitVectorBlock* bv = new BitVectorBlock(326);
 34 |     for(int word_id = 0; word_id < 6; ++word_id){
 35 |         bv->SetWordUnit(~0ULL, word_id);
 36 |     }
 37 |     // all six words are saturated, including the 58 bits past the end
 38 |     const size_t ones_before = bv->CountOnes();
 39 |     EXPECT_EQ(6*64UL, ones_before);
 40 | 
 41 |     bv->ClearTail();
 42 |     const size_t ones_after = bv->CountOnes();
 43 |     EXPECT_EQ(326UL, ones_after);
 44 |     EXPECT_EQ(0x3fUL, bv->GetWordUnit(5));
 45 |     delete bv;
 46 | }
 47 | // SetZeros() clears every bit; 1000 bits occupy 16 words.
 48 | TEST_F(BitVectorBlockTest, SetZeros){
 49 |     BitVectorBlock* bv = new BitVectorBlock(1000);
 50 |     bv->SetZeros();
 51 |     const size_t ones = bv->CountOnes();
 52 |     EXPECT_EQ(0UL, ones);
 53 |     EXPECT_EQ(1000UL, bv->num());
 54 |     EXPECT_EQ(16UL, bv->num_word_units());
 55 |     delete bv;
 56 | }
 57 | // SetOnes() sets exactly num() bits.
 58 | TEST_F(BitVectorBlockTest, SetOnes){
 59 |     BitVectorBlock* bv = new BitVectorBlock(1000);
 60 |     bv->SetOnes();
 61 |     const size_t ones = bv->CountOnes();
 62 |     EXPECT_EQ(1000UL, ones);
 63 |     EXPECT_EQ(1000UL, bv->num());
 64 |     delete bv;
 65 | }
 66 | 
 67 | TEST_F(BitVectorBlockTest, SetWordUnit){   // one word written, the rest stay zero
 68 |     BitVectorBlock* bv = new BitVectorBlock(kNumTuplesPerBlock);
 69 |     bv->SetZeros();
 70 |     bv->SetWordUnit(0xff, 23);
 71 |     EXPECT_EQ(0xffUL, bv->GetWordUnit(23));    // the written word reads back
 72 |     EXPECT_EQ(0x0UL, bv->GetWordUnit(22));     // its neighbour is still clear
 73 |     EXPECT_EQ(8UL, bv->CountOnes());           // 0xff contributes eight set bits
 74 |     delete bv;
 75 | }
 76 | 
 77 | TEST_F(BitVectorBlockTest, AndOr){   // And() then Or() must both carry a 9-bit mask
 78 |     BitVectorBlock* acc = new BitVectorBlock(kNumTuplesPerBlock);
 79 |     BitVectorBlock* mask = new BitVectorBlock(kNumTuplesPerBlock);
 80 |     mask->SetZeros();
 81 |     mask->SetWordUnit(0x1ff, 10);        // 0x1ff has nine set bits
 82 |     acc->SetOnes();
 83 |     acc->And(mask);                      // all-ones AND mask leaves the mask
 84 |     EXPECT_EQ(9UL, acc->CountOnes());
 85 |     mask->SetZeros();
 86 |     mask->Or(acc);                       // zeros OR acc copies acc's bits
 87 |     EXPECT_EQ(9UL, mask->CountOnes());
 88 |     delete mask;
 89 |     delete acc;
 90 | }
 91 | 
 92 | TEST_F(BitVectorBlockTest, SetAvxUnit){   // storing one AVX unit into a zeroed block
 93 |     BitVectorBlock* bv = new BitVectorBlock(kNumTuplesPerBlock);
 94 |     bv->SetZeros();
 95 |     AvxUnit lanes = _mm256_set_epi64x(0xff, 0x0f, 0x3, 0x1);
 96 |     bv->SetAvxUnit(lanes, 4);
 97 |     EXPECT_EQ(15UL, bv->CountOnes());      // popcounts of the lanes: 8 + 4 + 2 + 1
 98 |     delete bv;
 99 | }
100 | 
101 | }
102 | 


--------------------------------------------------------------------------------
/tests/bitvector_iterator_test.cpp:
--------------------------------------------------------------------------------
 1 | /*******************************************************************************
 2 |  * Copyright (c) 2015
 3 |  * The Hong Kong Polytechnic University, Database Group
 4 |  *
 5 |  * Author: Ziqiang Feng (cszqfeng AT comp.polyu.edu.hk)
 6 |  *
 7 |  * See file LICENSE.md for details.
 8 |  *******************************************************************************/
 9 | #include "../src/bitvector_iterator.h"
10 | 
11 | #include    "gtest/gtest.h"
12 | #include    <cstdlib>
13 | 
14 | namespace byteslice{
15 | 
16 | // Fixture that hands each test a freshly allocated BitVector of num_ bits
17 | // and reseeds std::rand() from the current time.
18 | class BitVectorIteratorTest: public ::testing::Test{
19 | public:
20 |     virtual void SetUp(){
21 |         std::srand(std::time(0));
22 |         bitvector_ = new BitVector(num_);
23 |     }
24 |     virtual void TearDown(){
25 |         delete bitvector_;
26 |     }
27 | protected:
28 |     BitVector *bitvector_;                            // created in SetUp(), freed in TearDown()
29 |     const size_t num_ = 3*kNumTuplesPerBlock + 2000;  // three times the block size plus 2000
30 | };
31 | 
32 | // Four bits set at multiples of 3/4 of kNumTuplesPerBlock must be visited in
33 | // ascending order, after which Next() has to report exhaustion.
34 | TEST_F(BitVectorIteratorTest, Simple){
35 |     const size_t stride = kNumTuplesPerBlock*3/4;
36 |     bitvector_->SetZeros();
37 |     for(size_t k = 1; k <= 4; ++k){
38 |         bitvector_->SetBit(k*stride);
39 |     }
40 |     BitVectorIterator* it = new BitVectorIterator(bitvector_);
41 |     for(size_t k = 1; k <= 4; ++k){
42 |         EXPECT_TRUE(it->Next());
43 |         EXPECT_EQ(k*stride, it->GetPosition());
44 |     }
45 |     EXPECT_FALSE(it->Next());   // no bit was set beyond the fourth one
46 |     delete it;
47 | }
48 | 
49 | // One bit is placed at a random offset inside each of four consecutive windows
50 | // [w*window, (w+1)*window); the iterator must return them in ascending order.
51 | TEST_F(BitVectorIteratorTest, Random){
52 |     const size_t window = kNumTuplesPerBlock*3/4;
53 |     size_t expected[4];
54 |     bitvector_->SetZeros();
55 |     for(size_t w = 0; w < 4; ++w){
56 |         expected[w] = w*window + std::rand() % window;
57 |         bitvector_->SetBit(expected[w]);
58 |     }
59 |     BitVectorIterator* it = new BitVectorIterator(bitvector_);
60 |     for(size_t w = 0; w < 4; ++w){
61 |         EXPECT_TRUE(it->Next());
62 |         EXPECT_EQ(expected[w], it->GetPosition());
63 |     }
64 |     EXPECT_FALSE(it->Next());   // exactly four bits were set
65 |     delete it;
66 | }
67 | 
68 | // With every bit set, the iterator must visit positions 0..num_-1 in order.
69 | TEST_F(BitVectorIteratorTest, AllOnes){
70 |     bitvector_->SetOnes();
71 |     BitVectorIterator* it = new BitVectorIterator(bitvector_);
72 |     for(size_t pos = 0; pos < num_; ++pos){
73 |         EXPECT_TRUE(it->Next());
74 |         EXPECT_EQ(pos, it->GetPosition());
75 |     }
76 |     EXPECT_FALSE(it->Next());   // nothing may be reported past position num_-1
77 |     delete it;
78 | }
79 | 
80 | }   // namespace
81 | 


--------------------------------------------------------------------------------
/tests/bitvector_test.cpp:
--------------------------------------------------------------------------------
 1 | /*******************************************************************************
 2 |  * Copyright (c) 2015
 3 |  * The Hong Kong Polytechnic University, Database Group
 4 |  *
 5 |  * Author: Ziqiang Feng (cszqfeng AT comp.polyu.edu.hk)
 6 |  *
 7 |  * See file LICENSE.md for details.
 8 |  *******************************************************************************/
 9 | #include "../src/bitvector.h"
10 | 
11 | #include "../src/macros.h"
12 | #include "../src/param.h"
13 | #include "../src/types.h"
14 | #include    "gtest/gtest.h"
15 | 
16 | namespace byteslice{
17 | 
18 | // Fixture for the BitVector tests. It only supplies a bit count; each test
19 | // constructs and destroys its own BitVector.
20 | class BitVectorTest: public ::testing::Test{
21 | public:
22 |     // No shared resources to prepare.
23 |     virtual void SetUp(){}
24 |     // No shared resources to release.
25 |     virtual void TearDown(){}
26 | protected:
27 |     // Bit count used by the tests: three times kNumTuplesPerBlock plus 2000.
28 |     const size_t num_ = 3*kNumTuplesPerBlock + 2000;
29 | };
30 | 
31 | // The constructor must split num_ bits into four blocks: block 0 is full
32 | // (kNumTuplesPerBlock bits) and block 3 holds the remaining 2000 bits.
33 | TEST_F(BitVectorTest, Ctor){
34 |     BitVector *bv = new BitVector(num_);
35 |     EXPECT_EQ(4UL, bv->GetNumBlocks());
36 |     EXPECT_EQ(num_, bv->num());
37 |     EXPECT_EQ(kNumTuplesPerBlock, bv->GetBVBlock(0)->num());
38 |     EXPECT_EQ(2000UL, bv->GetBVBlock(3)->num());
39 |     delete bv;
40 | }
41 | 
42 | 
43 | }   // namespace
44 | 


--------------------------------------------------------------------------------
/tests/byteslice_column_block_test.cpp:
--------------------------------------------------------------------------------
  1 | /*******************************************************************************
  2 |  * Copyright (c) 2015
  3 |  * The Hong Kong Polytechnic University, Database Group
  4 |  *
  5 |  * Author: Ziqiang Feng (cszqfeng AT comp.polyu.edu.hk)
  6 |  *
  7 |  * See file LICENSE.md for details.
  8 |  *******************************************************************************/
  9 | 
 10 | #include	<cstdio>
 11 | #include    <cstdlib>
 12 | 
 13 | #include 	"gtest/gtest.h"
 14 | #include 	"src/byteslice_column_block.h"
 15 | #include 	"src/bitvector_block.h"
 16 | 
 17 | namespace byteslice{
 18 | 
 19 | // Fixture owning a ByteSliceColumnBlock<20> that holds 0.8*kNumTuplesPerBlock
 20 | // tuples, bulk-loaded so that tuple i stores the code i.
 21 | class ByteSliceColumnBlockTest: public ::testing::Test{
 22 | public:
 23 |     virtual void SetUp(){
 24 |         num_ = kNumTuplesPerBlock*0.8;
 25 |         block_ = new ByteSliceColumnBlock<20>(num_);
 26 |         // Stage the identity sequence 0..num_-1 in a scratch array.
 27 |         WordUnit* staged = new WordUnit[num_];
 28 |         for(size_t pos = 0; pos != num_; ++pos){
 29 |             staged[pos] = pos;
 30 |         }
 31 |         block_->BulkLoadArray(staged, num_);
 32 |         delete[] staged;    // the scratch array is not used after the bulk load
 33 |     }
 34 |     // Releases the block allocated by SetUp().
 35 |     virtual void TearDown(){
 36 |         delete block_;
 37 |     }
 38 | protected:
 39 |     ByteSliceColumnBlock<20>* block_;   // block under test; freed in TearDown()
 40 |     size_t num_;                        // tuple count, assigned in SetUp()
 41 | };
 42 | 
 43 | // Round trip: the fixture block is written to a temporary file and read back
 44 | // into a second block, which must match in tuple count and in every tuple.
 45 | TEST_F(ByteSliceColumnBlockTest, SerDeser){
 46 |     std::string path(std::tmpnam(nullptr));
 47 |     std::cout << "temp file: " << path << "\n";
 48 |     // Step 1: write the fixture block out.
 49 |     SequentialWriteBinaryFile writer;
 50 |     writer.Open(path);
 51 |     block_->SerToFile(writer);
 52 |     writer.Close();
 53 |     // Step 2: read it back through a ColumnBlock pointer.
 54 |     ColumnBlock* restored = new ByteSliceColumnBlock<20>(num_);
 55 |     SequentialReadBinaryFile reader;
 56 |     reader.Open(path);
 57 |     restored->DeserFromFile(reader);
 58 |     reader.Close();
 59 |     // Step 3: compare the restored block with the fixture block.
 60 |     EXPECT_EQ(block_->num_tuples(), restored->num_tuples());
 61 |     for(size_t row = 0; row < num_; ++row){
 62 |         EXPECT_EQ(block_->GetTuple(row), restored->GetTuple(row));
 63 |     }
 64 |     // Step 4: release the block and delete the temporary file.
 65 |     delete restored;
 66 |     std::remove(path.c_str());
 67 | }
 68 | 
 69 | TEST_F(ByteSliceColumnBlockTest, BulkLoadAndGetTuple){
 70 |     for(size_t row = 0; row != num_; ++row){    // SetUp() loaded code `row` at `row`
 71 |         EXPECT_EQ(row, block_->GetTuple(row));
 72 |     }
 73 | }
 74 | 
 75 | // The fixture block stores 0..num_-1, so exactly `bound` tuples compare less
 76 | // than a literal `bound` drawn from [0, num_).
 77 | TEST_F(ByteSliceColumnBlockTest, ScanLiteral){
 78 |     BitVectorBlock* hits = new BitVectorBlock(num_);
 79 |     std::srand(std::time(0));
 80 |     const WordUnit bound = std::rand() % num_;
 81 |     block_->Scan(Comparator::kLess, bound, hits, Bitwise::kSet);
 82 |     EXPECT_EQ(bound, hits->CountOnes());
 83 |     delete hits;
 84 | }
 85 | 
 86 | // Column-versus-column scan: bit i of the result must equal
 87 | // (fixture tuple i >= tuple i of a second, randomly filled block).
 88 | TEST_F(ByteSliceColumnBlockTest, ScanOtherBlock){
 89 |     BitVectorBlock* hits = new BitVectorBlock(num_);
 90 |     ByteSliceColumnBlock<20>* other = new ByteSliceColumnBlock<20>(num_);
 91 |     std::srand(std::time(0));
 92 |     for(size_t row = 0; row < num_; ++row){
 93 |         other->SetTuple(row, std::rand() % num_);
 94 |     }
 95 |     block_->Scan(Comparator::kGreaterEqual, other, hits, Bitwise::kSet);
 96 |     for(size_t row = 0; row < num_; ++row){
 97 |         const bool expected = block_->GetTuple(row) >= other->GetTuple(row);
 98 |         EXPECT_EQ(hits->GetBit(row), expected);
 99 |     }
100 |     delete other;
101 |     delete hits;
102 | }
103 | 
104 | }   // namespace
105 | 


--------------------------------------------------------------------------------
/tests/column_test.cpp:
--------------------------------------------------------------------------------
 1 | /*******************************************************************************
 2 |  * Copyright (c) 2015
 3 |  * The Hong Kong Polytechnic University, Database Group
 4 |  *
 5 |  * Author: Ziqiang Feng (cszqfeng AT comp.polyu.edu.hk)
 6 |  *
 7 |  * See file LICENSE.md for details.
 8 |  *******************************************************************************/
 9 | 
10 | #include    <cstdio>
11 | #include    <cstdlib>
12 | #include    <ctime>
13 | #include    <fstream>
14 | #include    <string>
15 | 
16 | #include    "gtest/gtest.h"
17 | #include 	"src/column.h"
18 | 
19 | namespace byteslice{
20 | 
21 | // Fixture that prepares num_ pseudo-random codes, each masked to the low
22 | // bit_width_ bits, for the Column tests to load and compare against.
23 | class ColumnTest: public ::testing::Test{
24 | public:
25 |     virtual void SetUp(){
26 |         data_ = new WordUnit[num_];
27 |         std::srand(std::time(0));       // time-based seed
28 |         for(WordUnit* slot = data_; slot != data_ + num_; ++slot){
29 |             *slot = std::rand() & mask_;
30 |         }
31 |     }
32 |     virtual void TearDown(){
33 |         delete[] data_;
34 |     }
35 | protected:
36 |     WordUnit* data_;                                   // reference codes; freed in TearDown()
37 |     const size_t num_ = 2.45*kNumTuplesPerBlock;       // tuple count: 2.45 * kNumTuplesPerBlock
38 |     const size_t bit_width_ = 21;                      // width passed to the Column constructor
39 |     const WordUnit mask_ = (1ULL << bit_width_) - 1 ;  // keeps the low bit_width_ bits
40 | };
41 | 
42 | // Writes the fixture codes to a temporary text file, one per line, loads the
43 | // file into a kByteSlicePadRight column and checks every tuple. The temporary
44 | // file is deleted at the end so repeated runs do not leave files behind.
45 | TEST_F(ColumnTest, LoadTextFile){
46 |     std::string filename(std::tmpnam(nullptr));
47 |     std::ofstream outfile(filename, std::ofstream::out);
48 |     for(size_t i=0; i < num_; i++){
49 |         outfile << data_[i] << '\n';    // '\n' instead of std::endl: no flush per line
50 |     }
51 |     outfile.close();                    // flushes everything before the column reads it
52 |     Column* column = new Column(ColumnType::kByteSlicePadRight, bit_width_, num_);
53 |     column->LoadTextFile(filename);
54 |     for(size_t i=0; i < num_; i++){
55 |         EXPECT_EQ(data_[i], column->GetTuple(i));
56 |     }
57 |     delete column;
58 |     std::remove(filename.c_str());      // clean up the temporary file
59 | }
60 | 
61 | TEST_F(ColumnTest, NaiveSetTuple){   // SetTuple() followed by GetTuple() round trip
62 |     Column* naive = new Column(ColumnType::kNaive, bit_width_, num_);
63 |     for(size_t row = 0; row != num_; ++row){     // store the codes one tuple at a time
64 |         naive->SetTuple(row, data_[row]);
65 |     }
66 |     for(size_t row = 0; row != num_; ++row){     // every stored code must read back
67 |         EXPECT_EQ(data_[row], naive->GetTuple(row));
68 |     }
69 |     delete naive;
70 | }
71 | 
72 | 
73 | // Naive column: bulk load, scan "< literal", cross-check each bit and the total.
74 | TEST_F(ColumnTest, NaiveBulkLoadAndScanLiteral){
75 |     WordUnit threshold = std::rand() & mask_;
76 |     Column* naive = new Column(ColumnType::kNaive, bit_width_, num_);
77 |     BitVector* hits = new BitVector(naive);
78 |     naive->BulkLoadArray(data_, num_);
79 |     naive->Scan(Comparator::kLess, threshold, hits, Bitwise::kSet);
80 |     size_t reported = hits->CountOnes();
81 |     size_t recount = 0;
82 |     for(size_t row = 0; row < num_; ++row){
83 |         EXPECT_EQ(data_[row], naive->GetTuple(row));
84 |         recount += (data_[row] < threshold);
85 |         EXPECT_EQ((data_[row] < threshold), hits->GetBit(row));
86 |     }
87 |     EXPECT_EQ(recount, reported);
88 |     delete hits;
89 |     delete naive;
90 | }
91 | 
92 | 
93 | 
94 | }   // namespace
95 | 


--------------------------------------------------------------------------------