├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── README.md ├── bit_vector.hpp ├── bp_vector.cpp ├── bp_vector.hpp ├── broadword.hpp ├── cartesian_tree.hpp ├── darray.hpp ├── darray64.hpp ├── elias_fano.hpp ├── elias_fano_compressed_list.hpp ├── elias_fano_list.hpp ├── forward_enumerator.hpp ├── gamma_bit_vector.hpp ├── gamma_vector.hpp ├── intrinsics.hpp ├── mappable_vector.hpp ├── mapper.hpp ├── nibble_vector.hpp ├── perftest ├── .gitignore ├── CMakeLists.txt ├── perftest_bp_vector.cpp ├── perftest_bp_vector_rmq.cpp ├── perftest_cartesian_tree.cpp ├── perftest_common.hpp └── perftest_elias_fano.cpp ├── rs_bit_vector.cpp ├── rs_bit_vector.hpp ├── succinct_config.hpp.in ├── tables.hpp ├── test_bit_vector.cpp ├── test_bp_vector.cpp ├── test_bp_vector_common.hpp ├── test_bp_vector_rmq.cpp ├── test_cartesian_tree.cpp ├── test_common.hpp ├── test_darray.cpp ├── test_elias_fano.cpp ├── test_elias_fano_compressed_list.cpp ├── test_gamma_bit_vector.cpp ├── test_gamma_vector.cpp ├── test_mapper.cpp ├── test_rank_select_common.hpp ├── test_rs_bit_vector.cpp ├── test_topk_vector.cpp ├── topk_vector.hpp ├── util.hpp └── vbyte.hpp /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | libsuccinct.* 3 | 4 | test_bit_vector 5 | test_bp_vector 6 | test_bp_vector_rmq 7 | test_elias_fano 8 | test_mapper 9 | test_rs_bit_vector 10 | test_darray 11 | test_elias_fano_compressed_list 12 | test_gamma_bit_vector 13 | test_gamma_vector 14 | test_cartesian_tree 15 | test_topk_vector 16 | 17 | 18 | # cmake 19 | Makefile 20 | CMakeCache.txt 21 | *.cmake 22 | Testing/ 23 | CMakeFiles/ 24 | succinct_config.hpp 25 | 26 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.6) 2 | project(SUCCINCT) 3 | 4 | option(SUCCINCT_USE_LIBCXX 5 | "Use libc++ with Clang instead of libstdc++ (must 
be same as that used to compile Boost)" 6 | OFF) 7 | option(SUCCINCT_USE_INTRINSICS 8 | "Use a set of intrinsics available on all x86-64 architectures" 9 | ON) 10 | option(SUCCINCT_USE_POPCNT 11 | "Use popcount intrinsic. Available on x86-64 since SSE4.2." 12 | OFF) 13 | 14 | configure_file( 15 | ${SUCCINCT_SOURCE_DIR}/succinct_config.hpp.in 16 | ${SUCCINCT_SOURCE_DIR}/succinct_config.hpp) 17 | 18 | if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") 19 | if (SUCCINCT_USE_LIBCXX) 20 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") 21 | endif () 22 | endif () 23 | 24 | if (SUCCINCT_USE_POPCNT) 25 | if (UNIX) 26 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2") 27 | endif () 28 | # XXX(ot): what to do for MSVC? 29 | endif () 30 | 31 | 32 | # XXX(ot): enable this on all compilers 33 | if (UNIX) 34 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wno-missing-braces") 35 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wconversion") 36 | endif () 37 | 38 | 39 | find_package(Boost 1.42.0 COMPONENTS 40 | unit_test_framework iostreams system filesystem REQUIRED) 41 | include_directories(${Boost_INCLUDE_DIRS}) 42 | link_directories (${Boost_LIBRARY_DIRS}) 43 | 44 | include_directories(${PROJECT_SOURCE_DIR}) 45 | 46 | set(SUCCINCT_SOURCES 47 | rs_bit_vector.cpp 48 | bp_vector.cpp 49 | ) 50 | 51 | add_library(succinct STATIC ${SUCCINCT_SOURCES}) 52 | 53 | add_subdirectory(perftest) 54 | 55 | # make and run tests only if library is compiled stand-alone 56 | if (CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME) 57 | enable_testing() 58 | file(GLOB SUCCINCT_TEST_SOURCES test_*.cpp) 59 | foreach(TEST_SRC ${SUCCINCT_TEST_SOURCES}) 60 | get_filename_component (TEST_SRC_NAME ${TEST_SRC} NAME_WE) 61 | add_executable(${TEST_SRC_NAME} ${TEST_SRC}) 62 | target_link_libraries(${TEST_SRC_NAME} 63 | succinct 64 | ${Boost_LIBRARIES} 65 | ) 66 | add_test(${TEST_SRC_NAME} ${TEST_SRC_NAME}) 67 | endforeach(TEST_SRC) 68 | endif () 69 | 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2011 Giuseppe Ottaviano 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | succinct 2 | ======== 3 | 4 | This library contains the implementation of some succinct data 5 | structures. It is rather undocumented now, but better documentation is 6 | under way. On the other hand, the code is quite extensively 7 | unit-tested. 8 | 9 | The library is meant to be imported as a git submodule in other 10 | projects and then included as a CMake subdirectory. See the unit 11 | tests, and the [semi_index](https://github.com/ot/semi_index) and 12 | [path_decomposed_tries](https://github.com/ot/path_decomposed_tries) 13 | projects for examples. 14 | 15 | How to build the code 16 | --------------------- 17 | 18 | ### Dependencies ### 19 | 20 | The following dependencies have to be installed to compile the library. 21 | 22 | * CMake >= 2.6, for the build system 23 | * Boost >= 1.42 24 | 25 | ### Supported systems ### 26 | 27 | The library is developed and tested mainly on Linux and Mac OS X, and 28 | it has been tested also on Windows 7. 29 | 30 | The code is designed for 64-bit architectures. 
It has been tested on 31 | 32-bit Linux as well, but it is significantly slower. To compile the 32 | library on 32-bit architectures it is necessary to disable intrinsics 33 | support, passing -DSUCCINCT_USE_INTRINSICS=OFF to cmake. 34 | 35 | ### Building on Unix ### 36 | 37 | The project uses CMake. To build it on Unix systems it should be 38 | sufficient to do the following: 39 | 40 | $ cmake . 41 | $ make 42 | 43 | It is also advised to perform a `make test`, which runs the unit 44 | tests. 45 | 46 | ### Builing on Mac OS X ### 47 | 48 | Same instructions for Unix apply, with one exception: the library must 49 | be compiled with the same standard library used to compile Boost. So, 50 | if libc++ was used with Clang, the following command must be used: 51 | 52 | $ cmake . -DSUCCINCT_USE_LIBCXX=ON 53 | 54 | 55 | ### Building on Windows ### 56 | 57 | On Windows, Boost and zlib are not installed in default locations, so 58 | it is necessary to set some environment variables to allow the build 59 | system to find them. 60 | 61 | * For Boost `BOOST_ROOT` must be set to the directory which contains 62 | the `boost` include directory. 63 | * The directories that contain the Boost must be added to `PATH` so 64 | that the executables find them 65 | 66 | Once the env variables are set, the quickest way to build the code is 67 | by using NMake (instead of the default Visual Studio). Run the 68 | following commands in a Visual Studio x64 Command Prompt: 69 | 70 | $ cmake -G "NMake Makefiles" . 
71 | $ nmake 72 | $ nmake test 73 | -------------------------------------------------------------------------------- /bit_vector.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | 7 | #include "mappable_vector.hpp" 8 | #include "broadword.hpp" 9 | #include "util.hpp" 10 | 11 | namespace succinct { 12 | 13 | namespace detail { 14 | inline size_t words_for(uint64_t n) 15 | { 16 | return util::ceil_div(n, 64); 17 | } 18 | } 19 | 20 | class bit_vector; 21 | 22 | class bit_vector_builder : boost::noncopyable { 23 | public: 24 | 25 | typedef std::vector bits_type; 26 | 27 | bit_vector_builder(uint64_t size = 0, bool init = 0) 28 | : m_size(size) 29 | { 30 | m_bits.resize(detail::words_for(size), uint64_t(-init)); 31 | if (size) { 32 | m_cur_word = &m_bits.back(); 33 | // clear padding bits 34 | if (init && size % 64) { 35 | *m_cur_word >>= 64 - (size % 64); 36 | } 37 | } 38 | } 39 | 40 | void reserve(uint64_t size) { 41 | m_bits.reserve(detail::words_for(size)); 42 | } 43 | 44 | inline void push_back(bool b) { 45 | uint64_t pos_in_word = m_size % 64; 46 | if (pos_in_word == 0) { 47 | m_bits.push_back(0); 48 | m_cur_word = &m_bits.back(); 49 | } 50 | *m_cur_word |= (uint64_t)b << pos_in_word; 51 | ++m_size; 52 | } 53 | 54 | inline void set(uint64_t pos, bool b) { 55 | uint64_t word = pos / 64; 56 | uint64_t pos_in_word = pos % 64; 57 | 58 | m_bits[word] &= ~(uint64_t(1) << pos_in_word); 59 | m_bits[word] |= uint64_t(b) << pos_in_word; 60 | } 61 | 62 | inline void set_bits(uint64_t pos, uint64_t bits, size_t len) 63 | { 64 | assert(pos + len <= size()); 65 | // check there are no spurious bits 66 | assert(len == 64 || (bits >> len) == 0); 67 | if (!len) return; 68 | uint64_t mask = (len == 64) ? 
uint64_t(-1) : ((uint64_t(1) << len) - 1); 69 | uint64_t word = pos / 64; 70 | uint64_t pos_in_word = pos % 64; 71 | 72 | m_bits[word] &= ~(mask << pos_in_word); 73 | m_bits[word] |= bits << pos_in_word; 74 | 75 | uint64_t stored = 64 - pos_in_word; 76 | if (stored < len) { 77 | m_bits[word + 1] &= ~(mask >> stored); 78 | m_bits[word + 1] |= bits >> stored; 79 | } 80 | } 81 | 82 | inline void append_bits(uint64_t bits, size_t len) 83 | { 84 | // check there are no spurious bits 85 | assert(len == 64 || (bits >> len) == 0); 86 | if (!len) return; 87 | uint64_t pos_in_word = m_size % 64; 88 | m_size += len; 89 | if (pos_in_word == 0) { 90 | m_bits.push_back(bits); 91 | } else { 92 | *m_cur_word |= bits << pos_in_word; 93 | if (len > 64 - pos_in_word) { 94 | m_bits.push_back(bits >> (64 - pos_in_word)); 95 | } 96 | } 97 | m_cur_word = &m_bits.back(); 98 | } 99 | 100 | inline void zero_extend(uint64_t n) { 101 | m_size += n; 102 | uint64_t needed = detail::words_for(m_size) - m_bits.size(); 103 | if (needed) { 104 | m_bits.insert(m_bits.end(), needed, 0); 105 | m_cur_word = &m_bits.back(); 106 | } 107 | } 108 | 109 | inline void one_extend(uint64_t n) 110 | { 111 | while (n >= 64) { 112 | append_bits(uint64_t(-1), 64); 113 | n -= 64; 114 | } 115 | if (n) { 116 | append_bits(uint64_t(-1) >> (64 - n), n); 117 | } 118 | } 119 | 120 | void append(bit_vector_builder const& rhs) 121 | { 122 | if (!rhs.size()) return; 123 | 124 | uint64_t pos = m_bits.size(); 125 | uint64_t shift = size() % 64; 126 | m_size = size() + rhs.size(); 127 | m_bits.resize(detail::words_for(m_size)); 128 | 129 | if (shift == 0) { // word-aligned, easy case 130 | std::copy(rhs.m_bits.begin(), rhs.m_bits.end(), 131 | m_bits.begin() + ptrdiff_t(pos)); 132 | } else { 133 | uint64_t* cur_word = &m_bits.front() + pos - 1; 134 | for (size_t i = 0; i < rhs.m_bits.size() - 1; ++i) { 135 | uint64_t w = rhs.m_bits[i]; 136 | *cur_word |= w << shift; 137 | *++cur_word = w >> (64 - shift); 138 | } 139 | *cur_word 
|= rhs.m_bits.back() << shift; 140 | if (cur_word < &m_bits.back()) { 141 | *++cur_word = rhs.m_bits.back() >> (64 - shift); 142 | } 143 | } 144 | m_cur_word = &m_bits.back(); 145 | } 146 | 147 | // reverse in place 148 | void reverse() 149 | { 150 | uint64_t shift = 64 - (size() % 64); 151 | 152 | uint64_t remainder = 0; 153 | for (size_t i = 0; i < m_bits.size(); ++i) { 154 | uint64_t cur_word; 155 | if (shift != 64) { // this should be hoisted out 156 | cur_word = remainder | (m_bits[i] << shift); 157 | remainder = m_bits[i] >> (64 - shift); 158 | } else { 159 | cur_word = m_bits[i]; 160 | } 161 | m_bits[i] = broadword::reverse_bits(cur_word); 162 | } 163 | assert(remainder == 0); 164 | std::reverse(m_bits.begin(), m_bits.end()); 165 | } 166 | 167 | bits_type& move_bits() { 168 | assert(detail::words_for(m_size) == m_bits.size()); 169 | return m_bits; 170 | } 171 | 172 | uint64_t size() const { 173 | return m_size; 174 | } 175 | 176 | void swap(bit_vector_builder& other) 177 | { 178 | m_bits.swap(other.m_bits); 179 | std::swap(m_size, other.m_size); 180 | std::swap(m_cur_word, other.m_cur_word); 181 | } 182 | 183 | private: 184 | bits_type m_bits; 185 | uint64_t m_size; 186 | uint64_t* m_cur_word; 187 | }; 188 | 189 | class bit_vector { 190 | public: 191 | bit_vector() 192 | : m_size(0) 193 | {} 194 | 195 | template 196 | bit_vector(Range const& from) { 197 | std::vector bits; 198 | const uint64_t first_mask = uint64_t(1); 199 | uint64_t mask = first_mask; 200 | uint64_t cur_val = 0; 201 | m_size = 0; 202 | for (typename boost::range_const_iterator::type iter = boost::begin(from); 203 | iter != boost::end(from); 204 | ++iter) { 205 | if (*iter) { 206 | cur_val |= mask; 207 | } 208 | mask <<= 1; 209 | m_size += 1; 210 | if (!mask) { 211 | bits.push_back(cur_val); 212 | mask = first_mask; 213 | cur_val = 0; 214 | } 215 | } 216 | if (mask != first_mask) { 217 | bits.push_back(cur_val); 218 | } 219 | m_bits.steal(bits); 220 | } 221 | 222 | 
bit_vector(bit_vector_builder* from) { 223 | m_size = from->size(); 224 | m_bits.steal(from->move_bits()); 225 | } 226 | 227 | template 228 | void map(Visitor& visit) { 229 | visit 230 | (m_size, "m_size") 231 | (m_bits, "m_bits"); 232 | } 233 | 234 | void swap(bit_vector& other) { 235 | std::swap(other.m_size, m_size); 236 | other.m_bits.swap(m_bits); 237 | } 238 | 239 | inline size_t size() const { 240 | return m_size; 241 | } 242 | 243 | inline bool operator[](uint64_t pos) const { 244 | assert(pos < m_size); 245 | uint64_t block = pos / 64; 246 | assert(block < m_bits.size()); 247 | uint64_t shift = pos % 64; 248 | return (m_bits[block] >> shift) & 1; 249 | } 250 | 251 | inline uint64_t get_bits(uint64_t pos, uint64_t len) const { 252 | assert(pos + len <= size()); 253 | if (!len) { 254 | return 0; 255 | } 256 | uint64_t block = pos / 64; 257 | uint64_t shift = pos % 64; 258 | uint64_t mask = -(len == 64) | ((1ULL << len) - 1); 259 | if (shift + len <= 64) { 260 | return m_bits[block] >> shift & mask; 261 | } else { 262 | return (m_bits[block] >> shift) | (m_bits[block + 1] << (64 - shift) & mask); 263 | } 264 | } 265 | 266 | // same as get_bits(pos, 64) but it can extend further size(), padding with zeros 267 | inline uint64_t get_word(uint64_t pos) const 268 | { 269 | assert(pos < size()); 270 | uint64_t block = pos / 64; 271 | uint64_t shift = pos % 64; 272 | uint64_t word = m_bits[block] >> shift; 273 | if (shift && block + 1 < m_bits.size()) { 274 | word |= m_bits[block + 1] << (64 - shift); 275 | } 276 | return word; 277 | } 278 | 279 | // unsafe and fast version of get_word, it retrieves at least 56 bits 280 | inline uint64_t get_word56(uint64_t pos) const 281 | { 282 | // XXX check endianness? 
283 | const char* ptr = reinterpret_cast(m_bits.data()); 284 | return *(reinterpret_cast(ptr + pos / 8)) >> (pos % 8); 285 | } 286 | 287 | inline uint64_t predecessor0(uint64_t pos) const { 288 | assert(pos < m_size); 289 | uint64_t block = pos / 64; 290 | uint64_t shift = 64 - pos % 64 - 1; 291 | uint64_t word = ~m_bits[block]; 292 | word = (word << shift) >> shift; 293 | 294 | unsigned long ret; 295 | while (!broadword::msb(word, ret)) { 296 | assert(block); 297 | word = ~m_bits[--block]; 298 | }; 299 | return block * 64 + ret; 300 | } 301 | 302 | inline uint64_t successor0(uint64_t pos) const { 303 | assert(pos < m_size); 304 | uint64_t block = pos / 64; 305 | uint64_t shift = pos % 64; 306 | uint64_t word = (~m_bits[block] >> shift) << shift; 307 | 308 | unsigned long ret; 309 | while (!broadword::lsb(word, ret)) { 310 | ++block; 311 | assert(block < m_bits.size()); 312 | word = ~m_bits[block]; 313 | }; 314 | return block * 64 + ret; 315 | } 316 | 317 | inline uint64_t predecessor1(uint64_t pos) const { 318 | assert(pos < m_size); 319 | uint64_t block = pos / 64; 320 | uint64_t shift = 64 - pos % 64 - 1; 321 | uint64_t word = m_bits[block]; 322 | word = (word << shift) >> shift; 323 | 324 | unsigned long ret; 325 | while (!broadword::msb(word, ret)) { 326 | assert(block); 327 | word = m_bits[--block]; 328 | }; 329 | return block * 64 + ret; 330 | } 331 | 332 | inline uint64_t successor1(uint64_t pos) const { 333 | assert(pos < m_size); 334 | uint64_t block = pos / 64; 335 | uint64_t shift = pos % 64; 336 | uint64_t word = (m_bits[block] >> shift) << shift; 337 | 338 | unsigned long ret; 339 | while (!broadword::lsb(word, ret)) { 340 | ++block; 341 | assert(block < m_bits.size()); 342 | word = m_bits[block]; 343 | }; 344 | return block * 64 + ret; 345 | } 346 | 347 | mapper::mappable_vector const& data() const 348 | { 349 | return m_bits; 350 | } 351 | 352 | struct enumerator { 353 | enumerator() 354 | : m_bv(0) 355 | , m_pos(uint64_t(-1)) 356 | {} 357 | 358 | 
enumerator(bit_vector const& bv, size_t pos) 359 | : m_bv(&bv) 360 | , m_pos(pos) 361 | , m_buf(0) 362 | , m_avail(0) 363 | { 364 | m_bv->data().prefetch(m_pos / 64); 365 | } 366 | 367 | inline bool next() 368 | { 369 | if (!m_avail) fill_buf(); 370 | bool b = m_buf & 1; 371 | m_buf >>= 1; 372 | m_avail -= 1; 373 | m_pos += 1; 374 | return b; 375 | } 376 | 377 | inline uint64_t take(size_t l) 378 | { 379 | if (m_avail < l) fill_buf(); 380 | uint64_t val; 381 | if (l != 64) { 382 | val = m_buf & ((uint64_t(1) << l) - 1); 383 | m_buf >>= l; 384 | } else { 385 | val = m_buf; 386 | } 387 | m_avail -= l; 388 | m_pos += l; 389 | return val; 390 | } 391 | 392 | inline uint64_t skip_zeros() 393 | { 394 | uint64_t zs = 0; 395 | // XXX the loop may be optimized by aligning access 396 | while (!m_buf) { 397 | m_pos += m_avail; 398 | zs += m_avail; 399 | m_avail = 0; 400 | fill_buf(); 401 | } 402 | 403 | uint64_t l = broadword::lsb(m_buf); 404 | m_buf >>= l; 405 | m_buf >>= 1; 406 | m_avail -= l + 1; 407 | m_pos += l + 1; 408 | return zs + l; 409 | } 410 | 411 | inline uint64_t position() const 412 | { 413 | return m_pos; 414 | } 415 | 416 | private: 417 | 418 | inline void fill_buf() 419 | { 420 | m_buf = m_bv->get_word(m_pos); 421 | m_avail = 64; 422 | } 423 | 424 | bit_vector const* m_bv; 425 | size_t m_pos; 426 | uint64_t m_buf; 427 | size_t m_avail; 428 | }; 429 | 430 | struct unary_enumerator { 431 | unary_enumerator() 432 | : m_data(0) 433 | , m_position(0) 434 | , m_buf(0) 435 | {} 436 | 437 | unary_enumerator(bit_vector const& bv, uint64_t pos) 438 | { 439 | m_data = bv.data().data(); 440 | m_position = pos; 441 | m_buf = m_data[pos / 64]; 442 | // clear low bits 443 | m_buf &= uint64_t(-1) << (pos % 64); 444 | } 445 | 446 | uint64_t position() const 447 | { 448 | return m_position; 449 | } 450 | 451 | uint64_t next() 452 | { 453 | unsigned long pos_in_word; 454 | uint64_t buf = m_buf; 455 | while (!broadword::lsb(buf, pos_in_word)) { 456 | m_position += 64; 457 | buf 
= m_data[m_position / 64]; 458 | } 459 | 460 | m_buf = buf & (buf - 1); // clear LSB 461 | m_position = (m_position & ~uint64_t(63)) + pos_in_word; 462 | return m_position; 463 | } 464 | 465 | // skip to the k-th one after the current position 466 | void skip(uint64_t k) 467 | { 468 | uint64_t skipped = 0; 469 | uint64_t buf = m_buf; 470 | uint64_t w = 0; 471 | while (skipped + (w = broadword::popcount(buf)) <= k) { 472 | skipped += w; 473 | m_position += 64; 474 | buf = m_data[m_position / 64]; 475 | } 476 | assert(buf); 477 | uint64_t pos_in_word = broadword::select_in_word(buf, k - skipped); 478 | m_buf = buf & (uint64_t(-1) << pos_in_word); 479 | m_position = (m_position & ~uint64_t(63)) + pos_in_word; 480 | } 481 | 482 | // return the position of the k-th one after the current position. 483 | uint64_t skip_no_move(uint64_t k) 484 | { 485 | uint64_t position = m_position; 486 | uint64_t skipped = 0; 487 | uint64_t buf = m_buf; 488 | uint64_t w = 0; 489 | while (skipped + (w = broadword::popcount(buf)) <= k) { 490 | skipped += w; 491 | position += 64; 492 | buf = m_data[position / 64]; 493 | } 494 | assert(buf); 495 | uint64_t pos_in_word = broadword::select_in_word(buf, k - skipped); 496 | position = (position & ~uint64_t(63)) + pos_in_word; 497 | return position; 498 | } 499 | 500 | // skip to the k-th zero after the current position 501 | void skip0(uint64_t k) 502 | { 503 | uint64_t skipped = 0; 504 | uint64_t pos_in_word = m_position % 64; 505 | uint64_t buf = ~m_buf & (uint64_t(-1) << pos_in_word); 506 | uint64_t w = 0; 507 | while (skipped + (w = broadword::popcount(buf)) <= k) { 508 | skipped += w; 509 | m_position += 64; 510 | buf = ~m_data[m_position / 64]; 511 | } 512 | assert(buf); 513 | pos_in_word = broadword::select_in_word(buf, k - skipped); 514 | m_buf = ~buf & (uint64_t(-1) << pos_in_word); 515 | m_position = (m_position & ~uint64_t(63)) + pos_in_word; 516 | } 517 | 518 | private: 519 | uint64_t const* m_data; 520 | uint64_t m_position; 521 | 
uint64_t m_buf; 522 | }; 523 | 524 | protected: 525 | size_t m_size; 526 | mapper::mappable_vector m_bits; 527 | }; 528 | 529 | } 530 | -------------------------------------------------------------------------------- /bp_vector.cpp: -------------------------------------------------------------------------------- 1 | #include "bp_vector.hpp" 2 | #include "util.hpp" 3 | 4 | namespace succinct { 5 | 6 | namespace { 7 | 8 | // XXX(ot): remove useless tables 9 | 10 | class excess_tables 11 | { 12 | public: 13 | excess_tables() { 14 | for (int c = 0; c < 256; ++c) { 15 | for (uint8_t i = 0; i < 9; ++i) { 16 | m_fwd_pos[c][i] = 0; 17 | m_bwd_pos[c][i] = 0; 18 | } 19 | // populate m_fwd_pos, m_fwd_min, and m_fwd_min_idx 20 | int excess = 0; 21 | m_fwd_min[c] = 0; 22 | m_fwd_min_idx[c] = 0; 23 | 24 | for (char i = 0; i < 8; ++i) { 25 | if ((c >> i) & 1) { // opening 26 | ++excess; 27 | } else { // closing 28 | --excess; 29 | if (excess < 0 && 30 | m_fwd_pos[c][-excess] == 0) { // not already found 31 | m_fwd_pos[c][-excess] = uint8_t(i + 1); 32 | } 33 | } 34 | 35 | if (-excess > m_fwd_min[c]) { 36 | m_fwd_min[c] = uint8_t(-excess); 37 | m_fwd_min_idx[c] = uint8_t(i + 1); 38 | } 39 | } 40 | m_fwd_exc[c] = (char)excess; 41 | 42 | // populate m_bwd_pos and m_bwd_min 43 | excess = 0; 44 | m_bwd_min[c] = 0; 45 | 46 | for (uint8_t i = 0; i < 8; ++i) { 47 | if ((c << i) & 128) { // opening 48 | ++excess; 49 | if (excess > 0 && 50 | m_bwd_pos[c][(uint8_t)excess] == 0) { // not already found 51 | m_bwd_pos[c][(uint8_t)excess] = uint8_t(i + 1); 52 | } 53 | } else { // closing 54 | --excess; 55 | } 56 | 57 | m_bwd_min[c] = uint8_t(std::max(excess, (int)m_bwd_min[c])); 58 | } 59 | } 60 | } 61 | 62 | char m_fwd_exc[256]; 63 | 64 | uint8_t m_fwd_pos[256][9]; 65 | uint8_t m_bwd_pos[256][9]; 66 | 67 | uint8_t m_bwd_min[256]; 68 | uint8_t m_fwd_min[256]; 69 | 70 | uint8_t m_fwd_min_idx[256]; 71 | }; 72 | 73 | const static excess_tables tables; 74 | 75 | inline bool 
find_close_in_word(uint64_t word, uint64_t byte_counts, bp_vector::excess_t cur_exc, uint64_t& ret) 76 | { 77 | assert(cur_exc > 0 && cur_exc <= 64); 78 | const uint64_t cum_exc_step_8 = (uint64_t(cur_exc) + ((2 * byte_counts - 8 * broadword::ones_step_8) << 8)) * broadword::ones_step_8; 79 | 80 | uint64_t min_exc_step_8 = 0; 81 | for (size_t i = 0; i < 8; ++i) { 82 | size_t shift = i * 8; 83 | min_exc_step_8 |= ((uint64_t)(tables.m_fwd_min[(word >> shift) & 0xFF])) << shift; 84 | } 85 | 86 | const uint64_t has_result = broadword::leq_step_8(cum_exc_step_8, min_exc_step_8); 87 | 88 | unsigned long shift; 89 | if (broadword::lsb(has_result, shift)) { 90 | uint8_t bit_pos = tables.m_fwd_pos[(word >> shift) & 0xFF][(cum_exc_step_8 >> shift) & 0xFF]; 91 | assert(bit_pos > 0); 92 | ret = shift + bit_pos - 1; 93 | return true; 94 | } 95 | return false; 96 | } 97 | 98 | inline bool find_open_in_word(uint64_t word, uint64_t byte_counts, bp_vector::excess_t cur_exc, uint64_t& ret) { 99 | assert(cur_exc > 0 && cur_exc <= 64); 100 | const uint64_t rev_byte_counts = broadword::reverse_bytes(byte_counts); 101 | const uint64_t cum_exc_step_8 = (uint64_t(cur_exc) - ((2 * rev_byte_counts - 8 * broadword::ones_step_8) << 8)) * broadword::ones_step_8; 102 | 103 | uint64_t max_exc_step_8 = 0; 104 | for (size_t i = 0; i < 8; ++i) { 105 | size_t shift = i * 8; 106 | max_exc_step_8 |= ((uint64_t)(tables.m_bwd_min[(word >> (64 - shift - 8)) & 0xFF])) << shift; 107 | } 108 | 109 | const uint64_t has_result = broadword::leq_step_8(cum_exc_step_8, max_exc_step_8); 110 | 111 | unsigned long shift; 112 | if (broadword::lsb(has_result, shift)) { 113 | uint8_t bit_pos = tables.m_bwd_pos[(word >> (64 - shift - 8)) & 0xFF][(cum_exc_step_8 >> shift) & 0xFF]; 114 | assert(bit_pos > 0); 115 | ret = 64 - (shift + bit_pos); 116 | return true; 117 | } 118 | return false; 119 | } 120 | 121 | inline void 122 | excess_rmq_in_word(uint64_t word, bp_vector::excess_t& exc, uint64_t word_start, 123 | 
bp_vector::excess_t& min_exc, uint64_t& min_exc_idx) 124 | { 125 | bp_vector::excess_t min_byte_exc = min_exc; 126 | uint64_t min_byte_idx = 0; 127 | 128 | for (size_t i = 0; i < 8; ++i) { 129 | size_t shift = i * 8; 130 | size_t byte = (word >> shift) & 0xFF; 131 | // m_fwd_min is negated 132 | bp_vector::excess_t cur_min = exc - tables.m_fwd_min[byte]; 133 | 134 | min_byte_idx = (cur_min < min_byte_exc) ? i : min_byte_idx; 135 | min_byte_exc = (cur_min < min_byte_exc) ? cur_min : min_byte_exc; 136 | 137 | exc += tables.m_fwd_exc[byte]; 138 | } 139 | 140 | if (min_byte_exc < min_exc) { 141 | min_exc = min_byte_exc; 142 | uint64_t shift = min_byte_idx * 8; 143 | min_exc_idx = word_start + shift + tables.m_fwd_min_idx[(word >> shift) & 0xFF]; 144 | } 145 | } 146 | } 147 | 148 | inline bool bp_vector::find_close_in_block(uint64_t block_offset, bp_vector::excess_t excess, uint64_t start, uint64_t& ret) const { 149 | if (excess > excess_t((bp_block_size - start) * 64)) { 150 | return false; 151 | } 152 | assert(excess > 0); 153 | for (uint64_t sub_block_offset = start; sub_block_offset < bp_block_size; ++sub_block_offset) { 154 | uint64_t sub_block = block_offset + sub_block_offset; 155 | uint64_t word = m_bits[sub_block]; 156 | uint64_t byte_counts = broadword::byte_counts(word); 157 | assert(excess > 0); 158 | if (excess <= 64) { 159 | if (find_close_in_word(word, byte_counts, excess, ret)) { 160 | ret += sub_block * 64; 161 | return true; 162 | } 163 | } 164 | excess += static_cast(2 * broadword::bytes_sum(byte_counts) - 64); 165 | } 166 | return false; 167 | } 168 | 169 | uint64_t bp_vector::find_close(uint64_t pos) const 170 | { 171 | assert((*this)[pos]); // check there is an opening parenthesis in pos 172 | uint64_t ret = -1U; 173 | // Search in current word 174 | uint64_t word_pos = (pos + 1) / 64; 175 | uint64_t shift = (pos + 1) % 64; 176 | uint64_t shifted_word = m_bits[word_pos] >> shift; 177 | // Pad with "open" 178 | uint64_t padded_word = shifted_word | 
(-!!shift & (~0ULL << (64 - shift))); 179 | uint64_t byte_counts = broadword::byte_counts(padded_word); 180 | 181 | excess_t word_exc = 1; 182 | if (find_close_in_word(padded_word, byte_counts, word_exc, ret)) { 183 | ret += pos + 1; 184 | return ret; 185 | } 186 | 187 | // Otherwise search in the local block 188 | uint64_t block = word_pos / bp_block_size; 189 | uint64_t block_offset = block * bp_block_size; 190 | uint64_t sub_block = word_pos % bp_block_size; 191 | uint64_t local_rank = broadword::bytes_sum(byte_counts) - shift; // subtract back the padding 192 | excess_t local_excess = static_cast((2 * local_rank) - (64 - shift)); 193 | if (find_close_in_block(block_offset, local_excess + 1, sub_block + 1, ret)) { 194 | return ret; 195 | } 196 | 197 | // Otherwise, find the first appropriate block 198 | excess_t pos_excess = excess(pos); 199 | uint64_t found_block = search_min_tree<1>(block + 1, pos_excess); 200 | uint64_t found_block_offset = found_block * bp_block_size; 201 | excess_t found_block_excess = get_block_excess(found_block); 202 | 203 | // Search in the found block 204 | bool found = find_close_in_block(found_block_offset, found_block_excess - pos_excess, 0, ret); 205 | assert(found); (void)found; 206 | return ret; 207 | } 208 | 209 | inline bool bp_vector::find_open_in_block(uint64_t block_offset, bp_vector::excess_t excess, uint64_t start, uint64_t& ret) const { 210 | if (excess > excess_t(start * 64)) { 211 | return false; 212 | } 213 | assert(excess >= 0); 214 | 215 | for (uint64_t sub_block_offset = start - 1; sub_block_offset + 1 > 0; --sub_block_offset) { 216 | assert(excess > 0); 217 | uint64_t sub_block = block_offset + sub_block_offset; 218 | uint64_t word = m_bits[sub_block]; 219 | uint64_t byte_counts = broadword::byte_counts(word); 220 | if (excess <= 64) { 221 | if (find_open_in_word(word, byte_counts, excess, ret)) { 222 | ret += sub_block * 64; 223 | return true; 224 | } 225 | } 226 | excess -= static_cast(2 * 
broadword::bytes_sum(byte_counts) - 64); 227 | } 228 | return false; 229 | } 230 | 231 | uint64_t bp_vector::find_open(uint64_t pos) const 232 | { 233 | assert(pos); 234 | uint64_t ret = -1U; 235 | // Search in current word 236 | uint64_t word_pos = (pos / 64); 237 | uint64_t len = pos % 64; 238 | // Rest is padded with "close" 239 | uint64_t shifted_word = -!!len & (m_bits[word_pos] << (64 - len)); 240 | uint64_t byte_counts = broadword::byte_counts(shifted_word); 241 | 242 | excess_t word_exc = 1; 243 | if (find_open_in_word(shifted_word, byte_counts, word_exc, ret)) { 244 | ret += pos - 64; 245 | return ret; 246 | } 247 | 248 | // Otherwise search in the local block 249 | uint64_t block = word_pos / bp_block_size; 250 | uint64_t block_offset = block * bp_block_size; 251 | uint64_t sub_block = word_pos % bp_block_size; 252 | uint64_t local_rank = broadword::bytes_sum(byte_counts); // no need to subtract the padding 253 | excess_t local_excess = -static_cast((2 * local_rank) - len); 254 | if (find_open_in_block(block_offset, local_excess + 1, sub_block, ret)) { 255 | return ret; 256 | } 257 | 258 | // Otherwise, find the first appropriate block 259 | excess_t pos_excess = excess(pos) - 1; 260 | uint64_t found_block = search_min_tree<0>(block - 1, pos_excess); 261 | uint64_t found_block_offset = found_block * bp_block_size; 262 | // Since search is backwards, have to add the current block 263 | excess_t found_block_excess = get_block_excess(found_block + 1); 264 | 265 | // Search in the found block 266 | bool found = find_open_in_block(found_block_offset, found_block_excess - pos_excess, bp_block_size, ret); 267 | assert(found); (void)found; 268 | return ret; 269 | } 270 | 271 | template 272 | inline bool bp_vector::search_block_in_superblock(uint64_t block, excess_t excess, size_t& found_block) const 273 | { 274 | size_t superblock = block / superblock_size; 275 | excess_t superblock_excess = get_block_excess(superblock * superblock_size); 276 | if (direction) { 
277 | for (size_t cur_block = block; 278 | cur_block < std::min((superblock + 1) * superblock_size, (size_t)m_block_excess_min.size()); 279 | ++cur_block) { 280 | if (excess >= superblock_excess + m_block_excess_min[cur_block]) { 281 | found_block = cur_block; 282 | return true; 283 | } 284 | } 285 | } else { 286 | for (size_t cur_block = block; 287 | cur_block + 1 >= (superblock * superblock_size) + 1; 288 | --cur_block) { 289 | if (excess >= superblock_excess + m_block_excess_min[cur_block]) { 290 | found_block = cur_block; 291 | return true; 292 | } 293 | } 294 | } 295 | 296 | return false; 297 | } 298 | 299 | inline bp_vector::excess_t bp_vector::get_block_excess(uint64_t block) const { 300 | uint64_t sub_block_idx = block * bp_block_size; 301 | uint64_t block_pos = sub_block_idx * 64; 302 | excess_t excess = static_cast(2 * sub_block_rank(sub_block_idx) - block_pos); 303 | assert(excess >= 0); 304 | return excess; 305 | } 306 | 307 | inline bool bp_vector::in_node_range(uint64_t node, excess_t excess) const { 308 | assert(m_superblock_excess_min[node] != excess_t(size())); 309 | return excess >= m_superblock_excess_min[node]; 310 | } 311 | 312 | template 313 | inline uint64_t bp_vector::search_min_tree(uint64_t block, excess_t excess) const 314 | { 315 | size_t found_block = -1U; 316 | if (search_block_in_superblock(block, excess, found_block)) { 317 | return found_block; 318 | } 319 | 320 | size_t cur_superblock = block / superblock_size; 321 | size_t cur_node = m_internal_nodes + cur_superblock; 322 | while (true) { 323 | assert(cur_node); 324 | bool going_back = (cur_node & 1) == direction; 325 | if (!going_back) { 326 | size_t next_node = direction ? 
(cur_node + 1) : (cur_node - 1); 327 | if (in_node_range(next_node, excess)) { 328 | cur_node = next_node; 329 | break; 330 | } 331 | } 332 | cur_node /= 2; 333 | } 334 | 335 | assert(cur_node); 336 | 337 | while (cur_node < m_internal_nodes) { 338 | uint64_t next_node = cur_node * 2 + (1 - direction); 339 | if (in_node_range(next_node, excess)) { 340 | cur_node = next_node; 341 | continue; 342 | } 343 | 344 | next_node = direction ? (next_node + 1) : (next_node - 1); 345 | // if it is not one child, it must be the other 346 | assert(in_node_range(next_node, excess)); 347 | cur_node = next_node; 348 | } 349 | 350 | size_t next_superblock = cur_node - m_internal_nodes; 351 | bool ret = search_block_in_superblock(next_superblock * superblock_size + (1 - direction) * (superblock_size - 1), 352 | excess, found_block); 353 | assert(ret); (void)ret; 354 | 355 | return found_block; 356 | } 357 | 358 | 359 | bp_vector::excess_t 360 | bp_vector::excess(uint64_t pos) const 361 | { 362 | return static_cast(2 * rank(pos) - pos); 363 | } 364 | 365 | void 366 | bp_vector::excess_rmq_in_block(uint64_t start, uint64_t end, 367 | bp_vector::excess_t& exc, 368 | bp_vector::excess_t& min_exc, 369 | uint64_t& min_exc_idx) const 370 | { 371 | assert(start <= end); 372 | if (start == end) return; 373 | 374 | assert((start / bp_block_size) == ((end - 1) / bp_block_size)); 375 | for (size_t w = start; w < end; ++w) { 376 | excess_rmq_in_word(m_bits[w], exc, w * 64, 377 | min_exc, min_exc_idx); 378 | } 379 | } 380 | 381 | void 382 | bp_vector::excess_rmq_in_superblock(uint64_t block_start, uint64_t block_end, 383 | bp_vector::excess_t& block_min_exc, 384 | uint64_t& block_min_idx) const 385 | { 386 | assert(block_start <= block_end); 387 | if (block_start == block_end) return; 388 | 389 | uint64_t superblock = block_start / superblock_size; 390 | 391 | assert(superblock == ((block_end - 1) / superblock_size)); 392 | excess_t superblock_excess = get_block_excess(superblock * 
superblock_size); 393 | 394 | for (uint64_t block = block_start; block < block_end; ++block) { 395 | if (superblock_excess + m_block_excess_min[block] < block_min_exc) { 396 | block_min_exc = superblock_excess + m_block_excess_min[block]; 397 | block_min_idx = block; 398 | } 399 | } 400 | } 401 | 402 | 403 | void 404 | bp_vector::find_min_superblock(uint64_t superblock_start, uint64_t superblock_end, 405 | bp_vector::excess_t& superblock_min_exc, 406 | uint64_t& superblock_min_idx) const { 407 | 408 | if (superblock_start == superblock_end) return; 409 | 410 | uint64_t cur_node = m_internal_nodes + superblock_start; 411 | uint64_t rightmost_span = superblock_start; 412 | 413 | excess_t node_min_exc = m_superblock_excess_min[cur_node]; 414 | uint64_t node_min_idx = cur_node; 415 | 416 | // code below assumes that there is at least one right-turn in 417 | // the node-root-node path, so we must handle this case 418 | // separately 419 | if (superblock_end - superblock_start == 1) { 420 | superblock_min_exc = node_min_exc; 421 | superblock_min_idx = superblock_start; 422 | return; 423 | } 424 | 425 | // go up the tree until we find the lowest node that spans the 426 | // whole superblock range 427 | size_t h = 0; 428 | while (true) { 429 | assert(cur_node); 430 | 431 | if ((cur_node & 1) == 0) { // is a left child 432 | // add right subtree to candidate superblocks 433 | uint64_t right_sibling = cur_node + 1; 434 | rightmost_span += uint64_t(1) << h; 435 | 436 | if (rightmost_span < superblock_end && 437 | m_superblock_excess_min[right_sibling] < node_min_exc) { 438 | node_min_exc = m_superblock_excess_min[right_sibling]; 439 | node_min_idx = right_sibling; 440 | } 441 | 442 | if (rightmost_span >= superblock_end - 1) { 443 | cur_node += 1; 444 | break; 445 | } 446 | } 447 | 448 | cur_node /= 2; // parent 449 | h += 1; 450 | } 451 | 452 | assert(cur_node); 453 | 454 | // go down until we reach superblock_end 455 | while (rightmost_span > superblock_end - 1) { 456 | 
assert(cur_node < m_superblock_excess_min.size()); 457 | assert(h > 0); 458 | 459 | h -= 1; 460 | uint64_t left_child = cur_node * 2; 461 | uint64_t right_child_span = uint64_t(1) << h; 462 | if ((rightmost_span - right_child_span) >= (superblock_end - 1)) { 463 | // go to left child 464 | rightmost_span -= right_child_span; 465 | cur_node = left_child; 466 | } else { 467 | // go to right child and add left subtree to candidate 468 | // subblocks 469 | if (m_superblock_excess_min[left_child] < node_min_exc) { 470 | node_min_exc = m_superblock_excess_min[left_child]; 471 | node_min_idx = left_child; 472 | } 473 | cur_node = left_child + 1; 474 | } 475 | } 476 | 477 | // check last left-turn 478 | if (rightmost_span < superblock_end && 479 | m_superblock_excess_min[cur_node] < node_min_exc) { 480 | node_min_exc = m_superblock_excess_min[cur_node]; 481 | node_min_idx = cur_node; 482 | } 483 | 484 | assert(rightmost_span == superblock_end - 1); 485 | 486 | // now reach the minimum leaf in the found subtree (cur_node), 487 | // which is entirely contained in the range 488 | if (node_min_exc < superblock_min_exc) { 489 | cur_node = node_min_idx; 490 | while (cur_node < m_internal_nodes) { 491 | cur_node *= 2; 492 | // remember that past-the-end nodes are filled with size() 493 | if (m_superblock_excess_min[cur_node + 1] < 494 | m_superblock_excess_min[cur_node]) { 495 | cur_node += 1; 496 | } 497 | } 498 | 499 | assert(m_superblock_excess_min[cur_node] == node_min_exc); 500 | superblock_min_exc = node_min_exc; 501 | superblock_min_idx = cur_node - m_internal_nodes; 502 | 503 | assert(superblock_min_idx >= superblock_start); 504 | assert(superblock_min_idx < superblock_end); 505 | } 506 | } 507 | 508 | uint64_t bp_vector::excess_rmq(uint64_t a, uint64_t b, excess_t& min_exc) const 509 | { 510 | assert(a <= b); 511 | 512 | excess_t cur_exc = excess(a); 513 | min_exc = cur_exc; 514 | uint64_t min_exc_idx = a; 515 | 516 | if (a == b) { 517 | return min_exc_idx; 518 | } 519 | 
520 | uint64_t range_len = b - a; 521 | 522 | uint64_t word_a_idx = a / 64; 523 | uint64_t word_b_idx = (b - 1) / 64; 524 | 525 | // search in word_a 526 | uint64_t shift_a = a % 64; 527 | uint64_t shifted_word_a = m_bits[word_a_idx] >> shift_a; 528 | uint64_t subword_len_a = std::min(64 - shift_a, range_len); 529 | 530 | uint64_t padded_word_a = 531 | (subword_len_a == 64) 532 | ? shifted_word_a 533 | : (shifted_word_a | (~0ULL << subword_len_a)); 534 | 535 | excess_rmq_in_word(padded_word_a, cur_exc, a, 536 | min_exc, min_exc_idx); 537 | 538 | if (word_a_idx == word_b_idx) { 539 | // single word 540 | return min_exc_idx; 541 | } 542 | 543 | uint64_t block_a = word_a_idx / bp_block_size; 544 | uint64_t block_b = word_b_idx / bp_block_size; 545 | 546 | cur_exc -= 64 - excess_t(subword_len_a); // remove padding 547 | 548 | if (block_a == block_b) { 549 | // same block 550 | excess_rmq_in_block(word_a_idx + 1, word_b_idx, 551 | cur_exc, min_exc, min_exc_idx); 552 | 553 | } else { 554 | // search in partial block of word_a 555 | excess_rmq_in_block(word_a_idx + 1, (block_a + 1) * bp_block_size, 556 | cur_exc, min_exc, min_exc_idx); 557 | 558 | // search in blocks 559 | excess_t block_min_exc = min_exc; 560 | uint64_t block_min_idx = -1U; 561 | 562 | uint64_t superblock_a = (block_a + 1) / superblock_size; 563 | uint64_t superblock_b = block_b / superblock_size; 564 | 565 | if (superblock_a == superblock_b) { 566 | // same superblock 567 | excess_rmq_in_superblock(block_a + 1, block_b, 568 | block_min_exc, block_min_idx); 569 | } else { 570 | // partial superblock of a 571 | excess_rmq_in_superblock(block_a + 1, 572 | (superblock_a + 1) * superblock_size, 573 | block_min_exc, 574 | block_min_idx); 575 | 576 | // search min superblock in the min tree 577 | excess_t superblock_min_exc = min_exc; 578 | uint64_t superblock_min_idx = -1U; 579 | find_min_superblock(superblock_a + 1, superblock_b, 580 | superblock_min_exc, superblock_min_idx); 581 | 582 | if 
(superblock_min_exc < min_exc) { 583 | excess_rmq_in_superblock(superblock_min_idx * superblock_size, 584 | (superblock_min_idx + 1) * superblock_size, 585 | block_min_exc, 586 | block_min_idx); 587 | } 588 | 589 | // partial superblock of b 590 | excess_rmq_in_superblock(superblock_b * superblock_size, 591 | block_b, 592 | block_min_exc, 593 | block_min_idx); 594 | } 595 | 596 | if (block_min_exc < min_exc) { 597 | cur_exc = get_block_excess(block_min_idx); 598 | excess_rmq_in_block(block_min_idx * bp_block_size, 599 | (block_min_idx + 1) * bp_block_size, 600 | cur_exc, min_exc, min_exc_idx); 601 | assert(min_exc == block_min_exc); 602 | } 603 | 604 | // search in partial block of word_b 605 | cur_exc = get_block_excess(block_b); 606 | excess_rmq_in_block(block_b * bp_block_size, word_b_idx, 607 | cur_exc, min_exc, min_exc_idx); 608 | } 609 | 610 | // search in word_b 611 | uint64_t word_b = m_bits[word_b_idx]; 612 | uint64_t offset_b = b % 64; 613 | uint64_t padded_word_b = 614 | (offset_b == 0) 615 | ? 
word_b 616 | : (word_b | (~0ULL << offset_b)); 617 | 618 | excess_rmq_in_word(padded_word_b, cur_exc, word_b_idx * 64, 619 | min_exc, min_exc_idx); 620 | 621 | assert(min_exc_idx >= a); 622 | assert(min_exc == excess(min_exc_idx)); 623 | 624 | return min_exc_idx; 625 | } 626 | 627 | 628 | void bp_vector::build_min_tree() 629 | { 630 | if (!size()) return; 631 | 632 | std::vector block_excess_min; 633 | excess_t cur_block_min = 0, cur_superblock_excess = 0; 634 | for (uint64_t sub_block = 0; sub_block < m_bits.size(); ++sub_block) { 635 | if (sub_block % bp_block_size == 0) { 636 | if (sub_block % (bp_block_size * superblock_size) == 0) { 637 | cur_superblock_excess = 0; 638 | } 639 | if (sub_block) { 640 | assert(cur_block_min >= std::numeric_limits::min()); 641 | assert(cur_block_min <= std::numeric_limits::max()); 642 | block_excess_min.push_back((block_min_excess_t)cur_block_min); 643 | cur_block_min = cur_superblock_excess; 644 | } 645 | } 646 | uint64_t word = m_bits[sub_block]; 647 | uint64_t mask = 1ULL; 648 | // for last block stop at bit boundary 649 | uint64_t n_bits = 650 | (sub_block == m_bits.size() - 1 && size() % 64) 651 | ? size() % 64 652 | : 64; 653 | // XXX(ot) use tables.m_fwd_{min,max} 654 | for (uint64_t i = 0; i < n_bits; ++i) { 655 | cur_superblock_excess += (word & mask) ? 
1 : -1; 656 | cur_block_min = std::min(cur_block_min, cur_superblock_excess); 657 | mask <<= 1; 658 | } 659 | } 660 | // Flush last block mins 661 | assert(cur_block_min >= std::numeric_limits::min()); 662 | assert(cur_block_min <= std::numeric_limits::max()); 663 | block_excess_min.push_back((block_min_excess_t)cur_block_min); 664 | 665 | size_t n_blocks = util::ceil_div(data().size(), bp_block_size); 666 | assert(n_blocks == block_excess_min.size()); 667 | 668 | size_t n_superblocks = (n_blocks + superblock_size - 1) / superblock_size; 669 | 670 | size_t n_complete_leaves = 1; 671 | while (n_complete_leaves < n_superblocks) n_complete_leaves <<= 1; // XXX(ot): I'm sure this can be done with broadword::msb... 672 | // n_complete_leaves is the smallest power of 2 >= n_superblocks 673 | m_internal_nodes = n_complete_leaves; 674 | size_t treesize = m_internal_nodes + n_superblocks; 675 | 676 | std::vector superblock_excess_min(treesize); 677 | 678 | // Fill in the leaves of the tree 679 | for (size_t superblock = 0; superblock < n_superblocks; ++superblock) { 680 | excess_t cur_super_min = static_cast(size()); 681 | excess_t superblock_excess = get_block_excess(superblock * superblock_size); 682 | 683 | for (size_t block = superblock * superblock_size; 684 | block < std::min((superblock + 1) * superblock_size, n_blocks); 685 | ++block) { 686 | cur_super_min = std::min(cur_super_min, superblock_excess + block_excess_min[block]); 687 | } 688 | assert(cur_super_min >= 0 && cur_super_min < excess_t(size())); 689 | 690 | superblock_excess_min[m_internal_nodes + superblock] = cur_super_min; 691 | } 692 | 693 | // fill in the internal nodes with past-the-boundary values 694 | // (they will also serve as sentinels in debug) 695 | for (size_t node = 0; node < m_internal_nodes; ++node) { 696 | superblock_excess_min[node] = static_cast(size()); 697 | } 698 | 699 | // Fill bottom-up the other layers: each node updates the parent 700 | for (size_t node = treesize - 1; node > 1; 
--node) { 701 | size_t parent = node / 2; 702 | superblock_excess_min[parent] = std::min(superblock_excess_min[parent], // same node 703 | superblock_excess_min[node]); 704 | } 705 | 706 | m_block_excess_min.steal(block_excess_min); 707 | m_superblock_excess_min.steal(superblock_excess_min); 708 | } 709 | } 710 | -------------------------------------------------------------------------------- /bp_vector.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #include "rs_bit_vector.hpp" 10 | 11 | namespace succinct { 12 | 13 | class bp_vector : public rs_bit_vector { 14 | public: 15 | bp_vector() 16 | : rs_bit_vector() 17 | {} 18 | 19 | template 20 | bp_vector(Range const& from, 21 | bool with_select_hints = false, 22 | bool with_select0_hints = false) 23 | : rs_bit_vector(from, with_select_hints, with_select0_hints) 24 | { 25 | build_min_tree(); 26 | } 27 | 28 | template 29 | void map(Visitor& visit) { 30 | rs_bit_vector::map(visit); 31 | visit 32 | (m_internal_nodes, "m_internal_nodes") 33 | (m_block_excess_min, "m_block_excess_min") 34 | (m_superblock_excess_min, "m_superblock_excess_min") 35 | ; 36 | } 37 | 38 | void swap(bp_vector& other) { 39 | rs_bit_vector::swap(other); 40 | std::swap(m_internal_nodes, other.m_internal_nodes); 41 | m_block_excess_min.swap(other.m_block_excess_min); 42 | m_superblock_excess_min.swap(other.m_superblock_excess_min); 43 | } 44 | 45 | uint64_t find_open(uint64_t pos) const; 46 | uint64_t find_close(uint64_t pos) const; 47 | uint64_t enclose(uint64_t pos) const { 48 | assert((*this)[pos]); 49 | return find_open(pos); 50 | } 51 | 52 | typedef int32_t excess_t; // Allow at most 2^31 depth of the tree 53 | 54 | excess_t excess(uint64_t pos) const; 55 | uint64_t excess_rmq(uint64_t a, uint64_t b, excess_t& min_exc) const; 56 | inline uint64_t excess_rmq(uint64_t a, uint64_t b) const { 57 | excess_t foo; 58 | return 
excess_rmq(a, b, foo); 59 | } 60 | 61 | 62 | protected: 63 | 64 | static const size_t bp_block_size = 4; // to increase confusion, bp block_size is not necessarily rs_bit_vector block_size 65 | static const size_t superblock_size = 32; // number of blocks in superblock 66 | 67 | typedef int16_t block_min_excess_t; // superblock must be at most 2^15 - 1 bits 68 | 69 | bool find_close_in_block(uint64_t pos, excess_t excess, 70 | uint64_t max_sub_blocks, uint64_t& ret) const; 71 | bool find_open_in_block(uint64_t pos, excess_t excess, 72 | uint64_t max_sub_blocks, uint64_t& ret) const; 73 | 74 | void excess_rmq_in_block(uint64_t start, uint64_t end, 75 | bp_vector::excess_t& exc, 76 | bp_vector::excess_t& min_exc, 77 | uint64_t& min_exc_idx) const; 78 | void excess_rmq_in_superblock(uint64_t block_start, uint64_t block_end, 79 | bp_vector::excess_t& block_min_exc, 80 | uint64_t& block_min_idx) const; 81 | void find_min_superblock(uint64_t superblock_start, uint64_t superblock_end, 82 | bp_vector::excess_t& superblock_min_exc, 83 | uint64_t& superblock_min_idx) const; 84 | 85 | 86 | inline excess_t get_block_excess(uint64_t block) const; 87 | inline bool in_node_range(uint64_t node, excess_t excess) const; 88 | 89 | template 90 | inline bool search_block_in_superblock(uint64_t block, excess_t excess, size_t& found_block) const; 91 | 92 | template 93 | inline uint64_t search_min_tree(uint64_t block, excess_t excess) const; 94 | 95 | void build_min_tree(); 96 | 97 | uint64_t m_internal_nodes; 98 | mapper::mappable_vector m_block_excess_min; 99 | mapper::mappable_vector m_superblock_excess_min; 100 | }; 101 | } 102 | -------------------------------------------------------------------------------- /broadword.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include "intrinsics.hpp" 5 | #include "tables.hpp" 6 | 7 | namespace succinct { namespace broadword { 8 | 9 | static const uint64_t ones_step_4 
= 0x1111111111111111ULL; 10 | static const uint64_t ones_step_8 = 0x0101010101010101ULL; 11 | static const uint64_t ones_step_9 = 1ULL << 0 | 1ULL << 9 | 1ULL << 18 | 1ULL << 27 | 1ULL << 36 | 1ULL << 45 | 1ULL << 54; 12 | static const uint64_t msbs_step_8 = 0x80ULL * ones_step_8; 13 | static const uint64_t msbs_step_9 = 0x100ULL * ones_step_9; 14 | static const uint64_t incr_step_8 = 0x80ULL << 56 | 0x40ULL << 48 | 0x20ULL << 40 | 0x10ULL << 32 | 0x8ULL << 24 | 0x4ULL << 16 | 0x2ULL << 8 | 0x1; 15 | static const uint64_t inv_count_step_9 = 1ULL << 54 | 2ULL << 45 | 3ULL << 36 | 4ULL << 27 | 5ULL << 18 | 6ULL << 9 | 7ULL; 16 | 17 | static const uint64_t magic_mask_1 = 0x5555555555555555ULL; 18 | static const uint64_t magic_mask_2 = 0x3333333333333333ULL; 19 | static const uint64_t magic_mask_3 = 0x0F0F0F0F0F0F0F0FULL; 20 | static const uint64_t magic_mask_4 = 0x00FF00FF00FF00FFULL; 21 | static const uint64_t magic_mask_5 = 0x0000FFFF0000FFFFULL; 22 | static const uint64_t magic_mask_6 = 0x00000000FFFFFFFFULL; 23 | 24 | inline uint64_t leq_step_8(uint64_t x, uint64_t y) 25 | { 26 | return ((((y | msbs_step_8) - (x & ~msbs_step_8)) ^ (x ^ y)) & msbs_step_8) >> 7; 27 | } 28 | 29 | inline uint64_t uleq_step_8(uint64_t x, uint64_t y) 30 | { 31 | return (((((y | msbs_step_8) - (x & ~msbs_step_8)) ^ (x ^ y)) ^ (x & ~y)) & msbs_step_8) >> 7; 32 | } 33 | 34 | inline uint64_t zcompare_step_8(uint64_t x) 35 | { 36 | return ((x | ((x | msbs_step_8) - ones_step_8)) & msbs_step_8) >> 7; 37 | } 38 | 39 | inline uint64_t uleq_step_9(uint64_t x, uint64_t y) 40 | { 41 | return (((((y | msbs_step_9) - (x & ~msbs_step_9)) | (x ^ y)) ^ (x & ~y)) & msbs_step_9 ) >> 8; 42 | } 43 | 44 | inline uint64_t byte_counts(uint64_t x) 45 | { 46 | x = x - ((x & 0xa * ones_step_4) >> 1); 47 | x = (x & 3 * ones_step_4) + ((x >> 2) & 3 * ones_step_4); 48 | x = (x + (x >> 4)) & 0x0f * ones_step_8; 49 | return x; 50 | } 51 | 52 | inline uint64_t bytes_sum(uint64_t x) 53 | { 54 | return x * ones_step_8 
>> 56; 55 | } 56 | 57 | inline uint64_t popcount(uint64_t x) 58 | { 59 | #if SUCCINCT_USE_POPCNT 60 | return intrinsics::popcount(x); 61 | #else 62 | return bytes_sum(byte_counts(x)); 63 | #endif 64 | } 65 | 66 | inline uint64_t reverse_bytes(uint64_t x) 67 | { 68 | #if SUCCINCT_USE_INTRINSICS 69 | return intrinsics::byteswap64(x); 70 | #else 71 | x = ((x >> 8) & magic_mask_4) | ((x & magic_mask_4) << 8); 72 | x = ((x >> 16) & magic_mask_5) | ((x & magic_mask_5) << 16); 73 | x = ((x >> 32) ) | ((x ) << 32); 74 | return x; 75 | #endif 76 | } 77 | 78 | inline uint64_t reverse_bits(uint64_t x) 79 | { 80 | x = ((x >> 1) & magic_mask_1) | ((x & magic_mask_1) << 1); 81 | x = ((x >> 2) & magic_mask_2) | ((x & magic_mask_2) << 2); 82 | x = ((x >> 4) & magic_mask_3) | ((x & magic_mask_3) << 4); 83 | return reverse_bytes(x); 84 | } 85 | 86 | inline uint64_t select_in_word(const uint64_t x, const uint64_t k) 87 | { 88 | assert(k < popcount(x)); 89 | 90 | uint64_t byte_sums = byte_counts(x) * ones_step_8; 91 | 92 | const uint64_t k_step_8 = k * ones_step_8; 93 | const uint64_t geq_k_step_8 = (((k_step_8 | msbs_step_8) - byte_sums) & msbs_step_8); 94 | #if SUCCINCT_USE_POPCNT 95 | const uint64_t place = intrinsics::popcount(geq_k_step_8) * 8; 96 | #else 97 | const uint64_t place = ((geq_k_step_8 >> 7) * ones_step_8 >> 53) & ~uint64_t(0x7); 98 | #endif 99 | const uint64_t byte_rank = k - (((byte_sums << 8 ) >> place) & uint64_t(0xFF)); 100 | return place + tables::select_in_byte[((x >> place) & 0xFF ) | (byte_rank << 8)]; 101 | } 102 | 103 | inline uint64_t same_msb(uint64_t x, uint64_t y) 104 | { 105 | return (x ^ y) <= (x & y); 106 | } 107 | 108 | namespace detail { 109 | // Adapted from LSB of Chess Programming Wiki 110 | static const uint8_t debruijn64_mapping[64] = { 111 | 63, 0, 58, 1, 59, 47, 53, 2, 112 | 60, 39, 48, 27, 54, 33, 42, 3, 113 | 61, 51, 37, 40, 49, 18, 28, 20, 114 | 55, 30, 34, 11, 43, 14, 22, 4, 115 | 62, 57, 46, 52, 38, 26, 32, 41, 116 | 50, 36, 17, 19, 29, 
10, 13, 21, 117 | 56, 45, 25, 31, 35, 16, 9, 12, 118 | 44, 24, 15, 8, 23, 7, 6, 5 119 | }; 120 | static const uint64_t debruijn64 = 0x07EDD5E59A4E28C2ULL; 121 | } 122 | 123 | // return the position of the single bit set in the word x 124 | inline uint8_t bit_position(uint64_t x) 125 | { 126 | assert(popcount(x) == 1); 127 | return detail::debruijn64_mapping 128 | [(x * detail::debruijn64) >> 58]; 129 | } 130 | 131 | inline uint8_t msb(uint64_t x, unsigned long& ret) 132 | { 133 | #if SUCCINCT_USE_INTRINSICS 134 | return intrinsics::bsr64(&ret, x); 135 | #else 136 | if (!x) { 137 | return false; 138 | } 139 | 140 | // right-saturate the word 141 | x |= x >> 1; 142 | x |= x >> 2; 143 | x |= x >> 4; 144 | x |= x >> 8; 145 | x |= x >> 16; 146 | x |= x >> 32; 147 | 148 | // isolate the MSB 149 | x ^= x >> 1; 150 | ret = bit_position(x); 151 | 152 | return true; 153 | #endif 154 | } 155 | 156 | inline uint8_t msb(uint64_t x) 157 | { 158 | assert(x); 159 | unsigned long ret = -1U; 160 | msb(x, ret); 161 | return (uint8_t)ret; 162 | } 163 | 164 | inline uint8_t lsb(uint64_t x, unsigned long& ret) 165 | { 166 | #if SUCCINCT_USE_INTRINSICS 167 | return intrinsics::bsf64(&ret, x); 168 | #else 169 | if (!x) { 170 | return false; 171 | } 172 | ret = bit_position(x & -x); 173 | return true; 174 | #endif 175 | } 176 | 177 | inline uint8_t lsb(uint64_t x) 178 | { 179 | assert(x); 180 | unsigned long ret = -1U; 181 | lsb(x, ret); 182 | return (uint8_t)ret; 183 | } 184 | 185 | }} 186 | -------------------------------------------------------------------------------- /cartesian_tree.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | 7 | #include "bp_vector.hpp" 8 | #include "util.hpp" 9 | 10 | namespace succinct { 11 | 12 | // This class implements a cartesian-tree-based RMQ data 13 | // structure, using the 2d-Min-Heap DFUDS representation described 14 | // in "Space-Efficient Preprocessing 
Schemes for Range Minimum 15 | // Queries on Static Arrays", Johannes Fischer and Volker Heun, 16 | // SIAM J. Comput., 40(2), 465–492. 17 | 18 | // We made a few variations: 19 | // 20 | // - The rmq() operation in the paper checks whether x is parent 21 | // of w - 1, which can be written as select0(x - 1) < 22 | // find_open(w - 1). We use instead the fact that the excess 23 | // between x and w (both excluded) is strictly greater than the 24 | // excess of w, so the formula above holds iff excess(select0(x 25 | // - 1) + 1) <= excess(w). This is faster because a select0 is 26 | // faster than find_open+rank0. 27 | // 28 | // - The construction is done in reverse order so that the input 29 | // array can be traversed left-to-right. This involves 30 | // re-mapping all the indices at query time. Since the array is 31 | // reversed, in ties the leftmost element wins 32 | // 33 | // - Our data structures have 0-based indices, so the operations 34 | // are slightly different from those in the paper 35 | 36 | class cartesian_tree : boost::noncopyable { 37 | public: 38 | 39 | template 40 | class builder { 41 | public: 42 | builder(uint64_t expected_size = 0) 43 | { 44 | if (expected_size) { 45 | m_bp.reserve(2 * expected_size + 2); 46 | } 47 | } 48 | 49 | template 50 | void push_back(T const& val, Comparator const& comp) 51 | { 52 | m_bp.push_back(0); 53 | 54 | while (!m_stack.empty() 55 | && comp(val, m_stack.back())) { // val < m_stack.back() 56 | m_stack.pop_back(); 57 | m_bp.push_back(1); 58 | } 59 | 60 | m_stack.push_back(val); 61 | } 62 | 63 | bit_vector_builder& finalize() 64 | { 65 | // super-root 66 | m_bp.push_back(0); 67 | while (!m_stack.empty()) { 68 | m_stack.pop_back(); 69 | m_bp.push_back(1); 70 | } 71 | m_bp.push_back(1); 72 | 73 | m_bp.reverse(); 74 | return m_bp; 75 | } 76 | 77 | friend class cartesian_tree; 78 | private: 79 | std::vector m_stack; 80 | bit_vector_builder m_bp; 81 | }; 82 | 83 | cartesian_tree() {} 84 | 85 | template 86 | 
cartesian_tree(builder* b) 87 | { 88 | bp_vector(&b->finalize(), false, true).swap(m_bp); 89 | } 90 | 91 | template 92 | cartesian_tree(Range const& v) 93 | { 94 | build_from_range(v, std::less::type>()); 95 | } 96 | 97 | template 98 | cartesian_tree(Range const& v, Comparator const& comp) 99 | { 100 | build_from_range(v, comp); 101 | } 102 | 103 | // NOTE: this is RMQ in the interval [a, b], b inclusive 104 | // XXX(ot): maybe change this to [a, b), for consistency with 105 | // the rest of the library? 106 | uint64_t rmq(uint64_t a, uint64_t b) const 107 | { 108 | typedef bp_vector::excess_t excess_t; 109 | 110 | assert(a <= b); 111 | if (a == b) return a; 112 | 113 | uint64_t n = size(); 114 | 115 | uint64_t t = m_bp.select0(n - b - 1); 116 | excess_t exc_t = excess_t(t - 2 * (n - b - 1)); 117 | assert(exc_t - 1 == m_bp.excess(t + 1)); 118 | 119 | uint64_t x = m_bp.select0(n - b); 120 | uint64_t y = m_bp.select0(n - a); 121 | 122 | excess_t exc_w; 123 | uint64_t w = m_bp.excess_rmq(x, y, exc_w); 124 | uint64_t rank0_w = (w - uint64_t(exc_w)) / 2; 125 | assert(m_bp[w - 1] == 0); 126 | 127 | uint64_t ret; 128 | if (exc_w >= exc_t - 1) { 129 | ret = b; 130 | } else { 131 | ret = n - rank0_w; 132 | } 133 | 134 | assert(ret >= a); 135 | assert(ret <= b); 136 | return ret; 137 | } 138 | 139 | bp_vector const& get_bp() const 140 | { 141 | return m_bp; 142 | } 143 | 144 | uint64_t size() const 145 | { 146 | return m_bp.size() / 2 - 1; 147 | } 148 | 149 | template 150 | void map(Visitor& visit) 151 | { 152 | visit 153 | (m_bp, "m_bp"); 154 | } 155 | 156 | void swap(cartesian_tree& other) 157 | { 158 | other.m_bp.swap(m_bp); 159 | } 160 | 161 | protected: 162 | 163 | template 164 | void build_from_range(Range const& v, Comparator const& comp) 165 | { 166 | typedef typename 167 | boost::range_value::type value_type; 168 | typedef typename 169 | boost::range_iterator::type iter_type; 170 | 171 | builder b; 172 | for (iter_type it = boost::begin(v); it != boost::end(v); 
++it) { 173 | b.push_back(*it, comp); 174 | } 175 | cartesian_tree(&b).swap(*this); 176 | } 177 | 178 | 179 | bp_vector m_bp; 180 | }; 181 | 182 | } 183 | -------------------------------------------------------------------------------- /darray.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "bit_vector.hpp" 4 | 5 | namespace succinct { 6 | 7 | namespace detail { 8 | 9 | template 10 | class darray { 11 | public: 12 | 13 | darray() 14 | : m_positions() 15 | {} 16 | 17 | darray(bit_vector const& bv) 18 | : m_positions() 19 | { 20 | mapper::mappable_vector const& data = bv.data(); 21 | 22 | std::vector cur_block_positions; 23 | std::vector block_inventory; 24 | std::vector subblock_inventory; 25 | std::vector overflow_positions; 26 | 27 | for (size_t word_idx = 0; word_idx < data.size(); ++word_idx) { 28 | size_t cur_pos = word_idx * 64; 29 | uint64_t cur_word = WordGetter()(data, word_idx); 30 | unsigned long l; 31 | while (broadword::lsb(cur_word, l)) { 32 | cur_pos += l; 33 | cur_word >>= l; 34 | if (cur_pos >= bv.size()) break; 35 | 36 | cur_block_positions.push_back(cur_pos); 37 | 38 | if (cur_block_positions.size() == block_size) { 39 | flush_cur_block(cur_block_positions, block_inventory, subblock_inventory, overflow_positions); 40 | } 41 | 42 | // can't do >>= l + 1, can be 64 43 | cur_word >>= 1; 44 | cur_pos += 1; 45 | m_positions += 1; 46 | } 47 | } 48 | if (cur_block_positions.size()) { 49 | flush_cur_block(cur_block_positions, block_inventory, subblock_inventory, overflow_positions); 50 | } 51 | 52 | m_block_inventory.steal(block_inventory); 53 | m_subblock_inventory.steal(subblock_inventory); 54 | m_overflow_positions.steal(overflow_positions); 55 | } 56 | 57 | template 58 | void map(Visitor& visit) { 59 | visit 60 | (m_positions, "m_positions") 61 | (m_block_inventory, "m_block_inventory") 62 | (m_subblock_inventory, "m_subblock_inventory") 63 | (m_overflow_positions, 
"m_overflow_positions") 64 | ; 65 | } 66 | 67 | void swap(darray& other) { 68 | std::swap(other.m_positions, m_positions); 69 | m_block_inventory.swap(other.m_block_inventory); 70 | m_subblock_inventory.swap(other.m_subblock_inventory); 71 | m_overflow_positions.swap(other.m_overflow_positions); 72 | } 73 | 74 | inline uint64_t select(bit_vector const& bv, uint64_t idx) const 75 | { 76 | assert(idx < num_positions()); 77 | uint64_t block = idx / block_size; 78 | int64_t block_pos = m_block_inventory[block]; 79 | if (block_pos < 0) { 80 | uint64_t overflow_pos = uint64_t(-block_pos - 1); 81 | return m_overflow_positions[overflow_pos + (idx % block_size)]; 82 | } 83 | 84 | size_t subblock = idx / subblock_size; 85 | size_t start_pos = uint64_t(block_pos) + m_subblock_inventory[subblock]; 86 | size_t reminder = idx % subblock_size; 87 | mapper::mappable_vector const& data = bv.data(); 88 | 89 | if (!reminder) { 90 | return start_pos; 91 | } else { 92 | size_t word_idx = start_pos / 64; 93 | size_t word_shift = start_pos % 64; 94 | uint64_t word = WordGetter()(data, word_idx) & (uint64_t(-1) << word_shift); 95 | 96 | while (true) { 97 | size_t popcnt = broadword::popcount(word); 98 | if (reminder < popcnt) break; 99 | reminder -= popcnt; 100 | word = WordGetter()(data, ++word_idx); 101 | } 102 | 103 | return 64 * word_idx + broadword::select_in_word(word, reminder); 104 | } 105 | } 106 | 107 | inline uint64_t num_positions() const { 108 | return m_positions; 109 | } 110 | 111 | protected: 112 | 113 | static void flush_cur_block(std::vector& cur_block_positions, std::vector& block_inventory, 114 | std::vector& subblock_inventory, std::vector& overflow_positions) 115 | { 116 | if (cur_block_positions.back() - cur_block_positions.front() < max_in_block_distance) { 117 | block_inventory.push_back(int64_t(cur_block_positions.front())); 118 | for (size_t i = 0; i < cur_block_positions.size(); i += subblock_size) { 119 | 
subblock_inventory.push_back(uint16_t(cur_block_positions[i] - cur_block_positions.front())); 120 | } 121 | } else { 122 | block_inventory.push_back(-int64_t(overflow_positions.size()) - 1); 123 | for (size_t i = 0; i < cur_block_positions.size(); ++i) { 124 | overflow_positions.push_back(cur_block_positions[i]); 125 | } 126 | for (size_t i = 0; i < cur_block_positions.size(); i += subblock_size) { 127 | subblock_inventory.push_back(uint16_t(-1)); 128 | } 129 | } 130 | cur_block_positions.clear(); 131 | } 132 | 133 | 134 | 135 | static const size_t block_size = 1024; 136 | static const size_t subblock_size = 32; 137 | static const size_t max_in_block_distance = 1 << 16; 138 | 139 | size_t m_positions; 140 | mapper::mappable_vector m_block_inventory; 141 | mapper::mappable_vector m_subblock_inventory; 142 | mapper::mappable_vector m_overflow_positions; 143 | }; 144 | 145 | struct identity_getter 146 | { 147 | uint64_t operator()(mapper::mappable_vector const& data, size_t idx) const { 148 | return data[idx]; 149 | } 150 | }; 151 | 152 | struct negating_getter 153 | { 154 | uint64_t operator()(mapper::mappable_vector const& data, size_t idx) const { 155 | return ~data[idx]; 156 | } 157 | }; 158 | } 159 | 160 | typedef detail::darray darray1; 161 | typedef detail::darray darray0; 162 | } 163 | -------------------------------------------------------------------------------- /darray64.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "bit_vector.hpp" 4 | #include "broadword.hpp" 5 | 6 | namespace succinct { 7 | 8 | struct darray64 9 | { 10 | struct builder { 11 | builder() 12 | : n_ones(0) 13 | {} 14 | 15 | void append1(size_t skip0 = 0) 16 | { 17 | bits.append_bits(0, skip0); 18 | bits.push_back(1); 19 | 20 | if (n_ones % block_size == 0) { 21 | block_inventory.push_back(bits.size() - 1); 22 | } 23 | if (n_ones % subblock_size == 0) { 24 | subblock_inventory.push_back(uint16_t(bits.size() - 1 - 
block_inventory[n_ones / block_size])); 25 | } 26 | 27 | n_ones += 1; 28 | } 29 | 30 | size_t n_ones; 31 | bit_vector_builder bits; 32 | std::vector block_inventory; 33 | std::vector subblock_inventory; 34 | }; 35 | 36 | darray64() 37 | : m_num_ones(0) 38 | {} 39 | 40 | darray64(builder* b) 41 | { 42 | m_num_ones = b->n_ones; 43 | bit_vector(&b->bits).swap(m_bits); 44 | m_block_inventory.steal(b->block_inventory); 45 | m_subblock_inventory.steal(b->subblock_inventory); 46 | } 47 | 48 | void swap(darray64& other) 49 | { 50 | std::swap(m_num_ones, other.m_num_ones); 51 | m_bits.swap(other.m_bits); 52 | m_block_inventory.swap(other.m_block_inventory); 53 | m_subblock_inventory.swap(other.m_subblock_inventory); 54 | } 55 | 56 | template 57 | void map(Visitor& visit) { 58 | visit 59 | (m_num_ones, "m_num_ones") 60 | (m_bits, "m_bits") 61 | (m_block_inventory, "m_block_inventory") 62 | (m_subblock_inventory, "m_subblock_inventory") 63 | ; 64 | } 65 | 66 | size_t num_ones() const 67 | { 68 | return m_num_ones; 69 | } 70 | 71 | bit_vector const& bits() const 72 | { 73 | return m_bits; 74 | } 75 | 76 | size_t select(size_t idx) const 77 | { 78 | assert(idx < num_ones()); 79 | size_t block = idx / block_size; 80 | size_t block_pos = m_block_inventory[block]; 81 | 82 | size_t subblock = idx / subblock_size; 83 | size_t start_pos = block_pos + m_subblock_inventory[subblock]; 84 | size_t reminder = idx % subblock_size; 85 | 86 | if (!reminder) { 87 | return start_pos; 88 | } else { 89 | size_t word_idx = start_pos / 64; 90 | size_t word_shift = start_pos % 64; 91 | uint64_t word = m_bits.data()[word_idx] & (uint64_t(-1) << word_shift); 92 | 93 | while (true) { 94 | size_t popcnt = broadword::popcount(word); 95 | if (reminder < popcnt) break; 96 | reminder -= popcnt; 97 | word = m_bits.data()[++word_idx]; 98 | } 99 | 100 | return 64 * word_idx + broadword::select_in_word(word, reminder); 101 | } 102 | } 103 | 104 | protected: 105 | 106 | static const size_t block_size = 1024; // 
64 * block_size must fit in an uint16_t (64 is the max sparsity of bits) 107 | static const size_t subblock_size = 64; 108 | 109 | size_t m_num_ones; 110 | bit_vector m_bits; 111 | mapper::mappable_vector m_block_inventory; 112 | mapper::mappable_vector m_subblock_inventory; 113 | 114 | }; 115 | } 116 | -------------------------------------------------------------------------------- /elias_fano.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "bit_vector.hpp" 4 | #include "darray.hpp" 5 | 6 | namespace succinct { 7 | 8 | class elias_fano { 9 | public: 10 | elias_fano() 11 | : m_size(0) 12 | {} 13 | 14 | struct elias_fano_builder { 15 | elias_fano_builder(uint64_t n, uint64_t m) 16 | : m_n(n) 17 | , m_m(m) 18 | , m_pos(0) 19 | , m_last(0) 20 | , m_l(uint8_t((m && n / m) ? broadword::msb(n / m) : 0)) 21 | , m_high_bits((m + 1) + (n >> m_l) + 1) 22 | { 23 | assert(m_l < 64); // for the correctness of low_mask 24 | m_low_bits.reserve(m * m_l); 25 | } 26 | 27 | inline void push_back(uint64_t i) { 28 | assert(i >= m_last && i <= m_n); 29 | m_last = i; 30 | uint64_t low_mask = (1ULL << m_l) - 1; 31 | 32 | if (m_l) { 33 | m_low_bits.append_bits(i & low_mask, m_l); 34 | } 35 | m_high_bits.set((i >> m_l) + m_pos, 1); 36 | ++m_pos; 37 | assert(m_pos <= m_m); (void)m_m; 38 | } 39 | 40 | friend class elias_fano; 41 | private: 42 | uint64_t m_n; 43 | uint64_t m_m; 44 | uint64_t m_pos; 45 | uint64_t m_last; 46 | uint8_t m_l; 47 | bit_vector_builder m_high_bits; 48 | bit_vector_builder m_low_bits; 49 | }; 50 | 51 | 52 | elias_fano(bit_vector_builder* bvb, bool with_rank_index = true) 53 | { 54 | bit_vector_builder::bits_type& bits = bvb->move_bits(); 55 | uint64_t n = bvb->size(); 56 | 57 | uint64_t m = 0; 58 | for (size_t i = 0; i < bits.size(); ++i) { 59 | m += broadword::popcount(bits[i]); 60 | } 61 | 62 | bit_vector bv(bvb); 63 | elias_fano_builder builder(n, m); 64 | 65 | uint64_t i = 0; 66 | for (uint64_t 
pos = 0; pos < m; ++pos) { 67 | i = bv.successor1(i); 68 | builder.push_back(i); 69 | ++i; 70 | } 71 | 72 | build(builder, with_rank_index); 73 | } 74 | 75 | elias_fano(elias_fano_builder* builder, bool with_rank_index = true) 76 | { 77 | build(*builder, with_rank_index); 78 | } 79 | 80 | template 81 | void map(Visitor& visit) { 82 | visit 83 | (m_size, "m_size") 84 | (m_high_bits, "m_high_bits") 85 | (m_high_bits_d1, "m_high_bits_d1") 86 | (m_high_bits_d0, "m_high_bits_d0") 87 | (m_low_bits, "m_low_bits") 88 | (m_l, "m_l") 89 | ; 90 | } 91 | 92 | void swap(elias_fano& other) { 93 | std::swap(other.m_size, m_size); 94 | other.m_high_bits.swap(m_high_bits); 95 | other.m_high_bits_d1.swap(m_high_bits_d1); 96 | other.m_high_bits_d0.swap(m_high_bits_d0); 97 | other.m_low_bits.swap(m_low_bits); 98 | std::swap(other.m_l, m_l); 99 | } 100 | 101 | inline uint64_t size() const { 102 | return m_size; 103 | } 104 | 105 | inline uint64_t num_ones() const { 106 | return m_high_bits_d1.num_positions(); 107 | } 108 | 109 | inline bool operator[](uint64_t pos) const { 110 | assert(pos < size()); 111 | assert(m_high_bits_d0.num_positions()); // needs rank index 112 | uint64_t h_rank = pos >> m_l; 113 | uint64_t h_pos = m_high_bits_d0.select(m_high_bits, h_rank); 114 | uint64_t rank = h_pos - h_rank; 115 | uint64_t l_pos = pos & ((1ULL << m_l) - 1); 116 | 117 | while (h_pos > 0 118 | && m_high_bits[h_pos - 1]) { 119 | --rank; 120 | --h_pos; 121 | uint64_t cur_low_bits = m_low_bits.get_bits(rank * m_l, m_l); 122 | if (cur_low_bits == l_pos) { 123 | return true; 124 | } else if (cur_low_bits < l_pos) { 125 | return false; 126 | } 127 | } 128 | 129 | return false; 130 | } 131 | 132 | inline uint64_t select(uint64_t n) const { 133 | return 134 | ((m_high_bits_d1.select(m_high_bits, n) - n) << m_l) 135 | | m_low_bits.get_bits(n * m_l, m_l); 136 | } 137 | 138 | inline uint64_t rank(uint64_t pos) const { 139 | assert(pos <= m_size); 140 | assert(m_high_bits_d0.num_positions()); // needs 
rank index 141 | if (pos == size()) { 142 | return num_ones(); 143 | } 144 | 145 | uint64_t h_rank = pos >> m_l; 146 | uint64_t h_pos = m_high_bits_d0.select(m_high_bits, h_rank); 147 | uint64_t rank = h_pos - h_rank; 148 | uint64_t l_pos = pos & ((1ULL << m_l) - 1); 149 | 150 | while (h_pos > 0 151 | && m_high_bits[h_pos - 1] 152 | && m_low_bits.get_bits((rank - 1) * m_l, m_l) >= l_pos) { 153 | --rank; 154 | --h_pos; 155 | } 156 | 157 | return rank; 158 | } 159 | 160 | inline uint64_t predecessor1(uint64_t pos) const { 161 | return select(rank(pos + 1) - 1); 162 | } 163 | 164 | inline uint64_t successor1(uint64_t pos) const { 165 | return select(rank(pos)); 166 | } 167 | 168 | 169 | // Equivalent to select(n) - select(n - 1) (and select(0) for n = 0) 170 | // Involves a linear search for predecessor in high bits. 171 | // Efficient only if there are no large gaps in high bits 172 | // XXX(ot): could make this adaptive 173 | inline uint64_t delta(uint64_t n) const { 174 | uint64_t high_val = m_high_bits_d1.select(m_high_bits, n); 175 | uint64_t low_val = m_low_bits.get_bits(n * m_l, m_l); 176 | if (n) { 177 | return 178 | // need a + here instead of an | for carry 179 | ((high_val - m_high_bits.predecessor1(high_val - 1) - 1) << m_l) 180 | + low_val - m_low_bits.get_bits((n - 1) * m_l, m_l); 181 | } else { 182 | return 183 | ((high_val - n) << m_l) 184 | | low_val; 185 | } 186 | } 187 | 188 | 189 | // same as delta() 190 | inline std::pair select_range(uint64_t n) const 191 | { 192 | assert(n + 1 < num_ones()); 193 | uint64_t high_val_b = m_high_bits_d1.select(m_high_bits, n); 194 | uint64_t low_val_b = m_low_bits.get_bits(n * m_l, m_l); 195 | uint64_t high_val_e = m_high_bits.successor1(high_val_b + 1); 196 | uint64_t low_val_e = m_low_bits.get_bits((n + 1) * m_l, m_l); 197 | return std::make_pair(((high_val_b - n) << m_l) | low_val_b, 198 | ((high_val_e - n - 1) << m_l) | low_val_e); 199 | } 200 | 201 | struct select_enumerator { 202 | 203 | 
select_enumerator(elias_fano const& ef, uint64_t i) 204 | : m_ef(&ef) 205 | , m_i(i) 206 | , m_l(ef.m_l) 207 | { 208 | m_low_mask = (uint64_t(1) << m_l) - 1; 209 | m_low_buf = 0; 210 | if (m_l) { 211 | m_chunks_in_word = 64 / m_l; 212 | m_chunks_avail = 0; 213 | } else { 214 | m_chunks_in_word = 0; 215 | m_chunks_avail = m_ef->num_ones(); 216 | } 217 | 218 | if (!m_ef->num_ones()) return; 219 | uint64_t pos = m_ef->m_high_bits_d1.select(m_ef->m_high_bits, m_i); 220 | m_high_enum = bit_vector::unary_enumerator(m_ef->m_high_bits, pos); 221 | assert(m_l < 64); 222 | } 223 | 224 | uint64_t next() { 225 | if (!m_chunks_avail--) { 226 | m_low_buf = m_ef->m_low_bits.get_word(m_i * m_l); 227 | m_chunks_avail = m_chunks_in_word - 1; 228 | } 229 | 230 | uint64_t high = m_high_enum.next(); 231 | assert(high == m_ef->m_high_bits_d1.select(m_ef->m_high_bits, m_i)); 232 | uint64_t low = m_low_buf & m_low_mask; 233 | uint64_t ret = 234 | ((high - m_i) << m_l) 235 | | low; 236 | m_i += 1; 237 | m_low_buf >>= m_l; 238 | 239 | return ret; 240 | } 241 | 242 | private: 243 | 244 | elias_fano const* m_ef; 245 | uint64_t m_i; 246 | uint64_t m_l; 247 | bit_vector::unary_enumerator m_high_enum; 248 | uint64_t m_low_buf; 249 | uint64_t m_low_mask; 250 | uint64_t m_chunks_in_word; 251 | uint64_t m_chunks_avail; 252 | }; 253 | 254 | protected: 255 | void build(elias_fano_builder& builder, bool with_rank_index) { 256 | m_size = builder.m_n; 257 | m_l = builder.m_l; 258 | bit_vector(&builder.m_high_bits).swap(m_high_bits); 259 | darray1(m_high_bits).swap(m_high_bits_d1); 260 | if (with_rank_index) { 261 | darray0(m_high_bits).swap(m_high_bits_d0); 262 | } 263 | bit_vector(&builder.m_low_bits).swap(m_low_bits); 264 | } 265 | 266 | uint64_t m_size; 267 | bit_vector m_high_bits; 268 | darray1 m_high_bits_d1; 269 | darray0 m_high_bits_d0; 270 | bit_vector m_low_bits; 271 | uint8_t m_l; 272 | }; 273 | 274 | } 275 | -------------------------------------------------------------------------------- 
/elias_fano_compressed_list.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "elias_fano.hpp" 4 | 5 | namespace succinct { 6 | 7 | struct elias_fano_compressed_list 8 | { 9 | typedef uint64_t value_type; 10 | 11 | elias_fano_compressed_list() {} 12 | 13 | template 14 | elias_fano_compressed_list(Range const& ints) 15 | { 16 | typedef typename boost::range_const_iterator::type iterator_t; 17 | 18 | size_t s = 0; 19 | size_t n = 0; 20 | for (iterator_t iter = boost::begin(ints); 21 | iter != boost::end(ints); 22 | ++iter) { 23 | s += broadword::msb(*iter + 1); 24 | n += 1; 25 | } 26 | 27 | elias_fano::elias_fano_builder ef_builder(s + 1, n + 1); 28 | bit_vector_builder bits_builder; 29 | 30 | ef_builder.push_back(bits_builder.size()); 31 | for (iterator_t iter = boost::begin(ints); 32 | iter != boost::end(ints); 33 | ++iter) { 34 | size_t val = *iter + 1; 35 | size_t l = broadword::msb(val); 36 | bits_builder.append_bits(val ^ (uint64_t(1) << l), l); 37 | ef_builder.push_back(bits_builder.size()); 38 | } 39 | elias_fano(&ef_builder, false).swap(m_ef); 40 | bit_vector(&bits_builder).swap(m_bits); 41 | } 42 | 43 | value_type operator[](size_t idx) const 44 | { 45 | std::pair r = m_ef.select_range(idx); 46 | size_t l = r.second - r.first; 47 | return ((uint64_t(1) << l) | m_bits.get_bits(r.first, l)) - 1; 48 | } 49 | 50 | size_t size() const 51 | { 52 | return m_ef.num_ones() - 1; 53 | } 54 | 55 | void swap(elias_fano_compressed_list& other) 56 | { 57 | m_ef.swap(other.m_ef); 58 | m_bits.swap(other.m_bits); 59 | } 60 | 61 | template 62 | void map(Visitor& visit) { 63 | visit 64 | (m_ef, "m_ef") 65 | (m_bits, "m_bits") 66 | ; 67 | } 68 | 69 | private: 70 | elias_fano m_ef; 71 | bit_vector m_bits; 72 | }; 73 | 74 | } 75 | -------------------------------------------------------------------------------- /elias_fano_list.hpp: -------------------------------------------------------------------------------- 
1 | #pragma once 2 | 3 | #include "elias_fano.hpp" 4 | 5 | namespace succinct { 6 | 7 | struct elias_fano_list 8 | { 9 | typedef uint64_t value_type; 10 | 11 | elias_fano_list() {} 12 | 13 | template 14 | elias_fano_list(Range const& ints) 15 | { 16 | typedef typename boost::range_const_iterator::type iterator_t; 17 | 18 | size_t s = 0; 19 | size_t n = 0; 20 | for (iterator_t iter = boost::begin(ints); 21 | iter != boost::end(ints); 22 | ++iter) { 23 | s += *iter; 24 | n += 1; 25 | } 26 | 27 | elias_fano::elias_fano_builder ef_builder(s + 1, n); 28 | size_t cur_base = 0; 29 | for (iterator_t iter = boost::begin(ints); 30 | iter != boost::end(ints); 31 | ++iter) { 32 | cur_base += *iter; 33 | ef_builder.push_back(cur_base); 34 | } 35 | elias_fano(&ef_builder, false).swap(m_ef); 36 | } 37 | 38 | value_type operator[](size_t idx) const 39 | { 40 | return m_ef.delta(idx); 41 | } 42 | 43 | size_t size() const 44 | { 45 | return m_ef.num_ones(); 46 | } 47 | 48 | size_t sum() const { 49 | return m_ef.size() - 1; 50 | } 51 | 52 | void swap(elias_fano_list& other) 53 | { 54 | m_ef.swap(other.m_ef); 55 | } 56 | 57 | template 58 | void map(Visitor& visit) { 59 | visit 60 | (m_ef, "m_ef") 61 | ; 62 | } 63 | 64 | private: 65 | elias_fano m_ef; 66 | }; 67 | 68 | } 69 | -------------------------------------------------------------------------------- /forward_enumerator.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | namespace succinct { 4 | 5 | template 6 | struct forward_enumerator 7 | { 8 | typedef typename Container::value_type value_type; 9 | 10 | forward_enumerator(Container const& c, size_t idx = 0) 11 | : m_c(&c) 12 | , m_idx(idx) 13 | {} 14 | 15 | value_type next() 16 | { 17 | return (*m_c)[m_idx++]; 18 | } 19 | 20 | private: 21 | Container const* m_c; 22 | size_t m_idx; 23 | }; 24 | 25 | } 26 | -------------------------------------------------------------------------------- /gamma_bit_vector.hpp: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "broadword.hpp" 4 | #include "forward_enumerator.hpp" 5 | #include "darray64.hpp" 6 | 7 | namespace succinct { 8 | 9 | // Compressed random-access vector to store unsigned integers 10 | // using gamma codes. This implementation optimizes for integers 11 | // whose representation is at least one bit long. It can be used, 12 | // for example, to represent signed integers (with uniform sign 13 | // distribution) by putting the sign in the LSB. For generic 14 | // unsigned integers, use gamma_vector 15 | 16 | struct gamma_bit_vector 17 | { 18 | typedef uint64_t value_type; 19 | 20 | gamma_bit_vector() {} 21 | 22 | template 23 | gamma_bit_vector(Range const& vals) 24 | { 25 | darray64::builder high_bits; 26 | bit_vector_builder low_bits; 27 | 28 | high_bits.append1(); 29 | 30 | typedef typename boost::range_const_iterator::type iterator_t; 31 | for (iterator_t iter = boost::begin(vals); 32 | iter != boost::end(vals); 33 | ++iter) { 34 | const uint64_t val = *iter + 2; // increment the second bit 35 | 36 | uint8_t l = broadword::msb(val); 37 | 38 | assert(l > 0); 39 | high_bits.append1(l - 1); 40 | low_bits.append_bits(val ^ (uint64_t(1) << l), l); 41 | } 42 | 43 | darray64(&high_bits).swap(m_high_bits); 44 | bit_vector(&low_bits).swap(m_low_bits); 45 | } 46 | 47 | value_type operator[](size_t idx) const 48 | { 49 | size_t pos = m_high_bits.select(idx); 50 | size_t l; // ignored 51 | return retrieve_value(pos, l); 52 | } 53 | 54 | size_t size() const 55 | { 56 | return m_high_bits.num_ones() - 1; 57 | } 58 | 59 | void swap(gamma_bit_vector& other) 60 | { 61 | m_high_bits.swap(other.m_high_bits); 62 | m_low_bits.swap(other.m_low_bits); 63 | } 64 | 65 | template 66 | void map(Visitor& visit) { 67 | visit 68 | (m_high_bits, "m_high_bits") 69 | (m_low_bits, "m_low_bits") 70 | ; 71 | } 72 | 73 | private: 74 | 75 | value_type retrieve_value(size_t pos, size_t& l) 
const 76 | { 77 | assert(m_high_bits.bits()[pos] == 1); 78 | l = broadword::lsb(m_high_bits.bits().get_word(pos + 1)); 79 | uint64_t chunk = m_low_bits.get_bits(pos, l + 1); // bit . val 80 | uint64_t val = ((uint64_t(1) << (l + 1)) | chunk) - 2; 81 | return val; 82 | } 83 | 84 | friend struct forward_enumerator; 85 | 86 | darray64 m_high_bits; 87 | bit_vector m_low_bits; 88 | }; 89 | 90 | template <> 91 | struct forward_enumerator 92 | { 93 | typedef gamma_bit_vector::value_type value_type; 94 | 95 | forward_enumerator(gamma_bit_vector const& c, size_t idx = 0) 96 | : m_c(&c) 97 | , m_idx(idx) 98 | , m_pos(0) 99 | { 100 | if (idx < m_c->size()) { 101 | m_pos = m_c->m_high_bits.select(idx); 102 | m_high_bits_enumerator = 103 | bit_vector::unary_enumerator(m_c->m_high_bits.bits(), m_pos + 1); 104 | m_low_bits_enumerator = bit_vector::enumerator(m_c->m_low_bits, m_pos); 105 | } 106 | } 107 | 108 | void skip(size_t k) 109 | { 110 | // XXX actually implement this 111 | while (k--) next(); 112 | } 113 | 114 | value_type next() 115 | { 116 | assert(m_idx <= m_c->size()); 117 | size_t next_pos = m_high_bits_enumerator.next(); 118 | size_t l = next_pos - m_pos - 1; 119 | m_pos = next_pos; 120 | uint64_t chunk = m_low_bits_enumerator.take(l + 1); 121 | uint64_t val = (chunk | (uint64_t(1) << (l + 1))) - 2; 122 | m_idx += 1; 123 | return val; 124 | } 125 | 126 | private: 127 | gamma_bit_vector const* m_c; 128 | size_t m_idx; 129 | size_t m_pos; 130 | 131 | bit_vector::unary_enumerator m_high_bits_enumerator; 132 | bit_vector::enumerator m_low_bits_enumerator; 133 | }; 134 | } 135 | -------------------------------------------------------------------------------- /gamma_vector.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "broadword.hpp" 4 | #include "forward_enumerator.hpp" 5 | #include "darray64.hpp" 6 | 7 | namespace succinct { 8 | 9 | // Compressed random-access vector to store unsigned integers 10 | 
// using gamma codes. 11 | struct gamma_vector 12 | { 13 | typedef uint64_t value_type; 14 | 15 | gamma_vector() {} 16 | 17 | template 18 | gamma_vector(Range const& ints) 19 | { 20 | darray64::builder high_bits; 21 | bit_vector_builder low_bits; 22 | 23 | high_bits.append1(); 24 | 25 | typedef typename boost::range_const_iterator::type iterator_t; 26 | for (iterator_t iter = boost::begin(ints); 27 | iter != boost::end(ints); 28 | ++iter) { 29 | const value_type val = *iter + 1; 30 | 31 | uint8_t l = broadword::msb(val); 32 | 33 | low_bits.append_bits(val ^ (uint64_t(1) << l), l); 34 | high_bits.append1(l); 35 | } 36 | 37 | darray64(&high_bits).swap(m_high_bits); 38 | bit_vector(&low_bits).swap(m_low_bits); 39 | } 40 | 41 | value_type operator[](size_t idx) const 42 | { 43 | size_t pos = m_high_bits.select(idx); 44 | size_t l; // ignored 45 | return retrieve_value(idx, pos, l); 46 | } 47 | 48 | size_t size() const 49 | { 50 | return m_high_bits.num_ones() - 1; 51 | } 52 | 53 | void swap(gamma_vector& other) 54 | { 55 | m_high_bits.swap(other.m_high_bits); 56 | m_low_bits.swap(other.m_low_bits); 57 | } 58 | 59 | template 60 | void map(Visitor& visit) { 61 | visit 62 | (m_high_bits, "m_high_bits") 63 | (m_low_bits, "m_low_bits") 64 | ; 65 | } 66 | 67 | private: 68 | 69 | value_type retrieve_value(size_t idx, size_t pos, size_t& l) const 70 | { 71 | assert(m_high_bits.bits()[pos] == 1); 72 | l = broadword::lsb(m_high_bits.bits().get_word(pos + 1)); 73 | return ((uint64_t(1) << l) | m_low_bits.get_bits(pos - idx, l)) - 1; 74 | } 75 | 76 | friend struct forward_enumerator; 77 | darray64 m_high_bits; 78 | bit_vector m_low_bits; 79 | }; 80 | 81 | template <> 82 | struct forward_enumerator 83 | { 84 | typedef gamma_vector::value_type value_type; 85 | 86 | forward_enumerator(gamma_vector const& c, size_t idx = 0) 87 | : m_c(&c) 88 | , m_idx(idx) 89 | , m_pos(0) 90 | { 91 | if (idx < m_c->size()) { 92 | m_pos = m_c->m_high_bits.select(idx); 93 | m_high_bits_enumerator = 94 | 
bit_vector::unary_enumerator(m_c->m_high_bits.bits(), m_pos + 1); 95 | m_low_bits_enumerator = bit_vector::enumerator(m_c->m_low_bits, m_pos - m_idx); 96 | } 97 | } 98 | 99 | value_type next() 100 | { 101 | assert(m_idx <= m_c->size()); 102 | size_t next_pos = m_high_bits_enumerator.next(); 103 | size_t l = next_pos - m_pos - 1; 104 | m_pos = next_pos; 105 | uint64_t chunk = m_low_bits_enumerator.take(l); 106 | uint64_t val = (chunk | (uint64_t(1) << (l))) - 1; 107 | m_idx += 1; 108 | return val; 109 | } 110 | 111 | private: 112 | gamma_vector const* m_c; 113 | size_t m_idx; 114 | size_t m_pos; 115 | 116 | bit_vector::unary_enumerator m_high_bits_enumerator; 117 | bit_vector::enumerator m_low_bits_enumerator; 118 | }; 119 | } 120 | -------------------------------------------------------------------------------- /intrinsics.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include "succinct_config.hpp" 5 | 6 | #if SUCCINCT_USE_INTRINSICS 7 | #include 8 | 9 | #if defined(__GNUC__) || defined(__clang__) 10 | # define __INTRIN_INLINE inline __attribute__((__always_inline__)) 11 | #elif defined(_MSC_VER) 12 | # define __INTRIN_INLINE inline __forceinline 13 | #else 14 | # define __INTRIN_INLINE inline 15 | #endif 16 | 17 | #endif 18 | 19 | #if SUCCINCT_USE_POPCNT 20 | # if !SUCCINCT_USE_INTRINSICS 21 | # error "Intrinsics support needed for popcnt" 22 | # endif 23 | #include 24 | #endif 25 | 26 | 27 | 28 | namespace succinct { namespace intrinsics { 29 | 30 | 31 | #if SUCCINCT_USE_INTRINSICS 32 | 33 | __INTRIN_INLINE uint64_t byteswap64(uint64_t value) 34 | { 35 | #if defined(__GNUC__) || defined(__clang__) 36 | return __builtin_bswap64(value); 37 | #elif defined(_MSC_VER) 38 | return _byteswap_uint64(value); 39 | #else 40 | # error Unsupported platform 41 | #endif 42 | } 43 | 44 | __INTRIN_INLINE bool bsf64(unsigned long* const index, const uint64_t mask) 45 | { 46 | #if defined(__GNUC__) || 
defined(__clang__) 47 | if (mask) { 48 | *index = (unsigned long)__builtin_ctzll(mask); 49 | return true; 50 | } else { 51 | return false; 52 | } 53 | #elif defined(_MSC_VER) 54 | return _BitScanForward64(index, mask) != 0; 55 | #else 56 | # error Unsupported platform 57 | #endif 58 | } 59 | 60 | __INTRIN_INLINE bool bsr64(unsigned long* const index, const uint64_t mask) 61 | { 62 | #if defined(__GNUC__) || defined(__clang__) 63 | if (mask) { 64 | *index = (unsigned long)(63 - __builtin_clzll(mask)); 65 | return true; 66 | } else { 67 | return false; 68 | } 69 | #elif defined(_MSC_VER) 70 | return _BitScanReverse64(index, mask) != 0; 71 | #else 72 | # error Unsupported platform 73 | #endif 74 | } 75 | 76 | template 77 | __INTRIN_INLINE void prefetch(T const* ptr) 78 | { 79 | _mm_prefetch((const char*)ptr, _MM_HINT_T0); 80 | } 81 | 82 | #else /* SUCCINCT_USE_INTRINSICS */ 83 | 84 | template 85 | inline void prefetch(T const* /* ptr */) 86 | { 87 | /* do nothing */ 88 | } 89 | 90 | #endif /* SUCCINCT_USE_INTRINSICS */ 91 | 92 | #if SUCCINCT_USE_POPCNT 93 | 94 | __INTRIN_INLINE uint64_t popcount(uint64_t x) 95 | { 96 | return uint64_t(_mm_popcnt_u64(x)); 97 | } 98 | 99 | #endif /* SUCCINCT_USE_POPCNT */ 100 | 101 | }} 102 | -------------------------------------------------------------------------------- /mappable_vector.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | 14 | #include "intrinsics.hpp" 15 | 16 | namespace succinct { namespace mapper { 17 | 18 | namespace detail { 19 | class freeze_visitor; 20 | class map_visitor; 21 | class sizeof_visitor; 22 | } 23 | 24 | typedef boost::function deleter_t; 25 | 26 | template // T must be a POD 27 | class mappable_vector : boost::noncopyable { 28 | public: 29 | typedef T value_type; 30 | typedef const T* iterator; 31 | typedef const T* 
const_iterator; 32 | 33 | mappable_vector() 34 | : m_data(0) 35 | , m_size(0) 36 | , m_deleter() 37 | {} 38 | 39 | template 40 | mappable_vector(Range const& from) 41 | : m_data(0) 42 | , m_size(0) 43 | { 44 | size_t size = boost::size(from); 45 | T* data = new T[size]; 46 | m_deleter = boost::lambda::bind(boost::lambda::delete_array(), data); 47 | 48 | std::copy(boost::begin(from), 49 | boost::end(from), 50 | data); 51 | m_data = data; 52 | m_size = size; 53 | } 54 | 55 | ~mappable_vector() { 56 | if (m_deleter) { 57 | m_deleter(); 58 | } 59 | } 60 | 61 | void swap(mappable_vector& other) { 62 | using std::swap; 63 | swap(m_data, other.m_data); 64 | swap(m_size, other.m_size); 65 | swap(m_deleter, other.m_deleter); 66 | } 67 | 68 | void clear() { 69 | mappable_vector().swap(*this); 70 | } 71 | 72 | void steal(std::vector& vec) { 73 | clear(); 74 | m_size = vec.size(); 75 | if (m_size) { 76 | std::vector* new_vec = new std::vector; 77 | new_vec->swap(vec); 78 | m_deleter = boost::lambda::bind(boost::lambda::delete_ptr(), new_vec); 79 | m_data = &(*new_vec)[0]; 80 | } 81 | } 82 | 83 | template 84 | void assign(Range const& from) { 85 | clear(); 86 | mappable_vector(from).swap(*this); 87 | } 88 | 89 | uint64_t size() const { 90 | return m_size; 91 | } 92 | 93 | inline const_iterator begin() const { 94 | return m_data; 95 | } 96 | 97 | inline const_iterator end() const { 98 | return m_data + m_size; 99 | } 100 | 101 | inline T const& operator[](uint64_t i) const { 102 | assert(i < m_size); 103 | return m_data[i]; 104 | } 105 | 106 | inline T const* data() const { 107 | return m_data; 108 | } 109 | 110 | inline void prefetch(size_t i) const { 111 | succinct::intrinsics::prefetch(m_data + i); 112 | } 113 | 114 | friend class detail::freeze_visitor; 115 | friend class detail::map_visitor; 116 | friend class detail::sizeof_visitor; 117 | 118 | protected: 119 | const T* m_data; 120 | uint64_t m_size; 121 | deleter_t m_deleter; 122 | }; 123 | 124 | }} 125 | 
-------------------------------------------------------------------------------- /mapper.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "mappable_vector.hpp" 13 | 14 | namespace succinct { namespace mapper { 15 | 16 | struct freeze_flags { 17 | // enum { 18 | // }; 19 | }; 20 | 21 | struct map_flags { 22 | enum { 23 | warmup = 1 24 | }; 25 | }; 26 | 27 | struct size_node; 28 | typedef boost::shared_ptr size_node_ptr; 29 | 30 | struct size_node 31 | { 32 | size_node() 33 | : size(0) 34 | {} 35 | 36 | std::string name; 37 | size_t size; 38 | std::vector children; 39 | 40 | void dump(std::ostream& os = std::cerr, size_t depth = 0) { 41 | os << std::string(depth * 4, ' ') 42 | << name << ": " 43 | << size << '\n'; 44 | for (size_t i = 0; i < children.size(); ++i) { 45 | children[i]->dump(os, depth + 1); 46 | } 47 | } 48 | }; 49 | 50 | namespace detail { 51 | class freeze_visitor : boost::noncopyable { 52 | public: 53 | freeze_visitor(std::ofstream& fout, uint64_t flags) 54 | : m_fout(fout) 55 | , m_flags(flags) 56 | , m_written(0) 57 | { 58 | // Save freezing flags 59 | m_fout.write(reinterpret_cast(&m_flags), sizeof(m_flags)); 60 | m_written += sizeof(m_flags); 61 | } 62 | 63 | template 64 | typename boost::disable_if, freeze_visitor&>::type 65 | operator()(T& val, const char* /* friendly_name */) { 66 | val.map(*this); 67 | return *this; 68 | } 69 | 70 | template 71 | typename boost::enable_if, freeze_visitor&>::type 72 | operator()(T& val, const char* /* friendly_name */) { 73 | m_fout.write(reinterpret_cast(&val), sizeof(T)); 74 | m_written += sizeof(T); 75 | return *this; 76 | } 77 | 78 | template 79 | freeze_visitor& 80 | operator()(mappable_vector& vec, const char* /* friendly_name */) { 81 | (*this)(vec.m_size, "size"); 82 | 83 | size_t n_bytes = static_cast(vec.m_size * sizeof(T)); 84 | 
m_fout.write(reinterpret_cast(vec.m_data), long(n_bytes)); 85 | m_written += n_bytes; 86 | 87 | return *this; 88 | } 89 | 90 | size_t written() const { 91 | return m_written; 92 | } 93 | 94 | protected: 95 | std::ofstream& m_fout; 96 | const uint64_t m_flags; 97 | uint64_t m_written; 98 | }; 99 | 100 | class map_visitor : boost::noncopyable { 101 | public: 102 | map_visitor(const char* base_address, uint64_t flags) 103 | : m_base(base_address) 104 | , m_cur(m_base) 105 | , m_flags(flags) 106 | { 107 | m_freeze_flags = *reinterpret_cast(m_cur); 108 | m_cur += sizeof(m_freeze_flags); 109 | } 110 | 111 | template 112 | typename boost::disable_if, map_visitor&>::type 113 | operator()(T& val, const char* /* friendly_name */) { 114 | val.map(*this); 115 | return *this; 116 | } 117 | 118 | template 119 | typename boost::enable_if, map_visitor&>::type 120 | operator()(T& val, const char* /* friendly_name */) { 121 | val = *reinterpret_cast(m_cur); 122 | m_cur += sizeof(T); 123 | return *this; 124 | } 125 | 126 | template 127 | map_visitor& 128 | operator()(mappable_vector& vec, const char* /* friendly_name */) { 129 | vec.clear(); 130 | (*this)(vec.m_size, "size"); 131 | 132 | vec.m_data = reinterpret_cast(m_cur); 133 | size_t bytes = vec.m_size * sizeof(T); 134 | 135 | if (m_flags & map_flags::warmup) { 136 | T foo; 137 | volatile T* bar = &foo; 138 | for (size_t i = 0; i < vec.m_size; ++i) { 139 | *bar = vec.m_data[i]; 140 | } 141 | } 142 | 143 | m_cur += bytes; 144 | return *this; 145 | } 146 | 147 | size_t bytes_read() const { 148 | return size_t(m_cur - m_base); 149 | } 150 | 151 | protected: 152 | const char* m_base; 153 | const char* m_cur; 154 | const uint64_t m_flags; 155 | uint64_t m_freeze_flags; 156 | }; 157 | 158 | class sizeof_visitor : boost::noncopyable { 159 | public: 160 | sizeof_visitor(bool with_tree = false) 161 | : m_size(0) 162 | { 163 | if (with_tree) { 164 | m_cur_size_node = boost::make_shared(); 165 | } 166 | } 167 | 168 | template 169 | typename 
boost::disable_if, sizeof_visitor&>::type 170 | operator()(T& val, const char* friendly_name) { 171 | size_t checkpoint = m_size; 172 | size_node_ptr parent_node; 173 | if (m_cur_size_node) { 174 | parent_node = m_cur_size_node; 175 | m_cur_size_node = make_node(friendly_name); 176 | } 177 | 178 | val.map(*this); 179 | 180 | if (m_cur_size_node) { 181 | m_cur_size_node->size = m_size - checkpoint; 182 | m_cur_size_node = parent_node; 183 | } 184 | return *this; 185 | } 186 | 187 | template 188 | typename boost::enable_if, sizeof_visitor&>::type 189 | operator()(T& /* val */, const char* /* friendly_name */) { 190 | // don't track PODs in the size tree (they are constant sized) 191 | m_size += sizeof(T); 192 | return *this; 193 | } 194 | 195 | template 196 | sizeof_visitor& 197 | operator()(mappable_vector& vec, const char* friendly_name) { 198 | size_t checkpoint = m_size; 199 | (*this)(vec.m_size, "size"); 200 | m_size += static_cast(vec.m_size * sizeof(T)); 201 | 202 | if (m_cur_size_node) { 203 | make_node(friendly_name)->size = m_size - checkpoint; 204 | } 205 | 206 | return *this; 207 | } 208 | 209 | size_t size() const { 210 | return m_size; 211 | } 212 | 213 | size_node_ptr size_tree() const { 214 | assert(m_cur_size_node); 215 | return m_cur_size_node; 216 | } 217 | 218 | protected: 219 | 220 | size_node_ptr make_node(const char* name) 221 | { 222 | size_node_ptr node = boost::make_shared(); 223 | m_cur_size_node->children.push_back(node); 224 | node->name = name; 225 | return node; 226 | } 227 | 228 | size_t m_size; 229 | size_node_ptr m_cur_size_node; 230 | }; 231 | 232 | } 233 | 234 | template 235 | size_t freeze(T& val, std::ofstream& fout, uint64_t flags = 0, const char* friendly_name = "") 236 | { 237 | detail::freeze_visitor freezer(fout, flags); 238 | freezer(val, friendly_name); 239 | return freezer.written(); 240 | } 241 | 242 | template 243 | size_t freeze(T& val, const char* filename, uint64_t flags = 0, const char* friendly_name = "") 244 | { 
245 | std::ofstream fout(filename, std::ios::binary); 246 | return freeze(val, fout, flags, friendly_name); 247 | } 248 | 249 | template 250 | size_t map(T& val, const char* base_address, uint64_t flags = 0, const char* friendly_name = "") 251 | { 252 | detail::map_visitor mapper(base_address, flags); 253 | mapper(val, friendly_name); 254 | return mapper.bytes_read(); 255 | } 256 | 257 | template 258 | size_t map(T& val, boost::iostreams::mapped_file_source const& m, uint64_t flags = 0, const char* friendly_name = "") 259 | { 260 | return map(val, m.data(), flags, friendly_name); 261 | } 262 | 263 | template 264 | size_t size_of(T& val) 265 | { 266 | detail::sizeof_visitor sizer; 267 | sizer(val, ""); 268 | return sizer.size(); 269 | } 270 | 271 | template 272 | size_node_ptr size_tree_of(T& val, const char* friendly_name = "") 273 | { 274 | detail::sizeof_visitor sizer(true); 275 | sizer(val, friendly_name); 276 | assert(sizer.size_tree()->children.size()); 277 | return sizer.size_tree()->children[0]; 278 | } 279 | 280 | }} 281 | -------------------------------------------------------------------------------- /nibble_vector.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | 7 | #include "succinct/mappable_vector.hpp" 8 | 9 | namespace succinct { 10 | 11 | class nibble_vector { 12 | public: 13 | nibble_vector() 14 | : m_size(0) 15 | {} 16 | 17 | template 18 | nibble_vector(Range const& from) 19 | : m_size(0) 20 | { 21 | std::vector nibbles; 22 | bool parity = 0; 23 | uint8_t cur_byte = 0; 24 | for (typename boost::range_const_iterator::type iter = boost::begin(from); 25 | iter != boost::end(from); 26 | ++iter) { 27 | assert(*iter < 16); 28 | cur_byte |= *iter << (parity * 4); 29 | parity = !parity; 30 | if (!parity) { 31 | nibbles.push_back(cur_byte); 32 | cur_byte = 0; 33 | } 34 | ++m_size; 35 | } 36 | if (parity) { 37 | nibbles.push_back(cur_byte); 38 | } 39 | 
m_nibbles.steal(nibbles); 40 | } 41 | 42 | template 43 | void map(Visitor& visit) { 44 | visit 45 | (m_size, "m_size") 46 | (m_nibbles, "m_nibbles"); 47 | } 48 | 49 | void swap(nibble_vector& other) { 50 | std::swap(other.m_size, m_size); 51 | other.m_nibbles.swap(m_nibbles); 52 | } 53 | 54 | size_t size() const { 55 | return m_size; 56 | } 57 | 58 | uint8_t operator[](uint64_t pos) const { 59 | assert(pos < m_size); 60 | return (m_nibbles[pos / 2] >> ((pos % 2) * 4)) & 0x0F; 61 | } 62 | 63 | protected: 64 | size_t m_size; 65 | mapper::mappable_vector m_nibbles; 66 | }; 67 | 68 | } 69 | -------------------------------------------------------------------------------- /perftest/.gitignore: -------------------------------------------------------------------------------- 1 | perftest_bp_vector 2 | perftest_bp_vector_rmq 3 | perftest_cartesian_tree 4 | perftest_elias_fano 5 | -------------------------------------------------------------------------------- /perftest/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | file(GLOB SUCCINCT_TEST_SOURCES perftest_*.cpp) 2 | foreach(TEST_SRC ${SUCCINCT_TEST_SOURCES}) 3 | get_filename_component (TEST_SRC_NAME ${TEST_SRC} NAME_WE) 4 | add_executable(${TEST_SRC_NAME} ${TEST_SRC}) 5 | target_link_libraries(${TEST_SRC_NAME} 6 | succinct 7 | ${Boost_LIBRARIES} 8 | ) 9 | endforeach(TEST_SRC) 10 | -------------------------------------------------------------------------------- /perftest/perftest_bp_vector.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | 6 | #include "util.hpp" 7 | #include "test_bp_vector_common.hpp" 8 | 9 | #include "bp_vector.hpp" 10 | #include "mapper.hpp" 11 | 12 | #include "perftest_common.hpp" 13 | 14 | // this generic trait enables easy comparisons with other BP 15 | // implementations 16 | 17 | struct succinct_bp_vector_traits 18 | { 19 | typedef succinct::bit_vector_builder 
builder_type; 20 | typedef succinct::bp_vector bp_vector_type; 21 | 22 | static inline void build(builder_type& builder, bp_vector_type& bp) 23 | { 24 | bp_vector_type(&builder, true, false).swap(bp); 25 | } 26 | 27 | static inline std::string log_header() 28 | { 29 | return std::string("SUCCINCT"); 30 | } 31 | 32 | static inline double bits_per_bp(bp_vector_type& vec) 33 | { 34 | return double(succinct::mapper::size_of(vec)) 35 | * 8.0 / double(vec.size()); 36 | } 37 | 38 | }; 39 | 40 | template 41 | double time_visit(BpVector const& bp, size_t sample_size = 1000000) 42 | { 43 | std::vector random_bits; 44 | for (size_t i = 0; i < sample_size; ++i) { 45 | random_bits.push_back(rand() > (RAND_MAX / 2)); 46 | } 47 | 48 | volatile size_t foo = 0; // to prevent the compiler to optimize away the loop 49 | 50 | size_t find_close_performed = 0; 51 | size_t steps_done = 0; 52 | double elapsed; 53 | SUCCINCT_TIMEIT(elapsed) { 54 | while (steps_done < sample_size) { 55 | size_t cur_node = 1; // root 56 | 57 | while (bp[cur_node] && steps_done < sample_size) { 58 | if (random_bits[steps_done++]) { 59 | size_t next_node = bp.find_close(cur_node); 60 | cur_node = next_node + 1; 61 | find_close_performed += 1; 62 | } else { 63 | cur_node += 1; 64 | } 65 | } 66 | foo = cur_node; 67 | } 68 | } 69 | 70 | (void)foo; // silence warning 71 | return elapsed / double(find_close_performed); 72 | } 73 | 74 | template 75 | void build_random_binary_tree(typename BpVectorTraits::bp_vector_type& bp, size_t size) 76 | { 77 | typename BpVectorTraits::builder_type builder; 78 | succinct::random_binary_tree(builder, size); 79 | BpVectorTraits::build(builder, bp); 80 | } 81 | 82 | template 83 | void bp_benchmark(size_t runs) 84 | { 85 | srand(42); // make everything deterministic 86 | static const size_t sample_size = 10000000; 87 | 88 | std::cout << BpVectorTraits::log_header() << std::endl; 89 | std::cout << "log_height" "\t" "find_close_us" "\t" "bits_per_bp" << std::endl; 90 | 91 | for 
(size_t ln = 10; ln <= 28; ln += 2) { 92 | size_t n = 1 << ln; 93 | double elapsed = 0; 94 | double bits_per_bp = 0; 95 | for (size_t run = 0; run < runs; ++run) { 96 | typename BpVectorTraits::bp_vector_type bp; 97 | build_random_binary_tree(bp, n); 98 | elapsed += time_visit(bp, sample_size); 99 | bits_per_bp += BpVectorTraits::bits_per_bp(bp); 100 | } 101 | std::cout << ln 102 | << "\t" << elapsed / double(runs) 103 | << "\t" << bits_per_bp / double(runs) 104 | << std::endl; 105 | } 106 | } 107 | 108 | int main(int argc, char** argv) 109 | { 110 | size_t runs; 111 | 112 | if (argc == 2) { 113 | runs = boost::lexical_cast(argv[1]); 114 | } else { 115 | runs = 1; 116 | } 117 | 118 | bp_benchmark(runs); 119 | } 120 | -------------------------------------------------------------------------------- /perftest/perftest_bp_vector_rmq.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | 6 | #include "util.hpp" 7 | #include "test_bp_vector_common.hpp" 8 | 9 | #include "bp_vector.hpp" 10 | 11 | #include "perftest_common.hpp" 12 | 13 | double time_avg_rmq(succinct::bp_vector const& bp, size_t sample_size = 1000000) 14 | { 15 | typedef std::pair range_pair; 16 | std::vector pairs_sample; 17 | for (size_t i = 0; i < sample_size; ++i) { 18 | uint64_t a = uint64_t(rand()) % bp.size(); 19 | uint64_t b = a + (uint64_t(rand()) % (bp.size() - a)); 20 | pairs_sample.push_back(range_pair(a, b)); 21 | } 22 | 23 | volatile uint64_t foo; // to prevent the compiler to optimize away the loop 24 | 25 | size_t rmq_performed = 0; 26 | double elapsed; 27 | SUCCINCT_TIMEIT(elapsed) { 28 | for (size_t i = 0; i < pairs_sample.size(); ++i) { 29 | range_pair r = pairs_sample[i]; 30 | foo = bp.excess_rmq(r.first, r.second); 31 | rmq_performed += 1; 32 | } 33 | } 34 | 35 | (void)foo; // silence warning 36 | return elapsed / double(rmq_performed); 37 | } 38 | 39 | void build_random_binary_tree(succinct::bp_vector& bp, 
size_t size) 40 | { 41 | succinct::bit_vector_builder builder; 42 | succinct::random_binary_tree(builder, size); 43 | succinct::bp_vector(&builder, true, false).swap(bp); 44 | } 45 | 46 | void rmq_benchmark(size_t runs) 47 | { 48 | srand(42); // make everything deterministic 49 | static const size_t sample_size = 10000000; 50 | 51 | std::cout << "SUCCINCT_EXCESS_RMQ" << std::endl; 52 | std::cout << "log_height" "\t" "excess_rmq_us" << std::endl; 53 | 54 | for (size_t ln = 10; ln <= 28; ln += 2) { 55 | size_t n = 1 << ln; 56 | double elapsed = 0; 57 | for (size_t run = 0; run < runs; ++run) { 58 | succinct::bp_vector bp; 59 | build_random_binary_tree(bp, n); 60 | elapsed += time_avg_rmq(bp, sample_size); 61 | } 62 | std::cout << ln << "\t" << elapsed / double(runs) << std::endl; 63 | } 64 | } 65 | 66 | int main(int argc, char** argv) 67 | { 68 | size_t runs; 69 | 70 | if (argc == 2) { 71 | runs = boost::lexical_cast(argv[1]); 72 | } else { 73 | runs = 1; 74 | } 75 | 76 | rmq_benchmark(runs); 77 | } 78 | -------------------------------------------------------------------------------- /perftest/perftest_cartesian_tree.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | 6 | #include "util.hpp" 7 | #include "test_bp_vector_common.hpp" 8 | 9 | #include "cartesian_tree.hpp" 10 | 11 | #include "perftest_common.hpp" 12 | 13 | double time_avg_rmq(succinct::cartesian_tree const& tree, size_t sample_size = 1000000) 14 | { 15 | typedef std::pair range_pair; 16 | std::vector pairs_sample; 17 | for (size_t i = 0; i < sample_size; ++i) { 18 | uint64_t a = uint64_t(rand()) % tree.size(); 19 | uint64_t b = a + (uint64_t(rand()) % (tree.size() - a)); 20 | pairs_sample.push_back(range_pair(a, b)); 21 | } 22 | 23 | volatile uint64_t foo; // to prevent the compiler to optimize away the loop 24 | 25 | size_t rmq_performed = 0; 26 | double elapsed; 27 | SUCCINCT_TIMEIT(elapsed) { 28 | for (size_t i = 0; i < 
pairs_sample.size(); ++i) { 29 | range_pair r = pairs_sample[i]; 30 | foo = tree.rmq(r.first, r.second); 31 | rmq_performed += 1; 32 | } 33 | } 34 | 35 | (void)foo; // silence warning 36 | return elapsed / double(rmq_performed); 37 | } 38 | 39 | void rmq_benchmark(size_t runs) 40 | { 41 | srand(42); // make everything deterministic 42 | static const size_t sample_size = 10000000; 43 | 44 | std::cout << "SUCCINCT_CARTESIAN_TREE_RMQ" << std::endl; 45 | std::cout << "log_height" "\t" "excess_rmq_us" << std::endl; 46 | 47 | for (size_t ln = 10; ln <= 28; ln += 2) { 48 | size_t n = 1 << ln; 49 | double elapsed = 0; 50 | for (size_t run = 0; run < runs; ++run) { 51 | std::vector v(n); 52 | for (size_t i = 0; i < v.size(); ++i) { 53 | v[i] = uint64_t(rand()) % 1024; 54 | } 55 | 56 | succinct::cartesian_tree tree(v); 57 | elapsed += time_avg_rmq(tree, sample_size); 58 | } 59 | std::cout << ln << "\t" << elapsed / double(runs) << std::endl; 60 | } 61 | } 62 | 63 | int main(int argc, char** argv) 64 | { 65 | size_t runs; 66 | 67 | if (argc == 2) { 68 | runs = boost::lexical_cast(argv[1]); 69 | } else { 70 | runs = 1; 71 | } 72 | 73 | rmq_benchmark(runs); 74 | } 75 | -------------------------------------------------------------------------------- /perftest/perftest_common.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace succinct { 6 | namespace detail { 7 | 8 | struct timer { 9 | timer() 10 | : m_tick(boost::posix_time::microsec_clock::universal_time()) 11 | , m_done(false) 12 | {} 13 | 14 | bool done() { return m_done; } 15 | 16 | void report(double& elapsed) { 17 | elapsed = (double)(boost::posix_time::microsec_clock::universal_time() - m_tick).total_microseconds(); 18 | m_done = true; 19 | } 20 | 21 | const std::string m_msg; 22 | boost::posix_time::ptime m_tick; 23 | bool m_done; 24 | }; 25 | 26 | } 27 | } 28 | 29 | #define SUCCINCT_TIMEIT(elapsed) \ 30 | for 
(::succinct::detail::timer SUCCINCT_TIMEIT_timer; \ 31 | !SUCCINCT_TIMEIT_timer.done(); \ 32 | SUCCINCT_TIMEIT_timer.report(elapsed)) \ 33 | /**/ 34 | -------------------------------------------------------------------------------- /perftest/perftest_elias_fano.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "util.hpp" 10 | 11 | #include "elias_fano.hpp" 12 | #include "mapper.hpp" 13 | 14 | #include "perftest_common.hpp" 15 | 16 | struct monotone_generator 17 | { 18 | monotone_generator(uint64_t m, uint8_t bits, unsigned int seed) 19 | : m_gen(seed) 20 | , m_bits(bits) 21 | { 22 | m_stack.push_back(state_t(0, m, 0)); 23 | } 24 | 25 | uint64_t next() 26 | { 27 | uint64_t cur_word, cur_m; 28 | uint8_t cur_depth; 29 | 30 | assert(m_stack.size()); 31 | boost::tie(cur_word, cur_m, cur_depth) = m_stack.back(); 32 | m_stack.pop_back(); 33 | 34 | while (cur_depth < m_bits) { 35 | boost::random::uniform_int_distribution dist(0, cur_m); 36 | uint64_t left_m = dist(m_gen); 37 | uint64_t right_m = cur_m - left_m; 38 | 39 | // push left and right children, if present 40 | if (right_m > 0) { 41 | m_stack.push_back(state_t(cur_word | (uint64_t(1) << (m_bits - cur_depth - 1)), 42 | right_m, cur_depth + 1)); 43 | } 44 | if (left_m > 0) { 45 | m_stack.push_back(state_t(cur_word, left_m, cur_depth + 1)); 46 | 47 | } 48 | 49 | // pop next child in visit 50 | boost::tie(cur_word, cur_m, cur_depth) = m_stack.back(); 51 | m_stack.pop_back(); 52 | } 53 | 54 | if (cur_m > 1) { 55 | // push back the current leaf, with cur_m decreased by one 56 | m_stack.push_back(state_t(cur_word, cur_m - 1, cur_depth)); 57 | } 58 | 59 | return cur_word; 60 | } 61 | 62 | bool done() const 63 | { 64 | return m_stack.empty(); 65 | } 66 | 67 | private: 68 | typedef boost::tuple state_t; 71 | std::vector m_stack; 72 | boost::random::mt19937 m_gen; 73 | uint8_t m_bits; 74 | }; 
75 | 76 | void ef_enumeration_benchmark(uint64_t m, uint8_t bits) 77 | { 78 | succinct::elias_fano::elias_fano_builder bvb(uint64_t(1) << bits, m); 79 | monotone_generator mgen(m, bits, 37); 80 | for (size_t i = 0; i < m; ++i) { 81 | bvb.push_back(mgen.next()); 82 | } 83 | assert(mgen.done()); 84 | 85 | succinct::elias_fano ef(&bvb); 86 | succinct::mapper::size_tree_of(ef)->dump(); 87 | 88 | 89 | double elapsed; 90 | uint64_t foo = 0; 91 | SUCCINCT_TIMEIT(elapsed) { 92 | succinct::elias_fano::select_enumerator it(ef, 0); 93 | for (size_t i = 0; i < m; ++i) { 94 | foo ^= it.next(); 95 | } 96 | } 97 | volatile uint64_t vfoo = foo; 98 | (void)vfoo; // silence warning 99 | 100 | std::cerr << "Elapsed: " << elapsed / 1000 << " msec\n" 101 | << double(m) / elapsed << " Mcodes/s" << std::endl; 102 | } 103 | 104 | int main(int argc, char** argv) 105 | { 106 | if (argc != 3) { 107 | std::cerr << "Invalid arguments" << std::endl; 108 | std::terminate(); 109 | } 110 | size_t m = boost::lexical_cast(argv[1]); 111 | uint8_t bits = uint8_t(boost::lexical_cast(argv[2])); 112 | 113 | ef_enumeration_benchmark(m, bits); 114 | } 115 | -------------------------------------------------------------------------------- /rs_bit_vector.cpp: -------------------------------------------------------------------------------- 1 | #include "rs_bit_vector.hpp" 2 | 3 | namespace succinct { 4 | 5 | void rs_bit_vector::build_indices(bool with_select_hints, bool with_select0_hints) 6 | { 7 | { 8 | using broadword::popcount; 9 | std::vector block_rank_pairs; 10 | uint64_t next_rank = 0; 11 | uint64_t cur_subrank = 0; 12 | uint64_t subranks = 0; 13 | block_rank_pairs.push_back(0); 14 | for (uint64_t i = 0; i < m_bits.size(); ++i) { 15 | uint64_t word_pop = popcount(m_bits[i]); 16 | uint64_t shift = i % block_size; 17 | if (shift) { 18 | subranks <<= 9; 19 | subranks |= cur_subrank; 20 | } 21 | next_rank += word_pop; 22 | cur_subrank += word_pop; 23 | 24 | if (shift == block_size - 1) { 25 | 
block_rank_pairs.push_back(subranks); 26 | block_rank_pairs.push_back(next_rank); 27 | subranks = 0; 28 | cur_subrank = 0; 29 | } 30 | } 31 | uint64_t left = block_size - m_bits.size() % block_size; 32 | for (uint64_t i = 0; i < left; ++i) { 33 | subranks <<= 9; 34 | subranks |= cur_subrank; 35 | } 36 | block_rank_pairs.push_back(subranks); 37 | 38 | if (m_bits.size() % block_size) { 39 | block_rank_pairs.push_back(next_rank); 40 | block_rank_pairs.push_back(0); 41 | } 42 | 43 | m_block_rank_pairs.steal(block_rank_pairs); 44 | } 45 | 46 | if (with_select_hints) { 47 | std::vector select_hints; 48 | uint64_t cur_ones_threshold = select_ones_per_hint; 49 | for (uint64_t i = 0; i < num_blocks(); ++i) { 50 | if (block_rank(i + 1) > cur_ones_threshold) { 51 | select_hints.push_back(i); 52 | cur_ones_threshold += select_ones_per_hint; 53 | } 54 | } 55 | select_hints.push_back(num_blocks()); 56 | m_select_hints.steal(select_hints); 57 | } 58 | 59 | if (with_select0_hints) { 60 | std::vector select0_hints; 61 | uint64_t cur_zeros_threshold = select_zeros_per_hint; 62 | for (uint64_t i = 0; i < num_blocks(); ++i) { 63 | if (block_rank0(i + 1) > cur_zeros_threshold) { 64 | select0_hints.push_back(i); 65 | cur_zeros_threshold += select_zeros_per_hint; 66 | } 67 | } 68 | select0_hints.push_back(num_blocks()); 69 | m_select0_hints.steal(select0_hints); 70 | } 71 | } 72 | 73 | } 74 | -------------------------------------------------------------------------------- /rs_bit_vector.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include "bit_vector.hpp" 7 | #include "broadword.hpp" 8 | 9 | namespace succinct { 10 | 11 | class rs_bit_vector : public bit_vector { 12 | public: 13 | rs_bit_vector() 14 | : bit_vector() 15 | {} 16 | 17 | template 18 | rs_bit_vector(Range const& from, 19 | bool with_select_hints = false, 20 | bool with_select0_hints = false) 21 | : bit_vector(from) 22 | { 23 | 
build_indices(with_select_hints, with_select0_hints); 24 | } 25 | 26 | template 27 | void map(Visitor& visit) { 28 | bit_vector::map(visit); 29 | visit 30 | (m_block_rank_pairs, "m_block_rank_pairs") 31 | (m_select_hints, "m_select_hints") 32 | (m_select0_hints, "m_select0_hints") 33 | ; 34 | } 35 | 36 | void swap(rs_bit_vector& other) { 37 | bit_vector::swap(other); 38 | m_block_rank_pairs.swap(other.m_block_rank_pairs); 39 | m_select_hints.swap(other.m_select_hints); 40 | m_select0_hints.swap(other.m_select0_hints); 41 | } 42 | 43 | inline uint64_t num_ones() const { 44 | return *(m_block_rank_pairs.end() - 2); 45 | } 46 | 47 | inline uint64_t num_zeros() const { 48 | return size() - num_ones(); 49 | } 50 | 51 | inline uint64_t rank(uint64_t pos) const { 52 | assert(pos <= size()); 53 | if (pos == size()) { 54 | return num_ones(); 55 | } 56 | 57 | uint64_t sub_block = pos / 64; 58 | uint64_t r = sub_block_rank(sub_block); 59 | uint64_t sub_left = pos % 64; 60 | if (sub_left) { 61 | r += broadword::popcount(m_bits[sub_block] << (64 - sub_left)); 62 | } 63 | return r; 64 | } 65 | 66 | inline uint64_t rank0(uint64_t pos) const { 67 | return pos - rank(pos); 68 | } 69 | 70 | inline uint64_t select(uint64_t n) const { 71 | using broadword::popcount; 72 | using broadword::select_in_word; 73 | assert(n < num_ones()); 74 | uint64_t a = 0; 75 | uint64_t b = num_blocks(); 76 | if (m_select_hints.size()) { 77 | uint64_t chunk = n / select_ones_per_hint; 78 | if (chunk != 0) { 79 | a = m_select_hints[chunk - 1]; 80 | } 81 | b = m_select_hints[chunk] + 1; 82 | } 83 | 84 | uint64_t block = 0; 85 | while (b - a > 1) { 86 | uint64_t mid = a + (b - a) / 2; 87 | uint64_t x = block_rank(mid); 88 | if (x <= n) { 89 | a = mid; 90 | } else { 91 | b = mid; 92 | } 93 | } 94 | block = a; 95 | 96 | assert(block < num_blocks()); 97 | uint64_t block_offset = block * block_size; 98 | uint64_t cur_rank = block_rank(block); 99 | assert(cur_rank <= n); 100 | 101 | 102 | uint64_t 
rank_in_block_parallel = (n - cur_rank) * broadword::ones_step_9; 103 | uint64_t sub_ranks = sub_block_ranks(block); 104 | uint64_t sub_block_offset = broadword::uleq_step_9(sub_ranks, rank_in_block_parallel) * broadword::ones_step_9 >> 54 & 0x7; 105 | cur_rank += sub_ranks >> (7 - sub_block_offset) * 9 & 0x1FF; 106 | assert(cur_rank <= n); 107 | 108 | uint64_t word_offset = block_offset + sub_block_offset; 109 | return word_offset * 64 + select_in_word(m_bits[word_offset], n - cur_rank); 110 | } 111 | 112 | // TODO(ot): share code between select and select0 113 | inline uint64_t select0(uint64_t n) const { 114 | using broadword::popcount; 115 | using broadword::select_in_word; 116 | assert(n < num_zeros()); 117 | uint64_t a = 0; 118 | uint64_t b = num_blocks(); 119 | if (m_select0_hints.size()) { 120 | uint64_t chunk = n / select_zeros_per_hint; 121 | if (chunk != 0) { 122 | a = m_select0_hints[chunk - 1]; 123 | } 124 | b = m_select0_hints[chunk] + 1; 125 | } 126 | 127 | uint64_t block = 0; 128 | while (b - a > 1) { 129 | uint64_t mid = a + (b - a) / 2; 130 | uint64_t x = block_rank0(mid); 131 | if (x <= n) { 132 | a = mid; 133 | } else { 134 | b = mid; 135 | } 136 | } 137 | block = a; 138 | 139 | assert(block < num_blocks()); 140 | uint64_t block_offset = block * block_size; 141 | uint64_t cur_rank0 = block_rank0(block); 142 | assert(cur_rank0 <= n); 143 | 144 | uint64_t rank_in_block_parallel = (n - cur_rank0) * broadword::ones_step_9; 145 | uint64_t sub_ranks = 64 * broadword::inv_count_step_9 - sub_block_ranks(block); 146 | uint64_t sub_block_offset = broadword::uleq_step_9(sub_ranks, rank_in_block_parallel) * broadword::ones_step_9 >> 54 & 0x7; 147 | cur_rank0 += sub_ranks >> (7 - sub_block_offset) * 9 & 0x1FF; 148 | assert(cur_rank0 <= n); 149 | 150 | uint64_t word_offset = block_offset + sub_block_offset; 151 | return word_offset * 64 + select_in_word(~m_bits[word_offset], n - cur_rank0); 152 | } 153 | 154 | protected: 155 | 156 | inline uint64_t 
num_blocks() const { 157 | return m_block_rank_pairs.size() / 2 - 1; 158 | } 159 | 160 | inline uint64_t block_rank(uint64_t block) const { 161 | return m_block_rank_pairs[block * 2]; 162 | } 163 | 164 | inline uint64_t sub_block_rank(uint64_t sub_block) const { 165 | uint64_t r = 0; 166 | uint64_t block = sub_block / block_size; 167 | r += block_rank(block); 168 | uint64_t left = sub_block % block_size; 169 | r += sub_block_ranks(block) >> ((7 - left) * 9) & 0x1FF; 170 | return r; 171 | } 172 | 173 | inline uint64_t sub_block_ranks(uint64_t block) const { 174 | return m_block_rank_pairs[block * 2 + 1]; 175 | } 176 | 177 | inline uint64_t block_rank0(uint64_t block) const { 178 | return block * block_size * 64 - m_block_rank_pairs[block * 2]; 179 | } 180 | 181 | void build_indices(bool with_select_hints, bool with_select0_hints); 182 | 183 | static const uint64_t block_size = 8; // in 64bit words 184 | static const uint64_t select_ones_per_hint = 64 * block_size * 2; // must be > block_size * 64 185 | static const uint64_t select_zeros_per_hint = select_ones_per_hint; 186 | 187 | typedef mapper::mappable_vector uint64_vec; 188 | uint64_vec m_block_rank_pairs; 189 | uint64_vec m_select_hints; 190 | uint64_vec m_select0_hints; 191 | }; 192 | } 193 | -------------------------------------------------------------------------------- /succinct_config.hpp.in: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #cmakedefine SUCCINCT_USE_LIBCXX 1 4 | #ifndef SUCCINCT_USE_LIBCXX 5 | # define SUCCINCT_USE_LIBCXX 0 6 | #endif 7 | 8 | #cmakedefine SUCCINCT_USE_INTRINSICS 1 9 | #ifndef SUCCINCT_USE_INTRINSICS 10 | # define SUCCINCT_USE_INTRINSICS 0 11 | #endif 12 | 13 | #cmakedefine SUCCINCT_USE_POPCNT 1 14 | #ifndef SUCCINCT_USE_POPCNT 15 | # define SUCCINCT_USE_POPCNT 0 16 | #endif 17 | -------------------------------------------------------------------------------- /tables.hpp: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace succinct { namespace tables { 6 | 7 | const uint8_t select_in_byte[2048] = { 8 | 8, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 9 | 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 10 | 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 11 | 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 12 | 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 13 | 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 14 | 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 8, 8, 8, 1, 8, 2, 2, 1, 8, 3, 3, 1, 3, 2, 2, 1, 8, 15 | 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1, 8, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1, 5, 4, 4, 1, 4, 2, 2, 1, 16 | 4, 3, 3, 1, 3, 2, 2, 1, 8, 6, 6, 1, 6, 2, 2, 1, 6, 3, 3, 1, 3, 2, 2, 1, 6, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 17 | 1, 6, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1, 5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1, 8, 7, 7, 1, 7, 2, 18 | 2, 1, 7, 3, 3, 1, 3, 2, 2, 1, 7, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1, 7, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 19 | 2, 2, 1, 5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1, 7, 6, 6, 1, 6, 2, 2, 1, 6, 3, 3, 1, 3, 2, 2, 1, 6, 4, 4, 1, 20 | 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1, 6, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1, 5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 21 | 1, 3, 2, 2, 1, 8, 8, 8, 8, 8, 8, 8, 2, 8, 8, 8, 3, 8, 3, 3, 2, 8, 8, 8, 4, 8, 4, 4, 2, 8, 4, 4, 3, 4, 3, 3, 2, 8, 8, 22 | 8, 5, 8, 5, 5, 2, 8, 5, 5, 3, 5, 3, 3, 2, 8, 5, 5, 4, 5, 4, 4, 2, 5, 4, 4, 3, 
4, 3, 3, 2, 8, 8, 8, 6, 8, 6, 6, 2, 8, 23 | 6, 6, 3, 6, 3, 3, 2, 8, 6, 6, 4, 6, 4, 4, 2, 6, 4, 4, 3, 4, 3, 3, 2, 8, 6, 6, 5, 6, 5, 5, 2, 6, 5, 5, 3, 5, 3, 3, 2, 24 | 6, 5, 5, 4, 5, 4, 4, 2, 5, 4, 4, 3, 4, 3, 3, 2, 8, 8, 8, 7, 8, 7, 7, 2, 8, 7, 7, 3, 7, 3, 3, 2, 8, 7, 7, 4, 7, 4, 4, 25 | 2, 7, 4, 4, 3, 4, 3, 3, 2, 8, 7, 7, 5, 7, 5, 5, 2, 7, 5, 5, 3, 5, 3, 3, 2, 7, 5, 5, 4, 5, 4, 4, 2, 5, 4, 4, 3, 4, 3, 26 | 3, 2, 8, 7, 7, 6, 7, 6, 6, 2, 7, 6, 6, 3, 6, 3, 3, 2, 7, 6, 6, 4, 6, 4, 4, 2, 6, 4, 4, 3, 4, 3, 3, 2, 7, 6, 6, 5, 6, 27 | 5, 5, 2, 6, 5, 5, 3, 5, 3, 3, 2, 6, 5, 5, 4, 5, 4, 4, 2, 5, 4, 4, 3, 4, 3, 3, 2, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 28 | 8, 8, 8, 3, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 4, 8, 4, 4, 3, 8, 8, 8, 8, 8, 8, 8, 5, 8, 8, 8, 5, 8, 5, 5, 3, 8, 8, 8, 29 | 5, 8, 5, 5, 4, 8, 5, 5, 4, 5, 4, 4, 3, 8, 8, 8, 8, 8, 8, 8, 6, 8, 8, 8, 6, 8, 6, 6, 3, 8, 8, 8, 6, 8, 6, 6, 4, 8, 6, 30 | 6, 4, 6, 4, 4, 3, 8, 8, 8, 6, 8, 6, 6, 5, 8, 6, 6, 5, 6, 5, 5, 3, 8, 6, 6, 5, 6, 5, 5, 4, 6, 5, 5, 4, 5, 4, 4, 3, 8, 31 | 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 7, 8, 7, 7, 3, 8, 8, 8, 7, 8, 7, 7, 4, 8, 7, 7, 4, 7, 4, 4, 3, 8, 8, 8, 7, 8, 7, 7, 5, 32 | 8, 7, 7, 5, 7, 5, 5, 3, 8, 7, 7, 5, 7, 5, 5, 4, 7, 5, 5, 4, 5, 4, 4, 3, 8, 8, 8, 7, 8, 7, 7, 6, 8, 7, 7, 6, 7, 6, 6, 33 | 3, 8, 7, 7, 6, 7, 6, 6, 4, 7, 6, 6, 4, 6, 4, 4, 3, 8, 7, 7, 6, 7, 6, 6, 5, 7, 6, 6, 5, 6, 5, 5, 3, 7, 6, 6, 5, 6, 5, 34 | 5, 4, 6, 5, 5, 4, 5, 4, 4, 3, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 35 | 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 5, 8, 8, 8, 8, 8, 8, 8, 5, 8, 8, 8, 5, 8, 5, 5, 4, 8, 8, 8, 8, 36 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 6, 8, 8, 8, 8, 8, 8, 8, 6, 8, 8, 8, 6, 8, 6, 6, 4, 8, 8, 8, 8, 8, 8, 8, 6, 8, 8, 8, 37 | 6, 8, 6, 6, 5, 8, 8, 8, 6, 8, 6, 6, 5, 8, 6, 6, 5, 6, 5, 5, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 38 | 8, 8, 8, 8, 8, 7, 8, 8, 8, 7, 8, 7, 7, 4, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 7, 8, 7, 7, 5, 8, 8, 8, 7, 8, 7, 7, 5, 8, 39 | 7, 
7, 5, 7, 5, 5, 4, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 7, 8, 7, 7, 6, 8, 8, 8, 7, 8, 7, 7, 6, 8, 7, 7, 6, 7, 6, 6, 4, 40 | 8, 8, 8, 7, 8, 7, 7, 6, 8, 7, 7, 6, 7, 6, 6, 5, 8, 7, 7, 6, 7, 6, 6, 5, 7, 6, 6, 5, 6, 5, 5, 4, 8, 8, 8, 8, 8, 8, 8, 41 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 42 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 43 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 6, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 6, 8, 8, 8, 8, 8, 8, 8, 6, 8, 8, 8, 6, 44 | 8, 6, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 45 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 7, 8, 7, 7, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 46 | 8, 8, 8, 8, 8, 7, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 7, 8, 7, 7, 6, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 7, 8, 7, 7, 6, 8, 47 | 8, 8, 7, 8, 7, 7, 6, 8, 7, 7, 6, 7, 6, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 48 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 49 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 50 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 6, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 51 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 52 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 53 | 8, 8, 8, 8, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 7, 8, 7, 7, 6, 8, 8, 54 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 55 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 56 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 57 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 58 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 59 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 60 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7 61 | }; 62 | 63 | }} 64 | -------------------------------------------------------------------------------- /test_bit_vector.cpp: -------------------------------------------------------------------------------- 1 | #define BOOST_TEST_MODULE bit_vector 2 | #include "test_common.hpp" 3 | #include "test_rank_select_common.hpp" 4 | 5 | #include 6 | #include 7 | 8 | #include "mapper.hpp" 9 | #include "bit_vector.hpp" 10 | 11 | BOOST_AUTO_TEST_CASE(bit_vector) 12 | { 13 | srand(42); 14 | 15 | std::vector v = random_bit_vector(); 16 | 17 | { 18 | succinct::bit_vector_builder bvb; 19 | for (size_t i = 0; i < v.size(); ++i) { 20 | bvb.push_back(v[i]); 21 | } 22 | 23 | succinct::bit_vector bitmap(&bvb); 24 | test_equal_bits(v, bitmap, "Random bits (push_back)"); 25 | } 26 | 27 | { 28 | succinct::bit_vector_builder bvb(v.size()); 29 | for (size_t i = 0; i < v.size(); ++i) { 30 | bvb.set(i, v[i]); 31 | } 32 | bvb.push_back(0); 33 | v.push_back(0); 34 | bvb.push_back(1); 35 | v.push_back(1); 36 | 37 | succinct::bit_vector bitmap(&bvb); 38 | test_equal_bits(v, bitmap, "Random bits (set)"); 39 | } 40 | 41 | uint64_t ints[] = {uint64_t(-1), uint64_t(1) << 63, 1, 1, 1, 3, 5, 7, 0xFFF, 0xF0F, 1, 0xFFFFFF, 0x123456, uint64_t(1) << 63, uint64_t(-1)}; 42 | { 43 | succinct::bit_vector_builder bvb; 44 | BOOST_FOREACH(uint64_t i, ints) { 45 | uint64_t len = 
succinct::broadword::msb(i) + 1; 46 | bvb.append_bits(i, len); 47 | } 48 | succinct::bit_vector bitmap(&bvb); 49 | uint64_t pos = 0; 50 | BOOST_FOREACH(uint64_t i, ints) { 51 | uint64_t len = succinct::broadword::msb(i) + 1; 52 | BOOST_REQUIRE_EQUAL(i, bitmap.get_bits(pos, len)); 53 | pos += len; 54 | } 55 | } 56 | 57 | { 58 | using succinct::broadword::msb; 59 | std::vector positions(1); 60 | BOOST_FOREACH(uint64_t i, ints) { 61 | positions.push_back(positions.back() + msb(i) + 1); 62 | } 63 | 64 | succinct::bit_vector_builder bvb(positions.back()); 65 | 66 | for (size_t i = 0; i < positions.size() - 1; ++i) { 67 | uint64_t v = ints[i]; 68 | uint64_t len = positions[i + 1] - positions[i]; 69 | bvb.set_bits(positions[i], v, len); 70 | } 71 | 72 | succinct::bit_vector bitmap(&bvb); 73 | for (size_t i = 0; i < positions.size() - 1; ++i) { 74 | uint64_t v = ints[i]; 75 | uint64_t len = positions[i + 1] - positions[i]; 76 | BOOST_REQUIRE_EQUAL(v, bitmap.get_bits(positions[i], len)); 77 | } 78 | } 79 | } 80 | 81 | BOOST_AUTO_TEST_CASE(bit_vector_enumerator) 82 | { 83 | srand(42); 84 | std::vector v = random_bit_vector(); 85 | succinct::bit_vector bitmap(v); 86 | 87 | size_t i = 0; 88 | size_t pos = 0; 89 | 90 | succinct::bit_vector::enumerator e(bitmap, pos); 91 | while (pos < bitmap.size()) { 92 | bool next = e.next(); 93 | MY_REQUIRE_EQUAL(next, v[pos], "pos = " << pos << " i = " << i); 94 | pos += 1; 95 | 96 | pos += size_t(rand()) % (bitmap.size() - pos + 1); 97 | e = succinct::bit_vector::enumerator(bitmap, pos); 98 | i += 1; 99 | } 100 | } 101 | 102 | BOOST_AUTO_TEST_CASE(bit_vector_unary_enumerator) 103 | { 104 | srand(42); 105 | uint64_t n = 20000; 106 | std::vector v = random_bit_vector(n); 107 | 108 | // punch some long gaps in v 109 | for (size_t g = 0; g < n / 1000; ++g) { 110 | ssize_t l = std::min(ssize_t(rand() % 256), ssize_t(v.size() - g)); 111 | std::fill(v.begin(), v.begin() + l, 0); 112 | } 113 | 114 | succinct::bit_vector bitmap(v); 115 | 116 | 
std::vector ones; 117 | for (size_t i = 0; i < v.size(); ++i) { 118 | if (bitmap[i]) { 119 | ones.push_back(i); 120 | } 121 | } 122 | 123 | { 124 | succinct::bit_vector::unary_enumerator e(bitmap, 0); 125 | 126 | for (size_t r = 0; r < ones.size(); ++r) { 127 | uint64_t pos = e.next(); 128 | MY_REQUIRE_EQUAL(ones[r], pos, 129 | "r = " << r); 130 | } 131 | } 132 | 133 | { 134 | succinct::bit_vector::unary_enumerator e(bitmap, 0); 135 | 136 | for (size_t r = 0; r < ones.size(); ++r) { 137 | for (size_t k = 0; k < std::min(size_t(256), size_t(ones.size() - r)); ++k) { 138 | succinct::bit_vector::unary_enumerator ee(e); 139 | ee.skip(k); 140 | uint64_t pos = ee.next(); 141 | MY_REQUIRE_EQUAL(ones[r + k], pos, 142 | "r = " << r << " k = " << k); 143 | } 144 | e.next(); 145 | } 146 | } 147 | 148 | { 149 | succinct::bit_vector::unary_enumerator e(bitmap, 0); 150 | 151 | for (size_t r = 0; r < ones.size(); ++r) { 152 | for (size_t k = 0; k < std::min(size_t(256), size_t(ones.size() - r)); ++k) { 153 | succinct::bit_vector::unary_enumerator ee(e); 154 | uint64_t pos_skip = ee.skip_no_move(k); 155 | uint64_t pos = ee.next(); 156 | MY_REQUIRE_EQUAL(ones[r], pos, 157 | "r = " << r << " k = " << k); 158 | MY_REQUIRE_EQUAL(ones[r + k], pos_skip, 159 | "r = " << r << " k = " << k); 160 | 161 | } 162 | e.next(); 163 | } 164 | } 165 | 166 | { 167 | succinct::bit_vector::unary_enumerator e(bitmap, 0); 168 | 169 | for (size_t pos = 0; pos < v.size(); ++pos) { 170 | uint64_t skip = 0; 171 | for (size_t d = 0; d < std::min(size_t(256), size_t(v.size() - pos)); ++d) { 172 | if (v[pos + d] == 0) { 173 | succinct::bit_vector::unary_enumerator ee(bitmap, pos); 174 | ee.skip0(skip); 175 | 176 | uint64_t expected_pos = pos + d; 177 | for (; !v[expected_pos] && expected_pos < v.size(); ++expected_pos); 178 | if (!v[expected_pos]) break; 179 | uint64_t pos = ee.next(); 180 | MY_REQUIRE_EQUAL(expected_pos, pos, 181 | "pos = " << pos << " skip = " << skip); 182 | 183 | skip += 1; 184 | } 185 | } 
186 | } 187 | } 188 | } 189 | 190 | void test_bvb_reverse(size_t n) 191 | { 192 | std::vector v = random_bit_vector(n); 193 | succinct::bit_vector_builder bvb; 194 | for (size_t i = 0; i < v.size(); ++i) { 195 | bvb.push_back(v[i]); 196 | } 197 | 198 | std::reverse(v.begin(), v.end()); 199 | bvb.reverse(); 200 | 201 | succinct::bit_vector bitmap(&bvb); 202 | test_equal_bits(v, bitmap, "In-place reverse"); 203 | } 204 | 205 | BOOST_AUTO_TEST_CASE(bvb_reverse) 206 | { 207 | srand(42); 208 | 209 | test_bvb_reverse(0); 210 | test_bvb_reverse(63); 211 | test_bvb_reverse(64); 212 | test_bvb_reverse(1000); 213 | test_bvb_reverse(1024); 214 | } 215 | -------------------------------------------------------------------------------- /test_bp_vector.cpp: -------------------------------------------------------------------------------- 1 | #define BOOST_TEST_MODULE bp_vector 2 | #include "test_common.hpp" 3 | 4 | #include 5 | #include 6 | 7 | #include "mapper.hpp" 8 | #include "bp_vector.hpp" 9 | #include "test_bp_vector_common.hpp" 10 | 11 | template 12 | void test_parentheses(std::vector const& v, BPVector const& bitmap, std::string test_name) 13 | { 14 | std::stack stack; 15 | std::vector open(v.size()); 16 | std::vector close(v.size()); 17 | std::vector enclose(v.size(), uint64_t(-1)); 18 | 19 | for (size_t i = 0; i < v.size(); ++i) { 20 | if (v[i]) { // opening 21 | if (!stack.empty()) { 22 | enclose[i] = stack.top(); 23 | } 24 | stack.push(i); 25 | } else { // closing 26 | BOOST_REQUIRE(!stack.empty()); // this is more a test on the test 27 | size_t opening = stack.top(); 28 | stack.pop(); 29 | close[opening] = i; 30 | open[i] = opening; 31 | 32 | } 33 | } 34 | BOOST_REQUIRE_EQUAL(0U, stack.size()); // ditto as above 35 | 36 | for (size_t i = 0; i < bitmap.size(); ++i) { 37 | if (v[i]) { // opening 38 | if (enclose[i] != uint64_t(-1)) { 39 | MY_REQUIRE_EQUAL(enclose[i], bitmap.enclose(i), 40 | "enclose (" << test_name << "): i = " << i); 41 | } 42 | 
MY_REQUIRE_EQUAL(close[i], bitmap.find_close(i), 43 | "find_close (" << test_name << "): i = " << i); 44 | } else { // closing 45 | MY_REQUIRE_EQUAL(open[i], bitmap.find_open(i), 46 | "find_open (" << test_name << "): i = " << i); 47 | } 48 | } 49 | } 50 | 51 | BOOST_AUTO_TEST_CASE(bp_vector) 52 | { 53 | srand(42); 54 | 55 | { 56 | std::vector v; 57 | succinct::bp_vector bitmap(v); 58 | test_parentheses(v, bitmap, "Empty vector"); 59 | } 60 | 61 | { 62 | std::vector v; 63 | succinct::random_bp(v, 100000); 64 | succinct::bp_vector bitmap(v); 65 | test_parentheses(v, bitmap, "Random parentheses"); 66 | } 67 | 68 | { 69 | size_t sizes[] = {2, 4, 512, 514, 8190, 8192, 8194, 16384, 16386, 100000}; 70 | for (size_t i = 0; i < sizeof(sizes) / sizeof(sizes[0]); ++i) { 71 | std::vector v; 72 | succinct::random_binary_tree(v, sizes[i]); 73 | succinct::bp_vector bitmap(v); 74 | test_parentheses(v, bitmap, "Random binary tree"); 75 | } 76 | } 77 | 78 | { 79 | size_t sizes[] = {2, 4, 512, 514, 8190, 8192, 8194, 16384, 16386, 32768, 32770}; 80 | size_t iterations[] = {1, 2, 3}; 81 | for (size_t s = 0; s < sizeof(sizes) / sizeof(sizes[0]); ++s) { 82 | for (size_t r = 0; r < sizeof(iterations) / sizeof(iterations[0]); ++r) { 83 | std::vector v; 84 | for (size_t i = 0; i < iterations[r]; ++i) { 85 | succinct::bp_path(v, sizes[s]); 86 | } 87 | succinct::bp_vector bitmap(v); 88 | test_parentheses(v, bitmap, "Nested parentheses"); 89 | } 90 | } 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /test_bp_vector_common.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | namespace succinct { 4 | 5 | namespace detail { 6 | template 7 | void random_binary_tree_helper(BitVectorBuilder& builder, size_t size) 8 | { 9 | assert((size & 1) == 1); // binary trees can only have an odd number of nodes (internal + leaves) 10 | if (size == 1) { 11 | builder.push_back(0); // can only be a leaf 
12 | return; 13 | } 14 | 15 | builder.push_back(1); 16 | size_t left_subtree_size = 2 * (size_t(rand()) % (size - 1) / 2) + 1; 17 | assert(left_subtree_size >= 1); 18 | size_t right_subtree_size = size - 1 - left_subtree_size; 19 | assert(right_subtree_size >= 1); 20 | assert(left_subtree_size + right_subtree_size + 1 == size); 21 | 22 | random_binary_tree_helper(builder, left_subtree_size); 23 | random_binary_tree_helper(builder, right_subtree_size); 24 | } 25 | } 26 | 27 | 28 | template 29 | void random_binary_tree(BitVectorBuilder& builder, size_t size) 30 | { 31 | assert((size & 1) == 0 && size >= 2); 32 | 33 | builder.push_back(1); // fake root 34 | detail::random_binary_tree_helper(builder, size - 1); 35 | } 36 | 37 | template 38 | void random_bp(BitVectorBuilder& builder, size_t size_est) 39 | { 40 | int excess = 0; 41 | for (size_t i = 0; i < size_est; ++i) { 42 | bool val = rand() > (RAND_MAX / 2); 43 | if (excess <= 1 && !val) { 44 | val = 1; 45 | } 46 | excess += (val ? 1 : -1); 47 | builder.push_back(val); 48 | } 49 | 50 | for (size_t i = 0; i < size_t(excess); ++i) { 51 | builder.push_back(0); // close all parentheses 52 | } 53 | } 54 | 55 | template 56 | void bp_path(BitVectorBuilder& builder, size_t size) 57 | { 58 | assert((size & 1) == 0); 59 | for (size_t i = 0; i < size / 2; ++i) { 60 | builder.push_back(1); 61 | } 62 | for (size_t i = 0; i < size / 2; ++i) { 63 | builder.push_back(0); 64 | } 65 | } 66 | 67 | 68 | } 69 | -------------------------------------------------------------------------------- /test_bp_vector_rmq.cpp: -------------------------------------------------------------------------------- 1 | #define BOOST_TEST_MODULE bp_vector_rmq 2 | #include "test_common.hpp" 3 | 4 | #include 5 | #include 6 | 7 | #include "mapper.hpp" 8 | #include "bp_vector.hpp" 9 | #include "test_bp_vector_common.hpp" 10 | 11 | 12 | template 13 | void test_rmq(std::vector const& v, BPVector const& bitmap, std::string test_name) 14 | { 15 | // XXX should we 
test this case? 16 | if (v.empty()) return; 17 | 18 | // test all values from a to v.size() for a in specific locations 19 | // plus a few random 20 | 21 | std::vector tests; 22 | tests.push_back(1); 23 | tests.push_back(8); 24 | tests.push_back(64); 25 | tests.push_back(8192); 26 | tests.push_back(v.size()); 27 | for (size_t t = 0; t < 10; ++t) { 28 | tests.push_back(size_t(rand()) % v.size()); 29 | } 30 | 31 | for(size_t t = 0; t < tests.size(); ++t) { 32 | uint64_t a = tests[t]; 33 | if (a > v.size()) continue; 34 | 35 | typename BPVector::enumerator bp_it(bitmap, a); 36 | typename BPVector::excess_t cur_exc = bitmap.excess(a); 37 | typename BPVector::excess_t min_exc = cur_exc, found_min_exc; 38 | uint64_t min_idx = a; 39 | 40 | BOOST_REQUIRE_EQUAL(min_idx, bitmap.excess_rmq(a, a, found_min_exc)); 41 | 42 | for (uint64_t b = a + 1; b < v.size(); ++b) { 43 | cur_exc += bp_it.next() ? 1 : -1; 44 | if (cur_exc < min_exc) { 45 | min_exc = cur_exc; 46 | min_idx = b; 47 | 48 | assert(min_exc == bitmap.excess(min_idx)); 49 | } 50 | 51 | MY_REQUIRE_EQUAL(min_idx, bitmap.excess_rmq(a, b, found_min_exc), 52 | "excess_rmq (" << test_name << "):" 53 | << " a = " << a 54 | << " b = " << b 55 | << " min_exc = " << min_exc 56 | << " found_min_exc = " << found_min_exc 57 | ); 58 | } 59 | } 60 | } 61 | 62 | BOOST_AUTO_TEST_CASE(bp_vector_rmq) 63 | { 64 | srand(42); 65 | 66 | { 67 | std::vector v; 68 | succinct::bp_vector bitmap(v); 69 | test_rmq(v, bitmap, "Empty vector"); 70 | } 71 | 72 | { 73 | std::vector v; 74 | succinct::random_bp(v, 100000); 75 | succinct::bp_vector bitmap(v); 76 | test_rmq(v, bitmap, "Random parentheses"); 77 | } 78 | 79 | { 80 | size_t sizes[] = {2, 4, 512, 514, 8190, 8192, 8194, 16384, 16386, 100000}; 81 | for (size_t i = 0; i < sizeof(sizes) / sizeof(sizes[0]); ++i) { 82 | std::vector v; 83 | succinct::random_binary_tree(v, sizes[i]); 84 | succinct::bp_vector bitmap(v); 85 | test_rmq(v, bitmap, "Random binary tree"); 86 | } 87 | } 88 | 89 | { 90 | 
size_t sizes[] = {2, 4, 512, 514, 8190, 8192, 8194, 16384, 16386, 32768, 32770}; 91 | size_t iterations[] = {1, 2, 3}; 92 | for (size_t s = 0; s < sizeof(sizes) / sizeof(sizes[0]); ++s) { 93 | for (size_t r = 0; r < sizeof(iterations) / sizeof(iterations[0]); ++r) { 94 | std::vector v; 95 | for (size_t i = 0; i < iterations[r]; ++i) { 96 | succinct::bp_path(v, sizes[s]); 97 | } 98 | succinct::bp_vector bitmap(v); 99 | test_rmq(v, bitmap, "Nested parentheses"); 100 | } 101 | } 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /test_cartesian_tree.cpp: -------------------------------------------------------------------------------- 1 | #define BOOST_TEST_MODULE cartesian_tree 2 | #include "test_common.hpp" 3 | 4 | #include 5 | #include 6 | 7 | #include "mapper.hpp" 8 | #include "cartesian_tree.hpp" 9 | 10 | typedef uint64_t value_type; 11 | 12 | // XXX test (de)serialization 13 | 14 | template 15 | void test_rmq(std::vector const& v, succinct::cartesian_tree const& tree, 16 | Comparator const& comp, std::string test_name) 17 | { 18 | BOOST_REQUIRE_EQUAL(v.size(), tree.size()); 19 | 20 | if (v.empty()) return; 21 | 22 | std::vector tests; 23 | // A few special cases 24 | tests.push_back(0); 25 | tests.push_back(1); 26 | // This is the global minimum of the vector 27 | tests.push_back(uint64_t(std::min_element(v.begin(), v.end(), comp) - v.begin())); 28 | 29 | // Plus some random... 
30 | for (size_t t = 0; t < 10; ++t) { 31 | tests.push_back(size_t(rand()) % v.size()); 32 | } 33 | 34 | for(size_t t = 0; t < tests.size(); ++t) { 35 | uint64_t a = tests[t]; 36 | if (a > v.size()) continue; 37 | 38 | uint64_t min_idx = a; 39 | value_type cur_min = v[a]; 40 | 41 | BOOST_REQUIRE_EQUAL(min_idx, tree.rmq(a, a)); 42 | 43 | for (uint64_t b = a + 1; b < v.size(); ++b) { 44 | if (comp(v[b], cur_min)) { 45 | cur_min = v[b]; 46 | min_idx = b; 47 | } 48 | 49 | uint64_t found_idx = tree.rmq(a, b); 50 | 51 | MY_REQUIRE_EQUAL(min_idx, found_idx, 52 | "rmq (" << test_name << "):" 53 | << " a = " << a 54 | << " b = " << b 55 | << " min = " << cur_min 56 | << " found_min = " << v[found_idx] 57 | ); 58 | } 59 | } 60 | } 61 | 62 | BOOST_AUTO_TEST_CASE(cartesian_tree) 63 | { 64 | srand(42); 65 | 66 | { 67 | std::vector v; 68 | succinct::cartesian_tree t(v); 69 | test_rmq(v, t, std::less(), "Empty vector"); 70 | } 71 | 72 | { 73 | std::vector v(20000); 74 | for (size_t i = 0; i < v.size(); ++i) { 75 | v[i] = i; 76 | } 77 | 78 | { 79 | succinct::cartesian_tree t(v); 80 | test_rmq(v, t, std::less(), "Increasing values"); 81 | } 82 | { 83 | succinct::cartesian_tree t(v, std::greater()); 84 | test_rmq(v, t, std::greater(), "Decreasing values"); 85 | } 86 | } 87 | 88 | { 89 | std::vector v(20000); 90 | for (size_t i = 0; i < v.size(); ++i) { 91 | if (i < v.size() / 2) { 92 | v[i] = i; 93 | } else { 94 | v[i] = v.size() - i; 95 | } 96 | } 97 | 98 | { 99 | succinct::cartesian_tree t(v); 100 | test_rmq(v, t, std::less(), "Convex values"); 101 | } 102 | 103 | { 104 | succinct::cartesian_tree t(v, std::greater()); 105 | test_rmq(v, t, std::greater(), "Concave values"); 106 | } 107 | } 108 | 109 | { 110 | size_t sizes[] = {2, 4, 512, 514, 8190, 8192, 8194, 16384, 16386, 100000}; 111 | for (size_t i = 0; i < sizeof(sizes) / sizeof(sizes[0]); ++i) { 112 | std::vector v(sizes[i]); 113 | for (size_t i = 0; i < v.size(); ++i) { 114 | v[i] = size_t(rand()) % 1024; 115 | } 116 | 117 | 
succinct::cartesian_tree t(v); 118 | test_rmq(v, t, std::less(), "Random values"); 119 | } 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /test_common.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #define BOOST_TEST_DYN_LINK 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #define MY_REQUIRE_EQUAL(A, B, MSG) \ 11 | BOOST_REQUIRE_MESSAGE((A) == (B), BOOST_PP_STRINGIZE(A) << " == " << BOOST_PP_STRINGIZE(B) << " [" << A << " != " << B << "] " << MSG) 12 | 13 | inline std::vector random_bit_vector(size_t n = 10000, double density = 0.5) 14 | { 15 | std::vector v; 16 | for (size_t i = 0; i < n; ++i) { 17 | v.push_back(rand() < (RAND_MAX * density)); 18 | } 19 | return v; 20 | } 21 | -------------------------------------------------------------------------------- /test_darray.cpp: -------------------------------------------------------------------------------- 1 | #define BOOST_TEST_MODULE darray 2 | #include "test_common.hpp" 3 | #include "test_rank_select_common.hpp" 4 | 5 | #include 6 | #include 7 | 8 | #include "mapper.hpp" 9 | #include "darray.hpp" 10 | 11 | void test_darray(std::vector const& v, const char* test_name) 12 | { 13 | succinct::bit_vector bv(v); 14 | succinct::darray1 d1(bv); 15 | succinct::darray0 d0(bv); 16 | 17 | size_t cur_rank = 0; 18 | size_t cur_rank0 = 0; 19 | for (size_t i = 0; i < v.size(); ++i) { 20 | if (v[i]) { 21 | MY_REQUIRE_EQUAL(i, d1.select(bv, cur_rank), 22 | "select (" << test_name << "): cur_rank = " << cur_rank << ", i = " << i << ", v[i] = " << v[i]); 23 | cur_rank += 1; 24 | } else { 25 | MY_REQUIRE_EQUAL(i, d0.select(bv, cur_rank0), 26 | "select0 (" << test_name << "): cur_rank0 = " << cur_rank0 << ", i = " << i << ", v[i] = " << v[i]); 27 | cur_rank0 += 1; 28 | } 29 | } 30 | 31 | BOOST_REQUIRE_EQUAL(cur_rank, d1.num_positions()); 32 | BOOST_REQUIRE_EQUAL(cur_rank0, d0.num_positions()); 33 | } 
34 | 35 | BOOST_AUTO_TEST_CASE(darray) 36 | { 37 | srand(42); 38 | size_t N = 10000; 39 | 40 | { 41 | // Random bitmap 42 | std::vector v = random_bit_vector(N); 43 | test_darray(v, "random"); 44 | } 45 | 46 | { 47 | // Empty bitmap 48 | std::vector v; 49 | test_darray(v, "empty"); 50 | } 51 | 52 | { 53 | // Only one value 54 | std::vector v(N); 55 | v[37] = 1; 56 | test_darray(v, "singleton"); 57 | } 58 | 59 | { 60 | // Full bitmap 61 | std::vector v(N, 1); 62 | test_darray(v, "full"); 63 | } 64 | 65 | { 66 | // Very sparse random bitmap 67 | size_t bigN = (1 << 16) * 4; 68 | std::vector v(bigN); 69 | size_t cur_pos = 0; 70 | while(cur_pos < bigN) { 71 | v[cur_pos] = 1; 72 | cur_pos += rand() % 1024; 73 | } 74 | test_darray(v, "sparse"); 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /test_elias_fano.cpp: -------------------------------------------------------------------------------- 1 | #define BOOST_TEST_MODULE elias_fano 2 | #include "test_common.hpp" 3 | #include "test_rank_select_common.hpp" 4 | 5 | #include 6 | #include 7 | 8 | #include "mapper.hpp" 9 | #include "elias_fano.hpp" 10 | 11 | BOOST_AUTO_TEST_CASE(elias_fano) 12 | { 13 | srand(42); 14 | size_t N = 10000; 15 | 16 | { 17 | // Random bitmap 18 | for (size_t d = 1; d < 8; ++d) { 19 | double density = 1.0 / (1 << d); 20 | std::vector v = random_bit_vector(N, density); 21 | 22 | succinct::bit_vector_builder bvb; 23 | for (size_t i = 0; i < v.size(); ++i) { 24 | bvb.push_back(v[i]); 25 | } 26 | 27 | succinct::elias_fano bitmap(&bvb); 28 | test_equal_bits(v, bitmap, "Random bitmap"); 29 | test_rank_select1(v, bitmap, "Random bitmap"); 30 | test_delta(bitmap, "Random bitmap"); 31 | test_select_enumeration(v, bitmap, "Random bitmap"); 32 | } 33 | } 34 | 35 | { 36 | // Empty bitmap 37 | succinct::bit_vector_builder bvb(N); 38 | succinct::elias_fano bitmap(&bvb); 39 | BOOST_REQUIRE_EQUAL(0U, bitmap.num_ones()); 40 | test_equal_bits(std::vector(N), bitmap, 
"Empty bitmap"); 41 | test_select_enumeration(std::vector(N), bitmap, "Empty bitmap"); 42 | } 43 | 44 | { 45 | // Only one value 46 | std::vector v(N); 47 | succinct::bit_vector_builder bvb(N); 48 | bvb.set(37, 1); 49 | v[37] = 1; 50 | succinct::elias_fano bitmap(&bvb); 51 | test_equal_bits(v, bitmap, "Only one value"); 52 | test_rank_select1(v, bitmap, "Only one value"); 53 | test_delta(bitmap, "Only one value"); 54 | test_select_enumeration(v, bitmap, "Only one value"); 55 | BOOST_REQUIRE_EQUAL(1U, bitmap.num_ones()); 56 | } 57 | 58 | { 59 | // Full bitmap 60 | std::vector v(N, 1); 61 | succinct::bit_vector_builder bvb; 62 | for (size_t i = 0; i < N; ++i) { 63 | bvb.push_back(1); 64 | } 65 | succinct::elias_fano bitmap(&bvb); 66 | test_equal_bits(v, bitmap, "Full bitmap"); 67 | test_rank_select1(v, bitmap, "Full bitmap"); 68 | test_delta(bitmap, "Full bitmap"); 69 | test_select_enumeration(v, bitmap, "Full bitmap"); 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /test_elias_fano_compressed_list.cpp: -------------------------------------------------------------------------------- 1 | #define BOOST_TEST_MODULE elias_fano_compressed_list 2 | #include "test_common.hpp" 3 | 4 | #include 5 | 6 | #include "elias_fano_compressed_list.hpp" 7 | 8 | BOOST_AUTO_TEST_CASE(elias_fano_compressed_list) 9 | { 10 | srand(42); 11 | const size_t test_size = 12345; 12 | 13 | std::vector v; 14 | 15 | for (size_t i = 0; i < test_size; ++i) { 16 | if (rand() < (RAND_MAX / 3)) { 17 | v.push_back(0); 18 | } else { 19 | v.push_back(size_t(rand())); 20 | } 21 | } 22 | 23 | succinct::elias_fano_compressed_list vv(v); 24 | 25 | BOOST_REQUIRE_EQUAL(v.size(), vv.size()); 26 | for (size_t i = 0; i < v.size(); ++i) { 27 | MY_REQUIRE_EQUAL(v[i], vv[i], "i = " << i); 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /test_gamma_bit_vector.cpp: 
-------------------------------------------------------------------------------- 1 | #define BOOST_TEST_MODULE gamma_bit_vector 2 | #include "test_common.hpp" 3 | 4 | #include 5 | 6 | #include "gamma_bit_vector.hpp" 7 | 8 | typedef std::vector std_vector_type; 9 | 10 | std_vector_type random_vector(size_t test_size) 11 | { 12 | std_vector_type v; 13 | 14 | for (size_t i = 0; i < test_size; ++i) { 15 | bool b = uint64_t(rand()) & 1; 16 | if (rand() < (RAND_MAX / 3)) { 17 | v.push_back(b); 18 | } else { 19 | v.push_back((uint64_t(rand()) << 1) | b); 20 | } 21 | } 22 | 23 | return v; 24 | } 25 | 26 | BOOST_AUTO_TEST_CASE(gamma_bit_vector) 27 | { 28 | srand(42); 29 | const size_t test_size = 12345; 30 | std_vector_type v = random_vector(test_size); 31 | 32 | succinct::gamma_bit_vector vv(v); 33 | 34 | BOOST_REQUIRE_EQUAL(v.size(), vv.size()); 35 | for (size_t i = 0; i < v.size(); ++i) { 36 | MY_REQUIRE_EQUAL(v[i], vv[i], "i = " << i); 37 | } 38 | } 39 | 40 | BOOST_AUTO_TEST_CASE(gamma_bit_enumerator) 41 | { 42 | srand(42); 43 | const size_t test_size = 12345; 44 | std_vector_type v = random_vector(test_size); 45 | 46 | succinct::gamma_bit_vector vv(v); 47 | 48 | size_t i = 0; 49 | size_t pos = 0; 50 | 51 | succinct::forward_enumerator e(vv, pos); 52 | while (pos < vv.size()) { 53 | succinct::gamma_bit_vector::value_type next = e.next(); 54 | MY_REQUIRE_EQUAL(next, v[pos], "pos = " << pos << " i = " << i); 55 | pos += 1; 56 | 57 | size_t step = uint64_t(rand()) % (vv.size() - pos + 1); 58 | pos += step; 59 | e = succinct::forward_enumerator(vv, pos); 60 | i += 1; 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /test_gamma_vector.cpp: -------------------------------------------------------------------------------- 1 | #define BOOST_TEST_MODULE gamma_vector 2 | #include "test_common.hpp" 3 | 4 | #include 5 | 6 | #include "gamma_vector.hpp" 7 | 8 | BOOST_AUTO_TEST_CASE(gamma_vector) 9 | { 10 | srand(42); 11 | const size_t 
test_size = 12345; 12 | 13 | std::vector v; 14 | 15 | for (size_t i = 0; i < test_size; ++i) { 16 | if (rand() < (RAND_MAX / 3)) { 17 | v.push_back(0); 18 | } else { 19 | v.push_back(uint64_t(rand())); 20 | } 21 | } 22 | 23 | succinct::gamma_vector vv(v); 24 | 25 | BOOST_REQUIRE_EQUAL(v.size(), vv.size()); 26 | for (size_t i = 0; i < v.size(); ++i) { 27 | MY_REQUIRE_EQUAL(v[i], vv[i], "i = " << i); 28 | } 29 | } 30 | 31 | BOOST_AUTO_TEST_CASE(gamma_enumerator) 32 | { 33 | srand(42); 34 | const size_t test_size = 12345; 35 | 36 | std::vector v; 37 | 38 | for (size_t i = 0; i < test_size; ++i) { 39 | if (rand() < (RAND_MAX / 3)) { 40 | v.push_back(0); 41 | } else { 42 | v.push_back(uint64_t(rand())); 43 | } 44 | } 45 | 46 | succinct::gamma_vector vv(v); 47 | 48 | size_t i = 0; 49 | size_t pos = 0; 50 | 51 | succinct::forward_enumerator e(vv, pos); 52 | while (pos < vv.size()) { 53 | uint64_t next = e.next(); 54 | MY_REQUIRE_EQUAL(next, v[pos], "pos = " << pos << " i = " << i); 55 | pos += 1; 56 | 57 | size_t step = uint64_t(rand()) % (vv.size() - pos + 1); 58 | pos += step; 59 | e = succinct::forward_enumerator(vv, pos); 60 | i += 1; 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /test_mapper.cpp: -------------------------------------------------------------------------------- 1 | #define BOOST_TEST_MODULE mapper 2 | #include "test_common.hpp" 3 | 4 | #include 5 | 6 | #include "mapper.hpp" 7 | 8 | BOOST_AUTO_TEST_CASE(basic_map) 9 | { 10 | succinct::mapper::mappable_vector vec; 11 | BOOST_REQUIRE_EQUAL(vec.size(), 0U); 12 | 13 | int nums[] = {1, 2, 3, 4}; 14 | vec.assign(nums); 15 | 16 | BOOST_REQUIRE_EQUAL(4U, vec.size()); 17 | BOOST_REQUIRE_EQUAL(1, vec[0]); 18 | BOOST_REQUIRE_EQUAL(4, vec[3]); 19 | 20 | succinct::mapper::freeze(vec, "temp.bin"); 21 | 22 | { 23 | succinct::mapper::mappable_vector mapped_vec; 24 | boost::iostreams::mapped_file_source m("temp.bin"); 25 | succinct::mapper::map(mapped_vec, m); 26 | 
BOOST_REQUIRE_EQUAL(vec.size(), mapped_vec.size()); 27 | BOOST_REQUIRE(std::equal(vec.begin(), vec.end(), mapped_vec.begin())); 28 | } 29 | 30 | boost::filesystem::remove("temp.bin"); 31 | } 32 | 33 | class complex_struct { 34 | public: 35 | complex_struct() 36 | : m_a(0) 37 | {} 38 | 39 | void init() { 40 | m_a = 42; 41 | uint32_t b[] = {1, 2}; 42 | m_b.assign(b); 43 | } 44 | 45 | template 46 | void map(Visitor& visit) { 47 | visit 48 | (m_a, "m_a") 49 | (m_b, "m_b") 50 | ; 51 | } 52 | 53 | uint64_t m_a; 54 | succinct::mapper::mappable_vector m_b; 55 | }; 56 | 57 | BOOST_AUTO_TEST_CASE(complex_struct_map) 58 | { 59 | complex_struct s; 60 | s.init(); 61 | succinct::mapper::freeze(s, "temp.bin"); 62 | 63 | BOOST_REQUIRE_EQUAL(24, succinct::mapper::size_of(s)); 64 | 65 | complex_struct mapped_s; 66 | BOOST_REQUIRE_EQUAL(0, mapped_s.m_a); 67 | BOOST_REQUIRE_EQUAL(0U, mapped_s.m_b.size()); 68 | 69 | { 70 | boost::iostreams::mapped_file_source m("temp.bin"); 71 | succinct::mapper::map(mapped_s, m); 72 | BOOST_REQUIRE_EQUAL(s.m_a, mapped_s.m_a); 73 | BOOST_REQUIRE_EQUAL(s.m_b.size(), mapped_s.m_b.size()); 74 | } 75 | 76 | boost::filesystem::remove("temp.bin"); 77 | } 78 | -------------------------------------------------------------------------------- /test_rank_select_common.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "test_common.hpp" 4 | 5 | template 6 | inline void test_equal_bits(std::vector const& v, Vector const& bitmap, const char* test_name) 7 | { 8 | BOOST_REQUIRE_EQUAL(v.size(), bitmap.size()); 9 | for (size_t i = 0; i < v.size(); ++i) { 10 | MY_REQUIRE_EQUAL((bool)v[i], bitmap[i], 11 | "operator[] (" << test_name << "): i=" << i); 12 | } 13 | } 14 | 15 | template 16 | void test_rank_select0(std::vector const& v, Vector const& bitmap, const char* test_name) 17 | { 18 | uint64_t cur_rank0 = 0; 19 | uint64_t last_zero = uint64_t(-1); 20 | 21 | for (size_t i = 0; i < v.size(); ++i) { 22 | 
MY_REQUIRE_EQUAL(cur_rank0, bitmap.rank0(i), 23 | "rank0 (" << test_name << "): cur_rank0 = " << cur_rank0 << ", i = " << i << ", v[i] = " << v[i]); 24 | if (!v[i]) { 25 | last_zero = i; 26 | MY_REQUIRE_EQUAL(last_zero, bitmap.select0(cur_rank0), 27 | "select0 (" << test_name << "): cur_rank0 = " << cur_rank0 << ", i = " << i << ", v[i] = " << v[i] << ", last_zero = " << last_zero); 28 | ++cur_rank0; 29 | } 30 | if (last_zero != uint64_t(-1)) { 31 | MY_REQUIRE_EQUAL(last_zero, bitmap.predecessor0(i), 32 | "predecessor0 (" << test_name << "): last_zero = " << last_zero <<", i = " << i << ",v[i] = " << v[i]); 33 | } 34 | } 35 | 36 | last_zero = uint64_t(-1); 37 | for (size_t i = v.size() - 1; i + 1 > 0; --i) { 38 | if (!v[i]) { 39 | last_zero = i; 40 | } 41 | 42 | if (last_zero != uint64_t(-1)) { 43 | MY_REQUIRE_EQUAL(last_zero, bitmap.successor0(i), 44 | "successor0 (" << test_name << "): last_zero = " << last_zero <<", i = " << i << ",v[i] = " << v[i]); 45 | } 46 | } 47 | } 48 | 49 | template 50 | void test_rank_select1(std::vector const& v, Vector const& bitmap, const char* test_name) 51 | { 52 | uint64_t cur_rank = 0; 53 | uint64_t last_one = uint64_t(-1); 54 | 55 | for (size_t i = 0; i < v.size(); ++i) { 56 | MY_REQUIRE_EQUAL(cur_rank, bitmap.rank(i), 57 | "rank (" << test_name << "): cur_rank = " << cur_rank << ", i = " << i << ", v[i] = " << v[i]); 58 | 59 | if (v[i]) { 60 | last_one = i; 61 | MY_REQUIRE_EQUAL(last_one, bitmap.select(cur_rank), 62 | "select (" << test_name << "): cur_rank = " << cur_rank << ", i = " << i << ", v[i] = " << v[i] << ", last_one = " << last_one); 63 | ++cur_rank; 64 | } 65 | 66 | if (last_one != uint64_t(-1)) { 67 | MY_REQUIRE_EQUAL(last_one, bitmap.predecessor1(i), 68 | "predecessor1 (" << test_name << "): last_one = " << last_one <<", i = " << i << ",v[i] = " << v[i]); 69 | } 70 | } 71 | 72 | last_one = uint64_t(-1); 73 | for (size_t i = v.size() - 1; i + 1 > 0; --i) { 74 | if (v[i]) { 75 | last_one = i; 76 | } 77 | 78 | if 
(last_one != uint64_t(-1)) { 79 | MY_REQUIRE_EQUAL(last_one, bitmap.successor1(i), 80 | "successor1 (" << test_name << "): last_one = " << last_one <<", i = " << i << ",v[i] = " << v[i]); 81 | } 82 | } 83 | } 84 | 85 | template 86 | void test_rank_select(std::vector const& v, Vector const& bitmap, const char* test_name) 87 | { 88 | test_rank_select0(v, bitmap, test_name); 89 | test_rank_select1(v, bitmap, test_name); 90 | } 91 | 92 | template 93 | void test_delta(Vector const& bitmap, const char* test_name) 94 | { 95 | for (size_t i = 0; i < bitmap.num_ones(); ++i) { 96 | if (i) { 97 | MY_REQUIRE_EQUAL(bitmap.select(i) - bitmap.select(i - 1), 98 | bitmap.delta(i), 99 | "delta (" << test_name << "), i = " << i); 100 | } else { 101 | MY_REQUIRE_EQUAL(bitmap.select(i), 102 | bitmap.delta(i), 103 | "delta (" << test_name << "), i = " << i); 104 | } 105 | } 106 | } 107 | 108 | template 109 | void test_select_enumeration(std::vector const& v, Vector const& bitmap, const char* test_name) 110 | { 111 | // XXX test other starting points 112 | typename Vector::select_enumerator it(bitmap, 0); 113 | 114 | for (size_t i = 0; i < v.size(); ++i) { 115 | if (v[i]) { 116 | uint64_t res = it.next(); 117 | MY_REQUIRE_EQUAL(i, 118 | res, 119 | "select_iterator next (" << test_name << "), i = " << i 120 | << ", n = " << bitmap.size() << ", m = " << bitmap.num_ones()); 121 | } 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /test_rs_bit_vector.cpp: -------------------------------------------------------------------------------- 1 | #define BOOST_TEST_MODULE rs_bit_vector 2 | #include "test_common.hpp" 3 | #include "test_rank_select_common.hpp" 4 | 5 | #include 6 | #include 7 | 8 | #include "mapper.hpp" 9 | #include "rs_bit_vector.hpp" 10 | 11 | BOOST_AUTO_TEST_CASE(rs_bit_vector) 12 | { 13 | srand(42); 14 | 15 | // empty vector 16 | std::vector v; 17 | succinct::rs_bit_vector bitmap; 18 | 19 | 
succinct::rs_bit_vector(v).swap(bitmap); 20 | BOOST_REQUIRE_EQUAL(v.size(), bitmap.size()); 21 | succinct::rs_bit_vector(v, true).swap(bitmap); 22 | BOOST_REQUIRE_EQUAL(v.size(), bitmap.size()); 23 | 24 | // random vector 25 | v = random_bit_vector(); 26 | 27 | succinct::rs_bit_vector(v).swap(bitmap); 28 | BOOST_REQUIRE_EQUAL(v.size(), bitmap.size()); 29 | test_equal_bits(v, bitmap, "RS - Uniform bits"); 30 | test_rank_select(v, bitmap, "Uniform bits"); 31 | 32 | succinct::rs_bit_vector(v, true, true).swap(bitmap); 33 | test_rank_select(v, bitmap, "Uniform bits - with hints"); 34 | 35 | v.resize(10000); 36 | v[9999] = 1; 37 | v[9000] = 1; 38 | succinct::rs_bit_vector(v).swap(bitmap); 39 | 40 | BOOST_REQUIRE_EQUAL(v.size(), bitmap.size()); 41 | test_rank_select(v, bitmap, "Long runs of 0"); 42 | succinct::rs_bit_vector(v, true, true).swap(bitmap); 43 | test_rank_select(v, bitmap, "Long runs of 0 - with hints"); 44 | 45 | // corner cases 46 | v.clear(); 47 | v.resize(10000); 48 | v[0] = 1; 49 | v[511] = 1; 50 | v[512] = 1; 51 | v[1024] = 1; 52 | v[2112] = 1; 53 | succinct::rs_bit_vector(v).swap(bitmap); 54 | 55 | BOOST_REQUIRE_EQUAL(v.size(), bitmap.size()); 56 | test_rank_select(v, bitmap, "Corner cases"); 57 | succinct::rs_bit_vector(v, true).swap(bitmap); 58 | test_rank_select(v, bitmap, "Corner cases - with hints"); 59 | } 60 | -------------------------------------------------------------------------------- /test_topk_vector.cpp: -------------------------------------------------------------------------------- 1 | #define BOOST_TEST_MODULE topk_vector 2 | #include "test_common.hpp" 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "mapper.hpp" 9 | #include "mappable_vector.hpp" 10 | #include "topk_vector.hpp" 11 | #include "elias_fano_compressed_list.hpp" 12 | 13 | typedef uint64_t value_type; 14 | 15 | // XXX test (de)serialization 16 | 17 | struct value_index_comparator { 18 | template 19 | bool operator()(Tuple const& a, Tuple const& b) const 20 | { 
21 | using boost::get; 22 | // lexicographic, decreasing on value and increasing 23 | // on index 24 | return (get<0>(a) > get<0>(b) || 25 | (get<0>(a) == get<0>(b) && 26 | get<1>(a) < get<1>(b))); 27 | } 28 | }; 29 | 30 | template 31 | void test_topk(std::vector const& v, TopKVector const& topkv, std::string /* test_name */) 32 | { 33 | BOOST_REQUIRE_EQUAL(v.size(), topkv.size()); 34 | 35 | if (v.empty()) return; 36 | 37 | // test random pairs 38 | const size_t sample_size = 100; 39 | typedef std::pair range_pair; 40 | std::vector pairs_sample; 41 | for (size_t i = 0; i < sample_size; ++i) { 42 | uint64_t a = size_t(rand()) % v.size(); 43 | uint64_t b = a + (size_t(rand()) % (v.size() - a)); 44 | pairs_sample.push_back(range_pair(a, b)); 45 | } 46 | 47 | typedef typename TopKVector::entry_type entry_type; 48 | 49 | size_t k = 10; 50 | 51 | for (size_t i = 0; i < pairs_sample.size(); ++i) { 52 | range_pair r = pairs_sample[i]; 53 | uint64_t a = r.first, b = r.second; 54 | 55 | std::vector expected; 56 | for (uint64_t i = a; i <= b; ++i) { 57 | expected.push_back(entry_type(v[i], i)); 58 | } 59 | std::sort(expected.begin(), expected.end(), value_index_comparator()); // XXX 60 | expected.resize(std::min(expected.size(), k)); 61 | 62 | std::vector found = topkv.topk(a, b, k); 63 | 64 | BOOST_REQUIRE_EQUAL_COLLECTIONS(expected.begin(), expected.end(), 65 | found.begin(), found.end()); 66 | } 67 | } 68 | 69 | BOOST_AUTO_TEST_CASE(topk_vector) 70 | { 71 | srand(42); 72 | 73 | //typedef succinct::topk_vector > topk_type; 74 | typedef succinct::topk_vector topk_type; 75 | 76 | { 77 | std::vector v; 78 | topk_type t(v); 79 | test_topk(v, t, "Empty vector"); 80 | } 81 | 82 | { 83 | std::vector v(20000); 84 | for (size_t i = 0; i < v.size(); ++i) { 85 | if (i < v.size() / 2) { 86 | v[i] = i; 87 | } else { 88 | v[i] = v.size() - i; 89 | } 90 | } 91 | 92 | { 93 | topk_type t(v); 94 | test_topk(v, t, "Convex values"); 95 | } 96 | } 97 | 98 | { 99 | size_t sizes[] = {2, 4, 512, 
514, 8190, 8192, 8194, 16384, 16386, 100000}; 100 | for (size_t i = 0; i < sizeof(sizes) / sizeof(sizes[0]); ++i) { 101 | std::vector v(sizes[i]); 102 | for (size_t i = 0; i < v.size(); ++i) { 103 | v[i] = size_t(rand()) % 1024; 104 | } 105 | 106 | topk_type t(v); 107 | test_topk(v, t, "Random values"); 108 | } 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /topk_vector.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | #include "cartesian_tree.hpp" 10 | 11 | namespace succinct { 12 | 13 | // XXX(ot): implement arbitrary comparator 14 | template 15 | class topk_vector : boost::noncopyable { 16 | public: 17 | typedef Vector vector_type; 18 | typedef typename vector_type::value_type value_type; 19 | typedef boost::tuple entry_type; 20 | typedef std::vector entry_vector_type; 21 | 22 | topk_vector() 23 | {} 24 | 25 | template 26 | topk_vector(Range const& v) 27 | { 28 | cartesian_tree(v, std::greater::type>()) 29 | .swap(m_cartesian_tree); 30 | vector_type(v).swap(m_v); 31 | } 32 | 33 | value_type const 34 | operator[](uint64_t idx) const 35 | { 36 | return m_v[idx]; 37 | } 38 | 39 | uint64_t size() const 40 | { 41 | return m_v.size(); 42 | } 43 | 44 | class enumerator 45 | { 46 | public: 47 | enumerator() 48 | : m_topkv(0) 49 | {} 50 | 51 | bool next() 52 | { 53 | using boost::tie; 54 | if (m_q.empty()) return false; 55 | 56 | value_type cur_mid_val; 57 | uint64_t cur_mid, cur_a, cur_b; 58 | 59 | std::pop_heap(m_q.begin(), m_q.end(), value_index_comparator()); 60 | tie(cur_mid_val, cur_mid, cur_a, cur_b) = m_q.back(); 61 | m_q.pop_back(); 62 | 63 | m_cur = entry_type(cur_mid_val, cur_mid); 64 | 65 | if (cur_mid != cur_a) { 66 | uint64_t m = m_topkv->m_cartesian_tree.rmq(cur_a, cur_mid - 1); 67 | m_q.push_back(queue_element_type(m_topkv->m_v[m], m, cur_a, cur_mid - 1)); 68 | 
// --- topk_vector.hpp: enumerator internals -----------------------------------
// Continues next(): after popping the winner of segment [cur_a, cur_b] at
// position cur_mid, the two sub-segments that exclude cur_mid are re-seeded
// with their own rmq winners and pushed back onto the max-heap (push_heap
// after each push_back keeps the heap invariant). Guards cur_mid != cur_a /
// cur_mid != cur_b avoid empty sub-ranges (and uint64 wrap at cur_mid - 1).
//  * value() returns the entry produced by the last successful next().
//  * swap() exchanges all three members via ADL swap.
//  * set(topkv, a, b) (private; driven by topk_vector through the friend
//    declaration) asserts a <= b, resets state, and seeds the queue with the
//    single segment [a, b] keyed by its rmq winner.
//  * queue_element_type is a boost::tuple — by how it is packed/unpacked
//    above, its fields are (value, index, range_begin, range_end); the
//    extraction stripped the actual template arguments.
//  * value_index_comparator: lexicographic "less" — increasing on value and,
//    on ties, DECREASING on index (per its comment and the return
//    expression) — so the std:: heap algorithms, which build max-heaps,
//    surface the largest value with the smallest index first. Mirror image
//    of the test-side comparator in test_topk_vector.cpp.
//  * clear() detaches the enumerator; a default-constructed or cleared
//    enumerator has m_topkv == 0 and next() immediately returns false.
std::push_heap(m_q.begin(), m_q.end(), value_index_comparator()); 69 | } 70 | 71 | if (cur_mid != cur_b) { 72 | uint64_t m = m_topkv->m_cartesian_tree.rmq(cur_mid + 1, cur_b); 73 | m_q.push_back(queue_element_type(m_topkv->m_v[m], m, cur_mid + 1, cur_b)); 74 | std::push_heap(m_q.begin(), m_q.end(), value_index_comparator()); 75 | } 76 | 77 | return true; 78 | } 79 | 80 | entry_type const& value() const 81 | { 82 | return m_cur; 83 | } 84 | 85 | friend class topk_vector; 86 | 87 | void swap(enumerator& other) 88 | { 89 | using std::swap; 90 | swap(m_topkv, other.m_topkv); 91 | swap(m_q, other.m_q); 92 | swap(m_cur, other.m_cur); 93 | } 94 | 95 | private: 96 | 97 | void set(topk_vector const* topkv, uint64_t a, uint64_t b) 98 | { 99 | assert(a <= b); 100 | clear(); 101 | m_topkv = topkv; 102 | 103 | uint64_t m = m_topkv->m_cartesian_tree.rmq(a, b); 104 | m_q.push_back(queue_element_type(m_topkv->m_v[m], m, a, b)); 105 | } 106 | 107 | typedef boost::tuple queue_element_type; 108 | 109 | struct value_index_comparator { 110 | template 111 | bool operator()(Tuple const& a, Tuple const& b) const 112 | { 113 | using boost::get; 114 | // lexicographic, increasing on value and decreasing 115 | // on index 116 | return (get<0>(a) < get<0>(b) || 117 | (get<0>(a) == get<0>(b) && 118 | get<1>(a) > get<1>(b))); 119 | } 120 | }; 121 | 122 | public: 123 | void clear() 124 | { 125 | m_topkv = 0; 126 | m_q.clear(); 127 | } 128 | 129 | private: 130 | topk_vector const* m_topkv; 131 | std::vector m_q; 132 | entry_type m_cur; 133 | }; 134 | 135 | // NOTE this is b inclusive 136 | // XXX switch to [a, b) ? 
// --- topk_vector.hpp (tail) + util.hpp (head) --------------------------------
// topk_vector public API:
//  * get_topk_enumerator(a, b, ret) / get_topk_enumerator(a, b): seed an
//    enumerator over the INCLUSIVE range [a, b] (see the NOTE above).
//  * topk(a, b, k): materialize min(b - a + 1, k) entries by repeatedly
//    calling next(); asserts the enumerator never runs dry early, and that
//    it is exhausted exactly when fewer than k entries exist.
//  * map(): mapper-framework serialization hook; swap(): member-wise swap.
// util.hpp:
//  * trim_newline_chars(s): strips any trailing run of '\r'/'\n' in place.
//  * fast_getline(line, input, trim_newline): buffered line reader, faster
//    than std::getline per its comment; reads 64 KiB chunks with fgets until
//    a chunk ends in '\n' or EOF; returns false only on EOF with no data.
//    FIX below (in this edit): on EOF with a non-empty partial line the old
//    code set done = true and then STILL fell through to "line += buffer;",
//    re-appending the stale contents of the previous fgets call — a file
//    whose last line has no trailing newline had its final chunk duplicated.
//    Replaced with "break;" so the loop exits before touching buffer.
//  * line_iterator: single-pass Boost iterator_facade over a FILE*, pulling
//    lines via fast_getline; a drained iterator sets m_file = 0 so it
//    compares equal to the default-constructed end iterator.
//  * lines(ifs): convenience (begin, end) pair. auto_file: RAII fopen/fclose
//    wrapper (pre-C++11 noncopyable via private copy ops); throws
//    std::invalid_argument if fopen fails. char_range / identity_adaptor
//    begin here (continued in the next chunk).
// NOTE(review): collapsed repo-dump text — "NN | " markers are per-file line
// numbers; template argument lists and #include targets were stripped.
137 | void get_topk_enumerator(uint64_t a, uint64_t b, enumerator& ret) const 138 | { 139 | ret.set(this, a, b); 140 | } 141 | 142 | enumerator get_topk_enumerator(uint64_t a, uint64_t b) const 143 | { 144 | enumerator ret; 145 | get_topk_enumerator(a, b, ret); 146 | return ret; 147 | } 148 | 149 | entry_vector_type 150 | topk(uint64_t a, uint64_t b, size_t k) const 151 | { 152 | entry_vector_type ret(std::min(size_t(b - a + 1), k)); 153 | enumerator it = get_topk_enumerator(a, b); 154 | 155 | bool hasnext; 156 | for (size_t i = 0; i < ret.size(); ++i) { 157 | hasnext = it.next(); 158 | assert(hasnext); (void)hasnext; 159 | ret[i] = it.value(); 160 | } 161 | 162 | assert(ret.size() == k || !it.next()); 163 | 164 | return ret; 165 | } 166 | 167 | 168 | template 169 | void map(Visitor& visit) 170 | { 171 | visit 172 | (m_v, "m_v") 173 | (m_cartesian_tree, "m_cartesian_tree"); 174 | } 175 | 176 | void swap(topk_vector& other) 177 | { 178 | other.m_v.swap(m_v); 179 | other.m_cartesian_tree.swap(m_cartesian_tree); 180 | } 181 | 182 | protected: 183 | 184 | vector_type m_v; 185 | cartesian_tree m_cartesian_tree; 186 | }; 187 | 188 | } 189 | -------------------------------------------------------------------------------- /util.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | namespace succinct { namespace util { 14 | 15 | inline void trim_newline_chars(std::string& s) 16 | { 17 | size_t l = s.size(); 18 | while (l && (s[l-1] == '\r' || 19 | s[l-1] == '\n')) { 20 | --l; 21 | } 22 | s.resize(l); 23 | } 24 | 25 | // this is considerably faster than std::getline 26 | inline bool fast_getline(std::string& line, FILE* input = stdin, bool trim_newline = false) 27 | { 28 | line.clear(); 29 | static const size_t max_buffer = 65536; 30 | char buffer[max_buffer]; 31 | bool done = false; 32 | while (!done) { 33 | if 
(!fgets(buffer, max_buffer, input)) { 34 | if (!line.size()) { 35 | return false; 36 | } else { 37 | break; /* FIX: was "done = true;" — execution then fell through to line 40 and appended the stale fgets buffer, duplicating the last chunk of an unterminated final line */ 38 | } 39 | } 40 | line += buffer; 41 | if (*line.rbegin() == '\n') { 42 | done = true; 43 | } 44 | } 45 | if (trim_newline) { 46 | trim_newline_chars(line); 47 | } 48 | return true; 49 | } 50 | 51 | class line_iterator 52 | : public boost::iterator_facade 56 | { 57 | public: 58 | line_iterator() 59 | : m_file(0) 60 | {} 61 | 62 | explicit line_iterator(FILE* input, bool trim_newline = false) 63 | : m_file(input) 64 | , m_trim_newline(trim_newline) 65 | {} 66 | 67 | private: 68 | friend class boost::iterator_core_access; 69 | 70 | void increment() { 71 | assert(m_file); 72 | if (!fast_getline(m_line, m_file, m_trim_newline)) { 73 | m_file = 0; 74 | } 75 | } 76 | 77 | bool equal(line_iterator const& other) const 78 | { 79 | return this->m_file == other.m_file; 80 | } 81 | 82 | std::string const& dereference() const { 83 | return m_line; 84 | } 85 | 86 | std::string m_line; 87 | FILE* m_file; 88 | bool m_trim_newline; 89 | }; 90 | 91 | typedef std::pair line_range_t; 92 | 93 | inline line_range_t lines(FILE* ifs, bool trim_newline = false) { 94 | return std::make_pair(line_iterator(ifs, trim_newline), line_iterator()); 95 | } 96 | 97 | struct auto_file { 98 | 99 | auto_file(const char* name, const char* mode = "rb") 100 | : m_file(0) 101 | { 102 | m_file = fopen(name, mode); 103 | if(!m_file) { 104 | std::string msg("Unable to open file '"); 105 | msg += name; 106 | msg += "'."; 107 | throw std::invalid_argument(msg); 108 | 109 | } 110 | } 111 | 112 | ~auto_file() 113 | { 114 | if(m_file) { 115 | fclose(m_file); 116 | } 117 | } 118 | 119 | FILE* get() 120 | { 121 | return m_file; 122 | } 123 | 124 | private: 125 | auto_file(); 126 | auto_file( const auto_file & ); 127 | auto_file & operator=( const auto_file & ); 128 | 129 | FILE * m_file; 130 | }; 131 | 132 | typedef std::pair char_range; 133 | 134 | struct identity_adaptor 135 | { 136 | char_range 
// --- util.hpp: string adaptors + in-memory line iteration --------------------
//  * identity_adaptor::operator(): pass-through — returns the char_range
//    unchanged (used where a key-extraction functor is expected).
//  * stl_string_adaptor: exposes a std::string as a (uint8_t*, uint8_t*)
//    range that DELIBERATELY includes the '\0' terminator (per its own
//    comment) — the range length is s.size() + 1.
//  * buffer_line_iterator: forward Boost iterator_facade over '\n'-separated
//    lines of an in-memory buffer. End-of-sequence is encoded as
//    m_cur_pos == 0, which is what the default constructor produces and what
//    equal() compares. increment() scans to the next '\n' (or buffer end),
//    strips a trailing '\r' (CRLF input), and materializes the line into
//    m_cur_value; the begin-constructor calls increment() once to prime the
//    first line. Note "++m_cur_pos" may step one past m_end after a final
//    line with no terminating newline — the ">= m_end" guard on the next
//    call turns that into end-of-sequence, so the last line is not lost.
//  * mmap_lines (begun here, finished in the next chunk): range adaptor that
//    memory-maps a file and serves buffer_line_iterators over it.
// NOTE(review): collapsed repo-dump text — template argument lists were
// stripped by the extraction (e.g. the iterator_facade base parameters).
operator()(char_range s) const 137 | { 138 | return s; 139 | } 140 | }; 141 | 142 | struct stl_string_adaptor 143 | { 144 | char_range operator()(std::string const& s) const 145 | { 146 | const uint8_t* buf = reinterpret_cast(s.c_str()); 147 | const uint8_t* end = buf + s.size() + 1; // add the null terminator 148 | return char_range(buf, end); 149 | } 150 | }; 151 | 152 | class buffer_line_iterator 153 | : public boost::iterator_facade 157 | { 158 | public: 159 | buffer_line_iterator() 160 | : m_buffer(0) 161 | , m_end(0) 162 | , m_cur_pos(0) 163 | {} 164 | 165 | buffer_line_iterator(const char* buffer, size_t size) 166 | : m_buffer(buffer) 167 | , m_end(buffer + size) 168 | , m_cur_pos(buffer) 169 | { 170 | increment(); 171 | } 172 | 173 | private: 174 | friend class boost::iterator_core_access; 175 | 176 | void increment() { 177 | assert(m_cur_pos); 178 | if (m_cur_pos >= m_end) { 179 | m_cur_pos = 0; 180 | return; 181 | } 182 | const char* begin = m_cur_pos; 183 | while (m_cur_pos < m_end && *m_cur_pos != '\n') { 184 | ++m_cur_pos; 185 | } 186 | const char* end = m_cur_pos; 187 | ++m_cur_pos; // skip the newline 188 | 189 | if (begin != end && *(end - 1) == '\r') { 190 | --end; 191 | } 192 | m_cur_value = std::string(begin, size_t(end - begin)); 193 | } 194 | 195 | bool equal(buffer_line_iterator const& other) const 196 | { 197 | return m_cur_pos == other.m_cur_pos; } 198 | 199 | std::string const& dereference() const 200 | { 201 | assert(m_cur_pos); 202 | return m_cur_value; 203 | } 204 | 205 | const char* m_buffer; 206 | const char* m_end; 207 | const char* m_cur_pos; 208 | std::string m_cur_value; 209 | }; 210 | 211 | struct mmap_lines 212 | { 213 | typedef buffer_line_iterator iterator; 214 | typedef buffer_line_iterator const_iterator; 215 | 216 | mmap_lines(std::string filename) 217 | : m_map(filename) 218 | {} 219 | 220 | const_iterator begin() const 221 | { 222 | return const_iterator(m_map.data(), m_map.size()); 223 | } 224 | 225 | const_iterator end() 
// --- util.hpp (tail) + vbyte.hpp (head) --------------------------------------
//  * mmap_lines::end(): default-constructed (end) buffer_line_iterator; the
//    backing store is a boost::iostreams::mapped_file_source member.
//  * input_error: thin std::invalid_argument subclass for parse failures.
//  * dispose(t): swap-with-default-constructed-temporary — the pre-C++11
//    idiom to actually release a container's capacity.
//  * int2nat / nat2int: the usual zig-zag bijection between int64 and uint64
//    (negatives -> odd, non-negatives -> even), inverse of each other.
//    NOTE(review): "-2 * x" and "2 * x" are evaluated in SIGNED arithmetic,
//    so x == INT64_MIN or x > INT64_MAX/2 overflows (UB) before the cast —
//    verify callers stay within half the int64 range, or hoist the cast.
//  * ceil_div(dividend, divisor): ceiling division; its own XXX notes the
//    missing width check, and "dividend + d - 1" can wrap for dividends near
//    the type's max — presumably fine for the sizes used here; confirm.
//  * vbyte.hpp: vbyte_size(val) = number of 7-bit groups needed, computed
//    from broadword::msb (which, per the fallback "bits = 0", signals a
//    zero input via its return value) — so val == 0 still takes 1 byte.
//    append_vbyte (begun here, finished in the next chunk) emits the groups
//    most-significant first; "b + 1 > 0" is the unsigned count-down idiom.
// NOTE(review): collapsed repo-dump text — template parameter lists were
// stripped (e.g. "template 241 |", "template 14 |").
const 226 | { 227 | return const_iterator(); 228 | } 229 | 230 | private: 231 | boost::iostreams::mapped_file_source m_map; 232 | }; 233 | 234 | struct input_error : std::invalid_argument 235 | { 236 | input_error(std::string const& what) 237 | : invalid_argument(what) 238 | {} 239 | }; 240 | 241 | template 242 | inline void dispose(T& t) 243 | { 244 | T().swap(t); 245 | } 246 | 247 | inline uint64_t int2nat(int64_t x) 248 | { 249 | if (x < 0) { 250 | return uint64_t(-2 * x - 1); 251 | } else { 252 | return uint64_t(2 * x); 253 | } 254 | } 255 | 256 | inline int64_t nat2int(uint64_t n) 257 | { 258 | if (n % 2) { 259 | return -int64_t((n + 1) / 2); 260 | } else { 261 | return int64_t(n / 2); 262 | } 263 | } 264 | 265 | template 266 | inline IntType1 ceil_div(IntType1 dividend, IntType2 divisor) 267 | { 268 | // XXX(ot): put some static check that IntType1 >= IntType2 269 | IntType1 d = IntType1(divisor); 270 | return IntType1(dividend + d - 1) / d; 271 | } 272 | 273 | }} 274 | -------------------------------------------------------------------------------- /vbyte.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "broadword.hpp" 4 | 5 | namespace succinct { 6 | 7 | inline size_t vbyte_size(size_t val) 8 | { 9 | unsigned long bits; 10 | if (!broadword::msb(val, bits)) bits = 0; 11 | return util::ceil_div(bits + 1, 7); 12 | } 13 | 14 | template 15 | inline size_t append_vbyte(Vector& v, size_t val) 16 | { 17 | size_t chunks = vbyte_size(val); 18 | for (size_t b = chunks - 1; b + 1 > 0; --b) { 19 | uint8_t chunk = (val >> (b * 7)) & 0x7F; 20 | chunk |= b ? 
// --- vbyte.hpp (tail) --------------------------------------------------------
//  * append_vbyte (continued): the continuation bit 0x80 is set on every
//    chunk except the last (b == 0), and chunks are pushed most-significant
//    first; returns the number of bytes appended.
//  * decode_vbyte(v, offset, val): inverse of append_vbyte — accumulates
//    7 bits per byte starting at offset until a byte without the 0x80
//    continuation bit; stores the result in val and returns the number of
//    bytes consumed.
//    NOTE(review): there is no bounds check against v's size — a truncated
//    or corrupt stream whose last byte still has 0x80 set reads past the
//    end; callers must guarantee a properly terminated encoding. Values
//    wider than the size_t accumulator would also silently shift bits out —
//    bounded in practice by what append_vbyte can produce.
0x80 : 0; 21 | v.push_back(chunk); 22 | } 23 | return chunks; 24 | } 25 | 26 | template 27 | inline size_t decode_vbyte(Vector const& v, size_t offset, size_t& val) 28 | { 29 | size_t pos = offset; 30 | val = 0; 31 | uint8_t chunk; 32 | do { 33 | chunk = v[pos++]; 34 | val <<= 7; 35 | val |= chunk & 0x7F; 36 | } while (chunk & 0x80); 37 | 38 | return pos - offset; 39 | } 40 | 41 | } 42 | --------------------------------------------------------------------------------