├── .clang-format ├── .gitignore ├── .travis.yml ├── CMakeLists.txt ├── CONTRIBUTING.md ├── EXAMPLES.md ├── LICENSE ├── README.md ├── docs ├── .gitattributes ├── CMakeLists.txt ├── tw_bitmap.2 └── tw_bitmap.2.md ├── include ├── CMakeLists.txt ├── twiddle.h └── twiddle │ ├── bitmap │ ├── bitmap.h │ └── bitmap_rle.h │ ├── bloomfilter │ ├── bloomfilter.h │ └── bloomfilter_a2.h │ ├── hash │ └── minhash.h │ ├── hyperloglog │ └── hyperloglog.h │ └── utils │ ├── hash.h │ └── projection.h ├── python ├── setup.py ├── tests │ ├── test_bitmap.py │ ├── test_bitmap_rle.py │ ├── test_bloomfilter.py │ ├── test_bloomfilter_a2.py │ ├── test_helpers.py │ ├── test_hyperloglog.py │ └── test_minhash.py └── twiddle │ ├── __init__.py │ ├── bitmap.py │ ├── bitmap_rle.py │ ├── bloomfilter.py │ ├── bloomfilter_a2.py │ ├── c.py │ ├── hyperloglog.py │ └── minhash.py ├── src ├── CMakeLists.txt ├── libtwiddle.pc.in └── twiddle │ ├── bitmap │ ├── bitmap.c │ └── bitmap_rle.c │ ├── bloomfilter │ ├── bloomfilter.c │ └── bloomfilter_a2.c │ ├── hash │ └── minhash.c │ ├── hyperloglog │ ├── hyperloglog.c │ ├── hyperloglog_bias.c │ └── hyperloglog_simd.c │ ├── macrology.h │ └── utils │ ├── hash.c │ ├── internal.h │ ├── metrohash.c │ └── murmur3.c ├── tests ├── .gitattributes ├── CMakeLists.txt ├── benchmarks │ ├── CMakeLists.txt │ ├── bench-bitmap.c │ ├── bench-bloomfilter.c │ ├── bench-minhash.c │ └── benchmark.h ├── check │ ├── CMakeLists.txt │ ├── COPYING.LESSER │ ├── Makefile.am │ ├── check.c │ ├── check.h │ ├── check.pc.in │ ├── check_error.c │ ├── check_error.h │ ├── check_impl.h │ ├── check_list.c │ ├── check_list.h │ ├── check_log.c │ ├── check_log.h │ ├── check_msg.c │ ├── check_msg.h │ ├── check_pack.c │ ├── check_pack.h │ ├── check_print.c │ ├── check_print.h │ ├── check_run.c │ ├── check_str.c │ ├── check_str.h │ └── libcompat.h ├── examples │ ├── CMakeLists.txt │ ├── bf-uniq.c │ ├── bf-uniq.t │ ├── example-bitmap-rle.c │ ├── example-bitmap.c │ ├── example-bloomfilter-a2.c │ ├── 
example-bloomfilter.c │ ├── example-hyperloglog.c │ ├── example-minhash.c │ ├── hll-wc.c │ └── hll-wc.t ├── test-bitmap-rle.c ├── test-bitmap.c ├── test-bloomfilter-a2.c ├── test-bloomfilter.c ├── test-hyperloglog.c ├── test-minhash.c └── test.h └── tools ├── cmake ├── FindCTargets.cmake ├── FindLTO.cmake ├── FindOptions.cmake ├── FindParseArguments.cmake ├── FindPrereqs.cmake └── FindVersion.cmake ├── cram ├── COPYING.cram.txt ├── ccram └── cram.py ├── docurium ├── Gemfile ├── api.docurium └── gen-doc ├── git └── hook-clang-format └── travis └── test /.clang-format: -------------------------------------------------------------------------------- 1 | Language: Cpp 2 | BasedOnStyle: LLVM 3 | IndentWidth: 2 4 | ColumnLimit: 80 5 | UseTab: Never 6 | BreakBeforeBraces: Linux 7 | BinPackArguments: true 8 | BinPackParameters: true 9 | AllowShortIfStatementsOnASingleLine: false 10 | IndentCaseLabels: false 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /build*/ 2 | /.build/ 3 | /tmp/ 4 | /python/.venv/ 5 | /python/.hypothesis/ 6 | /python/.cache/ 7 | 8 | 9 | libtwiddle.pc 10 | .gdb_history 11 | 12 | tools/docurium/.bundle/ 13 | tools/docurium/Gemfile.lock 14 | tools/docurium/bin/ 15 | tools/docurium/vendor/ 16 | tools/docurium/mkmf.log 17 | 18 | /libtwiddle*.tar.gz 19 | /libtwiddle*.tar.bz2 20 | 21 | *.[oa] 22 | *.os 23 | *.so 24 | *.dylib 25 | *.pyc 26 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: required 2 | dist: trusty 3 | 4 | matrix: 5 | include: 6 | - env: GCC_VERSION=4.8 7 | addons: &gcc48 8 | apt: 9 | packages: 10 | - gcc-4.8 11 | - g++-4.8 12 | - valgrind 13 | - check 14 | - pkg-config 15 | - cmake 16 | - cmake-data 17 | - python-virtualenv 18 | sources: 19 | - ubuntu-toolchain-r-test 20 
| - george-edison55-precise-backports 21 | 22 | - env: GCC_VERSION=4.9 23 | addons: &gcc49 24 | apt: 25 | packages: 26 | - gcc-4.9 27 | - g++-4.9 28 | - valgrind 29 | - check 30 | - pkg-config 31 | - cmake 32 | - cmake-data 33 | - python-virtualenv 34 | sources: 35 | - ubuntu-toolchain-r-test 36 | - george-edison55-precise-backports 37 | 38 | - env: GCC_VERSION=5 39 | addons: &gcc50 40 | apt: 41 | packages: 42 | - gcc-5 43 | - g++-5 44 | - valgrind 45 | - check 46 | - pkg-config 47 | - cmake 48 | - cmake-data 49 | - python-virtualenv 50 | sources: 51 | - ubuntu-toolchain-r-test 52 | - george-edison55-precise-backports 53 | 54 | - env: CLANG_VERSION=3.5 55 | addons: &clang35 56 | apt: 57 | packages: 58 | - clang-3.5 59 | - llvm-3.5 60 | - valgrind 61 | - check 62 | - pkg-config 63 | - cmake 64 | - cmake-data 65 | - python-virtualenv 66 | sources: 67 | - ubuntu-toolchain-r-test 68 | - george-edison55-precise-backports 69 | - llvm-toolchain-precise-3.5 70 | 71 | - env: CLANG_VERSION=3.6 72 | addons: &clang36 73 | apt: 74 | packages: 75 | - clang-3.6 76 | - llvm-3.6 77 | - valgrind 78 | - check 79 | - pkg-config 80 | - cmake 81 | - cmake-data 82 | - python-virtualenv 83 | sources: 84 | - ubuntu-toolchain-r-test 85 | - george-edison55-precise-backports 86 | - llvm-toolchain-precise-3.6 87 | 88 | 89 | script: 90 | - cd ${TRAVIS_BUILD_DIR} && tools/travis/test 91 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.6) 2 | set(PROJECT_NAME libtwiddle) 3 | set(RELEASE_DATE 2016-05-07) 4 | project(${PROJECT_NAME}) 5 | include(CTest) 6 | 7 | option(USE_AVX "Enable AVX instructions" ON) 8 | option(USE_AVX2 "Enable AVX2 instructions" OFF) 9 | option(USE_AVX512 "Enable AVX512 instructions" OFF) 10 | option(USE_LTO "Enable LTO compilation optimization" ON) 11 | option(USE_STATIC_PIC "Enable -fPIC on static library" 
OFF) 12 | 13 | set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/tools/cmake") 14 | find_package(ParseArguments) 15 | find_package(Prereqs) 16 | find_package(CTargets) 17 | find_package(Version) 18 | find_package(Options) 19 | find_package(LTO) 20 | 21 | add_subdirectory(docs) 22 | add_subdirectory(include) 23 | add_subdirectory(src) 24 | add_subdirectory(tests) 25 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | Formatting 2 | ========== 3 | 4 | `clang-format` enforces a standard style for C code. You can find a git 5 | pre-commit hook in `tools/git/hook-clang-format` that will ensure every source 6 | code file respects the style. 7 | 8 | Test infrastructure 9 | =================== 10 | 11 | libtwiddle is backed by standard unit checks implemented with the Check 12 | library. Minimal Python bindings exist to test more thoroughly with the 13 | property-testing library [Hypothesis](https://github.com/DRMacIver/hypothesis). 14 | 15 | 16 | When developing, we recommend using the `Debug` build type, as it 17 | will enable debugging symbols: 18 | 19 | ``` 20 | $ mkdir build && cd build && cmake -DCMAKE_BUILD_TYPE=Debug .. 21 | $ make clean 22 | $ make all 23 | $ make test 24 | ... 25 | ``` 26 | 27 | One can also invoke the test suite with the `tools/travis/test` script which 28 | should create a grid of build directories: `build-{portable,avx,avx2}` and run 29 | the tests on each build directory. It should automagically detect CPU support for 30 | the required instruction sets. 31 | 32 | To test with Hypothesis, one must prepare a virtualenv and install 33 | dependencies. This is only needed once. 34 | 35 | ``` 36 | $ # setup virtualenv for pip 37 | $ cd python && virtualenv .venv && source .venv/bin/activate 38 | $ # install dependencies 39 | $ pip install pytest hypothesis==1.19.0 40 | $ # run property tests 41 | $ PYTHONPATH=.
py.test tests 42 | ``` 43 | 44 | One can also invoke the travis test script to run the integration tests: 45 | `RUN_HYPOTHESIS=true tools/travis/test`. 46 | 47 | Travis 48 | ------ 49 | 50 | Travis will try to build and run the test infrastructure on each commit. 51 | To speed up feedback, integration tests are only run on pull-request branches 52 | from GitHub. 53 | 54 | Debugging 55 | --------- 56 | 57 | Check forks a process for each test, making it hard to debug. To work around this, 58 | before entering gdb, you can do `export CK_FORK=no` 59 | and Check will stop forking. 60 | 61 | 62 | Adding a new data structure 63 | =========================== 64 | 65 | Assume you want to add a new data structure with shortname `ds` in group 66 | `ds_group`. 67 | 68 | Creating a new data structure involves the following steps: 69 | 70 | * add header file `include/twiddle/ds_group/ds.h` 71 | * add implementation file `src/twiddle/ds_group/ds.c` 72 | * update `src/CMakeLists.txt` and add a reference to the previous file 73 | 74 | Then, one must implement tests: 75 | 76 | * create unit test in `tests/test-ds.c` 77 | * update `tests/CMakeLists.txt` and add a reference to the previous file 78 | * create usage example in `tests/examples/example-ds.c` 79 | * update `tests/examples/CMakeLists.txt` and add a reference to the previous file 80 | 81 | Finally, Python bindings and tests must be added: 82 | 83 | * update python-c bindings in `python/twiddle/c.py` 84 | * add Python class in `python/twiddle/ds.py` 85 | * add fuzzing test in `python/tests/test_ds.py` 86 | -------------------------------------------------------------------------------- /EXAMPLES.md: -------------------------------------------------------------------------------- 1 | EXAMPLES 2 | ======== 3 | 4 | bitmap 5 | ------ 6 | 7 | ```C 8 | #include <assert.h> 9 | #include <twiddle/bitmap/bitmap.h> 10 | 11 | int main() { 12 | const uint64_t nbits = 1024; 13 | struct tw_bitmap* bitmap = tw_bitmap_new(nbits); 14 | 15 | assert(bitmap); 16 | 17 | tw_bitmap_set(bitmap, 512); 18 |
assert(tw_bitmap_test_and_clear(bitmap, 512)); 19 | assert(!tw_bitmap_test(bitmap, 512)); 20 | 21 | tw_bitmap_set(bitmap, 768); 22 | assert(tw_bitmap_find_first_bit(bitmap) == 768); 23 | 24 | tw_bitmap_free(bitmap); 25 | 26 | return 0; 27 | } 28 | ``` 29 | 30 | bitmap-rle 31 | ------ 32 | 33 | ```C 34 | #include <assert.h> 35 | #include <twiddle/bitmap/bitmap_rle.h> 36 | 37 | int main() { 38 | /** allocate a bitmap containing 2 billion bits */ 39 | const uint64_t nbits = 1UL << 31; 40 | struct tw_bitmap_rle* bitmap = tw_bitmap_rle_new(nbits); 41 | 42 | assert(bitmap); 43 | 44 | /** fill 1 billion bits */ 45 | const uint64_t start = 0UL; 46 | const uint64_t end = 1UL << 30; 47 | tw_bitmap_rle_set_range(bitmap, start, end); 48 | 49 | /** 50 | * bitmap_rle DOES NOT support setting bits in non-sorted order, e.g. 51 | * calling the following will trigger an assert() (or undefined behaviour 52 | * if compiled with NDEBUG): 53 | * 54 | * tw_bitmap_rle_set(bitmap, start - 1); 55 | */ 56 | 57 | assert(tw_bitmap_rle_test(bitmap, start)); 58 | assert(tw_bitmap_rle_test(bitmap, end)); 59 | assert(tw_bitmap_rle_find_first_bit(bitmap) == (int64_t)start); 60 | assert(tw_bitmap_rle_find_first_zero(bitmap) == (int64_t)end + 1); 61 | 62 | tw_bitmap_rle_free(bitmap); 63 | 64 | return 0; 65 | } 66 | ``` 67 | 68 | bloomfilter 69 | ----------- 70 | 71 | ```C 72 | #include <assert.h> 73 | #include <string.h> 74 | 75 | #include <twiddle/bloomfilter/bloomfilter.h> 76 | 77 | int main() { 78 | const uint64_t nbits = 1024; 79 | const uint16_t k = 7; 80 | struct tw_bloomfilter *bf = tw_bloomfilter_new(nbits, k); 81 | assert(bf); 82 | 83 | const char *values[] = {"herp", "derp", "ferp", "merp"}; 84 | 85 | for (size_t i = 0; i < ((sizeof(values) / sizeof(values[0]))); ++i) { 86 | tw_bloomfilter_set(bf, values[i], strlen(values[i])); 87 | assert(tw_bloomfilter_test(bf, values[i], strlen(values[i]))); 88 | } 89 | 90 | assert(!tw_bloomfilter_test(bf, "nope", sizeof("nope"))); 91 | 92 | return 0; 93 | } 94 | ``` 95 | 96 | hyperloglog 97 | ----------- 98 | 99 | ```C 100 | #include 101 |
#include 102 | 103 | #include 104 | 105 | int main() { 106 | const uint8_t precision = 16; 107 | struct tw_hyperloglog *hll = tw_hyperloglog_new(precision); 108 | assert(hll); 109 | 110 | const uint32_t n_elems = 10 * (1 << precision); 111 | for (size_t i = 0; i < n_elems ; ++i) { 112 | tw_hyperloglog_add(hll, sizeof(i), (char *) &i); 113 | } 114 | 115 | printf("estimated count: %f, real count: %d\n", 116 | tw_hyperloglog_count(hll), 117 | n_elems); 118 | 119 | tw_hyperloglog_free(hll); 120 | 121 | return 0; 122 | } 123 | ``` 124 | 125 | minhash 126 | ----------- 127 | 128 | ```C 129 | #include 130 | #include 131 | 132 | #include 133 | 134 | int main() 135 | { 136 | const uint32_t n_registers = 1 << 13; 137 | struct tw_minhash *a = tw_minhash_new(n_registers); 138 | assert(a); 139 | struct tw_minhash *b = tw_minhash_clone(a); 140 | assert(b); 141 | 142 | const uint32_t n_elems = 10 * n_registers; 143 | for (size_t i = 0; i < n_elems; ++i) { 144 | if (i % 3 == 0) { 145 | tw_minhash_add(a, (char *)&i, sizeof(i)); 146 | } 147 | 148 | if (i % 5 == 0) { 149 | tw_minhash_add(b, (char *)&i, sizeof(i)); 150 | } 151 | } 152 | 153 | printf("estimated jaccard: %f\n", tw_minhash_estimate(a, b)); 154 | 155 | tw_minhash_free(b); 156 | tw_minhash_free(a); 157 | 158 | return 0; 159 | } 160 | ``` 161 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (C) 2015-2016 François Saint-Jacques 2 | 3 | This program is free software: you can redistribute it and/or modify 4 | it under the terms of the GNU Lesser General Public License as published by 5 | the Free Software Foundation, either version 3 of the License, or 6 | (at your option) any later version. 7 | 8 | This program is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 11 | GNU Lesser General Public License for more details. 12 | 13 | You should have received a copy of the GNU Lesser General Public License 14 | along with this program. If not, see <http://www.gnu.org/licenses/>. 15 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | libtwiddle 2 | ========== 3 | [![Build Status](https://travis-ci.org/fsaintjacques/libtwiddle.svg?branch=develop)](https://travis-ci.org/fsaintjacques/libtwiddle) 4 | [![License](https://img.shields.io/badge/license-LGPL--3.0-blue.svg?style=flat)](https://github.com/fsaintjacques/libtwiddle/blob/develop/LICENSE) 5 | 6 | 7 | libtwiddle is a data structure library aiming for speed on modern 8 | Linux x86-64 systems. The following data structures are implemented: 9 | 10 | * bitmaps (dense & RLE); 11 | * Bloom filters (standard & active-active); 12 | * HyperLogLog; 13 | * MinHash. 14 | 15 | See [`EXAMPLES.md`](EXAMPLES.md) and 16 | [`tests/examples/`](tests/examples/) for examples. 17 | 18 | Why should you use libtwiddle? 19 | ------------------------------ 20 | 21 | * Written for the modern age; `gcc >= 4.8`, using C11 features, on x86-64 22 | * Extensive testing; unit tests and random property testing with `hypothesis` 23 | * Python bindings; courtesy of property testing 24 | * Vectorized implementations; AVX, AVX2, and partial support for AVX512 25 | * Continuous integration 26 | 27 | Install 28 | ======= 29 | 30 | libtwiddle uses CMake as its build system. 31 | 32 | Prerequisite libraries 33 | ---------------------- 34 | 35 | To build libtwiddle, you need the following: 36 | 37 | * a recent C compiler (`gcc >= 4.8` or `clang >= 3.5`); 38 | * the CMake build tool (`cmake >= 3.0`). 39 | 40 | Building from source 41 | -------------------- 42 | 43 | In most cases, you should be able to build the source code using the following: 44 | 45 | $ mkdir build 46 | $ cd build 47 | $ cmake ..
-DCMAKE_INSTALL_PREFIX=$PREFIX 48 | $ make 49 | $ make test 50 | $ make install 51 | 52 | Building with SIMD support 53 | -------------------------- 54 | 55 | By default, libtwiddle will compile with AVX SIMD instructions. Use 56 | the following flags to enable newer instructions: 57 | 58 | * For AVX2: `-DUSE_AVX2=ON`; 59 | * for AVX512: `-DUSE_AVX512=ON`. 60 | 61 | Note that AVX2 implies AVX, and AVX512 implies AVX2. Some functions 62 | can't be implemented with AVX512, and will fallback to AVX2 code. 63 | 64 | To compile without SIMD support, invoke CMake with `-DUSE_AVX=OFF 65 | -DUSE_AVX2=OFF -DUSE_AVX512=OFF`. 66 | 67 | Contributions 68 | ------------- 69 | 70 | Contributions are more than welcome, see 71 | [`CONTRIBUTING.md`](CONTRIBUTING.md) for details. 72 | -------------------------------------------------------------------------------- /docs/.gitattributes: -------------------------------------------------------------------------------- 1 | *.graffle -diff -whitespace 2 | /*.[1-9] -diff -whitespace 3 | -------------------------------------------------------------------------------- /docs/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Fill in this with the text that you want to include in the header and footer 2 | # of each man page. 3 | 4 | set(MAN_HEADER "libtwiddle documentation") 5 | set(MAN_FOOTER "libtwiddle") 6 | 7 | # Fill this in with any man pages that should be built from a pandoc source 8 | # file. For a man page called foo.5, there should be a pandoc source file 9 | # called foo.5.md. 10 | 11 | set(MAN_PAGES 12 | tw_bitmap.2 13 | ) 14 | 15 | #----------------------------------------------------------------------- 16 | # Everything below is boilerplate! 
17 | 18 | find_program( 19 | PANDOC_EXECUTABLE 20 | NAMES pandoc 21 | HINTS ENV PANDOC_DIR 22 | PATH_SUFFIXES bin 23 | DOC "Pandoc man page generator" 24 | ) 25 | 26 | set(GENERATE_DOC TRUE CACHE BOOL 27 | "Whether to rebuild documentation") 28 | 29 | if (NOT PANDOC_EXECUTABLE) 30 | message(WARNING "Unable to find pandoc documentation generator") 31 | set(GENERATE_DOC FALSE) 32 | endif (NOT PANDOC_EXECUTABLE) 33 | 34 | 35 | # Link man pages go in docs/links 36 | 37 | macro(install_links section) 38 | file(GLOB links "links/*.${section}") 39 | if (links) 40 | install( 41 | FILES ${links} 42 | DESTINATION "share/man/man${section}" 43 | ) 44 | endif (links) 45 | endmacro(install_links section) 46 | 47 | install_links(1) # commands 48 | install_links(3) # library API 49 | install_links(4) # special files and drivers 50 | install_links(5) # file formats and conventions 51 | install_links(7) # miscellaneous 52 | install_links(8) # system commands 53 | 54 | 55 | # Man pages with actual content go in docs 56 | 57 | set(ALL_MANS) 58 | 59 | macro(pandocify name) 60 | set(src "${CMAKE_CURRENT_SOURCE_DIR}/${name}.md") 61 | set(dest "${CMAKE_CURRENT_SOURCE_DIR}/${name}") 62 | get_filename_component(section "${name}" EXT) 63 | string(REPLACE "." "" section "${section}") 64 | 65 | # Only compile the markdown source into groff man pages if requested. 66 | if (GENERATE_DOC) 67 | add_custom_command( 68 | OUTPUT ${dest} 69 | COMMAND ${PANDOC_EXECUTABLE} 70 | -f markdown -t man -s --smart 71 | -V header="${MAN_HEADER}" 72 | -V footer="${MAN_FOOTER}" 73 | -V date=${RELEASE_DATE} 74 | -o ${dest} ${src} 75 | MAIN_DEPENDENCY ${src} 76 | COMMENT "Building ${name}" 77 | ) 78 | list(APPEND ALL_MANS ${dest}) 79 | endif (GENERATE_DOC) 80 | 81 | # We should always have an already-compiled copy of each man page in the 82 | # source tree, which we can install even if we didn't build fresh new 83 | # copies. 
84 | install( 85 | FILES ${dest} 86 | DESTINATION "share/man/man${section}" 87 | ) 88 | endmacro(pandocify) 89 | 90 | foreach(MAN_PAGE ${MAN_PAGES}) 91 | pandocify(${MAN_PAGE}) 92 | endforeach(MAN_PAGE) 93 | 94 | add_custom_target(doc ALL DEPENDS ${ALL_MANS}) 95 | -------------------------------------------------------------------------------- /docs/tw_bitmap.2: -------------------------------------------------------------------------------- 1 | .TH "tw_bitmap" "2" "2015-09-03" "libtwiddle" "libtwiddle\ documentation" 2 | .SH NAME 3 | .PP 4 | tw_bitmap \[en] libtwiddle basic bitmap implementation 5 | .SH SYNOPSIS 6 | .IP 7 | .nf 8 | \f[C] 9 | #include\ 10 | 11 | struct\ tw_bitmap\ *\ tw_bitmap_new(uint32_t\ nbits); 12 | void\ tw_bitmap_free(struct\ tw_bitmap\ *bitmap); 13 | 14 | void\ tw_bitmap_set(struct\ tw_bitmap\ *bitmap,\ uint32_t\ pos); 15 | void\ tw_bitmap_clear(struct\ tw_bitmap\ *bitmap,\ uint32_t\ pos); 16 | 17 | bool\ tw_bitmap_test(struct\ tw_bitmap\ *bitmap,\ uint32_t\ pos); 18 | \f[] 19 | .fi 20 | .SH DESCRIPTION 21 | .PP 22 | \f[B]tw_bitmap\f[] is a simple struct 23 | .SH RETURN VALUE 24 | .SH SEE ALSO 25 | .SH AUTHORS 26 | François Saint\-Jacques. 
27 | -------------------------------------------------------------------------------- /docs/tw_bitmap.2.md: -------------------------------------------------------------------------------- 1 | % tw_bitmap(2) 2 | % François Saint-Jacques 3 | % 2015-09-03 4 | 5 | # NAME 6 | 7 | tw_bitmap – libtwiddle basic bitmap implementation 8 | 9 | # SYNOPSIS 10 | 11 | ```c 12 | #include 13 | 14 | struct tw_bitmap * tw_bitmap_new(uint32_t nbits); 15 | void tw_bitmap_free(struct tw_bitmap *bitmap); 16 | 17 | void tw_bitmap_set(struct tw_bitmap *bitmap, uint32_t pos); 18 | void tw_bitmap_clear(struct tw_bitmap *bitmap, uint32_t pos); 19 | 20 | bool tw_bitmap_test(struct tw_bitmap *bitmap, uint32_t pos); 21 | ``` 22 | 23 | # DESCRIPTION 24 | 25 | **tw_bitmap** is a simple struct 26 | 27 | # RETURN VALUE 28 | 29 | # SEE ALSO 30 | -------------------------------------------------------------------------------- /include/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/ 2 | DESTINATION include 3 | FILES_MATCHING PATTERN "*.h") 4 | -------------------------------------------------------------------------------- /include/twiddle.h: -------------------------------------------------------------------------------- 1 | #ifndef TWIDDLE_H 2 | #define TWIDDLE_H 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | 10 | #include 11 | 12 | #include 13 | 14 | #endif /* TWIDDLE_H */ 15 | -------------------------------------------------------------------------------- /include/twiddle/bloomfilter/bloomfilter.h: -------------------------------------------------------------------------------- 1 | #ifndef TWIDDLE_BLOOMFILTER_H 2 | #define TWIDDLE_BLOOMFILTER_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #define TW_LOG_2 0.6931471805599453 10 | 11 | #define tw_bloomfilter_optimal_m(n, p) (-n * log(p) / (TW_LOG_2 * TW_LOG_2)) 12 | #define tw_bloomfilter_optimal_k(n, m) (m / n * 
TW_LOG_2) 13 | 14 | struct tw_bitmap; 15 | 16 | /** 17 | * bloomfilter data structure 18 | * 19 | * This bloomfilter is static and does not support automatic resizing. The 20 | * underlying storage is `struct tw_bitmap`. 21 | */ 22 | struct tw_bloomfilter { 23 | /** number of hash functions */ 24 | uint16_t k; 25 | /** bitmap holding the bits */ 26 | struct tw_bitmap *bitmap; 27 | }; 28 | 29 | /** 30 | * Allocate a `struct tw_bloomfilter`. 31 | * 32 | * @param size number of bits the bloomfilter should hold, between 33 | * (0, TW_BITMAP_MAX_BITS]. 34 | * @param k strictly positive number of hash functions used 35 | * 36 | * @return `NULL` if allocation failed, otherwise a pointer to the newly 37 | * allocated `struct tw_bloomfilter` 38 | * 39 | * @note group:bloomfilter 40 | */ 41 | struct tw_bloomfilter *tw_bloomfilter_new(uint64_t size, uint16_t k); 42 | 43 | /** 44 | * Free a `struct tw_bloomfilter`. 45 | * 46 | * @param bf bloomfilter to free 47 | * 48 | * @note group:bloomfilter 49 | */ 50 | void tw_bloomfilter_free(struct tw_bloomfilter *bf); 51 | 52 | /** 53 | * Copy a source `struct tw_bloomfilter` into a specified destination. 54 | * 55 | * @param src non-null bloomfilter to copy from 56 | * @param dst non-null bloomfilter to copy to 57 | * 58 | * @return `NULL` if any filter is null or not of the same cardinality, 59 | * otherwise a pointer to dst 60 | * 61 | * @note group:bloomfilter 62 | */ 63 | struct tw_bloomfilter *tw_bloomfilter_copy(const struct tw_bloomfilter *src, 64 | struct tw_bloomfilter *dst); 65 | 66 | /** 67 | * Clone a `struct tw_bloomfilter` into a newly allocated one. 68 | * 69 | * @param bf non-null bloomfilter to clone 70 | * 71 | * @return `NULL` if failed, otherwise a newly allocated bloomfilter initialized 72 | * from the requested bloomfilter.
The caller is responsible to 73 | * deallocate with tw_bloomfilter_free 74 | * 75 | * @note group:bloomfilter 76 | */ 77 | struct tw_bloomfilter *tw_bloomfilter_clone(const struct tw_bloomfilter *bf); 78 | 79 | /** 80 | * Set an element in a `struct tw_bloomfilter`. 81 | * 82 | * @param bf non-null bloomfilter affected 83 | * @param key non-null buffer of the key to add 84 | * @param key_size stricly positive size of the buffer key to add 85 | * 86 | * @note group:bloomfilter 87 | */ 88 | void tw_bloomfilter_set(struct tw_bloomfilter *bf, const void *key, 89 | size_t key_size); 90 | 91 | /** 92 | * Verify if an element is present in a `struct tw_bloomfilter`. 93 | * 94 | * @param bf non-null bloomfilter affected 95 | * @param key non-null buffer of the key to test 96 | * @param key_size stricly positive size of the buffer key to test 97 | * 98 | * @return `false` if preconditions are not met, otherwise indicator if the 99 | * element is in the bloomfilter (with possibility of false positives) 100 | * 101 | * @note group:bloomfilter 102 | */ 103 | bool tw_bloomfilter_test(const struct tw_bloomfilter *bf, const void *key, 104 | size_t key_size); 105 | /** 106 | * Verify if a `struct tw_bloomfilter` is empty. 107 | * 108 | * @param bf non-null bloomfilter to verify emptyness 109 | * 110 | * @return `false` if bf is null, otherwise indicator if the bloomfilter is 111 | * empty. 112 | * 113 | * @note group:bloomfilter 114 | */ 115 | bool tw_bloomfilter_empty(const struct tw_bloomfilter *bf); 116 | 117 | /** 118 | * Verify if a `struct tw_bloomfilter` is full. 119 | * 120 | * @param bf non-null bloomfilter to verify fullness 121 | * 122 | * @return `false` if bf is null, otherwise indicator if the bloomfilter is 123 | * full. 124 | * 125 | * @note group:bloomfilter 126 | */ 127 | bool tw_bloomfilter_full(const struct tw_bloomfilter *bf); 128 | 129 | /** 130 | * Count the number of active bits in a `struct tw_bloomfilter`. 
131 | * 132 | * @param bf non-null bloomfilter to count active bits 133 | * 134 | * @return `0` if bf is null, otherwise the number of active bits 135 | * 136 | * @note group:bloomfilter 137 | */ 138 | uint64_t tw_bloomfilter_count(const struct tw_bloomfilter *bf); 139 | 140 | /** 141 | * Count the percentage of active bits in a `struct tw_bloomfilter`. 142 | * 143 | * @param bf non-null bloomfilter to count the density 144 | * 145 | * @return `0.0` if bf is null, otherwise the portion of active bits 146 | * expressed as (count / size). 147 | * 148 | * @note group:bloomfilter 149 | */ 150 | float tw_bloomfilter_density(const struct tw_bloomfilter *bf); 151 | 152 | /** 153 | * Zero all bits in a `struct tw_bloomfilter`. 154 | * 155 | * @param bf non-null bloomfilter to zero 156 | * 157 | * @return `NULL` if bf is null, otherwise a pointer to bf on successful 158 | * operation 159 | * 160 | * @note group:bloomfilter 161 | */ 162 | struct tw_bloomfilter *tw_bloomfilter_zero(struct tw_bloomfilter *bf); 163 | 164 | /** 165 | * Fill all bits in a `struct tw_bloomfilter`. 166 | * 167 | * @param bf non-null bloomfilter to fill 168 | * 169 | * @return `NULL` if bf is null, otherwise a pointer to bf on successful 170 | * operation 171 | * 172 | * @note group:bloomfilter 173 | */ 174 | struct tw_bloomfilter *tw_bloomfilter_fill(struct tw_bloomfilter *bf); 175 | 176 | /** 177 | * Inverse all bits and zeroes in a `struct tw_bloomfilter`. 178 | * 179 | * @param bf non-null bloomfilter to inverse 180 | * 181 | * @return `NULL` if failed, otherwise the bloomfilter 182 | * 183 | * @note group:bloomfilter 184 | */ 185 | struct tw_bloomfilter *tw_bloomfilter_not(struct tw_bloomfilter *bf); 186 | 187 | /** 188 | * Verify if `struct tw_bloomfilter`s are equal. 
189 | * 190 | * @param a first non-null bloomfilter to check 191 | * @param b second non-null bloomfilter to check 192 | * 193 | * @return `false` if any bloomfilter is null or hashes are not of the same 194 | * cardinality, otherwise indicator if filters are equal 195 | * 196 | * @note group:bloomfilter 197 | */ 198 | bool tw_bloomfilter_equal(const struct tw_bloomfilter *a, 199 | const struct tw_bloomfilter *b); 200 | 201 | /** 202 | * Compute the union of `struct tw_bloomfilter`s. 203 | * 204 | * @param src non-null bloomfilter to union from 205 | * @param dst non-null bloomfilter to union to 206 | * 207 | * @return `NULL` if failed, otherwise pointer to dst 208 | * 209 | * @note group:bloomfilter 210 | */ 211 | struct tw_bloomfilter *tw_bloomfilter_union(const struct tw_bloomfilter *src, 212 | struct tw_bloomfilter *dst); 213 | 214 | /** 215 | * Compute the intersection of `struct tw_bloomfilter`s. 216 | * 217 | * @param src non-null bloomfilter to intersect from 218 | * @param dst non-null bloomfilter to intersect to 219 | * 220 | * @return `NULL` if failed, otherwise pointer to dst 221 | * 222 | * @note group:bloomfilter 223 | */ 224 | struct tw_bloomfilter * 225 | tw_bloomfilter_intersection(const struct tw_bloomfilter *src, 226 | struct tw_bloomfilter *dst); 227 | 228 | /** 229 | * Compute the symmetric difference of `struct tw_bloomfilter`s.
230 | * 231 | * @param src non-null bloomfilter to xor from 232 | * @param dst non-null bloomfilter to xor to 233 | * 234 | * @return: `NULL` if failed, otherwise pointer to dst 235 | * 236 | * @note group:bloomfilter 237 | */ 238 | struct tw_bloomfilter *tw_bloomfilter_xor(const struct tw_bloomfilter *src, 239 | struct tw_bloomfilter *dst); 240 | 241 | #endif /* TWIDDLE_BLOOMFILTER_H */ 242 | -------------------------------------------------------------------------------- /include/twiddle/bloomfilter/bloomfilter_a2.h: -------------------------------------------------------------------------------- 1 | #ifndef TWIDDLE_BLOOMFILTER_A2_H 2 | #define TWIDDLE_BLOOMFILTER_A2_H 3 | 4 | #include 5 | #include 6 | 7 | struct tw_bloomfilter; 8 | 9 | /** 10 | * Aging bloomfilter with active buffers data structure 11 | * 12 | * The paper "Aging bloom filter with two active buffers for dynamic sets" 13 | * describe a method where 2 bloom filters are used to implement a FIFO. 14 | * 15 | * Elements are added to `active` until `density` (on active) is attained; 16 | * then `passive` is cleared and both filters are swapped. 17 | */ 18 | struct tw_bloomfilter_a2 { 19 | /** density threshold to trigger rotation */ 20 | float density; 21 | /** pointer to active bloomfilter */ 22 | struct tw_bloomfilter *active; 23 | /** pointer to passive bloomfilter */ 24 | struct tw_bloomfilter *passive; 25 | }; 26 | 27 | /** 28 | * Allocate a `struct tw_bloomfilter_a2`. 29 | * 30 | * @param size number of bits the bloomfilter should hold, between 31 | * (0, TW_BITMAP_MAX_BITS]. 
32 | * @param k strictly positive number of hash functions used 33 | * @param density threshold for rotation within (0, 1] 34 | * 35 | * @return `NULL` if allocation failed, otherwise a pointer to the newly 36 | * allocated `struct tw_bloomfilter_a2` 37 | * 38 | * @note group:bloomfilter_a2 39 | */ 40 | struct tw_bloomfilter_a2 *tw_bloomfilter_a2_new(uint64_t size, uint16_t k, 41 | float dentisy); 42 | 43 | /** 44 | * Free a `struct tw_bloomfilter_a2`. 45 | * 46 | * @param bf bloomfilter to free 47 | * 48 | * @note group:bloomfilter_a2 49 | */ 50 | void tw_bloomfilter_a2_free(struct tw_bloomfilter_a2 *bf); 51 | 52 | /** 53 | * Copy a source `struct tw_bloomfilter_a2` into a specified destination. 54 | * 55 | * @param src non-null bloomfilter to copy from 56 | * @param dst non-null bloomfilter to copy to 57 | * 58 | * @return `NULL` if any filter is null or not of the same cardinality, 59 | * otherwise a pointer to dst 60 | * 61 | * @note group:bloomfilter_a2 62 | */ 63 | struct tw_bloomfilter_a2 * 64 | tw_bloomfilter_a2_copy(const struct tw_bloomfilter_a2 *src, 65 | struct tw_bloomfilter_a2 *dst); 66 | 67 | /** 68 | * Clone a `struct tw_bloomfilter_a2` into a newly allocated one. 69 | * 70 | * @param bf non-null bloomfilter to clone 71 | * 72 | * @return `NULL` if failed, otherwise a newly allocated bloomfilter initialized 73 | * from the requested bloomfilter. The caller is responsible for 74 | * deallocating it with tw_bloomfilter_a2_free 75 | * 76 | * @note group:bloomfilter_a2 77 | */ 78 | struct tw_bloomfilter_a2 * 79 | tw_bloomfilter_a2_clone(const struct tw_bloomfilter_a2 *bf); 80 | 81 | /** 82 | * Set an element in a `struct tw_bloomfilter_a2`.
83 | * 84 | * @param bf non-null bloomfilter affected 85 | * @param key non-null buffer of the key to add 86 | * @param key_size strictly positive size of the buffer key to add 87 | * 88 | * @note group:bloomfilter_a2 89 | */ 90 | void tw_bloomfilter_a2_set(struct tw_bloomfilter_a2 *bf, const void *key, 91 | size_t key_size); 92 | 93 | /** 94 | * Verify if an element is present in a `struct tw_bloomfilter_a2`. 95 | * 96 | * @param bf non-null bloomfilter affected 97 | * @param key non-null buffer of the key to test 98 | * @param key_size strictly positive size of the buffer key to test 99 | * 100 | * @return `false` if preconditions are not met, otherwise indicator if the 101 | * element is in the bloomfilter (with possibility of false positives) 102 | * 103 | * @note group:bloomfilter_a2 104 | */ 105 | bool tw_bloomfilter_a2_test(const struct tw_bloomfilter_a2 *bf, const void *key, 106 | size_t key_size); 107 | 108 | /** 109 | * Verify if a `struct tw_bloomfilter_a2` is empty. 110 | * 111 | * @param bf non-null bloomfilter to verify emptiness 112 | * 113 | * @return `false` if bf is null, otherwise indicator if the bloomfilter is 114 | * empty. 115 | * 116 | * @note group:bloomfilter_a2 117 | */ 118 | bool tw_bloomfilter_a2_empty(const struct tw_bloomfilter_a2 *bf); 119 | 120 | /** 121 | * Verify if a `struct tw_bloomfilter_a2` is full. 122 | * 123 | * @param bf non-null bloomfilter to verify fullness 124 | * 125 | * @return `false` if bf is null, otherwise indicator if the bloomfilter is 126 | * full. 127 | * 128 | * @note group:bloomfilter_a2 129 | */ 130 | bool tw_bloomfilter_a2_full(const struct tw_bloomfilter_a2 *bf); 131 | 132 | /** 133 | * Count the number of active bits in a `struct tw_bloomfilter_a2`.
134 | * 135 | * @param bf non-null bloomfilter to count active bits 136 | * 137 | * @return `0` if bf is null, otherwise the number of active bits 138 | * 139 | * @note group:bloomfilter_a2 140 | */ 141 | uint64_t tw_bloomfilter_a2_count(const struct tw_bloomfilter_a2 *bf); 142 | 143 | /** 144 | * Count the percentage of active bits in a `struct tw_bloomfilter_a2`. 145 | * 146 | * @param bf non-null bloomfilter to count the density 147 | * 148 | * @return `0.0` if bf is null, otherwise the portion of active bits 149 | * expressed as (count / size). 150 | * 151 | * @note group:bloomfilter_a2 152 | */ 153 | float tw_bloomfilter_a2_density(const struct tw_bloomfilter_a2 *bf); 154 | 155 | /** 156 | * Zero all bits in a `struct tw_bloomfilter_a2`. 157 | * 158 | * @param bf non-null bloomfilter to zero 159 | * 160 | * @return `NULL` if bf is null, otherwise a pointer to bf on successful 161 | * operation 162 | * 163 | * @note group:bloomfilter_a2 164 | */ 165 | struct tw_bloomfilter_a2 *tw_bloomfilter_a2_zero(struct tw_bloomfilter_a2 *bf); 166 | 167 | /** 168 | * Fill all bits in a `struct tw_bloomfilter_a2`. 169 | * 170 | * @param bf non-null bloomfilter to fill 171 | * 172 | * @return `NULL` if bf is null, otherwise a pointer to bf on successful 173 | * operation 174 | * 175 | * @note group:bloomfilter_a2 176 | */ 177 | struct tw_bloomfilter_a2 *tw_bloomfilter_a2_fill(struct tw_bloomfilter_a2 *bf); 178 | 179 | /** 180 | * Inverse all bits and zeroes in a `struct tw_bloomfilter_a2`. 181 | * 182 | * @param bf non-null bloomfilter to inverse 183 | * 184 | * @return `NULL` if failed, otherwise the bloomfilter 185 | * 186 | * @note group:bloomfilter_a2 187 | */ 188 | struct tw_bloomfilter_a2 *tw_bloomfilter_a2_not(struct tw_bloomfilter_a2 *bf); 189 | 190 | /** 191 | * Verify if `struct tw_bloomfilter_a2`s are equal. 
192 | * 193 | * @param fst first non-null bloomfilter to check 194 | * @param snd second non-null bloomfilter to check 195 | * 196 | * @return `false` if any bloomfilter is null or filters are not of the same 197 | * cardinality, otherwise indicator if filters are equal 198 | * 199 | * @note group:bloomfilter_a2 200 | */ 201 | bool tw_bloomfilter_a2_equal(const struct tw_bloomfilter_a2 *fst, 202 | const struct tw_bloomfilter_a2 *snd); 203 | 204 | /** 205 | * Compute the union of `struct tw_bloomfilter_a2`s. 206 | * 207 | * @param src non-null bloomfilter to union from 208 | * @param dst non-null bloomfilter to union to 209 | * 210 | * @return: `NULL` if failed, otherwise pointer to dst 211 | * 212 | * @note group:bloomfilter_a2 213 | */ 214 | struct tw_bloomfilter_a2 * 215 | tw_bloomfilter_a2_union(const struct tw_bloomfilter_a2 *src, 216 | struct tw_bloomfilter_a2 *dst); 217 | 218 | /** 219 | * Compute the intersection of `struct tw_bloomfilter_a2`s. 220 | * 221 | * @param src non-null bloomfilter to intersect from 222 | * @param dst non-null bloomfilter to intersect to 223 | * 224 | * @return: `NULL` if failed, otherwise pointer to dst 225 | * 226 | * @note group:bloomfilter_a2 227 | */ 228 | struct tw_bloomfilter_a2 * 229 | tw_bloomfilter_a2_intersection(const struct tw_bloomfilter_a2 *src, 230 | struct tw_bloomfilter_a2 *dst); 231 | 232 | /** 233 | * Compute the symmetric difference of `struct tw_bloomfilter_a2`s.
234 | * 235 | * @param src non-null bloomfilter to xor from 236 | * @param dst non-null bloomfilter to xor to 237 | * 238 | * @return: `NULL` if failed, otherwise pointer to dst 239 | * 240 | * @note group:bloomfilter_a2 241 | */ 242 | struct tw_bloomfilter_a2 * 243 | tw_bloomfilter_a2_xor(const struct tw_bloomfilter_a2 *src, 244 | struct tw_bloomfilter_a2 *dst); 245 | 246 | #endif /* TWIDDLE_BLOOMFILTER_A2_H */ 247 | -------------------------------------------------------------------------------- /include/twiddle/hash/minhash.h: -------------------------------------------------------------------------------- 1 | #ifndef TWIDDLE_HASH_MINHASH_H 2 | #define TWIDDLE_HASH_MINHASH_H 3 | 4 | #include 5 | #include 6 | 7 | /** 8 | * minhash data structure 9 | * 10 | * (source https://en.wikipedia.org/wiki/MinHash) 11 | * 12 | * MinHash (or the min-wise independent permutations locality sensitive hashing 13 | * scheme) is a technique for quickly estimating how similar two sets are. The 14 | * term MinHash was coined by Andrei Broder (1997), see [1]. This is also known 15 | * as the `k-mins` sketch. 16 | * 17 | * The simplest version of the MinHash scheme uses `k` different hash functions, 18 | * where `k` is a fixed integer parameter, and represents each set `S` by the 19 | * `k` values of `h_min(S)` for these `k` functions. 20 | * 21 | * To estimate `Jaccard(A, B)` using this version of the scheme, let `y` be the 22 | * number of hash functions for which `h_min(A) = h_min(B)`, and use `y/k` as 23 | * the estimate. This estimate is the average of `k` different 0-1 random 24 | * variables, each of which is one when `h_min(A) = h_min(B)` and zero 25 | * otherwise, and each of which is an unbiased estimator of `Jaccard(A, B)`. 26 | * Therefore, their average is also an unbiased estimator, and by standard 27 | * Chernoff bounds for sums of 0-1 random variables, its expected error is 28 | * `O(1/√k)`. 
29 | * 30 | * Therefore, for any constant `ε > 0` there is a constant `k = O(1/ε^2)` such 31 | * that the expected error of the estimate is at most `ε`. For example, 400 32 | * hashes would be required to estimate `Jaccard(A, B)` with an expected error 33 | * less than or equal to .05. 34 | * 35 | * This implementation uses 32bits registers. It also uses max instead 36 | * of min, since it's homomorphic. 37 | * 38 | * [1] Broder, Andrei Z. "On the resemblance and containment of documents." 39 | * Compression and Complexity of Sequences 1997. Proceedings. IEEE, 1997. 40 | */ 41 | struct tw_minhash { 42 | /** number of registers, also the number of hash functions */ 43 | uint32_t n_registers; 44 | /** registers holding computed values */ 45 | uint32_t *registers; 46 | }; 47 | 48 | /** 49 | * Allocate a `struct tw_minhash`. 50 | * 51 | * The allocation will be rounded up to the closest multiple of a 52 | * cacheline. 53 | * 54 | * @param n_registers strictly positive number of 32bit registers the structure 55 | * holds 56 | * 57 | * @return `NULL` if allocation failed, otherwise a pointer to the newly 58 | * allocated `struct tw_minhash`. 59 | * 60 | * @note group:minhash 61 | */ 62 | struct tw_minhash *tw_minhash_new(uint32_t n_registers); 63 | 64 | /** 65 | * Free a `struct tw_minhash`. 66 | * 67 | * @param hash to free 68 | * 69 | * @note group:minhash 70 | */ 71 | void tw_minhash_free(struct tw_minhash *hash); 72 | 73 | /** 74 | * Copy a source `struct tw_minhash` into a specified destination. 75 | * 76 | * @param src non-null minhash to copy from 77 | * @param dst non-null minhash to copy to 78 | * 79 | * @return `NULL` if any hash is null or not of the same cardinality, otherwise 80 | * a pointer to dst 81 | * 82 | * @note group:minhash 83 | */ 84 | struct tw_minhash *tw_minhash_copy(const struct tw_minhash *src, 85 | struct tw_minhash *dst); 86 | 87 | /** 88 | * Clone a `struct tw_minhash` into a newly allocated one.
89 | * 90 | * @param hash non-null minhash to clone 91 | * 92 | * @return `NULL` if failed, otherwise a newly allocated minhash initialized 93 | * from the requested minhash. The caller is responsible to deallocate 94 | * the minhash with tw_minhash_free 95 | * 96 | * @note group:minhash 97 | */ 98 | struct tw_minhash *tw_minhash_clone(const struct tw_minhash *hash); 99 | 100 | /** 101 | * Add an element into a `struct tw_minhash`. 102 | * 103 | * @param hash non-null minhash to add 104 | * @param key non-null buffer of the key to add 105 | * @param key_size strictly positive size of the buffer of the key to add 106 | * 107 | * @note group:minhash 108 | */ 109 | void tw_minhash_add(struct tw_minhash *hash, const void *key, size_t key_size); 110 | 111 | /** 112 | * Estimate the jaccard index between two `struct tw_minhash`s. 113 | * 114 | * @param fst non-null first minhash 115 | * @param snd non-null second minhash 116 | * 117 | * @return `0.0` if any hash is null or hashes are not of the same cardinality, 118 | * otherwise the estimated jaccard index between `fst` and `snd` 119 | * 120 | * @note group:minhash 121 | */ 122 | float tw_minhash_estimate(const struct tw_minhash *fst, 123 | const struct tw_minhash *snd); 124 | 125 | /** 126 | * Verify if `struct tw_minhash`s are equal. 127 | * 128 | * @param fst non-null first minhash 129 | * @param snd non-null second minhash 130 | * 131 | * @return `false` if any hash is null or hashes are not of the same cardinality, 132 | * otherwise indicator if hashes are equal 133 | * 134 | * @note group:minhash 135 | */ 136 | bool tw_minhash_equal(const struct tw_minhash *fst, 137 | const struct tw_minhash *snd); 138 | 139 | /** 140 | * Merge a `struct tw_minhash` in a specified destination.
141 | * 142 | * @param src non-null minhash to merge from 143 | * @param dst non-null minhash to merge to 144 | * 145 | * @return `NULL` if any hash is null or hashes are not of the same cardinality, 146 | * otherwise pointer to dst with merged registers 147 | * 148 | * @note group:minhash 149 | */ 150 | struct tw_minhash *tw_minhash_merge(const struct tw_minhash *src, 151 | struct tw_minhash *dst); 152 | 153 | #endif /* TWIDDLE_HASH_MINHASH_H */ 154 | -------------------------------------------------------------------------------- /include/twiddle/hyperloglog/hyperloglog.h: -------------------------------------------------------------------------------- 1 | #ifndef TWIDDLE_HYPERLOGLOG_H 2 | #define TWIDDLE_HYPERLOGLOG_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #define TW_HLL_ERROR_FOR_REG(reg) (1.04 / sqrt((double)(reg))) 9 | #define TW_HLL_REG_FOR_ERROR(err) (1.0816 / ((err) * (err))) 10 | 11 | #define TW_HLL_MIN_PRECISION 6 12 | #define TW_HLL_MAX_PRECISION 18 13 | 14 | /** 15 | * hyperloglog data structure 16 | * 17 | * (source https://en.wikipedia.org/wiki/HyperLogLog) 18 | * 19 | * The basis of the HyperLogLog algorithm is the observation that the 20 | * cardinality of a multiset of uniformly distributed random numbers can be 21 | * estimated by calculating the maximum number of leading zeros in the binary 22 | * representation of each number in the set. If the maximum number of leading 23 | * zeros observed is `n`, an estimate for the number of 24 | * distinct elements in the set is `2^{n}`. 25 | * 26 | * In the HyperLogLog algorithm, a hash function is applied to each element in 27 | * the original multiset, to obtain a multiset of uniformly distributed random 28 | * numbers with the same cardinality as the original multiset. The cardinality 29 | * of this randomly distributed set can then be estimated using the algorithm 30 | * above. 
31 | * 32 | * The simple estimate of cardinality obtained using the algorithm above has 33 | * the disadvantage of a large variance. In the HyperLogLog algorithm, the 34 | * variance is minimised by splitting the multiset into numerous subsets, 35 | * calculating the maximum number of leading zeros in the numbers in each of 36 | * these subsets, and using a harmonic mean to combine these estimates for each 37 | * subset into an estimate of the cardinality of the whole set. 38 | * 39 | * For small cardinality, the algorithm falls back to the linear counting 40 | * algorithm, see [1] for a detailed analysis. This implementation's estimator 41 | * uses the bias correction proposed in [2]. Referring to the original paper, 42 | * we're using `m` registers of fixed size 64 bits. 43 | * 44 | * [1] Flajolet, Philippe, et al. "Hyperloglog: the analysis of a near-optimal 45 | * cardinality estimation algorithm." DMTCS Proceedings 1 (2008). 46 | * 47 | * [2] Heule, Stefan, Marc Nunkesser, and Alexander Hall. "HyperLogLog in 48 | * practice: Algorithmic engineering of a state of the art cardinality 49 | * estimation algorithm." Proceedings of the 16th International Conference on 50 | * Extending Database Technology. ACM, 2013. 51 | */ 52 | struct tw_hyperloglog { 53 | /** the number of registers will be defined as 2^precision */ 54 | uint8_t precision; 55 | /** allocated array containing the 8bit registers */ 56 | uint8_t *registers; 57 | }; 58 | 59 | /** 60 | * Allocate a `struct tw_hyperloglog`. 61 | * 62 | * @param precision power-of-2 exponent number of bucket hyperloglog should use, 63 | * must be greater than or equal to `TW_HLL_MIN_PRECISION` and 64 | * smaller than or equal to `TW_HLL_MAX_PRECISION` 65 | * 66 | * @return `NULL` if allocation failed, otherwise a pointer to the newly 67 | * allocated `struct tw_hyperloglog`.
68 | * 69 | * @note group:hyperloglog 70 | */ 71 | struct tw_hyperloglog *tw_hyperloglog_new(uint8_t precision); 72 | 73 | /** 74 | * Free a `struct tw_hyperloglog`. 75 | * 76 | * @param hll to free 77 | * 78 | * @note group:hyperloglog 79 | */ 80 | void tw_hyperloglog_free(struct tw_hyperloglog *hll); 81 | 82 | /** 83 | * Copy a source `struct tw_hyperloglog` into a specified destination. 84 | * 85 | * @param src non-null hyperloglog to copy from 86 | * @param dst non-null hyperloglog of the same precision as src to copy to 87 | * 88 | * @return `NULL` if copy failed, otherwise a pointer to dst 89 | * 90 | * @note group:hyperloglog 91 | */ 92 | struct tw_hyperloglog *tw_hyperloglog_copy(const struct tw_hyperloglog *src, 93 | struct tw_hyperloglog *dst); 94 | 95 | /** 96 | * Clone a `struct tw_hyperloglog` into a newly allocated one. 97 | * 98 | * @param hll non-null hyperloglog to clone 99 | * 100 | * @return `NULL` if failed, otherwise a newly allocated hyperloglog initialized 101 | * from the requested hyperloglog. The caller is responsible to 102 | * deallocate with tw_hyperloglog_free 103 | * 104 | * @note group:hyperloglog 105 | */ 106 | struct tw_hyperloglog *tw_hyperloglog_clone(const struct tw_hyperloglog *hll); 107 | 108 | /** 109 | * Add an element in a `struct tw_hyperloglog`. 110 | * 111 | * @param hll non-null hyperloglog to add the element to 112 | * @param key non-null buffer of the key to add 113 | * @param key_size positive integer size of the key to add 114 | * 115 | * @note group:hyperloglog 116 | */ 117 | void tw_hyperloglog_add(struct tw_hyperloglog *hll, const void *key, 118 | size_t key_size); 119 | 120 | /** 121 | * Estimate the number of elements in a `struct tw_hyperloglog`. 122 | * 123 | * @param hll non-null hyperloglog to estimate 124 | * 125 | * @return `0.0` if hll is NULL, otherwise the estimated number of elements 126 | * in hll.
127 | * 128 | * @note group:hyperloglog 129 | */ 130 | double tw_hyperloglog_count(const struct tw_hyperloglog *hll); 131 | 132 | /** 133 | * Verify if `struct tw_hyperloglog`s are equal. 134 | * 135 | * @param fst non-null first hyperloglog to check 136 | * @param snd non-null second hyperloglog to check 137 | * 138 | * @return `false` if any is null or not of the same precision, otherwise an 139 | * indicator if `fst` and `snd` are equal 140 | * 141 | * @note group:hyperloglog 142 | */ 143 | bool tw_hyperloglog_equal(const struct tw_hyperloglog *fst, 144 | const struct tw_hyperloglog *snd); 145 | 146 | /** 147 | * Merge a `struct tw_hyperloglog` in a specified destination. 148 | * 149 | * The merge operation is an elemwise max applied to the buckets. 150 | * 151 | * @param src non-null hyperloglog to merge from 152 | * @param dst non-null hyperloglog to merge to 153 | * 154 | * @return `NULL` if any is null or not of the same precision, otherwise a 155 | * pointer to merged `dst` 156 | * 157 | * @note group:hyperloglog 158 | */ 159 | struct tw_hyperloglog *tw_hyperloglog_merge(const struct tw_hyperloglog *src, 160 | struct tw_hyperloglog *dst); 161 | #endif /* TWIDDLE_HYPERLOGLOG_H */ 162 | -------------------------------------------------------------------------------- /include/twiddle/utils/hash.h: -------------------------------------------------------------------------------- 1 | #ifndef TWIDDLE_UTILS_HASH_H 2 | #define TWIDDLE_UTILS_HASH_H 3 | 4 | #include 5 | #include 6 | 7 | typedef struct { 8 | uint64_t h; 9 | uint64_t l; 10 | } tw_uint128_t; 11 | 12 | uint64_t tw_hash_128_64(tw_uint128_t hash); 13 | 14 | uint64_t tw_metrohash_64(const uint64_t seed, const void *key, 15 | const size_t key_len); 16 | 17 | tw_uint128_t tw_metrohash_128(const uint64_t seed, const void *key, 18 | const size_t key_len); 19 | 20 | uint64_t tw_murmur3_64(const uint64_t seed, const void *key, 21 | const size_t key_len); 22 | 23 | tw_uint128_t tw_murmur3_128(const uint64_t seed,
const void *key, 24 | const size_t key_len); 25 | 26 | #endif /* TWIDDLE_UTILS_HASH_H */ 27 | -------------------------------------------------------------------------------- /include/twiddle/utils/projection.h: -------------------------------------------------------------------------------- 1 | #ifndef TWIDDLE_UTILS_PROJECTION_H 2 | #define TWIDDLE_UTILS_PROJECTION_H 3 | 4 | inline uint32_t tw_projection_mod_32(uint32_t a, uint32_t b) { return a % b; } 5 | 6 | inline uint64_t tw_projection_mod_64(uint64_t a, uint64_t b) { return a % b; } 7 | 8 | inline uint32_t tw_projection_mul_32(uint32_t a, uint32_t b) 9 | { 10 | return ((uint64_t)a * (uint64_t)b) >> 32; 11 | } 12 | 13 | inline uint64_t tw_projection_mul_64(uint64_t a, uint64_t b) 14 | { 15 | return ((__uint128_t)a * (__uint128_t)b) >> 64; 16 | } 17 | 18 | #endif /* TWIDDLE_UTILS_PROJECTION_H */ 19 | -------------------------------------------------------------------------------- /python/setup.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from setuptools import setup, find_packages 3 | from setuptools.command.test import test as TestCommand 4 | 5 | class PyTest(TestCommand): 6 | '''Define how to use pytest to test the code''' 7 | 8 | def finalize_options(self): 9 | TestCommand.finalize_options(self) 10 | self.test_args = [] 11 | self.test_suite = True 12 | 13 | def run_tests(self): 14 | #import here, cause outside the eggs aren't loaded 15 | import pytest 16 | sys.exit(pytest.main(['tests'])) 17 | 18 | setup( 19 | name="twiddle", 20 | description="python bindings to libtwiddle", 21 | version="0.0.0", 22 | author="Francois Saint-Jacques", 23 | author_email="fsaintjacques@gmail.com", 24 | url="https://github.com/fsaintjacques/libtwiddle", 25 | py_modules=['twiddle'], 26 | license="LGPL License, version 3.0", 27 | packages=find_packages(exclude=['tests']), 28 | tests_require=['hypothesis==1.19.0', 'pytest'], 29 | cmdclass={'test': PyTest} 30 | ) 31 |
-------------------------------------------------------------------------------- /python/tests/test_bitmap.py: -------------------------------------------------------------------------------- 1 | from hypothesis import given 2 | from test_helpers import TwiddleTest, single_set, double_set 3 | from twiddle import Bitmap 4 | 5 | class TestBitmap(TwiddleTest): 6 | @given(single_set) 7 | def test_bitmap_find_first_zero(self, n_xs): 8 | n, xs = n_xs 9 | x = Bitmap.from_indices(n, xs) 10 | 11 | expected = -1 if x.full() else min(set(range(0, n)) - xs) 12 | first = x.find_first_zero() 13 | assert(first == expected) 14 | 15 | 16 | @given(single_set) 17 | def test_bitmap_find_first_bit(self, n_xs): 18 | n, xs = n_xs 19 | x = Bitmap.from_indices(n, xs) 20 | 21 | expected = -1 if x.empty() else min(xs) 22 | first = x.find_first_bit() 23 | assert(first == expected) 24 | 25 | 26 | @given(single_set) 27 | def test_bitmap_negation(self, n_xs): 28 | n, xs = n_xs 29 | x = Bitmap.from_indices(n, xs) 30 | 31 | y = -x 32 | 33 | for idx in xs: 34 | assert(idx not in y) 35 | 36 | assert(x != y) 37 | assert(x == -y) 38 | 39 | 40 | @given(double_set) 41 | def test_bitmap_union(self, n_xs_ys): 42 | n, xs, ys = n_xs_ys 43 | x, y = Bitmap.from_indices(n, xs), Bitmap.from_indices(n, ys) 44 | 45 | # tests __or__ 46 | z = x | y 47 | assert(z == Bitmap.from_indices(n, xs | ys)) 48 | 49 | # tests __ior__ 50 | x |= y 51 | assert(x == z) 52 | 53 | 54 | @given(double_set) 55 | def test_bitmap_intersection(self, n_xs_ys): 56 | n, xs, ys = n_xs_ys 57 | x, y = Bitmap.from_indices(n, xs), Bitmap.from_indices(n, ys) 58 | 59 | # tests __and__ 60 | z = x & y 61 | assert(z == Bitmap.from_indices(n, xs & ys)) 62 | 63 | # tests __iand__ 64 | x &= y 65 | assert(x == z) 66 | 67 | 68 | @given(double_set) 69 | def test_bitmap_xor(self, n_xs_ys): 70 | n, xs, ys = n_xs_ys 71 | x, y = Bitmap.from_indices(n, xs), Bitmap.from_indices(n, ys) 72 | 73 | # tests __xor__ 74 | z = x ^ y 75 | assert(z == 
Bitmap.from_indices(n, xs ^ ys)) 76 | 77 | # tests __ixor__ 78 | x ^= y 79 | assert(x == z) 80 | -------------------------------------------------------------------------------- /python/tests/test_bitmap_rle.py: -------------------------------------------------------------------------------- 1 | from hypothesis import given, example 2 | from test_helpers import TwiddleTest, single_set, double_set 3 | from twiddle import BitmapRLE 4 | 5 | class TestBitmapRLE(TwiddleTest): 6 | @given(single_set) 7 | def test_bitmap_find_first_zero(self, n_xs): 8 | n, xs = n_xs 9 | x = BitmapRLE.from_indices(n, xs) 10 | 11 | expected = -1 if x.full() else min(set(range(0, n)) - xs) 12 | first = x.find_first_zero() 13 | assert(first == expected) 14 | 15 | 16 | @given(single_set) 17 | def test_bitmap_find_first_bit(self, n_xs): 18 | n, xs = n_xs 19 | x = BitmapRLE.from_indices(n, xs) 20 | 21 | expected = -1 if x.empty() else min(xs) 22 | first = x.find_first_bit() 23 | assert(first == expected) 24 | 25 | 26 | @given(single_set) 27 | def test_bitmap_negation(self, n_xs): 28 | n, xs = n_xs 29 | x = BitmapRLE.from_indices(n, xs) 30 | 31 | y = -x 32 | 33 | for idx in xs: 34 | assert(idx not in y) 35 | 36 | assert(x != y) 37 | assert(x == -y) 38 | 39 | 40 | @given(double_set) 41 | def test_bitmap_union(self, n_xs_ys): 42 | n, xs, ys = n_xs_ys 43 | x, y = BitmapRLE.from_indices(n, xs), BitmapRLE.from_indices(n, ys) 44 | 45 | # tests __or__ 46 | z = x | y 47 | assert(z == BitmapRLE.from_indices(n, xs | ys)) 48 | 49 | # tests __ior__ 50 | x |= y 51 | assert(x == z) 52 | 53 | 54 | @given(double_set) 55 | def test_bitmap_intersection(self, n_xs_ys): 56 | n, xs, ys = n_xs_ys 57 | x, y = BitmapRLE.from_indices(n, xs), BitmapRLE.from_indices(n, ys) 58 | 59 | # tests __and__ 60 | z = x & y 61 | assert(z == BitmapRLE.from_indices(n, xs & ys)) 62 | 63 | # tests __iand__ 64 | x &= y 65 | assert(x == z) 66 | -------------------------------------------------------------------------------- 
/python/tests/test_bloomfilter.py: -------------------------------------------------------------------------------- 1 | from hypothesis import given 2 | from test_helpers import TwiddleTest, single_set, double_set 3 | from twiddle import BloomFilter 4 | 5 | class TestBloomFilter(TwiddleTest): 6 | @given(single_set) 7 | def test_bloomfilter_negation(self, n_xs): 8 | n, xs = n_xs 9 | x = BloomFilter.from_iterable(n, 8, xs) 10 | y = -x 11 | assert(x == -y) 12 | 13 | 14 | @given(double_set) 15 | def test_bloomfilter_union(self, n_xs_ys): 16 | n, xs, ys = n_xs_ys 17 | x, y = BloomFilter.from_iterable(n, 8, xs), BloomFilter.from_iterable(n, 8, ys) 18 | 19 | # tests __or__ 20 | z = x | y 21 | assert(z == BloomFilter.from_iterable(n, 8, xs | ys)) 22 | 23 | # tests __ior__ 24 | x |= y 25 | assert(x == z) 26 | 27 | 28 | @given(double_set) 29 | def test_bloomfilter_intersection(self, n_xs_ys): 30 | n, xs, ys = n_xs_ys 31 | x, y = BloomFilter.from_iterable(n, 8, xs), BloomFilter.from_iterable(n, 8, ys) 32 | zs = xs & ys 33 | 34 | # tests __and__ 35 | z = x & y 36 | for e in zs: 37 | assert(e in z) 38 | 39 | # tests __iand__ 40 | x &= y 41 | for e in zs: 42 | assert(e in x) 43 | -------------------------------------------------------------------------------- /python/tests/test_bloomfilter_a2.py: -------------------------------------------------------------------------------- 1 | from hypothesis import given 2 | from test_helpers import TwiddleTest, single_set, double_set 3 | from twiddle import BloomFilterA2 4 | 5 | class TestBloomFilterA2(TwiddleTest): 6 | @given(single_set) 7 | def test_bloomfilter_a2(self, n_xs): 8 | n, xs = n_xs 9 | bf = BloomFilterA2(n, 8, 0.5) 10 | 11 | for x in xs: 12 | bf.set(x) 13 | assert(x in bf) 14 | -------------------------------------------------------------------------------- /python/tests/test_helpers.py: -------------------------------------------------------------------------------- 1 | from math import log 2 | from hypothesis.strategies 
import integers, sets, just 3 | from hypothesis.testrunners.forking import ForkingTestCase 4 | 5 | 6 | single_set = integers(min_value=2**8, max_value=2**16).flatmap(lambda n: 7 | (just(n), sets(integers(min_value=0, max_value=n-1), 8 | min_size=1, max_size=n, 9 | average_size=n/log(n, 2)))) 10 | 11 | 12 | double_set = integers(min_value=2**8, max_value=2**16).flatmap(lambda n: 13 | (just(n), sets(integers(min_value=0, max_value=n-1), 14 | min_size=1, max_size=n, 15 | average_size=n/log(n, 2)), 16 | sets(integers(min_value=0, max_value=n-1), 17 | min_size=1, max_size=n, 18 | average_size=n/log(n, 2)))) 19 | 20 | 21 | class TwiddleTest(ForkingTestCase): 22 | pass 23 | -------------------------------------------------------------------------------- /python/tests/test_hyperloglog.py: -------------------------------------------------------------------------------- 1 | from math import log 2 | from hypothesis import given 3 | from test_helpers import TwiddleTest, single_set, double_set 4 | from twiddle import HyperLogLog 5 | from unittest import TestCase 6 | 7 | class TestHyperLogLog(TestCase): 8 | @given(double_set) 9 | def test_bloomfilter_union(self, n_xs_ys): 10 | n, xs, ys = n_xs_ys 11 | n = int(log(n, 2)) 12 | x, y = HyperLogLog.from_iterable(n, xs), HyperLogLog.from_iterable(n, ys) 13 | 14 | # tests __or__ 15 | z = x | y 16 | assert(x != z) 17 | 18 | # tests __ior__ 19 | x |= y 20 | assert(x == z) 21 | -------------------------------------------------------------------------------- /python/tests/test_minhash.py: -------------------------------------------------------------------------------- 1 | from math import log 2 | from hypothesis import given 3 | from test_helpers import double_set 4 | from twiddle import MinHash 5 | from unittest import TestCase 6 | 7 | class TestMinHash(TestCase): 8 | @given(double_set) 9 | def test_bloomfilter_union(self, n_xs_ys): 10 | n, xs, ys = n_xs_ys 11 | x, y = MinHash.from_iterable(n, xs), MinHash.from_iterable(n, ys) 12 | 13 | # 
class Bitmap(object):
    """Fixed-size bitmap backed by a native libtwiddle ``tw_bitmap`` handle.

    The class owns the native handle stored in ``self.bitmap`` and forwards
    every operation to the matching ``tw_bitmap_*`` function of
    ``libtwiddle``; it only adds argument validation on the Python side.
    """

    def __init__(self, size, ptr=None):
        """Create a bitmap of ``size`` bits, or adopt the native handle ``ptr``.

        :param size: number of addressable bits.
        :param ptr: existing native handle to wrap instead of allocating.
        """
        self.bitmap = ptr if ptr else libtwiddle.tw_bitmap_new(size)
        self.size = size

    def __del__(self):
        """Release the native handle, if one is held."""
        if self.bitmap:
            libtwiddle.tw_bitmap_free(self.bitmap)

    @classmethod
    def copy(cls, b):
        """Return a new instance wrapping a native clone of ``b``."""
        return cls(b.size, ptr=libtwiddle.tw_bitmap_clone(b.bitmap))

    @classmethod
    def from_indices(cls, size, indices):
        """Return a bitmap of ``size`` bits with every index in ``indices`` set."""
        bitmap = Bitmap(size)

        for idx in indices:
            bitmap[idx] = True

        return bitmap

    def __len__(self):
        """Return the number of addressable bits."""
        return self.size

    def __getitem__(self, i):
        """Return the state of bit ``i``.

        :raises ValueError: if ``i`` is outside ``[0, len(self))``.
        """
        if (i < 0) or (i >= len(self)):
            raise ValueError("index must be within bitmap bounds")
        return libtwiddle.tw_bitmap_test(self.bitmap, i)

    def __setitem__(self, i, value):
        """Set (``True``) or clear (``False``) bit ``i``.

        :raises ValueError: if ``i`` is out of bounds or ``value`` is not a bool.
        """
        if (i < 0) or (i >= len(self)):
            raise ValueError("index must be within bitmap bounds")

        if not isinstance(value, bool):
            raise ValueError("Bitmap accepts only bool values")

        if value:
            libtwiddle.tw_bitmap_set(self.bitmap, i)
        else:
            libtwiddle.tw_bitmap_clear(self.bitmap, i)

    def __contains__(self, x):
        """Return the state of bit ``x``; out-of-range positions yield False."""
        if (x < 0) or (x > self.size - 1):
            return False

        return self[x]

    def __eq__(self, other):
        """Return the native equality result; non-Bitmap operands are unequal."""
        if not isinstance(other, Bitmap):
            return False

        return libtwiddle.tw_bitmap_equal(self.bitmap, other.bitmap)

    def __ne__(self, other):
        """Return the negation of ``__eq__``.

        Python 2 does not derive ``!=`` from ``__eq__``; without this method
        ``a != b`` compares identities and is True for any two distinct
        objects, even when their contents are equal.
        """
        return not self.__eq__(other)

    def __neg__(self):
        """Return a new bitmap on which ``tw_bitmap_not`` has been applied."""
        ret = Bitmap.copy(self)
        libtwiddle.tw_bitmap_not(ret.bitmap)
        return ret

    def __op(self, other, func, copy=lambda x: Bitmap.copy(x)):
        """Apply the native binary operation ``func(src, dst)``.

        ``copy`` produces the destination object from ``self``: a clone for
        the pure operators, ``self`` itself for the in-place ones.

        :raises ValueError: if ``other`` is not a Bitmap of the same size.
        """
        if not isinstance(other, Bitmap):
            raise ValueError("Must compare Bitmap to Bitmap")

        if self.size != other.size:
            raise ValueError("Bitmaps must be of equal size to be comparable")

        ret = copy(self)

        func(other.bitmap, ret.bitmap)

        return ret

    def __iop(self, other, func):
        """In-place variant of ``__op``: the destination is ``self``."""
        return self.__op(other, func, copy=lambda x: x)

    def __or__(self, other):
        return self.__op(other, libtwiddle.tw_bitmap_union)

    def __ior__(self, other):
        return self.__iop(other, libtwiddle.tw_bitmap_union)

    def __and__(self, other):
        return self.__op(other, libtwiddle.tw_bitmap_intersection)

    def __iand__(self, other):
        return self.__iop(other, libtwiddle.tw_bitmap_intersection)

    def __xor__(self, other):
        return self.__op(other, libtwiddle.tw_bitmap_xor)

    def __ixor__(self, other):
        return self.__iop(other, libtwiddle.tw_bitmap_xor)

    def empty(self):
        """Return the result of ``tw_bitmap_empty``."""
        return libtwiddle.tw_bitmap_empty(self.bitmap)

    def full(self):
        """Return the result of ``tw_bitmap_full``."""
        return libtwiddle.tw_bitmap_full(self.bitmap)

    def count(self):
        """Return the result of ``tw_bitmap_count``."""
        return libtwiddle.tw_bitmap_count(self.bitmap)

    def density(self):
        """Return the result of ``tw_bitmap_density``."""
        return libtwiddle.tw_bitmap_density(self.bitmap)

    def zero(self):
        """Call ``tw_bitmap_zero`` on the native bitmap."""
        libtwiddle.tw_bitmap_zero(self.bitmap)

    def fill(self):
        """Call ``tw_bitmap_fill`` on the native bitmap."""
        libtwiddle.tw_bitmap_fill(self.bitmap)

    def find_first_zero(self):
        """Return the result of ``tw_bitmap_find_first_zero``."""
        return libtwiddle.tw_bitmap_find_first_zero(self.bitmap)

    def find_first_bit(self):
        """Return the result of ``tw_bitmap_find_first_bit``."""
        return libtwiddle.tw_bitmap_find_first_bit(self.bitmap)
BitmapRLE.copy(self) 66 | libtwiddle.tw_bitmap_rle_not(self.bitmap, ret.bitmap) 67 | return ret 68 | 69 | 70 | def __op(self, other, func, copy=lambda x: BitmapRLE.copy(x)): 71 | if not isinstance(other, BitmapRLE): 72 | raise ValueError("Must compare BitmapRLE to BitmapRLE") 73 | 74 | if self.size != other.size: 75 | raise ValueError("BitmapRLE must be of equal size to be comparable") 76 | 77 | ret = copy(self) 78 | 79 | func(self.bitmap, other.bitmap, ret.bitmap) 80 | 81 | return ret 82 | 83 | 84 | def __iop(self, other, func): 85 | return self.__op(other, func) 86 | 87 | 88 | def __or__(self, other): 89 | return self.__op(other, libtwiddle.tw_bitmap_rle_union) 90 | 91 | 92 | def __ior__(self, other): 93 | return self.__iop(other, libtwiddle.tw_bitmap_rle_union) 94 | 95 | 96 | def __and__(self, other): 97 | return self.__op(other, libtwiddle.tw_bitmap_rle_intersection) 98 | 99 | 100 | def __iand__(self, other): 101 | return self.__iop(other, libtwiddle.tw_bitmap_rle_intersection) 102 | 103 | 104 | def empty(self): 105 | return libtwiddle.tw_bitmap_rle_empty(self.bitmap) 106 | 107 | 108 | def full(self): 109 | return libtwiddle.tw_bitmap_rle_full(self.bitmap) 110 | 111 | 112 | def count(self): 113 | return libtwiddle.tw_bitmap_rle_count(self.bitmap) 114 | 115 | 116 | def density(self): 117 | return libtwiddle.tw_bitmap_rle_density(self.bitmap) 118 | 119 | 120 | def zero(self): 121 | libtwiddle.tw_bitmap_rle_zero(self.bitmap) 122 | 123 | 124 | def fill(self): 125 | libtwiddle.tw_bitmap_rle_fill(self.bitmap) 126 | 127 | 128 | def find_first_zero(self): 129 | return libtwiddle.tw_bitmap_rle_find_first_zero(self.bitmap) 130 | 131 | 132 | def find_first_bit(self): 133 | return libtwiddle.tw_bitmap_rle_find_first_bit(self.bitmap) 134 | -------------------------------------------------------------------------------- /python/twiddle/bloomfilter.py: -------------------------------------------------------------------------------- 1 | from c import libtwiddle 2 | from ctypes 
import c_int, c_long, pointer 3 | 4 | class BloomFilter(object): 5 | def __init__(self, size, k, ptr=None): 6 | self.bloomfilter = ptr if ptr else libtwiddle.tw_bloomfilter_new(size, k) 7 | self.size = size 8 | self.k = k 9 | 10 | 11 | def __del__(self): 12 | if self.bloomfilter: 13 | libtwiddle.tw_bloomfilter_free(self.bloomfilter) 14 | 15 | 16 | @classmethod 17 | def copy(cls, b): 18 | return cls(b.size, b.k, ptr=libtwiddle.tw_bloomfilter_clone(b.bloomfilter)) 19 | 20 | 21 | @classmethod 22 | def from_iterable(cls, size, k, iterable): 23 | bloomfilter = BloomFilter(size, k) 24 | 25 | for i in iterable: 26 | bloomfilter.set(i) 27 | 28 | return bloomfilter 29 | 30 | 31 | def __len__(self): 32 | return self.size 33 | 34 | 35 | def __getitem__(self, x): 36 | h = pointer(c_long(hash(x))) 37 | return libtwiddle.tw_bloomfilter_test(self.bloomfilter, h, 8) 38 | 39 | 40 | def set(self, x): 41 | h = pointer(c_long(hash(x))) 42 | libtwiddle.tw_bloomfilter_set(self.bloomfilter, h, 8) 43 | 44 | 45 | def test(self, x): 46 | return self[x] 47 | 48 | 49 | def __contains__(self, x): 50 | return self[x] 51 | 52 | 53 | def __eq__(self, other): 54 | if not isinstance(other, BloomFilter): 55 | return False 56 | 57 | return libtwiddle.tw_bloomfilter_equal(self.bloomfilter, other.bloomfilter) 58 | 59 | 60 | def __neg__(self): 61 | ret = BloomFilter.copy(self) 62 | libtwiddle.tw_bloomfilter_not(ret.bloomfilter) 63 | return ret 64 | 65 | 66 | def __op(self, other, func, copy=lambda x: BloomFilter.copy(x)): 67 | if not isinstance(other, BloomFilter): 68 | raise ValueError("Must compare BloomFilter to BloomFilter") 69 | 70 | if self.size != other.size: 71 | raise ValueError("BloomFilters must be of equal size to be comparable") 72 | 73 | ret = copy(self) 74 | 75 | func(other.bloomfilter, ret.bloomfilter) 76 | 77 | return ret 78 | 79 | 80 | def __iop(self, other, func): 81 | return self.__op(other, func, copy=lambda x: x) 82 | 83 | 84 | def __or__(self, other): 85 | return self.__op(other, 
class BloomFilterA2(object):
    """Wrapper around a native libtwiddle ``tw_bloomfilter_a2`` handle.

    The class owns the handle stored in ``self.bloomfilter`` and forwards
    every operation to the matching ``tw_bloomfilter_a2_*`` function.
    Keys are reduced with Python's ``hash()`` before being handed to the
    native library.
    """

    def __init__(self, size, k, density, ptr=None):
        """Create a filter, or adopt the native handle ``ptr``.

        :param size: forwarded to ``tw_bloomfilter_a2_new``.
        :param k: forwarded to ``tw_bloomfilter_a2_new``.
        :param density: forwarded to ``tw_bloomfilter_a2_new``.
        :param ptr: existing native handle to wrap instead of allocating.
        """
        self.bloomfilter = ptr if ptr else libtwiddle.tw_bloomfilter_a2_new(size, k, density)
        self.size = size
        self.k = k
        # NOTE(review): this instance attribute shadows the density() method
        # defined below, so `instance.density` is the configured float and
        # the method is not reachable through an instance. Kept as-is so
        # existing readers of the attribute keep working.
        self.density = density

    def __del__(self):
        """Release the native handle, if one is held."""
        if self.bloomfilter:
            libtwiddle.tw_bloomfilter_a2_free(self.bloomfilter)

    @classmethod
    def copy(cls, b):
        """Return a new instance wrapping a native clone of ``b``.

        ``density`` is a required constructor argument and must be forwarded;
        omitting it made every call raise TypeError.
        """
        return cls(b.size, b.k, b.density,
                   ptr=libtwiddle.tw_bloomfilter_a2_clone(b.bloomfilter))

    @classmethod
    def from_iterable(cls, size, k, density, iterable):
        """Return a new filter into which every item of ``iterable`` was set."""
        bloomfilter = BloomFilterA2(size, k, density)

        for i in iterable:
            bloomfilter.set(i)

        return bloomfilter

    def __len__(self):
        """Return the ``size`` the filter was created with."""
        return self.size

    def __getitem__(self, x):
        """Return the native membership test result for ``hash(x)``."""
        h = pointer(c_long(hash(x)))
        return libtwiddle.tw_bloomfilter_a2_test(self.bloomfilter, h, 8)

    def set(self, x):
        """Insert ``hash(x)`` into the filter."""
        h = pointer(c_long(hash(x)))
        libtwiddle.tw_bloomfilter_a2_set(self.bloomfilter, h, 8)

    def test(self, x):
        """Alias of ``self[x]``."""
        return self[x]

    def __contains__(self, x):
        return self[x]

    def __eq__(self, other):
        """Return the native equality result; other types are unequal."""
        if not isinstance(other, BloomFilterA2):
            return False

        return libtwiddle.tw_bloomfilter_a2_equal(self.bloomfilter, other.bloomfilter)

    def __ne__(self, other):
        """Return the negation of ``__eq__``.

        Python 2 does not derive ``!=`` from ``__eq__``, so it has to be
        defined explicitly for ``a != b`` to compare contents.
        """
        return not self.__eq__(other)

    def __neg__(self):
        """Return a new filter on which ``tw_bloomfilter_a2_not`` was applied."""
        ret = BloomFilterA2.copy(self)
        libtwiddle.tw_bloomfilter_a2_not(ret.bloomfilter)
        return ret

    def __op(self, other, func, copy=lambda x: BloomFilterA2.copy(x)):
        """Apply the native binary operation ``func(src, dst)``.

        ``copy`` produces the destination object from ``self``: a clone for
        the pure operators, ``self`` itself for the in-place ones.

        :raises ValueError: if ``other`` is not a BloomFilterA2 of equal size.
        """
        if not isinstance(other, BloomFilterA2):
            raise ValueError("Must compare BloomFilterA2 to BloomFilterA2")

        if self.size != other.size:
            raise ValueError("BloomFiltersA2 must be of equal size to be comparable")

        ret = copy(self)

        func(other.bloomfilter, ret.bloomfilter)

        return ret

    def __iop(self, other, func):
        """In-place variant of ``__op``: the destination is ``self``."""
        return self.__op(other, func, copy=lambda x: x)

    def __or__(self, other):
        return self.__op(other, libtwiddle.tw_bloomfilter_a2_union)

    def __ior__(self, other):
        return self.__iop(other, libtwiddle.tw_bloomfilter_a2_union)

    def __and__(self, other):
        return self.__op(other, libtwiddle.tw_bloomfilter_a2_intersection)

    def __iand__(self, other):
        return self.__iop(other, libtwiddle.tw_bloomfilter_a2_intersection)

    def __xor__(self, other):
        return self.__op(other, libtwiddle.tw_bloomfilter_a2_xor)

    def __ixor__(self, other):
        return self.__iop(other, libtwiddle.tw_bloomfilter_a2_xor)

    def empty(self):
        """Return the result of ``tw_bloomfilter_a2_empty``."""
        return libtwiddle.tw_bloomfilter_a2_empty(self.bloomfilter)

    def full(self):
        """Return the result of ``tw_bloomfilter_a2_full``."""
        return libtwiddle.tw_bloomfilter_a2_full(self.bloomfilter)

    def count(self):
        """Return the result of ``tw_bloomfilter_a2_count``."""
        return libtwiddle.tw_bloomfilter_a2_count(self.bloomfilter)

    def density(self):
        """Return the result of ``tw_bloomfilter_a2_density``."""
        return libtwiddle.tw_bloomfilter_a2_density(self.bloomfilter)

    def zero(self):
        """Call ``tw_bloomfilter_a2_zero`` on the native filter."""
        libtwiddle.tw_bloomfilter_a2_zero(self.bloomfilter)

    def fill(self):
        """Call ``tw_bloomfilter_a2_fill`` on the native filter."""
        libtwiddle.tw_bloomfilter_a2_fill(self.bloomfilter)
class MinHash(object):
    """Wrapper around a native libtwiddle ``tw_minhash`` handle.

    The class owns the handle stored in ``self.minhash`` and forwards every
    operation to the matching ``tw_minhash_*`` function. Keys are reduced
    with Python's ``hash()`` before being handed to the native library.
    """

    def __init__(self, n_registers, ptr=None):
        """Create a sketch with ``n_registers`` registers, or adopt ``ptr``.

        :param n_registers: forwarded to ``tw_minhash_new``.
        :param ptr: existing native handle to wrap instead of allocating.
        """
        self.minhash = ptr if ptr else libtwiddle.tw_minhash_new(n_registers)
        self.n_registers = n_registers

    def __del__(self):
        """Release the native handle, if one is held."""
        if self.minhash:
            libtwiddle.tw_minhash_free(self.minhash)

    @classmethod
    def copy(cls, h):
        """Return a new instance wrapping a native clone of ``h``."""
        return cls(h.n_registers, ptr=libtwiddle.tw_minhash_clone(h.minhash))

    @classmethod
    def from_iterable(cls, n_registers, iterable):
        """Return a new sketch to which every item of ``iterable`` was added."""
        minhash = MinHash(n_registers)

        for i in iterable:
            minhash.add(i)

        return minhash

    def add(self, x):
        """Add ``hash(x)`` to the sketch."""
        h = pointer(c_long(hash(x)))
        libtwiddle.tw_minhash_add(self.minhash, h, 8)

    def __eq__(self, other):
        """Return the native equality result; other types are unequal."""
        if not isinstance(other, MinHash):
            return False

        return libtwiddle.tw_minhash_equal(self.minhash, other.minhash)

    def __ne__(self, other):
        """Return the negation of ``__eq__``.

        Python 2 does not derive ``!=`` from ``__eq__``, so it has to be
        defined explicitly for ``a != b`` to compare contents.
        """
        return not self.__eq__(other)

    def __op(self, other, func, copy=lambda x: MinHash.copy(x)):
        """Apply the native binary operation ``func(src, dst)``.

        ``copy`` produces the destination object from ``self``: a clone for
        ``|``, ``self`` itself for ``|=``.

        :raises ValueError: if ``other`` is not a MinHash with the same
            ``n_registers``.
        """
        if not isinstance(other, MinHash):
            raise ValueError("Must compare MinHash to MinHash")

        if self.n_registers != other.n_registers:
            raise ValueError("MinHashs must be of equal n_registers to be comparable")

        ret = copy(self)

        func(other.minhash, ret.minhash)

        return ret

    def __iop(self, other, func):
        """In-place variant of ``__op``: the destination is ``self``."""
        return self.__op(other, func, copy=lambda x: x)

    def __or__(self, other):
        return self.__op(other, libtwiddle.tw_minhash_merge)

    def __ior__(self, other):
        return self.__iop(other, libtwiddle.tw_minhash_merge)

    def estimate(self, other):
        """Return ``tw_minhash_estimate`` for this sketch and ``other``.

        :raises ValueError: if ``other`` is not a MinHash.
        """
        if not isinstance(other, MinHash):
            raise ValueError("Must compare MinHash to MinHash")

        return libtwiddle.tw_minhash_estimate(self.minhash, other.minhash)
# Append -funroll-loops to the C flags used for the Release configuration.
# NOTE(review): this is a directory-scoped variable, so it affects every C
# target defined from this directory downwards, not only libtwiddle.
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -funroll-loops")
# Declare the twiddle library through add_c_library(), which is not a CMake
# built-in -- presumably the project helper shipped under tools/cmake
# (TODO confirm). VERSION follows the libtool current.revision.age rules
# described in the comment block above.
# hyperloglog_simd.c is deliberately absent from SOURCES: hyperloglog.c pulls
# it in with #include "hyperloglog_simd.c".
add_c_library(
  libtwiddle
  OUTPUT_NAME twiddle
  PKGCONFIG_NAME libtwiddle
  VERSION 1.0.0
  SOURCES
    twiddle/bitmap/bitmap.c
    twiddle/bitmap/bitmap_rle.c
    twiddle/bloomfilter/bloomfilter.c
    twiddle/bloomfilter/bloomfilter_a2.c
    twiddle/hyperloglog/hyperloglog.c
    twiddle/hyperloglog/hyperloglog_bias.c
    twiddle/hash/minhash.c
    twiddle/utils/hash.c
    twiddle/utils/murmur3.c
    twiddle/utils/metrohash.c
)
9 | Version: @VERSION@ 10 | URL: https://github.com/fsaintjacques/libtwiddle 11 | Libs: -L${libdir} -ltwiddle -lm 12 | Cflags: -I${includedir} 13 | -------------------------------------------------------------------------------- /src/twiddle/bloomfilter/bloomfilter.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #define TW_BF_DEFAULT_SEED 3781869495ULL 9 | 10 | struct tw_bloomfilter *tw_bloomfilter_new(uint64_t size, uint16_t k) 11 | { 12 | if (!size || size > TW_BITMAP_MAX_BITS || !k) { 13 | return NULL; 14 | } 15 | 16 | struct tw_bloomfilter *bf = calloc(1, sizeof(struct tw_bloomfilter)); 17 | if (!bf) { 18 | return NULL; 19 | } 20 | 21 | bf->bitmap = tw_bitmap_new(size); 22 | if (!(bf->bitmap)) { 23 | free(bf); 24 | return NULL; 25 | } 26 | 27 | bf->k = k; 28 | 29 | return bf; 30 | } 31 | 32 | void tw_bloomfilter_free(struct tw_bloomfilter *bf) 33 | { 34 | if (!bf) { 35 | return; 36 | } 37 | 38 | tw_bitmap_free(bf->bitmap); 39 | free(bf); 40 | } 41 | 42 | struct tw_bloomfilter *tw_bloomfilter_copy(const struct tw_bloomfilter *src, 43 | struct tw_bloomfilter *dst) 44 | { 45 | if (!src || !dst || dst->bitmap->size != src->bitmap->size) { 46 | return NULL; 47 | } 48 | 49 | dst->k = src->k; 50 | 51 | if (!tw_bitmap_copy(src->bitmap, dst->bitmap)) { 52 | return NULL; 53 | } 54 | 55 | return dst; 56 | } 57 | 58 | struct tw_bloomfilter *tw_bloomfilter_clone(const struct tw_bloomfilter *bf) 59 | { 60 | if (!bf) { 61 | return NULL; 62 | } 63 | 64 | struct tw_bloomfilter *new = tw_bloomfilter_new(bf->bitmap->size, bf->k); 65 | if (!new) { 66 | return NULL; 67 | } 68 | 69 | return tw_bloomfilter_copy(bf, new); 70 | } 71 | 72 | void tw_bloomfilter_set(struct tw_bloomfilter *bf, const void *key, 73 | size_t key_size) 74 | { 75 | if (!bf || !key || !key_size) { 76 | return; 77 | } 78 | 79 | const tw_uint128_t hash = tw_metrohash_128(TW_BF_DEFAULT_SEED, key, 
key_size); 80 | const uint16_t k = bf->k; 81 | struct tw_bitmap *bitmap = bf->bitmap; 82 | const uint64_t b_size = bitmap->size; 83 | 84 | for (size_t i = 0; i < k; ++i) { 85 | const uint64_t hash_fn_i = hash.h + (i * hash.l); 86 | const uint64_t idx = tw_projection_mul_64(hash_fn_i, b_size); 87 | tw_bitmap_set(bitmap, idx); 88 | } 89 | } 90 | 91 | bool tw_bloomfilter_test(const struct tw_bloomfilter *bf, const void *key, 92 | size_t key_size) 93 | { 94 | if (!bf || !key || !key_size) { 95 | return false; 96 | } 97 | 98 | const tw_uint128_t hash = tw_metrohash_128(TW_BF_DEFAULT_SEED, key, key_size); 99 | 100 | const uint16_t k = bf->k; 101 | const struct tw_bitmap *bitmap = bf->bitmap; 102 | const uint64_t b_size = bitmap->size; 103 | 104 | for (size_t i = 0; i < k; ++i) { 105 | const uint64_t hash_fn_i = hash.h + (i * hash.l); 106 | const uint64_t idx = tw_projection_mul_64(hash_fn_i, b_size); 107 | if (!tw_bitmap_test(bitmap, idx)) { 108 | return false; 109 | } 110 | } 111 | 112 | return true; 113 | } 114 | 115 | bool tw_bloomfilter_empty(const struct tw_bloomfilter *bf) 116 | { 117 | if (!bf) { 118 | return false; 119 | } 120 | 121 | return tw_bitmap_empty(bf->bitmap); 122 | } 123 | 124 | bool tw_bloomfilter_full(const struct tw_bloomfilter *bf) 125 | { 126 | if (!bf) { 127 | return false; 128 | } 129 | 130 | return tw_bitmap_full(bf->bitmap); 131 | } 132 | 133 | uint64_t tw_bloomfilter_count(const struct tw_bloomfilter *bf) 134 | { 135 | if (!bf) { 136 | return 0; 137 | } 138 | 139 | return tw_bitmap_count(bf->bitmap); 140 | } 141 | 142 | float tw_bloomfilter_density(const struct tw_bloomfilter *bf) 143 | { 144 | if (!bf) { 145 | return 0.0f; 146 | } 147 | 148 | return tw_bitmap_density(bf->bitmap); 149 | } 150 | 151 | struct tw_bloomfilter *tw_bloomfilter_zero(struct tw_bloomfilter *bf) 152 | { 153 | if (!bf) { 154 | return NULL; 155 | } 156 | 157 | return (tw_bitmap_zero(bf->bitmap)) ? 
bf : NULL; 158 | } 159 | 160 | struct tw_bloomfilter *tw_bloomfilter_fill(struct tw_bloomfilter *bf) 161 | { 162 | if (!bf) { 163 | return NULL; 164 | } 165 | 166 | return (tw_bitmap_fill(bf->bitmap)) ? bf : NULL; 167 | } 168 | 169 | struct tw_bloomfilter *tw_bloomfilter_not(struct tw_bloomfilter *bf) 170 | { 171 | if (!bf) { 172 | return NULL; 173 | } 174 | 175 | return (tw_bitmap_not(bf->bitmap)) ? bf : NULL; 176 | } 177 | 178 | bool tw_bloomfilter_equal(const struct tw_bloomfilter *a, 179 | const struct tw_bloomfilter *b) 180 | { 181 | if (!a || !b) { 182 | return false; 183 | } 184 | 185 | return (a->k == b->k) && tw_bitmap_equal(a->bitmap, b->bitmap); 186 | } 187 | 188 | struct tw_bloomfilter *tw_bloomfilter_union(const struct tw_bloomfilter *src, 189 | struct tw_bloomfilter *dst) 190 | { 191 | if (!src || !dst || src->k != dst->k) { 192 | return false; 193 | } 194 | 195 | return (tw_bitmap_union(src->bitmap, dst->bitmap)) ? dst : NULL; 196 | } 197 | 198 | struct tw_bloomfilter * 199 | tw_bloomfilter_intersection(const struct tw_bloomfilter *src, 200 | struct tw_bloomfilter *dst) 201 | { 202 | if (!src || !dst || src->k != dst->k) { 203 | return false; 204 | } 205 | 206 | return (tw_bitmap_intersection(src->bitmap, dst->bitmap)) ? dst : NULL; 207 | } 208 | 209 | struct tw_bloomfilter *tw_bloomfilter_xor(const struct tw_bloomfilter *src, 210 | struct tw_bloomfilter *dst) 211 | { 212 | if (!src || !dst || src->k != dst->k) { 213 | return false; 214 | } 215 | 216 | return (tw_bitmap_xor(src->bitmap, dst->bitmap)) ? 
dst : NULL; 217 | } 218 | -------------------------------------------------------------------------------- /src/twiddle/bloomfilter/bloomfilter_a2.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "../macrology.h" 6 | 7 | struct tw_bloomfilter_a2 *tw_bloomfilter_a2_new(uint64_t size, uint16_t k, 8 | float density) 9 | { 10 | if ((!size || size > TW_BITMAP_MAX_BITS) || !k || 11 | (density <= 0.0 || density > 1.0)) { 12 | return NULL; 13 | } 14 | 15 | struct tw_bloomfilter_a2 *bf = calloc(1, sizeof(struct tw_bloomfilter_a2)); 16 | 17 | struct tw_bloomfilter *active = tw_bloomfilter_new(size, k); 18 | if (!active) { 19 | free(bf); 20 | return NULL; 21 | } 22 | 23 | struct tw_bloomfilter *passive = tw_bloomfilter_new(size, k); 24 | if (!passive) { 25 | free(bf); 26 | free(active); 27 | return NULL; 28 | } 29 | 30 | bf->density = density; 31 | bf->active = active; 32 | bf->passive = passive; 33 | 34 | return bf; 35 | } 36 | 37 | struct tw_bloomfilter_a2 * 38 | tw_bloomfilter_a2_copy(const struct tw_bloomfilter_a2 *src, 39 | struct tw_bloomfilter_a2 *dst) 40 | { 41 | if (!src || !dst) { 42 | return NULL; 43 | } 44 | 45 | if (!tw_bloomfilter_copy(src->active, dst->active) || 46 | !tw_bloomfilter_copy(src->passive, dst->passive)) { 47 | return NULL; 48 | } 49 | 50 | return dst; 51 | } 52 | 53 | struct tw_bloomfilter_a2 * 54 | tw_bloomfilter_a2_clone(const struct tw_bloomfilter_a2 *bf) 55 | { 56 | if (!bf) { 57 | return NULL; 58 | } 59 | 60 | struct tw_bloomfilter_a2 *new = tw_bloomfilter_a2_new( 61 | bf->active->bitmap->size, bf->active->k, bf->density); 62 | if (!new) { 63 | return NULL; 64 | } 65 | 66 | return tw_bloomfilter_a2_copy(bf, new); 67 | } 68 | 69 | void tw_bloomfilter_a2_free(struct tw_bloomfilter_a2 *bf) 70 | { 71 | if (!bf) { 72 | return; 73 | } 74 | 75 | tw_bloomfilter_free(bf->active); 76 | tw_bloomfilter_free(bf->passive); 77 | free(bf); 78 | } 79 | 80 | static 
inline bool tw_bloomfilter_a2_rotate_(struct tw_bloomfilter_a2 *bf) 81 | { 82 | if (tw_unlikely(tw_bloomfilter_density(bf->active) >= bf->density)) { 83 | struct tw_bloomfilter *tmp = bf->passive; 84 | bf->passive = bf->active; 85 | bf->active = tmp; 86 | tw_bloomfilter_zero(tmp); 87 | return true; 88 | } 89 | 90 | return false; 91 | } 92 | 93 | void tw_bloomfilter_a2_set(struct tw_bloomfilter_a2 *bf, const void *key, 94 | size_t key_size) 95 | { 96 | if (!bf || !key || !key_size) { 97 | return; 98 | } 99 | 100 | tw_bloomfilter_a2_rotate_(bf); 101 | 102 | tw_bloomfilter_set(bf->active, key, key_size); 103 | } 104 | 105 | bool tw_bloomfilter_a2_test(const struct tw_bloomfilter_a2 *bf, const void *key, 106 | size_t key_size) 107 | { 108 | if (!bf || !key || !key_size) { 109 | return false; 110 | } 111 | 112 | return tw_bloomfilter_test(bf->active, key, key_size) || 113 | tw_bloomfilter_test(bf->passive, key, key_size); 114 | } 115 | 116 | bool tw_bloomfilter_a2_empty(const struct tw_bloomfilter_a2 *bf) 117 | { 118 | if (!bf) { 119 | return false; 120 | } 121 | 122 | return tw_bloomfilter_empty(bf->active) && tw_bloomfilter_empty(bf->passive); 123 | } 124 | 125 | bool tw_bloomfilter_a2_full(const struct tw_bloomfilter_a2 *bf) 126 | { 127 | if (!bf) { 128 | return false; 129 | } 130 | 131 | return tw_bloomfilter_full(bf->active) && tw_bloomfilter_full(bf->passive); 132 | } 133 | 134 | uint64_t tw_bloomfilter_a2_count(const struct tw_bloomfilter_a2 *bf) 135 | { 136 | if (!bf) { 137 | return 0; 138 | } 139 | 140 | return tw_bloomfilter_count(bf->active) + tw_bloomfilter_count(bf->passive); 141 | } 142 | 143 | float tw_bloomfilter_a2_density(const struct tw_bloomfilter_a2 *bf) 144 | { 145 | if (!bf) { 146 | return 0.0; 147 | } 148 | 149 | return (tw_bloomfilter_density(bf->active) + 150 | tw_bloomfilter_density(bf->passive)) / 151 | 2.0; 152 | } 153 | 154 | struct tw_bloomfilter_a2 *tw_bloomfilter_a2_zero(struct tw_bloomfilter_a2 *bf) 155 | { 156 | if (!bf) { 157 | return 
NULL; 158 | } 159 | 160 | return (tw_bloomfilter_zero(bf->active) && tw_bloomfilter_zero(bf->passive)) 161 | ? bf 162 | : NULL; 163 | } 164 | 165 | struct tw_bloomfilter_a2 *tw_bloomfilter_a2_fill(struct tw_bloomfilter_a2 *bf) 166 | { 167 | if (!bf) { 168 | return NULL; 169 | } 170 | 171 | return (tw_bloomfilter_fill(bf->active) && tw_bloomfilter_fill(bf->passive)) 172 | ? bf 173 | : NULL; 174 | } 175 | 176 | struct tw_bloomfilter_a2 *tw_bloomfilter_a2_not(struct tw_bloomfilter_a2 *bf) 177 | { 178 | if (!bf) { 179 | return NULL; 180 | } 181 | 182 | return (tw_bloomfilter_not(bf->active) && tw_bloomfilter_not(bf->passive)) 183 | ? bf 184 | : NULL; 185 | } 186 | 187 | bool tw_bloomfilter_a2_equal(const struct tw_bloomfilter_a2 *fst, 188 | const struct tw_bloomfilter_a2 *snd) 189 | { 190 | if (!fst || !snd) { 191 | return false; 192 | } 193 | 194 | return (tw_almost_equal(fst->density, snd->density) && 195 | tw_bloomfilter_equal(fst->active, snd->active) && 196 | tw_bloomfilter_equal(fst->passive, snd->passive)); 197 | } 198 | 199 | struct tw_bloomfilter_a2 * 200 | tw_bloomfilter_a2_union(const struct tw_bloomfilter_a2 *src, 201 | struct tw_bloomfilter_a2 *dst) 202 | { 203 | if (!src || !dst) { 204 | return NULL; 205 | } 206 | 207 | if (!tw_almost_equal(src->density, dst->density)) { 208 | return NULL; 209 | } 210 | 211 | return (tw_bloomfilter_union(src->active, dst->active) && 212 | tw_bloomfilter_union(src->passive, dst->passive)) 213 | ? dst 214 | : NULL; 215 | } 216 | 217 | struct tw_bloomfilter_a2 * 218 | tw_bloomfilter_a2_intersection(const struct tw_bloomfilter_a2 *src, 219 | struct tw_bloomfilter_a2 *dst) 220 | { 221 | if (!src || !dst) { 222 | return NULL; 223 | } 224 | 225 | if (!tw_almost_equal(src->density, dst->density)) { 226 | return NULL; 227 | } 228 | 229 | return (tw_bloomfilter_intersection(src->active, dst->active) && 230 | tw_bloomfilter_intersection(src->passive, dst->passive)) 231 | ? 
dst 232 | : NULL; 233 | } 234 | 235 | struct tw_bloomfilter_a2 * 236 | tw_bloomfilter_a2_xor(const struct tw_bloomfilter_a2 *src, 237 | struct tw_bloomfilter_a2 *dst) 238 | { 239 | if (!src || !dst) { 240 | return NULL; 241 | } 242 | 243 | if (!tw_almost_equal(src->density, dst->density)) { 244 | return NULL; 245 | } 246 | 247 | return (tw_bloomfilter_xor(src->active, dst->active) && 248 | tw_bloomfilter_xor(src->passive, dst->passive)) 249 | ? dst 250 | : NULL; 251 | } 252 | -------------------------------------------------------------------------------- /src/twiddle/hyperloglog/hyperloglog.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | #include "../macrology.h" 10 | #include "hyperloglog_simd.c" 11 | 12 | #define TW_BYTES_PER_HLL_REGISTER sizeof(uint8_t) 13 | #define TW_BITS_PER_HLL_REGISTER (TW_BYTES_PER_HLL * TW_BITS_IN_WORD) 14 | 15 | #define TW_HLL_DEFAULT_SEED 646086642ULL 16 | 17 | static_assert(TW_HLL_MIN_PRECISION >= 6, 18 | "precision must be at least one cacheline"); 19 | 20 | static_assert(TW_HLL_MAX_PRECISION < 64, 21 | "precision must be smaller than 64 for defined bit shifts"); 22 | 23 | struct tw_hyperloglog *tw_hyperloglog_new(uint8_t precision) 24 | { 25 | if (precision < TW_HLL_MIN_PRECISION || precision > TW_HLL_MAX_PRECISION) { 26 | return NULL; 27 | } 28 | 29 | struct tw_hyperloglog *hll = calloc(1, sizeof(struct tw_hyperloglog)); 30 | if (!hll) { 31 | return NULL; 32 | } 33 | 34 | size_t alloc_size = TW_ALLOC_TO_CACHELINE(1 << precision) * sizeof(uint8_t); 35 | 36 | if ((hll->registers = malloc_aligned(TW_CACHELINE, alloc_size)) == NULL) { 37 | free(hll); 38 | return NULL; 39 | } 40 | 41 | memset(hll->registers, 0, alloc_size); 42 | hll->precision = precision; 43 | 44 | return hll; 45 | } 46 | 47 | void tw_hyperloglog_free(struct tw_hyperloglog *hll) 48 | { 49 | if (!hll) { 50 | return; 51 | } 52 | 53 | 
free(hll->registers); 54 | free(hll); 55 | } 56 | 57 | struct tw_hyperloglog *tw_hyperloglog_copy(const struct tw_hyperloglog *src, 58 | struct tw_hyperloglog *dst) 59 | { 60 | if (!src || !dst || src->precision != dst->precision) { 61 | return NULL; 62 | } 63 | 64 | const uint8_t precision = src->precision; 65 | const uint32_t n_registers = 1 << precision; 66 | 67 | dst->precision = precision; 68 | memcpy(dst->registers, src->registers, 69 | n_registers * TW_BYTES_PER_HLL_REGISTER); 70 | 71 | return dst; 72 | } 73 | 74 | struct tw_hyperloglog *tw_hyperloglog_clone(const struct tw_hyperloglog *src) 75 | { 76 | if (!src) { 77 | return NULL; 78 | } 79 | 80 | struct tw_hyperloglog *dst = tw_hyperloglog_new(src->precision); 81 | if (dst == NULL) { 82 | return NULL; 83 | } 84 | 85 | return tw_hyperloglog_copy(src, dst); 86 | } 87 | 88 | void tw_hyperloglog_add(struct tw_hyperloglog *hll, const void *key, 89 | size_t key_size) 90 | { 91 | if (!hll || !key || !key_size) { 92 | return; 93 | } 94 | 95 | const tw_uint128_t hash = 96 | tw_metrohash_128(TW_HLL_DEFAULT_SEED, key, key_size); 97 | const uint8_t precision = hll->precision; 98 | 99 | const uint32_t register_idx = hash.l >> (64 - precision); 100 | 101 | const uint8_t leading_zeros = __builtin_clzll(hash.h) + 1; 102 | const uint8_t cur_leading_zeros = hll->registers[register_idx]; 103 | hll->registers[register_idx] = tw_max(leading_zeros, cur_leading_zeros); 104 | } 105 | 106 | extern double estimate(uint8_t precision, uint32_t n_zeros, float inverse_sum); 107 | 108 | #ifdef USE_AVX2 109 | extern void hyperloglog_count_avx2(const uint8_t *registers, 110 | uint32_t n_registers, float *inverse_sum, 111 | uint32_t *n_zeros); 112 | #elif defined USE_AVX 113 | extern void hyperloglog_count_avx(const uint8_t *registers, 114 | uint32_t n_registers, float *inverse_sum, 115 | uint32_t *n_zeros); 116 | #else 117 | extern void hyperloglog_count_port(const uint8_t *registers, 118 | uint32_t n_registers, float *inverse_sum, 119 | 
uint32_t *n_zeros); 120 | #endif 121 | 122 | double tw_hyperloglog_count(const struct tw_hyperloglog *hll) 123 | { 124 | if (!hll) { 125 | return 0.0; 126 | } 127 | 128 | const uint8_t precision = hll->precision; 129 | const uint32_t n_registers = 1 << precision; 130 | uint32_t n_zeros = 0; 131 | float inverse_sum = 0.0; 132 | 133 | #ifdef USE_AVX2 134 | hyperloglog_count_avx2(hll->registers, n_registers, &inverse_sum, &n_zeros); 135 | #elif defined USE_AVX 136 | hyperloglog_count_avx(hll->registers, n_registers, &inverse_sum, &n_zeros); 137 | #else 138 | hyperloglog_count_port(hll->registers, n_registers, &inverse_sum, &n_zeros); 139 | #endif 140 | 141 | return estimate(precision, n_zeros, inverse_sum); 142 | } 143 | 144 | bool tw_hyperloglog_equal(const struct tw_hyperloglog *fst, 145 | const struct tw_hyperloglog *snd) 146 | { 147 | if (!fst || !snd) { 148 | return false; 149 | } 150 | 151 | const uint8_t precision = fst->precision; 152 | 153 | if (precision != snd->precision) { 154 | return false; 155 | } 156 | 157 | const uint32_t n_registers = 1 << precision; 158 | 159 | #define HLL_EQ_LOOP(simd_t, simd_load, simd_equal) \ 160 | for (size_t i = 0; i < n_registers / (sizeof(simd_t)); ++i) { \ 161 | simd_t *fst_addr = (simd_t *)fst->registers + i, \ 162 | *snd_addr = (simd_t *)snd->registers + i; \ 163 | if (!simd_equal(simd_load(fst_addr), simd_load(snd_addr))) { \ 164 | return false; \ 165 | } \ 166 | } 167 | 168 | /* AVX512 does not have movemask_epi8 equivalent, fallback to AVX2 */ 169 | #ifdef USE_AVX2 170 | HLL_EQ_LOOP(__m256i, _mm256_load_si256, tw_mm256_equal) 171 | #elif defined USE_AVX 172 | HLL_EQ_LOOP(__m128i, _mm_load_si128, tw_mm_equal) 173 | #else 174 | for (size_t i = 0; i < n_registers; ++i) { 175 | if (fst->registers[i] != snd->registers[i]) { 176 | return false; 177 | } 178 | } 179 | #endif 180 | 181 | #undef HLL_EQ_LOOP 182 | 183 | return true; 184 | } 185 | 186 | struct tw_hyperloglog *tw_hyperloglog_merge(const struct tw_hyperloglog *src, 
187 | struct tw_hyperloglog *dst) 188 | { 189 | if (!src || !dst || src->precision != dst->precision) { 190 | return NULL; 191 | } 192 | 193 | const uint8_t precision = src->precision; 194 | const uint32_t n_registers = 1 << precision; 195 | 196 | #define HLL_MAX_LOOP(simd_t, simd_load, simd_max, simd_store) \ 197 | for (size_t i = 0; i < n_registers / sizeof(simd_t); ++i) { \ 198 | simd_t *src_vec = (simd_t *)src->registers + i, \ 199 | *dst_vec = (simd_t *)dst->registers + i; \ 200 | const simd_t res = simd_max(simd_load(src_vec), simd_load(dst_vec)); \ 201 | simd_store(dst_vec, res); \ 202 | } 203 | 204 | #ifdef USE_AVX512 205 | HLL_MAX_LOOP(__m512i, _mm512_load_si512, _mm512_max_epu8, _mm512_store_si512) 206 | #elif defined USE_AVX2 207 | HLL_MAX_LOOP(__m256i, _mm256_load_si256, _mm256_max_epu8, _mm256_store_si256) 208 | #elif defined USE_AVX 209 | HLL_MAX_LOOP(__m128i, _mm_load_si128, _mm_max_epu8, _mm_store_si128) 210 | #else 211 | for (size_t i = 0; i < n_registers; ++i) { 212 | dst->registers[i] = tw_max(src->registers[i], dst->registers[i]); 213 | } 214 | #endif 215 | 216 | #undef HLL_MAX_LOOP 217 | 218 | return dst; 219 | } 220 | -------------------------------------------------------------------------------- /src/twiddle/hyperloglog/hyperloglog_simd.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | 6 | #ifdef USE_AVX2 7 | /* http://stackoverflow.com/questions/13219146/how-to-sum-m256-horizontally */ 8 | static inline float horizontal_sum_avx2(__m256 x) 9 | { 10 | const __m128 hi_quad = _mm256_extractf128_ps(x, 1); 11 | const __m128 lo_quad = _mm256_castps256_ps128(x); 12 | const __m128 sum_quad = _mm_add_ps(lo_quad, hi_quad); 13 | const __m128 lo_dual = sum_quad; 14 | const __m128 hi_dual = _mm_movehl_ps(sum_quad, sum_quad); 15 | const __m128 sum_dual = _mm_add_ps(lo_dual, hi_dual); 16 | const __m128 lo = sum_dual; 17 | const __m128 hi = _mm_shuffle_ps(sum_dual, sum_dual, 
0x1); 18 | const __m128 sum = _mm_add_ss(lo, hi); 19 | return _mm_cvtss_f32(sum); 20 | } 21 | 22 | #define _mm256_cntz_epi8(simd) \ 23 | __builtin_popcount( \ 24 | _mm256_movemask_epi8(_mm256_cmpeq_epi8(simd, _mm256_setzero_si256()))) 25 | 26 | #define inverse_power_avx2(simd) \ 27 | _mm256_sub_epi32(ones, _mm256_slli_epi32(_mm256_cvtepu8_epi32(simd), 23)) 28 | 29 | static inline void hyperloglog_count_avx2(const uint8_t *registers, 30 | uint32_t n_registers, 31 | float *inverse_sum, uint32_t *n_zeros) 32 | { 33 | const __m256i ones = (__m256i)_mm256_set1_ps(1.0f); 34 | __m256 agg = _mm256_set1_ps(0.0f); 35 | 36 | for (size_t i = 0; i < n_registers / sizeof(__m256i); ++i) { 37 | const __m256i simd = _mm256_load_si256((__m256i *)registers + i); 38 | /* For some reason, VPSRLDQ works on lane of 128bits instead of 256. */ 39 | const __m128i low = _mm256_extracti128_si256(simd, 0); 40 | const __m128i high = _mm256_extracti128_si256(simd, 1); 41 | 42 | __m256i sums = inverse_power_avx2(low); 43 | agg = _mm256_add_ps(agg, (__m256)sums); 44 | 45 | sums = inverse_power_avx2(_mm_srli_si128(low, 8)); 46 | agg = _mm256_add_ps(agg, (__m256)sums); 47 | 48 | sums = inverse_power_avx2(high); 49 | agg = _mm256_add_ps(agg, (__m256)sums); 50 | 51 | sums = inverse_power_avx2(_mm_srli_si128(high, 8)); 52 | agg = _mm256_add_ps(agg, (__m256)sums); 53 | 54 | *n_zeros += _mm256_cntz_epi8(simd); 55 | } 56 | 57 | *inverse_sum = horizontal_sum_avx2(agg); 58 | } 59 | 60 | #elif defined USE_AVX 61 | 62 | static inline float horizontal_sum_avx(__m128 x) 63 | { 64 | x = _mm_hadd_ps(x, x); 65 | x = _mm_hadd_ps(x, x); 66 | return _mm_cvtss_f32(x); 67 | } 68 | 69 | #define _mm_cntz_epi8(simd) \ 70 | __builtin_popcount( \ 71 | _mm_movemask_epi8(_mm_cmpeq_epi8(simd, _mm_setzero_si128()))) 72 | 73 | #define inverse_power_avx(simd) \ 74 | _mm_sub_epi32(ones, _mm_slli_epi32(_mm_cvtepu8_epi32(simd), 23)) 75 | 76 | static inline void hyperloglog_count_avx(const uint8_t *registers, 77 | uint32_t 
n_registers, 78 | float *inverse_sum, uint32_t *n_zeros) 79 | { 80 | const __m128i ones = (__m128i)_mm_set1_ps(1.0f); 81 | __m128 agg = _mm_set1_ps(0.0f); 82 | 83 | for (size_t i = 0; i < n_registers / sizeof(__m128i); ++i) { 84 | const __m128i simd = _mm_load_si128((__m128i *)registers + i); 85 | 86 | __m128i powers = inverse_power_avx(simd); 87 | agg = _mm_add_ps(agg, (__m128)powers); 88 | 89 | powers = inverse_power_avx(_mm_srli_si128(simd, 4)); 90 | agg = _mm_add_ps(agg, (__m128)powers); 91 | 92 | powers = inverse_power_avx(_mm_srli_si128(simd, 8)); 93 | agg = _mm_add_ps(agg, (__m128)powers); 94 | 95 | powers = inverse_power_avx(_mm_srli_si128(simd, 12)); 96 | agg = _mm_add_ps(agg, (__m128)powers); 97 | 98 | *n_zeros += _mm_cntz_epi8(simd); 99 | } 100 | 101 | *inverse_sum = horizontal_sum_avx(agg); 102 | } 103 | 104 | #endif 105 | 106 | static inline void hyperloglog_count_port(const uint8_t *registers, 107 | uint32_t n_registers, 108 | float *inverse_sum, uint32_t *n_zeros) 109 | 110 | { 111 | for (size_t i = 0; i < n_registers; ++i) { 112 | const uint8_t val = registers[i]; 113 | *inverse_sum += powf(2, -1.0 * val); 114 | if (val == 0) { 115 | *n_zeros += 1; 116 | } 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /src/twiddle/macrology.h: -------------------------------------------------------------------------------- 1 | #ifndef TWIDDLE_INTERNAL_UTILS_H 2 | #define TWIDDLE_INTERNAL_UTILS_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #ifndef static_assert 11 | #define static_assert _Static_assert 12 | #endif 13 | 14 | /* Number of bytes per cache line */ 15 | #ifndef TW_CACHELINE 16 | #define TW_CACHELINE 64 17 | #endif 18 | 19 | #define TW_DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d)) 20 | 21 | #define TW_ALLOC_TO_CACHELINE(size) \ 22 | ((TW_DIV_ROUND_UP((size), TW_CACHELINE) * TW_CACHELINE)) 23 | 24 | #define TW_ARRAY_SIZE(array) (sizeof(array) / sizeof(array[0])) 25 | 26 | 
#define TW_BITOP_ADDR(x) "+m"(*(volatile long *)(x)) 27 | 28 | #define TW_BITS_IN_WORD 8 29 | #define TW_BIT_POS(x) (1 << ((x) % TW_BITS_IN_WORD)) 30 | #define TW_BYTE_POS(x) ((x) / TW_BITS_IN_WORD) 31 | 32 | #define tw_likely(x) __builtin_expect((x), 1) 33 | #define tw_unlikely(x) __builtin_expect((x), 0) 34 | 35 | /* use with care, it evaluates twice a & b */ 36 | #define tw_min(a, b) (((a) < (b)) ? (a) : (b)) 37 | #define tw_max(a, b) (((a) < (b)) ? (b) : (a)) 38 | 39 | #define tw_almost_equal(a, b) (fabs((a) - (b)) < FLT_EPSILON) 40 | 41 | #ifdef _ISOC11_SOURCE 42 | #define malloc_aligned aligned_alloc 43 | #else 44 | #if _POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 45 | static inline void *__aligned_alloc(size_t align, size_t size) 46 | { 47 | void *ptr = NULL; 48 | if (posix_memalign(&ptr, align, size)) { 49 | return NULL; 50 | } 51 | 52 | return ptr; 53 | } 54 | #define malloc_aligned __aligned_alloc 55 | #else 56 | static_assert(false, "you're in hell."); 57 | #endif 58 | #endif 59 | 60 | #define tw_simd_equal(a, b, simd_cmpeq, simd_maskmove, mask) \ 61 | ((int)mask == simd_maskmove(simd_cmpeq((a), (b)))) 62 | 63 | #define tw_mm256_equal(a, b) \ 64 | tw_simd_equal((a), (b), _mm256_cmpeq_epi8, _mm256_movemask_epi8, 0xFFFFFFFF) 65 | 66 | #define tw_mm_equal(a, b) \ 67 | tw_simd_equal((a), (b), _mm_cmpeq_epi8, _mm_movemask_epi8, 0xFFFF) 68 | 69 | #endif /* TWIDDLE_INTERNAL_UTILS_H */ 70 | -------------------------------------------------------------------------------- /src/twiddle/utils/hash.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | uint64_t tw_hash_128_64(tw_uint128_t x) 4 | { 5 | // Murmur-inspired hashing. 
6 | const uint64_t k = 0x9ddfea08eb392d69ULL; 7 | uint64_t a = (x.l ^ x.h) * k; 8 | a ^= (a >> 47); 9 | uint64_t b = (x.h ^ a) * k; 10 | b ^= (b >> 47); 11 | b *= k; 12 | return b; 13 | } 14 | -------------------------------------------------------------------------------- /src/twiddle/utils/internal.h: -------------------------------------------------------------------------------- 1 | #ifndef TWIDDLE_HASH_INTERNAL_H 2 | #define TWIDDLE_HASH_INTERNAL_H 3 | 4 | #define BIG_CONSTANT(x) (x##LLU) 5 | 6 | static inline uint32_t rotl32(uint32_t x, int8_t r) 7 | { 8 | return (x << r) | (x >> (32 - r)); 9 | } 10 | 11 | static inline uint32_t rotr32(uint32_t x, int8_t r) 12 | { 13 | return (x >> r) | (x << (32 - r)); 14 | } 15 | 16 | static inline uint64_t rotl64(uint64_t x, int8_t r) 17 | { 18 | return (x << r) | (x >> (64 - r)); 19 | } 20 | 21 | static inline uint64_t rotr64(uint64_t x, int8_t r) 22 | { 23 | return (x >> r) | (x << (64 - r)); 24 | } 25 | 26 | static inline uint64_t cread_u64(const void *const ptr) 27 | { 28 | return *(uint64_t *)ptr; 29 | } 30 | 31 | static inline uint64_t cread_u32(const void *const ptr) 32 | { 33 | return *(uint32_t *)ptr; 34 | } 35 | 36 | static inline uint64_t cread_u16(const void *const ptr) 37 | { 38 | return *(uint16_t *)ptr; 39 | } 40 | 41 | static inline uint64_t cread_u8(const void *const ptr) 42 | { 43 | return *(uint8_t *)ptr; 44 | } 45 | 46 | #endif /* TWIDDLE_HASH_INTERNAL_H */ 47 | -------------------------------------------------------------------------------- /src/twiddle/utils/metrohash.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "../macrology.h" 4 | #include "internal.h" 5 | 6 | static const uint64_t k0_64 = 0xD6D018F5; 7 | static const uint64_t k1_64 = 0xA2AA033B; 8 | static const uint64_t k2_64 = 0x62992FC1; 9 | static const uint64_t k3_64 = 0x30BC5B29; 10 | 11 | uint64_t tw_metrohash_64(const uint64_t seed, const void *key, 12 | const size_t 
key_len) 13 | { 14 | const uint8_t *ptr = (uint8_t *)key; 15 | const uint8_t *const end = ptr + key_len; 16 | 17 | uint64_t h = (seed + k2_64) * k0_64; 18 | 19 | // clang-format off 20 | if (key_len >= 32) { 21 | uint64_t v[4]; 22 | v[0] = h; 23 | v[1] = h; 24 | v[2] = h; 25 | v[3] = h; 26 | 27 | do { 28 | v[0] += cread_u64(ptr) * k0_64; ptr += 8; v[0] = rotr64(v[0],29) + v[2]; 29 | v[1] += cread_u64(ptr) * k1_64; ptr += 8; v[1] = rotr64(v[1],29) + v[3]; 30 | v[2] += cread_u64(ptr) * k2_64; ptr += 8; v[2] = rotr64(v[2],29) + v[0]; 31 | v[3] += cread_u64(ptr) * k3_64; ptr += 8; v[3] = rotr64(v[3],29) + v[1]; 32 | } while (ptr <= (end - 32)); 33 | 34 | v[2] ^= rotr64(((v[0] + v[3]) * k0_64) + v[1], 37) * k1_64; 35 | v[3] ^= rotr64(((v[1] + v[2]) * k1_64) + v[0], 37) * k0_64; 36 | v[0] ^= rotr64(((v[0] + v[2]) * k0_64) + v[3], 37) * k1_64; 37 | v[1] ^= rotr64(((v[1] + v[3]) * k1_64) + v[2], 37) * k0_64; 38 | h += v[0] ^ v[1]; 39 | } 40 | 41 | if ((end - ptr) >= 16) { 42 | uint64_t v0 = h + (cread_u64(ptr) * k2_64); ptr += 8; v0 = rotr64(v0,29) * k3_64; 43 | uint64_t v1 = h + (cread_u64(ptr) * k2_64); ptr += 8; v1 = rotr64(v1,29) * k3_64; 44 | v0 ^= rotr64(v0 * k0_64, 21) + v1; 45 | v1 ^= rotr64(v1 * k3_64, 21) + v0; 46 | h += v1; 47 | } 48 | 49 | if ((end - ptr) >= 8) { 50 | h += cread_u64(ptr) * k3_64; ptr += 8; 51 | h ^= rotr64(h, 55) * k1_64; 52 | } 53 | 54 | if ((end - ptr) >= 4) { 55 | h += cread_u32(ptr) * k3_64; ptr += 4; 56 | h ^= rotr64(h, 26) * k1_64; 57 | } 58 | 59 | if ((end - ptr) >= 2) { 60 | h += cread_u16(ptr) * k3_64; ptr += 2; 61 | h ^= rotr64(h, 48) * k1_64; 62 | } 63 | 64 | if ((end - ptr) >= 1) { 65 | h += cread_u8(ptr) * k3_64; 66 | h ^= rotr64(h, 37) * k1_64; 67 | } 68 | 69 | h ^= rotr64(h, 28); 70 | h *= k0_64; 71 | h ^= rotr64(h, 29); 72 | // clang-format on 73 | 74 | return h; 75 | } 76 | 77 | static const uint64_t k0_128 = 0xC83A91E1; 78 | static const uint64_t k1_128 = 0x8648DBDB; 79 | static const uint64_t k2_128 = 0x7BDEC03B; 80 | static 
const uint64_t k3_128 = 0x2F5870A5; 81 | 82 | tw_uint128_t tw_metrohash_128(const uint64_t seed, const void *key, 83 | size_t key_len) 84 | { 85 | const uint8_t *ptr = (uint8_t *)key; 86 | const uint8_t *const end = ptr + key_len; 87 | 88 | uint64_t v[4]; 89 | 90 | // clang-format off 91 | v[0] = (seed - k0_128) * k3_128; 92 | v[1] = (seed + k1_128) * k2_128; 93 | 94 | if (key_len >= 32) { 95 | v[2] = (seed + k0_128) * k2_128; 96 | v[3] = (seed - k1_128) * k3_128; 97 | 98 | do { 99 | v[0] += cread_u64(ptr) * k0_128; ptr += 8; v[0] = rotr64(v[0],29) + v[2]; 100 | v[1] += cread_u64(ptr) * k1_128; ptr += 8; v[1] = rotr64(v[1],29) + v[3]; 101 | v[2] += cread_u64(ptr) * k2_128; ptr += 8; v[2] = rotr64(v[2],29) + v[0]; 102 | v[3] += cread_u64(ptr) * k3_128; ptr += 8; v[3] = rotr64(v[3],29) + v[1]; 103 | } while (ptr <= (end - 32)); 104 | 105 | v[2] ^= rotr64(((v[0] + v[3]) * k0_128) + v[1], 21) * k1_128; 106 | v[3] ^= rotr64(((v[1] + v[2]) * k1_128) + v[0], 21) * k0_128; 107 | v[0] ^= rotr64(((v[0] + v[2]) * k0_128) + v[3], 21) * k1_128; 108 | v[1] ^= rotr64(((v[1] + v[3]) * k1_128) + v[2], 21) * k0_128; 109 | } 110 | 111 | if ((end - ptr) >= 16) { 112 | v[0] += cread_u64(ptr) * k2_128; ptr += 8; v[0] = rotr64(v[0],33) * k3_128; 113 | v[1] += cread_u64(ptr) * k2_128; ptr += 8; v[1] = rotr64(v[1],33) * k3_128; 114 | v[0] ^= rotr64((v[0] * k2_128) + v[1], 45) * k1_128; 115 | v[1] ^= rotr64((v[1] * k3_128) + v[0], 45) * k0_128; 116 | } 117 | 118 | if ((end - ptr) >= 8) { 119 | v[0] += cread_u64(ptr) * k2_128; ptr += 8; v[0] = rotr64(v[0],33) * k3_128; 120 | v[0] ^= rotr64((v[0] * k2_128) + v[1], 27) * k1_128; 121 | } 122 | 123 | if ((end - ptr) >= 4) { 124 | v[1] += cread_u32(ptr) * k2_128; ptr += 4; v[1] = rotr64(v[1],33) * k3_128; 125 | v[1] ^= rotr64((v[1] * k3_128) + v[0], 46) * k0_128; 126 | } 127 | 128 | if ((end - ptr) >= 2) { 129 | v[0] += cread_u16(ptr) * k2_128; ptr += 2; v[0] = rotr64(v[0],33) * k3_128; 130 | v[0] ^= rotr64((v[0] * k2_128) + v[1], 22) * k1_128; 
131 | } 132 | 133 | if ((end - ptr) >= 1) { 134 | v[1] += cread_u8(ptr) * k2_128; v[1] = rotr64(v[1],33) * k3_128; 135 | v[1] ^= rotr64((v[1] * k3_128) + v[0], 58) * k0_128; 136 | } 137 | 138 | v[0] += rotr64((v[0] * k0_128) + v[1], 13); 139 | v[1] += rotr64((v[1] * k1_128) + v[0], 37); 140 | v[0] += rotr64((v[0] * k2_128) + v[1], 13); 141 | v[1] += rotr64((v[1] * k3_128) + v[0], 37); 142 | // clang-format on 143 | 144 | return (tw_uint128_t){.h = v[0], .l = v[1]}; 145 | } 146 | -------------------------------------------------------------------------------- /src/twiddle/utils/murmur3.c: -------------------------------------------------------------------------------- 1 | /** 2 | * MurmurHash3 was written by Austin Appleby, and is placed in the public 3 | * domain. The author hereby disclaims copyright to this source code. 4 | * 5 | * Note - The x86 and x64 versions do _not_ produce the same results, as the 6 | * algorithms are optimized for their respective platforms. You can still 7 | * compile and run any of them on any platform, but your performance with the 8 | * non-native version will be less than optimal. 
9 | */ 10 | 11 | #include 12 | 13 | #include "internal.h" 14 | 15 | /** 16 | * Block read - if your platform needs to do endian-swapping or can only 17 | * handle aligned reads, do the conversion here 18 | */ 19 | 20 | #define getblock(p, i) (p[i]) 21 | 22 | static inline uint64_t fmix64(uint64_t k) 23 | { 24 | k ^= k >> 33; 25 | k *= BIG_CONSTANT(0xff51afd7ed558ccd); 26 | k ^= k >> 33; 27 | k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53); 28 | k ^= k >> 33; 29 | 30 | return k; 31 | } 32 | 33 | uint64_t tw_murmur3_64(const uint64_t seed, const void *key, 34 | const size_t key_len) 35 | { 36 | return tw_hash_128_64(tw_murmur3_128(seed, key, key_len)); 37 | } 38 | 39 | tw_uint128_t tw_murmur3_128(const uint64_t seed, const void *key, 40 | const size_t key_len) 41 | { 42 | const uint8_t *data = (const uint8_t *)key; 43 | const int nblocks = key_len / 16; 44 | int i; 45 | 46 | uint64_t h1 = seed; 47 | uint64_t h2 = seed; 48 | 49 | const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5); 50 | const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f); 51 | 52 | const uint64_t *blocks = (const uint64_t *)(key); 53 | 54 | // clang-format off 55 | for (i = 0; i < nblocks; i++) { 56 | uint64_t k1 = getblock(blocks, i * 2 + 0); 57 | uint64_t k2 = getblock(blocks, i * 2 + 1); 58 | 59 | k1 *= c1; k1 = rotl64(k1,31); k1 *= c2; h1 ^= k1; 60 | h1 = rotl64(h1,27); h1 += h2; h1 = h1*5+0x52dce729; 61 | k2 *= c2; k2 = rotl64(k2,33); k2 *= c1; h2 ^= k2; 62 | h2 = rotl64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5; 63 | } 64 | 65 | const uint8_t *tail = (const uint8_t *)(data + nblocks * 16); 66 | 67 | uint64_t k1 = 0; 68 | uint64_t k2 = 0; 69 | 70 | switch (key_len & 15) { 71 | case 15: k2 ^= (uint64_t)(tail[14]) << 48; 72 | case 14: k2 ^= (uint64_t)(tail[13]) << 40; 73 | case 13: k2 ^= (uint64_t)(tail[12]) << 32; 74 | case 12: k2 ^= (uint64_t)(tail[11]) << 24; 75 | case 11: k2 ^= (uint64_t)(tail[10]) << 16; 76 | case 10: k2 ^= (uint64_t)(tail[ 9]) << 8; 77 | case 9: k2 ^= (uint64_t)(tail[ 8]) << 0; 78 | k2 
*= c2; k2 = rotl64(k2,33); k2 *= c1; h2 ^= k2; 79 | 80 | case 8: k1 ^= (uint64_t)(tail[ 7]) << 56; 81 | case 7: k1 ^= (uint64_t)(tail[ 6]) << 48; 82 | case 6: k1 ^= (uint64_t)(tail[ 5]) << 40; 83 | case 5: k1 ^= (uint64_t)(tail[ 4]) << 32; 84 | case 4: k1 ^= (uint64_t)(tail[ 3]) << 24; 85 | case 3: k1 ^= (uint64_t)(tail[ 2]) << 16; 86 | case 2: k1 ^= (uint64_t)(tail[ 1]) << 8; 87 | case 1: k1 ^= (uint64_t)(tail[ 0]) << 0; 88 | k1 *= c1; k1 = rotl64(k1,31); k1 *= c2; h1 ^= k1; 89 | default: ; 90 | }; 91 | 92 | h1 ^= key_len; 93 | h2 ^= key_len; 94 | 95 | h1 += h2; 96 | h2 += h1; 97 | 98 | h1 = fmix64(h1); 99 | h2 = fmix64(h2); 100 | 101 | h1 += h2; 102 | h2 += h1; 103 | // clang-format on 104 | 105 | return (tw_uint128_t){.h = h1, .l = h2}; 106 | } 107 | -------------------------------------------------------------------------------- /tests/.gitattributes: -------------------------------------------------------------------------------- 1 | *.t -whitespace 2 | cram.py -diff 3 | -------------------------------------------------------------------------------- /tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(check) 2 | 3 | add_c_test(test-bitmap) 4 | add_c_test(test-bitmap-rle) 5 | add_c_test(test-bloomfilter) 6 | add_c_test(test-bloomfilter-a2) 7 | add_c_test(test-hyperloglog) 8 | add_c_test(test-minhash) 9 | 10 | add_subdirectory(benchmarks) 11 | add_subdirectory(examples) 12 | -------------------------------------------------------------------------------- /tests/benchmarks/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_c_benchmark(bench-bitmap) 2 | add_c_benchmark(bench-bloomfilter) 3 | add_c_benchmark(bench-minhash) 4 | -------------------------------------------------------------------------------- /tests/benchmarks/bench-bitmap.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | 
#include 4 | 5 | #include "benchmark.h" 6 | 7 | struct dual_bitmap { 8 | struct tw_bitmap *a; 9 | struct tw_bitmap *b; 10 | }; 11 | 12 | void bitmap_dual_setup(struct benchmark *b) 13 | { 14 | const size_t size = b->size * 8; 15 | 16 | b->opaque = malloc(sizeof(struct dual_bitmap)); 17 | struct dual_bitmap *dual = (struct dual_bitmap *)b->opaque; 18 | assert(dual); 19 | 20 | dual->a = tw_bitmap_new(size); 21 | assert(dual->a); 22 | dual->b = tw_bitmap_new(size); 23 | assert(dual->b); 24 | 25 | for (size_t i = 0; i < size; ++i) { 26 | if (i % 5) { 27 | tw_bitmap_set(dual->a, i); 28 | tw_bitmap_set(dual->b, i); 29 | } 30 | } 31 | } 32 | 33 | void bitmap_dual_teardown(struct benchmark *b) 34 | { 35 | struct dual_bitmap *dual = (struct dual_bitmap *)b->opaque; 36 | tw_bitmap_free(dual->b); 37 | tw_bitmap_free(dual->a); 38 | free(dual); 39 | b->opaque = NULL; 40 | } 41 | 42 | void bitmap_xor(void *opaque) 43 | { 44 | struct dual_bitmap *dual = (struct dual_bitmap *)opaque; 45 | 46 | tw_bitmap_xor(dual->a, dual->b); 47 | } 48 | 49 | void bitmap_equal(void *opaque) 50 | { 51 | struct dual_bitmap *dual = (struct dual_bitmap *)opaque; 52 | 53 | bool res = tw_bitmap_equal(dual->a, dual->b); 54 | (void)res; 55 | } 56 | 57 | int main(int argc, char *argv[]) 58 | { 59 | 60 | if (argc != 3) { 61 | fprintf(stderr, "usage: %s \n", argv[0]); 62 | return EXIT_FAILURE; 63 | } 64 | 65 | const size_t repeat = strtol(argv[1], NULL, 10); 66 | const size_t size = strtol(argv[2], NULL, 10); 67 | 68 | struct benchmark benchmarks[] = { 69 | BENCHMARK_FIXTURE(bitmap_equal, repeat, size, bitmap_dual_setup, 70 | bitmap_dual_teardown), 71 | BENCHMARK_FIXTURE(bitmap_xor, repeat, size, bitmap_dual_setup, 72 | bitmap_dual_teardown), 73 | }; 74 | 75 | run_benchmarks(benchmarks, sizeof(benchmarks) / sizeof(benchmarks[0])); 76 | 77 | return EXIT_SUCCESS; 78 | } 79 | -------------------------------------------------------------------------------- /tests/benchmarks/bench-bloomfilter.c: 
-------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | #include "benchmark.h" 7 | 8 | void bloomfilter_setup(struct benchmark *b) 9 | { 10 | const size_t size = b->size * 8; 11 | const uint16_t k = 10; 12 | 13 | b->opaque = tw_bloomfilter_new(size, k); 14 | assert(b->opaque); 15 | 16 | for (size_t i = 0; i < size; ++i) { 17 | if (i % 3) { 18 | tw_bloomfilter_set(b->opaque, &i, sizeof(i)); 19 | } 20 | } 21 | } 22 | 23 | void bloomfilter_teardown(struct benchmark *b) 24 | { 25 | struct tw_bloomfilter *bf = (struct tw_bloomfilter *)b->opaque; 26 | tw_bloomfilter_free(bf); 27 | b->opaque = NULL; 28 | } 29 | 30 | void bloomfilter_set(void *opaque) 31 | { 32 | struct tw_bloomfilter *bf = (struct tw_bloomfilter *)opaque; 33 | 34 | const size_t n_rounds = (bf->bitmap->size) / (8 * 128); 35 | for (size_t i = 0; i < n_rounds; ++i) { 36 | tw_bloomfilter_set(bf, &i, sizeof(i)); 37 | } 38 | } 39 | 40 | void bloomfilter_test(void *opaque) 41 | { 42 | struct tw_bloomfilter *bf = (struct tw_bloomfilter *)opaque; 43 | 44 | const size_t n_rounds = (bf->bitmap->size) / (8 * 128); 45 | for (size_t i = 0; i < n_rounds; ++i) { 46 | tw_bloomfilter_test(bf, &i, sizeof(i)); 47 | } 48 | } 49 | 50 | int main(int argc, char *argv[]) 51 | { 52 | 53 | if (argc != 3) { 54 | fprintf(stderr, "usage: %s \n", argv[0]); 55 | return EXIT_FAILURE; 56 | } 57 | 58 | const size_t repeat = strtol(argv[1], NULL, 10); 59 | const size_t size = strtol(argv[2], NULL, 10); 60 | (void)size; 61 | 62 | struct benchmark benchmarks[] = { 63 | BENCHMARK_FIXTURE(bloomfilter_set, repeat, size, bloomfilter_setup, 64 | bloomfilter_teardown), 65 | BENCHMARK_FIXTURE(bloomfilter_test, repeat, size, bloomfilter_setup, 66 | bloomfilter_teardown), 67 | }; 68 | 69 | run_benchmarks(benchmarks, sizeof(benchmarks) / sizeof(benchmarks[0])); 70 | 71 | return EXIT_SUCCESS; 72 | } 73 | 
-------------------------------------------------------------------------------- /tests/benchmarks/bench-minhash.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #include "benchmark.h" 6 | 7 | void minhash_setup(struct benchmark *b) 8 | { 9 | 10 | b->opaque = (void *)tw_minhash_new(b->size); 11 | } 12 | 13 | void minhash_teardown(struct benchmark *b) 14 | { 15 | tw_minhash_free(b->opaque); 16 | b->opaque = NULL; 17 | } 18 | 19 | void minhash_add(void *opaque) 20 | { 21 | struct tw_minhash *h = (struct tw_minhash *)opaque; 22 | 23 | for (size_t i = 0; i < 10000; i++) 24 | tw_minhash_add(h, &i, sizeof(i)); 25 | } 26 | 27 | void minhash_est(void *opaque) 28 | { 29 | struct tw_minhash *h = (struct tw_minhash *)opaque; 30 | 31 | for (size_t i = 0; i < 10000; i++) 32 | tw_minhash_estimate(h, h); 33 | } 34 | 35 | int main(int argc, char *argv[]) 36 | { 37 | 38 | if (argc != 3) { 39 | fprintf(stderr, "usage: %s \n", argv[0]); 40 | return EXIT_FAILURE; 41 | } 42 | 43 | const size_t repeat = strtol(argv[1], NULL, 10); 44 | const size_t size = strtol(argv[2], NULL, 10); 45 | 46 | struct benchmark benchmarks[] = { 47 | BENCHMARK_FIXTURE(minhash_add, repeat, size, minhash_setup, 48 | minhash_teardown), 49 | BENCHMARK_FIXTURE(minhash_est, repeat, size, minhash_setup, 50 | minhash_teardown)}; 51 | 52 | run_benchmarks(benchmarks, sizeof(benchmarks) / sizeof(benchmarks[0])); 53 | 54 | return EXIT_SUCCESS; 55 | } 56 | -------------------------------------------------------------------------------- /tests/benchmarks/benchmark.h: -------------------------------------------------------------------------------- 1 | #ifndef TESTS_BENCHMARKS_BENCHMARK_H_ 2 | #define TESTS_BENCHMARKS_BENCHMARK_H_ 3 | 4 | #include 5 | #include 6 | 7 | struct benchmark { 8 | const char *name; 9 | 10 | size_t size; 11 | /* number of repetitions */ 12 | size_t repeat; 13 | /* skip the first `size` results for warmup */ 14 | size_t skip; 
15 | 16 | void *opaque; 17 | 18 | void (*setup)(struct benchmark *); 19 | void (*benchmark)(void *); 20 | void (*teardown)(struct benchmark *); 21 | }; 22 | 23 | #define BENCHMARK_FIXTURE(bench, b_repeat, b_size, b_setup, b_teardown) \ 24 | ((struct benchmark){.name = #bench, \ 25 | .size = (b_size), \ 26 | .repeat = (b_repeat), \ 27 | .skip = (size_t)((b_repeat)*0.05), \ 28 | .benchmark = bench, \ 29 | .setup = b_setup, \ 30 | .teardown = b_teardown}) 31 | 32 | #define BENCHMARK(bench, b_repeat, b_size) \ 33 | BENCHMARK_FIXTURE(bench, b_repeat, b_size, NULL, NULL) 34 | 35 | void run_benchmark(struct benchmark *b); 36 | 37 | void run_benchmarks(struct benchmark *b, size_t size) 38 | { 39 | assert(b && size); 40 | 41 | for (size_t i = 0; i < size; ++i) { 42 | run_benchmark(&b[i]); 43 | } 44 | } 45 | 46 | #define RDTSC_START(cycles) \ 47 | do { \ 48 | register uint32_t cyc_high, cyc_low; \ 49 | __asm volatile("cpuid\n\t" \ 50 | "rdtsc\n\t" \ 51 | "mov %%edx, %0\n\t" \ 52 | "mov %%eax, %1\n\t" \ 53 | : "=r"(cyc_high), "=r"(cyc_low)::"%rax", "%rbx", "%rcx", \ 54 | "%rdx"); \ 55 | (cycles) = ((uint64_t)cyc_high << 32) | cyc_low; \ 56 | } while (0) 57 | 58 | #define RDTSC_FINAL(cycles) \ 59 | do { \ 60 | register uint32_t cyc_high, cyc_low; \ 61 | __asm volatile("rdtscp\n\t" \ 62 | "mov %%edx, %0\n\t" \ 63 | "mov %%eax, %1\n\t" \ 64 | "cpuid\n\t" \ 65 | : "=r"(cyc_high), "=r"(cyc_low)::"%rax", "%rbx", "%rcx", \ 66 | "%rdx"); \ 67 | (cycles) = ((uint64_t)cyc_high << 32) | cyc_low; \ 68 | } while (0) 69 | 70 | void run_benchmark(struct benchmark *b) 71 | { 72 | assert(b); 73 | 74 | const size_t repeat = b->repeat; 75 | const size_t size = b->size; 76 | const size_t skip = b->skip; 77 | const char *name = b->name; 78 | 79 | if (b->setup) { 80 | b->setup(b); 81 | } 82 | 83 | for (size_t i = 0; i < repeat; i++) { 84 | uint64_t cycles_start, cycles_final; 85 | 86 | __asm volatile("" ::: /* pretend to clobber */ "memory"); 87 | 88 | RDTSC_START(cycles_start); 89 | 90 | 
b->benchmark(b->opaque); 91 | 92 | RDTSC_FINAL(cycles_final); 93 | 94 | if (i >= skip) { 95 | printf("%s,%.2F\n", name, 96 | ((double)(cycles_final - cycles_start) / (double)size)); 97 | } 98 | } 99 | 100 | if (b->teardown) { 101 | b->teardown(b); 102 | } 103 | } 104 | 105 | #endif /* TESTS_BENCHMARKS_BENCHMARK_H_ */ 106 | -------------------------------------------------------------------------------- /tests/check/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # 2 | # Check: a unit test framework for C 3 | # Copyright (C) 2011 Mateusz Loskot 4 | # Copyright (C) 2001, 2002 Arien Malec 5 | # 6 | # This library is free software; you can redistribute it and/or 7 | # modify it under the terms of the GNU Lesser General Public 8 | # License as published by the Free Software Foundation; either 9 | # version 2.1 of the License, or (at your option) any later version. 10 | # 11 | # This library is distributed in the hope that it will be useful, 12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 | # Lesser General Public License for more details. 15 | # 16 | # You should have received a copy of the GNU Lesser General Public 17 | # License along with this library; if not, write to the 18 | # Free Software Foundation, Inc., 59 Temple Place - Suite 330, 19 | # Boston, MA 02111-1307, USA. 
#

# Translation units of the bundled copy of the Check unit-test framework.
set(SOURCES
  check.c
  check_error.c
  check_list.c
  check_log.c
  check_msg.c
  check_pack.c
  check_print.c
  check_run.c
  check_str.c)

# Register those sources as the `check` library.
# NOTE(review): add_c_test_library is not defined in this file; presumably it
# comes from tools/cmake/FindCTargets.cmake -- confirm its keyword arguments.
add_c_test_library(
  check
  OUTPUT_NAME check
  SOURCES ${SOURCES})

--------------------------------------------------------------------------------
/tests/check/Makefile.am:
--------------------------------------------------------------------------------
## Process this file with automake to produce Makefile.in

lib_LTLIBRARIES = libcheck.la
noinst_LTLIBRARIES = libcheckinternal.la

include_HEADERS = check.h

EXTRA_DIST = check.h.in

AM_CFLAGS = @GCOV_CFLAGS@ @PTHREAD_CFLAGS@ $(LIBSUBUNIT_CFLAGS)

CFILES =\
	check.c \
	check_error.c \
	check_list.c \
	check_log.c \
	check_msg.c \
	check_pack.c \
	check_print.c \
	check_run.c \
	check_str.c

HFILES =\
	check.h \
	check_error.h \
	check_impl.h \
	check_list.h \
	check_log.h \
	check_msg.h \
	check_pack.h \
	check_print.h \
	check_str.h


# Symbol list handed to libtool: every identifier that follows CK_EXPORT in
# check.h.in.
EXPORT_SYM = exported.sym
$(EXPORT_SYM): check.h.in
	${SED} -n -e 's/^..*CK_EXPORT[[:space:]][[:space:]]*\([[:alnum:]_][[:alnum:]_]*\)..*$$/\1/p' @top_srcdir@/src/check.h.in > $@

libcheck_la_DEPENDENCIES= $(EXPORT_SYM)
libcheck_la_LDFLAGS = -no-undefined -export-symbols $(EXPORT_SYM)
libcheck_la_SOURCES = $(CFILES) $(HFILES)
libcheck_la_LIBADD = @GCOV_LIBS@ @PTHREAD_LIBS@ $(LIBSUBUNIT_LIBS) $(top_builddir)/lib/libcompat.la

libcheckinternal_la_LDFLAGS = -no-undefined
libcheckinternal_la_SOURCES = $(CFILES) $(HFILES)
libcheckinternal_la_LIBADD = @GCOV_LIBS@ @PTHREAD_LIBS@ $(LIBSUBUNIT_LIBS) $(top_builddir)/lib/libcompat.la

CLEANFILES = *~ *.gcno $(EXPORT_SYM)

# Coverage report: .gcda files -> lcov.info -> HTML tree.
LCOV_INPUT = $(CFILES:%.c=.libs/%.gcda)
LCOV_OUTPUT = lcov.info
LCOV_HTML = lcov_html
LCOV_LCOV = @LCOV@
LCOV_GENHTML = @GENHTML@

lcov: $(LCOV_HTML)

$(LCOV_INPUT): libcheck.la libcheckinternal.la
	@$(MAKE) -C $(top_builddir)/tests check

$(LCOV_OUTPUT): $(LCOV_INPUT)
	$(LCOV_LCOV) --capture --directory . --base-directory . --output-file $@

$(LCOV_HTML): $(LCOV_OUTPUT)
	-$(RM) -r $@
	LANG=C $(LCOV_GENHTML) --output-directory $@ --title "Check Code Coverage" --show-details $<
	@echo "Point a web browser at $(LCOV_HTML)/index.html to see results."

clean-local: lcov-clean

.PHONY: lcov-clean
lcov-clean:
	-$(RM) -r $(LCOV_HTML) $(LCOV_OUTPUT)

--------------------------------------------------------------------------------
/tests/check/check.pc.in:
--------------------------------------------------------------------------------
prefix=@prefix@
exec_prefix=@exec_prefix@
libdir=@libdir@
includedir=@includedir@

Name: Check
Description: A unit test framework for C
URL: http://check.sourceforge.net
Version: @VERSION@
Requires.private: @LIBSUBUNIT_PC@
Libs: -L${libdir} -lcheck
Libs.private: @GCOV_LIBS@ @PTHREAD_LIBS@ @LIBS@
Cflags: -I${includedir} @PTHREAD_CFLAGS@

--------------------------------------------------------------------------------
/tests/check/check_error.c:
--------------------------------------------------------------------------------
/*
 * Check: a unit test framework for C
 * Copyright (C) 2001, 2002 Arien Malec
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU 13 | * Lesser General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Lesser General Public 16 | * License along with this library; if not, write to the 17 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 18 | * Boston, MA 02111-1307, USA. 19 | */ 20 | 21 | #include "libcompat.h" 22 | 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | 30 | #include "check_error.h" 31 | 32 | /** 33 | * Storage for setjmp/longjmp context information used in NOFORK mode 34 | */ 35 | jmp_buf error_jmp_buffer; 36 | 37 | /* FIXME: including a colon at the end is a bad way to indicate an error */ 38 | __attribute__((format(printf, 1, 4))) void 39 | eprintf(const char *fmt, const char *file, int line, ...) 40 | { 41 | va_list args; 42 | 43 | fflush(stderr); 44 | 45 | fprintf(stderr, "%s:%d: ", file, line); 46 | va_start(args, line); 47 | vfprintf(stderr, fmt, args); 48 | va_end(args); 49 | 50 | /*include system error information if format ends in colon */ 51 | if (fmt[0] != '\0' && fmt[strlen(fmt) - 1] == ':') 52 | fprintf(stderr, " %s", strerror(errno)); 53 | fprintf(stderr, "\n"); 54 | 55 | exit(2); 56 | } 57 | 58 | void *emalloc(size_t n) 59 | { 60 | void *p; 61 | 62 | p = malloc(n); 63 | if (p == NULL) 64 | eprintf("malloc of %zu bytes failed:", __FILE__, __LINE__ - 2, n); 65 | return p; 66 | } 67 | 68 | void *erealloc(void *ptr, size_t n) 69 | { 70 | void *p; 71 | 72 | p = realloc(ptr, n); 73 | if (p == NULL) 74 | eprintf("realloc of %zu bytes failed:", __FILE__, __LINE__ - 2, n); 75 | return p; 76 | } 77 | -------------------------------------------------------------------------------- /tests/check/check_error.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Check: a unit test framework for C 3 | * Copyright (C) 2001, 2002 Arien Malec 4 | * 5 | * This library is free software; you can redistribute it and/or 6 | * modify it under 
the terms of the GNU Lesser General Public 7 | * License as published by the Free Software Foundation; either 8 | * version 2.1 of the License, or (at your option) any later version. 9 | * 10 | * This library is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 | * Lesser General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Lesser General Public 16 | * License along with this library; if not, write to the 17 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 18 | * Boston, MA 02111-1307, USA. 19 | */ 20 | 21 | #ifndef ERROR_H 22 | #define ERROR_H 23 | 24 | #include "libcompat.h" 25 | #include 26 | 27 | extern jmp_buf error_jmp_buffer; 28 | 29 | /* Include stdlib.h beforehand */ 30 | 31 | /* Print error message and die 32 | If fmt ends in colon, include system error information */ 33 | void eprintf(const char *fmt, const char *file, int line, 34 | ...) CK_ATTRIBUTE_NORETURN; 35 | /* malloc or die */ 36 | void *emalloc(size_t n); 37 | void *erealloc(void *, size_t n); 38 | 39 | #endif /*ERROR_H */ 40 | -------------------------------------------------------------------------------- /tests/check/check_impl.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Check: a unit test framework for C 3 | * Copyright (C) 2001,2002 Arien Malec 4 | * 5 | * This library is free software; you can redistribute it and/or 6 | * modify it under the terms of the GNU Lesser General Public 7 | * License as published by the Free Software Foundation; either 8 | * version 2.1 of the License, or (at your option) any later version. 9 | * 10 | * This library is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU 13 | * Lesser General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Lesser General Public 16 | * License along with this library; if not, write to the 17 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 18 | * Boston, MA 02111-1307, USA. 19 | */ 20 | 21 | #ifndef CHECK_IMPL_H 22 | #define CHECK_IMPL_H 23 | 24 | #include "check_list.h" 25 | 26 | /* This header should be included by any module that needs 27 | to know the implementation details of the check structures 28 | Include stdio.h, time.h, & list.h before this header 29 | */ 30 | 31 | #define US_PER_SEC 1000000 32 | #define NANOS_PER_SECONDS 1000000000 33 | 34 | /** calculate the difference in useconds out of two "struct timespec"s */ 35 | #define DIFF_IN_USEC(begin, end) \ 36 | ((((end).tv_sec - (begin).tv_sec) * US_PER_SEC) + ((end).tv_nsec / 1000) - \ 37 | ((begin).tv_nsec / 1000)) 38 | 39 | typedef struct TF { 40 | TFun fn; 41 | int loop_start; 42 | int loop_end; 43 | const char *name; 44 | int signal; 45 | signed char allowed_exit_value; 46 | } TF; 47 | 48 | struct Suite { 49 | const char *name; 50 | List *tclst; /* List of test cases */ 51 | }; 52 | 53 | typedef struct Fixture { 54 | int ischecked; 55 | SFun fun; 56 | } Fixture; 57 | 58 | struct TCase { 59 | const char *name; 60 | struct timespec timeout; 61 | List *tflst; /* list of test functions */ 62 | List *unch_sflst; 63 | List *unch_tflst; 64 | List *ch_sflst; 65 | List *ch_tflst; 66 | }; 67 | 68 | typedef struct TestStats { 69 | int n_checked; 70 | int n_failed; 71 | int n_errors; 72 | } TestStats; 73 | 74 | struct TestResult { 75 | enum test_result rtype; /* Type of result */ 76 | enum ck_result_ctx ctx; /* When the result occurred */ 77 | char *file; /* File where the test occured */ 78 | int line; /* Line number where the test occurred */ 79 | int iter; /* The iteration value for looping tests */ 80 | int duration; /* duration of this test in microseconds */ 81 | const 
char *tcname; /* Test case that generated the result */ 82 | const char *tname; /* Test that generated the result */ 83 | char *msg; /* Failure message */ 84 | }; 85 | 86 | TestResult *tr_create(void); 87 | void tr_reset(TestResult *tr); 88 | void tr_free(TestResult *tr); 89 | 90 | enum cl_event { 91 | CLINITLOG_SR, /* Initialize log file */ 92 | CLENDLOG_SR, /* Tests are complete */ 93 | CLSTART_SR, /* Suite runner start */ 94 | CLSTART_S, /* Suite start */ 95 | CLEND_SR, /* Suite runner end */ 96 | CLEND_S, /* Suite end */ 97 | CLSTART_T, /* A test case is about to run */ 98 | CLEND_T /* Test case end */ 99 | }; 100 | 101 | typedef void (*LFun)(SRunner *, FILE *, enum print_output, void *, 102 | enum cl_event); 103 | 104 | typedef struct Log { 105 | FILE *lfile; 106 | LFun lfun; 107 | int close; 108 | enum print_output mode; 109 | } Log; 110 | 111 | struct SRunner { 112 | List *slst; /* List of Suite objects */ 113 | TestStats *stats; /* Run statistics */ 114 | List *resultlst; /* List of unit test results */ 115 | const char *log_fname; /* name of log file */ 116 | const char *xml_fname; /* name of xml output file */ 117 | const char *tap_fname; /* name of tap output file */ 118 | List *loglst; /* list of Log objects */ 119 | enum fork_status fstat; /* controls if suites are forked or not 120 | NOTE: Don't use this value directly, 121 | instead use srunner_fork_status */ 122 | }; 123 | 124 | void set_fork_status(enum fork_status fstat); 125 | enum fork_status cur_fork_status(void); 126 | 127 | clockid_t check_get_clockid(void); 128 | 129 | #endif /* CHECK_IMPL_H */ 130 | -------------------------------------------------------------------------------- /tests/check/check_list.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Check: a unit test framework for C 3 | * Copyright (C) 2001, 2002 Arien Malec 4 | * 5 | * This library is free software; you can redistribute it and/or 6 | * modify it under the terms of the GNU 
Lesser General Public 7 | * License as published by the Free Software Foundation; either 8 | * version 2.1 of the License, or (at your option) any later version. 9 | * 10 | * This library is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 | * Lesser General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Lesser General Public 16 | * License along with this library; if not, write to the 17 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 18 | * Boston, MA 02111-1307, USA. 19 | */ 20 | 21 | #include "libcompat.h" 22 | 23 | #include 24 | #include 25 | 26 | #include "check_error.h" 27 | #include "check_list.h" 28 | 29 | enum { LINIT = 1, LGROW = 2 }; 30 | 31 | struct List { 32 | unsigned int n_elts; 33 | unsigned int max_elts; 34 | int current; /* pointer to the current node */ 35 | int last; /* pointer to the node before END */ 36 | void **data; 37 | }; 38 | 39 | static void maybe_grow(List *lp) 40 | { 41 | if (lp->n_elts >= lp->max_elts) { 42 | lp->max_elts *= LGROW; 43 | lp->data = (void **)erealloc(lp->data, lp->max_elts * sizeof(lp->data[0])); 44 | } 45 | } 46 | 47 | List *check_list_create(void) 48 | { 49 | List *lp; 50 | 51 | lp = (List *)emalloc(sizeof(List)); 52 | lp->n_elts = 0; 53 | lp->max_elts = LINIT; 54 | lp->data = (void **)emalloc(sizeof(lp->data[0]) * LINIT); 55 | lp->current = lp->last = -1; 56 | return lp; 57 | } 58 | 59 | void check_list_add_front(List *lp, void *val) 60 | { 61 | if (lp == NULL) 62 | return; 63 | maybe_grow(lp); 64 | memmove(lp->data + 1, lp->data, lp->n_elts * sizeof lp->data[0]); 65 | lp->last++; 66 | lp->n_elts++; 67 | lp->current = 0; 68 | lp->data[lp->current] = val; 69 | } 70 | 71 | void check_list_add_end(List *lp, void *val) 72 | { 73 | if (lp == NULL) 74 | return; 75 | maybe_grow(lp); 76 | lp->last++; 77 | lp->n_elts++; 78 
| lp->current = lp->last; 79 | lp->data[lp->current] = val; 80 | } 81 | 82 | int check_list_at_end(List *lp) 83 | { 84 | if (lp->current == -1) 85 | return 1; 86 | else 87 | return (lp->current > lp->last); 88 | } 89 | 90 | void check_list_front(List *lp) 91 | { 92 | if (lp->current == -1) 93 | return; 94 | lp->current = 0; 95 | } 96 | 97 | void check_list_free(List *lp) 98 | { 99 | if (lp == NULL) 100 | return; 101 | 102 | free(lp->data); 103 | free(lp); 104 | } 105 | 106 | void *check_list_val(List *lp) 107 | { 108 | if (lp == NULL) 109 | return NULL; 110 | if (lp->current == -1 || lp->current > lp->last) 111 | return NULL; 112 | 113 | return lp->data[lp->current]; 114 | } 115 | 116 | void check_list_advance(List *lp) 117 | { 118 | if (lp == NULL) 119 | return; 120 | if (check_list_at_end(lp)) 121 | return; 122 | lp->current++; 123 | } 124 | 125 | void check_list_apply(List *lp, void (*fp)(void *)) 126 | { 127 | if (lp == NULL || fp == NULL) 128 | return; 129 | 130 | for (check_list_front(lp); !check_list_at_end(lp); check_list_advance(lp)) 131 | fp(check_list_val(lp)); 132 | } 133 | -------------------------------------------------------------------------------- /tests/check/check_list.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Check: a unit test framework for C 3 | * Copyright (C) 2001, 2002 Arien Malec 4 | * 5 | * This library is free software; you can redistribute it and/or 6 | * modify it under the terms of the GNU Lesser General Public 7 | * License as published by the Free Software Foundation; either 8 | * version 2.1 of the License, or (at your option) any later version. 9 | * 10 | * This library is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 | * Lesser General Public License for more details. 
14 | * 15 | * You should have received a copy of the GNU Lesser General Public 16 | * License along with this library; if not, write to the 17 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 18 | * Boston, MA 02111-1307, USA. 19 | */ 20 | 21 | #ifndef CHECK_LIST_H 22 | #define CHECK_LIST_H 23 | 24 | typedef struct List List; 25 | 26 | /* Create an empty list */ 27 | List *check_list_create(void); 28 | 29 | /* Is list at end? */ 30 | int check_list_at_end(List * lp); 31 | 32 | /* Position list at front */ 33 | void check_list_front(List * lp); 34 | 35 | /* Add a value to the front of the list, 36 | positioning newly added value as current value. 37 | More expensive than list_add_end, as it uses memmove. */ 38 | void check_list_add_front(List * lp, void *val); 39 | 40 | /* Add a value to the end of the list, 41 | positioning newly added value as current value */ 42 | void check_list_add_end(List * lp, void *val); 43 | 44 | /* Give the value of the current node */ 45 | void *check_list_val(List * lp); 46 | 47 | /* Position the list at the next node */ 48 | void check_list_advance(List * lp); 49 | 50 | /* Free a list, but don't free values */ 51 | void check_list_free(List * lp); 52 | 53 | void check_list_apply(List * lp, void (*fp) (void *)); 54 | 55 | 56 | #endif /* CHECK_LIST_H */ 57 | -------------------------------------------------------------------------------- /tests/check/check_log.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Check: a unit test framework for C 3 | * Copyright (C) 2001,2002 Arien Malec 4 | * 5 | * This library is free software; you can redistribute it and/or 6 | * modify it under the terms of the GNU Lesser General Public 7 | * License as published by the Free Software Foundation; either 8 | * version 2.1 of the License, or (at your option) any later version. 
9 | * 10 | * This library is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 | * Lesser General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Lesser General Public 16 | * License along with this library; if not, write to the 17 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 18 | * Boston, MA 02111-1307, USA. 19 | */ 20 | 21 | #ifndef CHECK_LOG_H 22 | #define CHECK_LOG_H 23 | 24 | void log_srunner_start(SRunner * sr); 25 | void log_srunner_end(SRunner * sr); 26 | void log_suite_start(SRunner * sr, Suite * s); 27 | void log_suite_end(SRunner * sr, Suite * s); 28 | void log_test_end(SRunner * sr, TestResult * tr); 29 | void log_test_start(SRunner * sr, TCase * tc, TF * tfun); 30 | 31 | void stdout_lfun(SRunner * sr, FILE * file, enum print_output, 32 | void *obj, enum cl_event evt); 33 | 34 | void lfile_lfun(SRunner * sr, FILE * file, enum print_output, 35 | void *obj, enum cl_event evt); 36 | 37 | void xml_lfun(SRunner * sr, FILE * file, enum print_output, 38 | void *obj, enum cl_event evt); 39 | 40 | void tap_lfun(SRunner * sr, FILE * file, enum print_output, 41 | void *obj, enum cl_event evt); 42 | 43 | void subunit_lfun(SRunner * sr, FILE * file, enum print_output, 44 | void *obj, enum cl_event evt); 45 | 46 | void srunner_register_lfun(SRunner * sr, FILE * lfile, int close, 47 | LFun lfun, enum print_output); 48 | 49 | FILE *srunner_open_lfile(SRunner * sr); 50 | FILE *srunner_open_xmlfile(SRunner * sr); 51 | FILE *srunner_open_tapfile(SRunner * sr); 52 | void srunner_init_logging(SRunner * sr, enum print_output print_mode); 53 | void srunner_end_logging(SRunner * sr); 54 | 55 | #endif /* CHECK_LOG_H */ 56 | -------------------------------------------------------------------------------- /tests/check/check_msg.h: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Check: a unit test framework for C 3 | * Copyright (C) 2001, 2002 Arien Malec 4 | * 5 | * This library is free software; you can redistribute it and/or 6 | * modify it under the terms of the GNU Lesser General Public 7 | * License as published by the Free Software Foundation; either 8 | * version 2.1 of the License, or (at your option) any later version. 9 | * 10 | * This library is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 | * Lesser General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Lesser General Public 16 | * License along with this library; if not, write to the 17 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 18 | * Boston, MA 02111-1307, USA. 19 | */ 20 | 21 | #ifndef CHECK_MSG_NEW_H 22 | #define CHECK_MSG_NEW_H 23 | 24 | 25 | /* Functions implementing messaging during test runs */ 26 | 27 | void send_failure_info(const char *msg); 28 | void send_loc_info(const char *file, int line); 29 | void send_ctx_info(enum ck_result_ctx ctx); 30 | void send_duration_info(int duration); 31 | 32 | TestResult *receive_test_result(int waserror); 33 | 34 | void setup_messaging(void); 35 | void teardown_messaging(void); 36 | 37 | FILE *open_tmp_file(char **name); 38 | 39 | #endif /*CHECK_MSG_NEW_H */ 40 | -------------------------------------------------------------------------------- /tests/check/check_pack.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Check: a unit test framework for C 3 | * Copyright (C) 2001, 2002 Arien Malec 4 | * 5 | * This library is free software; you can redistribute it and/or 6 | * modify it under the terms of the GNU Lesser General Public 7 | * License as published by the Free Software 
Foundation; either 8 | * version 2.1 of the License, or (at your option) any later version. 9 | * 10 | * This library is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 | * Lesser General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Lesser General Public 16 | * License along with this library; if not, write to the 17 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 18 | * Boston, MA 02111-1307, USA. 19 | */ 20 | 21 | #ifndef CHECK_PACK_H 22 | #define CHECK_PACK_H 23 | 24 | 25 | enum ck_msg_type 26 | { 27 | CK_MSG_CTX, 28 | CK_MSG_FAIL, 29 | CK_MSG_LOC, 30 | CK_MSG_DURATION, 31 | CK_MSG_LAST 32 | }; 33 | 34 | typedef struct CtxMsg 35 | { 36 | enum ck_result_ctx ctx; 37 | } CtxMsg; 38 | 39 | typedef struct LocMsg 40 | { 41 | int line; 42 | char *file; 43 | } LocMsg; 44 | 45 | typedef struct FailMsg 46 | { 47 | char *msg; 48 | } FailMsg; 49 | 50 | typedef struct DurationMsg 51 | { 52 | int duration; 53 | } DurationMsg; 54 | 55 | typedef union 56 | { 57 | CtxMsg ctx_msg; 58 | FailMsg fail_msg; 59 | LocMsg loc_msg; 60 | DurationMsg duration_msg; 61 | } CheckMsg; 62 | 63 | typedef struct RcvMsg 64 | { 65 | enum ck_result_ctx lastctx; 66 | enum ck_result_ctx failctx; 67 | char *fixture_file; 68 | int fixture_line; 69 | char *test_file; 70 | int test_line; 71 | char *msg; 72 | int duration; 73 | } RcvMsg; 74 | 75 | void rcvmsg_free(RcvMsg * rmsg); 76 | 77 | 78 | int pack(enum ck_msg_type type, char **buf, CheckMsg * msg); 79 | int upack(char *buf, CheckMsg * msg, enum ck_msg_type *type); 80 | 81 | void ppack(FILE * fdes, enum ck_msg_type type, CheckMsg * msg); 82 | RcvMsg *punpack(FILE * fdes); 83 | 84 | #endif /*CHECK_PACK_H */ 85 | -------------------------------------------------------------------------------- /tests/check/check_print.c: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Check: a unit test framework for C 3 | * Copyright (C) 2001, 2002 Arien Malec 4 | * 5 | * This library is free software; you can redistribute it and/or 6 | * modify it under the terms of the GNU Lesser General Public 7 | * License as published by the Free Software Foundation; either 8 | * version 2.1 of the License, or (at your option) any later version. 9 | * 10 | * This library is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 | * Lesser General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Lesser General Public 16 | * License along with this library; if not, write to the 17 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 18 | * Boston, MA 02111-1307, USA. 19 | */ 20 | 21 | #include "libcompat.h" 22 | 23 | #include 24 | #include 25 | #include 26 | 27 | #include "check.h" 28 | #include "check_impl.h" 29 | #include "check_list.h" 30 | #include "check_print.h" 31 | #include "check_str.h" 32 | 33 | static void srunner_fprint_summary(FILE *file, SRunner *sr, 34 | enum print_output print_mode); 35 | static void srunner_fprint_results(FILE *file, SRunner *sr, 36 | enum print_output print_mode); 37 | 38 | void srunner_print(SRunner *sr, enum print_output print_mode) 39 | { 40 | srunner_fprint(stdout, sr, print_mode); 41 | } 42 | 43 | void srunner_fprint(FILE *file, SRunner *sr, enum print_output print_mode) 44 | { 45 | if (print_mode == CK_ENV) { 46 | print_mode = get_env_printmode(); 47 | } 48 | 49 | srunner_fprint_summary(file, sr, print_mode); 50 | srunner_fprint_results(file, sr, print_mode); 51 | } 52 | 53 | static void srunner_fprint_summary(FILE *file, SRunner *sr, 54 | enum print_output print_mode) 55 | { 56 | #if 0 57 | if (print_mode == CK_SUBUNIT) 58 | return; 59 | 
#endif 60 | 61 | if (print_mode >= CK_MINIMAL) { 62 | char *str; 63 | 64 | str = sr_stat_str(sr); 65 | fprintf(file, "%s\n", str); 66 | free(str); 67 | } 68 | return; 69 | } 70 | 71 | static void srunner_fprint_results(FILE *file, SRunner *sr, 72 | enum print_output print_mode) 73 | { 74 | List *resultlst; 75 | 76 | #if 0 77 | if (print_mode == CK_SUBUNIT) 78 | return; 79 | #endif 80 | 81 | resultlst = sr->resultlst; 82 | 83 | for (check_list_front(resultlst); !check_list_at_end(resultlst); 84 | check_list_advance(resultlst)) { 85 | TestResult *tr = (TestResult *)check_list_val(resultlst); 86 | 87 | tr_fprint(file, tr, print_mode); 88 | } 89 | return; 90 | } 91 | 92 | void fprint_xml_esc(FILE *file, const char *str) 93 | { 94 | for (; *str != '\0'; str++) { 95 | 96 | switch (*str) { 97 | 98 | /* handle special characters that must be escaped */ 99 | case '"': 100 | fputs(""", file); 101 | break; 102 | case '\'': 103 | fputs("'", file); 104 | break; 105 | case '<': 106 | fputs("<", file); 107 | break; 108 | case '>': 109 | fputs(">", file); 110 | break; 111 | case '&': 112 | fputs("&", file); 113 | break; 114 | 115 | /* regular characters, print as is */ 116 | default: 117 | fputc(*str, file); 118 | break; 119 | } 120 | } 121 | } 122 | 123 | void tr_fprint(FILE *file, TestResult *tr, enum print_output print_mode) 124 | { 125 | if (print_mode == CK_ENV) { 126 | print_mode = get_env_printmode(); 127 | } 128 | 129 | if ((print_mode >= CK_VERBOSE && tr->rtype == CK_PASS) || 130 | (tr->rtype != CK_PASS && print_mode >= CK_NORMAL)) { 131 | char *trstr = tr_str(tr); 132 | 133 | fprintf(file, "%s\n", trstr); 134 | free(trstr); 135 | } 136 | } 137 | 138 | void tr_xmlprint(FILE *file, TestResult *tr, 139 | enum print_output print_mode CK_ATTRIBUTE_UNUSED) 140 | { 141 | char result[10]; 142 | char *path_name = NULL; 143 | char *file_name = NULL; 144 | char *slash = NULL; 145 | 146 | switch (tr->rtype) { 147 | case CK_PASS: 148 | snprintf(result, sizeof(result), "%s", 
"success"); 149 | break; 150 | case CK_FAILURE: 151 | snprintf(result, sizeof(result), "%s", "failure"); 152 | break; 153 | case CK_ERROR: 154 | snprintf(result, sizeof(result), "%s", "error"); 155 | break; 156 | case CK_TEST_RESULT_INVALID: 157 | default: 158 | abort(); 159 | break; 160 | } 161 | 162 | if (tr->file) { 163 | slash = strrchr(tr->file, '/'); 164 | if (slash == NULL) { 165 | slash = strrchr(tr->file, '\\'); 166 | } 167 | 168 | if (slash == NULL) { 169 | path_name = strdup("."); 170 | file_name = tr->file; 171 | } else { 172 | path_name = strdup(tr->file); 173 | path_name[slash - tr->file] = 0; /* Terminate the temporary string. */ 174 | file_name = slash + 1; 175 | } 176 | } 177 | 178 | fprintf(file, " \n", result); 179 | fprintf(file, " %s\n", 180 | (path_name == NULL ? "" : path_name)); 181 | fprintf(file, " %s:%d\n", (file_name == NULL ? "" : file_name), 182 | tr->line); 183 | fprintf(file, " %s\n", tr->tname); 184 | fprintf(file, " %d\n", tr->iter); 185 | fprintf(file, " %d.%06d\n", 186 | tr->duration < 0 ? -1 : tr->duration / US_PER_SEC, 187 | tr->duration < 0 ? 
0 : tr->duration % US_PER_SEC); 188 | fprintf(file, " "); 189 | fprint_xml_esc(file, tr->tcname); 190 | fprintf(file, "\n"); 191 | fprintf(file, " "); 192 | fprint_xml_esc(file, tr->msg); 193 | fprintf(file, "\n"); 194 | fprintf(file, " \n"); 195 | 196 | free(path_name); 197 | } 198 | 199 | enum print_output get_env_printmode(void) 200 | { 201 | char *env = getenv("CK_VERBOSITY"); 202 | 203 | if (env == NULL) 204 | return CK_NORMAL; 205 | if (strcmp(env, "silent") == 0) 206 | return CK_SILENT; 207 | if (strcmp(env, "minimal") == 0) 208 | return CK_MINIMAL; 209 | if (strcmp(env, "verbose") == 0) 210 | return CK_VERBOSE; 211 | return CK_NORMAL; 212 | } 213 | -------------------------------------------------------------------------------- /tests/check/check_print.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Check: a unit test framework for C 3 | * Copyright (C) 2001, 2002 Arien Malec 4 | * 5 | * This library is free software; you can redistribute it and/or 6 | * modify it under the terms of the GNU Lesser General Public 7 | * License as published by the Free Software Foundation; either 8 | * version 2.1 of the License, or (at your option) any later version. 9 | * 10 | * This library is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 | * Lesser General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Lesser General Public 16 | * License along with this library; if not, write to the 17 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 18 | * Boston, MA 02111-1307, USA. 
19 | */ 20 | 21 | #ifndef CHECK_PRINT_H 22 | #define CHECK_PRINT_H 23 | 24 | /* escape XML special characters (" ' < > &) in str and print to file */ 25 | void fprint_xml_esc(FILE * file, const char *str); 26 | void tr_fprint(FILE * file, TestResult * tr, enum print_output print_mode); 27 | void tr_xmlprint(FILE * file, TestResult * tr, enum print_output print_mode); 28 | void srunner_fprint(FILE * file, SRunner * sr, enum print_output print_mode); 29 | enum print_output get_env_printmode(void); 30 | 31 | 32 | #endif /* CHECK_PRINT_H */ 33 | -------------------------------------------------------------------------------- /tests/check/check_str.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Check: a unit test framework for C 3 | * Copyright (C) 2001, 2002 Arien Malec 4 | * 5 | * This library is free software; you can redistribute it and/or 6 | * modify it under the terms of the GNU Lesser General Public 7 | * License as published by the Free Software Foundation; either 8 | * version 2.1 of the License, or (at your option) any later version. 9 | * 10 | * This library is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 | * Lesser General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Lesser General Public 16 | * License along with this library; if not, write to the 17 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 18 | * Boston, MA 02111-1307, USA. 
19 | */ 20 | 21 | #include "libcompat.h" 22 | 23 | #include 24 | #include 25 | 26 | #include "check.h" 27 | #include "check_error.h" 28 | #include "check_impl.h" 29 | #include "check_list.h" 30 | #include "check_str.h" 31 | 32 | static const char *tr_type_str(TestResult *tr); 33 | static int percent_passed(TestStats *t); 34 | 35 | char *tr_str(TestResult *tr) 36 | { 37 | const char *exact_msg; 38 | char *rstr; 39 | 40 | exact_msg = (tr->rtype == CK_ERROR) ? "(after this point) " : ""; 41 | 42 | rstr = ck_strdup_printf("%s:%d:%s:%s:%s:%d: %s%s", tr->file, tr->line, 43 | tr_type_str(tr), tr->tcname, tr->tname, tr->iter, 44 | exact_msg, tr->msg); 45 | 46 | return rstr; 47 | } 48 | 49 | char *tr_short_str(TestResult *tr) 50 | { 51 | const char *exact_msg; 52 | char *rstr; 53 | 54 | exact_msg = (tr->rtype == CK_ERROR) ? "(after this point) " : ""; 55 | 56 | rstr = 57 | ck_strdup_printf("%s:%d: %s%s", tr->file, tr->line, exact_msg, tr->msg); 58 | 59 | return rstr; 60 | } 61 | 62 | char *sr_stat_str(SRunner *sr) 63 | { 64 | char *str; 65 | TestStats *ts; 66 | 67 | ts = sr->stats; 68 | 69 | str = ck_strdup_printf("%d%%: Checks: %d, Failures: %d, Errors: %d", 70 | percent_passed(ts), ts->n_checked, ts->n_failed, 71 | ts->n_errors); 72 | 73 | return str; 74 | } 75 | 76 | __attribute__((format(printf, 1, 2))) char *ck_strdup_printf(const char *fmt, 77 | ...) 78 | { 79 | /* Guess we need no more than 100 bytes. */ 80 | int n; 81 | size_t size = 100; 82 | char *p; 83 | va_list ap; 84 | 85 | p = (char *)emalloc(size); 86 | 87 | while (1) { 88 | /* Try to print in the allocated space. */ 89 | va_start(ap, fmt); 90 | n = vsnprintf(p, size, fmt, ap); 91 | va_end(ap); 92 | /* If that worked, return the string. */ 93 | if (n > -1 && n < (int)size) 94 | return p; 95 | 96 | /* Else try again with more space. 
*/ 97 | if (n > -1) /* C99 conform vsnprintf() */ 98 | size = (size_t)n + 1; /* precisely what is needed */ 99 | else /* glibc 2.0 */ 100 | size *= 2; /* twice the old size */ 101 | 102 | p = (char *)erealloc(p, size); 103 | } 104 | } 105 | 106 | static const char *tr_type_str(TestResult *tr) 107 | { 108 | const char *str = NULL; 109 | 110 | if (tr->ctx == CK_CTX_TEST) { 111 | if (tr->rtype == CK_PASS) 112 | str = "P"; 113 | else if (tr->rtype == CK_FAILURE) 114 | str = "F"; 115 | else if (tr->rtype == CK_ERROR) 116 | str = "E"; 117 | } else 118 | str = "S"; 119 | 120 | return str; 121 | } 122 | 123 | static int percent_passed(TestStats *t) 124 | { 125 | if (t->n_failed == 0 && t->n_errors == 0) 126 | return 100; 127 | else if (t->n_checked == 0) 128 | return 0; 129 | else 130 | return (int)((float)(t->n_checked - (t->n_failed + t->n_errors)) / 131 | (float)t->n_checked * 100); 132 | } 133 | -------------------------------------------------------------------------------- /tests/check/check_str.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Check: a unit test framework for C 3 | * Copyright (C) 2001, 2002 Arien Malec 4 | * 5 | * This library is free software; you can redistribute it and/or 6 | * modify it under the terms of the GNU Lesser General Public 7 | * License as published by the Free Software Foundation; either 8 | * version 2.1 of the License, or (at your option) any later version. 9 | * 10 | * This library is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 | * Lesser General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU Lesser General Public 16 | * License along with this library; if not, write to the 17 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 18 | * Boston, MA 02111-1307, USA. 
19 | */ 20 | 21 | #ifndef CHECK_STR_H 22 | #define CHECK_STR_H 23 | 24 | /* Return a string representation of the given TestResult. Return 25 | value has been malloc'd, and must be freed by the caller */ 26 | char *tr_str(TestResult * tr); 27 | 28 | /* Return a string representation of the given TestResult message 29 | without the test id or result type. This is suitable for separate 30 | formatting of the test and the message. Return value has been 31 | malloc'd, and must be freed by the caller */ 32 | char *tr_short_str(TestResult * tr); 33 | 34 | /* Return a string representation of the given SRunner's run 35 | statistics (% passed, num run, passed, errors, failures). Return 36 | value has been malloc'd, and must be freed by the caller 37 | */ 38 | char *sr_stat_str(SRunner * sr); 39 | 40 | char *ck_strdup_printf(const char *fmt, ...); 41 | 42 | #endif /* CHECK_STR_H */ 43 | -------------------------------------------------------------------------------- /tests/check/libcompat.h: -------------------------------------------------------------------------------- 1 | #ifndef LIBCOMPAT_H 2 | #define LIBCOMPAT_H 3 | 4 | #if defined(__GNUC__) && defined(__GNUC_MINOR__) 5 | #define GCC_VERSION_AT_LEAST(major, minor) \ 6 | ((__GNUC__ > (major)) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor))) 7 | #else 8 | #define GCC_VERSION_AT_LEAST(major, minor) 0 9 | #endif 10 | 11 | #if GCC_VERSION_AT_LEAST(2, 95) 12 | #define CK_ATTRIBUTE_UNUSED __attribute__((unused)) 13 | #else 14 | #define CK_ATTRIBUTE_UNUSED 15 | #endif /* GCC 2.95 */ 16 | 17 | #if GCC_VERSION_AT_LEAST(2, 5) 18 | #define CK_ATTRIBUTE_NORETURN __attribute__((noreturn)) 19 | #else 20 | #define CK_ATTRIBUTE_NORETURN 21 | #endif /* GCC 2.5 */ 22 | 23 | /* 24 | * Used for MSVC to create the export attribute 25 | * CK_DLL_EXP is defined during the compilation of the library 26 | * on the command line. 
27 | */ 28 | #ifndef CK_DLL_EXP 29 | #define CK_DLL_EXP 30 | #endif 31 | 32 | /* defines size_t */ 33 | #include 34 | 35 | /* provides assert */ 36 | #include 37 | 38 | /* defines FILE */ 39 | #include 40 | 41 | /* defines exit() */ 42 | #include 43 | 44 | /* provides localtime and struct tm */ 45 | #include 46 | #include 47 | 48 | /* declares fork(), _POSIX_VERSION. according to Autoconf.info, 49 | unistd.h defines _POSIX_VERSION if the system is POSIX-compliant, 50 | so we will use this as a test for all things uniquely provided by 51 | POSIX like sigaction() and fork() */ 52 | #ifdef HAVE_UNISTD_H 53 | #include 54 | #endif 55 | 56 | #ifdef HAVE_SYS_WAIT_H 57 | #include 58 | #endif 59 | 60 | /* declares pthread_create and friends */ 61 | #ifdef HAVE_PTHREAD 62 | #include 63 | #endif 64 | 65 | #ifdef HAVE_STDINT_H 66 | #include 67 | #endif 68 | 69 | /* 70 | * On systems where clock_gettime() is not available, or 71 | * on systems where some clocks may not be supported, the 72 | * definition for CLOCK_MONOTONIC and CLOCK_REALTIME may not 73 | * be available. These should define which type of clock 74 | * clock_gettime() should use. We define it here if it is 75 | * not defined simply so the reimplementation can ignore it. 76 | * 77 | * We set the values of these clocks to some (hopefully) 78 | * invalid value, to avoid the case where we define a 79 | * clock with a valid value, and unintentionally use 80 | * an actual good clock by accident. 81 | */ 82 | #ifndef CLOCK_MONOTONIC 83 | #define CLOCK_MONOTONIC -1 84 | #endif 85 | #ifndef CLOCK_REALTIME 86 | #define CLOCK_REALTIME -1 87 | #endif 88 | 89 | #ifndef HAVE_LIBRT 90 | 91 | #ifdef STRUCT_TIMESPEC_DEFINITION_MISSING 92 | /* 93 | * The following structure is defined in POSIX 1003.1 for times 94 | * specified in seconds and nanoseconds. 
If it is not defined in 95 | * time.h, then we need to define it here 96 | */ 97 | struct timespec { 98 | time_t tv_sec; 99 | long tv_nsec; 100 | }; 101 | #endif /* STRUCT_TIMESPEC_DEFINITION_MISSING */ 102 | 103 | #ifdef STRUCT_ITIMERSPEC_DEFINITION_MISSING 104 | /* 105 | * The following structure is defined in POSIX.1b for timer start values and 106 | * intervals. 107 | * If it is not defined in time.h, then we need to define it here. 108 | */ 109 | struct itimerspec { 110 | struct timespec it_interval; 111 | struct timespec it_value; 112 | }; 113 | #endif /* STRUCT_ITIMERSPEC_DEFINITION_MISSING */ 114 | 115 | /* 116 | * Do a simple forward declaration in case the struct is not defined. 117 | * In the versions of timer_create in libcompat, sigevent is never 118 | * used. 119 | */ 120 | struct sigevent; 121 | 122 | CK_DLL_EXP int clock_gettime(clockid_t clk_id, struct timespec *ts); 123 | CK_DLL_EXP int timer_create(clockid_t clockid, struct sigevent *sevp, 124 | timer_t *timerid); 125 | CK_DLL_EXP int timer_settime(timer_t timerid, int flags, 126 | const struct itimerspec *new_value, 127 | struct itimerspec *old_value); 128 | CK_DLL_EXP int timer_delete(timer_t timerid); 129 | #endif /* HAVE_LIBRT */ 130 | 131 | /* silence warnings about an empty library */ 132 | CK_DLL_EXP void ck_do_nothing(void) CK_ATTRIBUTE_NORETURN; 133 | 134 | #endif /* !LIBCOMPAT_H */ 135 | -------------------------------------------------------------------------------- /tests/examples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_c_test(example-bitmap) 2 | add_c_test(example-bitmap-rle) 3 | add_c_test(example-bloomfilter) 4 | add_c_test(example-bloomfilter-a2) 5 | add_c_test(example-hyperloglog) 6 | add_c_test(example-minhash) 7 | 8 | add_c_example(bf-uniq) 9 | add_c_example(hll-wc) 10 | 11 | find_package(PythonInterp) 12 | 13 | if (PYTHON_EXECUTABLE) 14 | file(GLOB_RECURSE TESTS "${CMAKE_CURRENT_SOURCE_DIR}/*.t") 15 | 
foreach(TEST ${TESTS}) 16 | get_filename_component(TEST_NAME "${TEST}" NAME_WE) 17 | add_test( 18 | ${TEST_NAME} 19 | ${CMAKE_COMMAND} -E chdir ${CMAKE_BINARY_DIR} 20 | ${CMAKE_SOURCE_DIR}/tools/cram/ccram 21 | --python ${PYTHON_EXECUTABLE} 22 | --root ${CMAKE_CURRENT_SOURCE_DIR} 23 | --build ${CMAKE_BINARY_DIR}/tests/examples 24 | --tests ${TEST} 25 | ) 26 | endforeach() 27 | else() 28 | message(WARNING "Unable to find Python; skipping cram tests.") 29 | endif() 30 | -------------------------------------------------------------------------------- /tests/examples/bf-uniq.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "../../src/twiddle/macrology.h" 8 | #include 9 | 10 | static struct option long_options[] = { 11 | {"probability", required_argument, 0, 'p'}, 12 | {"count", required_argument, 0, 'n'}, 13 | {"duration", required_argument, 0, 'd'}, 14 | {0, 0, 0, 0}}; 15 | /* Parse optarg as a probability in (0, 1]; stores it in *p and returns true, or returns false when out of range. */ 16 | static bool parse_probability(float *p) 17 | { 18 | const float parsed_p = strtof(optarg, NULL); 19 | if (!(0 < parsed_p && parsed_p <= 1)) { 20 | return false; 21 | } 22 | 23 | *p = parsed_p; 24 | 25 | return true; 26 | } 27 | 28 | static bool parse_count(int64_t *n) 29 | { 30 | const int64_t parsed_n = strtoll(optarg, NULL, 10); 31 | if (parsed_n < 0) { 32 | return false; 33 | } 34 | 35 | *n = parsed_n; 36 | 37 | return true; 38 | } 39 | 40 | static bool apply_time_suffix(float *x, char suffix_char) 41 | { 42 | int multiplier; 43 | 44 | switch (suffix_char) { 45 | case 0: 46 | case 's': 47 | multiplier = 1; 48 | break; 49 | case 'm': 50 | multiplier = 60; 51 | break; 52 | case 'h': 53 | multiplier = 60 * 60; 54 | break; 55 | case 'd': 56 | multiplier = 60 * 60 * 24; 57 | break; 58 | default: 59 | return false; 60 | } 61 | 62 | *x *= multiplier; 63 | 64 | return true; 65 | } 66 | 67 | static bool parse_duration(const char *str, float 
*d) 68 | { 69 | char *ep; 70 | float duration = strtof(str, &ep); 71 | 72 | if (!(0 <= duration) 73 | /* No extra chars after the number and an optional s,m,h,d char. */ 74 | || (*ep && *(ep + 1)) 75 | /* Check any suffix char and update timeout based on the suffix. */ 76 | || !apply_time_suffix(&duration, *ep)) { 77 | return false; 78 | } 79 | 80 | *d = duration; 81 | 82 | return true; 83 | } 84 | /* Parse -n/-p/-d (and their long forms) into *n, *p and *d; returns 0 on success, -1 on an invalid value or unknown option. */ 85 | static int parse_arguments(int argc, char **argv, int64_t *n, float *p, 86 | float *d) 87 | { 88 | int c = 0; 89 | 90 | while (1) { 91 | int option_index = 0; 92 | 93 | c = getopt_long(argc, argv, "n:p:d:", long_options, &option_index); 94 | if (c == -1) 95 | break; 96 | 97 | switch (c) { 98 | case 'n': 99 | if (!parse_count(n)) { 100 | return -1; 101 | } 102 | break; 103 | case 'p': 104 | if (!parse_probability(p)) { 105 | return -1; 106 | } 107 | break; 108 | case 'd': 109 | if (!parse_duration(optarg, d)) { 110 | return -1; 111 | } 112 | break; 113 | default: 114 | printf("?? getopt returned character code 0%o ??\n", c); 115 | return -1; 116 | } 117 | } 118 | 119 | return 0; 120 | } 121 | /* clock_gettime shim for Apple builds, backed by gettimeofday(); clk_id is ignored. */ 122 | #ifdef __APPLE__ 123 | #include 124 | #define CLOCK_MONOTONIC 0 125 | int clock_gettime(int __attribute__((unused)) clk_id, struct timespec *t) 126 | { 127 | struct timeval now; 128 | int rv = gettimeofday(&now, NULL); 129 | if (rv) 130 | return rv; 131 | t->tv_sec = now.tv_sec; 132 | t->tv_nsec = now.tv_usec * 1000; 133 | return 0; 134 | } 135 | #endif 136 | /* Echo each stdin line the bloom filter has not seen yet; with a positive -d duration the filter is zeroed each time that duration has elapsed. */ 137 | int main(int argc, char *argv[]) 138 | { 139 | int64_t n = 1000000; 140 | float p = 0.0001; 141 | float c = -1.0; 142 | 143 | if (parse_arguments(argc, argv, &n, &p, &c) != 0) { 144 | exit(-1); 145 | } 146 | 147 | const uint64_t m = tw_bloomfilter_optimal_m(n, p); 148 | const uint16_t k = tw_bloomfilter_optimal_k(n, m); 149 | 150 | /* build the filter from the computed m and k */ 151 | 152 | struct tw_bloomfilter *bf = tw_bloomfilter_new(m, k); 153 | 154 | if (!bf) { 155 | exit(1); 156 | } 157 | 158 | char *line = NULL; 159 | size_t buf_len 
= 0; 160 | ssize_t line_len = 0; 161 | 162 | /* setup expire: add c as whole seconds so the sum is computed in time_t rather than float */ 163 | struct timespec next_expire = {0, 0}; 164 | clock_gettime(CLOCK_MONOTONIC, &next_expire); 165 | next_expire.tv_sec += (time_t)c; 166 | 167 | while ((line_len = getline(&line, &buf_len, stdin)) != -1) { 168 | 169 | if (tw_unlikely(c > 0)) { 170 | struct timespec now = {0, 0}; 171 | clock_gettime(CLOCK_MONOTONIC, &now); 172 | 173 | if (tw_unlikely(now.tv_sec >= next_expire.tv_sec)) { 174 | next_expire.tv_sec = now.tv_sec + (time_t)c; 175 | tw_bloomfilter_zero(bf); 176 | } 177 | } 178 | 179 | if (!tw_bloomfilter_test(bf, line, line_len)) { 180 | fprintf(stdout, "%s", line); 181 | tw_bloomfilter_set(bf, line, line_len); 182 | } 183 | } 184 | 185 | free(line); 186 | 187 | tw_bloomfilter_free(bf); 188 | 189 | return 0; 190 | } 191 | -------------------------------------------------------------------------------- /tests/examples/bf-uniq.t: -------------------------------------------------------------------------------- 1 | $ yes uuidgen | head -500 | xargs -L1 bash -c | sort > uuids 2 | $ uniq uuids > uuids.uniq 3 | $ cat uuids | bf-uniq > uuids.bf-uniq 4 | $ diff uuids.uniq uuids.bf-uniq 5 | $ (echo "a"; sleep 2; echo "a") | bf-uniq -d 1s 6 | a 7 | a 8 | -------------------------------------------------------------------------------- /tests/examples/example-bitmap-rle.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main() 5 | { 6 | /** allocate a bitmap containing 2 billion bits */ 7 | const uint64_t nbits = 1UL << 31; 8 | struct tw_bitmap_rle *bitmap = tw_bitmap_rle_new(nbits); 9 | 10 | assert(bitmap); 11 | 12 | /** fill 1 billion bits */ 13 | const uint64_t start = 0UL; 14 | const uint64_t end = 1UL << 30; 15 | tw_bitmap_rle_set_range(bitmap, start, end); 16 | 17 | /** 18 | * bitmap_rle DOES NOT support setting bits in non sorted order, e.g. 
19 | * calling the following will raise an assert() call (or undefined behaviour 20 | * if compiled with NDEBUG): 21 | * 22 | * tw_bitmap_rle_set(bitmap, start - 1); 23 | */ 24 | 25 | assert(tw_bitmap_rle_test(bitmap, start)); 26 | assert(tw_bitmap_rle_test(bitmap, end)); 27 | assert(tw_bitmap_rle_find_first_bit(bitmap) == (int64_t)start); 28 | assert(tw_bitmap_rle_find_first_zero(bitmap) == (int64_t)end + 1); 29 | 30 | tw_bitmap_rle_free(bitmap); 31 | 32 | return 0; 33 | } 34 | -------------------------------------------------------------------------------- /tests/examples/example-bitmap.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main() 5 | { 6 | const uint64_t nbits = 1024; 7 | struct tw_bitmap *bitmap = tw_bitmap_new(nbits); 8 | 9 | assert(bitmap); 10 | 11 | tw_bitmap_set(bitmap, 512); 12 | assert(tw_bitmap_test_and_clear(bitmap, 512)); 13 | assert(!tw_bitmap_test(bitmap, 512)); 14 | 15 | tw_bitmap_set(bitmap, 768); 16 | assert(tw_bitmap_find_first_bit(bitmap) == 768); 17 | 18 | tw_bitmap_free(bitmap); 19 | 20 | return 0; 21 | } 22 | -------------------------------------------------------------------------------- /tests/examples/example-bloomfilter-a2.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | 6 | int main() 7 | { 8 | const uint64_t nbits = 1024; 9 | const uint16_t k = 7; 10 | const float density = 0.50; 11 | struct tw_bloomfilter_a2 *bf = tw_bloomfilter_a2_new(nbits, k, density); 12 | assert(bf); 13 | 14 | /** 15 | * An active-active bloomfilter (a2-bloomfilter) is a pair of bloomfilters, 16 | * one active, one passive, that gets rotated when 17 | * active.density >= density_threshold is reached. Before rotation the passive 18 | * bloomfilter gets cleared. 19 | * 20 | * Since density is applied locally to the active bloom filter, the whole 21 | * bloomfilter density is bounded by 2 * density. 
22 | * 23 | * Thus an a2-bloomfilter is a bloomfilter with an LRU (on write operation) 24 | * eviction policy. 25 | */ 26 | 27 | for (size_t i = 0; i < nbits * 10; ++i) { 28 | tw_bloomfilter_a2_set(bf, (void *)&i, sizeof(i)); 29 | assert(tw_bloomfilter_a2_density(bf) < 2 * density); 30 | } 31 | 32 | tw_bloomfilter_a2_free(bf); 33 | 34 | return 0; 35 | } 36 | -------------------------------------------------------------------------------- /tests/examples/example-bloomfilter.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | 6 | int main() 7 | { 8 | const uint64_t nbits = 1024; 9 | const uint16_t k = 7; 10 | struct tw_bloomfilter *bf = tw_bloomfilter_new(nbits, k); 11 | assert(bf); 12 | 13 | const char *values[] = {"herp", "derp", "ferp", "merp"}; 14 | 15 | for (size_t i = 0; i < ((sizeof(values) / sizeof(values[0]))); ++i) { 16 | tw_bloomfilter_set(bf, values[i], strlen(values[i])); 17 | assert(tw_bloomfilter_test(bf, values[i], strlen(values[i]))); 18 | } 19 | 20 | assert(!tw_bloomfilter_test(bf, "nope", sizeof("nope"))); 21 | 22 | tw_bloomfilter_free(bf); 23 | 24 | return 0; 25 | } 26 | -------------------------------------------------------------------------------- /tests/examples/example-hyperloglog.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | 6 | int main() 7 | { 8 | const uint8_t precision = 16; 9 | struct tw_hyperloglog *hll = tw_hyperloglog_new(precision); 10 | assert(hll); 11 | 12 | const uint32_t n_elems = 10 * (1 << precision); 13 | for (size_t i = 0; i < n_elems; ++i) { 14 | tw_hyperloglog_add(hll, (void *)&i, sizeof(i)); 15 | } 16 | /* n_elems is unsigned: print it with %u through an explicit unsigned cast */ 17 | printf("estimated count: %f, real count: %u\n", tw_hyperloglog_count(hll), 18 | (unsigned)n_elems); 19 | 20 | tw_hyperloglog_free(hll); 21 | 22 | return 0; 23 | } 24 | -------------------------------------------------------------------------------- 
/tests/examples/example-minhash.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | 6 | int main() 7 | { 8 | const uint32_t n_registers = 1 << 13; 9 | struct tw_minhash *a = tw_minhash_new(n_registers); 10 | assert(a); 11 | struct tw_minhash *b = tw_minhash_clone(a); 12 | assert(b); 13 | 14 | const uint32_t n_elems = 10 * n_registers; 15 | for (size_t i = 0; i < n_elems; ++i) { 16 | const size_t key_size = sizeof(i); 17 | const void *key = (void *)&i; 18 | if (i % 3 == 0) { 19 | tw_minhash_add(a, key, key_size); 20 | } 21 | 22 | if (i % 5 == 0) { 23 | tw_minhash_add(b, key, key_size); 24 | } 25 | } 26 | 27 | printf("estimated jaccard: %f\n", tw_minhash_estimate(a, b)); 28 | 29 | tw_minhash_free(b); 30 | tw_minhash_free(a); 31 | 32 | return 0; 33 | } 34 | -------------------------------------------------------------------------------- /tests/examples/hll-wc.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | static struct option long_options[] = {{"precision", required_argument, 0, 'p'}, 10 | {"stream", no_argument, 0, 's'}, 11 | {0, 0, 0, 0}}; 12 | /* Parse optarg as a precision in [TW_HLL_MIN_PRECISION, TW_HLL_MAX_PRECISION], both ends included; stores it in *p and returns 0, or returns -1 when out of range. */ 13 | int parse_precision(uint8_t *p) 14 | { 15 | const int64_t parsed_p = strtol(optarg, NULL, 10); 16 | if (parsed_p < TW_HLL_MIN_PRECISION || parsed_p > TW_HLL_MAX_PRECISION) { 17 | return -1; 18 | } 19 | 20 | *p = parsed_p; 21 | 22 | return 0; 23 | } 24 | 25 | static int parse_arguments(int argc, char **argv, uint8_t *p, bool *stream) 26 | { 27 | int c = 0; 28 | int ret = 0; 29 | 30 | while (1) { 31 | int option_index = 0; 32 | 33 | c = getopt_long(argc, argv, "p:s", long_options, &option_index); 34 | if (c == -1) 35 | break; 36 | 37 | switch (c) { 38 | case 'p': 39 | if ((ret = parse_precision(p)) != 0) { 40 | return ret; 41 | } 42 | break; 43 | case 's': 44 | *stream = true; 45 | break; 46 | default: 47 | 
printf("?? getopt returned character code 0%o ??\n", c); 48 | return -1; 49 | } 50 | } 51 | 52 | return 0; 53 | } 54 | 55 | int main(int argc, char *argv[]) 56 | { 57 | uint8_t p = 16; 58 | bool stream = false; 59 | 60 | if (parse_arguments(argc, argv, &p, &stream) != 0) { 61 | exit(-1); 62 | } 63 | 64 | struct tw_hyperloglog *hll = tw_hyperloglog_new(p); 65 | 66 | if (!hll) { 67 | exit(1); 68 | } 69 | 70 | char *line = NULL; 71 | size_t buf_len = 0; 72 | ssize_t line_len = 0; 73 | 74 | while ((line_len = getline(&line, &buf_len, stdin)) != -1) { 75 | tw_hyperloglog_add(hll, line, line_len); 76 | if (stream) { 77 | fprintf(stdout, "%" PRIu64 "\n", (uint64_t)tw_hyperloglog_count(hll)); 78 | } 79 | } 80 | 81 | if (!stream) { 82 | fprintf(stdout, "%" PRIu64 "\n", (uint64_t)tw_hyperloglog_count(hll)); 83 | } 84 | 85 | free(line); 86 | tw_hyperloglog_free(hll); 87 | 88 | return 0; 89 | } 90 | -------------------------------------------------------------------------------- /tests/examples/hll-wc.t: -------------------------------------------------------------------------------- 1 | $ yes uuidgen | head -500 | xargs -L1 bash -c | sort -u > uuids 2 | $ cat uuids | wc -l 3 | 500 4 | $ echo $(((500 - $(cat uuids uuids uuids | hll-wc)) / 10)) 5 | 0 6 | $ (echo '1'; echo '2'; echo '3'; echo '1'; echo '2'; echo '3') | hll-wc -s 7 | 1 8 | 2 9 | 3 10 | 3 11 | 3 12 | 3 13 | -------------------------------------------------------------------------------- /tests/test-bloomfilter.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "../src/twiddle/macrology.h" 5 | #include "test.h" 6 | 7 | START_TEST(test_bloomfilter_basic) 8 | { 9 | DESCRIBE_TEST; 10 | 11 | const uint32_t sizes[] = {32, 64, 128, 256, 512, 1024, 2048, 4096, 1 << 17}; 12 | const uint32_t ks[] = {1, 2, 3, 4, 5, 6, 7, 8, 17}; 13 | const uint32_t offsets[] = {-1, 0, 1}; 14 | const char *values[] = {"herp", "derp", "ferp", "merp"}; 15 | 16 | for 
(size_t i = 0; i < TW_ARRAY_SIZE(sizes); ++i) { 17 | for (size_t j = 0; j < TW_ARRAY_SIZE(offsets); ++j) { 18 | const uint32_t nbits = sizes[i] + offsets[j]; 19 | const uint32_t k = ks[i]; 20 | struct tw_bloomfilter *bf = tw_bloomfilter_new(nbits, k); 21 | 22 | for (size_t l = 0; l < TW_ARRAY_SIZE(values); ++l) { 23 | const char *value = values[l]; 24 | tw_bloomfilter_set(bf, value, strlen(value)); 25 | ck_assert(tw_bloomfilter_test(bf, value, strlen(value))); 26 | } 27 | 28 | /** 29 | * This is prone to failure and may be removed if causing problem. 30 | */ 31 | const char *not_there = "oups!"; 32 | ck_assert(!tw_bloomfilter_test(bf, not_there, strlen(not_there))); 33 | 34 | tw_bloomfilter_free(bf); 35 | } 36 | } 37 | } 38 | END_TEST 39 | 40 | START_TEST(test_bloomfilter_copy_and_clone) 41 | { 42 | DESCRIBE_TEST; 43 | 44 | const uint32_t sizes[] = {1024, 2048, 4096, 1 << 17}; 45 | const uint32_t ks[] = {6, 7, 8, 17}; 46 | const uint32_t offsets[] = {-1, 0, 1}; 47 | 48 | const char *values[] = {"herp", "derp", "ferp", "merp"}; 49 | 50 | for (size_t i = 0; i < TW_ARRAY_SIZE(sizes); ++i) { 51 | for (size_t j = 0; j < TW_ARRAY_SIZE(offsets); ++j) { 52 | const uint32_t nbits = sizes[i] + offsets[j]; 53 | const uint32_t k = ks[i]; 54 | struct tw_bloomfilter *bf = tw_bloomfilter_new(nbits, k); 55 | 56 | for (size_t l = 0; l < TW_ARRAY_SIZE(values); ++l) { 57 | const char *value = values[l]; 58 | tw_bloomfilter_set(bf, value, strlen(value)); 59 | } 60 | 61 | struct tw_bloomfilter *copy = tw_bloomfilter_new(nbits, k); 62 | tw_bloomfilter_copy(bf, copy); 63 | struct tw_bloomfilter *clone = tw_bloomfilter_clone(copy); 64 | 65 | for (size_t l = 0; l < TW_ARRAY_SIZE(values); ++l) { 66 | const char *value = values[l]; 67 | ck_assert(tw_bloomfilter_test(bf, value, strlen(value))); 68 | ck_assert(tw_bloomfilter_test(copy, value, strlen(value))); 69 | ck_assert(tw_bloomfilter_test(clone, value, strlen(value))); 70 | } 71 | 72 | /** 73 | * This is prone to failure and may be 
removed if it causes problems. 74 | */ 75 | const char *not_there = "oups!"; 76 | ck_assert(!tw_bloomfilter_test(bf, not_there, strlen(not_there))); 77 | 78 | /** 79 | * Quickly validate independence 80 | */ 81 | tw_bloomfilter_zero(bf); 82 | ck_assert(tw_bloomfilter_empty(bf)); 83 | ck_assert(!tw_bloomfilter_empty(copy)); 84 | ck_assert(!tw_bloomfilter_empty(clone)); 85 | 86 | tw_bloomfilter_zero(copy); 87 | ck_assert(tw_bloomfilter_empty(copy)); 88 | ck_assert(!tw_bloomfilter_empty(clone)); 89 | 90 | tw_bloomfilter_free(bf); 91 | tw_bloomfilter_free(copy); 92 | tw_bloomfilter_free(clone); 93 | } 94 | } 95 | } 96 | END_TEST 97 | 98 | START_TEST(test_bloomfilter_set_operations) 99 | { 100 | DESCRIBE_TEST; 101 | 102 | const int32_t sizes[] = {1024, 2048, 4096}; 103 | const int32_t ks[] = {6, 7, 8}; 104 | const int32_t offsets[] = {-1, 0, 1}; 105 | const char *values[] = {"herp", "derp", "ferp", "merp"}; 106 | 107 | for (size_t i = 0; i < TW_ARRAY_SIZE(sizes); ++i) { 108 | for (size_t j = 0; j < TW_ARRAY_SIZE(offsets); ++j) { 109 | const int32_t nbits = sizes[i] + offsets[j]; 110 | const int32_t k = ks[i]; 111 | struct tw_bloomfilter *src = tw_bloomfilter_new(nbits, k); 112 | struct tw_bloomfilter *dst = tw_bloomfilter_new(nbits, k); 113 | 114 | tw_bloomfilter_set(src, values[0], strlen(values[0])); 115 | tw_bloomfilter_set(src, values[1], strlen(values[1])); 116 | tw_bloomfilter_set(src, values[2], strlen(values[2])); 117 | 118 | tw_bloomfilter_set(dst, values[1], strlen(values[1])); 119 | tw_bloomfilter_set(dst, values[2], strlen(values[2])); 120 | tw_bloomfilter_set(dst, values[3], strlen(values[3])); 121 | 122 | ck_assert_ptr_ne(tw_bloomfilter_intersection(src, dst), NULL); 123 | ck_assert(!tw_bloomfilter_test(dst, values[0], strlen(values[0]))); 124 | ck_assert(tw_bloomfilter_test(dst, values[1], strlen(values[1]))); 125 | ck_assert(tw_bloomfilter_test(dst, values[2], strlen(values[2]))); 126 | ck_assert(!tw_bloomfilter_test(dst, values[3], strlen(values[3]))); 127
| 128 | ck_assert_ptr_ne(tw_bloomfilter_union(src, dst), NULL); 129 | ck_assert(tw_bloomfilter_test(dst, values[0], strlen(values[0]))); 130 | ck_assert(tw_bloomfilter_test(dst, values[1], strlen(values[1]))); 131 | ck_assert(tw_bloomfilter_test(dst, values[2], strlen(values[2]))); 132 | ck_assert(!tw_bloomfilter_test(dst, values[3], strlen(values[3]))); 133 | ck_assert(tw_bloomfilter_equal(src, dst)); 134 | 135 | tw_bloomfilter_free(src); 136 | tw_bloomfilter_free(dst); 137 | } 138 | } 139 | } 140 | END_TEST 141 | 142 | START_TEST(test_bloomfilter_errors) 143 | { 144 | DESCRIBE_TEST; 145 | 146 | uint8_t k = 8; 147 | uint64_t size = 1 << 18; 148 | 149 | struct tw_bloomfilter *a = tw_bloomfilter_new(size, k), 150 | *b = tw_bloomfilter_new(size + 1, k), 151 | *c = tw_bloomfilter_new(size, k + 1); 152 | 153 | ck_assert_ptr_eq(tw_bloomfilter_clone(NULL), NULL); 154 | ck_assert_ptr_eq(tw_bloomfilter_copy(a, NULL), NULL); 155 | ck_assert_ptr_eq(tw_bloomfilter_copy(NULL, NULL), NULL); 156 | ck_assert_ptr_eq(tw_bloomfilter_copy(a, b), NULL); 157 | ck_assert_ptr_eq(tw_bloomfilter_copy(a, c), c); 158 | 159 | tw_bloomfilter_set(NULL, NULL, 0); 160 | tw_bloomfilter_set(a, NULL, 1); 161 | tw_bloomfilter_set(a, &k, 0); 162 | 163 | tw_bloomfilter_fill(a); 164 | 165 | ck_assert(!tw_bloomfilter_test(NULL, NULL, 0)); 166 | ck_assert(!tw_bloomfilter_test(a, NULL, 1)); 167 | ck_assert(!tw_bloomfilter_test(a, &k, 0)); 168 | 169 | ck_assert(!tw_bloomfilter_empty(NULL)); 170 | ck_assert(!tw_bloomfilter_full(NULL)); 171 | ck_assert_int_eq(tw_bloomfilter_count(NULL), 0); 172 | ck_assert_ptr_eq(tw_bloomfilter_zero(NULL), NULL); 173 | ck_assert_ptr_eq(tw_bloomfilter_fill(NULL), NULL); 174 | ck_assert_ptr_eq(tw_bloomfilter_not(NULL), NULL); 175 | 176 | ck_assert(!tw_bloomfilter_equal(NULL, NULL)); 177 | ck_assert(!tw_bloomfilter_equal(a, NULL)); 178 | ck_assert(!tw_bloomfilter_equal(a, b)); 179 | ck_assert(!tw_bloomfilter_equal(a, c)); 180 | 181 | ck_assert_ptr_eq(tw_bloomfilter_union(NULL, 
NULL), NULL); 182 | ck_assert_ptr_eq(tw_bloomfilter_union(a, NULL), NULL); 183 | ck_assert_ptr_eq(tw_bloomfilter_union(NULL, b), NULL); 184 | 185 | ck_assert_ptr_eq(tw_bloomfilter_intersection(NULL, NULL), NULL); 186 | ck_assert_ptr_eq(tw_bloomfilter_intersection(a, NULL), NULL); 187 | ck_assert_ptr_eq(tw_bloomfilter_intersection(NULL, b), NULL); 188 | 189 | ck_assert_ptr_eq(tw_bloomfilter_xor(NULL, NULL), NULL); 190 | ck_assert_ptr_eq(tw_bloomfilter_xor(a, NULL), NULL); 191 | ck_assert_ptr_eq(tw_bloomfilter_xor(NULL, b), NULL); 192 | 193 | tw_bloomfilter_density(NULL); 194 | 195 | tw_bloomfilter_free(NULL); 196 | tw_bloomfilter_free(c); 197 | tw_bloomfilter_free(b); 198 | tw_bloomfilter_free(a); 199 | } 200 | END_TEST 201 | 202 | int run_tests() 203 | { 204 | int number_failed; 205 | 206 | Suite *s = suite_create("bloomfilter"); 207 | SRunner *runner = srunner_create(s); 208 | TCase *tc = tcase_create("basic"); 209 | tcase_add_test(tc, test_bloomfilter_basic); 210 | tcase_add_test(tc, test_bloomfilter_copy_and_clone); 211 | tcase_add_test(tc, test_bloomfilter_set_operations); 212 | tcase_add_test(tc, test_bloomfilter_errors); 213 | suite_add_tcase(s, tc); 214 | srunner_run_all(runner, CK_NORMAL); 215 | number_failed = srunner_ntests_failed(runner); 216 | srunner_free(runner); 217 | 218 | return number_failed; 219 | } 220 | 221 | int main() { return (run_tests() == 0) ? 
EXIT_SUCCESS : EXIT_FAILURE; } 222 | -------------------------------------------------------------------------------- /tests/test-minhash.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | 7 | #include "../src/twiddle/macrology.h" 8 | #include "test.h" 9 | /* True when |jaccard - jaccard_estimate| is strictly below (1 + epsilon) / sqrt(n), with epsilon = 0.25. */ 10 | static bool estimate_in_bounds(uint32_t n, float jaccard, 11 | float jaccard_estimate) 12 | { 13 | /* This is solely used to make tests pass within reasonable bounds */ 14 | const float epsilon = 0.25; 15 | return fabs(jaccard - jaccard_estimate) < (1 + epsilon) * (1 / sqrt(n)); 16 | } 17 | /* For each register count: add n_registers * 4 keys to a and every 4th of them to b, then check the estimate against the exact ratio. */ 18 | START_TEST(test_minhash_basic) 19 | { 20 | DESCRIBE_TEST; 21 | 22 | const uint32_t sizes[] = {256, 512, 1024, 2048, 4096, 1 << 13}; 23 | 24 | const uint8_t sample = 4; 25 | 26 | for (size_t i = 0; i < TW_ARRAY_SIZE(sizes); ++i) { 27 | const uint32_t n_registers = sizes[i]; 28 | struct tw_minhash *a = tw_minhash_new(n_registers); 29 | struct tw_minhash *b = tw_minhash_new(n_registers); 30 | 31 | const uint32_t n_items = n_registers * 4; 32 | uint32_t intersection = 0; 33 | for (size_t j = 0; j < n_items; ++j) { 34 | const size_t key_size = sizeof(j); 35 | const void *key = (void *)&j; 36 | 37 | tw_minhash_add(a, key, key_size); 38 | if (j % sample == 0) { 39 | tw_minhash_add(b, key, key_size); 40 | intersection++; 41 | } 42 | } 43 | 44 | const float jaccard = (float)intersection / (float)n_items; 45 | const float jaccard_estimate = tw_minhash_estimate(a, b); 46 | ck_assert(estimate_in_bounds(n_registers, jaccard, jaccard_estimate)); 47 | 48 | tw_minhash_free(b); 49 | tw_minhash_free(a); 50 | } 51 | } 52 | END_TEST 53 | 54 | START_TEST(test_minhash_copy_and_clone) 55 | { 56 | DESCRIBE_TEST; 57 | 58 | const uint32_t sizes[] = {256, 512, 1024, 2048, 4096, 1 << 13}; 59 | 60 | for (size_t i = 0; i < TW_ARRAY_SIZE(sizes); ++i) { 61 | const uint32_t n_registers = sizes[i]; 62 | struct tw_minhash *a = tw_minhash_new(n_registers); 63 | 
struct tw_minhash *b = tw_minhash_clone(a); 64 | 65 | ck_assert(tw_minhash_equal(a, b)); 66 | 67 | const uint32_t n_items = n_registers / 2; 68 | for (size_t j = 0; j < n_items; ++j) { 69 | const size_t key_size = sizeof(j); 70 | const void *key = (void *)&j; 71 | tw_minhash_add(a, key, key_size); 72 | } 73 | 74 | ck_assert(!tw_minhash_equal(a, b)); 75 | 76 | for (size_t j = 0; j < n_items; ++j) { 77 | const size_t key_size = sizeof(j); 78 | const void *key = (void *)&j; 79 | tw_minhash_add(b, key, key_size); 80 | } 81 | 82 | ck_assert(tw_minhash_equal(a, b)); 83 | 84 | struct tw_minhash *c = tw_minhash_new(n_registers); 85 | 86 | ck_assert(!tw_minhash_equal(a, c)); 87 | ck_assert_ptr_ne(tw_minhash_copy(b, c), NULL); 88 | ck_assert(tw_minhash_equal(a, c)); 89 | 90 | tw_minhash_free(c); 91 | tw_minhash_free(b); 92 | tw_minhash_free(a); 93 | } 94 | } 95 | END_TEST 96 | 97 | START_TEST(test_minhash_merge) 98 | { 99 | DESCRIBE_TEST; 100 | 101 | const uint32_t sizes[] = {16, 32, 64, 128, 256, 102 | 512, 1024, 2048, 4096, 1 << 13}; 103 | 104 | for (size_t i = 0; i < TW_ARRAY_SIZE(sizes); ++i) { 105 | const uint32_t n_registers = sizes[i]; 106 | struct tw_minhash *a = tw_minhash_new(n_registers); 107 | struct tw_minhash *b = tw_minhash_new(n_registers); 108 | struct tw_minhash *f = tw_minhash_new(n_registers); 109 | 110 | const uint32_t n_items = n_registers * 4; 111 | for (size_t j = 0; j < n_items; ++j) { 112 | const size_t key_size = sizeof(j); 113 | const void *key = (void *)&j; 114 | tw_minhash_add(((j % 2) ? 
a : b), key, key_size); 115 | tw_minhash_add(f, key, key_size); 116 | } 117 | 118 | ck_assert(estimate_in_bounds(n_registers, 0, tw_minhash_estimate(a, b))); 119 | ck_assert(estimate_in_bounds(n_registers, 0.5, tw_minhash_estimate(a, f))); 120 | ck_assert(estimate_in_bounds(n_registers, 0.5, tw_minhash_estimate(b, f))); 121 | 122 | struct tw_minhash *u = tw_minhash_clone(a); 123 | ck_assert_ptr_ne(tw_minhash_merge(b, u), NULL); 124 | 125 | for (size_t j = 0; j < n_registers; j++) { 126 | bool same_value = 127 | tw_max(a->registers[j], b->registers[j]) == u->registers[j]; 128 | ck_assert(same_value); 129 | } 130 | 131 | ck_assert(estimate_in_bounds(n_registers, 0.5, tw_minhash_estimate(a, u))); 132 | ck_assert(estimate_in_bounds(n_registers, 0.5, tw_minhash_estimate(b, u))); 133 | ck_assert(estimate_in_bounds(n_registers, 1.0, tw_minhash_estimate(f, u))); 134 | 135 | tw_minhash_free(u); 136 | tw_minhash_free(f); 137 | tw_minhash_free(b); 138 | tw_minhash_free(a); 139 | } 140 | } 141 | END_TEST 142 | 143 | START_TEST(test_minhash_errors) 144 | { 145 | DESCRIBE_TEST; 146 | 147 | const uint32_t a_size = 1 << 16, b_size = (1 << 16) + 1; 148 | 149 | struct tw_minhash *a = tw_minhash_new(a_size); 150 | struct tw_minhash *b = tw_minhash_new(b_size); 151 | 152 | ck_assert_ptr_eq(tw_minhash_new(0), NULL); 153 | 154 | ck_assert_ptr_eq(tw_minhash_copy(a, b), NULL); 155 | ck_assert_ptr_eq(tw_minhash_copy(a, NULL), NULL); 156 | ck_assert_ptr_eq(tw_minhash_copy(NULL, a), NULL); 157 | ck_assert_ptr_eq(tw_minhash_clone(NULL), NULL); 158 | 159 | tw_minhash_add(NULL, NULL, 0); 160 | tw_minhash_add(a, NULL, 1); 161 | tw_minhash_add(a, &a_size, 0); 162 | tw_minhash_add(a, &a_size, 1); 163 | 164 | tw_minhash_estimate(a, b); 165 | tw_minhash_estimate(a, NULL); 166 | tw_minhash_estimate(NULL, NULL); 167 | 168 | ck_assert(!tw_minhash_equal(a, b)); 169 | ck_assert(!tw_minhash_equal(NULL, b)); 170 | ck_assert(!tw_minhash_equal(a, NULL)); 171 | 172 | ck_assert_ptr_eq(tw_minhash_merge(a, b), 
NULL); 173 | ck_assert_ptr_eq(tw_minhash_merge(a, NULL), NULL); 174 | ck_assert_ptr_eq(tw_minhash_merge(NULL, b), NULL); 175 | 176 | tw_minhash_free(NULL); 177 | tw_minhash_free(b); 178 | tw_minhash_free(a); 179 | } 180 | END_TEST 181 | 182 | int run_tests() 183 | { 184 | int number_failed; 185 | 186 | Suite *s = suite_create("minhash"); 187 | SRunner *runner = srunner_create(s); 188 | TCase *tc = tcase_create("basic"); 189 | tcase_add_test(tc, test_minhash_basic); 190 | tcase_add_test(tc, test_minhash_copy_and_clone); 191 | tcase_add_test(tc, test_minhash_merge); 192 | tcase_add_test(tc, test_minhash_errors); 193 | /* added for travis slowness of clang */ 194 | tcase_set_timeout(tc, 15); 195 | suite_add_tcase(s, tc); 196 | srunner_run_all(runner, CK_NORMAL); 197 | number_failed = srunner_ntests_failed(runner); 198 | srunner_free(runner); 199 | 200 | return number_failed; 201 | } 202 | 203 | int main() { return (run_tests() == 0) ? EXIT_SUCCESS : EXIT_FAILURE; } 204 | -------------------------------------------------------------------------------- /tests/test.h: -------------------------------------------------------------------------------- 1 | #include "check/check.h" 2 | #include 3 | #include 4 | 5 | #undef _ck_assert_ptr 6 | #define _ck_assert_ptr(X, OP, Y) \ 7 | do { \ 8 | const void *_ck_x = (X); \ 9 | const void *_ck_y = (Y); \ 10 | ck_assert_msg(_ck_x OP _ck_y, \ 11 | "Assertion '%s' failed: %s == %p, %s == %p", \ 12 | #X " " #OP " " #Y, #X, _ck_x, #Y, _ck_y); \ 13 | } while (0) 14 | 15 | #define DESCRIBE_TEST fprintf(stderr, "--- %s\n", __func__); 16 | 17 | #define _ck_assert_type(type, typefmt, X, OP, Y) \ 18 | do { \ 19 | type _ck_x = (X); \ 20 | type _ck_y = (Y); \ 21 | ck_assert_msg(_ck_x OP _ck_y, "Assertion '%s' failed: %s == %" typefmt \ 22 | " , %s == %" typefmt, \ 23 | #X " " #OP " " #Y, #X, _ck_x, #Y, _ck_y); \ 24 | } while (0) 25 | 26 | #define ck_assert_uint8_t_eq(X, Y) _ck_assert_type(uint8_t, PRIu8, X, ==, Y) 27 | #define 
ck_assert_uint8_t_ne(X, Y) _ck_assert_type(uint8_t, PRIu8, X, !=, Y) 28 | #define ck_assert_uint8_t_lt(X, Y) _ck_assert_type(uint8_t, PRIu8, X, <, Y) 29 | #define ck_assert_uint8_t_gt(X, Y) _ck_assert_type(uint8_t, PRIu8, X, >, Y) 30 | #define ck_assert_uint8_t_le(X, Y) _ck_assert_type(uint8_t, PRIu8, X, <=, Y) 31 | #define ck_assert_uint8_t_ge(X, Y) _ck_assert_type(uint8_t, PRIu8, X, >=, Y) 32 | 33 | #define ck_assert_uint16_t_eq(X, Y) _ck_assert_type(uint16_t, PRIu16, X, ==, Y) 34 | #define ck_assert_uint16_t_ne(X, Y) _ck_assert_type(uint16_t, PRIu16, X, !=, Y) 35 | #define ck_assert_uint16_t_lt(X, Y) _ck_assert_type(uint16_t, PRIu16, X, <, Y) 36 | #define ck_assert_uint16_t_gt(X, Y) _ck_assert_type(uint16_t, PRIu16, X, >, Y) 37 | #define ck_assert_uint16_t_le(X, Y) _ck_assert_type(uint16_t, PRIu16, X, <=, Y) 38 | #define ck_assert_uint16_t_ge(X, Y) _ck_assert_type(uint16_t, PRIu16, X, >=, Y) 39 | 40 | #define ck_assert_uint32_t_eq(X, Y) _ck_assert_type(uint32_t, PRIu32, X, ==, Y) 41 | #define ck_assert_uint32_t_ne(X, Y) _ck_assert_type(uint32_t, PRIu32, X, !=, Y) 42 | #define ck_assert_uint32_t_lt(X, Y) _ck_assert_type(uint32_t, PRIu32, X, <, Y) 43 | #define ck_assert_uint32_t_gt(X, Y) _ck_assert_type(uint32_t, PRIu32, X, >, Y) 44 | #define ck_assert_uint32_t_le(X, Y) _ck_assert_type(uint32_t, PRIu32, X, <=, Y) 45 | #define ck_assert_uint32_t_ge(X, Y) _ck_assert_type(uint32_t, PRIu32, X, >=, Y) 46 | 47 | #define ck_assert_uint64_t_eq(X, Y) _ck_assert_type(uint64_t, PRIu64, X, ==, Y) 48 | #define ck_assert_uint64_t_ne(X, Y) _ck_assert_type(uint64_t, PRIu64, X, !=, Y) 49 | #define ck_assert_uint64_t_lt(X, Y) _ck_assert_type(uint64_t, PRIu64, X, <, Y) 50 | #define ck_assert_uint64_t_gt(X, Y) _ck_assert_type(uint64_t, PRIu64, X, >, Y) 51 | #define ck_assert_uint64_t_le(X, Y) _ck_assert_type(uint64_t, PRIu64, X, <=, Y) 52 | #define ck_assert_uint64_t_ge(X, Y) _ck_assert_type(uint64_t, PRIu64, X, >=, Y) 53 | 54 | #define ck_assert_int8_t_eq(X, Y) 
_ck_assert_type(int8_t, PRId8, X, ==, Y) 55 | #define ck_assert_int8_t_ne(X, Y) _ck_assert_type(int8_t, PRId8, X, !=, Y) 56 | #define ck_assert_int8_t_lt(X, Y) _ck_assert_type(int8_t, PRId8, X, <, Y) 57 | #define ck_assert_int8_t_gt(X, Y) _ck_assert_type(int8_t, PRId8, X, >, Y) 58 | #define ck_assert_int8_t_le(X, Y) _ck_assert_type(int8_t, PRId8, X, <=, Y) 59 | #define ck_assert_int8_t_ge(X, Y) _ck_assert_type(int8_t, PRId8, X, >=, Y) 60 | 61 | #define ck_assert_int16_t_eq(X, Y) _ck_assert_type(int16_t, PRId16, X, ==, Y) 62 | #define ck_assert_int16_t_ne(X, Y) _ck_assert_type(int16_t, PRId16, X, !=, Y) 63 | #define ck_assert_int16_t_lt(X, Y) _ck_assert_type(int16_t, PRId16, X, <, Y) 64 | #define ck_assert_int16_t_gt(X, Y) _ck_assert_type(int16_t, PRId16, X, >, Y) 65 | #define ck_assert_int16_t_le(X, Y) _ck_assert_type(int16_t, PRId16, X, <=, Y) 66 | #define ck_assert_int16_t_ge(X, Y) _ck_assert_type(int16_t, PRId16, X, >=, Y) 67 | 68 | #define ck_assert_int32_t_eq(X, Y) _ck_assert_type(int32_t, PRId32, X, ==, Y) 69 | #define ck_assert_int32_t_ne(X, Y) _ck_assert_type(int32_t, PRId32, X, !=, Y) 70 | #define ck_assert_int32_t_lt(X, Y) _ck_assert_type(int32_t, PRId32, X, <, Y) 71 | #define ck_assert_int32_t_gt(X, Y) _ck_assert_type(int32_t, PRId32, X, >, Y) 72 | #define ck_assert_int32_t_le(X, Y) _ck_assert_type(int32_t, PRId32, X, <=, Y) 73 | #define ck_assert_int32_t_ge(X, Y) _ck_assert_type(int32_t, PRId32, X, >=, Y) 74 | 75 | #define ck_assert_int64_t_eq(X, Y) _ck_assert_type(int64_t, PRId64, X, ==, Y) 76 | #define ck_assert_int64_t_ne(X, Y) _ck_assert_type(int64_t, PRId64, X, !=, Y) 77 | #define ck_assert_int64_t_lt(X, Y) _ck_assert_type(int64_t, PRId64, X, <, Y) 78 | #define ck_assert_int64_t_gt(X, Y) _ck_assert_type(int64_t, PRId64, X, >, Y) 79 | #define ck_assert_int64_t_le(X, Y) _ck_assert_type(int64_t, PRId64, X, <=, Y) 80 | #define ck_assert_int64_t_ge(X, Y) _ck_assert_type(int64_t, PRId64, X, >=, Y) 81 | 82 | /* 83 | #undef _ck_assert_type_all 84 | #undef 
_ck_assert_type 85 | */ 86 | -------------------------------------------------------------------------------- /tools/cmake/FindLTO.cmake: -------------------------------------------------------------------------------- 1 | # Credits goes to nkurz see 2 | # https://github.com/RoaringBitmap/CRoaring/blob/master/tools/cmake/FindLTO.cmake 3 | 4 | macro(append var string) 5 | set(${var} "${${var}} ${string}") 6 | endmacro(append) 7 | 8 | if(USE_LTO) 9 | if ("${CMAKE_C_COMPILER_ID}" MATCHES "Intel") 10 | append(CMAKE_C_FLAGS "-ipo") 11 | append(CMAKE_CXX_FLAGS "-ipo") 12 | elseif ("${CMAKE_C_COMPILER_ID}" MATCHES "Clang") 13 | append(CMAKE_C_FLAGS "-flto") 14 | append(CMAKE_CXX_FLAGS "-flto") 15 | append(CMAKE_EXE_LINKER_FLAGS "-fuse-ld=gold -flto") 16 | append(CMAKE_SHARED_LINKER_FLAGS "-fuse-ld=gold -flto") 17 | if(NOT ("${CMAKE_AR}" MATCHES "llvm-ar-*")) 18 | message(FATAL_ERROR 19 | "You requested clang compiler & LTO without providing CMAKE_AR. " 20 | "CMAKE_AR is currently set to ${CMAKE_AR}. " 21 | "To fix this, invoke cmake with `-DCMAKE_AR=$(which llvm-ar-$version)`.") 22 | endif() 23 | if(NOT ("${CMAKE_RANLIB}" MATCHES "llvm-ranlib-*")) 24 | message(FATAL_ERROR 25 | "You requested clang compiler & LTO without providing CMAKE_RANLIB. " 26 | "CMAKE_RANLIB is currently set to ${CMAKE_RANLIB}. 
" 27 | "To fix this, invoke cmake `-DCMAKE_RANLIB=$(which llvm-ranlib-$version).`") 28 | endif() 29 | else() 30 | append(CMAKE_C_FLAGS "-flto") 31 | append(CMAKE_CXX_FLAGS "-flto") 32 | set(CMAKE_AR "gcc-ar") 33 | set(CMAKE_RANLIB "gcc-ranlib") 34 | endif() 35 | endif(USE_LTO) 36 | -------------------------------------------------------------------------------- /tools/cmake/FindOptions.cmake: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------- 2 | # Set some options 3 | 4 | # set C standard 5 | if (CMAKE_VERSION VERSION_LESS "3.1") 6 | if (CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") 7 | set (CMAKE_C_FLAGS "-std=gnu11 ${CMAKE_C_FLAGS}") 8 | endif () 9 | else () 10 | set (CMAKE_C_STANDARD 11) 11 | endif () 12 | 13 | 14 | set(CMAKE_INSTALL_LIBDIR lib CACHE STRING 15 | "The base name of the installation directory for libraries") 16 | 17 | # default to Release type if not provided 18 | # compile to -03 with no assert 19 | if(NOT CMAKE_BUILD_TYPE) 20 | set(CMAKE_BUILD_TYPE Release CACHE STRING 21 | "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel." 
22 | FORCE) 23 | endif(NOT CMAKE_BUILD_TYPE) 24 | 25 | 26 | if(USE_AVX512) 27 | if (CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") 28 | add_definitions(-mavx512f -mavx2 -mavx) 29 | elseif (CMAKE_C_COMPILER_ID MATCHES "Intel") 30 | add_definitions(-xCORE-AVX512) 31 | endif () 32 | add_definitions(-DUSE_AVX512=1 -DUSE_AVX2=1 -DUSE_AVX=1) 33 | elseif(USE_AVX2) 34 | if (CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") 35 | add_definitions(-mavx2 -mavx) 36 | elseif (CMAKE_C_COMPILER_ID MATCHES "Intel") 37 | add_definitions(-xCORE-AVX2) 38 | endif () 39 | add_definitions(-DUSE_AVX2=1) 40 | add_definitions(-DUSE_AVX=1) 41 | elseif(USE_AVX) 42 | if (CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") 43 | add_definitions(-mavx) 44 | elseif (CMAKE_C_COMPILER_ID MATCHES "Intel") 45 | add_definitions(-xCORE-AVX-I) 46 | endif () 47 | add_definitions(-DUSE_AVX=1) 48 | else() 49 | add_definitions(-DUSE_PORTABLE=1) 50 | endif() 51 | 52 | # strict compile options 53 | add_definitions(-Wall -Werror -pedantic -Wextra -Wshadow -Wfloat-equal -Wundef 54 | -Wwrite-strings -Wunreachable-code -Wformat=2 -Winit-self) 55 | -------------------------------------------------------------------------------- /tools/cmake/FindParseArguments.cmake: -------------------------------------------------------------------------------- 1 | # CMake 2.8.4 and higher gives us cmake_parse_arguments out of the box. For 2 | # earlier versions (RHEL5!) we have to define it ourselves. (The definition 3 | # comes from .) 
4 | 5 | if (CMAKE_VERSION VERSION_LESS "2.8.4") 6 | 7 | MACRO(CMAKE_PARSE_ARGUMENTS prefix arg_names option_names) 8 | SET(DEFAULT_ARGS) 9 | FOREACH(arg_name ${arg_names}) 10 | SET(${prefix}_${arg_name}) 11 | ENDFOREACH(arg_name) 12 | FOREACH(option ${option_names}) 13 | SET(${prefix}_${option} FALSE) 14 | ENDFOREACH(option) 15 | 16 | SET(current_arg_name DEFAULT_ARGS) 17 | SET(current_arg_list) 18 | FOREACH(arg ${ARGN}) 19 | SET(larg_names ${arg_names}) 20 | LIST(FIND larg_names "${arg}" is_arg_name) 21 | IF (is_arg_name GREATER -1) 22 | SET(${prefix}_${current_arg_name} ${current_arg_list}) 23 | SET(current_arg_name ${arg}) 24 | SET(current_arg_list) 25 | ELSE (is_arg_name GREATER -1) 26 | SET(loption_names ${option_names}) 27 | LIST(FIND loption_names "${arg}" is_option) 28 | IF (is_option GREATER -1) 29 | SET(${prefix}_${arg} TRUE) 30 | ELSE (is_option GREATER -1) 31 | SET(current_arg_list ${current_arg_list} ${arg}) 32 | ENDIF (is_option GREATER -1) 33 | ENDIF (is_arg_name GREATER -1) 34 | ENDFOREACH(arg) 35 | SET(${prefix}_${current_arg_name} ${current_arg_list}) 36 | ENDMACRO(CMAKE_PARSE_ARGUMENTS) 37 | 38 | else (CMAKE_VERSION VERSION_LESS "2.8.4") 39 | 40 | include(CMakeParseArguments) 41 | 42 | endif (CMAKE_VERSION VERSION_LESS "2.8.4") 43 | -------------------------------------------------------------------------------- /tools/cmake/FindPrereqs.cmake: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------- 2 | # Configuration options that control all of the below 3 | 4 | set(PKG_CONFIG_PATH CACHE STRING "pkg-config search path") 5 | if (PKG_CONFIG_PATH) 6 | set(ENV{PKG_CONFIG_PATH} "${PKG_CONFIG_PATH}:$ENV{PKG_CONFIG_PATH}") 7 | endif (PKG_CONFIG_PATH) 8 | 9 | 10 | #----------------------------------------------------------------------- 11 | # pkg-config prerequisites 12 | 13 | find_package(PkgConfig) 14 | 15 | function(pkgconfig_prereq DEP) 16 | 
set(options OPTIONAL) 17 | set(one_args) 18 | set(multi_args) 19 | cmake_parse_arguments(_ "${options}" "${one_args}" "${multi_args}" ${ARGN}) 20 | 21 | string(REGEX REPLACE "[<>=].*" "" SHORT_NAME "${DEP}") 22 | string(REPLACE "-" "_" SHORT_NAME "${SHORT_NAME}") 23 | string(TOUPPER ${SHORT_NAME} UPPER_SHORT_NAME) 24 | string(TOLOWER ${SHORT_NAME} LOWER_SHORT_NAME) 25 | 26 | set(USE_CUSTOM_${UPPER_SHORT_NAME} NO CACHE BOOL 27 | "Whether you want to provide custom details for ${LOWER_SHORT_NAME}") 28 | 29 | if (NOT USE_CUSTOM_${UPPER_SHORT_NAME}) 30 | set(PKG_CHECK_ARGS) 31 | if (NOT __OPTIONAL) 32 | list(APPEND PKG_CHECK_ARGS REQUIRED) 33 | endif (NOT __OPTIONAL) 34 | list(APPEND PKG_CHECK_ARGS ${DEP}) 35 | 36 | pkg_check_modules(${UPPER_SHORT_NAME} ${PKG_CHECK_ARGS}) 37 | endif (NOT USE_CUSTOM_${UPPER_SHORT_NAME}) 38 | 39 | include_directories(${${UPPER_SHORT_NAME}_INCLUDE_DIRS}) 40 | link_directories(${${UPPER_SHORT_NAME}_LIBRARY_DIRS}) 41 | endfunction(pkgconfig_prereq) 42 | 43 | 44 | #----------------------------------------------------------------------- 45 | # find_library prerequisites 46 | 47 | function(library_prereq LIB_NAME) 48 | set(options OPTIONAL) 49 | set(one_args) 50 | set(multi_args) 51 | cmake_parse_arguments(_ "${options}" "${one_args}" "${multi_args}" ${ARGN}) 52 | 53 | string(REPLACE "-" "_" SHORT_NAME "${LIB_NAME}") 54 | string(TOUPPER ${SHORT_NAME} UPPER_SHORT_NAME) 55 | string(TOLOWER ${SHORT_NAME} LOWER_SHORT_NAME) 56 | 57 | set(USE_CUSTOM_${UPPER_SHORT_NAME} NO CACHE BOOL 58 | "Whether you want to provide custom details for ${LOWER_SHORT_NAME}") 59 | 60 | if (USE_CUSTOM_${UPPER_SHORT_NAME}) 61 | include_directories(${${UPPER_SHORT_NAME}_INCLUDE_DIRS}) 62 | link_directories(${${UPPER_SHORT_NAME}_LIBRARY_DIRS}) 63 | if (NOT ${UPPER_SHORT_NAME}_STATIC_LDFLAGS) 64 | set(${UPPER_SHORT_NAME}_STATIC_LDFLAGS 65 | ${${UPPER_SHORT_NAME}_LDFLAGS} 66 | PARENT_SCOPE) 67 | endif (NOT ${UPPER_SHORT_NAME}_STATIC_LDFLAGS) 68 | else 
(USE_CUSTOM_${UPPER_SHORT_NAME}) 69 | find_library(${UPPER_SHORT_NAME}_LDFLAGS ${LIB_NAME}) 70 | set(${UPPER_SHORT_NAME}_STATIC_LDFLAGS 71 | ${${UPPER_SHORT_NAME}_LDFLAGS} 72 | PARENT_SCOPE) 73 | endif (USE_CUSTOM_${UPPER_SHORT_NAME}) 74 | 75 | endfunction(library_prereq) 76 | -------------------------------------------------------------------------------- /tools/cmake/FindVersion.cmake: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------- 2 | # Retrieve the current version number 3 | 4 | execute_process( 5 | COMMAND git describe 6 | WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} 7 | RESULT_VARIABLE VERSION_RESULT 8 | OUTPUT_VARIABLE VERSION 9 | OUTPUT_STRIP_TRAILING_WHITESPACE 10 | ) 11 | if(VERSION_RESULT) 12 | set(VERSION "0.0.0") 13 | message(STATUS "Cannot determine version number reverting to: " ${VERSION}) 14 | endif(VERSION_RESULT) 15 | message(STATUS "Current version: " ${VERSION}) 16 | 17 | if(VERSION MATCHES "^([0-9]+)\\.([0-9]+)\\.([0-9]+)(-.*)?$") 18 | set(VERSION_MAJOR "${CMAKE_MATCH_1}") 19 | set(VERSION_MINOR "${CMAKE_MATCH_2}") 20 | set(VERSION_PATCH "${CMAKE_MATCH_3}") 21 | set(VERSION_COMMIT "${CMAKE_MATCH_4}") 22 | else(VERSION MATCHES "^([0-9]+)\\.([0-9]+)\\.([0-9]+)(-.*)?$") 23 | message(FATAL_ERROR "Invalid version number: ${VERSION}") 24 | endif(VERSION MATCHES "^([0-9]+)\\.([0-9]+)\\.([0-9]+)(-.*)?$") 25 | 26 | execute_process( 27 | COMMAND git rev-parse HEAD 28 | WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} 29 | RESULT_VARIABLE GIT_SHA1_RESULT 30 | OUTPUT_VARIABLE GIT_SHA1 31 | OUTPUT_STRIP_TRAILING_WHITESPACE 32 | ) 33 | if(GIT_SHA1_RESULT) 34 | message(FATAL_ERROR 35 | "Cannot determine git commit: " ${GIT_SHA1_RESULT}) 36 | endif(GIT_SHA1_RESULT) 37 | message(STATUS "Current revision: " ${GIT_SHA1}) 38 | -------------------------------------------------------------------------------- /tools/cram/ccram: 
-------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | if [ "$1" = "--python" ]; then 4 | shift 5 | PYTHON="$1" 6 | shift 7 | else 8 | PYTHON=python 9 | fi 10 | 11 | if [ "$1" = "--root" ]; then 12 | shift 13 | ROOT="$1" 14 | shift 15 | else 16 | ROOT=$(dirname "$PWD") 17 | fi 18 | 19 | if [ "$1" = "--build" ]; then 20 | shift 21 | BUILD="$1" 22 | shift 23 | else 24 | BUILD=$(dirname "$PWD") 25 | fi 26 | 27 | if [ "$1" = "--tests" ]; then 28 | shift 29 | TESTS="$1" 30 | shift 31 | else 32 | TESTS=../tests 33 | fi 34 | 35 | export ROOT 36 | 37 | LD_LIBRARY_PATH="$PWD/src:$LD_LIBRARY_PATH" \ 38 | PATH="$BUILD:$PATH" \ 39 | "$PYTHON" "$ROOT/../../tools/cram/cram.py" "$@" "$TESTS" 40 | -------------------------------------------------------------------------------- /tools/docurium/Gemfile: -------------------------------------------------------------------------------- 1 | source "https://rubygems.org" 2 | 3 | platforms :rbx do 4 | gem 'rubysl', '~> 2.0' 5 | end 6 | 7 | gem 'docurium', :git => 'https://github.com/fsaintjacques/docurium.git', :branch => 'feature/group-command' 8 | 9 | # vim:ft=ruby 10 | -------------------------------------------------------------------------------- /tools/docurium/api.docurium: -------------------------------------------------------------------------------- 1 | { 2 | "name": "libtwiddle", 3 | "github": "fsaintjacques/libtwiddle", 4 | "input": "include/twiddle", 5 | "prefix": "tw_", 6 | "branch": "gh-pages", 7 | "examples": "tests/examples", 8 | "versions": ["1.0.0"] 9 | } 10 | -------------------------------------------------------------------------------- /tools/docurium/gen-doc: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -o errexit -o nounset 4 | 5 | DOCU_HOME="$(cd "$(dirname "$(which "$0")")"/.. 
>/dev/null; pwd -P)"/docurium 6 | 7 | install() { 8 | bundler install --binstubs=bin/ --path vendor 9 | } 10 | 11 | generate() { 12 | LLVM_CONFIG=llvm-config-3.8 bin/cm doc api.docurium 13 | } 14 | 15 | pushd "$DOCU_HOME" 16 | 17 | if [ ! -e bin/cm ]; then 18 | install 19 | fi 20 | 21 | generate 22 | 23 | popd 24 | -------------------------------------------------------------------------------- /tools/git/hook-clang-format: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -o nounset 3 | 4 | FILE_EXTS=${FILE_EXTS:-(c|h)} 5 | 6 | get_files() { 7 | git diff --cached --name-only --diff-filter=ACMRT | \ 8 | egrep "\.$FILE_EXTS\$" 9 | } 10 | 11 | _IFS=$IFS 12 | IFS=$'\n' 13 | FILES=($(get_files)) 14 | IFS=$_IFS 15 | 16 | for ((i = 0; i < ${#FILES[@]}; ++i)); do 17 | file=${FILES[$i]} 18 | clang-format "$file" | cmp -s "$file" - 19 | if [ $? -ne 0 ]; then 20 | echo "File does not respect formatting style: $file" 1>&2 21 | exit 1 22 | fi 23 | done 24 | -------------------------------------------------------------------------------- /tools/travis/test: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -o errexit 4 | set -o nounset 5 | set -o pipefail 6 | 7 | set -o xtrace 8 | 9 | : ${BUILD_TYPE:=Release} 10 | : ${RUN_VALGRIND:=false} 11 | : ${RUN_HYPOTHESIS:=false} 12 | : ${VERBOSE:=false} 13 | 14 | function set_travis_env { 15 | 16 | VERBOSE=true 17 | 18 | if [[ -n "${CLANG_VERSION:-}" ]]; then 19 | export CC="clang-${CLANG_VERSION}" ; 20 | export CXX="clang++-${CLANG_VERSION}" ; 21 | export GCOV="llvm-cov-${CLANG_VERSION}" ; 22 | elif [[ -n "${GCC_VERSION:-}" ]]; then 23 | export CC="gcc-${GCC_VERSION}" ; 24 | export CXX="g++-${GCC_VERSION}" ; 25 | export GCOV="gcov-${GCC_VERSION}" ; 26 | fi 27 | 28 | if [[ ${TRAVIS_PULL_REQUEST:-false} != "false" ]]; then 29 | RUN_HYPOTHESIS=true 30 | RUN_VALGRIND=true 31 | fi 32 | 33 | } 34 | 35 | function 
run_tests { 36 | local name=$1 37 | 38 | local dir="build-${name}" 39 | 40 | # if you change this, modify `python/twiddle/c.py` accordingly. 41 | export BUILD_DIR=$dir 42 | 43 | mkdir -p "$dir" 44 | 45 | CMAKE_FLAGS="-DUSE_AVX=OFF" 46 | if [[ ${name:-} = "avx512" ]]; then 47 | CMAKE_FLAGS="-DUSE_AVX512=ON" 48 | elif [[ ${name:-} = "avx2" ]]; then 49 | CMAKE_FLAGS="-DUSE_AVX2=ON" 50 | elif [[ ${name:-} = "avx" ]]; then 51 | CMAKE_FLAGS="-DUSE_AVX=ON" 52 | if [[ ${RUN_VALGRIND:-false} != "false" ]]; then 53 | CMAKE_FLAGS="-DUSE_VALGRIND=ON ${CMAKE_FLAGS}" 54 | fi 55 | fi 56 | 57 | 58 | pushd "$dir" 59 | 60 | if [[ -n "${CLANG_VERSION:-}" ]]; then 61 | #LLVM_AR=$(which llvm-ar-${CLANG_VERSION}) 62 | #LLVM_RANLIB=$(which llvm-ranlib-${CLANG_VERSION}) 63 | #CMAKE_FLAGS="-DCMAKE_AR=${LLVM_AR} -DCMAKE_RANLIB=${LLVM_RANLIB} ${CMAKE_FLAGS}" 64 | 65 | # I'm not wasting time on travis anymore. If you're interested in building 66 | # libtwiddle with clang & LTO, see the previous commented invocation. 67 | CMAKE_FLAGS="-DUSE_LTO=OFF ${CMAKE_FLAGS}" 68 | fi 69 | 70 | cmake -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ 71 | ${CMAKE_FLAGS:-} .. 72 | 73 | # disable FORK (and incidentally timeouts) 74 | export CK_FORK=no 75 | 76 | if [[ ${VERBOSE:-false} != "false" ]]; then 77 | VERBOSE_FLAG="VERBOSE=1" 78 | fi 79 | 80 | # unit tests 81 | make all test ${VERBOSE_FLAG:-} 82 | popd 83 | 84 | if [[ ${RUN_HYPOTHESIS} != "false" ]]; then 85 | # property python tests 86 | run_property_tests 87 | fi 88 | 89 | } 90 | 91 | function run_property_tests { 92 | pushd python 93 | py_dir=. 
94 | venv_dir=${py_dir}/.venv 95 | virtualenv ${venv_dir} 96 | set +o nounset 97 | source ${venv_dir}/bin/activate 98 | set -o nounset 99 | pip install pytest hypothesis==1.19.0 100 | PYTHONPATH=${py_dir} py.test tests 101 | popd 102 | } 103 | 104 | function has_cpu_extension { 105 | local ext=$1 106 | grep -q "$ext" /proc/cpuinfo 107 | } 108 | 109 | function run_tests_with_extension { 110 | local ext=$1 111 | if has_cpu_extension "${ext}"; then 112 | run_tests "${ext}" 113 | fi 114 | } 115 | 116 | 117 | if [[ -n "${TRAVIS:-}" ]]; then 118 | set_travis_env 119 | fi 120 | 121 | run_tests portable 122 | run_tests_with_extension avx 123 | run_tests_with_extension avx2 124 | --------------------------------------------------------------------------------